k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/kubelet_pods.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
	"bytes"
	"context"
	goerrors "errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"os/user"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"

	"github.com/google/go-cmp/cmp"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	utilvalidation "k8s.io/apimachinery/pkg/util/validation"
	"k8s.io/apimachinery/pkg/util/version"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubelet/pkg/cri/streaming/portforward"
	remotecommandserver "k8s.io/kubelet/pkg/cri/streaming/remotecommand"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/api/v1/resource"
	podshelper "k8s.io/kubernetes/pkg/apis/core/pods"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/fieldpath"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/envvars"
	"k8s.io/kubernetes/pkg/kubelet/images"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/status"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util"
	utilfs "k8s.io/kubernetes/pkg/util/filesystem"
	utilkernel "k8s.io/kubernetes/pkg/util/kernel"
	utilpod "k8s.io/kubernetes/pkg/util/pod"
	volumeutil "k8s.io/kubernetes/pkg/volume/util"
	"k8s.io/kubernetes/pkg/volume/util/hostutil"
	"k8s.io/kubernetes/pkg/volume/util/subpath"
	"k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
	volumevalidation "k8s.io/kubernetes/pkg/volume/validation"
	"k8s.io/kubernetes/third_party/forked/golang/expansion"
	utilnet "k8s.io/utils/net"
)

const (
	managedHostsHeader                = "# Kubernetes-managed hosts file.\n"
	managedHostsHeaderWithHostNetwork = "# Kubernetes-managed hosts file (host network).\n"
)

// Container state reason list
const (
	PodInitializing   = "PodInitializing"
	ContainerCreating = "ContainerCreating"

	kubeletUser = "kubelet"
)

// parseGetSubIdsOutput parses the output from the `getsubids` tool, which is used to query
// subordinate user or group ID ranges for a given user or group. getsubids produces a line
// for each mapping configured. Here we expect that there is a single mapping, and that the
// same values are used for the subordinate user and group ID ranges.
// The output is something like:
//
//	$ getsubids kubelet
//	0: kubelet 65536 2147483648
//	$ getsubids -g kubelet
//	0: kubelet 65536 2147483648
func parseGetSubIdsOutput(input string) (uint32, uint32, error) {
	lines := strings.Split(strings.Trim(input, "\n"), "\n")
	if len(lines) != 1 {
		return 0, 0, fmt.Errorf("error parsing line %q: it must contain only one line", input)
	}

	parts := strings.Fields(lines[0])
	if len(parts) != 4 {
		return 0, 0, fmt.Errorf("invalid line %q", input)
	}

	// Parse the numeric fields: the start of the range and its length.
	num1, err := strconv.ParseUint(parts[2], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
	}

	num2, err := strconv.ParseUint(parts[3], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
	}

	return uint32(num1), uint32(num2), nil
}

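// A minimal usage sketch, assuming the documented getsubids output above
// (the values are the illustrative ones from the doc comment):
//
//	firstID, rangeLen, err := parseGetSubIdsOutput("0: kubelet 65536 2147483648\n")
//	// firstID == 65536, rangeLen == 2147483648, err == nil
//
//	// Output with more than one mapping line is rejected:
//	_, _, err = parseGetSubIdsOutput("0: kubelet 65536 1000\n1: kubelet 70000 1000\n")
//	// err != nil
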
// getKubeletMappings returns the range of IDs that can be used to configure user namespaces.
// If subordinate user or group ID ranges are specified for the kubelet user and the getsubids
// tool is installed, then the single mapping specified both for user and group IDs will be used.
// If the tool is not installed, or there are no IDs configured, the default mapping is returned.
// The default mapping includes the entire IDs range except IDs below 65536.
func (kl *Kubelet) getKubeletMappings() (uint32, uint32, error) {
	// default mappings to return if there is no specific configuration
	const defaultFirstID = 1 << 16
	const defaultLen = 1<<32 - defaultFirstID

	if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) {
		return defaultFirstID, defaultLen, nil
	} else {
		kernelVersion, err := utilkernel.GetVersion()
		if err != nil {
			return 0, 0, fmt.Errorf("failed to get kernel version, unable to determine if feature %s can be supported: %w",
				features.UserNamespacesSupport, err)
		}
		if kernelVersion != nil && !kernelVersion.AtLeast(version.MustParseGeneric(utilkernel.UserNamespacesSupportKernelVersion)) {
			klog.InfoS("WARNING: the kernel version is incompatible with the feature gate, which requires a minimum kernel version",
				"kernelVersion", kernelVersion, "feature", features.UserNamespacesSupport, "minKernelVersion", utilkernel.UserNamespacesSupportKernelVersion)
		}
	}

	_, err := user.Lookup(kubeletUser)
	if err != nil {
		var unknownUserErr user.UnknownUserError
		if goerrors.As(err, &unknownUserErr) {
			// if the user is not found, we assume that the user is not configured
			return defaultFirstID, defaultLen, nil
		}
		return 0, 0, err
	}

	execName := "getsubids"
	cmd, err := exec.LookPath(execName)
	if err != nil {
		if os.IsNotExist(err) {
			klog.V(2).InfoS("Could not find executable, default mappings will be used for the user namespaces", "executable", execName, "err", err)
			return defaultFirstID, defaultLen, nil
		}
		return 0, 0, err
	}
	outUids, err := exec.Command(cmd, kubeletUser).Output()
	if err != nil {
		return 0, 0, fmt.Errorf("error retrieving additional ids for user %q", kubeletUser)
	}
	outGids, err := exec.Command(cmd, "-g", kubeletUser).Output()
	if err != nil {
		return 0, 0, fmt.Errorf("error retrieving additional gids for user %q", kubeletUser)
	}
	if string(outUids) != string(outGids) {
		return 0, 0, fmt.Errorf("mismatched subuids and subgids for user %q", kubeletUser)
	}

	return parseGetSubIdsOutput(string(outUids))
}

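// For reference, when the feature gate is disabled, the kubelet user does not
// exist, or getsubids is not installed, the default mapping returned above is:
//
//	firstID == 65536      (1 << 16)
//	length  == 4294901760 (1<<32 - 1<<16)
//
// i.e. the whole 32-bit ID space except the host-reserved IDs below 65536.
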
// Get a list of pods that have data directories.
func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
	podInfos, err := os.ReadDir(kl.getPodsDir())
	if err != nil {
		return nil, err
	}
	pods := []types.UID{}
	for i := range podInfos {
		if podInfos[i].IsDir() {
			pods = append(pods, types.UID(podInfos[i].Name()))
		}
	}
	return pods, nil
}

// GetActivePods returns pods that have been admitted to the kubelet that
// are not fully terminated. This is mapped to the "desired state" of the
// kubelet - what pods should be running.
//
// WARNING: Currently this list does not include pods that have been force
// deleted but may still be terminating, which means resources assigned to
// those pods during admission may still be in use. See
// https://github.com/kubernetes/kubernetes/issues/104824
func (kl *Kubelet) GetActivePods() []*v1.Pod {
	allPods := kl.podManager.GetPods()
	activePods := kl.filterOutInactivePods(allPods)
	return activePods
}

// makeBlockVolumes maps the raw block devices specified in the path of the container
// Experimental
func (kl *Kubelet) makeBlockVolumes(pod *v1.Pod, container *v1.Container, podVolumes kubecontainer.VolumeMap, blkutil volumepathhandler.BlockVolumePathHandler) ([]kubecontainer.DeviceInfo, error) {
	var devices []kubecontainer.DeviceInfo
	for _, device := range container.VolumeDevices {
		// check path is absolute
		if !utilfs.IsAbs(device.DevicePath) {
			return nil, fmt.Errorf("error DevicePath `%s` must be an absolute path", device.DevicePath)
		}
		vol, ok := podVolumes[device.Name]
		if !ok || vol.BlockVolumeMapper == nil {
			klog.ErrorS(nil, "Block volume cannot be satisfied for container, because the volume is missing or the volume mapper is nil", "containerName", container.Name, "device", device)
			return nil, fmt.Errorf("cannot find volume %q to pass into container %q", device.Name, container.Name)
		}
		// Get a symbolic link associated to a block device under pod device path
		dirPath, volName := vol.BlockVolumeMapper.GetPodDeviceMapPath()
		symlinkPath := filepath.Join(dirPath, volName)
		if islinkExist, checkErr := blkutil.IsSymlinkExist(symlinkPath); checkErr != nil {
			return nil, checkErr
		} else if islinkExist {
			// Check readOnly in PVCVolumeSource and set read only permission if it's true.
			permission := "mrw"
			if vol.ReadOnly {
				permission = "r"
			}
			klog.V(4).InfoS("Device will be attached to container in the corresponding path on host", "containerName", container.Name, "path", symlinkPath)
			devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: symlinkPath, PathInContainer: device.DevicePath, Permissions: permission})
		}
	}

	return devices, nil
}

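// To make the result concrete: for a container declaring a volumeDevice
// {Name: "data", DevicePath: "/dev/xvda"} whose mapper resolves the symlink
// under the pod device path (the paths here are hypothetical), the entry
// appended above would look roughly like:
//
//	kubecontainer.DeviceInfo{
//		PathOnHost:      "/var/lib/kubelet/pods/<uid>/volumeDevices/kubernetes.io~csi/data",
//		PathInContainer: "/dev/xvda",
//		Permissions:     "mrw", // "r" when vol.ReadOnly is true
//	}
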
// shouldMountHostsFile checks if the node's /etc/hosts should be mounted.
// Kubernetes only mounts /etc/hosts if:
//   - the container is not an infrastructure (pause) container, and
//   - the container is not already mounting on /etc/hosts.
//
// Kubernetes will not mount /etc/hosts if:
//   - the Pod sandbox is still being created and its IP is unknown (PodIP not yet set), or
//   - a Windows pod contains a hostProcess container.
func shouldMountHostsFile(pod *v1.Pod, podIPs []string) bool {
	shouldMount := len(podIPs) > 0
	if runtime.GOOS == "windows" {
		return shouldMount && !kubecontainer.HasWindowsHostProcessContainer(pod)
	}
	return shouldMount
}

// makeMounts determines the mount points for the given container.
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar, supportsRRO bool) ([]kubecontainer.Mount, func(), error) {
	mountEtcHostsFile := shouldMountHostsFile(pod, podIPs)
	klog.V(3).InfoS("Creating hosts mount for container", "pod", klog.KObj(pod), "containerName", container.Name, "podIPs", podIPs, "path", mountEtcHostsFile)
	mounts := []kubecontainer.Mount{}
	var cleanupAction func()
	for i, mount := range container.VolumeMounts {
		// do not mount /etc/hosts if container is already mounting on the path
		mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath)
		vol, ok := podVolumes[mount.Name]
		if !ok || vol.Mounter == nil {
			klog.ErrorS(nil, "Mount cannot be satisfied for the container, because the volume is missing or the volume mounter (vol.Mounter) is nil",
				"containerName", container.Name, "ok", ok, "volumeMounter", mount)
			return nil, cleanupAction, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name)
		}

		relabelVolume := false
		// If the volume supports SELinux and it has not been
		// relabeled already and it is not a read-only volume,
		// relabel it and mark it as labeled
		if vol.Mounter.GetAttributes().Managed && vol.Mounter.GetAttributes().SELinuxRelabel && !vol.SELinuxLabeled {
			vol.SELinuxLabeled = true
			relabelVolume = true
		}
		hostPath, err := volumeutil.GetPath(vol.Mounter)
		if err != nil {
			return nil, cleanupAction, err
		}

		subPath := mount.SubPath
		if mount.SubPathExpr != "" {
			subPath, err = kubecontainer.ExpandContainerVolumeMounts(mount, expandEnvs)
			if err != nil {
				return nil, cleanupAction, err
			}
		}

		if subPath != "" {
			if utilfs.IsAbs(subPath) {
				return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", subPath)
			}

			err = volumevalidation.ValidatePathNoBacksteps(subPath)
			if err != nil {
				return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", subPath, err)
			}

			volumePath := hostPath
			hostPath = filepath.Join(volumePath, subPath)

			if subPathExists, err := hu.PathExists(hostPath); err != nil {
				klog.ErrorS(nil, "Could not determine if subPath exists, will not attempt to change its permissions", "path", hostPath)
			} else if !subPathExists {
				// Create the sub path now because if it's auto-created later when referenced, it may have an
				// incorrect ownership and mode. For example, the sub path directory must have at least g+rwx
				// when the pod specifies an fsGroup, and if the directory is not created here, Docker will
				// later auto-create it with the incorrect mode 0750.
				// Take extra care not to escape the volume!
				perm, err := hu.GetMode(volumePath)
				if err != nil {
					return nil, cleanupAction, err
				}
				if err := subpather.SafeMakeDir(subPath, volumePath, perm); err != nil {
					// Don't pass detailed error back to the user because it could give information about host filesystem
					klog.ErrorS(err, "Failed to create subPath directory for volumeMount of the container", "containerName", container.Name, "volumeMountName", mount.Name)
					return nil, cleanupAction, fmt.Errorf("failed to create subPath directory for volumeMount %q of container %q", mount.Name, container.Name)
				}
			}
			hostPath, cleanupAction, err = subpather.PrepareSafeSubpath(subpath.Subpath{
				VolumeMountIndex: i,
				Path:             hostPath,
				VolumeName:       vol.InnerVolumeSpecName,
				VolumePath:       volumePath,
				PodDir:           podDir,
				ContainerName:    container.Name,
			})
			if err != nil {
				// Don't pass detailed error back to the user because it could give information about host filesystem
				klog.ErrorS(err, "Failed to prepare subPath for volumeMount of the container", "containerName", container.Name, "volumeMountName", mount.Name)
				return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name)
			}
		}

		// Docker Volume Mounts fail on Windows if it is not of the form C:/
		if volumeutil.IsWindowsLocalPath(runtime.GOOS, hostPath) {
			hostPath = volumeutil.MakeAbsolutePath(runtime.GOOS, hostPath)
		}

		containerPath := mount.MountPath
		// IsAbs returns false for UNC path/SMB shares/named pipes in Windows. So check for those specifically and skip MakeAbsolutePath
		if !volumeutil.IsWindowsUNCPath(runtime.GOOS, containerPath) && !utilfs.IsAbs(containerPath) {
			containerPath = volumeutil.MakeAbsolutePath(runtime.GOOS, containerPath)
		}

		propagation, err := translateMountPropagation(mount.MountPropagation)
		if err != nil {
			return nil, cleanupAction, err
		}
		klog.V(5).InfoS("Mount has propagation", "pod", klog.KObj(pod), "containerName", container.Name, "volumeMountName", mount.Name, "propagation", propagation)
		mustMountRO := vol.Mounter.GetAttributes().ReadOnly

		rro, err := resolveRecursiveReadOnly(mount, supportsRRO)
		if err != nil {
			return nil, cleanupAction, fmt.Errorf("failed to resolve recursive read-only mode: %w", err)
		}
		if rro && !utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
			return nil, cleanupAction, fmt.Errorf("recursive read-only mount needs feature gate %q to be enabled", features.RecursiveReadOnlyMounts)
		}

		mounts = append(mounts, kubecontainer.Mount{
			Name:              mount.Name,
			ContainerPath:     containerPath,
			HostPath:          hostPath,
			ReadOnly:          mount.ReadOnly || mustMountRO,
			RecursiveReadOnly: rro,
			SELinuxRelabel:    relabelVolume,
			Propagation:       propagation,
		})
	}
	if mountEtcHostsFile {
		hostAliases := pod.Spec.HostAliases
		hostsMount, err := makeHostsMount(podDir, podIPs, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork)
		if err != nil {
			return nil, cleanupAction, err
		}
		mounts = append(mounts, *hostsMount)
	}
	return mounts, cleanupAction, nil
}

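// The subPath flow above, summarized with hypothetical values: for a volume
// mounted at volumePath /var/lib/kubelet/pods/<uid>/volumes/kubernetes.io~configmap/cfg
// and mount.SubPath "app/settings":
//
//  1. "app/settings" must be relative and free of ".." backsteps.
//  2. hostPath becomes <volumePath>/app/settings.
//  3. If that directory is missing, SafeMakeDir creates it with the volume
//     root's mode so fsGroup-driven permissions apply.
//  4. PrepareSafeSubpath then pins the resolved path (on Linux via a bind
//     mount under the pod dir) so symlinks inside the volume cannot escape it.
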
// translateMountPropagation transforms v1.MountPropagationMode to
// runtimeapi.MountPropagation.
func translateMountPropagation(mountMode *v1.MountPropagationMode) (runtimeapi.MountPropagation, error) {
	if runtime.GOOS == "windows" {
		// Windows containers don't support mount propagation, use private for it.
		// Refer https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation.
		return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
	}

	switch {
	case mountMode == nil:
		// PRIVATE is the default
		return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
	case *mountMode == v1.MountPropagationHostToContainer:
		return runtimeapi.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, nil
	case *mountMode == v1.MountPropagationBidirectional:
		return runtimeapi.MountPropagation_PROPAGATION_BIDIRECTIONAL, nil
	case *mountMode == v1.MountPropagationNone:
		return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
	default:
		return 0, fmt.Errorf("invalid MountPropagation mode: %q", *mountMode)
	}
}

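// The resulting mapping, for reference:
//
//	(nil)                              -> PROPAGATION_PRIVATE (the default)
//	v1.MountPropagationNone            -> PROPAGATION_PRIVATE
//	v1.MountPropagationHostToContainer -> PROPAGATION_HOST_TO_CONTAINER
//	v1.MountPropagationBidirectional   -> PROPAGATION_BIDIRECTIONAL
//	anything else                      -> error
//
// On Windows every mode collapses to PROPAGATION_PRIVATE.
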
// getEtcHostsPath returns the full host-side path to a pod's generated /etc/hosts file
func getEtcHostsPath(podDir string) string {
	hostsFilePath := filepath.Join(podDir, "etc-hosts")
	// Volume Mounts fail on Windows if it is not of the form C:/
	return volumeutil.MakeAbsolutePath(runtime.GOOS, hostsFilePath)
}

// makeHostsMount makes the mountpoint for the hosts file that the containers
// in a pod are injected with. podIPs is provided instead of podIP as podIPs
// are present even if the dual-stack feature flag is not enabled.
func makeHostsMount(podDir string, podIPs []string, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) (*kubecontainer.Mount, error) {
	hostsFilePath := getEtcHostsPath(podDir)
	if err := ensureHostsFile(hostsFilePath, podIPs, hostName, hostDomainName, hostAliases, useHostNetwork); err != nil {
		return nil, err
	}
	return &kubecontainer.Mount{
		Name:           "k8s-managed-etc-hosts",
		ContainerPath:  etcHostsPath,
		HostPath:       hostsFilePath,
		ReadOnly:       false,
		SELinuxRelabel: true,
	}, nil
}

// ensureHostsFile ensures that the given host file has an up-to-date ip, host
// name, and domain name.
func ensureHostsFile(fileName string, hostIPs []string, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) error {
	var hostsFileContent []byte
	var err error

	if useHostNetwork {
		// if Pod is using host network, read hosts file from the node's filesystem.
		// `etcHostsPath` references the location of the hosts file on the node.
		// `/etc/hosts` for *nix systems.
		hostsFileContent, err = nodeHostsFileContent(etcHostsPath, hostAliases)
		if err != nil {
			return err
		}
	} else {
		// if Pod is not using host network, create a managed hosts file with Pod IP and other information.
		hostsFileContent = managedHostsFileContent(hostIPs, hostName, hostDomainName, hostAliases)
	}

	hostsFilePerm := os.FileMode(0644)
	if err := os.WriteFile(fileName, hostsFileContent, hostsFilePerm); err != nil {
		return err
	}
	return os.Chmod(fileName, hostsFilePerm)
}

// nodeHostsFileContent reads the content of the node's hosts file.
func nodeHostsFileContent(hostsFilePath string, hostAliases []v1.HostAlias) ([]byte, error) {
	hostsFileContent, err := os.ReadFile(hostsFilePath)
	if err != nil {
		return nil, err
	}
	var buffer bytes.Buffer
	buffer.WriteString(managedHostsHeaderWithHostNetwork)
	buffer.Write(hostsFileContent)
	buffer.Write(hostsEntriesFromHostAliases(hostAliases))
	return buffer.Bytes(), nil
}

// managedHostsFileContent generates the content of the managed etc hosts based on Pod IPs and other
// information.
func managedHostsFileContent(hostIPs []string, hostName, hostDomainName string, hostAliases []v1.HostAlias) []byte {
	var buffer bytes.Buffer
	buffer.WriteString(managedHostsHeader)
	buffer.WriteString("127.0.0.1\tlocalhost\n")                      // ipv4 localhost
	buffer.WriteString("::1\tlocalhost ip6-localhost ip6-loopback\n") // ipv6 localhost
	buffer.WriteString("fe00::0\tip6-localnet\n")
	buffer.WriteString("fe00::0\tip6-mcastprefix\n")
	buffer.WriteString("fe00::1\tip6-allnodes\n")
	buffer.WriteString("fe00::2\tip6-allrouters\n")
	if len(hostDomainName) > 0 {
		// a host entry is generated for all IPs in podIPs;
		// the podIPs field is populated even if the dual-stack
		// feature flag is not enabled.
		for _, hostIP := range hostIPs {
			buffer.WriteString(fmt.Sprintf("%s\t%s.%s\t%s\n", hostIP, hostName, hostDomainName, hostName))
		}
	} else {
		for _, hostIP := range hostIPs {
			buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostIP, hostName))
		}
	}
	buffer.Write(hostsEntriesFromHostAliases(hostAliases))
	return buffer.Bytes()
}

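// For a pod with IP 10.0.0.5, hostname "web-0", and domain
// "web.default.svc.cluster.local" (illustrative values), managedHostsFileContent
// produces:
//
//	# Kubernetes-managed hosts file.
//	127.0.0.1	localhost
//	::1	localhost ip6-localhost ip6-loopback
//	fe00::0	ip6-localnet
//	fe00::0	ip6-mcastprefix
//	fe00::1	ip6-allnodes
//	fe00::2	ip6-allrouters
//	10.0.0.5	web-0.web.default.svc.cluster.local	web-0
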
func hostsEntriesFromHostAliases(hostAliases []v1.HostAlias) []byte {
	if len(hostAliases) == 0 {
		return []byte{}
	}

	var buffer bytes.Buffer
	buffer.WriteString("\n")
	buffer.WriteString("# Entries added by HostAliases.\n")
	// for each IP, write all aliases onto a single line in the hosts file
	for _, hostAlias := range hostAliases {
		buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostAlias.IP, strings.Join(hostAlias.Hostnames, "\t")))
	}
	return buffer.Bytes()
}

// truncatePodHostnameIfNeeded truncates the pod hostname if it's longer than 63 chars.
func truncatePodHostnameIfNeeded(podName, hostname string) (string, error) {
	// Cap hostname at 63 chars (the specification is 64 bytes, which is 63 chars plus the null terminating char).
	const hostnameMaxLen = 63
	if len(hostname) <= hostnameMaxLen {
		return hostname, nil
	}
	truncated := hostname[:hostnameMaxLen]
	klog.ErrorS(nil, "Hostname for pod was too long, truncated it", "podName", podName, "hostnameMaxLen", hostnameMaxLen, "truncatedHostname", truncated)
	// hostname should not end with '-' or '.'
	truncated = strings.TrimRight(truncated, "-.")
	if len(truncated) == 0 {
		// This should never happen.
		return "", fmt.Errorf("hostname for pod %q was invalid: %q", podName, hostname)
	}
	return truncated, nil
}

// GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
func (kl *Kubelet) GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error) {
	return kl.usernsManager.GetOrCreateUserNamespaceMappings(pod, runtimeHandler)
}

// GeneratePodHostNameAndDomain creates a hostname and domain name for a pod,
// given that pod's spec and annotations, or returns an error.
func (kl *Kubelet) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) {
	clusterDomain := kl.dnsConfigurer.ClusterDomain

	hostname := pod.Name
	if len(pod.Spec.Hostname) > 0 {
		if msgs := utilvalidation.IsDNS1123Label(pod.Spec.Hostname); len(msgs) != 0 {
			return "", "", fmt.Errorf("pod Hostname %q is not a valid DNS label: %s", pod.Spec.Hostname, strings.Join(msgs, ";"))
		}
		hostname = pod.Spec.Hostname
	}

	hostname, err := truncatePodHostnameIfNeeded(pod.Name, hostname)
	if err != nil {
		return "", "", err
	}

	hostDomain := ""
	if len(pod.Spec.Subdomain) > 0 {
		if msgs := utilvalidation.IsDNS1123Label(pod.Spec.Subdomain); len(msgs) != 0 {
			return "", "", fmt.Errorf("pod Subdomain %q is not a valid DNS label: %s", pod.Spec.Subdomain, strings.Join(msgs, ";"))
		}
		hostDomain = fmt.Sprintf("%s.%s.svc.%s", pod.Spec.Subdomain, pod.Namespace, clusterDomain)
	}

	return hostname, hostDomain, nil
}

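// Illustrative results, assuming a cluster domain of "cluster.local":
//
//	pod name "frontend-abc12", no spec.hostname/subdomain
//	-> ("frontend-abc12", "")
//
//	spec.hostname "web-0", spec.subdomain "web", namespace "prod"
//	-> ("web-0", "web.prod.svc.cluster.local")
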
// GetPodCgroupParent gets pod cgroup parent from container manager.
func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
	pcm := kl.containerManager.NewPodContainerManager()
	_, cgroupParent := pcm.GetPodContainerName(pod)
	return cgroupParent
}

// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container.
func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (*kubecontainer.RunContainerOptions, func(), error) {
	supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)

	opts, err := kl.containerManager.GetResources(pod, container)
	if err != nil {
		return nil, nil, err
	}
	// The value of hostname is the short host name and it is sent to makeMounts to create the /etc/hosts file.
	hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod)
	if err != nil {
		return nil, nil, err
	}
	// nodename will be equal to hostname if SetHostnameAsFQDN is nil or false. If SetHostnameAsFQDN
	// is true and hostDomainName is defined, nodename will be the FQDN (hostname.hostDomainName).
	nodename, err := util.GetNodenameForKernel(hostname, hostDomainName, pod.Spec.SetHostnameAsFQDN)
	if err != nil {
		return nil, nil, err
	}
	opts.Hostname = nodename
	podName := volumeutil.GetUniquePodName(pod)
	volumes := kl.volumeManager.GetMountedVolumesForPod(podName)

	blkutil := volumepathhandler.NewBlockVolumePathHandler()
	blkVolumes, err := kl.makeBlockVolumes(pod, container, volumes, blkutil)
	if err != nil {
		return nil, nil, err
	}
	opts.Devices = append(opts.Devices, blkVolumes...)

	envs, err := kl.makeEnvironmentVariables(pod, container, podIP, podIPs)
	if err != nil {
		return nil, nil, err
	}
	opts.Envs = append(opts.Envs, envs...)

	// only podIPs is sent to makeMounts, as podIPs is populated even if the dual-stack feature flag is not enabled.
	mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIPs, volumes, kl.hostutil, kl.subpather, opts.Envs, supportsRRO)
	if err != nil {
		return nil, cleanupAction, err
	}
	opts.Mounts = append(opts.Mounts, mounts...)

	// adding TerminationMessagePath on Windows is only allowed if ContainerD is used. Individual files cannot
	// be mounted as volumes using Docker for Windows.
	if len(container.TerminationMessagePath) != 0 {
		p := kl.getPodContainerDir(pod.UID, container.Name)
		if err := os.MkdirAll(p, 0750); err != nil {
			klog.ErrorS(err, "Error on creating dir", "path", p)
		} else {
			opts.PodContainerDir = p
		}
	}

	return opts, cleanupAction, nil
}

var masterServices = sets.NewString("kubernetes")

// getServiceEnvVarMap makes a map[string]string of env vars for services a
// pod in namespace ns should see.
func (kl *Kubelet) getServiceEnvVarMap(ns string, enableServiceLinks bool) (map[string]string, error) {
	var (
		serviceMap = make(map[string]*v1.Service)
		m          = make(map[string]string)
	)

	// Get all service resources from the master (via a cache),
	// and populate them into service environment variables.
	if kl.serviceLister == nil {
		// Kubelets without masters (e.g. plain GCE ContainerVM) don't set env vars.
		return m, nil
	}
	services, err := kl.serviceLister.List(labels.Everything())
	if err != nil {
		return m, fmt.Errorf("failed to list services when setting up env vars")
	}

	// project the services in namespace ns onto the master services
	for i := range services {
		service := services[i]
		// ignore services where ClusterIP is "None" or empty
		if !v1helper.IsServiceIPSet(service) {
			continue
		}
		serviceName := service.Name

		// We always want to add environment variables for master services
		// from the default namespace, even if enableServiceLinks is false.
		// We also add environment variables for other services in the same
		// namespace, if enableServiceLinks is true.
		if service.Namespace == metav1.NamespaceDefault && masterServices.Has(serviceName) {
			if _, exists := serviceMap[serviceName]; !exists {
				serviceMap[serviceName] = service
			}
		} else if service.Namespace == ns && enableServiceLinks {
			serviceMap[serviceName] = service
		}
	}

	mappedServices := []*v1.Service{}
	for key := range serviceMap {
		mappedServices = append(mappedServices, serviceMap[key])
	}

	for _, e := range envvars.FromServices(mappedServices) {
		m[e.Name] = e.Value
	}
	return m, nil
}

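// The variables themselves are generated by envvars.FromServices using the
// legacy Docker-links naming scheme. For a service "redis-primary" with
// cluster IP 10.0.0.11 and TCP port 6379 (hypothetical values), a pod would
// see roughly:
//
//	REDIS_PRIMARY_SERVICE_HOST=10.0.0.11
//	REDIS_PRIMARY_SERVICE_PORT=6379
//	REDIS_PRIMARY_PORT=tcp://10.0.0.11:6379
//	REDIS_PRIMARY_PORT_6379_TCP=tcp://10.0.0.11:6379
//	REDIS_PRIMARY_PORT_6379_TCP_PROTO=tcp
//	REDIS_PRIMARY_PORT_6379_TCP_PORT=6379
//	REDIS_PRIMARY_PORT_6379_TCP_ADDR=10.0.0.11
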
// Make the environment variables for a pod in the given namespace.
func (kl *Kubelet) makeEnvironmentVariables(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) ([]kubecontainer.EnvVar, error) {
	if pod.Spec.EnableServiceLinks == nil {
		return nil, fmt.Errorf("nil pod.spec.enableServiceLinks encountered, cannot construct envvars")
	}

	// If the pod originates from the kube-api, then we know that the kube-apiserver is responding and the kubelet's credentials are valid.
	// Knowing this, it is reasonable to wait until the service lister has synchronized at least once before attempting to build
	// a service env var map. This doesn't prevent the race below from happening entirely, but it does prevent the "obvious"
	// failure case of services simply not having completed a list operation that can reasonably be expected to succeed.
	// One common case this prevents is a kubelet restart reading pods before services and some pod not having the
	// KUBERNETES_SERVICE_HOST injected because we didn't wait a short time for services to sync before proceeding.
	// The KUBERNETES_SERVICE_HOST link is special because it is unconditionally injected into pods and is read by the
	// in-cluster-config for pod clients.
	if !kubetypes.IsStaticPod(pod) && !kl.serviceHasSynced() {
		return nil, fmt.Errorf("services have not yet been read at least once, cannot construct envvars")
	}

	var result []kubecontainer.EnvVar
	// Note: These are added to the docker Config, but are not included in the checksum computed
	// by kubecontainer.HashContainer(...). That way, we can still determine whether a
	// v1.Container is already running by its hash. (We don't want to restart a container just
	// because some service changed.)
	//
	// Note that there is a race between Kubelet seeing the pod and kubelet seeing the service.
	// To avoid this users can: (1) wait between starting a service and starting pods that use it; or (2) detect
	// missing service env vars and exit and be restarted; or (3) use DNS instead of env vars
	// and keep trying to resolve the DNS name of the service (recommended).
	serviceEnv, err := kl.getServiceEnvVarMap(pod.Namespace, *pod.Spec.EnableServiceLinks)
	if err != nil {
		return result, err
	}

	var (
		configMaps = make(map[string]*v1.ConfigMap)
		secrets    = make(map[string]*v1.Secret)
		tmpEnv     = make(map[string]string)
	)

	// Env will override EnvFrom variables.
	// Process EnvFrom first then allow Env to replace existing values.
	for _, envFrom := range container.EnvFrom {
		switch {
		case envFrom.ConfigMapRef != nil:
			cm := envFrom.ConfigMapRef
			name := cm.Name
			configMap, ok := configMaps[name]
			if !ok {
				if kl.kubeClient == nil {
					return result, fmt.Errorf("couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name)
				}
				optional := cm.Optional != nil && *cm.Optional
				configMap, err = kl.configMapManager.GetConfigMap(pod.Namespace, name)
				if err != nil {
					if errors.IsNotFound(err) && optional {
						// ignore error when marked optional
						continue
					}
					return result, err
				}
				configMaps[name] = configMap
			}

			for k, v := range configMap.Data {
				if len(envFrom.Prefix) > 0 {
					k = envFrom.Prefix + k
				}

				tmpEnv[k] = v
			}
		case envFrom.SecretRef != nil:
			s := envFrom.SecretRef
			name := s.Name
			secret, ok := secrets[name]
			if !ok {
				if kl.kubeClient == nil {
					return result, fmt.Errorf("couldn't get secret %v/%v, no kubeClient defined", pod.Namespace, name)
				}
				optional := s.Optional != nil && *s.Optional
				secret, err = kl.secretManager.GetSecret(pod.Namespace, name)
				if err != nil {
					if errors.IsNotFound(err) && optional {
						// ignore error when marked optional
						continue
					}
					return result, err
				}
				secrets[name] = secret
			}

			for k, v := range secret.Data {
				if len(envFrom.Prefix) > 0 {
					k = envFrom.Prefix + k
				}

				tmpEnv[k] = string(v)
			}
		}
	}

	// Determine the final values of variables:
	//
	// 1. Determine the final value of each variable:
	//    a. If the variable's Value is set, expand the `$(var)` references to other
	//       variables in the .Value field; the sources of variables are the declared
	//       variables of the container and the service environment variables
	//    b. If a source is defined for an environment variable, resolve the source
	// 2. Create the container's environment in the order variables are declared
	// 3. Add remaining service environment vars
	var (
		mappingFunc = expansion.MappingFuncFor(tmpEnv, serviceEnv)
	)
	for _, envVar := range container.Env {
		runtimeVal := envVar.Value
		if runtimeVal != "" {
			// Step 1a: expand variable references
			runtimeVal = expansion.Expand(runtimeVal, mappingFunc)
		} else if envVar.ValueFrom != nil {
			// Step 1b: resolve alternate env var sources
			switch {
			case envVar.ValueFrom.FieldRef != nil:
				runtimeVal, err = kl.podFieldSelectorRuntimeValue(envVar.ValueFrom.FieldRef, pod, podIP, podIPs)
				if err != nil {
					return result, err
				}
			case envVar.ValueFrom.ResourceFieldRef != nil:
				defaultedPod, defaultedContainer, err := kl.defaultPodLimitsForDownwardAPI(pod, container)
				if err != nil {
					return result, err
				}
				runtimeVal, err = containerResourceRuntimeValue(envVar.ValueFrom.ResourceFieldRef, defaultedPod, defaultedContainer)
				if err != nil {
					return result, err
				}
			case envVar.ValueFrom.ConfigMapKeyRef != nil:
				cm := envVar.ValueFrom.ConfigMapKeyRef
				name := cm.Name
				key := cm.Key
				optional := cm.Optional != nil && *cm.Optional
				configMap, ok := configMaps[name]
				if !ok {
					if kl.kubeClient == nil {
						return result, fmt.Errorf("couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name)
					}
					configMap, err = kl.configMapManager.GetConfigMap(pod.Namespace, name)
					if err != nil {
						if errors.IsNotFound(err) && optional {
							// ignore error when marked optional
							continue
						}
						return result, err
					}
					configMaps[name] = configMap
				}
				runtimeVal, ok = configMap.Data[key]
				if !ok {
					if optional {
						continue
					}
					return result, fmt.Errorf("couldn't find key %v in ConfigMap %v/%v", key, pod.Namespace, name)
				}
			case envVar.ValueFrom.SecretKeyRef != nil:
				s := envVar.ValueFrom.SecretKeyRef
				name := s.Name
				key := s.Key
				optional := s.Optional != nil && *s.Optional
				secret, ok := secrets[name]
				if !ok {
					if kl.kubeClient == nil {
						return result, fmt.Errorf("couldn't get secret %v/%v, no kubeClient defined", pod.Namespace, name)
					}
					secret, err = kl.secretManager.GetSecret(pod.Namespace, name)
					if err != nil {
						if errors.IsNotFound(err) && optional {
							// ignore error when marked optional
							continue
						}
						return result, err
					}
					secrets[name] = secret
				}
				runtimeValBytes, ok := secret.Data[key]
				if !ok {
					if optional {
						continue
					}
					return result, fmt.Errorf("couldn't find key %v in Secret %v/%v", key, pod.Namespace, name)
				}
				runtimeVal = string(runtimeValBytes)
			}
		}

		tmpEnv[envVar.Name] = runtimeVal
	}

	// Append the env vars
	for k, v := range tmpEnv {
		result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
	}

	// Append remaining service env vars.
	for k, v := range serviceEnv {
		// Accesses apiserver+Pods.
		// So, the master may set service env vars, or kubelet may. In case both are doing
		// it, we skip the key from the kubelet-generated ones so we don't have duplicate
		// env vars.
		// TODO: remove this next line once all platforms use apiserver+Pods.
		if _, present := tmpEnv[k]; !present {
			result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
		}
	}
	return result, nil
}

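// Precedence, in short: envFrom entries are loaded first, env entries then
// overwrite any name they share, and service environment variables are
// appended last only for names not already set. For a hypothetical container
// whose envFrom provides {LOG_LEVEL: "info", MODE: "batch"} and whose env
// declares MODE="stream", the final environment contains LOG_LEVEL=info and
// MODE=stream, plus the service variables.
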
// podFieldSelectorRuntimeValue returns the runtime value of the given
// selector for a pod.
func (kl *Kubelet) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod *v1.Pod, podIP string, podIPs []string) (string, error) {
	internalFieldPath, _, err := podshelper.ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "")
	if err != nil {
		return "", err
	}

	// make podIPs order match node IP family preference #97979
	podIPs = kl.sortPodIPs(podIPs)
	if len(podIPs) > 0 {
		podIP = podIPs[0]
	}

	switch internalFieldPath {
	case "spec.nodeName":
		return pod.Spec.NodeName, nil
	case "spec.serviceAccountName":
		return pod.Spec.ServiceAccountName, nil
	case "status.hostIP":
		hostIPs, err := kl.getHostIPsAnyWay()
		if err != nil {
			return "", err
		}
		return hostIPs[0].String(), nil
	case "status.hostIPs":
		if !utilfeature.DefaultFeatureGate.Enabled(features.PodHostIPs) {
			return "", nil
		}
		hostIPs, err := kl.getHostIPsAnyWay()
		if err != nil {
			return "", err
		}
		ips := make([]string, 0, len(hostIPs))
		for _, ip := range hostIPs {
			ips = append(ips, ip.String())
		}
		return strings.Join(ips, ","), nil
	case "status.podIP":
		return podIP, nil
	case "status.podIPs":
		return strings.Join(podIPs, ","), nil
	}
	return fieldpath.ExtractFieldPathAsString(pod, internalFieldPath)
}

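// Example resolutions for the selectors handled above (values hypothetical):
//
//	fieldRef "spec.nodeName" -> "node-1"
//	fieldRef "status.podIP"  -> "10.0.0.5"
//	fieldRef "status.podIPs" -> "10.0.0.5,fd00::5" (ordered by node IP family preference)
//	fieldRef "metadata.name" -> resolved by fieldpath.ExtractFieldPathAsString
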
// containerResourceRuntimeValue returns the value of the provided container resource
func containerResourceRuntimeValue(fs *v1.ResourceFieldSelector, pod *v1.Pod, container *v1.Container) (string, error) {
	containerName := fs.ContainerName
	if len(containerName) == 0 {
		return resource.ExtractContainerResourceValue(fs, container)
	}
	return resource.ExtractResourceValueByContainerName(fs, pod, containerName)
}

// killPod instructs the container runtime to kill the pod. This method requires that
// the pod status contains the result of the last syncPod, otherwise it may fail to
// terminate newly created containers and sandboxes.
func (kl *Kubelet) killPod(ctx context.Context, pod *v1.Pod, p kubecontainer.Pod, gracePeriodOverride *int64) error {
	// Call the container runtime KillPod method which stops all known running containers of the pod
	if err := kl.containerRuntime.KillPod(ctx, pod, p, gracePeriodOverride); err != nil {
		return err
	}
	if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
		klog.V(2).InfoS("Failed to update QoS cgroups while killing pod", "err", err)
	}
	return nil
}

// makePodDataDirs creates the dirs for the pod data.
func (kl *Kubelet) makePodDataDirs(pod *v1.Pod) error {
	uid := pod.UID
	if err := os.MkdirAll(kl.getPodDir(uid), 0750); err != nil && !os.IsExist(err) {
		return err
	}
	if err := os.MkdirAll(kl.getPodVolumesDir(uid), 0750); err != nil && !os.IsExist(err) {
		return err
	}
	if err := os.MkdirAll(kl.getPodPluginsDir(uid), 0750); err != nil && !os.IsExist(err) {
		return err
	}
	return nil
}

// getPullSecretsForPod inspects the Pod and retrieves the referenced pull
// secrets.
func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
	pullSecrets := []v1.Secret{}
	failedPullSecrets := []string{}

	for _, secretRef := range pod.Spec.ImagePullSecrets {
		if len(secretRef.Name) == 0 {
			// API validation permitted entries with empty names (https://issue.k8s.io/99454#issuecomment-787838112).
			// Ignore to avoid unnecessary warnings.
			continue
		}
		secret, err := kl.secretManager.GetSecret(pod.Namespace, secretRef.Name)
		if err != nil {
			klog.InfoS("Unable to retrieve pull secret, the image pull may not succeed.", "pod", klog.KObj(pod), "secret", klog.KObj(secret), "err", err)
			failedPullSecrets = append(failedPullSecrets, secretRef.Name)
			continue
		}

		pullSecrets = append(pullSecrets, *secret)
	}

	if len(failedPullSecrets) > 0 {
		kl.recorder.Eventf(pod, v1.EventTypeWarning, "FailedToRetrieveImagePullSecret", "Unable to retrieve some image pull secrets (%s); attempting to pull the image may not succeed.", strings.Join(failedPullSecrets, ", "))
	}

	return pullSecrets
}

// PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running
// containers. This returns false if the pod has not yet been started or the pod is unknown.
func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool {
	if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
		return true
	}

	// Check if pod might need to unprepare resources before termination
	// NOTE: This is a temporary solution. This call is here to avoid changing
	// status manager and its tests.
	// TODO: extend PodDeletionSafetyProvider interface and implement it
	// in a separate Kubelet method.
	if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
		if kl.containerManager.PodMightNeedToUnprepareResources(pod.UID) {
			return true
		}
	}
	return false
}

// PodIsFinished returns true if SyncTerminatedPod is finished, i.e.
// all required node-level resources that a pod was consuming have
// been reclaimed by the kubelet.
func (kl *Kubelet) PodIsFinished(pod *v1.Pod) bool {
	return kl.podWorkers.ShouldPodBeFinished(pod.UID)
}

// filterOutInactivePods returns pods that are neither in a terminal phase
// nor known to be fully terminated. This method should only be used
// when the set of pods being filtered is upstream of the pod worker, i.e.
// the pods the pod manager is aware of.
func (kl *Kubelet) filterOutInactivePods(pods []*v1.Pod) []*v1.Pod {
	filteredPods := make([]*v1.Pod, 0, len(pods))
	for _, p := range pods {
		// if a pod is fully terminated by UID, it should be excluded from the
		// list of pods
		if kl.podWorkers.IsPodKnownTerminated(p.UID) {
			continue
		}

		// terminal pods are considered inactive UNLESS they are actively terminating
		if kl.isAdmittedPodTerminal(p) && !kl.podWorkers.IsPodTerminationRequested(p.UID) {
			continue
		}

		filteredPods = append(filteredPods, p)
	}
	return filteredPods
}

// isAdmittedPodTerminal returns true if the provided config source pod is in
// a terminal phase, or if the Kubelet has already indicated the pod has reached
// a terminal phase but the config source has not accepted it yet. This method
// should only be used within the pod configuration loops that notify the pod
// worker; other components should treat the pod worker as authoritative.
func (kl *Kubelet) isAdmittedPodTerminal(pod *v1.Pod) bool {
	// pods are considered inactive if the config source has observed a
	// terminal phase (if the Kubelet recorded that the pod reached a terminal
	// phase the pod should never be restarted)
	if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
		return true
	}
	// a pod that has been marked terminal within the Kubelet is considered
	// inactive (may have been rejected by Kubelet admission)
	if status, ok := kl.statusManager.GetPodStatus(pod.UID); ok {
		if status.Phase == v1.PodSucceeded || status.Phase == v1.PodFailed {
			return true
		}
	}
	return false
}

// removeOrphanedPodStatuses removes obsolete entries in podStatus where
// the pod is no longer considered bound to this node.
func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Pod) {
	podUIDs := make(map[types.UID]bool)
	for _, pod := range pods {
		podUIDs[pod.UID] = true
	}
	for _, pod := range mirrorPods {
		podUIDs[pod.UID] = true
	}
	kl.statusManager.RemoveOrphanedStatuses(podUIDs)
}

// HandlePodCleanups performs a series of cleanup work, including terminating
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
// directories. No config changes are sent to pod workers while this method
// is executing which means no new pods can appear. After this method completes
// the desired state of the kubelet should be reconciled with the actual state
// in the pod worker and other pod-related components.
//
// This function is executed by the main sync loop, so it must execute quickly
// and all nested calls should be asynchronous. Any slow reconciliation actions
// should be performed by other components (like the volume manager). The duration
// of this call is the minimum latency for static pods to be restarted if they
// are updated with a fixed UID (most should use a dynamic UID), and no config
// updates are delivered to the pod workers while this method is running.
func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
	// The kubelet lacks checkpointing, so we need to introspect the set of pods
	// in the cgroup tree prior to inspecting the set of pods in our pod manager.
	// this ensures our view of the cgroup tree does not mistakenly observe pods
	// that are added after the fact...
	var (
		cgroupPods map[types.UID]cm.CgroupName
		err        error
	)
	if kl.cgroupsPerQOS {
		pcm := kl.containerManager.NewPodContainerManager()
		cgroupPods, err = pcm.GetAllPodsFromCgroups()
		if err != nil {
			return fmt.Errorf("failed to get list of pods that still exist on cgroup mounts: %v", err)
		}
	}

	allPods, mirrorPods, orphanedMirrorPodFullnames := kl.podManager.GetPodsAndMirrorPods()

	// Pod phase progresses monotonically. Once a pod has reached a final state,
	// it should never leave regardless of the restart policy. The statuses
	// of such pods should not be changed, and there is no need to sync them.
	// TODO: the logic here does not handle two cases:
	//   1. If the containers were removed immediately after they died, kubelet
	//      may fail to generate correct statuses, let alone filtering correctly.
	//   2. If kubelet restarted before writing the terminated status for a pod
	//      to the apiserver, it could still restart the terminated pod (even
	//      though the pod was not considered terminated by the apiserver).
	// These two conditions could be alleviated by checkpointing kubelet.

	// Stop the workers for terminated pods not in the config source
	klog.V(3).InfoS("Clean up pod workers for terminated pods")
	workingPods := kl.podWorkers.SyncKnownPods(allPods)

	// Reconcile: At this point the pod workers have been pruned to the set of
	// desired pods. Pods that must be restarted due to UID reuse, or leftover
	// pods from previous runs, are not known to the pod worker.

	allPodsByUID := make(map[types.UID]*v1.Pod)
	for _, pod := range allPods {
		allPodsByUID[pod.UID] = pod
	}

	// Identify the set of pods that have workers, which should be all pods
	// from config that are not terminated, as well as any terminating pods
	// that have already been removed from config. Pods that are terminating
	// will be added to possiblyRunningPods, to prevent overly aggressive
	// cleanup of pod cgroups.
	stringIfTrue := func(t bool) string {
		if t {
			return "true"
		}
		return ""
	}
	runningPods := make(map[types.UID]sets.Empty)
	possiblyRunningPods := make(map[types.UID]sets.Empty)
	for uid, sync := range workingPods {
		switch sync.State {
		case SyncPod:
			runningPods[uid] = struct{}{}
			possiblyRunningPods[uid] = struct{}{}
		case TerminatingPod:
			possiblyRunningPods[uid] = struct{}{}
		default:
		}
	}

	// Retrieve the list of running containers from the runtime to perform cleanup.
	// We need the latest state to avoid delaying restarts of static pods that reuse
	// a UID.
	if err := kl.runtimeCache.ForceUpdateIfOlder(ctx, kl.clock.Now()); err != nil {
		klog.ErrorS(err, "Error listing containers")
		return err
	}
	runningRuntimePods, err := kl.runtimeCache.GetPods(ctx)
	if err != nil {
		klog.ErrorS(err, "Error listing containers")
		return err
	}

	// Stop probing pods that are not running
	klog.V(3).InfoS("Clean up probes for terminated pods")
	kl.probeManager.CleanupPods(possiblyRunningPods)

	// Remove orphaned pod statuses not in the total list of known config pods
	klog.V(3).InfoS("Clean up orphaned pod statuses")
	kl.removeOrphanedPodStatuses(allPods, mirrorPods)

	// Remove orphaned pod user namespace allocations (if any).
	klog.V(3).InfoS("Clean up orphaned pod user namespace allocations")
	if err = kl.usernsManager.CleanupOrphanedPodUsernsAllocations(allPods, runningRuntimePods); err != nil {
		klog.ErrorS(err, "Failed cleaning up orphaned pod user namespaces allocations")
	}

	// Remove orphaned volumes from pods that are known not to have any
	// containers. Note that we pass all pods (including terminated pods) to
	// the function, so that we don't remove volumes associated with terminated
	// but not yet deleted pods.
	// TODO: this method could more aggressively cleanup terminated pods
	// in the future (volumes, mount dirs, logs, and containers could all be
	// better separated)
	klog.V(3).InfoS("Clean up orphaned pod directories")
	err = kl.cleanupOrphanedPodDirs(allPods, runningRuntimePods)
	if err != nil {
		// We want all cleanup tasks to be run even if one of them failed. So
		// we just log an error here and continue other cleanup tasks.
		// This also applies to the other clean up tasks.
		klog.ErrorS(err, "Failed cleaning up orphaned pod directories")
	}

	// Remove any orphaned mirror pods (mirror pods are tracked by name via the
	// pod worker)
	klog.V(3).InfoS("Clean up orphaned mirror pods")
	for _, podFullname := range orphanedMirrorPodFullnames {
		if !kl.podWorkers.IsPodForMirrorPodTerminatingByFullName(podFullname) {
			_, err := kl.mirrorPodClient.DeleteMirrorPod(podFullname, nil)
			if err != nil {
				klog.ErrorS(err, "Encountered error when deleting mirror pod", "podName", podFullname)
			} else {
				klog.V(3).InfoS("Deleted mirror pod", "podName", podFullname)
			}
		}
	}

	// After pruning pod workers for terminated pods get the list of active pods for
	// metrics and to determine restarts.
	activePods := kl.filterOutInactivePods(allPods)
	allRegularPods, allStaticPods := splitPodsByStatic(allPods)
	activeRegularPods, activeStaticPods := splitPodsByStatic(activePods)
	metrics.DesiredPodCount.WithLabelValues("").Set(float64(len(allRegularPods)))
	metrics.DesiredPodCount.WithLabelValues("true").Set(float64(len(allStaticPods)))
	metrics.ActivePodCount.WithLabelValues("").Set(float64(len(activeRegularPods)))
	metrics.ActivePodCount.WithLabelValues("true").Set(float64(len(activeStaticPods)))
	metrics.MirrorPodCount.Set(float64(len(mirrorPods)))

	// At this point, the pod worker is aware of which pods are not desired (SyncKnownPods).
	// We now look through the set of active pods for those that the pod worker is not aware of
	// and deliver an update. The most common reason a pod is not known is because the pod was
	// deleted and recreated with the same UID while the pod worker was driving its lifecycle (very
	// very rare for API pods, common for static pods with fixed UIDs). Containers that may still
	// be running from a previous execution must be reconciled by the pod worker's sync method.
	// We must use active pods because that is the set of admitted pods (podManager includes pods
	// that will never be run, and statusManager tracks already rejected pods).
	var restartCount, restartCountStatic int
	for _, desiredPod := range activePods {
		if _, knownPod := workingPods[desiredPod.UID]; knownPod {
			continue
		}

		klog.V(3).InfoS("Pod will be restarted because it is in the desired set and not known to the pod workers (likely due to UID reuse)", "podUID", desiredPod.UID)
		isStatic := kubetypes.IsStaticPod(desiredPod)
		pod, mirrorPod, wasMirror := kl.podManager.GetPodAndMirrorPod(desiredPod)
		if pod == nil || wasMirror {
			klog.V(2).InfoS("Programmer error, restartable pod was a mirror pod but activePods should never contain a mirror pod", "podUID", desiredPod.UID)
			continue
		}
		kl.podWorkers.UpdatePod(UpdatePodOptions{
			UpdateType: kubetypes.SyncPodCreate,
			Pod:        pod,
			MirrorPod:  mirrorPod,
		})

		// the desired pod is now known as well
		workingPods[desiredPod.UID] = PodWorkerSync{State: SyncPod, HasConfig: true, Static: isStatic}
		if isStatic {
			// restartable static pods are the normal case
			restartCountStatic++
		} else {
			// almost certainly means shenanigans, as API pods should never have the same UID after being deleted and recreated
			// unless there is a major API violation
			restartCount++
		}
	}
	metrics.RestartedPodTotal.WithLabelValues("true").Add(float64(restartCountStatic))
	metrics.RestartedPodTotal.WithLabelValues("").Add(float64(restartCount))

	// Complete termination of deleted pods that are not runtime pods (don't have
	// running containers), are terminal, and are not known to pod workers.
	// An example is pods rejected during kubelet admission that have never
	// started before (i.e. do not have an orphaned pod).
	// Adding the pods with SyncPodKill to pod workers allows us to proceed with
	// force-deletion of such pods, while preventing re-entry of the routine in the
	// next invocation of HandlePodCleanups.
	for _, pod := range kl.filterTerminalPodsToDelete(allPods, runningRuntimePods, workingPods) {
		klog.V(3).InfoS("Handling termination and deletion of the pod to pod workers", "pod", klog.KObj(pod), "podUID", pod.UID)
		kl.podWorkers.UpdatePod(UpdatePodOptions{
			UpdateType: kubetypes.SyncPodKill,
			Pod:        pod,
		})
	}

	// Finally, terminate any pods that are observed in the runtime but not present in the list of
	// known running pods from config. If we do terminate running runtime pods that will happen
	// asynchronously in the background and those will be processed in the next invocation of
	// HandlePodCleanups.
	var orphanCount int
	for _, runningPod := range runningRuntimePods {
		// If there are orphaned pod resources in CRI that are unknown to the pod worker, terminate them
		// now. Since housekeeping is exclusive to other pod worker updates, we know that no pods have
		// been added to the pod worker in the meantime. Note that pods that are not visible in the runtime
		// but which were previously known are terminated by SyncKnownPods().
		_, knownPod := workingPods[runningPod.ID]
		if !knownPod {
			one := int64(1)
			killPodOptions := &KillPodOptions{
				PodTerminationGracePeriodSecondsOverride: &one,
			}
			klog.V(2).InfoS("Clean up containers for orphaned pod we had not seen before", "podUID", runningPod.ID, "killPodOptions", killPodOptions)
			kl.podWorkers.UpdatePod(UpdatePodOptions{
				UpdateType:     kubetypes.SyncPodKill,
				RunningPod:     runningPod,
				KillPodOptions: killPodOptions,
			})

			// the running pod is now known as well
			workingPods[runningPod.ID] = PodWorkerSync{State: TerminatingPod, Orphan: true}
			orphanCount++
		}
	}
	metrics.OrphanedRuntimePodTotal.Add(float64(orphanCount))

	// Now that we have recorded any terminating pods, and added new pods that should be running,
	// record a summary here. Not all possible combinations of PodWorkerSync values are valid.
	counts := make(map[PodWorkerSync]int)
	for _, sync := range workingPods {
		counts[sync]++
	}
	for validSync, configState := range map[PodWorkerSync]string{
		{HasConfig: true, Static: true}:                "desired",
		{HasConfig: true, Static: false}:               "desired",
		{Orphan: true, HasConfig: true, Static: true}:  "orphan",
		{Orphan: true, HasConfig: true, Static: false}: "orphan",
		{Orphan: true, HasConfig: false}:               "runtime_only",
	} {
		for _, state := range []PodWorkerState{SyncPod, TerminatingPod, TerminatedPod} {
			validSync.State = state
			count := counts[validSync]
			delete(counts, validSync)
			staticString := stringIfTrue(validSync.Static)
			if !validSync.HasConfig {
				staticString = "unknown"
			}
			metrics.WorkingPodCount.WithLabelValues(state.String(), configState, staticString).Set(float64(count))
		}
	}
	if len(counts) > 0 {
		// in case a combination is lost
		klog.V(3).InfoS("Programmer error, did not report a kubelet_working_pods metric for a value returned by SyncKnownPods", "counts", counts)
	}

	// Remove any cgroups in the hierarchy for pods that are definitely no longer
	// running (not in the container runtime).
	if kl.cgroupsPerQOS {
		pcm := kl.containerManager.NewPodContainerManager()
		klog.V(3).InfoS("Clean up orphaned pod cgroups")
		kl.cleanupOrphanedPodCgroups(pcm, cgroupPods, possiblyRunningPods)
	}

	// Cleanup any backoff entries.
	kl.backOff.GC()
	return nil
}

// filterTerminalPodsToDelete returns terminal pods which are ready to be
// deleted by the status manager, but are not in pod workers.
// First, the check for deletionTimestamp is a performance optimization as we
// don't need to do anything with terminal pods without a deletionTimestamp.
// Second, the check for terminal pods is to avoid race conditions of triggering
// deletion on Pending pods which are not yet added to pod workers.
// Third, the check to skip pods known to pod workers is that the lifecycle of
// such pods is already handled by pod workers.
// Finally, we skip runtime pods as their termination is handled separately in
// the HandlePodCleanups routine.
1386 func (kl *Kubelet) filterTerminalPodsToDelete(allPods []*v1.Pod, runningRuntimePods []*kubecontainer.Pod, workingPods map[types.UID]PodWorkerSync) map[types.UID]*v1.Pod { 1387 terminalPodsToDelete := make(map[types.UID]*v1.Pod) 1388 for _, pod := range allPods { 1389 if pod.DeletionTimestamp == nil { 1390 // skip pods which don't have a deletion timestamp 1391 continue 1392 } 1393 if !podutil.IsPodPhaseTerminal(pod.Status.Phase) { 1394 // skip the non-terminal pods 1395 continue 1396 } 1397 if _, knownPod := workingPods[pod.UID]; knownPod { 1398 // skip pods known to pod workers 1399 continue 1400 } 1401 terminalPodsToDelete[pod.UID] = pod 1402 } 1403 for _, runningRuntimePod := range runningRuntimePods { 1404 // skip running runtime pods - they are handled by a dedicated routine 1405 // which terminates the containers 1406 delete(terminalPodsToDelete, runningRuntimePod.ID) 1407 } 1408 return terminalPodsToDelete 1409 } 1410 1411 // splitPodsByStatic separates a list of desired pods from the pod manager into 1412 // regular or static pods. Mirror pods are not valid config sources (a mirror pod 1413 // being created cannot cause the Kubelet to start running a static pod) and are 1414 // excluded. 1415 func splitPodsByStatic(pods []*v1.Pod) (regular, static []*v1.Pod) { 1416 regular, static = make([]*v1.Pod, 0, len(pods)), make([]*v1.Pod, 0, len(pods)) 1417 for _, pod := range pods { 1418 if kubetypes.IsMirrorPod(pod) { 1419 continue 1420 } 1421 if kubetypes.IsStaticPod(pod) { 1422 static = append(static, pod) 1423 } else { 1424 regular = append(regular, pod) 1425 } 1426 } 1427 return regular, static 1428 } 1429 1430 // validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state 1431 // of the container. The previous flag will only return the logs for the last terminated container, otherwise, the current 1432 // running container is preferred over a previous termination. If info about the container is not available then a specific 1433 // error is returned to the end user. 1434 func (kl *Kubelet) validateContainerLogStatus(podName string, podStatus *v1.PodStatus, containerName string, previous bool) (containerID kubecontainer.ContainerID, err error) { 1435 var cID string 1436 1437 cStatus, found := podutil.GetContainerStatus(podStatus.ContainerStatuses, containerName) 1438 if !found { 1439 cStatus, found = podutil.GetContainerStatus(podStatus.InitContainerStatuses, containerName) 1440 } 1441 if !found { 1442 cStatus, found = podutil.GetContainerStatus(podStatus.EphemeralContainerStatuses, containerName) 1443 } 1444 if !found { 1445 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is not available", containerName, podName) 1446 } 1447 lastState := cStatus.LastTerminationState 1448 waiting, running, terminated := cStatus.State.Waiting, cStatus.State.Running, cStatus.State.Terminated 1449 1450 switch { 1451 case previous: 1452 if lastState.Terminated == nil || lastState.Terminated.ContainerID == "" { 1453 return kubecontainer.ContainerID{}, fmt.Errorf("previous terminated container %q in pod %q not found", containerName, podName) 1454 } 1455 cID = lastState.Terminated.ContainerID 1456 1457 case running != nil: 1458 cID = cStatus.ContainerID 1459 1460 case terminated != nil: 1461 // in cases where the next container didn't start, terminated.ContainerID will be empty, so get logs from the lastState.Terminated. 
1462 if terminated.ContainerID == "" { 1463 if lastState.Terminated != nil && lastState.Terminated.ContainerID != "" { 1464 cID = lastState.Terminated.ContainerID 1465 } else { 1466 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is terminated", containerName, podName) 1467 } 1468 } else { 1469 cID = terminated.ContainerID 1470 } 1471 1472 case lastState.Terminated != nil: 1473 if lastState.Terminated.ContainerID == "" { 1474 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is terminated", containerName, podName) 1475 } 1476 cID = lastState.Terminated.ContainerID 1477 1478 case waiting != nil: 1479 // output some info for the most common pending failures 1480 switch reason := waiting.Reason; reason { 1481 case images.ErrImagePull.Error(): 1482 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: image can't be pulled", containerName, podName) 1483 case images.ErrImagePullBackOff.Error(): 1484 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: trying and failing to pull image", containerName, podName) 1485 default: 1486 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: %v", containerName, podName, reason) 1487 } 1488 default: 1489 // unrecognized state 1490 return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start - no logs yet", containerName, podName) 1491 } 1492 1493 return kubecontainer.ParseContainerID(cID), nil 1494 } 1495 1496 // GetKubeletContainerLogs returns logs from the container 1497 // TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt 1498 // or all of them. 1499 func (kl *Kubelet) GetKubeletContainerLogs(ctx context.Context, podFullName, containerName string, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) error { 1500 // Pod workers periodically write status to statusManager. If status is not 1501 // cached there, something is wrong (or kubelet just restarted and hasn't 1502 // caught up yet). Just assume the pod is not ready yet. 1503 name, namespace, err := kubecontainer.ParsePodFullName(podFullName) 1504 if err != nil { 1505 return fmt.Errorf("unable to parse pod full name %q: %v", podFullName, err) 1506 } 1507 1508 pod, ok := kl.GetPodByName(namespace, name) 1509 if !ok { 1510 return fmt.Errorf("pod %q cannot be found - no logs available", name) 1511 } 1512 1513 // TODO: this should be using the podWorker's pod store as authoritative, since 1514 // the mirrorPod might still exist, the pod may have been force deleted but 1515 // is still terminating (users should be able to view logs of force deleted static pods 1516 // based on full name). 1517 var podUID types.UID 1518 pod, mirrorPod, wasMirror := kl.podManager.GetPodAndMirrorPod(pod) 1519 if wasMirror { 1520 if pod == nil { 1521 return fmt.Errorf("mirror pod %q does not have a corresponding pod", name) 1522 } 1523 podUID = mirrorPod.UID 1524 } else { 1525 podUID = pod.UID 1526 } 1527 1528 podStatus, found := kl.statusManager.GetPodStatus(podUID) 1529 if !found { 1530 // If there is no cached status, use the status from the 1531 // config source (apiserver). This is useful if kubelet 1532 // has recently been restarted. 
1533 podStatus = pod.Status 1534 } 1535 1536 // TODO: Consolidate the logic here with kuberuntime.GetContainerLogs, here we convert container name to containerID, 1537 // but inside kuberuntime we convert container id back to container name and restart count. 1538 // TODO: After separate container log lifecycle management, we should get log based on the existing log files 1539 // instead of container status. 1540 containerID, err := kl.validateContainerLogStatus(pod.Name, &podStatus, containerName, logOptions.Previous) 1541 if err != nil { 1542 return err 1543 } 1544 1545 // Do a zero-byte write to stdout before handing off to the container runtime. 1546 // This ensures at least one Write call is made to the writer when copying starts, 1547 // even if we then block waiting for log output from the container. 1548 if _, err := stdout.Write([]byte{}); err != nil { 1549 return err 1550 } 1551 1552 return kl.containerRuntime.GetContainerLogs(ctx, pod, containerID, logOptions, stdout, stderr) 1553 } 1554 1555 // getPhase returns the phase of a pod given its container info. 1556 func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.PodPhase { 1557 spec := pod.Spec 1558 pendingInitialization := 0 1559 failedInitialization := 0 1560 1561 // regular init containers 1562 for _, container := range spec.InitContainers { 1563 if kubetypes.IsRestartableInitContainer(&container) { 1564 // Skip the restartable init containers here to handle them separately as 1565 // they are slightly different from the init containers in terms of the 1566 // pod phase. 1567 continue 1568 } 1569 1570 containerStatus, ok := podutil.GetContainerStatus(info, container.Name) 1571 if !ok { 1572 pendingInitialization++ 1573 continue 1574 } 1575 1576 switch { 1577 case containerStatus.State.Running != nil: 1578 pendingInitialization++ 1579 case containerStatus.State.Terminated != nil: 1580 if containerStatus.State.Terminated.ExitCode != 0 { 1581 failedInitialization++ 1582 } 1583 case containerStatus.State.Waiting != nil: 1584 if containerStatus.LastTerminationState.Terminated != nil { 1585 if containerStatus.LastTerminationState.Terminated.ExitCode != 0 { 1586 failedInitialization++ 1587 } 1588 } else { 1589 pendingInitialization++ 1590 } 1591 default: 1592 pendingInitialization++ 1593 } 1594 } 1595 1596 // counters for restartable init and regular containers 1597 unknown := 0 1598 running := 0 1599 waiting := 0 1600 stopped := 0 1601 succeeded := 0 1602 1603 // restartable init containers 1604 for _, container := range spec.InitContainers { 1605 if !kubetypes.IsRestartableInitContainer(&container) { 1606 // Skip the regular init containers, as they have been handled above. 1607 continue 1608 } 1609 containerStatus, ok := podutil.GetContainerStatus(info, container.Name) 1610 if !ok { 1611 unknown++ 1612 continue 1613 } 1614 1615 switch { 1616 case containerStatus.State.Running != nil: 1617 if containerStatus.Started == nil || !*containerStatus.Started { 1618 pendingInitialization++ 1619 } 1620 running++ 1621 case containerStatus.State.Terminated != nil: 1622 // Do nothing here, as terminated restartable init containers are not 1623 // taken into account for the pod phase. 1624 case containerStatus.State.Waiting != nil: 1625 if containerStatus.LastTerminationState.Terminated != nil { 1626 // Do nothing here, as terminated restartable init containers are not 1627 // taken into account for the pod phase. 
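					// Illustrative example (not upstream text): a restartable init
					// container sitting in CrashLoopBackOff after having run at
					// least once lands here with a recorded last termination, so
					// it neither counts as pending initialization nor as waiting.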
1628 } else { 1629 pendingInitialization++ 1630 waiting++ 1631 } 1632 default: 1633 pendingInitialization++ 1634 unknown++ 1635 } 1636 } 1637 1638 for _, container := range spec.Containers { 1639 containerStatus, ok := podutil.GetContainerStatus(info, container.Name) 1640 if !ok { 1641 unknown++ 1642 continue 1643 } 1644 1645 switch { 1646 case containerStatus.State.Running != nil: 1647 running++ 1648 case containerStatus.State.Terminated != nil: 1649 stopped++ 1650 if containerStatus.State.Terminated.ExitCode == 0 { 1651 succeeded++ 1652 } 1653 case containerStatus.State.Waiting != nil: 1654 if containerStatus.LastTerminationState.Terminated != nil { 1655 stopped++ 1656 } else { 1657 waiting++ 1658 } 1659 default: 1660 unknown++ 1661 } 1662 } 1663 1664 if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever { 1665 return v1.PodFailed 1666 } 1667 1668 switch { 1669 case pendingInitialization > 0 && 1670 // This is needed to handle the case where the pod has been initialized but 1671 // the restartable init containers are restarting and the pod should not be 1672 // placed back into v1.PodPending since the regular containers have run. 1673 !kubecontainer.HasAnyRegularContainerStarted(&spec, info): 1674 fallthrough 1675 case waiting > 0: 1676 klog.V(5).InfoS("Pod waiting > 0, pending") 1677 // One or more containers has not been started 1678 return v1.PodPending 1679 case running > 0 && unknown == 0: 1680 // All containers have been started, and at least 1681 // one container is running 1682 return v1.PodRunning 1683 case running == 0 && stopped > 0 && unknown == 0: 1684 // The pod is terminal so its containers won't be restarted regardless 1685 // of the restart policy. 1686 if podIsTerminal { 1687 // TODO(#116484): Also assign terminal phase to static pods. 1688 if !kubetypes.IsStaticPod(pod) { 1689 // All regular containers are terminated in success and all restartable 1690 // init containers are stopped. 1691 if stopped == succeeded { 1692 return v1.PodSucceeded 1693 } 1694 // There is at least one failure 1695 return v1.PodFailed 1696 } 1697 } 1698 // All containers are terminated 1699 if spec.RestartPolicy == v1.RestartPolicyAlways { 1700 // All containers are in the process of restarting 1701 return v1.PodRunning 1702 } 1703 if stopped == succeeded { 1704 // RestartPolicy is not Always, all containers are terminated in success 1705 // and all restartable init containers are stopped. 
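			// Illustrative example (not upstream text): a restartPolicy=OnFailure
			// Job pod whose two containers both exited 0 reaches this point with
			// stopped == 2 and succeeded == 2, so it reports Succeeded; had one
			// container exited non-zero, stopped would exceed succeeded and the
			// OnFailure case below would report Running while it restarts.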
1706 return v1.PodSucceeded 1707 } 1708 if spec.RestartPolicy == v1.RestartPolicyNever { 1709 // RestartPolicy is Never, and all containers are 1710 // terminated with at least one in failure 1711 return v1.PodFailed 1712 } 1713 // RestartPolicy is OnFailure, and at least one in failure 1714 // and in the process of restarting 1715 return v1.PodRunning 1716 default: 1717 klog.V(5).InfoS("Pod default case, pending") 1718 return v1.PodPending 1719 } 1720 } 1721 1722 func deleteCustomResourceFromResourceRequirements(target *v1.ResourceRequirements) { 1723 for resource := range target.Limits { 1724 if resource != v1.ResourceCPU && resource != v1.ResourceMemory && resource != v1.ResourceEphemeralStorage { 1725 delete(target.Limits, resource) 1726 } 1727 } 1728 for resource := range target.Requests { 1729 if resource != v1.ResourceCPU && resource != v1.ResourceMemory && resource != v1.ResourceEphemeralStorage { 1730 delete(target.Requests, resource) 1731 } 1732 } 1733 } 1734 1735 func (kl *Kubelet) determinePodResizeStatus(pod *v1.Pod, podStatus *v1.PodStatus) v1.PodResizeStatus { 1736 var podResizeStatus v1.PodResizeStatus 1737 specStatusDiffer := false 1738 for _, c := range pod.Spec.Containers { 1739 if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok { 1740 cResourceCopy := c.Resources.DeepCopy() 1741 // for both requests and limits, we only compare the cpu, memory and ephemeralstorage 1742 // which are included in convertToAPIContainerStatuses 1743 deleteCustomResourceFromResourceRequirements(cResourceCopy) 1744 csResourceCopy := cs.Resources.DeepCopy() 1745 if csResourceCopy != nil && !cmp.Equal(*cResourceCopy, *csResourceCopy) { 1746 specStatusDiffer = true 1747 break 1748 } 1749 } 1750 } 1751 if !specStatusDiffer { 1752 // Clear last resize state from checkpoint 1753 if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil { 1754 klog.ErrorS(err, "SetPodResizeStatus failed", "pod", pod.Name) 1755 } 1756 } else { 1757 if resizeStatus, found := kl.statusManager.GetPodResizeStatus(string(pod.UID)); found { 1758 podResizeStatus = resizeStatus 1759 } 1760 } 1761 return podResizeStatus 1762 } 1763 1764 // generateAPIPodStatus creates the final API pod status for a pod, given the 1765 // internal pod status. This method should only be called from within sync*Pod methods. 1766 func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podIsTerminal bool) v1.PodStatus { 1767 klog.V(3).InfoS("Generating pod status", "podIsTerminal", podIsTerminal, "pod", klog.KObj(pod)) 1768 // use the previous pod status, or the api status, as the basis for this pod 1769 oldPodStatus, found := kl.statusManager.GetPodStatus(pod.UID) 1770 if !found { 1771 oldPodStatus = pod.Status 1772 } 1773 s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus) 1774 if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { 1775 s.Resize = kl.determinePodResizeStatus(pod, s) 1776 } 1777 // calculate the next phase and preserve reason 1778 allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) 1779 s.Phase = getPhase(pod, allStatus, podIsTerminal) 1780 klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "oldPhase", oldPodStatus.Phase, "phase", s.Phase) 1781 1782 // Perform a three-way merge between the statuses from the status manager, 1783 // runtime, and generated status to ensure terminal status is correctly set. 
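	// Illustrative precedence (not upstream text): if the freshly generated phase
	// is Running but the status manager last reported Failed, the pod stays
	// Failed; likewise a terminal phase already recorded in the API is preserved.
	// Terminal phases are sticky by design.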
1784 	if s.Phase != v1.PodFailed && s.Phase != v1.PodSucceeded {
1785 		switch {
1786 		case oldPodStatus.Phase == v1.PodFailed || oldPodStatus.Phase == v1.PodSucceeded:
1787 			klog.V(4).InfoS("Status manager phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", oldPodStatus.Phase)
1788 			s.Phase = oldPodStatus.Phase
1789 		case pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded:
1790 			klog.V(4).InfoS("API phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", pod.Status.Phase)
1791 			s.Phase = pod.Status.Phase
1792 		}
1793 	}
1794 
1795 	if s.Phase == oldPodStatus.Phase {
1796 		// preserve the reason and message which is associated with the phase
1797 		s.Reason = oldPodStatus.Reason
1798 		s.Message = oldPodStatus.Message
1799 		if len(s.Reason) == 0 {
1800 			s.Reason = pod.Status.Reason
1801 		}
1802 		if len(s.Message) == 0 {
1803 			s.Message = pod.Status.Message
1804 		}
1805 	}
1806 
1807 	// check if an internal module has requested that the pod be evicted; if so, override the reason and message
1808 	for _, podSyncHandler := range kl.PodSyncHandlers {
1809 		if result := podSyncHandler.ShouldEvict(pod); result.Evict {
1810 			s.Phase = v1.PodFailed
1811 			s.Reason = result.Reason
1812 			s.Message = result.Message
1813 			break
1814 		}
1815 	}
1816 
1817 	// pods are not allowed to transition out of terminal phases
1818 	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
1819 		// API server shows terminal phase; transitions are not allowed
1820 		if s.Phase != pod.Status.Phase {
1821 			klog.ErrorS(nil, "Pod attempted illegal phase transition", "pod", klog.KObj(pod), "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
1822 			// Force back to phase from the API server
1823 			s.Phase = pod.Status.Phase
1824 		}
1825 	}
1826 
1827 	// ensure the probe managers have up-to-date status for containers
1828 	kl.probeManager.UpdatePodStatus(pod, s)
1829 
1830 	// preserve all conditions not owned by the kubelet
1831 	s.Conditions = make([]v1.PodCondition, 0, len(pod.Status.Conditions)+1)
1832 	for _, c := range pod.Status.Conditions {
1833 		if !kubetypes.PodConditionByKubelet(c.Type) {
1834 			s.Conditions = append(s.Conditions, c)
1835 		}
1836 	}
1837 
1838 	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
1839 		// copy over the pod disruption conditions from state, which is already
1840 		// updated during eviction (due to either node resource pressure or
1841 		// node graceful shutdown). We do not re-generate the conditions based
1842 		// on the container statuses, as they are added based on one-time events.
1843 		cType := v1.DisruptionTarget
1844 		if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
1845 			s.Conditions = utilpod.ReplaceOrAppendPodCondition(s.Conditions, condition)
1846 		}
1847 	}
1848 
1849 	// set all Kubelet-owned conditions
1850 	if utilfeature.DefaultFeatureGate.Enabled(features.PodReadyToStartContainersCondition) {
1851 		s.Conditions = append(s.Conditions, status.GeneratePodReadyToStartContainersCondition(pod, podStatus))
1852 	}
1853 	allContainerStatuses := append(s.InitContainerStatuses, s.ContainerStatuses...)
1854 	s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(&pod.Spec, allContainerStatuses, s.Phase))
1855 	s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(&pod.Spec, s.Conditions, allContainerStatuses, s.Phase))
1856 	s.Conditions = append(s.Conditions, status.GenerateContainersReadyCondition(&pod.Spec, allContainerStatuses, s.Phase))
1857 	s.Conditions = append(s.Conditions, v1.PodCondition{
1858 		Type:   v1.PodScheduled,
1859 		Status: v1.ConditionTrue,
1860 	})
1861 	// set HostIP/HostIPs and initialize PodIP/PodIPs for host network pods
1862 	if kl.kubeClient != nil {
1863 		hostIPs, err := kl.getHostIPsAnyWay()
1864 		if err != nil {
1865 			klog.V(4).InfoS("Cannot get host IPs", "err", err)
1866 		} else {
1867 			if s.HostIP != "" {
1868 				if utilnet.IPFamilyOfString(s.HostIP) != utilnet.IPFamilyOf(hostIPs[0]) {
1869 					kl.recorder.Eventf(pod, v1.EventTypeWarning, "HostIPsIPFamilyMismatch",
1870 						"Kubelet detected an IPv%s node IP (%s), but the cloud provider selected an IPv%s node IP (%s); pass an explicit `--node-ip` to kubelet to fix this.",
1871 						utilnet.IPFamilyOfString(s.HostIP), s.HostIP, utilnet.IPFamilyOf(hostIPs[0]), hostIPs[0].String())
1872 				}
1873 			}
1874 			s.HostIP = hostIPs[0].String()
1875 			if utilfeature.DefaultFeatureGate.Enabled(features.PodHostIPs) {
1876 				s.HostIPs = []v1.HostIP{{IP: s.HostIP}}
1877 				if len(hostIPs) == 2 {
1878 					s.HostIPs = append(s.HostIPs, v1.HostIP{IP: hostIPs[1].String()})
1879 				}
1880 			}
1881 
1882 			// HostNetwork Pods inherit the node IPs as PodIPs. They are immutable once set,
1883 			// except that if the node becomes dual-stack, we add the secondary IP.
1884 			if kubecontainer.IsHostNetworkPod(pod) {
1885 				// Primary IP is not set
1886 				if s.PodIP == "" {
1887 					s.PodIP = hostIPs[0].String()
1888 					s.PodIPs = []v1.PodIP{{IP: s.PodIP}}
1889 				}
1890 				// Secondary IP is not set #105320
1891 				if len(hostIPs) == 2 && len(s.PodIPs) == 1 {
1892 					if utilnet.IPFamilyOfString(s.PodIPs[0].IP) != utilnet.IPFamilyOf(hostIPs[1]) {
1893 						s.PodIPs = append(s.PodIPs, v1.PodIP{IP: hostIPs[1].String()})
1894 					}
1895 				}
1896 			}
1897 		}
1898 	}
1899 
1900 	return *s
1901 }
1902 
1903 // sortPodIPs returns the PodIPs sorted and truncated by the cluster IP family preference.
1904 // The runtime pod status may have an arbitrary number of IPs, in an arbitrary order.
1905 // PodIPs are obtained by: func (m *kubeGenericRuntimeManager) determinePodSandboxIPs()
1906 // We pick out the first returned IP of the same IP family as the node IP,
1907 // followed by the first IP of the opposite IP family (if any),
1908 // and use them for Pod.Status.PodIPs and the Downward API environment variables.
1909 func (kl *Kubelet) sortPodIPs(podIPs []string) []string {
1910 	ips := make([]string, 0, 2)
1911 	var validPrimaryIP, validSecondaryIP func(ip string) bool
1912 	if len(kl.nodeIPs) == 0 || utilnet.IsIPv4(kl.nodeIPs[0]) {
1913 		validPrimaryIP = utilnet.IsIPv4String
1914 		validSecondaryIP = utilnet.IsIPv6String
1915 	} else {
1916 		validPrimaryIP = utilnet.IsIPv6String
1917 		validSecondaryIP = utilnet.IsIPv4String
1918 	}
1919 	for _, ip := range podIPs {
1920 		if validPrimaryIP(ip) {
1921 			ips = append(ips, ip)
1922 			break
1923 		}
1924 	}
1925 	for _, ip := range podIPs {
1926 		if validSecondaryIP(ip) {
1927 			ips = append(ips, ip)
1928 			break
1929 		}
1930 	}
1931 	return ips
1932 }
1933 
1934 // convertStatusToAPIStatus initializes an API PodStatus for the given pod from
1935 // the given internal pod status and the previous state of the pod from the API.
1936 // It is purely transformative and does not alter the kubelet state at all. 1937 func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus, oldPodStatus v1.PodStatus) *v1.PodStatus { 1938 var apiPodStatus v1.PodStatus 1939 1940 // copy pod status IPs to avoid race conditions with PodStatus #102806 1941 podIPs := make([]string, len(podStatus.IPs)) 1942 copy(podIPs, podStatus.IPs) 1943 1944 // make podIPs order match node IP family preference #97979 1945 podIPs = kl.sortPodIPs(podIPs) 1946 for _, ip := range podIPs { 1947 apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{IP: ip}) 1948 } 1949 if len(apiPodStatus.PodIPs) > 0 { 1950 apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP 1951 } 1952 1953 // set status for Pods created on versions of kube older than 1.6 1954 apiPodStatus.QOSClass = v1qos.GetPodQOS(pod) 1955 1956 apiPodStatus.ContainerStatuses = kl.convertToAPIContainerStatuses( 1957 pod, podStatus, 1958 oldPodStatus.ContainerStatuses, 1959 pod.Spec.Containers, 1960 len(pod.Spec.InitContainers) > 0, 1961 false, 1962 ) 1963 apiPodStatus.InitContainerStatuses = kl.convertToAPIContainerStatuses( 1964 pod, podStatus, 1965 oldPodStatus.InitContainerStatuses, 1966 pod.Spec.InitContainers, 1967 len(pod.Spec.InitContainers) > 0, 1968 true, 1969 ) 1970 var ecSpecs []v1.Container 1971 for i := range pod.Spec.EphemeralContainers { 1972 ecSpecs = append(ecSpecs, v1.Container(pod.Spec.EphemeralContainers[i].EphemeralContainerCommon)) 1973 } 1974 1975 // #80875: By now we've iterated podStatus 3 times. We could refactor this to make a single 1976 // pass through podStatus.ContainerStatuses 1977 apiPodStatus.EphemeralContainerStatuses = kl.convertToAPIContainerStatuses( 1978 pod, podStatus, 1979 oldPodStatus.EphemeralContainerStatuses, 1980 ecSpecs, 1981 len(pod.Spec.InitContainers) > 0, 1982 false, 1983 ) 1984 1985 return &apiPodStatus 1986 } 1987 1988 // convertToAPIContainerStatuses converts the given internal container 1989 // statuses into API container statuses. 1990 func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecontainer.PodStatus, previousStatus []v1.ContainerStatus, containers []v1.Container, hasInitContainers, isInitContainer bool) []v1.ContainerStatus { 1991 convertContainerStatus := func(cs *kubecontainer.Status, oldStatus *v1.ContainerStatus) *v1.ContainerStatus { 1992 cid := cs.ID.String() 1993 status := &v1.ContainerStatus{ 1994 Name: cs.Name, 1995 RestartCount: int32(cs.RestartCount), 1996 Image: cs.Image, 1997 // Converting the digested image ref to the Kubernetes public 1998 // ContainerStatus.ImageID is historically intentional and should 1999 // not change. 
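		// (Illustrative note, not upstream text: cs.ImageRef is the digested image
		// reference reported by the CRI, e.g. "registry.example.com/app@sha256:<digest>",
		// which is why it maps onto ImageID below.)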
2000 			ImageID:      cs.ImageRef,
2001 			ContainerID:  cid,
2002 		}
2003 		if oldStatus != nil {
2004 			status.VolumeMounts = oldStatus.VolumeMounts // immutable
2005 		}
2006 		switch {
2007 		case cs.State == kubecontainer.ContainerStateRunning:
2008 			status.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.NewTime(cs.StartedAt)}
2009 		case cs.State == kubecontainer.ContainerStateCreated:
2010 			// containers that are created but not running are "waiting to be running"
2011 			status.State.Waiting = &v1.ContainerStateWaiting{}
2012 		case cs.State == kubecontainer.ContainerStateExited:
2013 			status.State.Terminated = &v1.ContainerStateTerminated{
2014 				ExitCode:    int32(cs.ExitCode),
2015 				Reason:      cs.Reason,
2016 				Message:     cs.Message,
2017 				StartedAt:   metav1.NewTime(cs.StartedAt),
2018 				FinishedAt:  metav1.NewTime(cs.FinishedAt),
2019 				ContainerID: cid,
2020 			}
2021 
2022 		case cs.State == kubecontainer.ContainerStateUnknown &&
2023 			oldStatus != nil && // we have an old status
2024 			oldStatus.State.Running != nil: // our previous status was running
2025 			// If this happens, then we know that this container was previously running and isn't anymore (assuming the CRI isn't failing to return running containers).
2026 			// You can imagine this happening in cases where a container failed and the kubelet didn't ask about it in time to see the result.
2027 			// In this case, the container should not go into the waiting state immediately, because that can make cases like run-once pods actually run
2028 			// twice. "Container never ran" is different from "container ran and failed"; this is handled differently in the kubelet
2029 			// and in higher-order logic like crashloop detection and handling.
2030 			status.State.Terminated = &v1.ContainerStateTerminated{
2031 				Reason:   "ContainerStatusUnknown",
2032 				Message:  "The container could not be located when the pod was terminated",
2033 				ExitCode: 137, // this code indicates an error
2034 			}
2035 			// the restart count normally comes from the CRI (see near the top of this method), but since this is being added explicitly
2036 			// for the case where the CRI did not return a status, we need to manually increment the restart count to be accurate.
2037 			status.RestartCount = oldStatus.RestartCount + 1
2038 
2039 		default:
2040 			// This collapses any unknown state to container waiting. If any container is waiting, then the pod status moves to pending even if it is running.
2041 			// If I'm reading this correctly, then any failure to read status on any container results in the entire pod going pending even if the containers
2042 			// are actually running.
2043 			// see https://github.com/kubernetes/kubernetes/blob/5d1b3e26af73dde33ecb6a3e69fb5876ceab192f/pkg/kubelet/kuberuntime/kuberuntime_container.go#L497 to
2044 			// https://github.com/kubernetes/kubernetes/blob/8976e3620f8963e72084971d9d4decbd026bf49f/pkg/kubelet/kuberuntime/helpers.go#L58-L71
2045 			// and interpreted here https://github.com/kubernetes/kubernetes/blob/b27e78f590a0d43e4a23ca3b2bf1739ca4c6e109/pkg/kubelet/kubelet_pods.go#L1434-L1439
2046 			status.State.Waiting = &v1.ContainerStateWaiting{}
2047 		}
2048 		return status
2049 	}
2050 
2051 	convertContainerStatusResources := func(cName string, status *v1.ContainerStatus, cStatus *kubecontainer.Status, oldStatuses map[string]v1.ContainerStatus) *v1.ResourceRequirements {
2052 		var requests, limits v1.ResourceList
2053 		// oldStatus should always exist if the container is running
2054 		oldStatus, oldStatusFound := oldStatuses[cName]
2055 		// Initialize limits/requests from the container's spec upon transition to the Running state.
2056 		// For cpu & memory, values queried from the runtime via the CRI always supersede spec values.
2057 		// For ephemeral-storage, a running container's status.limit/request equals spec.limit/request.
2058 		determineResource := func(rName v1.ResourceName, v1ContainerResource, oldStatusResource, resource v1.ResourceList) {
2059 			if oldStatusFound {
2060 				if oldStatus.State.Running == nil || status.ContainerID != oldStatus.ContainerID {
2061 					if r, exists := v1ContainerResource[rName]; exists {
2062 						resource[rName] = r.DeepCopy()
2063 					}
2064 				} else {
2065 					if oldStatusResource != nil {
2066 						if r, exists := oldStatusResource[rName]; exists {
2067 							resource[rName] = r.DeepCopy()
2068 						}
2069 					}
2070 				}
2071 			}
2072 		}
2073 		container := kubecontainer.GetContainerSpec(pod, cName)
2074 		// AllocatedResources values come from the checkpoint, which is the source of truth.
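		// Illustrative (not upstream text): after an in-place resize of a
		// container's CPU request from 100m to 200m has been admitted, the
		// checkpoint stores 200m, so AllocatedResources reports 200m even
		// before the runtime has applied the new value.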
2075 found := false 2076 status.AllocatedResources, found = kl.statusManager.GetContainerResourceAllocation(string(pod.UID), cName) 2077 if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found { 2078 // Log error and fallback to AllocatedResources in oldStatus if it exists 2079 klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName) 2080 if oldStatusFound { 2081 status.AllocatedResources = oldStatus.AllocatedResources 2082 } 2083 } 2084 if oldStatus.Resources == nil { 2085 oldStatus.Resources = &v1.ResourceRequirements{} 2086 } 2087 // Convert Limits 2088 if container.Resources.Limits != nil { 2089 limits = make(v1.ResourceList) 2090 if cStatus.Resources != nil && cStatus.Resources.CPULimit != nil { 2091 limits[v1.ResourceCPU] = cStatus.Resources.CPULimit.DeepCopy() 2092 } else { 2093 determineResource(v1.ResourceCPU, container.Resources.Limits, oldStatus.Resources.Limits, limits) 2094 } 2095 if cStatus.Resources != nil && cStatus.Resources.MemoryLimit != nil { 2096 limits[v1.ResourceMemory] = cStatus.Resources.MemoryLimit.DeepCopy() 2097 } else { 2098 determineResource(v1.ResourceMemory, container.Resources.Limits, oldStatus.Resources.Limits, limits) 2099 } 2100 if ephemeralStorage, found := container.Resources.Limits[v1.ResourceEphemeralStorage]; found { 2101 limits[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy() 2102 } 2103 } 2104 // Convert Requests 2105 if status.AllocatedResources != nil { 2106 requests = make(v1.ResourceList) 2107 if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil { 2108 requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy() 2109 } else { 2110 determineResource(v1.ResourceCPU, status.AllocatedResources, oldStatus.Resources.Requests, requests) 2111 } 2112 if memory, found := status.AllocatedResources[v1.ResourceMemory]; found { 2113 requests[v1.ResourceMemory] = memory.DeepCopy() 2114 } 2115 if ephemeralStorage, found := status.AllocatedResources[v1.ResourceEphemeralStorage]; found { 2116 requests[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy() 2117 } 2118 } 2119 //TODO(vinaykul,derekwaynecarr,InPlacePodVerticalScaling): Update this to include extended resources in 2120 // addition to CPU, memory, ephemeral storage. Add test case for extended resources. 2121 resources := &v1.ResourceRequirements{ 2122 Limits: limits, 2123 Requests: requests, 2124 } 2125 return resources 2126 } 2127 2128 // Fetch old containers statuses from old pod status. 2129 oldStatuses := make(map[string]v1.ContainerStatus, len(containers)) 2130 for _, status := range previousStatus { 2131 oldStatuses[status.Name] = status 2132 } 2133 2134 // Set all container statuses to default waiting state 2135 statuses := make(map[string]*v1.ContainerStatus, len(containers)) 2136 defaultWaitingState := v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: ContainerCreating}} 2137 if hasInitContainers { 2138 defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: PodInitializing}} 2139 } 2140 2141 supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod) 2142 2143 for _, container := range containers { 2144 status := &v1.ContainerStatus{ 2145 Name: container.Name, 2146 Image: container.Image, 2147 State: defaultWaitingState, 2148 } 2149 // status.VolumeMounts cannot be propagated from kubecontainer.Status 2150 // because the CRI API is unaware of the volume names. 
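		// Illustrative (not upstream text): a read-only mount declared with
		// RecursiveReadOnly: IfPossible surfaces below as RecursiveReadOnlyEnabled
		// when the runtime handler advertises support, and as
		// RecursiveReadOnlyDisabled otherwise; resolveRecursiveReadOnly at the
		// bottom of this file performs that resolution.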
2151 		if utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
2152 			for _, vol := range container.VolumeMounts {
2153 				volStatus := v1.VolumeMountStatus{
2154 					Name:      vol.Name,
2155 					MountPath: vol.MountPath,
2156 					ReadOnly:  vol.ReadOnly,
2157 				}
2158 				if vol.ReadOnly {
2159 					rroMode := v1.RecursiveReadOnlyDisabled
2160 					if b, err := resolveRecursiveReadOnly(vol, supportsRRO); err != nil {
2161 						klog.ErrorS(err, "failed to resolve recursive read-only mode", "mode", *vol.RecursiveReadOnly)
2162 					} else if b {
2163 						if utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
2164 							rroMode = v1.RecursiveReadOnlyEnabled
2165 						} else {
2166 							klog.ErrorS(nil, "recursive read-only mount needs feature gate to be enabled",
2167 								"featureGate", features.RecursiveReadOnlyMounts)
2168 						}
2169 					}
2170 					volStatus.RecursiveReadOnly = &rroMode // Disabled or Enabled
2171 				}
2172 				status.VolumeMounts = append(status.VolumeMounts, volStatus)
2173 			}
2174 		}
2175 		oldStatus, found := oldStatuses[container.Name]
2176 		if found {
2177 			if oldStatus.State.Terminated != nil {
2178 				status = &oldStatus
2179 			} else {
2180 				// Apply some values from the old statuses as the default values.
2181 				status.RestartCount = oldStatus.RestartCount
2182 				status.LastTerminationState = oldStatus.LastTerminationState
2183 			}
2184 		}
2185 		statuses[container.Name] = status
2186 	}
2187 
2188 	for _, container := range containers {
2189 		found := false
2190 		for _, cStatus := range podStatus.ContainerStatuses {
2191 			if container.Name == cStatus.Name {
2192 				found = true
2193 				break
2194 			}
2195 		}
2196 		if found {
2197 			continue
2198 		}
2199 		// If no container is found, then assuming it should be waiting seems plausible, but the status-generation code requires
2200 		// that a previous termination be present. If we're offline long enough or something removed the container, then
2201 		// the previous termination may not be present. This next code block ensures that if the container was previously running
2202 		// and its status has since disappeared, we can infer that it terminated, even if we don't know the exit code.
2203 		// By setting the last termination state we are able to leave the container status as waiting and present more accurate
2204 		// data via the API.
2205 
2206 		oldStatus, ok := oldStatuses[container.Name]
2207 		if !ok {
2208 			continue
2209 		}
2210 		if oldStatus.State.Terminated != nil {
2211 			// if the old container status was terminated, the last termination status is correct
2212 			continue
2213 		}
2214 		if oldStatus.State.Running == nil {
2215 			// if the old container status isn't running, then waiting is an appropriate status and we have nothing to do
2216 			continue
2217 		}
2218 
2219 		// If we're here, we know the container was previously running but doesn't have a terminated status. We will check now to
2220 		// see if it's in a pending state.
2221 		status := statuses[container.Name]
2222 		// If the status we're about to write still indicates the default Waiting state, it would force this pod back into Pending,
2223 		// which isn't accurate: we know the container was previously running.
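		// Illustrative scenario (not upstream text): the kubelet was down long
		// enough for the runtime to garbage-collect an exited container. On
		// restart the container has no runtime status, but the old API status
		// shows Running, so the code below synthesizes a 137
		// ("ContainerStatusUnknown") termination instead of presenting the pod
		// as Pending again.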
2224 		isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating
2225 		if hasInitContainers {
2226 			isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing
2227 		}
2228 		if !isDefaultWaitingStatus {
2229 			// the status was written, don't override
2230 			continue
2231 		}
2232 		if status.LastTerminationState.Terminated != nil {
2233 			// if we already have a termination state, nothing to do
2234 			continue
2235 		}
2236 
2237 		// setting this value ensures that we show as stopped here, not as waiting:
2238 		// https://github.com/kubernetes/kubernetes/blob/90c9f7b3e198e82a756a68ffeac978a00d606e55/pkg/kubelet/kubelet_pods.go#L1440-L1445
2239 		// This prevents the pod from becoming pending
2240 		status.LastTerminationState.Terminated = &v1.ContainerStateTerminated{
2241 			Reason:   "ContainerStatusUnknown",
2242 			Message:  "The container could not be located when the pod was deleted. The container used to be Running",
2243 			ExitCode: 137,
2244 		}
2245 
2246 		// If the pod was not deleted, then it's been restarted. Increment restart count.
2247 		if pod.DeletionTimestamp == nil {
2248 			status.RestartCount += 1
2249 		}
2250 
2251 		statuses[container.Name] = status
2252 	}
2253 
2254 	// Copy the slice before sorting it
2255 	containerStatusesCopy := make([]*kubecontainer.Status, len(podStatus.ContainerStatuses))
2256 	copy(containerStatusesCopy, podStatus.ContainerStatuses)
2257 
2258 	// Make the latest container status come first.
2259 	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(containerStatusesCopy)))
2260 	// Set container statuses according to the statuses seen in pod status
2261 	containerSeen := map[string]int{}
2262 	for _, cStatus := range containerStatusesCopy {
2263 		cName := cStatus.Name
2264 		if _, ok := statuses[cName]; !ok {
2265 			// This would also ignore the infra container.
2266 			continue
2267 		}
2268 		if containerSeen[cName] >= 2 {
2269 			continue
2270 		}
2271 		var oldStatusPtr *v1.ContainerStatus
2272 		if oldStatus, ok := oldStatuses[cName]; ok {
2273 			oldStatusPtr = &oldStatus
2274 		}
2275 		status := convertContainerStatus(cStatus, oldStatusPtr)
2276 		if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
2277 			if status.State.Running != nil {
2278 				status.Resources = convertContainerStatusResources(cName, status, cStatus, oldStatuses)
2279 			}
2280 		}
2281 		if containerSeen[cName] == 0 {
2282 			statuses[cName] = status
2283 		} else {
2284 			statuses[cName].LastTerminationState = status.State
2285 		}
2286 		containerSeen[cName] = containerSeen[cName] + 1
2287 	}
2288 
2289 	// Handle the containers that failed to start, which should be in the Waiting state.
2290 	for _, container := range containers {
2291 		if isInitContainer {
2292 			// If the init container is terminated with exit code 0, it won't be restarted.
2293 			// TODO(random-liu): Handle this in a cleaner way.
2294 			s := podStatus.FindContainerStatusByName(container.Name)
2295 			if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
2296 				continue
2297 			}
2298 		}
2299 		// If a container should be restarted in the next syncPod, it is *Waiting*.
2300 		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
2301 			continue
2302 		}
2303 		status := statuses[container.Name]
2304 		reason, ok := kl.reasonCache.Get(pod.UID, container.Name)
2305 		if !ok {
2306 			// In fact, we could also apply Waiting state here, but it is less informative,
2307 			// and the container will be restarted soon, so we prefer the original state here.
2308 			// Note that with the current implementation of ShouldContainerBeRestarted the original state here
2309 			// could be:
2310 			// * Waiting: There is no associated historical container and start failure reason record.
2311 			// * Terminated: The container is terminated.
2312 			continue
2313 		}
2314 		if status.State.Terminated != nil {
2315 			status.LastTerminationState = status.State
2316 		}
2317 		status.State = v1.ContainerState{
2318 			Waiting: &v1.ContainerStateWaiting{
2319 				Reason:  reason.Err.Error(),
2320 				Message: reason.Message,
2321 			},
2322 		}
2323 		statuses[container.Name] = status
2324 	}
2325 
2326 	// Sort the container statuses since clients of this interface expect the list
2327 	// of containers in a pod to have a deterministic order.
2328 	if isInitContainer {
2329 		return kubetypes.SortStatusesOfInitContainers(pod, statuses)
2330 	}
2331 	containerStatuses := make([]v1.ContainerStatus, 0, len(statuses))
2332 	for _, status := range statuses {
2333 		containerStatuses = append(containerStatuses, *status)
2334 	}
2335 
2336 	sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses))
2337 	return containerStatuses
2338 }
2339 
2340 // ServeLogs returns logs of the current machine.
2341 func (kl *Kubelet) ServeLogs(w http.ResponseWriter, req *http.Request) {
2342 	// TODO: allowlist logs we are willing to serve
2343 	kl.logServer.ServeHTTP(w, req)
2344 }
2345 
2346 // findContainer finds and returns the container with the given pod ID, full name, and container name.
2347 // It returns nil if not found.
2348 func (kl *Kubelet) findContainer(ctx context.Context, podFullName string, podUID types.UID, containerName string) (*kubecontainer.Container, error) {
2349 	pods, err := kl.containerRuntime.GetPods(ctx, false)
2350 	if err != nil {
2351 		return nil, err
2352 	}
2353 	// Resolve and type convert back again.
2354 	// We need the static pod UID but the kubecontainer API works with types.UID.
2355 	podUID = types.UID(kl.podManager.TranslatePodUID(podUID))
2356 	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
2357 	return pod.FindContainerByName(containerName), nil
2358 }
2359 
2360 // RunInContainer runs a command in a container and returns the combined stdout and stderr as a byte slice.
2361 func (kl *Kubelet) RunInContainer(ctx context.Context, podFullName string, podUID types.UID, containerName string, cmd []string) ([]byte, error) {
2362 	container, err := kl.findContainer(ctx, podFullName, podUID, containerName)
2363 	if err != nil {
2364 		return nil, err
2365 	}
2366 	if container == nil {
2367 		return nil, fmt.Errorf("container not found (%q)", containerName)
2368 	}
2369 	// TODO(tallclair): Pass a proper timeout value.
2370 	return kl.runner.RunInContainer(ctx, container.ID, cmd, 0)
2371 }
2372 
2373 // GetExec gets the URL the exec will be served from, or nil if the Kubelet will serve it.
2374 func (kl *Kubelet) GetExec(ctx context.Context, podFullName string, podUID types.UID, containerName string, cmd []string, streamOpts remotecommandserver.Options) (*url.URL, error) {
2375 	container, err := kl.findContainer(ctx, podFullName, podUID, containerName)
2376 	if err != nil {
2377 		return nil, err
2378 	}
2379 	if container == nil {
2380 		return nil, fmt.Errorf("container not found (%q)", containerName)
2381 	}
2382 	return kl.streamingRuntime.GetExec(ctx, container.ID, cmd, streamOpts.Stdin, streamOpts.Stdout, streamOpts.Stderr, streamOpts.TTY)
2383 }
2384 
2385 // GetAttach gets the URL the attach will be served from, or nil if the Kubelet will serve it.
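// (Illustrative note, not upstream text: with a CRI runtime that runs its own
// streaming server, the returned URL points at the runtime's streaming endpoint
// and the API server proxies the attach stream directly to it; when the Kubelet
// serves the stream itself, the returned URL is nil.)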
2386 func (kl *Kubelet) GetAttach(ctx context.Context, podFullName string, podUID types.UID, containerName string, streamOpts remotecommandserver.Options) (*url.URL, error) {
2387 	container, err := kl.findContainer(ctx, podFullName, podUID, containerName)
2388 	if err != nil {
2389 		return nil, err
2390 	}
2391 	if container == nil {
2392 		return nil, fmt.Errorf("container %s not found in pod %s", containerName, podFullName)
2393 	}
2394 
2395 	// The TTY setting for attach must match the TTY setting in the initial container configuration,
2396 	// since whether the process is running in a TTY cannot be changed after it has started. We
2397 	// need the api.Pod to get the TTY status.
2398 	pod, found := kl.GetPodByFullName(podFullName)
2399 	if !found || (string(podUID) != "" && pod.UID != podUID) {
2400 		return nil, fmt.Errorf("pod %s not found", podFullName)
2401 	}
2402 	containerSpec := kubecontainer.GetContainerSpec(pod, containerName)
2403 	if containerSpec == nil {
2404 		return nil, fmt.Errorf("container %s not found in pod %s", containerName, podFullName)
2405 	}
2406 	tty := containerSpec.TTY
2407 
2408 	return kl.streamingRuntime.GetAttach(ctx, container.ID, streamOpts.Stdin, streamOpts.Stdout, streamOpts.Stderr, tty)
2409 }
2410 
2411 // GetPortForward gets the URL the port-forward will be served from, or nil if the Kubelet will serve it.
2412 func (kl *Kubelet) GetPortForward(ctx context.Context, podName, podNamespace string, podUID types.UID, portForwardOpts portforward.V4Options) (*url.URL, error) {
2413 	pods, err := kl.containerRuntime.GetPods(ctx, false)
2414 	if err != nil {
2415 		return nil, err
2416 	}
2417 	// Resolve and type convert back again.
2418 	// We need the static pod UID but the kubecontainer API works with types.UID.
2419 	podUID = types.UID(kl.podManager.TranslatePodUID(podUID))
2420 	podFullName := kubecontainer.BuildPodFullName(podName, podNamespace)
2421 	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
2422 	if pod.IsEmpty() {
2423 		return nil, fmt.Errorf("pod not found (%q)", podFullName)
2424 	}
2425 
2426 	return kl.streamingRuntime.GetPortForward(ctx, podName, podNamespace, podUID, portForwardOpts.Ports)
2427 }
2428 
2429 // cleanupOrphanedPodCgroups removes cgroups that should no longer exist.
2430 // It reconciles the cached state of cgroupPods with the specified set of possibly running pods.
2431 func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupPods map[types.UID]cm.CgroupName, possiblyRunningPods map[types.UID]sets.Empty) {
2432 	// Iterate over all the found pods to verify whether they should be running
2433 	for uid, val := range cgroupPods {
2434 		// if the pod is in the running set, it's not a candidate for cleanup
2435 		if _, ok := possiblyRunningPods[uid]; ok {
2436 			continue
2437 		}
2438 
2439 		// If volumes have not been unmounted/detached, do not delete the cgroup,
2440 		// so any memory-backed volumes don't have their charges propagated to the
2441 		// parent cgroup. If the volumes still exist, reduce the cpu shares for any
2442 		// process in the cgroup to the minimum value while we wait.
2443 		if podVolumesExist := kl.podVolumesExist(uid); podVolumesExist {
2444 			klog.V(3).InfoS("Orphaned pod found, but volumes not yet removed. Reducing cpu to minimum", "podUID", uid)
2445 			if err := pcm.ReduceCPULimits(val); err != nil {
2446 				klog.InfoS("Failed to reduce cpu time for pod pending volume cleanup", "podUID", uid, "err", err)
2447 			}
2448 			continue
2449 		}
2450 		klog.V(3).InfoS("Orphaned pod found, removing pod cgroups", "podUID", uid)
2451 		// Destroy all cgroups of the pod that should not be running,
2452 		// by first killing all the processes attached to these cgroups.
2453 		// We ignore errors returned by the method, as the housekeeping loop will
2454 		// try to delete these unwanted pod cgroups again.
2455 		go pcm.Destroy(val)
2456 	}
2457 }
2458 
2459 func (kl *Kubelet) runtimeClassSupportsRecursiveReadOnlyMounts(pod *v1.Pod) bool {
2460 	if kl.runtimeClassManager == nil {
2461 		return false
2462 	}
2463 	runtimeHandlerName, err := kl.runtimeClassManager.LookupRuntimeHandler(pod.Spec.RuntimeClassName)
2464 	if err != nil {
2465 		klog.ErrorS(err, "failed to look up the runtime handler", "runtimeClassName", pod.Spec.RuntimeClassName)
2466 		return false
2467 	}
2468 	runtimeHandlers := kl.runtimeState.runtimeHandlers()
2469 	return runtimeHandlerSupportsRecursiveReadOnlyMounts(runtimeHandlerName, runtimeHandlers)
2470 }
2471 
2472 // runtimeHandlerSupportsRecursiveReadOnlyMounts checks whether the runtime handler supports recursive read-only mounts.
2473 // The kubelet feature gate is not checked here.
2474 func runtimeHandlerSupportsRecursiveReadOnlyMounts(runtimeHandlerName string, runtimeHandlers []kubecontainer.RuntimeHandler) bool {
2475 	if len(runtimeHandlers) == 0 {
2476 		// The runtime does not support returning the handler list.
2477 		// No need to print a warning here.
2478 		return false
2479 	}
2480 	for _, h := range runtimeHandlers {
2481 		if h.Name == runtimeHandlerName {
2482 			return h.SupportsRecursiveReadOnlyMounts
2483 		}
2484 	}
2485 	klog.ErrorS(nil, "Unknown runtime handler", "runtimeHandlerName", runtimeHandlerName)
2486 	return false
2487 }
2488 
2489 // resolveRecursiveReadOnly resolves the recursive read-only mount mode.
2490 func resolveRecursiveReadOnly(m v1.VolumeMount, runtimeSupportsRRO bool) (bool, error) {
2491 	if m.RecursiveReadOnly == nil || *m.RecursiveReadOnly == v1.RecursiveReadOnlyDisabled {
2492 		return false, nil
2493 	}
2494 	if !m.ReadOnly {
2495 		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not read-only", m.Name)
2496 	}
2497 	if m.MountPropagation != nil && *m.MountPropagation != v1.MountPropagationNone {
2498 		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not compatible with propagation %q",
2499 			m.Name, *m.MountPropagation)
2500 	}
2501 	switch rroMode := *m.RecursiveReadOnly; rroMode {
2502 	case v1.RecursiveReadOnlyIfPossible:
2503 		return runtimeSupportsRRO, nil
2504 	case v1.RecursiveReadOnlyEnabled:
2505 		if !runtimeSupportsRRO {
2506 			return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not supported by the runtime", m.Name)
2507 		}
2508 		return true, nil
2509 	default:
2510 		return false, fmt.Errorf("unknown recursive read-only mode %q", rroMode)
2511 	}
2512 }
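// The sketch below is illustrative and not part of upstream kubelet; it shows
// how a runtime handler's advertised capability feeds resolveRecursiveReadOnly.
// The handler name "runc" and the function name exampleResolveRRO are
// hypothetical.
//
//	func exampleResolveRRO() {
//		rro := v1.RecursiveReadOnlyEnabled
//		m := v1.VolumeMount{Name: "data", MountPath: "/data", ReadOnly: true, RecursiveReadOnly: &rro}
//		handlers := []kubecontainer.RuntimeHandler{
//			{Name: "runc", SupportsRecursiveReadOnlyMounts: true},
//		}
//		supported := runtimeHandlerSupportsRecursiveReadOnlyMounts("runc", handlers)
//		if ok, err := resolveRecursiveReadOnly(m, supported); err == nil && ok {
//			fmt.Println("mount will be applied recursively read-only")
//		}
//	}
//
// With SupportsRecursiveReadOnlyMounts set to false instead, the Enabled mode
// would yield an error, while IfPossible would quietly fall back to a plain
// read-only mount.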