k8s.io/kubernetes@v1.29.3/pkg/kubelet/cm/container_manager.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cm
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"k8s.io/apimachinery/pkg/types"
    26  	"k8s.io/apimachinery/pkg/util/sets"
    27  
    28  	// TODO: Migrate kubelet to either use its own internal objects or client library.
    29  	v1 "k8s.io/api/core/v1"
    30  	internalapi "k8s.io/cri-api/pkg/apis"
    31  	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
    32  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    33  	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
    34  	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
    35  	"k8s.io/kubernetes/pkg/kubelet/config"
    36  	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    37  	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    38  	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
    39  	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
    40  	"k8s.io/kubernetes/pkg/kubelet/status"
    41  	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
    42  	"k8s.io/utils/cpuset"
    43  )
    44  
// ActivePodsFunc is a function that returns the list of pods currently
// considered active on this node.
type ActivePodsFunc func() []*v1.Pod
    46  
    47  // Manages the containers running on a machine.
// ContainerManager manages the containers running on a machine.
type ContainerManager interface {
	// Start runs the container manager's housekeeping.
	// - Ensures that the Docker daemon is in a container.
	// - Creates the system container where all non-containerized processes run.
	Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error

	// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
	// These cgroups include the system and Kubernetes services.
	SystemCgroupsLimit() v1.ResourceList

	// GetNodeConfig returns a NodeConfig that is being used by the container manager.
	GetNodeConfig() NodeConfig

	// Status returns internal Status.
	Status() Status

	// NewPodContainerManager is a factory method which returns a podContainerManager object.
	// Returns a noop implementation if qos cgroup hierarchy is not enabled.
	NewPodContainerManager() PodContainerManager

	// GetMountedSubsystems returns the mounted cgroup subsystems on the node.
	GetMountedSubsystems() *CgroupSubsystems

	// GetQOSContainersInfo returns the names of top level QoS containers.
	GetQOSContainersInfo() QOSContainersInfo

	// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
	GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList

	// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
	// node allocatable (amount of total healthy resources reported by device plugin),
	// and inactive device plugin resources previously registered on the node.
	GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top
	// level QoS containers have their desired state in a thread-safe way.
	UpdateQOSCgroups() error

	// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
	// extended resources required by container.
	GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)

	// UpdatePluginResources calls Allocate of device plugin handler for potential
	// requests for device plugin resources, and returns an error if fails.
	// Otherwise, it updates allocatableResource in nodeInfo if necessary,
	// to make sure it is at least equal to the pod's requested capacity for
	// any registered device plugin resource.
	UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error

	// InternalContainerLifecycle returns the InternalContainerLifecycle
	// implementation used by this container manager.
	InternalContainerLifecycle() InternalContainerLifecycle

	// GetPodCgroupRoot returns the cgroup which contains all pods.
	GetPodCgroupRoot() string

	// GetPluginRegistrationHandler returns a plugin registration handler.
	// The pluginwatcher's Handlers allow to have a single module for handling
	// registration.
	GetPluginRegistrationHandler() cache.PluginHandler

	// ShouldResetExtendedResourceCapacity returns whether or not the extended resources should be zeroed,
	// due to node recreation.
	ShouldResetExtendedResourceCapacity() bool

	// GetAllocateResourcesPodAdmitHandler returns an instance of a PodAdmitHandler responsible for allocating pod resources.
	GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler

	// GetNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
	GetNodeAllocatableAbsolute() v1.ResourceList

	// PrepareDynamicResources prepares dynamic pod resources.
	PrepareDynamicResources(*v1.Pod) error

	// UnprepareDynamicResources unprepares dynamic pod resources.
	UnprepareDynamicResources(*v1.Pod) error

	// PodMightNeedToUnprepareResources returns true if the pod with the given UID
	// might need to unprepare resources.
	PodMightNeedToUnprepareResources(UID types.UID) bool

	// Implements the PodResources Provider API.
	podresources.CPUsProvider
	podresources.DevicesProvider
	podresources.MemoryProvider
	podresources.DynamicResourcesProvider
}
   136  
// NodeConfig holds the node-level configuration the container manager
// operates with: cgroup layout and driver settings, node allocatable
// configuration, and the policies/options for the CPU, memory, and
// topology managers.
type NodeConfig struct {
	RuntimeCgroupsName    string
	SystemCgroupsName     string
	KubeletCgroupsName    string
	KubeletOOMScoreAdj    int32
	ContainerRuntime      string
	CgroupsPerQOS         bool
	CgroupRoot            string
	CgroupDriver          string
	KubeletRootDir        string
	ProtectKernelDefaults bool
	NodeAllocatableConfig
	QOSReserved                             map[v1.ResourceName]int64
	CPUManagerPolicy                        string
	CPUManagerPolicyOptions                 map[string]string
	TopologyManagerScope                    string
	CPUManagerReconcilePeriod               time.Duration
	ExperimentalMemoryManagerPolicy         string
	ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
	PodPidsLimit                            int64
	EnforceCPULimits                        bool
	CPUCFSQuotaPeriod                       time.Duration
	TopologyManagerPolicy                   string
	TopologyManagerPolicyOptions            map[string]string
}
   162  
// NodeAllocatableConfig contains the configuration related to node
// allocatable: the cgroups holding reserved resources, the kube/system
// resource reservations, the CPUs reserved for system use, and the hard
// eviction thresholds.
type NodeAllocatableConfig struct {
	KubeReservedCgroupName   string
	SystemReservedCgroupName string
	ReservedSystemCPUs       cpuset.CPUSet
	EnforceNodeAllocatable   sets.Set[string]
	KubeReserved             v1.ResourceList
	SystemReserved           v1.ResourceList
	HardEvictionThresholds   []evictionapi.Threshold
}
   172  
// Status holds the internal status of the container manager, as returned
// by ContainerManager.Status.
type Status struct {
	// SoftRequirements reports any soft requirements that were unsatisfied.
	SoftRequirements error
}
   177  
   178  // parsePercentage parses the percentage string to numeric value.
   179  func parsePercentage(v string) (int64, error) {
   180  	if !strings.HasSuffix(v, "%") {
   181  		return 0, fmt.Errorf("percentage expected, got '%s'", v)
   182  	}
   183  	percentage, err := strconv.ParseInt(strings.TrimRight(v, "%"), 10, 0)
   184  	if err != nil {
   185  		return 0, fmt.Errorf("invalid number in percentage '%s'", v)
   186  	}
   187  	if percentage < 0 || percentage > 100 {
   188  		return 0, fmt.Errorf("percentage must be between 0 and 100")
   189  	}
   190  	return percentage, nil
   191  }
   192  
   193  // ParseQOSReserved parses the --qos-reserved option
   194  func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) {
   195  	reservations := make(map[v1.ResourceName]int64)
   196  	for k, v := range m {
   197  		switch v1.ResourceName(k) {
   198  		// Only memory resources are supported.
   199  		case v1.ResourceMemory:
   200  			q, err := parsePercentage(v)
   201  			if err != nil {
   202  				return nil, fmt.Errorf("failed to parse percentage %q for %q resource: %w", v, k, err)
   203  			}
   204  			reservations[v1.ResourceName(k)] = q
   205  		default:
   206  			return nil, fmt.Errorf("cannot reserve %q resource", k)
   207  		}
   208  	}
   209  	return &reservations, nil
   210  }
   211  
   212  func containerDevicesFromResourceDeviceInstances(devs devicemanager.ResourceDeviceInstances) []*podresourcesapi.ContainerDevices {
   213  	var respDevs []*podresourcesapi.ContainerDevices
   214  
   215  	for resourceName, resourceDevs := range devs {
   216  		for devID, dev := range resourceDevs {
   217  			topo := dev.GetTopology()
   218  			if topo == nil {
   219  				// Some device plugin do not report the topology information.
   220  				// This is legal, so we report the devices anyway,
   221  				// let the client decide what to do.
   222  				respDevs = append(respDevs, &podresourcesapi.ContainerDevices{
   223  					ResourceName: resourceName,
   224  					DeviceIds:    []string{devID},
   225  				})
   226  				continue
   227  			}
   228  
   229  			for _, node := range topo.GetNodes() {
   230  				respDevs = append(respDevs, &podresourcesapi.ContainerDevices{
   231  					ResourceName: resourceName,
   232  					DeviceIds:    []string{devID},
   233  					Topology: &podresourcesapi.TopologyInfo{
   234  						Nodes: []*podresourcesapi.NUMANode{
   235  							{
   236  								ID: node.GetID(),
   237  							},
   238  						},
   239  					},
   240  				})
   241  			}
   242  		}
   243  	}
   244  
   245  	return respDevs
   246  }