/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
	"fmt"
	"strconv"
	"strings"
	"time"

	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"

	// TODO: Migrate kubelet to either use its own internal objects or client library.
	v1 "k8s.io/api/core/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
	"k8s.io/kubernetes/pkg/kubelet/config"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
	"k8s.io/kubernetes/pkg/kubelet/status"
	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/utils/cpuset"
)

// ActivePodsFunc returns the pods considered active on this node.
type ActivePodsFunc func() []*v1.Pod

// ContainerManager manages the containers running on a machine.
type ContainerManager interface {
	// Start runs the container manager's housekeeping.
	// - Ensures that the Docker daemon is in a container.
	// - Creates the system container where all non-containerized processes run.
	Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error

	// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
	// These cgroups include the system and Kubernetes services.
	SystemCgroupsLimit() v1.ResourceList

	// GetNodeConfig returns a NodeConfig that is being used by the container manager.
	GetNodeConfig() NodeConfig

	// Status returns internal Status.
	Status() Status

	// NewPodContainerManager is a factory method which returns a podContainerManager object.
	// Returns a noop implementation if qos cgroup hierarchy is not enabled.
	NewPodContainerManager() PodContainerManager

	// GetMountedSubsystems returns the mounted cgroup subsystems on the node.
	GetMountedSubsystems() *CgroupSubsystems

	// GetQOSContainersInfo returns the names of top level QoS containers.
	GetQOSContainersInfo() QOSContainersInfo

	// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
	GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList

	// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
	// node allocatable (amount of total healthy resources reported by device plugin),
	// and inactive device plugin resources previously registered on the node.
	GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top
	// level QoS containers have their desired state in a thread-safe way.
	UpdateQOSCgroups() error

	// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
	// extended resources required by container.
	GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)

	// UpdatePluginResources calls Allocate of device plugin handler for potential
	// requests for device plugin resources, and returns an error if it fails.
	// Otherwise, it updates allocatableResource in nodeInfo if necessary,
	// to make sure it is at least equal to the pod's requested capacity for
	// any registered device plugin resource.
	UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error

	// InternalContainerLifecycle returns the lifecycle hooks invoked around container creation/start/stop.
	InternalContainerLifecycle() InternalContainerLifecycle

	// GetPodCgroupRoot returns the cgroup which contains all pods.
	GetPodCgroupRoot() string

	// GetPluginRegistrationHandler returns a plugin registration handler.
	// The pluginwatcher's Handlers allow to have a single module for handling
	// registration.
	GetPluginRegistrationHandler() cache.PluginHandler

	// ShouldResetExtendedResourceCapacity returns whether or not the extended resources should be zeroed,
	// due to node recreation.
	ShouldResetExtendedResourceCapacity() bool

	// GetAllocateResourcesPodAdmitHandler returns an instance of a PodAdmitHandler responsible for allocating pod resources.
	GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler

	// GetNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
	GetNodeAllocatableAbsolute() v1.ResourceList

	// PrepareDynamicResources prepares dynamic pod resources.
	PrepareDynamicResources(*v1.Pod) error

	// UnprepareDynamicResources unprepares dynamic pod resources.
	UnprepareDynamicResources(*v1.Pod) error

	// PodMightNeedToUnprepareResources returns true if the pod with the given UID
	// might need to unprepare resources.
	PodMightNeedToUnprepareResources(UID types.UID) bool

	// Implements the PodResources Provider API.
	podresources.CPUsProvider
	podresources.DevicesProvider
	podresources.MemoryProvider
	podresources.DynamicResourcesProvider
}

// NodeConfig holds the kubelet configuration that the container manager
// consumes: cgroup names and layout, resource-manager policies, and
// per-pod enforcement settings.
type NodeConfig struct {
	RuntimeCgroupsName string
	SystemCgroupsName  string
	KubeletCgroupsName string
	KubeletOOMScoreAdj int32
	ContainerRuntime   string
	CgroupsPerQOS      bool
	CgroupRoot         string
	CgroupDriver       string
	KubeletRootDir     string
	ProtectKernelDefaults bool
	NodeAllocatableConfig
	// QOSReserved maps resource names to the percentage of that resource
	// reserved for higher QoS classes (see ParseQOSReserved below).
	QOSReserved                             map[v1.ResourceName]int64
	CPUManagerPolicy                        string
	CPUManagerPolicyOptions                 map[string]string
	TopologyManagerScope                    string
	CPUManagerReconcilePeriod               time.Duration
	ExperimentalMemoryManagerPolicy         string
	ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
	PodPidsLimit                            int64
	EnforceCPULimits                        bool
	CPUCFSQuotaPeriod                       time.Duration
	TopologyManagerPolicy                   string
	TopologyManagerPolicyOptions            map[string]string
}

// NodeAllocatableConfig describes the resources reserved for system and
// Kubernetes daemons, and how/where that reservation is enforced.
type NodeAllocatableConfig struct {
	KubeReservedCgroupName   string
	SystemReservedCgroupName string
	ReservedSystemCPUs       cpuset.CPUSet
	EnforceNodeAllocatable   sets.Set[string]
	KubeReserved             v1.ResourceList
	SystemReserved           v1.ResourceList
	HardEvictionThresholds   []evictionapi.Threshold
}

// Status reports the container manager's internal health.
type Status struct {
	// Any soft requirements that were unsatisfied.
	SoftRequirements error
}

// parsePercentage parses the percentage string to numeric value.
179 func parsePercentage(v string) (int64, error) { 180 if !strings.HasSuffix(v, "%") { 181 return 0, fmt.Errorf("percentage expected, got '%s'", v) 182 } 183 percentage, err := strconv.ParseInt(strings.TrimRight(v, "%"), 10, 0) 184 if err != nil { 185 return 0, fmt.Errorf("invalid number in percentage '%s'", v) 186 } 187 if percentage < 0 || percentage > 100 { 188 return 0, fmt.Errorf("percentage must be between 0 and 100") 189 } 190 return percentage, nil 191 } 192 193 // ParseQOSReserved parses the --qos-reserved option 194 func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) { 195 reservations := make(map[v1.ResourceName]int64) 196 for k, v := range m { 197 switch v1.ResourceName(k) { 198 // Only memory resources are supported. 199 case v1.ResourceMemory: 200 q, err := parsePercentage(v) 201 if err != nil { 202 return nil, fmt.Errorf("failed to parse percentage %q for %q resource: %w", v, k, err) 203 } 204 reservations[v1.ResourceName(k)] = q 205 default: 206 return nil, fmt.Errorf("cannot reserve %q resource", k) 207 } 208 } 209 return &reservations, nil 210 } 211 212 func containerDevicesFromResourceDeviceInstances(devs devicemanager.ResourceDeviceInstances) []*podresourcesapi.ContainerDevices { 213 var respDevs []*podresourcesapi.ContainerDevices 214 215 for resourceName, resourceDevs := range devs { 216 for devID, dev := range resourceDevs { 217 topo := dev.GetTopology() 218 if topo == nil { 219 // Some device plugin do not report the topology information. 220 // This is legal, so we report the devices anyway, 221 // let the client decide what to do. 
222 respDevs = append(respDevs, &podresourcesapi.ContainerDevices{ 223 ResourceName: resourceName, 224 DeviceIds: []string{devID}, 225 }) 226 continue 227 } 228 229 for _, node := range topo.GetNodes() { 230 respDevs = append(respDevs, &podresourcesapi.ContainerDevices{ 231 ResourceName: resourceName, 232 DeviceIds: []string{devID}, 233 Topology: &podresourcesapi.TopologyInfo{ 234 Nodes: []*podresourcesapi.NUMANode{ 235 { 236 ID: node.GetID(), 237 }, 238 }, 239 }, 240 }) 241 } 242 } 243 } 244 245 return respDevs 246 }