// Source: k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/cm/container_manager.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
	"fmt"
	"strconv"
	"strings"
	"time"

	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"

	// TODO: Migrate kubelet to either use its own internal objects or client library.
	v1 "k8s.io/api/core/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
	"k8s.io/kubernetes/pkg/kubelet/config"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
	"k8s.io/kubernetes/pkg/kubelet/status"
	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/utils/cpuset"
)

// ActivePodsFunc is a callback that returns a list of pods.
// NOTE(review): judging by the name, these are presumably the pods currently
// active on the node; confirm against the caller that supplies the function.
type ActivePodsFunc func() []*v1.Pod

// ContainerManager manages the containers running on a machine.
type ContainerManager interface {
	// Start runs the container manager's housekeeping.
	// - Ensures that the Docker daemon is in a container.
	// - Creates the system container where all non-containerized processes run.
	// NOTE(review): the parameters are unnamed here; the meaning of the
	// trailing bool is not visible in this file and should be checked
	// against the implementations.
	Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error

	// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
	// These cgroups include the system and Kubernetes services.
	SystemCgroupsLimit() v1.ResourceList

	// GetNodeConfig returns a NodeConfig that is being used by the container manager.
	GetNodeConfig() NodeConfig

	// Status returns internal Status.
	Status() Status

	// NewPodContainerManager is a factory method which returns a podContainerManager object.
	// Returns a noop implementation if qos cgroup hierarchy is not enabled.
	NewPodContainerManager() PodContainerManager

	// GetMountedSubsystems returns the mounted cgroup subsystems on the node.
	GetMountedSubsystems() *CgroupSubsystems

	// GetQOSContainersInfo returns the names of top level QoS containers.
	GetQOSContainersInfo() QOSContainersInfo

	// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
	GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList

	// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
	// node allocatable (amount of total healthy resources reported by device plugin),
	// and inactive device plugin resources previously registered on the node.
	GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top
	// level QoS containers have their desired state in a thread-safe way.
	UpdateQOSCgroups() error

	// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
	// extended resources required by container.
	GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)

	// UpdatePluginResources calls Allocate of device plugin handler for potential
	// requests for device plugin resources, and returns an error if fails.
	// Otherwise, it updates allocatableResource in nodeInfo if necessary,
	// to make sure it is at least equal to the pod's requested capacity for
	// any registered device plugin resource.
	UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error

	// InternalContainerLifecycle returns an InternalContainerLifecycle, a type
	// declared elsewhere in this package.
	InternalContainerLifecycle() InternalContainerLifecycle

	// GetPodCgroupRoot returns the cgroup which contains all pods.
	GetPodCgroupRoot() string

	// GetPluginRegistrationHandler returns a plugin registration handler.
	// The pluginwatcher's Handlers allow to have a single module for handling
	// registration.
	GetPluginRegistrationHandler() cache.PluginHandler

	// ShouldResetExtendedResourceCapacity returns whether or not the extended resources should be zeroed,
	// due to node recreation.
	ShouldResetExtendedResourceCapacity() bool

	// GetAllocateResourcesPodAdmitHandler returns an instance of a PodAdmitHandler responsible for allocating pod resources.
	GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler

	// GetNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
	GetNodeAllocatableAbsolute() v1.ResourceList

	// PrepareDynamicResources prepares dynamic pod resources.
	PrepareDynamicResources(*v1.Pod) error

	// UnprepareDynamicResources unprepares dynamic pod resources.
	UnprepareDynamicResources(*v1.Pod) error

	// PodMightNeedToUnprepareResources returns true if the pod with the given UID
	// might need to unprepare resources.
	PodMightNeedToUnprepareResources(UID types.UID) bool

	// Implements the PodResources Provider API.
	podresources.CPUsProvider
	podresources.DevicesProvider
	podresources.MemoryProvider
	podresources.DynamicResourcesProvider
}

// NodeConfig is the configuration used by the container manager, as returned
// by ContainerManager.GetNodeConfig. It embeds NodeAllocatableConfig, so that
// type's fields are promoted onto NodeConfig.
type NodeConfig struct {
	NodeName              types.NodeName
	RuntimeCgroupsName    string
	SystemCgroupsName     string
	KubeletCgroupsName    string
	KubeletOOMScoreAdj    int32
	ContainerRuntime      string
	CgroupsPerQOS         bool
	CgroupRoot            string
	CgroupDriver          string
	KubeletRootDir        string
	ProtectKernelDefaults bool
	NodeAllocatableConfig
	QOSReserved                             map[v1.ResourceName]int64
	CPUManagerPolicy                        string
	CPUManagerPolicyOptions                 map[string]string
	TopologyManagerScope                    string
	CPUManagerReconcilePeriod               time.Duration
	ExperimentalMemoryManagerPolicy         string
	ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
	PodPidsLimit                            int64
	EnforceCPULimits                        bool
	CPUCFSQuotaPeriod                       time.Duration
	TopologyManagerPolicy                   string
	TopologyManagerPolicyOptions            map[string]string
}

// NodeAllocatableConfig is embedded in NodeConfig. It carries the reserved
// cgroup names, the reserved system CPU set, the set of node-allocatable
// enforcement keys, the kube- and system-reserved resource lists, and the
// hard eviction thresholds.
type NodeAllocatableConfig struct {
	KubeReservedCgroupName   string
	SystemReservedCgroupName string
	ReservedSystemCPUs       cpuset.CPUSet
	EnforceNodeAllocatable   sets.Set[string]
	KubeReserved             v1.ResourceList
	SystemReserved           v1.ResourceList
	HardEvictionThresholds   []evictionapi.Threshold
}

// Status is the internal status reported by ContainerManager.Status.
type Status struct {
	// Any soft requirements that were unsatisfied.
	SoftRequirements error
}

// parsePercentage parses the percentage string to numeric value.
180 func parsePercentage(v string) (int64, error) { 181 if !strings.HasSuffix(v, "%") { 182 return 0, fmt.Errorf("percentage expected, got '%s'", v) 183 } 184 percentage, err := strconv.ParseInt(strings.TrimRight(v, "%"), 10, 0) 185 if err != nil { 186 return 0, fmt.Errorf("invalid number in percentage '%s'", v) 187 } 188 if percentage < 0 || percentage > 100 { 189 return 0, fmt.Errorf("percentage must be between 0 and 100") 190 } 191 return percentage, nil 192 } 193 194 // ParseQOSReserved parses the --qos-reserved option 195 func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) { 196 reservations := make(map[v1.ResourceName]int64) 197 for k, v := range m { 198 switch v1.ResourceName(k) { 199 // Only memory resources are supported. 200 case v1.ResourceMemory: 201 q, err := parsePercentage(v) 202 if err != nil { 203 return nil, fmt.Errorf("failed to parse percentage %q for %q resource: %w", v, k, err) 204 } 205 reservations[v1.ResourceName(k)] = q 206 default: 207 return nil, fmt.Errorf("cannot reserve %q resource", k) 208 } 209 } 210 return &reservations, nil 211 } 212 213 func containerDevicesFromResourceDeviceInstances(devs devicemanager.ResourceDeviceInstances) []*podresourcesapi.ContainerDevices { 214 var respDevs []*podresourcesapi.ContainerDevices 215 216 for resourceName, resourceDevs := range devs { 217 for devID, dev := range resourceDevs { 218 topo := dev.GetTopology() 219 if topo == nil { 220 // Some device plugin do not report the topology information. 221 // This is legal, so we report the devices anyway, 222 // let the client decide what to do. 
223 respDevs = append(respDevs, &podresourcesapi.ContainerDevices{ 224 ResourceName: resourceName, 225 DeviceIds: []string{devID}, 226 }) 227 continue 228 } 229 230 for _, node := range topo.GetNodes() { 231 respDevs = append(respDevs, &podresourcesapi.ContainerDevices{ 232 ResourceName: resourceName, 233 DeviceIds: []string{devID}, 234 Topology: &podresourcesapi.TopologyInfo{ 235 Nodes: []*podresourcesapi.NUMANode{ 236 { 237 ID: node.GetID(), 238 }, 239 }, 240 }, 241 }) 242 } 243 } 244 } 245 246 return respDevs 247 }