/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
Package cache implements data structures used by the attach/detach controller
to keep track of volumes, the nodes they are attached to, and the pods that
reference them.
*/
package cache

import (
	"fmt"
	"sync"

	"k8s.io/api/core/v1"
	k8stypes "k8s.io/apimachinery/pkg/types"
	"k8s.io/kubernetes/pkg/volume"
	"k8s.io/kubernetes/pkg/volume/util"
	"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
	"k8s.io/kubernetes/pkg/volume/util/types"
)

// DesiredStateOfWorld defines a set of thread-safe operations supported on
// the attach/detach controller's desired state of the world cache.
// This cache contains nodes->volumes->pods where nodes are all the nodes
// managed by the attach/detach controller, volumes are all the volumes that
// should be attached to the specified node, and pods are the pods that
// reference the volume and are scheduled to that node.
// Note: This is distinct from the DesiredStateOfWorld implemented by the
// kubelet volume manager. They both keep track of different objects. This
// contains attach/detach controller specific state.
type DesiredStateOfWorld interface {
	// AddNode adds the given node to the list of nodes managed by the attach/
	// detach controller.
	// If the node already exists this is a no-op.
	// keepTerminatedPodVolumes is a property of the node that determines
	// if volumes should be mounted and attached for terminated pods.
	AddNode(nodeName k8stypes.NodeName, keepTerminatedPodVolumes bool)

	// AddPod adds the given pod to the list of pods that reference the
	// specified volume and is scheduled to the specified node.
	// A unique volumeName is generated from the volumeSpec and returned on
	// success.
	// If the pod already exists under the specified volume, this is a no-op.
	// If volumeSpec is not an attachable volume plugin, an error is returned.
	// If no volume with the name volumeName exists in the list of volumes that
	// should be attached to the specified node, the volume is implicitly added.
	// If no node with the name nodeName exists in list of nodes managed by the
	// attach/detach controller, an error is returned.
	AddPod(podName types.UniquePodName, pod *v1.Pod, volumeSpec *volume.Spec, nodeName k8stypes.NodeName) (v1.UniqueVolumeName, error)

	// DeleteNode removes the given node from the list of nodes managed by the
	// attach/detach controller.
	// If the node does not exist this is a no-op.
	// If the node exists but has 1 or more child volumes, an error is returned.
	DeleteNode(nodeName k8stypes.NodeName) error

	// DeletePod removes the given pod from the list of pods that reference the
	// specified volume and are scheduled to the specified node.
	// If no pod exists in the list of pods that reference the specified volume
	// and are scheduled to the specified node, this is a no-op.
	// If a node with the name nodeName does not exist in the list of nodes
	// managed by the attach/detach controller, this is a no-op.
	// If no volume with the name volumeName exists in the list of managed
	// volumes under the specified node, this is a no-op.
	// If after deleting the pod, the specified volume contains no other child
	// pods, the volume is also deleted.
	DeletePod(podName types.UniquePodName, volumeName v1.UniqueVolumeName, nodeName k8stypes.NodeName)

	// NodeExists returns true if the node with the specified name exists in
	// the list of nodes managed by the attach/detach controller.
	NodeExists(nodeName k8stypes.NodeName) bool

	// VolumeExists returns true if the volume with the specified name exists
	// in the list of volumes that should be attached to the specified node by
	// the attach detach controller.
	VolumeExists(volumeName v1.UniqueVolumeName, nodeName k8stypes.NodeName) bool

	// GetVolumesToAttach generates and returns a list of volumes to attach
	// and the nodes they should be attached to based on the current desired
	// state of the world.
	GetVolumesToAttach() []VolumeToAttach

	// GetPodToAdd generates and returns a map of pods based on the current desired
	// state of world
	GetPodToAdd() map[types.UniquePodName]PodToAdd

	// GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be
	// mounted and attached for terminated pods
	GetKeepTerminatedPodVolumesForNode(k8stypes.NodeName) bool

	// SetMultiAttachError marks the multi-attach error as reported to prevent
	// spamming multiple events for the same error.
	SetMultiAttachError(v1.UniqueVolumeName, k8stypes.NodeName)

	// GetVolumePodsOnNodes returns a list of pods ("namespace/name") that
	// require the given volume on the given nodes.
	GetVolumePodsOnNodes(nodes []k8stypes.NodeName, volumeName v1.UniqueVolumeName) []*v1.Pod
}

// VolumeToAttach represents a volume that should be attached to a node.
type VolumeToAttach struct {
	operationexecutor.VolumeToAttach
}

// PodToAdd represents a pod that references the underlying volume and is
// scheduled to the underlying node.
type PodToAdd struct {
	// Pod contains the api object of pod
	Pod *v1.Pod

	// VolumeName contains the unique identifier for this volume.
	VolumeName v1.UniqueVolumeName

	// NodeName contains the name of this node.
	NodeName k8stypes.NodeName
}

// NewDesiredStateOfWorld returns a new instance of DesiredStateOfWorld.
func NewDesiredStateOfWorld(volumePluginMgr *volume.VolumePluginMgr) DesiredStateOfWorld {
	return &desiredStateOfWorld{
		nodesManaged:    make(map[k8stypes.NodeName]nodeManaged),
		volumePluginMgr: volumePluginMgr,
	}
}

// desiredStateOfWorld is the default implementation of the
// DesiredStateOfWorld interface.
type desiredStateOfWorld struct {
	// nodesManaged is a map containing the set of nodes managed by the attach/
	// detach controller. The key in this map is the name of the node and the
	// value is a node object containing more information about the node.
	nodesManaged map[k8stypes.NodeName]nodeManaged
	// volumePluginMgr is the volume plugin manager used to create volume
	// plugin objects.
	volumePluginMgr *volume.VolumePluginMgr
	// The embedded RWMutex guards nodesManaged; every method locks it before
	// reading or mutating the cache.
	sync.RWMutex
}

// nodeManaged represents a node that is being managed by the attach/detach
// controller.
type nodeManaged struct {
	// nodeName contains the name of this node.
	nodeName k8stypes.NodeName

	// volumesToAttach is a map containing the set of volumes that should be
	// attached to this node. The key in the map is the name of the volume and
	// the value is a volumeToAttach object containing more information about the volume.
	volumesToAttach map[v1.UniqueVolumeName]volumeToAttach

	// keepTerminatedPodVolumes determines if for terminated pods(on this node) - volumes
	// should be kept mounted and attached.
	keepTerminatedPodVolumes bool
}

// The volumeToAttach object represents a volume that should be attached to a node.
type volumeToAttach struct {
	// multiAttachErrorReported indicates whether the multi-attach error has
	// been reported for the given volume.
	// It prevents the error from being reported more than once for a given
	// volume.
	multiAttachErrorReported bool

	// volumeName contains the unique identifier for this volume.
	volumeName v1.UniqueVolumeName

	// spec is the volume spec containing the specification for this volume.
	// Used to generate the volume plugin object, and passed to attach/detach
	// methods.
	spec *volume.Spec

	// scheduledPods is a map containing the set of pods that reference this
	// volume and are scheduled to the underlying node. The key in the map is
	// the name of the pod and the value is a pod object containing more
	// information about the pod.
	scheduledPods map[types.UniquePodName]pod
}

// The pod represents a pod that references the underlying volume and is
// scheduled to the underlying node.
190 type pod struct { 191 // podName contains the unique identifier for this pod 192 podName types.UniquePodName 193 194 // pod object contains the api object of pod 195 podObj *v1.Pod 196 } 197 198 func (dsw *desiredStateOfWorld) AddNode(nodeName k8stypes.NodeName, keepTerminatedPodVolumes bool) { 199 dsw.Lock() 200 defer dsw.Unlock() 201 202 if _, nodeExists := dsw.nodesManaged[nodeName]; !nodeExists { 203 dsw.nodesManaged[nodeName] = nodeManaged{ 204 nodeName: nodeName, 205 volumesToAttach: make(map[v1.UniqueVolumeName]volumeToAttach), 206 keepTerminatedPodVolumes: keepTerminatedPodVolumes, 207 } 208 } 209 } 210 211 func (dsw *desiredStateOfWorld) AddPod( 212 podName types.UniquePodName, 213 podToAdd *v1.Pod, 214 volumeSpec *volume.Spec, 215 nodeName k8stypes.NodeName) (v1.UniqueVolumeName, error) { 216 dsw.Lock() 217 defer dsw.Unlock() 218 219 nodeObj, nodeExists := dsw.nodesManaged[nodeName] 220 if !nodeExists { 221 return "", fmt.Errorf( 222 "no node with the name %q exists in the list of managed nodes", 223 nodeName) 224 } 225 226 attachableVolumePlugin, err := dsw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec) 227 if err != nil || attachableVolumePlugin == nil { 228 if attachableVolumePlugin == nil { 229 err = fmt.Errorf("plugin do not support attachment") 230 } 231 return "", fmt.Errorf( 232 "failed to get AttachablePlugin from volumeSpec for volume %q err=%v", 233 volumeSpec.Name(), 234 err) 235 } 236 237 volumeName, err := util.GetUniqueVolumeNameFromSpec( 238 attachableVolumePlugin, volumeSpec) 239 if err != nil { 240 return "", fmt.Errorf( 241 "failed to get UniqueVolumeName from volumeSpec for plugin=%q and volume=%q err=%v", 242 attachableVolumePlugin.GetPluginName(), 243 volumeSpec.Name(), 244 err) 245 } 246 247 volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName] 248 if !volumeExists { 249 volumeObj = volumeToAttach{ 250 multiAttachErrorReported: false, 251 volumeName: volumeName, 252 spec: volumeSpec, 253 scheduledPods: 
make(map[types.UniquePodName]pod), 254 } 255 dsw.nodesManaged[nodeName].volumesToAttach[volumeName] = volumeObj 256 } 257 if _, podExists := volumeObj.scheduledPods[podName]; !podExists { 258 dsw.nodesManaged[nodeName].volumesToAttach[volumeName].scheduledPods[podName] = 259 pod{ 260 podName: podName, 261 podObj: podToAdd, 262 } 263 } 264 265 return volumeName, nil 266 } 267 268 func (dsw *desiredStateOfWorld) DeleteNode(nodeName k8stypes.NodeName) error { 269 dsw.Lock() 270 defer dsw.Unlock() 271 272 nodeObj, nodeExists := dsw.nodesManaged[nodeName] 273 if !nodeExists { 274 return nil 275 } 276 277 if len(nodeObj.volumesToAttach) > 0 { 278 return fmt.Errorf( 279 "failed to delete node %q from list of nodes managed by attach/detach controller--the node still contains %v volumes in its list of volumes to attach", 280 nodeName, 281 len(nodeObj.volumesToAttach)) 282 } 283 284 delete( 285 dsw.nodesManaged, 286 nodeName) 287 return nil 288 } 289 290 func (dsw *desiredStateOfWorld) DeletePod( 291 podName types.UniquePodName, 292 volumeName v1.UniqueVolumeName, 293 nodeName k8stypes.NodeName) { 294 dsw.Lock() 295 defer dsw.Unlock() 296 297 nodeObj, nodeExists := dsw.nodesManaged[nodeName] 298 if !nodeExists { 299 return 300 } 301 302 volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName] 303 if !volumeExists { 304 return 305 } 306 if _, podExists := volumeObj.scheduledPods[podName]; !podExists { 307 return 308 } 309 310 delete( 311 dsw.nodesManaged[nodeName].volumesToAttach[volumeName].scheduledPods, 312 podName) 313 314 if len(volumeObj.scheduledPods) == 0 { 315 delete( 316 dsw.nodesManaged[nodeName].volumesToAttach, 317 volumeName) 318 } 319 } 320 321 func (dsw *desiredStateOfWorld) NodeExists(nodeName k8stypes.NodeName) bool { 322 dsw.RLock() 323 defer dsw.RUnlock() 324 325 _, nodeExists := dsw.nodesManaged[nodeName] 326 return nodeExists 327 } 328 329 func (dsw *desiredStateOfWorld) VolumeExists( 330 volumeName v1.UniqueVolumeName, nodeName k8stypes.NodeName) 
bool { 331 dsw.RLock() 332 defer dsw.RUnlock() 333 334 nodeObj, nodeExists := dsw.nodesManaged[nodeName] 335 if nodeExists { 336 if _, volumeExists := nodeObj.volumesToAttach[volumeName]; volumeExists { 337 return true 338 } 339 } 340 341 return false 342 } 343 344 func (dsw *desiredStateOfWorld) SetMultiAttachError( 345 volumeName v1.UniqueVolumeName, 346 nodeName k8stypes.NodeName) { 347 dsw.Lock() 348 defer dsw.Unlock() 349 350 nodeObj, nodeExists := dsw.nodesManaged[nodeName] 351 if nodeExists { 352 if volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName]; volumeExists { 353 volumeObj.multiAttachErrorReported = true 354 dsw.nodesManaged[nodeName].volumesToAttach[volumeName] = volumeObj 355 } 356 } 357 } 358 359 // GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be 360 // mounted and attached for terminated pods 361 func (dsw *desiredStateOfWorld) GetKeepTerminatedPodVolumesForNode(nodeName k8stypes.NodeName) bool { 362 dsw.RLock() 363 defer dsw.RUnlock() 364 365 if nodeName == "" { 366 return false 367 } 368 if node, ok := dsw.nodesManaged[nodeName]; ok { 369 return node.keepTerminatedPodVolumes 370 } 371 return false 372 } 373 374 func (dsw *desiredStateOfWorld) GetVolumesToAttach() []VolumeToAttach { 375 dsw.RLock() 376 defer dsw.RUnlock() 377 378 volumesToAttach := make([]VolumeToAttach, 0 /* len */, len(dsw.nodesManaged) /* cap */) 379 for nodeName, nodeObj := range dsw.nodesManaged { 380 for volumeName, volumeObj := range nodeObj.volumesToAttach { 381 volumesToAttach = append(volumesToAttach, 382 VolumeToAttach{ 383 VolumeToAttach: operationexecutor.VolumeToAttach{ 384 MultiAttachErrorReported: volumeObj.multiAttachErrorReported, 385 VolumeName: volumeName, 386 VolumeSpec: volumeObj.spec, 387 NodeName: nodeName, 388 ScheduledPods: getPodsFromMap(volumeObj.scheduledPods), 389 }}) 390 } 391 } 392 393 return volumesToAttach 394 } 395 396 // Construct a list of v1.Pod objects from the given pod map 397 func getPodsFromMap(podMap 
map[types.UniquePodName]pod) []*v1.Pod { 398 pods := make([]*v1.Pod, 0, len(podMap)) 399 for _, pod := range podMap { 400 pods = append(pods, pod.podObj) 401 } 402 return pods 403 } 404 405 func (dsw *desiredStateOfWorld) GetPodToAdd() map[types.UniquePodName]PodToAdd { 406 dsw.RLock() 407 defer dsw.RUnlock() 408 409 pods := make(map[types.UniquePodName]PodToAdd) 410 for nodeName, nodeObj := range dsw.nodesManaged { 411 for volumeName, volumeObj := range nodeObj.volumesToAttach { 412 for podUID, pod := range volumeObj.scheduledPods { 413 pods[podUID] = PodToAdd{ 414 Pod: pod.podObj, 415 VolumeName: volumeName, 416 NodeName: nodeName, 417 } 418 } 419 } 420 } 421 return pods 422 } 423 424 func (dsw *desiredStateOfWorld) GetVolumePodsOnNodes(nodes []k8stypes.NodeName, volumeName v1.UniqueVolumeName) []*v1.Pod { 425 dsw.RLock() 426 defer dsw.RUnlock() 427 428 pods := []*v1.Pod{} 429 for _, nodeName := range nodes { 430 node, ok := dsw.nodesManaged[nodeName] 431 if !ok { 432 continue 433 } 434 volume, ok := node.volumesToAttach[volumeName] 435 if !ok { 436 continue 437 } 438 for _, pod := range volume.scheduledPods { 439 pods = append(pods, pod.podObj) 440 } 441 } 442 return pods 443 }