k8s.io/kubernetes@v1.29.3/pkg/controller/volume/attachdetach/cache/desired_state_of_world.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package cache implements data structures used by the attach/detach controller
    19  to keep track of volumes, the nodes they are attached to, and the pods that
    20  reference them.
    21  */
    22  package cache
    23  
    24  import (
    25  	"fmt"
    26  	"sync"
    27  
    28  	"k8s.io/api/core/v1"
    29  	k8stypes "k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/kubernetes/pkg/volume"
    31  	"k8s.io/kubernetes/pkg/volume/util"
    32  	"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
    33  	"k8s.io/kubernetes/pkg/volume/util/types"
    34  )
    35  
// DesiredStateOfWorld defines a set of thread-safe operations supported on
// the attach/detach controller's desired state of the world cache.
// This cache contains nodes->volumes->pods where nodes are all the nodes
// managed by the attach/detach controller, volumes are all the volumes that
// should be attached to the specified node, and pods are the pods that
// reference the volume and are scheduled to that node.
// Note: This is distinct from the DesiredStateOfWorld implemented by the
// kubelet volume manager. They both keep track of different objects. This
// contains attach/detach controller specific state.
type DesiredStateOfWorld interface {
	// AddNode adds the given node to the list of nodes managed by the attach/
	// detach controller.
	// If the node already exists this is a no-op.
	// keepTerminatedPodVolumes is a property of the node that determines
	// if volumes should be mounted and attached for terminated pods.
	AddNode(nodeName k8stypes.NodeName, keepTerminatedPodVolumes bool)

	// AddPod adds the given pod to the list of pods that reference the
	// specified volume and is scheduled to the specified node.
	// A unique volumeName is generated from the volumeSpec and returned on
	// success.
	// If the pod already exists under the specified volume, this is a no-op.
	// If volumeSpec is not an attachable volume plugin, an error is returned.
	// If no volume with the name volumeName exists in the list of volumes that
	// should be attached to the specified node, the volume is implicitly added.
	// If no node with the name nodeName exists in the list of nodes managed by
	// the attach/detach controller, an error is returned.
	AddPod(podName types.UniquePodName, pod *v1.Pod, volumeSpec *volume.Spec, nodeName k8stypes.NodeName) (v1.UniqueVolumeName, error)

	// DeleteNode removes the given node from the list of nodes managed by the
	// attach/detach controller.
	// If the node does not exist this is a no-op.
	// If the node exists but has 1 or more child volumes, an error is returned.
	DeleteNode(nodeName k8stypes.NodeName) error

	// DeletePod removes the given pod from the list of pods that reference the
	// specified volume and are scheduled to the specified node.
	// If no pod exists in the list of pods that reference the specified volume
	// and are scheduled to the specified node, this is a no-op.
	// If a node with the name nodeName does not exist in the list of nodes
	// managed by the attach/detach controller, this is a no-op.
	// If no volume with the name volumeName exists in the list of managed
	// volumes under the specified node, this is a no-op.
	// If after deleting the pod, the specified volume contains no other child
	// pods, the volume is also deleted.
	DeletePod(podName types.UniquePodName, volumeName v1.UniqueVolumeName, nodeName k8stypes.NodeName)

	// NodeExists returns true if the node with the specified name exists in
	// the list of nodes managed by the attach/detach controller.
	NodeExists(nodeName k8stypes.NodeName) bool

	// VolumeExists returns true if the volume with the specified name exists
	// in the list of volumes that should be attached to the specified node by
	// the attach detach controller.
	VolumeExists(volumeName v1.UniqueVolumeName, nodeName k8stypes.NodeName) bool

	// GetVolumesToAttach generates and returns a list of volumes to attach
	// and the nodes they should be attached to based on the current desired
	// state of the world.
	GetVolumesToAttach() []VolumeToAttach

	// GetPodToAdd generates and returns a map of pods based on the current desired
	// state of world.
	GetPodToAdd() map[types.UniquePodName]PodToAdd

	// GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be
	// mounted and attached for terminated pods.
	GetKeepTerminatedPodVolumesForNode(k8stypes.NodeName) bool

	// SetMultiAttachError marks the multi-attach error as reported for the
	// given volume/node pair to prevent spamming multiple events for the
	// same error.
	SetMultiAttachError(v1.UniqueVolumeName, k8stypes.NodeName)

	// GetVolumePodsOnNodes returns the list of pods that require the given
	// volume on the given nodes.
	GetVolumePodsOnNodes(nodes []k8stypes.NodeName, volumeName v1.UniqueVolumeName) []*v1.Pod
}
   113  
// VolumeToAttach represents a volume that should be attached to a node.
// It embeds the operation executor's VolumeToAttach so it can be handed
// directly to attach operations.
type VolumeToAttach struct {
	operationexecutor.VolumeToAttach
}
   118  
// PodToAdd represents a pod that references the underlying volume and is
// scheduled to the underlying node.
type PodToAdd struct {
	// Pod contains the api object of pod.
	Pod *v1.Pod

	// VolumeName contains the unique identifier for this volume.
	VolumeName v1.UniqueVolumeName

	// NodeName contains the name of this node.
	NodeName k8stypes.NodeName
}
   131  
   132  // NewDesiredStateOfWorld returns a new instance of DesiredStateOfWorld.
   133  func NewDesiredStateOfWorld(volumePluginMgr *volume.VolumePluginMgr) DesiredStateOfWorld {
   134  	return &desiredStateOfWorld{
   135  		nodesManaged:    make(map[k8stypes.NodeName]nodeManaged),
   136  		volumePluginMgr: volumePluginMgr,
   137  	}
   138  }
   139  
// desiredStateOfWorld is the default implementation of DesiredStateOfWorld.
// All accesses are guarded by the embedded RWMutex.
type desiredStateOfWorld struct {
	// nodesManaged is a map containing the set of nodes managed by the attach/
	// detach controller. The key in this map is the name of the node and the
	// value is a node object containing more information about the node.
	nodesManaged map[k8stypes.NodeName]nodeManaged
	// volumePluginMgr is the volume plugin manager used to create volume
	// plugin objects.
	volumePluginMgr *volume.VolumePluginMgr
	// RWMutex guards nodesManaged; exported methods take the write lock for
	// mutations and the read lock for queries.
	sync.RWMutex
}
   150  
// nodeManaged represents a node that is being managed by the attach/detach
// controller.
type nodeManaged struct {
	// nodeName contains the name of this node.
	nodeName k8stypes.NodeName

	// volumesToAttach is a map containing the set of volumes that should be
	// attached to this node. The key in the map is the name of the volume and
	// the value is a volumeToAttach object containing more information about
	// the volume.
	volumesToAttach map[v1.UniqueVolumeName]volumeToAttach

	// keepTerminatedPodVolumes determines if volumes for terminated pods on
	// this node should be kept mounted and attached.
	keepTerminatedPodVolumes bool
}
   166  
// The volumeToAttach object represents a volume that should be attached to a node.
type volumeToAttach struct {
	// multiAttachErrorReported indicates whether the multi-attach error has
	// been reported for the given volume. It is used to prevent the error
	// from being reported more than once for the same volume.
	multiAttachErrorReported bool

	// volumeName contains the unique identifier for this volume.
	volumeName v1.UniqueVolumeName

	// spec is the volume spec containing the specification for this volume.
	// Used to generate the volume plugin object, and passed to attach/detach
	// methods.
	spec *volume.Spec

	// scheduledPods is a map containing the set of pods that reference this
	// volume and are scheduled to the underlying node. The key in the map is
	// the name of the pod and the value is a pod object containing more
	// information about the pod.
	scheduledPods map[types.UniquePodName]pod
}
   187  
// The pod type represents a pod that references the underlying volume and is
// scheduled to the underlying node.
type pod struct {
	// podName contains the unique identifier for this pod.
	podName types.UniquePodName

	// podObj contains the api object of the pod.
	podObj *v1.Pod
}
   197  
   198  func (dsw *desiredStateOfWorld) AddNode(nodeName k8stypes.NodeName, keepTerminatedPodVolumes bool) {
   199  	dsw.Lock()
   200  	defer dsw.Unlock()
   201  
   202  	if _, nodeExists := dsw.nodesManaged[nodeName]; !nodeExists {
   203  		dsw.nodesManaged[nodeName] = nodeManaged{
   204  			nodeName:                 nodeName,
   205  			volumesToAttach:          make(map[v1.UniqueVolumeName]volumeToAttach),
   206  			keepTerminatedPodVolumes: keepTerminatedPodVolumes,
   207  		}
   208  	}
   209  }
   210  
   211  func (dsw *desiredStateOfWorld) AddPod(
   212  	podName types.UniquePodName,
   213  	podToAdd *v1.Pod,
   214  	volumeSpec *volume.Spec,
   215  	nodeName k8stypes.NodeName) (v1.UniqueVolumeName, error) {
   216  	dsw.Lock()
   217  	defer dsw.Unlock()
   218  
   219  	nodeObj, nodeExists := dsw.nodesManaged[nodeName]
   220  	if !nodeExists {
   221  		return "", fmt.Errorf(
   222  			"no node with the name %q exists in the list of managed nodes",
   223  			nodeName)
   224  	}
   225  
   226  	attachableVolumePlugin, err := dsw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
   227  	if err != nil || attachableVolumePlugin == nil {
   228  		if attachableVolumePlugin == nil {
   229  			err = fmt.Errorf("plugin do not support attachment")
   230  		}
   231  		return "", fmt.Errorf(
   232  			"failed to get AttachablePlugin from volumeSpec for volume %q err=%v",
   233  			volumeSpec.Name(),
   234  			err)
   235  	}
   236  
   237  	volumeName, err := util.GetUniqueVolumeNameFromSpec(
   238  		attachableVolumePlugin, volumeSpec)
   239  	if err != nil {
   240  		return "", fmt.Errorf(
   241  			"failed to get UniqueVolumeName from volumeSpec for plugin=%q and volume=%q err=%v",
   242  			attachableVolumePlugin.GetPluginName(),
   243  			volumeSpec.Name(),
   244  			err)
   245  	}
   246  
   247  	volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName]
   248  	if !volumeExists {
   249  		volumeObj = volumeToAttach{
   250  			multiAttachErrorReported: false,
   251  			volumeName:               volumeName,
   252  			spec:                     volumeSpec,
   253  			scheduledPods:            make(map[types.UniquePodName]pod),
   254  		}
   255  		dsw.nodesManaged[nodeName].volumesToAttach[volumeName] = volumeObj
   256  	}
   257  	if _, podExists := volumeObj.scheduledPods[podName]; !podExists {
   258  		dsw.nodesManaged[nodeName].volumesToAttach[volumeName].scheduledPods[podName] =
   259  			pod{
   260  				podName: podName,
   261  				podObj:  podToAdd,
   262  			}
   263  	}
   264  
   265  	return volumeName, nil
   266  }
   267  
   268  func (dsw *desiredStateOfWorld) DeleteNode(nodeName k8stypes.NodeName) error {
   269  	dsw.Lock()
   270  	defer dsw.Unlock()
   271  
   272  	nodeObj, nodeExists := dsw.nodesManaged[nodeName]
   273  	if !nodeExists {
   274  		return nil
   275  	}
   276  
   277  	if len(nodeObj.volumesToAttach) > 0 {
   278  		return fmt.Errorf(
   279  			"failed to delete node %q from list of nodes managed by attach/detach controller--the node still contains %v volumes in its list of volumes to attach",
   280  			nodeName,
   281  			len(nodeObj.volumesToAttach))
   282  	}
   283  
   284  	delete(
   285  		dsw.nodesManaged,
   286  		nodeName)
   287  	return nil
   288  }
   289  
   290  func (dsw *desiredStateOfWorld) DeletePod(
   291  	podName types.UniquePodName,
   292  	volumeName v1.UniqueVolumeName,
   293  	nodeName k8stypes.NodeName) {
   294  	dsw.Lock()
   295  	defer dsw.Unlock()
   296  
   297  	nodeObj, nodeExists := dsw.nodesManaged[nodeName]
   298  	if !nodeExists {
   299  		return
   300  	}
   301  
   302  	volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName]
   303  	if !volumeExists {
   304  		return
   305  	}
   306  	if _, podExists := volumeObj.scheduledPods[podName]; !podExists {
   307  		return
   308  	}
   309  
   310  	delete(
   311  		dsw.nodesManaged[nodeName].volumesToAttach[volumeName].scheduledPods,
   312  		podName)
   313  
   314  	if len(volumeObj.scheduledPods) == 0 {
   315  		delete(
   316  			dsw.nodesManaged[nodeName].volumesToAttach,
   317  			volumeName)
   318  	}
   319  }
   320  
   321  func (dsw *desiredStateOfWorld) NodeExists(nodeName k8stypes.NodeName) bool {
   322  	dsw.RLock()
   323  	defer dsw.RUnlock()
   324  
   325  	_, nodeExists := dsw.nodesManaged[nodeName]
   326  	return nodeExists
   327  }
   328  
   329  func (dsw *desiredStateOfWorld) VolumeExists(
   330  	volumeName v1.UniqueVolumeName, nodeName k8stypes.NodeName) bool {
   331  	dsw.RLock()
   332  	defer dsw.RUnlock()
   333  
   334  	nodeObj, nodeExists := dsw.nodesManaged[nodeName]
   335  	if nodeExists {
   336  		if _, volumeExists := nodeObj.volumesToAttach[volumeName]; volumeExists {
   337  			return true
   338  		}
   339  	}
   340  
   341  	return false
   342  }
   343  
   344  func (dsw *desiredStateOfWorld) SetMultiAttachError(
   345  	volumeName v1.UniqueVolumeName,
   346  	nodeName k8stypes.NodeName) {
   347  	dsw.Lock()
   348  	defer dsw.Unlock()
   349  
   350  	nodeObj, nodeExists := dsw.nodesManaged[nodeName]
   351  	if nodeExists {
   352  		if volumeObj, volumeExists := nodeObj.volumesToAttach[volumeName]; volumeExists {
   353  			volumeObj.multiAttachErrorReported = true
   354  			dsw.nodesManaged[nodeName].volumesToAttach[volumeName] = volumeObj
   355  		}
   356  	}
   357  }
   358  
   359  // GetKeepTerminatedPodVolumesForNode determines if node wants volumes to be
   360  // mounted and attached for terminated pods
   361  func (dsw *desiredStateOfWorld) GetKeepTerminatedPodVolumesForNode(nodeName k8stypes.NodeName) bool {
   362  	dsw.RLock()
   363  	defer dsw.RUnlock()
   364  
   365  	if nodeName == "" {
   366  		return false
   367  	}
   368  	if node, ok := dsw.nodesManaged[nodeName]; ok {
   369  		return node.keepTerminatedPodVolumes
   370  	}
   371  	return false
   372  }
   373  
   374  func (dsw *desiredStateOfWorld) GetVolumesToAttach() []VolumeToAttach {
   375  	dsw.RLock()
   376  	defer dsw.RUnlock()
   377  
   378  	volumesToAttach := make([]VolumeToAttach, 0 /* len */, len(dsw.nodesManaged) /* cap */)
   379  	for nodeName, nodeObj := range dsw.nodesManaged {
   380  		for volumeName, volumeObj := range nodeObj.volumesToAttach {
   381  			volumesToAttach = append(volumesToAttach,
   382  				VolumeToAttach{
   383  					VolumeToAttach: operationexecutor.VolumeToAttach{
   384  						MultiAttachErrorReported: volumeObj.multiAttachErrorReported,
   385  						VolumeName:               volumeName,
   386  						VolumeSpec:               volumeObj.spec,
   387  						NodeName:                 nodeName,
   388  						ScheduledPods:            getPodsFromMap(volumeObj.scheduledPods),
   389  					}})
   390  		}
   391  	}
   392  
   393  	return volumesToAttach
   394  }
   395  
   396  // Construct a list of v1.Pod objects from the given pod map
   397  func getPodsFromMap(podMap map[types.UniquePodName]pod) []*v1.Pod {
   398  	pods := make([]*v1.Pod, 0, len(podMap))
   399  	for _, pod := range podMap {
   400  		pods = append(pods, pod.podObj)
   401  	}
   402  	return pods
   403  }
   404  
   405  func (dsw *desiredStateOfWorld) GetPodToAdd() map[types.UniquePodName]PodToAdd {
   406  	dsw.RLock()
   407  	defer dsw.RUnlock()
   408  
   409  	pods := make(map[types.UniquePodName]PodToAdd)
   410  	for nodeName, nodeObj := range dsw.nodesManaged {
   411  		for volumeName, volumeObj := range nodeObj.volumesToAttach {
   412  			for podUID, pod := range volumeObj.scheduledPods {
   413  				pods[podUID] = PodToAdd{
   414  					Pod:        pod.podObj,
   415  					VolumeName: volumeName,
   416  					NodeName:   nodeName,
   417  				}
   418  			}
   419  		}
   420  	}
   421  	return pods
   422  }
   423  
   424  func (dsw *desiredStateOfWorld) GetVolumePodsOnNodes(nodes []k8stypes.NodeName, volumeName v1.UniqueVolumeName) []*v1.Pod {
   425  	dsw.RLock()
   426  	defer dsw.RUnlock()
   427  
   428  	pods := []*v1.Pod{}
   429  	for _, nodeName := range nodes {
   430  		node, ok := dsw.nodesManaged[nodeName]
   431  		if !ok {
   432  			continue
   433  		}
   434  		volume, ok := node.volumesToAttach[volumeName]
   435  		if !ok {
   436  			continue
   437  		}
   438  		for _, pod := range volume.scheduledPods {
   439  			pods = append(pods, pod.podObj)
   440  		}
   441  	}
   442  	return pods
   443  }