k8s.io/kubernetes@v1.29.3/pkg/kubelet/volumemanager/cache/desired_state_of_world.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package cache implements data structures used by the kubelet volume manager to
    19  keep track of attached volumes and the pods that mounted them.
    20  */
    21  package cache
    22  
    23  import (
    24  	"fmt"
    25  	"sync"
    26  	"time"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	"k8s.io/apimachinery/pkg/util/sets"
    31  	"k8s.io/apiserver/pkg/util/feature"
    32  	"k8s.io/component-base/metrics"
    33  	"k8s.io/klog/v2"
    34  	"k8s.io/kubernetes/pkg/volume/csi"
    35  
    36  	resourcehelper "k8s.io/kubernetes/pkg/api/v1/resource"
    37  	"k8s.io/kubernetes/pkg/features"
    38  	"k8s.io/kubernetes/pkg/volume"
    39  	"k8s.io/kubernetes/pkg/volume/util"
    40  	"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
    41  	"k8s.io/kubernetes/pkg/volume/util/types"
    42  )
    43  
// DesiredStateOfWorld defines a set of thread-safe operations for the kubelet
// volume manager's desired state of the world cache.
// This cache contains volumes->pods, i.e. a set of all volumes that should be
// attached to this node and the pods that reference them and should mount the
// volume.
// Note: This is distinct from the DesiredStateOfWorld implemented by the
// attach/detach controller. They both keep track of different objects. This
// contains kubelet volume manager specific state.
type DesiredStateOfWorld interface {
	// AddPodToVolume adds the given pod to the given volume in the cache
	// indicating the specified pod should mount the specified volume.
	// A unique volumeName is generated from the volumeSpec and returned on
	// success.
	// If no volume plugin can support the given volumeSpec or more than one
	// plugin can support it, an error is returned.
	// If a volume with the name volumeName does not exist in the list of
	// volumes that should be attached to this node, the volume is implicitly
	// added.
	// If a pod with the same unique name already exists under the specified
	// volume, its entry is refreshed with the given pod, volumeSpec and
	// outerVolumeSpecName.
	AddPodToVolume(podName types.UniquePodName, pod *v1.Pod, volumeSpec *volume.Spec, outerVolumeSpecName string, volumeGidValue string, seLinuxContainerContexts []*v1.SELinuxOptions) (v1.UniqueVolumeName, error)

	// MarkVolumesReportedInUse sets the ReportedInUse value to true for the
	// reportedVolumes. For volumes not in the reportedVolumes list, the
	// ReportedInUse value is reset to false. The default ReportedInUse value
	// for a newly created volume is false.
	// When set to true this value indicates that the volume was successfully
	// added to the VolumesInUse field in the node's status. Mount operation needs
	// to check this value before issuing the operation.
	// If a volume in the reportedVolumes list does not exist in the list of
	// volumes that should be attached to this node, it is skipped without error.
	MarkVolumesReportedInUse(reportedVolumes []v1.UniqueVolumeName)

	// DeletePodFromVolume removes the given pod from the given volume in the
	// cache indicating the specified pod no longer requires the specified
	// volume. Errors stored for the pod are dropped as well.
	// If a pod with the same unique name does not exist under the specified
	// volume, the volume is left unchanged.
	// If a volume with the name volumeName does not exist in the list of
	// attached volumes, no volume is changed.
	// If after deleting the pod, the specified volume contains no other child
	// pods, the volume is also deleted.
	DeletePodFromVolume(podName types.UniquePodName, volumeName v1.UniqueVolumeName)

	// VolumeExists returns true if the given volume exists in the list of
	// volumes that should be attached to this node.
	// When the SELinuxMountReadWriteOncePod feature gate is enabled, the
	// volume must also be tracked with an SELinux mount label equal to
	// seLinuxMountContext; otherwise false is returned.
	VolumeExists(volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool

	// PodExistsInVolume returns true if the given pod exists in the list of
	// podsToMount for the given volume in the cache.
	// If a pod with the same unique name does not exist under the specified
	// volume, false is returned.
	// If a volume with the name volumeName does not exist in the list of
	// attached volumes, false is returned.
	// When the SELinuxMountReadWriteOncePod feature gate is enabled and the
	// volume is tracked with an SELinux mount label different from
	// seLinuxMountContext, false is returned.
	PodExistsInVolume(podName types.UniquePodName, volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool

	// GetVolumesToMount generates and returns a list of volumes that should be
	// attached to this node and the pods they should be mounted to based on the
	// current desired state of the world.
	GetVolumesToMount() []VolumeToMount

	// GetPods generates and returns a map indexed by the unique name of every
	// pod that is currently in the desired state of the world.
	GetPods() map[types.UniquePodName]bool

	// VolumeExistsWithSpecName returns true if the given volume specified with the
	// volume spec name (a.k.a., InnerVolumeSpecName) exists in the list of
	// volumes that should be attached to this node.
	// If a pod with the same name does not exist under the specified
	// volume, false is returned.
	VolumeExistsWithSpecName(podName types.UniquePodName, volumeSpecName string) bool

	// AddErrorToPod adds the given error to the given pod in the cache.
	// It will be returned by subsequent PopPodErrors().
	// Each error string is stored only once.
	AddErrorToPod(podName types.UniquePodName, err string)

	// PopPodErrors returns accumulated errors on a given pod and clears
	// them.
	PopPodErrors(podName types.UniquePodName) []string

	// GetPodsWithErrors returns names of pods that have stored errors.
	GetPodsWithErrors() []types.UniquePodName

	// MarkVolumeAttachability updates the attachability of the given volume.
	// It does nothing if the volume is not in the cache.
	MarkVolumeAttachability(volumeName v1.UniqueVolumeName, attachable bool)

	// UpdatePersistentVolumeSize updates persistentVolumeSize in the desired
	// state of the world so that it can be compared against the actual size
	// and volume expansion performed if necessary.
	UpdatePersistentVolumeSize(volumeName v1.UniqueVolumeName, size *resource.Quantity)
}
   139  
// VolumeToMount represents a volume that is attached to this node and needs to
// be mounted to PodName. It embeds operationexecutor.VolumeToMount, so all of
// that type's fields are promoted onto this one.
type VolumeToMount struct {
	operationexecutor.VolumeToMount
}
   145  
   146  // NewDesiredStateOfWorld returns a new instance of DesiredStateOfWorld.
   147  func NewDesiredStateOfWorld(volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator util.SELinuxLabelTranslator) DesiredStateOfWorld {
   148  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   149  		registerSELinuxMetrics()
   150  	}
   151  	return &desiredStateOfWorld{
   152  		volumesToMount:    make(map[v1.UniqueVolumeName]volumeToMount),
   153  		volumePluginMgr:   volumePluginMgr,
   154  		podErrors:         make(map[types.UniquePodName]sets.String),
   155  		seLinuxTranslator: seLinuxTranslator,
   156  	}
   157  }
   158  
// desiredStateOfWorld is the DesiredStateOfWorld implementation returned by
// NewDesiredStateOfWorld.
type desiredStateOfWorld struct {
	// volumesToMount is a map containing the set of volumes that should be
	// attached to this node and mounted to the pods referencing it. The key in
	// the map is the name of the volume and the value is a volume object
	// containing more information about the volume.
	volumesToMount map[v1.UniqueVolumeName]volumeToMount
	// volumePluginMgr is the volume plugin manager used to create volume
	// plugin objects.
	volumePluginMgr *volume.VolumePluginMgr
	// podErrors are errors caught by desiredStateOfWorldPopulator about volumes for a given pod.
	podErrors map[types.UniquePodName]sets.String
	// seLinuxTranslator translates v1.SELinuxOptions to a file SELinux label.
	seLinuxTranslator util.SELinuxLabelTranslator

	// RWMutex is held by the methods of this type while they read or modify
	// volumesToMount and podErrors.
	sync.RWMutex
}
   175  
// volumeToMount represents a volume that should be attached to this node,
// and mounted to podsToMount.
type volumeToMount struct {
	// volumeName contains the unique identifier for this volume.
	volumeName v1.UniqueVolumeName

	// podsToMount is a map containing the set of pods that reference this
	// volume and should mount it once it is attached. The key in the map is
	// the name of the pod and the value is a pod object containing more
	// information about the pod.
	podsToMount map[types.UniquePodName]podToMount

	// pluginIsAttachable indicates that the plugin for this volume implements
	// the volume.Attacher interface. It can be changed after the volume was
	// added through MarkVolumeAttachability.
	pluginIsAttachable bool

	// pluginIsDeviceMountable indicates that the plugin for this volume implements
	// the volume.DeviceMounter interface
	pluginIsDeviceMountable bool

	// volumeGidValue contains the value of the GID annotation, if present.
	volumeGidValue string

	// reportedInUse indicates that the volume was successfully added to the
	// VolumesInUse field in the node's status. It starts as false and is
	// rewritten on every MarkVolumesReportedInUse call.
	reportedInUse bool

	// desiredSizeLimit indicates the desired upper bound on the size of the volume
	// (if so implemented). It is only computed for local ephemeral volumes
	// and stays nil otherwise.
	desiredSizeLimit *resource.Quantity

	// persistentVolumeSize records desired size of a persistent volume.
	// Usually this value reflects size recorded in pv.Spec.Capacity. It is
	// replaced by UpdatePersistentVolumeSize.
	persistentVolumeSize *resource.Quantity

	// effectiveSELinuxMountFileLabel is the SELinux label that will be applied to the volume using mount options.
	// If empty, then:
	// - either the context+label is unknown (assigned randomly by the container runtime)
	// - or the volume plugin responsible for this volume does not support mounting with -o context
	// - or the volume is not ReadWriteOncePod
	// - or the OS does not support SELinux
	// In all cases, the SELinux context does not matter when mounting the volume.
	effectiveSELinuxMountFileLabel string

	// originalSELinuxLabel is the SELinux label that would be used if SELinux mount was supported for all access modes.
	// For RWOP volumes it's the same as effectiveSELinuxMountFileLabel.
	// It is used only to report potential SELinux mismatch metrics.
	// If empty, then:
	// - either the context+label is unknown (assigned randomly by the container runtime)
	// - or the volume plugin responsible for this volume does not support mounting with -o context
	// - or the OS does not support SELinux
	originalSELinuxLabel string
}
   229  
// podToMount represents a pod that references the underlying volume and
// should mount it once it is attached.
type podToMount struct {
	// podName contains the name of this pod.
	podName types.UniquePodName

	// Pod to mount the volume to. Used to create NewMounter.
	pod *v1.Pod

	// volume spec containing the specification for this volume. Used to
	// generate the volume plugin object, and passed to plugin methods.
	// For non-PVC volumes this is the same as defined in the pod object. For
	// PVC volumes it is from the dereferenced PV object.
	volumeSpec *volume.Spec

	// outerVolumeSpecName is the volume.Spec.Name() of the volume as referenced
	// directly in the pod. If the volume was referenced through a persistent
	// volume claim, this contains the volume.Spec.Name() of the persistent
	// volume claim
	outerVolumeSpecName string
	// mountRequestTime stores the time at which the mount was requested.
	// When AddPodToVolume refreshes an existing entry, the previous time is
	// kept unless the volume plugin reports that the volume requires remount.
	mountRequestTime time.Time
}
   253  
const (
	// maxPodErrors bounds the number of errors stored per pod in
	// desiredStateOfWorld.podErrors to prevent unbounded growth.
	maxPodErrors = 10
)
   259  
   260  func (dsw *desiredStateOfWorld) AddPodToVolume(
   261  	podName types.UniquePodName,
   262  	pod *v1.Pod,
   263  	volumeSpec *volume.Spec,
   264  	outerVolumeSpecName string,
   265  	volumeGidValue string,
   266  	seLinuxContainerContexts []*v1.SELinuxOptions) (v1.UniqueVolumeName, error) {
   267  	dsw.Lock()
   268  	defer dsw.Unlock()
   269  
   270  	volumePlugin, err := dsw.volumePluginMgr.FindPluginBySpec(volumeSpec)
   271  	if err != nil || volumePlugin == nil {
   272  		return "", fmt.Errorf(
   273  			"failed to get Plugin from volumeSpec for volume %q err=%v",
   274  			volumeSpec.Name(),
   275  			err)
   276  	}
   277  	volumePluginName := getVolumePluginNameWithDriver(volumePlugin, volumeSpec)
   278  
   279  	var volumeName v1.UniqueVolumeName
   280  
   281  	// The unique volume name used depends on whether the volume is attachable/device-mountable
   282  	// or not.
   283  	attachable := util.IsAttachableVolume(volumeSpec, dsw.volumePluginMgr)
   284  	deviceMountable := util.IsDeviceMountableVolume(volumeSpec, dsw.volumePluginMgr)
   285  	if attachable || deviceMountable {
   286  		// For attachable/device-mountable volumes, use the unique volume name as reported by
   287  		// the plugin.
   288  		volumeName, err =
   289  			util.GetUniqueVolumeNameFromSpec(volumePlugin, volumeSpec)
   290  		if err != nil {
   291  			return "", fmt.Errorf(
   292  				"failed to GetUniqueVolumeNameFromSpec for volumeSpec %q using volume plugin %q err=%v",
   293  				volumeSpec.Name(),
   294  				volumePlugin.GetPluginName(),
   295  				err)
   296  		}
   297  	} else {
   298  		// For non-attachable and non-device-mountable volumes, generate a unique name based on the pod
   299  		// namespace and name and the name of the volume within the pod.
   300  		volumeName = util.GetUniqueVolumeNameFromSpecWithPod(podName, volumePlugin, volumeSpec)
   301  	}
   302  
   303  	seLinuxFileLabel, pluginSupportsSELinuxContextMount, err := dsw.getSELinuxLabel(volumeSpec, seLinuxContainerContexts)
   304  	if err != nil {
   305  		return "", err
   306  	}
   307  	klog.V(4).InfoS("expected volume SELinux label context", "volume", volumeSpec.Name(), "label", seLinuxFileLabel)
   308  
   309  	if _, volumeExists := dsw.volumesToMount[volumeName]; !volumeExists {
   310  		var sizeLimit *resource.Quantity
   311  		if volumeSpec.Volume != nil {
   312  			if util.IsLocalEphemeralVolume(*volumeSpec.Volume) {
   313  				podLimits := resourcehelper.PodLimits(pod, resourcehelper.PodResourcesOptions{})
   314  				ephemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
   315  				sizeLimit = resource.NewQuantity(ephemeralStorageLimit.Value(), resource.BinarySI)
   316  				if volumeSpec.Volume.EmptyDir != nil &&
   317  					volumeSpec.Volume.EmptyDir.SizeLimit != nil &&
   318  					volumeSpec.Volume.EmptyDir.SizeLimit.Value() > 0 &&
   319  					(sizeLimit.Value() == 0 || volumeSpec.Volume.EmptyDir.SizeLimit.Value() < sizeLimit.Value()) {
   320  					sizeLimit = resource.NewQuantity(volumeSpec.Volume.EmptyDir.SizeLimit.Value(), resource.BinarySI)
   321  				}
   322  			}
   323  		}
   324  		effectiveSELinuxMountLabel := seLinuxFileLabel
   325  		if !util.VolumeSupportsSELinuxMount(volumeSpec) {
   326  			// Clear SELinux label for the volume with unsupported access modes.
   327  			klog.V(4).InfoS("volume does not support SELinux context mount, clearing the expected label", "volume", volumeSpec.Name())
   328  			effectiveSELinuxMountLabel = ""
   329  		}
   330  		if seLinuxFileLabel != "" {
   331  			seLinuxVolumesAdmitted.WithLabelValues(volumePluginName).Add(1.0)
   332  		}
   333  		vmt := volumeToMount{
   334  			volumeName:                     volumeName,
   335  			podsToMount:                    make(map[types.UniquePodName]podToMount),
   336  			pluginIsAttachable:             attachable,
   337  			pluginIsDeviceMountable:        deviceMountable,
   338  			volumeGidValue:                 volumeGidValue,
   339  			reportedInUse:                  false,
   340  			desiredSizeLimit:               sizeLimit,
   341  			effectiveSELinuxMountFileLabel: effectiveSELinuxMountLabel,
   342  			originalSELinuxLabel:           seLinuxFileLabel,
   343  		}
   344  		// record desired size of the volume
   345  		if volumeSpec.PersistentVolume != nil {
   346  			pvCap := volumeSpec.PersistentVolume.Spec.Capacity.Storage()
   347  			if pvCap != nil {
   348  				pvCapCopy := pvCap.DeepCopy()
   349  				vmt.persistentVolumeSize = &pvCapCopy
   350  			}
   351  		}
   352  		dsw.volumesToMount[volumeName] = vmt
   353  	}
   354  
   355  	oldPodMount, ok := dsw.volumesToMount[volumeName].podsToMount[podName]
   356  	mountRequestTime := time.Now()
   357  	if ok && !volumePlugin.RequiresRemount(volumeSpec) {
   358  		mountRequestTime = oldPodMount.mountRequestTime
   359  	}
   360  
   361  	if !ok {
   362  		// The volume exists, but not with this pod.
   363  		// It will be added below as podToMount, now just report SELinux metric.
   364  		if pluginSupportsSELinuxContextMount {
   365  			existingVolume := dsw.volumesToMount[volumeName]
   366  			if seLinuxFileLabel != existingVolume.originalSELinuxLabel {
   367  				fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), existingVolume.originalSELinuxLabel, seLinuxFileLabel)
   368  				supported := util.VolumeSupportsSELinuxMount(volumeSpec)
   369  				err := handleSELinuxMetricError(
   370  					fullErr,
   371  					supported,
   372  					seLinuxVolumeContextMismatchWarnings.WithLabelValues(volumePluginName),
   373  					seLinuxVolumeContextMismatchErrors.WithLabelValues(volumePluginName))
   374  				if err != nil {
   375  					return "", err
   376  				}
   377  			}
   378  		}
   379  	}
   380  
   381  	// Create new podToMount object. If it already exists, it is refreshed with
   382  	// updated values (this is required for volumes that require remounting on
   383  	// pod update, like Downward API volumes).
   384  	dsw.volumesToMount[volumeName].podsToMount[podName] = podToMount{
   385  		podName:             podName,
   386  		pod:                 pod,
   387  		volumeSpec:          volumeSpec,
   388  		outerVolumeSpecName: outerVolumeSpecName,
   389  		mountRequestTime:    mountRequestTime,
   390  	}
   391  	return volumeName, nil
   392  }
   393  
   394  func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions) (string, bool, error) {
   395  	var seLinuxFileLabel string
   396  	var pluginSupportsSELinuxContextMount bool
   397  
   398  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   399  		var err error
   400  
   401  		if !dsw.seLinuxTranslator.SELinuxEnabled() {
   402  			return "", false, nil
   403  		}
   404  
   405  		pluginSupportsSELinuxContextMount, err = dsw.getSELinuxMountSupport(volumeSpec)
   406  		if err != nil {
   407  			return "", false, err
   408  		}
   409  		seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec)
   410  		if pluginSupportsSELinuxContextMount {
   411  			// Ensure that a volume that can be mounted with "-o context=XYZ" is
   412  			// used only by containers with the same SELinux contexts.
   413  			for _, containerContext := range seLinuxContainerContexts {
   414  				newLabel, err := dsw.seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext)
   415  				if err != nil {
   416  					fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %s", containerContext, err)
   417  					if err := handleSELinuxMetricError(fullErr, seLinuxSupported, seLinuxContainerContextWarnings, seLinuxContainerContextErrors); err != nil {
   418  						return "", false, err
   419  					}
   420  				}
   421  				if seLinuxFileLabel == "" {
   422  					seLinuxFileLabel = newLabel
   423  					continue
   424  				}
   425  				if seLinuxFileLabel != newLabel {
   426  					fullErr := fmt.Errorf("volume %s is used with two different SELinux contexts in the same pod: %q, %q", volumeSpec.Name(), seLinuxFileLabel, newLabel)
   427  					if err := handleSELinuxMetricError(fullErr, seLinuxSupported, seLinuxPodContextMismatchWarnings, seLinuxPodContextMismatchErrors); err != nil {
   428  						return "", false, err
   429  					}
   430  				}
   431  			}
   432  		} else {
   433  			// Volume plugin does not support SELinux context mount.
   434  			// DSW will track this volume with SELinux label "", i.e. no mount with
   435  			// -o context.
   436  			seLinuxFileLabel = ""
   437  		}
   438  	}
   439  	return seLinuxFileLabel, pluginSupportsSELinuxContextMount, nil
   440  }
   441  
   442  func (dsw *desiredStateOfWorld) MarkVolumesReportedInUse(
   443  	reportedVolumes []v1.UniqueVolumeName) {
   444  	dsw.Lock()
   445  	defer dsw.Unlock()
   446  
   447  	reportedVolumesMap := make(
   448  		map[v1.UniqueVolumeName]bool, len(reportedVolumes) /* capacity */)
   449  
   450  	for _, reportedVolume := range reportedVolumes {
   451  		reportedVolumesMap[reportedVolume] = true
   452  	}
   453  
   454  	for volumeName, volumeObj := range dsw.volumesToMount {
   455  		_, volumeReported := reportedVolumesMap[volumeName]
   456  		volumeObj.reportedInUse = volumeReported
   457  		dsw.volumesToMount[volumeName] = volumeObj
   458  	}
   459  }
   460  
   461  func (dsw *desiredStateOfWorld) DeletePodFromVolume(
   462  	podName types.UniquePodName, volumeName v1.UniqueVolumeName) {
   463  	dsw.Lock()
   464  	defer dsw.Unlock()
   465  
   466  	delete(dsw.podErrors, podName)
   467  
   468  	volumeObj, volumeExists := dsw.volumesToMount[volumeName]
   469  	if !volumeExists {
   470  		return
   471  	}
   472  
   473  	if _, podExists := volumeObj.podsToMount[podName]; !podExists {
   474  		return
   475  	}
   476  
   477  	// Delete pod if it exists
   478  	delete(dsw.volumesToMount[volumeName].podsToMount, podName)
   479  
   480  	if len(dsw.volumesToMount[volumeName].podsToMount) == 0 {
   481  		// Delete volume if no child pods left
   482  		delete(dsw.volumesToMount, volumeName)
   483  	}
   484  }
   485  
   486  // UpdatePersistentVolumeSize updates last known PV size. This is used for volume expansion and
   487  // should be only used for persistent volumes.
   488  func (dsw *desiredStateOfWorld) UpdatePersistentVolumeSize(volumeName v1.UniqueVolumeName, size *resource.Quantity) {
   489  	dsw.Lock()
   490  	defer dsw.Unlock()
   491  
   492  	vol, volExists := dsw.volumesToMount[volumeName]
   493  	if volExists {
   494  		vol.persistentVolumeSize = size
   495  		dsw.volumesToMount[volumeName] = vol
   496  	}
   497  }
   498  
   499  func (dsw *desiredStateOfWorld) VolumeExists(
   500  	volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool {
   501  	dsw.RLock()
   502  	defer dsw.RUnlock()
   503  
   504  	vol, volumeExists := dsw.volumesToMount[volumeName]
   505  	if !volumeExists {
   506  		return false
   507  	}
   508  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   509  		// Handling two volumes with the same name and different SELinux context
   510  		// as two *different* volumes here. Because if a volume is mounted with
   511  		// an old SELinux context, it must be unmounted first and then mounted again
   512  		// with the new context.
   513  		//
   514  		// This will happen when a pod A with context alpha_t runs and is being
   515  		// terminated by kubelet and its volumes are being torn down, while a
   516  		// pod B with context beta_t is already scheduled on the same node,
   517  		// using the same volumes
   518  		// The volumes from Pod A must be fully unmounted (incl. UnmountDevice)
   519  		// and mounted with new SELinux mount options for pod B.
   520  		// Without SELinux, kubelet can (and often does) reuse device mounted
   521  		// for A.
   522  		return vol.effectiveSELinuxMountFileLabel == seLinuxMountContext
   523  	}
   524  	return true
   525  }
   526  
   527  func (dsw *desiredStateOfWorld) PodExistsInVolume(
   528  	podName types.UniquePodName, volumeName v1.UniqueVolumeName, seLinuxMountOption string) bool {
   529  	dsw.RLock()
   530  	defer dsw.RUnlock()
   531  
   532  	volumeObj, volumeExists := dsw.volumesToMount[volumeName]
   533  	if !volumeExists {
   534  		return false
   535  	}
   536  
   537  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   538  		if volumeObj.effectiveSELinuxMountFileLabel != seLinuxMountOption {
   539  			// The volume is in DSW, but with a different SELinux mount option.
   540  			// Report it as unused, so the volume is unmounted and mounted back
   541  			// with the right SELinux option.
   542  			return false
   543  		}
   544  	}
   545  
   546  	_, podExists := volumeObj.podsToMount[podName]
   547  	return podExists
   548  }
   549  
   550  func (dsw *desiredStateOfWorld) VolumeExistsWithSpecName(podName types.UniquePodName, volumeSpecName string) bool {
   551  	dsw.RLock()
   552  	defer dsw.RUnlock()
   553  	for _, volumeObj := range dsw.volumesToMount {
   554  		if podObj, podExists := volumeObj.podsToMount[podName]; podExists {
   555  			if podObj.volumeSpec.Name() == volumeSpecName {
   556  				return true
   557  			}
   558  		}
   559  	}
   560  	return false
   561  }
   562  
   563  func (dsw *desiredStateOfWorld) GetPods() map[types.UniquePodName]bool {
   564  	dsw.RLock()
   565  	defer dsw.RUnlock()
   566  
   567  	podList := make(map[types.UniquePodName]bool)
   568  	for _, volumeObj := range dsw.volumesToMount {
   569  		for podName := range volumeObj.podsToMount {
   570  			podList[podName] = true
   571  		}
   572  	}
   573  	return podList
   574  }
   575  
   576  func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount {
   577  	dsw.RLock()
   578  	defer dsw.RUnlock()
   579  
   580  	volumesToMount := make([]VolumeToMount, 0 /* len */, len(dsw.volumesToMount) /* cap */)
   581  	for volumeName, volumeObj := range dsw.volumesToMount {
   582  		for podName, podObj := range volumeObj.podsToMount {
   583  			vmt := VolumeToMount{
   584  				VolumeToMount: operationexecutor.VolumeToMount{
   585  					VolumeName:              volumeName,
   586  					PodName:                 podName,
   587  					Pod:                     podObj.pod,
   588  					VolumeSpec:              podObj.volumeSpec,
   589  					PluginIsAttachable:      volumeObj.pluginIsAttachable,
   590  					PluginIsDeviceMountable: volumeObj.pluginIsDeviceMountable,
   591  					OuterVolumeSpecName:     podObj.outerVolumeSpecName,
   592  					VolumeGidValue:          volumeObj.volumeGidValue,
   593  					ReportedInUse:           volumeObj.reportedInUse,
   594  					MountRequestTime:        podObj.mountRequestTime,
   595  					DesiredSizeLimit:        volumeObj.desiredSizeLimit,
   596  					SELinuxLabel:            volumeObj.effectiveSELinuxMountFileLabel,
   597  				},
   598  			}
   599  			if volumeObj.persistentVolumeSize != nil {
   600  				vmt.DesiredPersistentVolumeSize = volumeObj.persistentVolumeSize.DeepCopy()
   601  			}
   602  			volumesToMount = append(volumesToMount, vmt)
   603  		}
   604  	}
   605  	return volumesToMount
   606  }
   607  
   608  func (dsw *desiredStateOfWorld) AddErrorToPod(podName types.UniquePodName, err string) {
   609  	dsw.Lock()
   610  	defer dsw.Unlock()
   611  
   612  	if errs, found := dsw.podErrors[podName]; found {
   613  		if errs.Len() <= maxPodErrors {
   614  			errs.Insert(err)
   615  		}
   616  		return
   617  	}
   618  	dsw.podErrors[podName] = sets.NewString(err)
   619  }
   620  
   621  func (dsw *desiredStateOfWorld) PopPodErrors(podName types.UniquePodName) []string {
   622  	dsw.Lock()
   623  	defer dsw.Unlock()
   624  
   625  	if errs, found := dsw.podErrors[podName]; found {
   626  		delete(dsw.podErrors, podName)
   627  		return errs.List()
   628  	}
   629  	return []string{}
   630  }
   631  
   632  func (dsw *desiredStateOfWorld) GetPodsWithErrors() []types.UniquePodName {
   633  	dsw.RLock()
   634  	defer dsw.RUnlock()
   635  
   636  	pods := make([]types.UniquePodName, 0, len(dsw.podErrors))
   637  	for podName := range dsw.podErrors {
   638  		pods = append(pods, podName)
   639  	}
   640  	return pods
   641  }
   642  
   643  func (dsw *desiredStateOfWorld) MarkVolumeAttachability(volumeName v1.UniqueVolumeName, attachable bool) {
   644  	dsw.Lock()
   645  	defer dsw.Unlock()
   646  	volumeObj, volumeExists := dsw.volumesToMount[volumeName]
   647  	if !volumeExists {
   648  		return
   649  	}
   650  	volumeObj.pluginIsAttachable = attachable
   651  	dsw.volumesToMount[volumeName] = volumeObj
   652  }
   653  
   654  func (dsw *desiredStateOfWorld) getSELinuxMountSupport(volumeSpec *volume.Spec) (bool, error) {
   655  	return util.SupportsSELinuxContextMount(volumeSpec, dsw.volumePluginMgr)
   656  }
   657  
   658  // Based on isRWOP, bump the right warning / error metric and either consume the error or return it.
   659  func handleSELinuxMetricError(err error, seLinuxSupported bool, warningMetric, errorMetric metrics.GaugeMetric) error {
   660  	if seLinuxSupported {
   661  		errorMetric.Add(1.0)
   662  		return err
   663  	}
   664  
   665  	// This is not an error yet, but it will be when support for other access modes is added.
   666  	warningMetric.Add(1.0)
   667  	klog.V(4).ErrorS(err, "Please report this error in https://github.com/kubernetes/enhancements/issues/1710, together with full Pod yaml file")
   668  	return nil
   669  }
   670  
   671  // Return the volume plugin name, together with the CSI driver name if it's a CSI volume.
   672  func getVolumePluginNameWithDriver(plugin volume.VolumePlugin, spec *volume.Spec) string {
   673  	pluginName := plugin.GetPluginName()
   674  	if pluginName != csi.CSIPluginName {
   675  		return pluginName
   676  	}
   677  
   678  	// It's a CSI volume
   679  	driverName, err := csi.GetCSIDriverName(spec)
   680  	if err != nil {
   681  		// In theory this is unreachable - such volume would not pass validation.
   682  		klog.V(4).ErrorS(err, "failed to get CSI driver name from volume spec")
   683  		driverName = "unknown"
   684  	}
   685  	// `/` is used to separate plugin + CSI driver in util.GetUniqueVolumeName() too
   686  	return pluginName + "/" + driverName
   687  }