k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package populator implements interfaces that monitor and keep the states of the
    19  caches in sync with the "ground truth".
    20  */
    21  package populator
    22  
    23  import (
    24  	"context"
    25  	"errors"
    26  	"fmt"
    27  	"sync"
    28  	"time"
    29  
    30  	"k8s.io/klog/v2"
    31  
    32  	v1 "k8s.io/api/core/v1"
    33  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	"k8s.io/apimachinery/pkg/util/sets"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	clientset "k8s.io/client-go/kubernetes"
    38  	"k8s.io/component-helpers/storage/ephemeral"
    39  	"k8s.io/kubernetes/pkg/kubelet/config"
    40  	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    41  	"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
    42  	"k8s.io/kubernetes/pkg/volume"
    43  	"k8s.io/kubernetes/pkg/volume/csimigration"
    44  	"k8s.io/kubernetes/pkg/volume/util"
    45  	volumetypes "k8s.io/kubernetes/pkg/volume/util/types"
    46  )
    47  
// DesiredStateOfWorldPopulator periodically loops through the list of active
// pods and ensures that each one exists in the desired state of the world cache
// if it has volumes. It also verifies that the pods in the desired state of the
// world cache still exist; if not, it removes them.
type DesiredStateOfWorldPopulator interface {
	// Run executes the populator loop repeatedly, using stopCh as the stop
	// signal. sourcesReady is consulted to decide when the initial
	// population (see HasAddedPods) can be considered complete.
	Run(sourcesReady config.SourcesReady, stopCh <-chan struct{})

	// ReprocessPod sets value for the specified pod in processedPods
	// to false, forcing it to be reprocessed. This is required to enable
	// remounting volumes on pod updates (volumes like Downward API volumes
	// depend on this behavior to ensure volume content is updated).
	ReprocessPod(podName volumetypes.UniquePodName)

	// HasAddedPods returns whether the populator has looped through the list
	// of active pods and added them to the desired state of the world cache,
	// at a time after sources are all ready, at least once. It does not
	// return true before sources are all ready because before then, there is
	// a chance many or all pods are missing from the list of active pods and
	// so few to none will have been added.
	HasAddedPods() bool
}
    69  
// PodStateProvider can determine if a pod is going to be terminated.
type PodStateProvider interface {
	// ShouldPodContainersBeTerminating is consulted by the populator, once
	// the initial population has finished, to skip (re)adding volumes for
	// pods that cannot also be starting containers.
	ShouldPodContainersBeTerminating(types.UID) bool
	// ShouldPodRuntimeBeRemoved is consulted by the populator before it
	// removes a pod's volumes from the desired state of the world; a false
	// result is treated as "the pod may still have running containers".
	ShouldPodRuntimeBeRemoved(types.UID) bool
}
    75  
// PodManager is the subset of methods the manager needs to observe the actual state of the kubelet.
// See pkg/k8s.io/kubernetes/pkg/kubelet/pod.Manager for method godoc.
type PodManager interface {
	// GetPodByUID looks a pod up by its UID; the boolean reports whether
	// the pod is known to the pod manager.
	GetPodByUID(types.UID) (*v1.Pod, bool)
	// GetPods returns the pods the populator iterates over when adding
	// volumes to the desired state of the world.
	GetPods() []*v1.Pod
}
    82  
    83  // NewDesiredStateOfWorldPopulator returns a new instance of
    84  // DesiredStateOfWorldPopulator.
    85  //
    86  // kubeClient - used to fetch PV and PVC objects from the API server
    87  // loopSleepDuration - the amount of time the populator loop sleeps between
    88  // successive executions
    89  //
    90  // podManager - the kubelet podManager that is the source of truth for the pods
    91  // that exist on this host
    92  //
    93  // desiredStateOfWorld - the cache to populate
    94  func NewDesiredStateOfWorldPopulator(
    95  	kubeClient clientset.Interface,
    96  	loopSleepDuration time.Duration,
    97  	podManager PodManager,
    98  	podStateProvider PodStateProvider,
    99  	desiredStateOfWorld cache.DesiredStateOfWorld,
   100  	actualStateOfWorld cache.ActualStateOfWorld,
   101  	kubeContainerRuntime kubecontainer.Runtime,
   102  	csiMigratedPluginManager csimigration.PluginManager,
   103  	intreeToCSITranslator csimigration.InTreeToCSITranslator,
   104  	volumePluginMgr *volume.VolumePluginMgr) DesiredStateOfWorldPopulator {
   105  	return &desiredStateOfWorldPopulator{
   106  		kubeClient:          kubeClient,
   107  		loopSleepDuration:   loopSleepDuration,
   108  		podManager:          podManager,
   109  		podStateProvider:    podStateProvider,
   110  		desiredStateOfWorld: desiredStateOfWorld,
   111  		actualStateOfWorld:  actualStateOfWorld,
   112  		pods: processedPods{
   113  			processedPods: make(map[volumetypes.UniquePodName]bool)},
   114  		kubeContainerRuntime:     kubeContainerRuntime,
   115  		hasAddedPods:             false,
   116  		hasAddedPodsLock:         sync.RWMutex{},
   117  		csiMigratedPluginManager: csiMigratedPluginManager,
   118  		intreeToCSITranslator:    intreeToCSITranslator,
   119  		volumePluginMgr:          volumePluginMgr,
   120  	}
   121  }
   122  
// desiredStateOfWorldPopulator is the DesiredStateOfWorldPopulator
// implementation returned by NewDesiredStateOfWorldPopulator.
//
// kubeClient fetches PVC and PV objects; loopSleepDuration is the interval
// between populator loop executions; podManager and podStateProvider supply
// the set of pods and their termination state; desiredStateOfWorld is the
// cache being populated and actualStateOfWorld is read for currently mounted
// volumes. pods records which pods have already been processed.
// hasAddedPods is written under hasAddedPodsLock by Run and read under it by
// HasAddedPods. csiMigratedPluginManager and intreeToCSITranslator are used
// to translate migratable in-tree volume specs to CSI, and volumePluginMgr is
// used to re-check whether a volume's plugin is still attachable.
type desiredStateOfWorldPopulator struct {
	kubeClient               clientset.Interface
	loopSleepDuration        time.Duration
	podManager               PodManager
	podStateProvider         PodStateProvider
	desiredStateOfWorld      cache.DesiredStateOfWorld
	actualStateOfWorld       cache.ActualStateOfWorld
	pods                     processedPods
	kubeContainerRuntime     kubecontainer.Runtime
	hasAddedPods             bool
	hasAddedPodsLock         sync.RWMutex
	csiMigratedPluginManager csimigration.PluginManager
	intreeToCSITranslator    csimigration.InTreeToCSITranslator
	volumePluginMgr          *volume.VolumePluginMgr
}
   138  
// processedPods tracks, per unique pod name, whether the pod's volumes have
// been processed by the populator. A true value means "processed", a false
// value means "seen before but must be reprocessed", and a missing key means
// the pod has never been seen. The embedded RWMutex guards the map.
type processedPods struct {
	processedPods map[volumetypes.UniquePodName]bool
	sync.RWMutex
}
   143  
   144  func (dswp *desiredStateOfWorldPopulator) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
   145  	// Wait for the completion of a loop that started after sources are all ready, then set hasAddedPods accordingly
   146  	klog.InfoS("Desired state populator starts to run")
   147  	wait.PollUntil(dswp.loopSleepDuration, func() (bool, error) {
   148  		done := sourcesReady.AllReady()
   149  		dswp.populatorLoop()
   150  		return done, nil
   151  	}, stopCh)
   152  	dswp.hasAddedPodsLock.Lock()
   153  	if !dswp.hasAddedPods {
   154  		klog.InfoS("Finished populating initial desired state of world")
   155  		dswp.hasAddedPods = true
   156  	}
   157  	dswp.hasAddedPodsLock.Unlock()
   158  	wait.Until(dswp.populatorLoop, dswp.loopSleepDuration, stopCh)
   159  }
   160  
   161  func (dswp *desiredStateOfWorldPopulator) ReprocessPod(
   162  	podName volumetypes.UniquePodName) {
   163  	dswp.markPodProcessingFailed(podName)
   164  }
   165  
   166  func (dswp *desiredStateOfWorldPopulator) HasAddedPods() bool {
   167  	dswp.hasAddedPodsLock.RLock()
   168  	defer dswp.hasAddedPodsLock.RUnlock()
   169  	return dswp.hasAddedPods
   170  }
   171  
// populatorLoop is a single iteration of the populator: it first adds volumes
// for pods that should be in the desired state of the world, then removes
// volumes for pods that should no longer be there.
func (dswp *desiredStateOfWorldPopulator) populatorLoop() {
	dswp.findAndAddNewPods()
	dswp.findAndRemoveDeletedPods()
}
   176  
   177  // Iterate through all pods and add to desired state of world if they don't
   178  // exist but should
   179  func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
   180  	// Map unique pod name to outer volume name to MountedVolume.
   181  	mountedVolumesForPod := make(map[volumetypes.UniquePodName]map[string]cache.MountedVolume)
   182  	for _, mountedVolume := range dswp.actualStateOfWorld.GetMountedVolumes() {
   183  		mountedVolumes, exist := mountedVolumesForPod[mountedVolume.PodName]
   184  		if !exist {
   185  			mountedVolumes = make(map[string]cache.MountedVolume)
   186  			mountedVolumesForPod[mountedVolume.PodName] = mountedVolumes
   187  		}
   188  		mountedVolumes[mountedVolume.OuterVolumeSpecName] = mountedVolume
   189  	}
   190  
   191  	for _, pod := range dswp.podManager.GetPods() {
   192  		// Keep consistency of adding pod during reconstruction
   193  		if dswp.hasAddedPods && dswp.podStateProvider.ShouldPodContainersBeTerminating(pod.UID) {
   194  			// Do not (re)add volumes for pods that can't also be starting containers
   195  			continue
   196  		}
   197  
   198  		if !dswp.hasAddedPods && dswp.podStateProvider.ShouldPodRuntimeBeRemoved(pod.UID) {
   199  			// When kubelet restarts, we need to add pods to dsw if there is a possibility
   200  			// that the container may still be running
   201  			continue
   202  		}
   203  
   204  		dswp.processPodVolumes(pod, mountedVolumesForPod)
   205  	}
   206  }
   207  
   208  // Iterate through all pods in desired state of world, and remove if they no
   209  // longer exist
   210  func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
   211  	podsFromCache := make(map[volumetypes.UniquePodName]struct{})
   212  	for _, volumeToMount := range dswp.desiredStateOfWorld.GetVolumesToMount() {
   213  		podsFromCache[volumetypes.UniquePodName(volumeToMount.Pod.UID)] = struct{}{}
   214  		pod, podExists := dswp.podManager.GetPodByUID(volumeToMount.Pod.UID)
   215  		if podExists {
   216  
   217  			// check if the attachability has changed for this volume
   218  			if volumeToMount.PluginIsAttachable {
   219  				attachableVolumePlugin, err := dswp.volumePluginMgr.FindAttachablePluginBySpec(volumeToMount.VolumeSpec)
   220  				// only this means the plugin is truly non-attachable
   221  				if err == nil && attachableVolumePlugin == nil {
   222  					// It is not possible right now for a CSI plugin to be both attachable and non-deviceMountable
   223  					// So the uniqueVolumeName should remain the same after the attachability change
   224  					dswp.desiredStateOfWorld.MarkVolumeAttachability(volumeToMount.VolumeName, false)
   225  					klog.InfoS("Volume changes from attachable to non-attachable", "volumeName", volumeToMount.VolumeName)
   226  					continue
   227  				}
   228  			}
   229  
   230  			// Exclude known pods that we expect to be running
   231  			if !dswp.podStateProvider.ShouldPodRuntimeBeRemoved(pod.UID) {
   232  				continue
   233  			}
   234  		}
   235  
   236  		// Once a pod has been deleted from kubelet pod manager, do not delete
   237  		// it immediately from volume manager. Instead, check the kubelet
   238  		// pod state provider to verify that all containers in the pod have been
   239  		// terminated.
   240  		if !dswp.podStateProvider.ShouldPodRuntimeBeRemoved(volumeToMount.Pod.UID) {
   241  			klog.V(4).InfoS("Pod still has one or more containers in the non-exited state and will not be removed from desired state", "pod", klog.KObj(volumeToMount.Pod))
   242  			continue
   243  		}
   244  		var volumeToMountSpecName string
   245  		if volumeToMount.VolumeSpec != nil {
   246  			volumeToMountSpecName = volumeToMount.VolumeSpec.Name()
   247  		}
   248  		removed := dswp.actualStateOfWorld.PodRemovedFromVolume(volumeToMount.PodName, volumeToMount.VolumeName)
   249  		if removed && podExists {
   250  			klog.V(4).InfoS("Actual state does not yet have volume mount information and pod still exists in pod manager, skip removing volume from desired state", "pod", klog.KObj(volumeToMount.Pod), "podUID", volumeToMount.Pod.UID, "volumeName", volumeToMountSpecName)
   251  			continue
   252  		}
   253  		klog.V(4).InfoS("Removing volume from desired state", "pod", klog.KObj(volumeToMount.Pod), "podUID", volumeToMount.Pod.UID, "volumeName", volumeToMountSpecName)
   254  		dswp.desiredStateOfWorld.DeletePodFromVolume(
   255  			volumeToMount.PodName, volumeToMount.VolumeName)
   256  		dswp.deleteProcessedPod(volumeToMount.PodName)
   257  	}
   258  
   259  	// Cleanup orphanded entries from processedPods
   260  	dswp.pods.Lock()
   261  	orphanedPods := make([]volumetypes.UniquePodName, 0, len(dswp.pods.processedPods))
   262  	for k := range dswp.pods.processedPods {
   263  		if _, ok := podsFromCache[k]; !ok {
   264  			orphanedPods = append(orphanedPods, k)
   265  		}
   266  	}
   267  	dswp.pods.Unlock()
   268  	for _, orphanedPod := range orphanedPods {
   269  		uid := types.UID(orphanedPod)
   270  		_, podExists := dswp.podManager.GetPodByUID(uid)
   271  		if !podExists && dswp.podStateProvider.ShouldPodRuntimeBeRemoved(uid) {
   272  			dswp.deleteProcessedPod(orphanedPod)
   273  		}
   274  	}
   275  
   276  	podsWithError := dswp.desiredStateOfWorld.GetPodsWithErrors()
   277  	for _, podName := range podsWithError {
   278  		if _, podExists := dswp.podManager.GetPodByUID(types.UID(podName)); !podExists {
   279  			dswp.desiredStateOfWorld.PopPodErrors(podName)
   280  		}
   281  	}
   282  }
   283  
   284  // processPodVolumes processes the volumes in the given pod and adds them to the
   285  // desired state of the world.
   286  func (dswp *desiredStateOfWorldPopulator) processPodVolumes(
   287  	pod *v1.Pod,
   288  	mountedVolumesForPod map[volumetypes.UniquePodName]map[string]cache.MountedVolume) {
   289  	if pod == nil {
   290  		return
   291  	}
   292  
   293  	uniquePodName := util.GetUniquePodName(pod)
   294  	if dswp.podPreviouslyProcessed(uniquePodName) {
   295  		return
   296  	}
   297  
   298  	allVolumesAdded := true
   299  	mounts, devices, seLinuxContainerContexts := util.GetPodVolumeNames(pod)
   300  
   301  	// Process volume spec for each volume defined in pod
   302  	for _, podVolume := range pod.Spec.Volumes {
   303  		if !mounts.Has(podVolume.Name) && !devices.Has(podVolume.Name) {
   304  			// Volume is not used in the pod, ignore it.
   305  			klog.V(4).InfoS("Skipping unused volume", "pod", klog.KObj(pod), "volumeName", podVolume.Name)
   306  			continue
   307  		}
   308  
   309  		pvc, volumeSpec, volumeGidValue, err :=
   310  			dswp.createVolumeSpec(podVolume, pod, mounts, devices)
   311  		if err != nil {
   312  			klog.ErrorS(err, "Error processing volume", "pod", klog.KObj(pod), "volumeName", podVolume.Name)
   313  			dswp.desiredStateOfWorld.AddErrorToPod(uniquePodName, err.Error())
   314  			allVolumesAdded = false
   315  			continue
   316  		}
   317  
   318  		// Add volume to desired state of world
   319  		_, err = dswp.desiredStateOfWorld.AddPodToVolume(
   320  			uniquePodName, pod, volumeSpec, podVolume.Name, volumeGidValue, seLinuxContainerContexts[podVolume.Name])
   321  		if err != nil {
   322  			klog.ErrorS(err, "Failed to add volume to desiredStateOfWorld", "pod", klog.KObj(pod), "volumeName", podVolume.Name, "volumeSpecName", volumeSpec.Name())
   323  			dswp.desiredStateOfWorld.AddErrorToPod(uniquePodName, err.Error())
   324  			allVolumesAdded = false
   325  		} else {
   326  			klog.V(4).InfoS("Added volume to desired state", "pod", klog.KObj(pod), "volumeName", podVolume.Name, "volumeSpecName", volumeSpec.Name())
   327  		}
   328  
   329  		dswp.checkVolumeFSResize(pod, podVolume, pvc, volumeSpec, uniquePodName, mountedVolumesForPod)
   330  	}
   331  
   332  	// some of the volume additions may have failed, should not mark this pod as fully processed
   333  	if allVolumesAdded {
   334  		dswp.markPodProcessed(uniquePodName)
   335  		// New pod has been synced. Re-mount all volumes that need it
   336  		// (e.g. DownwardAPI)
   337  		dswp.actualStateOfWorld.MarkRemountRequired(uniquePodName)
   338  		// Remove any stored errors for the pod, everything went well in this processPodVolumes
   339  		dswp.desiredStateOfWorld.PopPodErrors(uniquePodName)
   340  	} else if dswp.podHasBeenSeenOnce(uniquePodName) {
   341  		// For the Pod which has been processed at least once, even though some volumes
   342  		// may not have been reprocessed successfully this round, we still mark it as processed to avoid
   343  		// processing it at a very high frequency. The pod will be reprocessed when volume manager calls
   344  		// ReprocessPod() which is triggered by SyncPod.
   345  		dswp.markPodProcessed(uniquePodName)
   346  	}
   347  
   348  }
   349  
   350  // checkVolumeFSResize records desired PVC size for a volume mounted by the pod.
   351  // It is used for comparison with actual size(coming from pvc.Status.Capacity) and calling
   352  // volume expansion on the node if needed.
   353  func (dswp *desiredStateOfWorldPopulator) checkVolumeFSResize(
   354  	pod *v1.Pod,
   355  	podVolume v1.Volume,
   356  	pvc *v1.PersistentVolumeClaim,
   357  	volumeSpec *volume.Spec,
   358  	uniquePodName volumetypes.UniquePodName,
   359  	mountedVolumesForPod map[volumetypes.UniquePodName]map[string]cache.MountedVolume) {
   360  
   361  	// if a volumeSpec does not have PV or has InlineVolumeSpecForCSIMigration set or pvc is nil
   362  	// we can't resize the volume and hence resizing should be skipped.
   363  	if volumeSpec.PersistentVolume == nil || volumeSpec.InlineVolumeSpecForCSIMigration || pvc == nil {
   364  		// Only PVC supports resize operation.
   365  		return
   366  	}
   367  
   368  	uniqueVolumeName, exist := getUniqueVolumeName(uniquePodName, podVolume.Name, mountedVolumesForPod)
   369  	if !exist {
   370  		// Volume not exist in ASW, we assume it hasn't been mounted yet. If it needs resize,
   371  		// it will be handled as offline resize(if it indeed hasn't been mounted yet),
   372  		// or online resize in subsequent loop(after we confirm it has been mounted).
   373  		return
   374  	}
   375  	// volumeSpec.ReadOnly is the value that determines if volume could be formatted when being mounted.
   376  	// This is the same flag that determines filesystem resizing behaviour for offline resizing and hence
   377  	// we should use it here. This value comes from Pod.spec.volumes.persistentVolumeClaim.readOnly.
   378  	if volumeSpec.ReadOnly {
   379  		// This volume is used as read only by this pod, we don't perform resize for read only volumes.
   380  		klog.V(5).InfoS("Skip file system resize check for the volume, as the volume is mounted as readonly", "pod", klog.KObj(pod), "volumeName", podVolume.Name)
   381  		return
   382  	}
   383  	pvCap := volumeSpec.PersistentVolume.Spec.Capacity.Storage()
   384  	pvcStatusCap := pvc.Status.Capacity.Storage()
   385  	dswp.desiredStateOfWorld.UpdatePersistentVolumeSize(uniqueVolumeName, pvCap)
   386  
   387  	// in case the actualStateOfWorld was rebuild after kubelet restart ensure that claimSize is set to accurate value
   388  	dswp.actualStateOfWorld.InitializeClaimSize(klog.TODO(), uniqueVolumeName, pvcStatusCap)
   389  }
   390  
   391  func getUniqueVolumeName(
   392  	podName volumetypes.UniquePodName,
   393  	outerVolumeSpecName string,
   394  	mountedVolumesForPod map[volumetypes.UniquePodName]map[string]cache.MountedVolume) (v1.UniqueVolumeName, bool) {
   395  	mountedVolumes, exist := mountedVolumesForPod[podName]
   396  	if !exist {
   397  		return "", false
   398  	}
   399  	mountedVolume, exist := mountedVolumes[outerVolumeSpecName]
   400  	if !exist {
   401  		return "", false
   402  	}
   403  	return mountedVolume.VolumeName, true
   404  }
   405  
   406  // podPreviouslyProcessed returns true if the volumes for this pod have already
   407  // been processed/reprocessed by the populator. Otherwise, the volumes for this pod need to
   408  // be reprocessed.
   409  func (dswp *desiredStateOfWorldPopulator) podPreviouslyProcessed(
   410  	podName volumetypes.UniquePodName) bool {
   411  	dswp.pods.RLock()
   412  	defer dswp.pods.RUnlock()
   413  
   414  	return dswp.pods.processedPods[podName]
   415  }
   416  
   417  // markPodProcessingFailed marks the specified pod from processedPods as false to indicate that it failed processing
   418  func (dswp *desiredStateOfWorldPopulator) markPodProcessingFailed(
   419  	podName volumetypes.UniquePodName) {
   420  	dswp.pods.Lock()
   421  	dswp.pods.processedPods[podName] = false
   422  	dswp.pods.Unlock()
   423  }
   424  
   425  // podHasBeenSeenOnce returns true if the pod has been seen by the popoulator
   426  // at least once.
   427  func (dswp *desiredStateOfWorldPopulator) podHasBeenSeenOnce(
   428  	podName volumetypes.UniquePodName) bool {
   429  	dswp.pods.RLock()
   430  	_, exist := dswp.pods.processedPods[podName]
   431  	dswp.pods.RUnlock()
   432  	return exist
   433  }
   434  
   435  // markPodProcessed records that the volumes for the specified pod have been
   436  // processed by the populator
   437  func (dswp *desiredStateOfWorldPopulator) markPodProcessed(
   438  	podName volumetypes.UniquePodName) {
   439  	dswp.pods.Lock()
   440  	defer dswp.pods.Unlock()
   441  
   442  	dswp.pods.processedPods[podName] = true
   443  }
   444  
   445  // deleteProcessedPod removes the specified pod from processedPods
   446  func (dswp *desiredStateOfWorldPopulator) deleteProcessedPod(
   447  	podName volumetypes.UniquePodName) {
   448  	dswp.pods.Lock()
   449  	defer dswp.pods.Unlock()
   450  
   451  	delete(dswp.pods.processedPods, podName)
   452  }
   453  
   454  // createVolumeSpec creates and returns a mutable volume.Spec object for the
   455  // specified volume. It dereference any PVC to get PV objects, if needed.
   456  // Returns an error if unable to obtain the volume at this time.
   457  func (dswp *desiredStateOfWorldPopulator) createVolumeSpec(
   458  	podVolume v1.Volume, pod *v1.Pod, mounts, devices sets.String) (*v1.PersistentVolumeClaim, *volume.Spec, string, error) {
   459  	pvcSource := podVolume.VolumeSource.PersistentVolumeClaim
   460  	isEphemeral := pvcSource == nil && podVolume.VolumeSource.Ephemeral != nil
   461  	if isEphemeral {
   462  		// Generic ephemeral inline volumes are handled the
   463  		// same way as a PVC reference. The only additional
   464  		// constraint (checked below) is that the PVC must be
   465  		// owned by the pod.
   466  		pvcSource = &v1.PersistentVolumeClaimVolumeSource{
   467  			ClaimName: ephemeral.VolumeClaimName(pod, &podVolume),
   468  		}
   469  	}
   470  	if pvcSource != nil {
   471  		klog.V(5).InfoS("Found PVC", "PVC", klog.KRef(pod.Namespace, pvcSource.ClaimName))
   472  		// If podVolume is a PVC, fetch the real PV behind the claim
   473  		pvc, err := dswp.getPVCExtractPV(
   474  			pod.Namespace, pvcSource.ClaimName)
   475  		if err != nil {
   476  			return nil, nil, "", fmt.Errorf(
   477  				"error processing PVC %s/%s: %v",
   478  				pod.Namespace,
   479  				pvcSource.ClaimName,
   480  				err)
   481  		}
   482  		if isEphemeral {
   483  			if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil {
   484  				return nil, nil, "", err
   485  			}
   486  		}
   487  		pvName, pvcUID := pvc.Spec.VolumeName, pvc.UID
   488  		klog.V(5).InfoS("Found bound PV for PVC", "PVC", klog.KRef(pod.Namespace, pvcSource.ClaimName), "PVCUID", pvcUID, "PVName", pvName)
   489  		// Fetch actual PV object
   490  		volumeSpec, volumeGidValue, err :=
   491  			dswp.getPVSpec(pvName, pvcSource.ReadOnly, pvcUID)
   492  		if err != nil {
   493  			return nil, nil, "", fmt.Errorf(
   494  				"error processing PVC %s/%s: %v",
   495  				pod.Namespace,
   496  				pvcSource.ClaimName,
   497  				err)
   498  		}
   499  		klog.V(5).InfoS("Extracted volumeSpec from bound PV and PVC", "PVC", klog.KRef(pod.Namespace, pvcSource.ClaimName), "PVCUID", pvcUID, "PVName", pvName, "volumeSpecName", volumeSpec.Name())
   500  		migratable, err := dswp.csiMigratedPluginManager.IsMigratable(volumeSpec)
   501  		if err != nil {
   502  			return nil, nil, "", err
   503  		}
   504  		if migratable {
   505  			volumeSpec, err = csimigration.TranslateInTreeSpecToCSI(volumeSpec, pod.Namespace, dswp.intreeToCSITranslator)
   506  			if err != nil {
   507  				return nil, nil, "", err
   508  			}
   509  		}
   510  
   511  		volumeMode, err := util.GetVolumeMode(volumeSpec)
   512  		if err != nil {
   513  			return nil, nil, "", err
   514  		}
   515  		// Error if a container has volumeMounts but the volumeMode of PVC isn't Filesystem.
   516  		if mounts.Has(podVolume.Name) && volumeMode != v1.PersistentVolumeFilesystem {
   517  			return nil, nil, "", fmt.Errorf(
   518  				"volume %s has volumeMode %s, but is specified in volumeMounts",
   519  				podVolume.Name,
   520  				volumeMode)
   521  		}
   522  		// Error if a container has volumeDevices but the volumeMode of PVC isn't Block
   523  		if devices.Has(podVolume.Name) && volumeMode != v1.PersistentVolumeBlock {
   524  			return nil, nil, "", fmt.Errorf(
   525  				"volume %s has volumeMode %s, but is specified in volumeDevices",
   526  				podVolume.Name,
   527  				volumeMode)
   528  		}
   529  		return pvc, volumeSpec, volumeGidValue, nil
   530  	}
   531  
   532  	// Do not return the original volume object, since the source could mutate it
   533  	clonedPodVolume := podVolume.DeepCopy()
   534  
   535  	spec := volume.NewSpecFromVolume(clonedPodVolume)
   536  	migratable, err := dswp.csiMigratedPluginManager.IsMigratable(spec)
   537  	if err != nil {
   538  		return nil, nil, "", err
   539  	}
   540  	if migratable {
   541  		spec, err = csimigration.TranslateInTreeSpecToCSI(spec, pod.Namespace, dswp.intreeToCSITranslator)
   542  		if err != nil {
   543  			return nil, nil, "", err
   544  		}
   545  	}
   546  	return nil, spec, "", nil
   547  }
   548  
   549  // getPVCExtractPV fetches the PVC object with the given namespace and name from
   550  // the API server, checks whether PVC is being deleted, extracts the name of the PV
   551  // it is pointing to and returns it.
   552  // An error is returned if the PVC object's phase is not "Bound".
   553  func (dswp *desiredStateOfWorldPopulator) getPVCExtractPV(
   554  	namespace string, claimName string) (*v1.PersistentVolumeClaim, error) {
   555  	pvc, err :=
   556  		dswp.kubeClient.CoreV1().PersistentVolumeClaims(namespace).Get(context.TODO(), claimName, metav1.GetOptions{})
   557  	if err != nil || pvc == nil {
   558  		return nil, fmt.Errorf("failed to fetch PVC from API server: %v", err)
   559  	}
   560  
   561  	// Pods that uses a PVC that is being deleted must not be started.
   562  	//
   563  	// In case an old kubelet is running without this check or some kubelets
   564  	// have this feature disabled, the worst that can happen is that such
   565  	// pod is scheduled. This was the default behavior in 1.8 and earlier
   566  	// and users should not be that surprised.
   567  	// It should happen only in very rare case when scheduler schedules
   568  	// a pod and user deletes a PVC that's used by it at the same time.
   569  	if pvc.ObjectMeta.DeletionTimestamp != nil {
   570  		return nil, errors.New("PVC is being deleted")
   571  	}
   572  
   573  	if pvc.Status.Phase != v1.ClaimBound {
   574  		return nil, errors.New("PVC is not bound")
   575  	}
   576  	if pvc.Spec.VolumeName == "" {
   577  		return nil, errors.New("PVC has empty pvc.Spec.VolumeName")
   578  	}
   579  
   580  	return pvc, nil
   581  }
   582  
   583  // getPVSpec fetches the PV object with the given name from the API server
   584  // and returns a volume.Spec representing it.
   585  // An error is returned if the call to fetch the PV object fails.
   586  func (dswp *desiredStateOfWorldPopulator) getPVSpec(
   587  	name string,
   588  	pvcReadOnly bool,
   589  	expectedClaimUID types.UID) (*volume.Spec, string, error) {
   590  	pv, err := dswp.kubeClient.CoreV1().PersistentVolumes().Get(context.TODO(), name, metav1.GetOptions{})
   591  	if err != nil || pv == nil {
   592  		return nil, "", fmt.Errorf(
   593  			"failed to fetch PV %s from API server: %v", name, err)
   594  	}
   595  
   596  	if pv.Spec.ClaimRef == nil {
   597  		return nil, "", fmt.Errorf(
   598  			"found PV object %s but it has a nil pv.Spec.ClaimRef indicating it is not yet bound to the claim",
   599  			name)
   600  	}
   601  
   602  	if pv.Spec.ClaimRef.UID != expectedClaimUID {
   603  		return nil, "", fmt.Errorf(
   604  			"found PV object %s but its pv.Spec.ClaimRef.UID %s does not point to claim.UID %s",
   605  			name,
   606  			pv.Spec.ClaimRef.UID,
   607  			expectedClaimUID)
   608  	}
   609  
   610  	volumeGidValue := getPVVolumeGidAnnotationValue(pv)
   611  	return volume.NewSpecFromPersistentVolume(pv, pvcReadOnly), volumeGidValue, nil
   612  }
   613  
   614  func getPVVolumeGidAnnotationValue(pv *v1.PersistentVolume) string {
   615  	if volumeGid, ok := pv.Annotations[util.VolumeGidAnnotationKey]; ok {
   616  		return volumeGid
   617  	}
   618  
   619  	return ""
   620  }