k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/volume/pvcprotection/pvc_protection_controller.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package pvcprotection
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	v1 "k8s.io/api/core/v1"
    25  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    28  	"k8s.io/apimachinery/pkg/util/wait"
    29  	coreinformers "k8s.io/client-go/informers/core/v1"
    30  	clientset "k8s.io/client-go/kubernetes"
    31  	corelisters "k8s.io/client-go/listers/core/v1"
    32  	"k8s.io/client-go/tools/cache"
    33  	"k8s.io/client-go/util/workqueue"
    34  	"k8s.io/component-helpers/storage/ephemeral"
    35  	"k8s.io/klog/v2"
    36  	"k8s.io/kubernetes/pkg/controller/volume/common"
    37  	"k8s.io/kubernetes/pkg/controller/volume/protectionutil"
    38  	"k8s.io/kubernetes/pkg/util/slice"
    39  	volumeutil "k8s.io/kubernetes/pkg/volume/util"
    40  )
    41  
    42  // Controller is controller that removes PVCProtectionFinalizer
    43  // from PVCs that are used by no pods.
    44  type Controller struct {
    45  	client clientset.Interface
    46  
    47  	pvcLister       corelisters.PersistentVolumeClaimLister
    48  	pvcListerSynced cache.InformerSynced
    49  
    50  	podLister       corelisters.PodLister
    51  	podListerSynced cache.InformerSynced
    52  	podIndexer      cache.Indexer
    53  
    54  	queue workqueue.TypedRateLimitingInterface[string]
    55  }
    56  
    57  // NewPVCProtectionController returns a new instance of PVCProtectionController.
    58  func NewPVCProtectionController(logger klog.Logger, pvcInformer coreinformers.PersistentVolumeClaimInformer, podInformer coreinformers.PodInformer, cl clientset.Interface) (*Controller, error) {
    59  	e := &Controller{
    60  		client: cl,
    61  		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
    62  			workqueue.DefaultTypedControllerRateLimiter[string](),
    63  			workqueue.TypedRateLimitingQueueConfig[string]{Name: "pvcprotection"},
    64  		),
    65  	}
    66  
    67  	e.pvcLister = pvcInformer.Lister()
    68  	e.pvcListerSynced = pvcInformer.Informer().HasSynced
    69  	pvcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
    70  		AddFunc: func(obj interface{}) {
    71  			e.pvcAddedUpdated(logger, obj)
    72  		},
    73  		UpdateFunc: func(old, new interface{}) {
    74  			e.pvcAddedUpdated(logger, new)
    75  		},
    76  	})
    77  
    78  	e.podLister = podInformer.Lister()
    79  	e.podListerSynced = podInformer.Informer().HasSynced
    80  	e.podIndexer = podInformer.Informer().GetIndexer()
    81  	if err := common.AddIndexerIfNotPresent(e.podIndexer, common.PodPVCIndex, common.PodPVCIndexFunc()); err != nil {
    82  		return nil, fmt.Errorf("could not initialize pvc protection controller: %w", err)
    83  	}
    84  	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
    85  		AddFunc: func(obj interface{}) {
    86  			e.podAddedDeletedUpdated(logger, nil, obj, false)
    87  		},
    88  		DeleteFunc: func(obj interface{}) {
    89  			e.podAddedDeletedUpdated(logger, nil, obj, true)
    90  		},
    91  		UpdateFunc: func(old, new interface{}) {
    92  			e.podAddedDeletedUpdated(logger, old, new, false)
    93  		},
    94  	})
    95  
    96  	return e, nil
    97  }
    98  
    99  // Run runs the controller goroutines.
   100  func (c *Controller) Run(ctx context.Context, workers int) {
   101  	defer utilruntime.HandleCrash()
   102  	defer c.queue.ShutDown()
   103  
   104  	logger := klog.FromContext(ctx)
   105  	logger.Info("Starting PVC protection controller")
   106  	defer logger.Info("Shutting down PVC protection controller")
   107  
   108  	if !cache.WaitForNamedCacheSync("PVC protection", ctx.Done(), c.pvcListerSynced, c.podListerSynced) {
   109  		return
   110  	}
   111  
   112  	for i := 0; i < workers; i++ {
   113  		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
   114  	}
   115  
   116  	<-ctx.Done()
   117  }
   118  
   119  func (c *Controller) runWorker(ctx context.Context) {
   120  	for c.processNextWorkItem(ctx) {
   121  	}
   122  }
   123  
   124  // processNextWorkItem deals with one pvcKey off the queue.  It returns false when it's time to quit.
   125  func (c *Controller) processNextWorkItem(ctx context.Context) bool {
   126  	pvcKey, quit := c.queue.Get()
   127  	if quit {
   128  		return false
   129  	}
   130  	defer c.queue.Done(pvcKey)
   131  
   132  	pvcNamespace, pvcName, err := cache.SplitMetaNamespaceKey(pvcKey)
   133  	if err != nil {
   134  		utilruntime.HandleError(fmt.Errorf("error parsing PVC key %q: %v", pvcKey, err))
   135  		return true
   136  	}
   137  
   138  	err = c.processPVC(ctx, pvcNamespace, pvcName)
   139  	if err == nil {
   140  		c.queue.Forget(pvcKey)
   141  		return true
   142  	}
   143  
   144  	utilruntime.HandleError(fmt.Errorf("PVC %v failed with : %v", pvcKey, err))
   145  	c.queue.AddRateLimited(pvcKey)
   146  
   147  	return true
   148  }
   149  
   150  func (c *Controller) processPVC(ctx context.Context, pvcNamespace, pvcName string) error {
   151  	logger := klog.FromContext(ctx)
   152  	logger.V(4).Info("Processing PVC", "PVC", klog.KRef(pvcNamespace, pvcName))
   153  	startTime := time.Now()
   154  	defer func() {
   155  		logger.V(4).Info("Finished processing PVC", "PVC", klog.KRef(pvcNamespace, pvcName), "duration", time.Since(startTime))
   156  	}()
   157  
   158  	pvc, err := c.pvcLister.PersistentVolumeClaims(pvcNamespace).Get(pvcName)
   159  	if apierrors.IsNotFound(err) {
   160  		logger.V(4).Info("PVC not found, ignoring", "PVC", klog.KRef(pvcNamespace, pvcName))
   161  		return nil
   162  	}
   163  	if err != nil {
   164  		return err
   165  	}
   166  
   167  	if protectionutil.IsDeletionCandidate(pvc, volumeutil.PVCProtectionFinalizer) {
   168  		// PVC should be deleted. Check if it's used and remove finalizer if
   169  		// it's not.
   170  		isUsed, err := c.isBeingUsed(ctx, pvc)
   171  		if err != nil {
   172  			return err
   173  		}
   174  		if !isUsed {
   175  			return c.removeFinalizer(ctx, pvc)
   176  		}
   177  		logger.V(2).Info("Keeping PVC because it is being used", "PVC", klog.KObj(pvc))
   178  	}
   179  
   180  	if protectionutil.NeedToAddFinalizer(pvc, volumeutil.PVCProtectionFinalizer) {
   181  		// PVC is not being deleted -> it should have the finalizer. The
   182  		// finalizer should be added by admission plugin, this is just to add
   183  		// the finalizer to old PVCs that were created before the admission
   184  		// plugin was enabled.
   185  		return c.addFinalizer(ctx, pvc)
   186  	}
   187  	return nil
   188  }
   189  
   190  func (c *Controller) addFinalizer(ctx context.Context, pvc *v1.PersistentVolumeClaim) error {
   191  	claimClone := pvc.DeepCopy()
   192  	claimClone.ObjectMeta.Finalizers = append(claimClone.ObjectMeta.Finalizers, volumeutil.PVCProtectionFinalizer)
   193  	_, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(ctx, claimClone, metav1.UpdateOptions{})
   194  	logger := klog.FromContext(ctx)
   195  	if err != nil {
   196  		logger.Error(err, "Error adding protection finalizer to PVC", "PVC", klog.KObj(pvc))
   197  		return err
   198  	}
   199  	logger.V(3).Info("Added protection finalizer to PVC", "PVC", klog.KObj(pvc))
   200  	return nil
   201  }
   202  
   203  func (c *Controller) removeFinalizer(ctx context.Context, pvc *v1.PersistentVolumeClaim) error {
   204  	claimClone := pvc.DeepCopy()
   205  	claimClone.ObjectMeta.Finalizers = slice.RemoveString(claimClone.ObjectMeta.Finalizers, volumeutil.PVCProtectionFinalizer, nil)
   206  	_, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(ctx, claimClone, metav1.UpdateOptions{})
   207  	logger := klog.FromContext(ctx)
   208  	if err != nil {
   209  		logger.Error(err, "Error removing protection finalizer from PVC", "PVC", klog.KObj(pvc))
   210  		return err
   211  	}
   212  	logger.V(3).Info("Removed protection finalizer from PVC", "PVC", klog.KObj(pvc))
   213  	return nil
   214  }
   215  
   216  func (c *Controller) isBeingUsed(ctx context.Context, pvc *v1.PersistentVolumeClaim) (bool, error) {
   217  	// Look for a Pod using pvc in the Informer's cache. If one is found the
   218  	// correct decision to keep pvc is taken without doing an expensive live
   219  	// list.
   220  	logger := klog.FromContext(ctx)
   221  	if inUse, err := c.askInformer(logger, pvc); err != nil {
   222  		// No need to return because a live list will follow.
   223  		logger.Error(err, "")
   224  	} else if inUse {
   225  		return true, nil
   226  	}
   227  
   228  	// Even if no Pod using pvc was found in the Informer's cache it doesn't
   229  	// mean such a Pod doesn't exist: it might just not be in the cache yet. To
   230  	// be 100% confident that it is safe to delete pvc make sure no Pod is using
   231  	// it among those returned by a live list.
   232  	return c.askAPIServer(ctx, pvc)
   233  }
   234  
   235  func (c *Controller) askInformer(logger klog.Logger, pvc *v1.PersistentVolumeClaim) (bool, error) {
   236  	logger.V(4).Info("Looking for Pods using PVC in the Informer's cache", "PVC", klog.KObj(pvc))
   237  
   238  	// The indexer is used to find pods which might use the PVC.
   239  	objs, err := c.podIndexer.ByIndex(common.PodPVCIndex, fmt.Sprintf("%s/%s", pvc.Namespace, pvc.Name))
   240  	if err != nil {
   241  		return false, fmt.Errorf("cache-based list of pods failed while processing %s/%s: %s", pvc.Namespace, pvc.Name, err.Error())
   242  	}
   243  	for _, obj := range objs {
   244  		pod, ok := obj.(*v1.Pod)
   245  		if !ok {
   246  			continue
   247  		}
   248  
   249  		// We still need to look at each volume: that's redundant for volume.PersistentVolumeClaim,
   250  		// but for volume.Ephemeral we need to be sure that this particular PVC is the one
   251  		// created for the ephemeral volume.
   252  		if c.podUsesPVC(logger, pod, pvc) {
   253  			return true, nil
   254  		}
   255  	}
   256  
   257  	logger.V(4).Info("No Pod using PVC was found in the Informer's cache", "PVC", klog.KObj(pvc))
   258  	return false, nil
   259  }
   260  
   261  func (c *Controller) askAPIServer(ctx context.Context, pvc *v1.PersistentVolumeClaim) (bool, error) {
   262  	logger := klog.FromContext(ctx)
   263  	logger.V(4).Info("Looking for Pods using PVC with a live list", "PVC", klog.KObj(pvc))
   264  
   265  	podsList, err := c.client.CoreV1().Pods(pvc.Namespace).List(ctx, metav1.ListOptions{})
   266  	if err != nil {
   267  		return false, fmt.Errorf("live list of pods failed: %s", err.Error())
   268  	}
   269  
   270  	for _, pod := range podsList.Items {
   271  		if c.podUsesPVC(logger, &pod, pvc) {
   272  			return true, nil
   273  		}
   274  	}
   275  
   276  	logger.V(2).Info("PVC is unused", "PVC", klog.KObj(pvc))
   277  	return false, nil
   278  }
   279  
   280  func (c *Controller) podUsesPVC(logger klog.Logger, pod *v1.Pod, pvc *v1.PersistentVolumeClaim) bool {
   281  	// Check whether pvc is used by pod only if pod is scheduled, because
   282  	// kubelet sees pods after they have been scheduled and it won't allow
   283  	// starting a pod referencing a PVC with a non-nil deletionTimestamp.
   284  	if pod.Spec.NodeName != "" {
   285  		for _, volume := range pod.Spec.Volumes {
   286  			if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName == pvc.Name ||
   287  				!podIsShutDown(pod) && volume.Ephemeral != nil && ephemeral.VolumeClaimName(pod, &volume) == pvc.Name && ephemeral.VolumeIsForPod(pod, pvc) == nil {
   288  				logger.V(2).Info("Pod uses PVC", "pod", klog.KObj(pod), "PVC", klog.KObj(pvc))
   289  				return true
   290  			}
   291  		}
   292  	}
   293  	return false
   294  }
   295  
   296  // podIsShutDown returns true if kubelet is done with the pod or
   297  // it was force-deleted.
   298  func podIsShutDown(pod *v1.Pod) bool {
   299  	// A pod that has a deletionTimestamp and a zero
   300  	// deletionGracePeriodSeconds
   301  	// a) has been processed by kubelet and was set up for deletion
   302  	//    by the apiserver:
   303  	//    - canBeDeleted has verified that volumes were unpublished
   304  	//      https://github.com/kubernetes/kubernetes/blob/5404b5a28a2114299608bab00e4292960dd864a0/pkg/kubelet/kubelet_pods.go#L980
   305  	//    - deletionGracePeriodSeconds was set via a delete
   306  	//      with zero GracePeriodSeconds
   307  	//      https://github.com/kubernetes/kubernetes/blob/5404b5a28a2114299608bab00e4292960dd864a0/pkg/kubelet/status/status_manager.go#L580-L592
   308  	// or
   309  	// b) was force-deleted.
   310  	//
   311  	// It's now just waiting for garbage collection. We could wait
   312  	// for it to actually get removed, but that may be blocked by
   313  	// finalizers for the pod and thus get delayed.
   314  	//
   315  	// Worse, it is possible that there is a cyclic dependency
   316  	// (pod finalizer waits for PVC to get removed, PVC protection
   317  	// controller waits for pod to get removed).  By considering
   318  	// the PVC unused in this case, we allow the PVC to get
   319  	// removed and break such a cycle.
   320  	//
   321  	// Therefore it is better to proceed with PVC removal,
   322  	// which is safe (case a) and/or desirable (case b).
   323  	return pod.DeletionTimestamp != nil && pod.DeletionGracePeriodSeconds != nil && *pod.DeletionGracePeriodSeconds == 0
   324  }
   325  
   326  // pvcAddedUpdated reacts to pvc added/updated events
   327  func (c *Controller) pvcAddedUpdated(logger klog.Logger, obj interface{}) {
   328  	pvc, ok := obj.(*v1.PersistentVolumeClaim)
   329  	if !ok {
   330  		utilruntime.HandleError(fmt.Errorf("PVC informer returned non-PVC object: %#v", obj))
   331  		return
   332  	}
   333  	key, err := cache.MetaNamespaceKeyFunc(pvc)
   334  	if err != nil {
   335  		utilruntime.HandleError(fmt.Errorf("couldn't get key for Persistent Volume Claim %#v: %v", pvc, err))
   336  		return
   337  	}
   338  	logger.V(4).Info("Got event on PVC", "pvc", klog.KObj(pvc))
   339  
   340  	if protectionutil.NeedToAddFinalizer(pvc, volumeutil.PVCProtectionFinalizer) || protectionutil.IsDeletionCandidate(pvc, volumeutil.PVCProtectionFinalizer) {
   341  		c.queue.Add(key)
   342  	}
   343  }
   344  
   345  // podAddedDeletedUpdated reacts to Pod events
   346  func (c *Controller) podAddedDeletedUpdated(logger klog.Logger, old, new interface{}, deleted bool) {
   347  	if pod := c.parsePod(new); pod != nil {
   348  		c.enqueuePVCs(logger, pod, deleted)
   349  
   350  		// An update notification might mask the deletion of a pod X and the
   351  		// following creation of a pod Y with the same namespaced name as X. If
   352  		// that's the case X needs to be processed as well to handle the case
   353  		// where it is blocking deletion of a PVC not referenced by Y, otherwise
   354  		// such PVC will never be deleted.
   355  		if oldPod := c.parsePod(old); oldPod != nil && oldPod.UID != pod.UID {
   356  			c.enqueuePVCs(logger, oldPod, true)
   357  		}
   358  	}
   359  }
   360  
   361  func (*Controller) parsePod(obj interface{}) *v1.Pod {
   362  	if obj == nil {
   363  		return nil
   364  	}
   365  	pod, ok := obj.(*v1.Pod)
   366  	if !ok {
   367  		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
   368  		if !ok {
   369  			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj))
   370  			return nil
   371  		}
   372  		pod, ok = tombstone.Obj.(*v1.Pod)
   373  		if !ok {
   374  			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a Pod %#v", obj))
   375  			return nil
   376  		}
   377  	}
   378  	return pod
   379  }
   380  
   381  func (c *Controller) enqueuePVCs(logger klog.Logger, pod *v1.Pod, deleted bool) {
   382  	// Filter out pods that can't help us to remove a finalizer on PVC
   383  	if !deleted && !volumeutil.IsPodTerminated(pod, pod.Status) && pod.Spec.NodeName != "" {
   384  		return
   385  	}
   386  
   387  	logger.V(4).Info("Enqueuing PVCs for Pod", "pod", klog.KObj(pod), "podUID", pod.UID)
   388  
   389  	// Enqueue all PVCs that the pod uses
   390  	for _, volume := range pod.Spec.Volumes {
   391  		switch {
   392  		case volume.PersistentVolumeClaim != nil:
   393  			c.queue.Add(pod.Namespace + "/" + volume.PersistentVolumeClaim.ClaimName)
   394  		case volume.Ephemeral != nil:
   395  			c.queue.Add(pod.Namespace + "/" + ephemeral.VolumeClaimName(pod, &volume))
   396  		}
   397  	}
   398  }