k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/volume/pvcprotection/pvc_protection_controller.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pvcprotection 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 v1 "k8s.io/api/core/v1" 25 apierrors "k8s.io/apimachinery/pkg/api/errors" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 28 "k8s.io/apimachinery/pkg/util/wait" 29 coreinformers "k8s.io/client-go/informers/core/v1" 30 clientset "k8s.io/client-go/kubernetes" 31 corelisters "k8s.io/client-go/listers/core/v1" 32 "k8s.io/client-go/tools/cache" 33 "k8s.io/client-go/util/workqueue" 34 "k8s.io/component-helpers/storage/ephemeral" 35 "k8s.io/klog/v2" 36 "k8s.io/kubernetes/pkg/controller/volume/common" 37 "k8s.io/kubernetes/pkg/controller/volume/protectionutil" 38 "k8s.io/kubernetes/pkg/util/slice" 39 volumeutil "k8s.io/kubernetes/pkg/volume/util" 40 ) 41 42 // Controller is controller that removes PVCProtectionFinalizer 43 // from PVCs that are used by no pods. 44 type Controller struct { 45 client clientset.Interface 46 47 pvcLister corelisters.PersistentVolumeClaimLister 48 pvcListerSynced cache.InformerSynced 49 50 podLister corelisters.PodLister 51 podListerSynced cache.InformerSynced 52 podIndexer cache.Indexer 53 54 queue workqueue.TypedRateLimitingInterface[string] 55 } 56 57 // NewPVCProtectionController returns a new instance of PVCProtectionController. 58 func NewPVCProtectionController(logger klog.Logger, pvcInformer coreinformers.PersistentVolumeClaimInformer, podInformer coreinformers.PodInformer, cl clientset.Interface) (*Controller, error) { 59 e := &Controller{ 60 client: cl, 61 queue: workqueue.NewTypedRateLimitingQueueWithConfig( 62 workqueue.DefaultTypedControllerRateLimiter[string](), 63 workqueue.TypedRateLimitingQueueConfig[string]{Name: "pvcprotection"}, 64 ), 65 } 66 67 e.pvcLister = pvcInformer.Lister() 68 e.pvcListerSynced = pvcInformer.Informer().HasSynced 69 pvcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 70 AddFunc: func(obj interface{}) { 71 e.pvcAddedUpdated(logger, obj) 72 }, 73 UpdateFunc: func(old, new interface{}) { 74 e.pvcAddedUpdated(logger, new) 75 }, 76 }) 77 78 e.podLister = podInformer.Lister() 79 e.podListerSynced = podInformer.Informer().HasSynced 80 e.podIndexer = podInformer.Informer().GetIndexer() 81 if err := common.AddIndexerIfNotPresent(e.podIndexer, common.PodPVCIndex, common.PodPVCIndexFunc()); err != nil { 82 return nil, fmt.Errorf("could not initialize pvc protection controller: %w", err) 83 } 84 podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 85 AddFunc: func(obj interface{}) { 86 e.podAddedDeletedUpdated(logger, nil, obj, false) 87 }, 88 DeleteFunc: func(obj interface{}) { 89 e.podAddedDeletedUpdated(logger, nil, obj, true) 90 }, 91 UpdateFunc: func(old, new interface{}) { 92 e.podAddedDeletedUpdated(logger, old, new, false) 93 }, 94 }) 95 96 return e, nil 97 } 98 99 // Run runs the controller goroutines. 100 func (c *Controller) Run(ctx context.Context, workers int) { 101 defer utilruntime.HandleCrash() 102 defer c.queue.ShutDown() 103 104 logger := klog.FromContext(ctx) 105 logger.Info("Starting PVC protection controller") 106 defer logger.Info("Shutting down PVC protection controller") 107 108 if !cache.WaitForNamedCacheSync("PVC protection", ctx.Done(), c.pvcListerSynced, c.podListerSynced) { 109 return 110 } 111 112 for i := 0; i < workers; i++ { 113 go wait.UntilWithContext(ctx, c.runWorker, time.Second) 114 } 115 116 <-ctx.Done() 117 } 118 119 func (c *Controller) runWorker(ctx context.Context) { 120 for c.processNextWorkItem(ctx) { 121 } 122 } 123 124 // processNextWorkItem deals with one pvcKey off the queue. It returns false when it's time to quit. 125 func (c *Controller) processNextWorkItem(ctx context.Context) bool { 126 pvcKey, quit := c.queue.Get() 127 if quit { 128 return false 129 } 130 defer c.queue.Done(pvcKey) 131 132 pvcNamespace, pvcName, err := cache.SplitMetaNamespaceKey(pvcKey) 133 if err != nil { 134 utilruntime.HandleError(fmt.Errorf("error parsing PVC key %q: %v", pvcKey, err)) 135 return true 136 } 137 138 err = c.processPVC(ctx, pvcNamespace, pvcName) 139 if err == nil { 140 c.queue.Forget(pvcKey) 141 return true 142 } 143 144 utilruntime.HandleError(fmt.Errorf("PVC %v failed with : %v", pvcKey, err)) 145 c.queue.AddRateLimited(pvcKey) 146 147 return true 148 } 149 150 func (c *Controller) processPVC(ctx context.Context, pvcNamespace, pvcName string) error { 151 logger := klog.FromContext(ctx) 152 logger.V(4).Info("Processing PVC", "PVC", klog.KRef(pvcNamespace, pvcName)) 153 startTime := time.Now() 154 defer func() { 155 logger.V(4).Info("Finished processing PVC", "PVC", klog.KRef(pvcNamespace, pvcName), "duration", time.Since(startTime)) 156 }() 157 158 pvc, err := c.pvcLister.PersistentVolumeClaims(pvcNamespace).Get(pvcName) 159 if apierrors.IsNotFound(err) { 160 logger.V(4).Info("PVC not found, ignoring", "PVC", klog.KRef(pvcNamespace, pvcName)) 161 return nil 162 } 163 if err != nil { 164 return err 165 } 166 167 if protectionutil.IsDeletionCandidate(pvc, volumeutil.PVCProtectionFinalizer) { 168 // PVC should be deleted. Check if it's used and remove finalizer if 169 // it's not. 170 isUsed, err := c.isBeingUsed(ctx, pvc) 171 if err != nil { 172 return err 173 } 174 if !isUsed { 175 return c.removeFinalizer(ctx, pvc) 176 } 177 logger.V(2).Info("Keeping PVC because it is being used", "PVC", klog.KObj(pvc)) 178 } 179 180 if protectionutil.NeedToAddFinalizer(pvc, volumeutil.PVCProtectionFinalizer) { 181 // PVC is not being deleted -> it should have the finalizer. The 182 // finalizer should be added by admission plugin, this is just to add 183 // the finalizer to old PVCs that were created before the admission 184 // plugin was enabled. 185 return c.addFinalizer(ctx, pvc) 186 } 187 return nil 188 } 189 190 func (c *Controller) addFinalizer(ctx context.Context, pvc *v1.PersistentVolumeClaim) error { 191 claimClone := pvc.DeepCopy() 192 claimClone.ObjectMeta.Finalizers = append(claimClone.ObjectMeta.Finalizers, volumeutil.PVCProtectionFinalizer) 193 _, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(ctx, claimClone, metav1.UpdateOptions{}) 194 logger := klog.FromContext(ctx) 195 if err != nil { 196 logger.Error(err, "Error adding protection finalizer to PVC", "PVC", klog.KObj(pvc)) 197 return err 198 } 199 logger.V(3).Info("Added protection finalizer to PVC", "PVC", klog.KObj(pvc)) 200 return nil 201 } 202 203 func (c *Controller) removeFinalizer(ctx context.Context, pvc *v1.PersistentVolumeClaim) error { 204 claimClone := pvc.DeepCopy() 205 claimClone.ObjectMeta.Finalizers = slice.RemoveString(claimClone.ObjectMeta.Finalizers, volumeutil.PVCProtectionFinalizer, nil) 206 _, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(ctx, claimClone, metav1.UpdateOptions{}) 207 logger := klog.FromContext(ctx) 208 if err != nil { 209 logger.Error(err, "Error removing protection finalizer from PVC", "PVC", klog.KObj(pvc)) 210 return err 211 } 212 logger.V(3).Info("Removed protection finalizer from PVC", "PVC", klog.KObj(pvc)) 213 return nil 214 } 215 216 func (c *Controller) isBeingUsed(ctx context.Context, pvc *v1.PersistentVolumeClaim) (bool, error) { 217 // Look for a Pod using pvc in the Informer's cache. If one is found the 218 // correct decision to keep pvc is taken without doing an expensive live 219 // list. 220 logger := klog.FromContext(ctx) 221 if inUse, err := c.askInformer(logger, pvc); err != nil { 222 // No need to return because a live list will follow. 223 logger.Error(err, "") 224 } else if inUse { 225 return true, nil 226 } 227 228 // Even if no Pod using pvc was found in the Informer's cache it doesn't 229 // mean such a Pod doesn't exist: it might just not be in the cache yet. To 230 // be 100% confident that it is safe to delete pvc make sure no Pod is using 231 // it among those returned by a live list. 232 return c.askAPIServer(ctx, pvc) 233 } 234 235 func (c *Controller) askInformer(logger klog.Logger, pvc *v1.PersistentVolumeClaim) (bool, error) { 236 logger.V(4).Info("Looking for Pods using PVC in the Informer's cache", "PVC", klog.KObj(pvc)) 237 238 // The indexer is used to find pods which might use the PVC. 239 objs, err := c.podIndexer.ByIndex(common.PodPVCIndex, fmt.Sprintf("%s/%s", pvc.Namespace, pvc.Name)) 240 if err != nil { 241 return false, fmt.Errorf("cache-based list of pods failed while processing %s/%s: %s", pvc.Namespace, pvc.Name, err.Error()) 242 } 243 for _, obj := range objs { 244 pod, ok := obj.(*v1.Pod) 245 if !ok { 246 continue 247 } 248 249 // We still need to look at each volume: that's redundant for volume.PersistentVolumeClaim, 250 // but for volume.Ephemeral we need to be sure that this particular PVC is the one 251 // created for the ephemeral volume. 252 if c.podUsesPVC(logger, pod, pvc) { 253 return true, nil 254 } 255 } 256 257 logger.V(4).Info("No Pod using PVC was found in the Informer's cache", "PVC", klog.KObj(pvc)) 258 return false, nil 259 } 260 261 func (c *Controller) askAPIServer(ctx context.Context, pvc *v1.PersistentVolumeClaim) (bool, error) { 262 logger := klog.FromContext(ctx) 263 logger.V(4).Info("Looking for Pods using PVC with a live list", "PVC", klog.KObj(pvc)) 264 265 podsList, err := c.client.CoreV1().Pods(pvc.Namespace).List(ctx, metav1.ListOptions{}) 266 if err != nil { 267 return false, fmt.Errorf("live list of pods failed: %s", err.Error()) 268 } 269 270 for _, pod := range podsList.Items { 271 if c.podUsesPVC(logger, &pod, pvc) { 272 return true, nil 273 } 274 } 275 276 logger.V(2).Info("PVC is unused", "PVC", klog.KObj(pvc)) 277 return false, nil 278 } 279 280 func (c *Controller) podUsesPVC(logger klog.Logger, pod *v1.Pod, pvc *v1.PersistentVolumeClaim) bool { 281 // Check whether pvc is used by pod only if pod is scheduled, because 282 // kubelet sees pods after they have been scheduled and it won't allow 283 // starting a pod referencing a PVC with a non-nil deletionTimestamp. 284 if pod.Spec.NodeName != "" { 285 for _, volume := range pod.Spec.Volumes { 286 if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName == pvc.Name || 287 !podIsShutDown(pod) && volume.Ephemeral != nil && ephemeral.VolumeClaimName(pod, &volume) == pvc.Name && ephemeral.VolumeIsForPod(pod, pvc) == nil { 288 logger.V(2).Info("Pod uses PVC", "pod", klog.KObj(pod), "PVC", klog.KObj(pvc)) 289 return true 290 } 291 } 292 } 293 return false 294 } 295 296 // podIsShutDown returns true if kubelet is done with the pod or 297 // it was force-deleted. 298 func podIsShutDown(pod *v1.Pod) bool { 299 // A pod that has a deletionTimestamp and a zero 300 // deletionGracePeriodSeconds 301 // a) has been processed by kubelet and was set up for deletion 302 // by the apiserver: 303 // - canBeDeleted has verified that volumes were unpublished 304 // https://github.com/kubernetes/kubernetes/blob/5404b5a28a2114299608bab00e4292960dd864a0/pkg/kubelet/kubelet_pods.go#L980 305 // - deletionGracePeriodSeconds was set via a delete 306 // with zero GracePeriodSeconds 307 // https://github.com/kubernetes/kubernetes/blob/5404b5a28a2114299608bab00e4292960dd864a0/pkg/kubelet/status/status_manager.go#L580-L592 308 // or 309 // b) was force-deleted. 310 // 311 // It's now just waiting for garbage collection. We could wait 312 // for it to actually get removed, but that may be blocked by 313 // finalizers for the pod and thus get delayed. 314 // 315 // Worse, it is possible that there is a cyclic dependency 316 // (pod finalizer waits for PVC to get removed, PVC protection 317 // controller waits for pod to get removed). By considering 318 // the PVC unused in this case, we allow the PVC to get 319 // removed and break such a cycle. 320 // 321 // Therefore it is better to proceed with PVC removal, 322 // which is safe (case a) and/or desirable (case b). 323 return pod.DeletionTimestamp != nil && pod.DeletionGracePeriodSeconds != nil && *pod.DeletionGracePeriodSeconds == 0 324 } 325 326 // pvcAddedUpdated reacts to pvc added/updated events 327 func (c *Controller) pvcAddedUpdated(logger klog.Logger, obj interface{}) { 328 pvc, ok := obj.(*v1.PersistentVolumeClaim) 329 if !ok { 330 utilruntime.HandleError(fmt.Errorf("PVC informer returned non-PVC object: %#v", obj)) 331 return 332 } 333 key, err := cache.MetaNamespaceKeyFunc(pvc) 334 if err != nil { 335 utilruntime.HandleError(fmt.Errorf("couldn't get key for Persistent Volume Claim %#v: %v", pvc, err)) 336 return 337 } 338 logger.V(4).Info("Got event on PVC", "pvc", klog.KObj(pvc)) 339 340 if protectionutil.NeedToAddFinalizer(pvc, volumeutil.PVCProtectionFinalizer) || protectionutil.IsDeletionCandidate(pvc, volumeutil.PVCProtectionFinalizer) { 341 c.queue.Add(key) 342 } 343 } 344 345 // podAddedDeletedUpdated reacts to Pod events 346 func (c *Controller) podAddedDeletedUpdated(logger klog.Logger, old, new interface{}, deleted bool) { 347 if pod := c.parsePod(new); pod != nil { 348 c.enqueuePVCs(logger, pod, deleted) 349 350 // An update notification might mask the deletion of a pod X and the 351 // following creation of a pod Y with the same namespaced name as X. If 352 // that's the case X needs to be processed as well to handle the case 353 // where it is blocking deletion of a PVC not referenced by Y, otherwise 354 // such PVC will never be deleted. 355 if oldPod := c.parsePod(old); oldPod != nil && oldPod.UID != pod.UID { 356 c.enqueuePVCs(logger, oldPod, true) 357 } 358 } 359 } 360 361 func (*Controller) parsePod(obj interface{}) *v1.Pod { 362 if obj == nil { 363 return nil 364 } 365 pod, ok := obj.(*v1.Pod) 366 if !ok { 367 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 368 if !ok { 369 utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj)) 370 return nil 371 } 372 pod, ok = tombstone.Obj.(*v1.Pod) 373 if !ok { 374 utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a Pod %#v", obj)) 375 return nil 376 } 377 } 378 return pod 379 } 380 381 func (c *Controller) enqueuePVCs(logger klog.Logger, pod *v1.Pod, deleted bool) { 382 // Filter out pods that can't help us to remove a finalizer on PVC 383 if !deleted && !volumeutil.IsPodTerminated(pod, pod.Status) && pod.Spec.NodeName != "" { 384 return 385 } 386 387 logger.V(4).Info("Enqueuing PVCs for Pod", "pod", klog.KObj(pod), "podUID", pod.UID) 388 389 // Enqueue all PVCs that the pod uses 390 for _, volume := range pod.Spec.Volumes { 391 switch { 392 case volume.PersistentVolumeClaim != nil: 393 c.queue.Add(pod.Namespace + "/" + volume.PersistentVolumeClaim.ClaimName) 394 case volume.Ephemeral != nil: 395 c.queue.Add(pod.Namespace + "/" + ephemeral.VolumeClaimName(pod, &volume)) 396 } 397 } 398 }