k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/framework/plugins/volumebinding/binder.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package volumebinding 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "sort" 24 "strings" 25 "time" 26 27 v1 "k8s.io/api/core/v1" 28 storagev1 "k8s.io/api/storage/v1" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/labels" 32 "k8s.io/apimachinery/pkg/util/sets" 33 "k8s.io/apimachinery/pkg/util/wait" 34 "k8s.io/apiserver/pkg/storage" 35 utilfeature "k8s.io/apiserver/pkg/util/feature" 36 coreinformers "k8s.io/client-go/informers/core/v1" 37 storageinformers "k8s.io/client-go/informers/storage/v1" 38 clientset "k8s.io/client-go/kubernetes" 39 corelisters "k8s.io/client-go/listers/core/v1" 40 storagelisters "k8s.io/client-go/listers/storage/v1" 41 "k8s.io/component-helpers/storage/ephemeral" 42 "k8s.io/component-helpers/storage/volume" 43 csitrans "k8s.io/csi-translation-lib" 44 csiplugins "k8s.io/csi-translation-lib/plugins" 45 "k8s.io/klog/v2" 46 v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" 47 "k8s.io/kubernetes/pkg/features" 48 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics" 49 "k8s.io/kubernetes/pkg/scheduler/util/assumecache" 50 "k8s.io/kubernetes/pkg/volume/util" 51 ) 52 53 // ConflictReason is used for the special strings which 
// explain why
// volume binding is impossible for a node.
type ConflictReason string

// ConflictReasons contains all reasons that explain why volume binding is impossible for a node.
// Together with Len, Less and Swap below it can be ordered by plain string
// comparison of the reasons.
type ConflictReasons []ConflictReason

// Len returns the number of reasons.
func (reasons ConflictReasons) Len() int { return len(reasons) }

// Less reports whether reason i orders before reason j (string comparison).
func (reasons ConflictReasons) Less(i, j int) bool { return reasons[i] < reasons[j] }

// Swap exchanges reasons i and j.
func (reasons ConflictReasons) Swap(i, j int) { reasons[i], reasons[j] = reasons[j], reasons[i] }

const (
	// ErrReasonBindConflict is used for VolumeBindingNoMatch predicate error.
	ErrReasonBindConflict ConflictReason = "node(s) didn't find available persistent volumes to bind"
	// ErrReasonNodeConflict is used for VolumeNodeAffinityConflict predicate error.
	ErrReasonNodeConflict ConflictReason = "node(s) had volume node affinity conflict"
	// ErrReasonNotEnoughSpace is used when a pod cannot start on a node because not enough storage space is available.
	// NOTE: declared without an explicit type, so this is an untyped string
	// constant rather than a ConflictReason (unlike the two constants above).
	ErrReasonNotEnoughSpace = "node(s) did not have enough free storage"
	// ErrReasonPVNotExist is used when a pod has one or more PVC(s) bound to non-existent persistent volume(s).
	// NOTE: also an untyped string constant, see ErrReasonNotEnoughSpace.
	ErrReasonPVNotExist = "node(s) unavailable due to one or more pvc(s) bound to non-existent pv(s)"
)

// BindingInfo holds a binding between PV and PVC.
type BindingInfo struct {
	// PVC that needs to be bound
	pvc *v1.PersistentVolumeClaim

	// Proposed PV to bind to this PVC
	pv *v1.PersistentVolume
}

// StorageClassName returns the name of the storage class.
// The name is read from the proposed PV's spec, not from the PVC.
func (b *BindingInfo) StorageClassName() string {
	return b.pv.Spec.StorageClassName
}

// StorageResource represents storage resource.
type StorageResource struct {
	// Requested is the integer value of a PVC's storage request.
	Requested int64
	// Capacity is the integer value of a PV's storage capacity.
	Capacity int64
}

// StorageResource returns storage resource.
func (b *BindingInfo) StorageResource() *StorageResource {
	// both fields are mandatory
	// Requested comes from the PVC's storage request, Capacity from the
	// proposed PV's storage capacity; both are converted with Quantity.Value().
	requestedQty := b.pvc.Spec.Resources.Requests[v1.ResourceName(v1.ResourceStorage)]
	capacityQty := b.pv.Spec.Capacity[v1.ResourceName(v1.ResourceStorage)]
	return &StorageResource{
		Requested: requestedQty.Value(),
		Capacity:  capacityQty.Value(),
	}
}

// PodVolumes holds pod's volumes information used in volume scheduling.
type PodVolumes struct {
	// StaticBindings are binding decisions for PVCs which can be bound to
	// pre-provisioned static PVs.
	StaticBindings []*BindingInfo
	// DynamicProvisions are PVCs that require dynamic provisioning
	DynamicProvisions []*v1.PersistentVolumeClaim
}

// InTreeToCSITranslator contains methods required to check migratable status
// and perform translations from InTree PV's to CSI
type InTreeToCSITranslator interface {
	IsPVMigratable(pv *v1.PersistentVolume) bool
	GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
	TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
}

// SchedulerVolumeBinder is used by the scheduler VolumeBinding plugin to
// handle PVC/PV binding and dynamic provisioning. The binding decisions are
// integrated into the pod scheduling workflow so that the PV NodeAffinity is
// also considered along with the pod's other scheduling requirements.
//
// This integrates into the existing scheduler workflow as follows:
//  1. The scheduler takes a Pod off the scheduler queue and processes it serially:
//     a. Invokes all pre-filter plugins for the pod. GetPodVolumeClaims() is invoked
//     here, pod volume information will be saved in current scheduling cycle state for later use.
//     If pod has bound immediate PVCs, GetEligibleNodes() is invoked to potentially reduce
//     down the list of eligible nodes based on the bound PV's NodeAffinity (if any).
//     b. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
//     c. Invokes all score plugins. Future/TBD
//     d. Selects the best node for the Pod.
//     e. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
//     i. If PVC binding is required, cache in-memory only:
//     * For manual binding: update PV objects for prebinding to the corresponding PVCs.
//     * For dynamic provisioning: update PVC object with a selected node from d)
//     * For the pod, which PVCs and PVs need API updates.
//     ii. Afterwards, the main scheduler caches the Pod->Node binding in the scheduler's pod cache,
//     This is handled in the scheduler and not here.
//     f. Asynchronously bind volumes and pod in a separate goroutine
//     i. BindPodVolumes() is called first in PreBind phase. It makes all the necessary API updates and waits for
//     PV controller to fully bind and provision the PVCs. If binding fails, the Pod is sent
//     back through the scheduler.
//     ii. After BindPodVolumes() is complete, then the scheduler does the final Pod->Node binding.
//  2. Once all the assume operations are done in e), the scheduler processes the next Pod in the scheduler queue
//     while the actual binding operation occurs in the background.
type SchedulerVolumeBinder interface {
	// GetPodVolumeClaims returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning),
	// unbound with immediate binding (including prebound) and PVs that belong to storage classes of unbound PVCs with delayed binding.
	GetPodVolumeClaims(logger klog.Logger, pod *v1.Pod) (podVolumeClaims *PodVolumeClaims, err error)

	// GetEligibleNodes checks the existing bound claims of the pod to determine if the list of nodes can be
	// potentially reduced down to a subset of eligible nodes based on the bound claims which then can be used
	// in subsequent scheduling stages.
	//
	// If eligibleNodes is 'nil', then it indicates that such eligible node reduction cannot be made
	// and all nodes should be considered.
	GetEligibleNodes(logger klog.Logger, boundClaims []*v1.PersistentVolumeClaim) (eligibleNodes sets.Set[string])

	// FindPodVolumes checks if all of a Pod's PVCs can be satisfied by the
	// node and returns pod's volumes information.
	//
	// If a PVC is bound, it checks if the PV's NodeAffinity matches the Node.
	// Otherwise, it tries to find an available PV to bind to the PVC.
	//
	// It returns an error when something went wrong or a list of reasons why the node is
	// (currently) not usable for the pod.
	//
	// If the CSIStorageCapacity feature is enabled, then it also checks for sufficient storage
	// for volumes that still need to be created.
	//
	// This function is called by the scheduler VolumeBinding plugin and can be called in parallel
	FindPodVolumes(logger klog.Logger, pod *v1.Pod, podVolumeClaims *PodVolumeClaims, node *v1.Node) (podVolumes *PodVolumes, reasons ConflictReasons, err error)

	// AssumePodVolumes will:
	// 1. Take the PV matches for unbound PVCs and update the PV cache assuming
	// that the PV is prebound to the PVC.
	// 2. Take the PVCs that need provisioning and update the PVC cache with related
	// annotations set.
	//
	// It returns true if all volumes are fully bound
	//
	// This function is called serially.
	AssumePodVolumes(logger klog.Logger, assumedPod *v1.Pod, nodeName string, podVolumes *PodVolumes) (allFullyBound bool, err error)

	// RevertAssumedPodVolumes will revert assumed PV and PVC cache.
	RevertAssumedPodVolumes(podVolumes *PodVolumes)

	// BindPodVolumes will:
	// 1. Initiate the volume binding by making the API call to prebind the PV
	// to its matching PVC.
	// 2. Trigger the volume provisioning by making the API call to set related
	// annotations on the PVC
	// 3. Wait for PVCs to be completely bound by the PV controller
	//
	// This function can be called in parallel.
	BindPodVolumes(ctx context.Context, assumedPod *v1.Pod, podVolumes *PodVolumes) error
}

// PodVolumeClaims groups a pod's PVCs by binding state, together with the
// PVs that are candidates for its unbound delayed-binding PVCs.
type PodVolumeClaims struct {
	// boundClaims are the pod's bound PVCs.
	boundClaims []*v1.PersistentVolumeClaim
	// unboundClaimsDelayBinding are the pod's unbound with delayed binding (including provisioning) PVCs.
	unboundClaimsDelayBinding []*v1.PersistentVolumeClaim
	// unboundClaimsImmediate are the pod's unbound with immediate binding PVCs (i.e., supposed to be bound already) .
	unboundClaimsImmediate []*v1.PersistentVolumeClaim
	// unboundVolumesDelayBinding are PVs that belong to storage classes of the pod's unbound PVCs with delayed binding.
	// The map key is the storage class name.
	unboundVolumesDelayBinding map[string][]*v1.PersistentVolume
}

// volumeBinder is the SchedulerVolumeBinder implementation in this file
// (see the interface assertion below).
type volumeBinder struct {
	kubeClient clientset.Interface

	classLister   storagelisters.StorageClassLister
	podLister     corelisters.PodLister
	nodeLister    corelisters.NodeLister
	csiNodeLister storagelisters.CSINodeLister

	pvcCache *PVCAssumeCache
	pvCache  *PVAssumeCache

	// Amount of time to wait for the bind operation to succeed
	bindTimeout time.Duration

	translator InTreeToCSITranslator

	csiDriverLister          storagelisters.CSIDriverLister
	csiStorageCapacityLister storagelisters.CSIStorageCapacityLister
}

// Compile-time check that *volumeBinder satisfies SchedulerVolumeBinder.
var _ SchedulerVolumeBinder = &volumeBinder{}

// CapacityCheck contains additional parameters for NewVolumeBinder that
// are only needed when checking volume sizes against available storage
// capacity is desired.
type CapacityCheck struct {
	CSIDriverInformer          storageinformers.CSIDriverInformer
	CSIStorageCapacityInformer storageinformers.CSIStorageCapacityInformer
}

// NewVolumeBinder sets up all the caches needed for the scheduler to make volume binding decisions.
//
// capacityCheck determines how storage capacity is checked (CSIStorageCapacity feature).
func NewVolumeBinder(
	logger klog.Logger,
	kubeClient clientset.Interface,
	podInformer coreinformers.PodInformer,
	nodeInformer coreinformers.NodeInformer,
	csiNodeInformer storageinformers.CSINodeInformer,
	pvcInformer coreinformers.PersistentVolumeClaimInformer,
	pvInformer coreinformers.PersistentVolumeInformer,
	storageClassInformer storageinformers.StorageClassInformer,
	capacityCheck CapacityCheck,
	bindTimeout time.Duration) SchedulerVolumeBinder {
	// Listers come straight from the informers; PVs and PVCs are wrapped in
	// assume caches built on top of their informers.
	b := &volumeBinder{
		kubeClient:    kubeClient,
		podLister:     podInformer.Lister(),
		classLister:   storageClassInformer.Lister(),
		nodeLister:    nodeInformer.Lister(),
		csiNodeLister: csiNodeInformer.Lister(),
		pvcCache:      NewPVCAssumeCache(logger, pvcInformer.Informer()),
		pvCache:       NewPVAssumeCache(logger, pvInformer.Informer()),
		bindTimeout:   bindTimeout,
		translator:    csitrans.New(),
	}

	// Lister() is called unconditionally on both capacity informers, so
	// callers must populate both fields of capacityCheck.
	b.csiDriverLister = capacityCheck.CSIDriverInformer.Lister()
	b.csiStorageCapacityLister = capacityCheck.CSIStorageCapacityInformer.Lister()

	return b
}

// FindPodVolumes finds the matching PVs for PVCs and nodes to provision PVs
// for the given pod and node. If the node does not fit, conflict reasons are
// returned.
//
// All results are named and most exits are naked returns: the deferred
// functions below fill in reasons and podVolumes from local state just before
// the function returns.
func (b *volumeBinder) FindPodVolumes(logger klog.Logger, pod *v1.Pod, podVolumeClaims *PodVolumeClaims, node *v1.Node) (podVolumes *PodVolumes, reasons ConflictReasons, err error) {
	podVolumes = &PodVolumes{}

	// Warning: Below log needs high verbosity as it can be printed several times (#60933).
	logger.V(5).Info("FindPodVolumes", "pod", klog.KObj(pod), "node", klog.KObj(node))

	// Initialize to true for pods that don't have volumes. These
	// booleans get translated into reason strings when the function
	// returns without an error.
	unboundVolumesSatisfied := true
	boundVolumesSatisfied := true
	sufficientStorage := true
	boundPVsFound := true
	// Deferred functions run in reverse order of registration, so this one
	// runs last. Reasons are only reported when no error is returned.
	defer func() {
		if err != nil {
			return
		}
		if !boundVolumesSatisfied {
			reasons = append(reasons, ErrReasonNodeConflict)
		}
		if !unboundVolumesSatisfied {
			reasons = append(reasons, ErrReasonBindConflict)
		}
		if !sufficientStorage {
			reasons = append(reasons, ErrReasonNotEnoughSpace)
		}
		if !boundPVsFound {
			reasons = append(reasons, ErrReasonPVNotExist)
		}
	}()

	// Count a failed "predicate" stage whenever an error is returned.
	defer func() {
		if err != nil {
			metrics.VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
		}
	}()

	var (
		staticBindings    []*BindingInfo
		dynamicProvisions []*v1.PersistentVolumeClaim
	)
	// Registered last, so it runs first: copies the collected bindings and
	// provisions into the returned podVolumes on every exit path.
	defer func() {
		// Although we do not distinguish nil from empty in this function, for
		// easier testing, we normalize empty to nil.
		if len(staticBindings) == 0 {
			staticBindings = nil
		}
		if len(dynamicProvisions) == 0 {
			dynamicProvisions = nil
		}
		podVolumes.StaticBindings = staticBindings
		podVolumes.DynamicProvisions = dynamicProvisions
	}()

	// Check PV node affinity on bound volumes
	if len(podVolumeClaims.boundClaims) > 0 {
		boundVolumesSatisfied, boundPVsFound, err = b.checkBoundClaims(logger, podVolumeClaims.boundClaims, node, pod)
		if err != nil {
			return
		}
	}

	// Find matching volumes and node for unbound claims
	if len(podVolumeClaims.unboundClaimsDelayBinding) > 0 {
		var (
			claimsToFindMatching []*v1.PersistentVolumeClaim
			claimsToProvision    []*v1.PersistentVolumeClaim
		)

		// Filter out claims to provision
		// A claim that already carries the selected-node annotation goes
		// straight to provisioning; all others first try to match a PV.
		for _, claim := range podVolumeClaims.unboundClaimsDelayBinding {
			if selectedNode, ok := claim.Annotations[volume.AnnSelectedNode]; ok {
				if selectedNode != node.Name {
					// Fast path, skip unmatched node.
					unboundVolumesSatisfied = false
					return
				}
				claimsToProvision = append(claimsToProvision, claim)
			} else {
				claimsToFindMatching = append(claimsToFindMatching, claim)
			}
		}

		// Find matching volumes
		if len(claimsToFindMatching) > 0 {
			var unboundClaims []*v1.PersistentVolumeClaim
			unboundVolumesSatisfied, staticBindings, unboundClaims, err = b.findMatchingVolumes(logger, pod, claimsToFindMatching, podVolumeClaims.unboundVolumesDelayBinding, node)
			if err != nil {
				return
			}
			// Claims returned as still unbound are handed to the provisioning check.
			claimsToProvision = append(claimsToProvision, unboundClaims...)
		}

		// Check for claims to provision. This is the first time where we potentially
		// find out that storage is not sufficient for the node.
		// Note that this overwrites unboundVolumesSatisfied from the matching step.
		if len(claimsToProvision) > 0 {
			unboundVolumesSatisfied, sufficientStorage, dynamicProvisions, err = b.checkVolumeProvisions(logger, pod, claimsToProvision, node)
			if err != nil {
				return
			}
		}
	}

	return
}

// GetEligibleNodes checks the existing bound claims of the pod to determine if the list of nodes can be
// potentially reduced down to a subset of eligible nodes based on the bound claims which then can be used
// in subsequent scheduling stages.
//
// Returning 'nil' for eligibleNodes indicates that such eligible node reduction cannot be made and all nodes
// should be considered.
392 func (b *volumeBinder) GetEligibleNodes(logger klog.Logger, boundClaims []*v1.PersistentVolumeClaim) (eligibleNodes sets.Set[string]) { 393 if len(boundClaims) == 0 { 394 return 395 } 396 397 var errs []error 398 for _, pvc := range boundClaims { 399 pvName := pvc.Spec.VolumeName 400 pv, err := b.pvCache.GetPV(pvName) 401 if err != nil { 402 errs = append(errs, err) 403 continue 404 } 405 406 // if the PersistentVolume is local and has node affinity matching specific node(s), 407 // add them to the eligible nodes 408 nodeNames := util.GetLocalPersistentVolumeNodeNames(pv) 409 if len(nodeNames) != 0 { 410 // on the first found list of eligible nodes for the local PersistentVolume, 411 // insert to the eligible node set. 412 if eligibleNodes == nil { 413 eligibleNodes = sets.New(nodeNames...) 414 } else { 415 // for subsequent finding of eligible nodes for the local PersistentVolume, 416 // take the intersection of the nodes with the existing eligible nodes 417 // for cases if PV1 has node affinity to node1 and PV2 has node affinity to node2, 418 // then the eligible node list should be empty. 419 eligibleNodes = eligibleNodes.Intersection(sets.New(nodeNames...)) 420 } 421 } 422 } 423 424 if len(errs) > 0 { 425 logger.V(4).Info("GetEligibleNodes: one or more error occurred finding eligible nodes", "error", errs) 426 return nil 427 } 428 429 if eligibleNodes != nil { 430 logger.V(4).Info("GetEligibleNodes: reduced down eligible nodes", "nodes", eligibleNodes) 431 } 432 return 433 } 434 435 // AssumePodVolumes will take the matching PVs and PVCs to provision in pod's 436 // volume information for the chosen node, and: 437 // 1. Update the pvCache with the new prebound PV. 438 // 2. Update the pvcCache with the new PVCs with annotations set 439 // 3. Update PodVolumes again with cached API updates for PVs and PVCs. 
440 func (b *volumeBinder) AssumePodVolumes(logger klog.Logger, assumedPod *v1.Pod, nodeName string, podVolumes *PodVolumes) (allFullyBound bool, err error) { 441 logger.V(4).Info("AssumePodVolumes", "pod", klog.KObj(assumedPod), "node", klog.KRef("", nodeName)) 442 defer func() { 443 if err != nil { 444 metrics.VolumeSchedulingStageFailed.WithLabelValues("assume").Inc() 445 } 446 }() 447 448 if allBound := b.arePodVolumesBound(logger, assumedPod); allBound { 449 logger.V(4).Info("AssumePodVolumes: all PVCs bound and nothing to do", "pod", klog.KObj(assumedPod), "node", klog.KRef("", nodeName)) 450 return true, nil 451 } 452 453 // Assume PV 454 newBindings := []*BindingInfo{} 455 for _, binding := range podVolumes.StaticBindings { 456 newPV, dirty, err := volume.GetBindVolumeToClaim(binding.pv, binding.pvc) 457 logger.V(5).Info("AssumePodVolumes: GetBindVolumeToClaim", 458 "pod", klog.KObj(assumedPod), 459 "PV", klog.KObj(binding.pv), 460 "PVC", klog.KObj(binding.pvc), 461 "newPV", klog.KObj(newPV), 462 "dirty", dirty, 463 ) 464 if err != nil { 465 logger.Error(err, "AssumePodVolumes: fail to GetBindVolumeToClaim") 466 b.revertAssumedPVs(newBindings) 467 return false, err 468 } 469 // TODO: can we assume every time? 470 if dirty { 471 err = b.pvCache.Assume(newPV) 472 if err != nil { 473 b.revertAssumedPVs(newBindings) 474 return false, err 475 } 476 } 477 newBindings = append(newBindings, &BindingInfo{pv: newPV, pvc: binding.pvc}) 478 } 479 480 // Assume PVCs 481 newProvisionedPVCs := []*v1.PersistentVolumeClaim{} 482 for _, claim := range podVolumes.DynamicProvisions { 483 // The claims from method args can be pointing to watcher cache. We must not 484 // modify these, therefore create a copy. 
485 claimClone := claim.DeepCopy() 486 metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, volume.AnnSelectedNode, nodeName) 487 err = b.pvcCache.Assume(claimClone) 488 if err != nil { 489 b.revertAssumedPVs(newBindings) 490 b.revertAssumedPVCs(newProvisionedPVCs) 491 return 492 } 493 494 newProvisionedPVCs = append(newProvisionedPVCs, claimClone) 495 } 496 497 podVolumes.StaticBindings = newBindings 498 podVolumes.DynamicProvisions = newProvisionedPVCs 499 return 500 } 501 502 // RevertAssumedPodVolumes will revert assumed PV and PVC cache. 503 func (b *volumeBinder) RevertAssumedPodVolumes(podVolumes *PodVolumes) { 504 b.revertAssumedPVs(podVolumes.StaticBindings) 505 b.revertAssumedPVCs(podVolumes.DynamicProvisions) 506 } 507 508 // BindPodVolumes gets the cached bindings and PVCs to provision in pod's volumes information, 509 // makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound 510 // by the PV controller. 511 func (b *volumeBinder) BindPodVolumes(ctx context.Context, assumedPod *v1.Pod, podVolumes *PodVolumes) (err error) { 512 logger := klog.FromContext(ctx) 513 logger.V(4).Info("BindPodVolumes", "pod", klog.KObj(assumedPod), "node", klog.KRef("", assumedPod.Spec.NodeName)) 514 515 defer func() { 516 if err != nil { 517 metrics.VolumeSchedulingStageFailed.WithLabelValues("bind").Inc() 518 } 519 }() 520 521 bindings := podVolumes.StaticBindings 522 claimsToProvision := podVolumes.DynamicProvisions 523 524 // Start API operations 525 err = b.bindAPIUpdate(ctx, assumedPod, bindings, claimsToProvision) 526 if err != nil { 527 return err 528 } 529 530 err = wait.PollUntilContextTimeout(ctx, time.Second, b.bindTimeout, false, func(ctx context.Context) (bool, error) { 531 b, err := b.checkBindings(logger, assumedPod, bindings, claimsToProvision) 532 return b, err 533 }) 534 if err != nil { 535 return fmt.Errorf("binding volumes: %w", err) 536 } 537 return nil 538 } 539 540 func getPodName(pod *v1.Pod) string { 541 return 
pod.Namespace + "/" + pod.Name 542 } 543 544 func getPVCName(pvc *v1.PersistentVolumeClaim) string { 545 return pvc.Namespace + "/" + pvc.Name 546 } 547 548 // bindAPIUpdate makes the API update for those PVs/PVCs. 549 func (b *volumeBinder) bindAPIUpdate(ctx context.Context, pod *v1.Pod, bindings []*BindingInfo, claimsToProvision []*v1.PersistentVolumeClaim) error { 550 logger := klog.FromContext(ctx) 551 podName := getPodName(pod) 552 if bindings == nil { 553 return fmt.Errorf("failed to get cached bindings for pod %q", podName) 554 } 555 if claimsToProvision == nil { 556 return fmt.Errorf("failed to get cached claims to provision for pod %q", podName) 557 } 558 559 lastProcessedBinding := 0 560 lastProcessedProvisioning := 0 561 defer func() { 562 // only revert assumed cached updates for volumes we haven't successfully bound 563 if lastProcessedBinding < len(bindings) { 564 b.revertAssumedPVs(bindings[lastProcessedBinding:]) 565 } 566 // only revert assumed cached updates for claims we haven't updated, 567 if lastProcessedProvisioning < len(claimsToProvision) { 568 b.revertAssumedPVCs(claimsToProvision[lastProcessedProvisioning:]) 569 } 570 }() 571 572 var ( 573 binding *BindingInfo 574 i int 575 claim *v1.PersistentVolumeClaim 576 ) 577 578 // Do the actual prebinding. Let the PV controller take care of the rest 579 // There is no API rollback if the actual binding fails 580 for _, binding = range bindings { 581 // TODO: does it hurt if we make an api call and nothing needs to be updated? 
582 logger.V(5).Info("Updating PersistentVolume: binding to claim", "pod", klog.KObj(pod), "PV", klog.KObj(binding.pv), "PVC", klog.KObj(binding.pvc)) 583 newPV, err := b.kubeClient.CoreV1().PersistentVolumes().Update(ctx, binding.pv, metav1.UpdateOptions{}) 584 if err != nil { 585 logger.V(4).Info("Updating PersistentVolume: binding to claim failed", "pod", klog.KObj(pod), "PV", klog.KObj(binding.pv), "PVC", klog.KObj(binding.pvc), "err", err) 586 return err 587 } 588 589 logger.V(2).Info("Updated PersistentVolume with claim. Waiting for binding to complete", "pod", klog.KObj(pod), "PV", klog.KObj(binding.pv), "PVC", klog.KObj(binding.pvc)) 590 // Save updated object from apiserver for later checking. 591 binding.pv = newPV 592 lastProcessedBinding++ 593 } 594 595 // Update claims objects to trigger volume provisioning. Let the PV controller take care of the rest 596 // PV controller is expected to signal back by removing related annotations if actual provisioning fails 597 for i, claim = range claimsToProvision { 598 logger.V(5).Info("Updating claims objects to trigger volume provisioning", "pod", klog.KObj(pod), "PVC", klog.KObj(claim)) 599 newClaim, err := b.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(ctx, claim, metav1.UpdateOptions{}) 600 if err != nil { 601 logger.V(4).Info("Updating PersistentVolumeClaim: binding to volume failed", "PVC", klog.KObj(claim), "err", err) 602 return err 603 } 604 605 // Save updated object from apiserver for later checking. 
606 claimsToProvision[i] = newClaim 607 lastProcessedProvisioning++ 608 } 609 610 return nil 611 } 612 613 var ( 614 versioner = storage.APIObjectVersioner{} 615 ) 616 617 // checkBindings runs through all the PVCs in the Pod and checks: 618 // * if the PVC is fully bound 619 // * if there are any conditions that require binding to fail and be retried 620 // 621 // It returns true when all of the Pod's PVCs are fully bound, and error if 622 // binding (and scheduling) needs to be retried 623 // Note that it checks on API objects not PV/PVC cache, this is because 624 // PV/PVC cache can be assumed again in main scheduler loop, we must check 625 // latest state in API server which are shared with PV controller and 626 // provisioners 627 func (b *volumeBinder) checkBindings(logger klog.Logger, pod *v1.Pod, bindings []*BindingInfo, claimsToProvision []*v1.PersistentVolumeClaim) (bool, error) { 628 podName := getPodName(pod) 629 if bindings == nil { 630 return false, fmt.Errorf("failed to get cached bindings for pod %q", podName) 631 } 632 if claimsToProvision == nil { 633 return false, fmt.Errorf("failed to get cached claims to provision for pod %q", podName) 634 } 635 636 node, err := b.nodeLister.Get(pod.Spec.NodeName) 637 if err != nil { 638 return false, fmt.Errorf("failed to get node %q: %w", pod.Spec.NodeName, err) 639 } 640 641 csiNode, err := b.csiNodeLister.Get(node.Name) 642 if err != nil { 643 // TODO: return the error once CSINode is created by default 644 logger.V(4).Info("Could not get a CSINode object for the node", "node", klog.KObj(node), "err", err) 645 } 646 647 // Check for any conditions that might require scheduling retry 648 649 // When pod is deleted, binding operation should be cancelled. There is no 650 // need to check PV/PVC bindings any more. 
651 _, err = b.podLister.Pods(pod.Namespace).Get(pod.Name) 652 if err != nil { 653 if apierrors.IsNotFound(err) { 654 return false, fmt.Errorf("pod does not exist any more: %w", err) 655 } 656 logger.Error(err, "Failed to get pod from the lister", "pod", klog.KObj(pod)) 657 } 658 659 for _, binding := range bindings { 660 pv, err := b.pvCache.GetAPIPV(binding.pv.Name) 661 if err != nil { 662 return false, fmt.Errorf("failed to check binding: %w", err) 663 } 664 665 pvc, err := b.pvcCache.GetAPIPVC(getPVCName(binding.pvc)) 666 if err != nil { 667 return false, fmt.Errorf("failed to check binding: %w", err) 668 } 669 670 // Because we updated PV in apiserver, skip if API object is older 671 // and wait for new API object propagated from apiserver. 672 if versioner.CompareResourceVersion(binding.pv, pv) > 0 { 673 return false, nil 674 } 675 676 pv, err = b.tryTranslatePVToCSI(pv, csiNode) 677 if err != nil { 678 return false, fmt.Errorf("failed to translate pv to csi: %w", err) 679 } 680 681 // Check PV's node affinity (the node might not have the proper label) 682 if err := volume.CheckNodeAffinity(pv, node.Labels); err != nil { 683 return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %w", pv.Name, node.Name, err) 684 } 685 686 // Check if pv.ClaimRef got dropped by unbindVolume() 687 if pv.Spec.ClaimRef == nil || pv.Spec.ClaimRef.UID == "" { 688 return false, fmt.Errorf("ClaimRef got reset for pv %q", pv.Name) 689 } 690 691 // Check if pvc is fully bound 692 if !b.isPVCFullyBound(pvc) { 693 return false, nil 694 } 695 } 696 697 for _, claim := range claimsToProvision { 698 pvc, err := b.pvcCache.GetAPIPVC(getPVCName(claim)) 699 if err != nil { 700 return false, fmt.Errorf("failed to check provisioning pvc: %w", err) 701 } 702 703 // Because we updated PVC in apiserver, skip if API object is older 704 // and wait for new API object propagated from apiserver. 
705 if versioner.CompareResourceVersion(claim, pvc) > 0 { 706 return false, nil 707 } 708 709 // Check if selectedNode annotation is still set 710 if pvc.Annotations == nil { 711 return false, fmt.Errorf("selectedNode annotation reset for PVC %q", pvc.Name) 712 } 713 selectedNode := pvc.Annotations[volume.AnnSelectedNode] 714 if selectedNode != pod.Spec.NodeName { 715 // If provisioner fails to provision a volume, selectedNode 716 // annotation will be removed to signal back to the scheduler to 717 // retry. 718 return false, fmt.Errorf("provisioning failed for PVC %q", pvc.Name) 719 } 720 721 // If the PVC is bound to a PV, check its node affinity 722 if pvc.Spec.VolumeName != "" { 723 pv, err := b.pvCache.GetAPIPV(pvc.Spec.VolumeName) 724 if err != nil { 725 if errors.Is(err, assumecache.ErrNotFound) { 726 // We tolerate NotFound error here, because PV is possibly 727 // not found because of API delay, we can check next time. 728 // And if PV does not exist because it's deleted, PVC will 729 // be unbound eventually. 
730 return false, nil 731 } 732 return false, fmt.Errorf("failed to get pv %q from cache: %w", pvc.Spec.VolumeName, err) 733 } 734 735 pv, err = b.tryTranslatePVToCSI(pv, csiNode) 736 if err != nil { 737 return false, err 738 } 739 740 if err := volume.CheckNodeAffinity(pv, node.Labels); err != nil { 741 return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %w", pv.Name, node.Name, err) 742 } 743 } 744 745 // Check if pvc is fully bound 746 if !b.isPVCFullyBound(pvc) { 747 return false, nil 748 } 749 } 750 751 // All pvs and pvcs that we operated on are bound 752 logger.V(2).Info("All PVCs for pod are bound", "pod", klog.KObj(pod)) 753 return true, nil 754 } 755 756 func (b *volumeBinder) isVolumeBound(logger klog.Logger, pod *v1.Pod, vol *v1.Volume) (bound bool, pvc *v1.PersistentVolumeClaim, err error) { 757 pvcName := "" 758 isEphemeral := false 759 switch { 760 case vol.PersistentVolumeClaim != nil: 761 pvcName = vol.PersistentVolumeClaim.ClaimName 762 case vol.Ephemeral != nil: 763 // Generic ephemeral inline volumes also use a PVC, 764 // just with a computed name, and... 765 pvcName = ephemeral.VolumeClaimName(pod, vol) 766 isEphemeral = true 767 default: 768 return true, nil, nil 769 } 770 771 bound, pvc, err = b.isPVCBound(logger, pod.Namespace, pvcName) 772 // ... the PVC must be owned by the pod. 
773 if isEphemeral && err == nil && pvc != nil { 774 if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil { 775 return false, nil, err 776 } 777 } 778 return 779 } 780 781 func (b *volumeBinder) isPVCBound(logger klog.Logger, namespace, pvcName string) (bool, *v1.PersistentVolumeClaim, error) { 782 claim := &v1.PersistentVolumeClaim{ 783 ObjectMeta: metav1.ObjectMeta{ 784 Name: pvcName, 785 Namespace: namespace, 786 }, 787 } 788 pvcKey := getPVCName(claim) 789 pvc, err := b.pvcCache.GetPVC(pvcKey) 790 if err != nil || pvc == nil { 791 return false, nil, fmt.Errorf("error getting PVC %q: %v", pvcKey, err) 792 } 793 794 fullyBound := b.isPVCFullyBound(pvc) 795 if fullyBound { 796 logger.V(5).Info("PVC is fully bound to PV", "PVC", klog.KObj(pvc), "PV", klog.KRef("", pvc.Spec.VolumeName)) 797 } else { 798 if pvc.Spec.VolumeName != "" { 799 logger.V(5).Info("PVC is not fully bound to PV", "PVC", klog.KObj(pvc), "PV", klog.KRef("", pvc.Spec.VolumeName)) 800 } else { 801 logger.V(5).Info("PVC is not bound", "PVC", klog.KObj(pvc)) 802 } 803 } 804 return fullyBound, pvc, nil 805 } 806 807 func (b *volumeBinder) isPVCFullyBound(pvc *v1.PersistentVolumeClaim) bool { 808 return pvc.Spec.VolumeName != "" && metav1.HasAnnotation(pvc.ObjectMeta, volume.AnnBindCompleted) 809 } 810 811 // arePodVolumesBound returns true if all volumes are fully bound 812 func (b *volumeBinder) arePodVolumesBound(logger klog.Logger, pod *v1.Pod) bool { 813 for _, vol := range pod.Spec.Volumes { 814 if isBound, _, _ := b.isVolumeBound(logger, pod, &vol); !isBound { 815 // Pod has at least one PVC that needs binding 816 return false 817 } 818 } 819 return true 820 } 821 822 // GetPodVolumeClaims returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning), 823 // unbound with immediate binding (including prebound) and PVs that belong to storage classes of unbound PVCs with delayed binding. 
824 func (b *volumeBinder) GetPodVolumeClaims(logger klog.Logger, pod *v1.Pod) (podVolumeClaims *PodVolumeClaims, err error) { 825 podVolumeClaims = &PodVolumeClaims{ 826 boundClaims: []*v1.PersistentVolumeClaim{}, 827 unboundClaimsImmediate: []*v1.PersistentVolumeClaim{}, 828 unboundClaimsDelayBinding: []*v1.PersistentVolumeClaim{}, 829 } 830 831 for _, vol := range pod.Spec.Volumes { 832 volumeBound, pvc, err := b.isVolumeBound(logger, pod, &vol) 833 if err != nil { 834 return podVolumeClaims, err 835 } 836 if pvc == nil { 837 continue 838 } 839 if volumeBound { 840 podVolumeClaims.boundClaims = append(podVolumeClaims.boundClaims, pvc) 841 } else { 842 delayBindingMode, err := volume.IsDelayBindingMode(pvc, b.classLister) 843 if err != nil { 844 return podVolumeClaims, err 845 } 846 // Prebound PVCs are treated as unbound immediate binding 847 if delayBindingMode && pvc.Spec.VolumeName == "" { 848 // Scheduler path 849 podVolumeClaims.unboundClaimsDelayBinding = append(podVolumeClaims.unboundClaimsDelayBinding, pvc) 850 } else { 851 // !delayBindingMode || pvc.Spec.VolumeName != "" 852 // Immediate binding should have already been bound 853 podVolumeClaims.unboundClaimsImmediate = append(podVolumeClaims.unboundClaimsImmediate, pvc) 854 } 855 } 856 } 857 858 podVolumeClaims.unboundVolumesDelayBinding = map[string][]*v1.PersistentVolume{} 859 for _, pvc := range podVolumeClaims.unboundClaimsDelayBinding { 860 // Get storage class name from each PVC 861 storageClassName := volume.GetPersistentVolumeClaimClass(pvc) 862 podVolumeClaims.unboundVolumesDelayBinding[storageClassName] = b.pvCache.ListPVs(storageClassName) 863 } 864 return podVolumeClaims, nil 865 } 866 867 func (b *volumeBinder) checkBoundClaims(logger klog.Logger, claims []*v1.PersistentVolumeClaim, node *v1.Node, pod *v1.Pod) (bool, bool, error) { 868 csiNode, err := b.csiNodeLister.Get(node.Name) 869 if err != nil { 870 // TODO: return the error once CSINode is created by default 871 
logger.V(4).Info("Could not get a CSINode object for the node", "node", klog.KObj(node), "err", err) 872 } 873 874 for _, pvc := range claims { 875 pvName := pvc.Spec.VolumeName 876 pv, err := b.pvCache.GetPV(pvName) 877 if err != nil { 878 if errors.Is(err, assumecache.ErrNotFound) { 879 err = nil 880 } 881 return true, false, err 882 } 883 884 pv, err = b.tryTranslatePVToCSI(pv, csiNode) 885 if err != nil { 886 return false, true, err 887 } 888 889 err = volume.CheckNodeAffinity(pv, node.Labels) 890 if err != nil { 891 logger.V(4).Info("PersistentVolume and node mismatch for pod", "PV", klog.KRef("", pvName), "node", klog.KObj(node), "pod", klog.KObj(pod), "err", err) 892 return false, true, nil 893 } 894 logger.V(5).Info("PersistentVolume and node matches for pod", "PV", klog.KRef("", pvName), "node", klog.KObj(node), "pod", klog.KObj(pod)) 895 } 896 897 logger.V(4).Info("All bound volumes for pod match with node", "pod", klog.KObj(pod), "node", klog.KObj(node)) 898 return true, true, nil 899 } 900 901 // findMatchingVolumes tries to find matching volumes for given claims, 902 // and return unbound claims for further provision. 
903 func (b *volumeBinder) findMatchingVolumes(logger klog.Logger, pod *v1.Pod, claimsToBind []*v1.PersistentVolumeClaim, unboundVolumesDelayBinding map[string][]*v1.PersistentVolume, node *v1.Node) (foundMatches bool, bindings []*BindingInfo, unboundClaims []*v1.PersistentVolumeClaim, err error) { 904 // Sort all the claims by increasing size request to get the smallest fits 905 sort.Sort(byPVCSize(claimsToBind)) 906 907 chosenPVs := map[string]*v1.PersistentVolume{} 908 909 foundMatches = true 910 911 for _, pvc := range claimsToBind { 912 // Get storage class name from each PVC 913 storageClassName := volume.GetPersistentVolumeClaimClass(pvc) 914 pvs := unboundVolumesDelayBinding[storageClassName] 915 916 // Find a matching PV 917 pv, err := volume.FindMatchingVolume(pvc, pvs, node, chosenPVs, true) 918 if err != nil { 919 return false, nil, nil, err 920 } 921 if pv == nil { 922 logger.V(4).Info("No matching volumes for pod", "pod", klog.KObj(pod), "PVC", klog.KObj(pvc), "node", klog.KObj(node)) 923 unboundClaims = append(unboundClaims, pvc) 924 foundMatches = false 925 continue 926 } 927 928 // matching PV needs to be excluded so we don't select it again 929 chosenPVs[pv.Name] = pv 930 bindings = append(bindings, &BindingInfo{pv: pv, pvc: pvc}) 931 logger.V(5).Info("Found matching PV for PVC for pod", "PV", klog.KObj(pv), "PVC", klog.KObj(pvc), "node", klog.KObj(node), "pod", klog.KObj(pod)) 932 } 933 934 if foundMatches { 935 logger.V(4).Info("Found matching volumes for pod", "pod", klog.KObj(pod), "node", klog.KObj(node)) 936 } 937 938 return 939 } 940 941 // checkVolumeProvisions checks given unbound claims (the claims have gone through func 942 // findMatchingVolumes, and do not have matching volumes for binding), and return true 943 // if all of the claims are eligible for dynamic provision. 
944 func (b *volumeBinder) checkVolumeProvisions(logger klog.Logger, pod *v1.Pod, claimsToProvision []*v1.PersistentVolumeClaim, node *v1.Node) (provisionSatisfied, sufficientStorage bool, dynamicProvisions []*v1.PersistentVolumeClaim, err error) { 945 dynamicProvisions = []*v1.PersistentVolumeClaim{} 946 947 // We return early with provisionedClaims == nil if a check 948 // fails or we encounter an error. 949 for _, claim := range claimsToProvision { 950 pvcName := getPVCName(claim) 951 className := volume.GetPersistentVolumeClaimClass(claim) 952 if className == "" { 953 return false, false, nil, fmt.Errorf("no class for claim %q", pvcName) 954 } 955 956 class, err := b.classLister.Get(className) 957 if err != nil { 958 return false, false, nil, fmt.Errorf("failed to find storage class %q", className) 959 } 960 provisioner := class.Provisioner 961 if provisioner == "" || provisioner == volume.NotSupportedProvisioner { 962 logger.V(4).Info("Storage class of claim does not support dynamic provisioning", "storageClassName", className, "PVC", klog.KObj(claim)) 963 return false, true, nil, nil 964 } 965 966 // Check if the node can satisfy the topology requirement in the class 967 if !v1helper.MatchTopologySelectorTerms(class.AllowedTopologies, labels.Set(node.Labels)) { 968 logger.V(4).Info("Node cannot satisfy provisioning topology requirements of claim", "node", klog.KObj(node), "PVC", klog.KObj(claim)) 969 return false, true, nil, nil 970 } 971 972 // Check storage capacity. 973 sufficient, err := b.hasEnoughCapacity(logger, provisioner, claim, class, node) 974 if err != nil { 975 return false, false, nil, err 976 } 977 if !sufficient { 978 // hasEnoughCapacity logs an explanation. 
979 return true, false, nil, nil 980 } 981 982 dynamicProvisions = append(dynamicProvisions, claim) 983 984 } 985 logger.V(4).Info("Provisioning for claims of pod that has no matching volumes...", "claimCount", len(claimsToProvision), "pod", klog.KObj(pod), "node", klog.KObj(node)) 986 987 return true, true, dynamicProvisions, nil 988 } 989 990 func (b *volumeBinder) revertAssumedPVs(bindings []*BindingInfo) { 991 for _, BindingInfo := range bindings { 992 b.pvCache.Restore(BindingInfo.pv.Name) 993 } 994 } 995 996 func (b *volumeBinder) revertAssumedPVCs(claims []*v1.PersistentVolumeClaim) { 997 for _, claim := range claims { 998 b.pvcCache.Restore(getPVCName(claim)) 999 } 1000 } 1001 1002 // hasEnoughCapacity checks whether the provisioner has enough capacity left for a new volume of the given size 1003 // that is available from the node. 1004 func (b *volumeBinder) hasEnoughCapacity(logger klog.Logger, provisioner string, claim *v1.PersistentVolumeClaim, storageClass *storagev1.StorageClass, node *v1.Node) (bool, error) { 1005 quantity, ok := claim.Spec.Resources.Requests[v1.ResourceStorage] 1006 if !ok { 1007 // No capacity to check for. 1008 return true, nil 1009 } 1010 1011 // Only enabled for CSI drivers which opt into it. 1012 driver, err := b.csiDriverLister.Get(provisioner) 1013 if err != nil { 1014 if apierrors.IsNotFound(err) { 1015 // Either the provisioner is not a CSI driver or the driver does not 1016 // opt into storage capacity scheduling. Either way, skip 1017 // capacity checking. 1018 return true, nil 1019 } 1020 return false, err 1021 } 1022 if driver.Spec.StorageCapacity == nil || !*driver.Spec.StorageCapacity { 1023 return true, nil 1024 } 1025 1026 // Look for a matching CSIStorageCapacity object(s). 1027 // TODO (for beta): benchmark this and potentially introduce some kind of lookup structure (https://github.com/kubernetes/enhancements/issues/1698#issuecomment-654356718). 
1028 capacities, err := b.csiStorageCapacityLister.List(labels.Everything()) 1029 if err != nil { 1030 return false, err 1031 } 1032 1033 sizeInBytes := quantity.Value() 1034 for _, capacity := range capacities { 1035 if capacity.StorageClassName == storageClass.Name && 1036 capacitySufficient(capacity, sizeInBytes) && 1037 b.nodeHasAccess(logger, node, capacity) { 1038 // Enough capacity found. 1039 return true, nil 1040 } 1041 } 1042 1043 // TODO (?): this doesn't give any information about which pools where considered and why 1044 // they had to be rejected. Log that above? But that might be a lot of log output... 1045 logger.V(4).Info("Node has no accessible CSIStorageCapacity with enough capacity for PVC", 1046 "node", klog.KObj(node), "PVC", klog.KObj(claim), "size", sizeInBytes, "storageClass", klog.KObj(storageClass)) 1047 return false, nil 1048 } 1049 1050 func capacitySufficient(capacity *storagev1.CSIStorageCapacity, sizeInBytes int64) bool { 1051 limit := capacity.Capacity 1052 if capacity.MaximumVolumeSize != nil { 1053 // Prefer MaximumVolumeSize if available, it is more precise. 1054 limit = capacity.MaximumVolumeSize 1055 } 1056 return limit != nil && limit.Value() >= sizeInBytes 1057 } 1058 1059 func (b *volumeBinder) nodeHasAccess(logger klog.Logger, node *v1.Node, capacity *storagev1.CSIStorageCapacity) bool { 1060 if capacity.NodeTopology == nil { 1061 // Unavailable 1062 return false 1063 } 1064 // Only matching by label is supported. 
1065 selector, err := metav1.LabelSelectorAsSelector(capacity.NodeTopology) 1066 if err != nil { 1067 logger.Error(err, "Unexpected error converting to a label selector", "nodeTopology", capacity.NodeTopology) 1068 return false 1069 } 1070 return selector.Matches(labels.Set(node.Labels)) 1071 } 1072 1073 type byPVCSize []*v1.PersistentVolumeClaim 1074 1075 func (a byPVCSize) Len() int { 1076 return len(a) 1077 } 1078 1079 func (a byPVCSize) Swap(i, j int) { 1080 a[i], a[j] = a[j], a[i] 1081 } 1082 1083 func (a byPVCSize) Less(i, j int) bool { 1084 iSize := a[i].Spec.Resources.Requests[v1.ResourceStorage] 1085 jSize := a[j].Spec.Resources.Requests[v1.ResourceStorage] 1086 // return true if iSize is less than jSize 1087 return iSize.Cmp(jSize) == -1 1088 } 1089 1090 // isCSIMigrationOnForPlugin checks if CSI migration is enabled for a given plugin. 1091 func isCSIMigrationOnForPlugin(pluginName string) bool { 1092 switch pluginName { 1093 case csiplugins.AWSEBSInTreePluginName: 1094 return true 1095 case csiplugins.GCEPDInTreePluginName: 1096 return true 1097 case csiplugins.AzureDiskInTreePluginName: 1098 return true 1099 case csiplugins.CinderInTreePluginName: 1100 return true 1101 case csiplugins.PortworxVolumePluginName: 1102 return utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationPortworx) 1103 } 1104 return false 1105 } 1106 1107 // isPluginMigratedToCSIOnNode checks if an in-tree plugin has been migrated to a CSI driver on the node. 1108 func isPluginMigratedToCSIOnNode(pluginName string, csiNode *storagev1.CSINode) bool { 1109 if csiNode == nil { 1110 return false 1111 } 1112 1113 csiNodeAnn := csiNode.GetAnnotations() 1114 if csiNodeAnn == nil { 1115 return false 1116 } 1117 1118 var mpaSet sets.Set[string] 1119 mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey] 1120 if len(mpa) == 0 { 1121 mpaSet = sets.New[string]() 1122 } else { 1123 tok := strings.Split(mpa, ",") 1124 mpaSet = sets.New(tok...) 
1125 } 1126 1127 return mpaSet.Has(pluginName) 1128 } 1129 1130 // tryTranslatePVToCSI will translate the in-tree PV to CSI if it meets the criteria. If not, it returns the unmodified in-tree PV. 1131 func (b *volumeBinder) tryTranslatePVToCSI(pv *v1.PersistentVolume, csiNode *storagev1.CSINode) (*v1.PersistentVolume, error) { 1132 if !b.translator.IsPVMigratable(pv) { 1133 return pv, nil 1134 } 1135 1136 pluginName, err := b.translator.GetInTreePluginNameFromSpec(pv, nil) 1137 if err != nil { 1138 return nil, fmt.Errorf("could not get plugin name from pv: %v", err) 1139 } 1140 1141 if !isCSIMigrationOnForPlugin(pluginName) { 1142 return pv, nil 1143 } 1144 1145 if !isPluginMigratedToCSIOnNode(pluginName, csiNode) { 1146 return pv, nil 1147 } 1148 1149 transPV, err := b.translator.TranslateInTreePVToCSI(pv) 1150 if err != nil { 1151 return nil, fmt.Errorf("could not translate pv: %v", err) 1152 } 1153 1154 return transPV, nil 1155 }