k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/resourcequota/resource_quota_controller.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package resourcequota

import (
	"context"
	"fmt"
	"reflect"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime/schema"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	quota "k8s.io/apiserver/pkg/quota/v1"
	"k8s.io/client-go/discovery"
	coreinformers "k8s.io/client-go/informers/core/v1"
	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/controller-manager/pkg/informerfactory"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/controller"
)

// NamespacedResourcesFunc knows how to discover namespaced resources.
type NamespacedResourcesFunc func() ([]*metav1.APIResourceList, error)

// ReplenishmentFunc is a signal that a resource changed in the specified namespace
// that may require quota to be recalculated.
type ReplenishmentFunc func(ctx context.Context, groupResource schema.GroupResource, namespace string)

// ControllerOptions holds options for creating a quota controller
type ControllerOptions struct {
	// Must have authority to list all quotas, and update quota status
	QuotaClient corev1client.ResourceQuotasGetter
	// Shared informer for resource quotas
	ResourceQuotaInformer coreinformers.ResourceQuotaInformer
	// Controls full recalculation of quota usage
	ResyncPeriod controller.ResyncPeriodFunc
	// Maintains evaluators that know how to calculate usage for group resource
	Registry quota.Registry
	// Discover list of supported resources on the server.
	DiscoveryFunc NamespacedResourcesFunc
	// A function that returns the list of resources to ignore
	IgnoredResourcesFunc func() map[schema.GroupResource]struct{}
	// InformersStarted knows if informers were started.
	InformersStarted <-chan struct{}
	// InformerFactory interfaces with informers.
	InformerFactory informerfactory.InformerFactory
	// Controls full resync of objects monitored for replenishment.
	ReplenishmentResyncPeriod controller.ResyncPeriodFunc
	// Filters update events so we only enqueue the ones where we know quota will change
	UpdateFilter UpdateFilter
}
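// The fields above are normally populated by the controller manager. A
// minimal construction sketch (hypothetical wiring: clientset,
// informerFactory, registry, discoveryFunc, ignoredResourcesFunc, and
// informersStarted are assumed to be configured elsewhere):
//
//	rq, err := NewController(ctx, &ControllerOptions{
//		QuotaClient:               clientset.CoreV1(),
//		ResourceQuotaInformer:     informerFactory.Core().V1().ResourceQuotas(),
//		ResyncPeriod:              controller.StaticResyncPeriodFunc(5 * time.Minute),
//		ReplenishmentResyncPeriod: controller.StaticResyncPeriodFunc(12 * time.Hour),
//		Registry:                  registry,
//		DiscoveryFunc:             discoveryFunc,
//		IgnoredResourcesFunc:      ignoredResourcesFunc,
//		InformersStarted:          informersStarted,
//		InformerFactory:           informerFactory,
//	})
//	if err != nil {
//		return err
//	}
//	go rq.Run(ctx, 5)
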
// Controller is responsible for tracking quota usage status in the system
type Controller struct {
	// Must have authority to list all resources in the system, and update quota status
	rqClient corev1client.ResourceQuotasGetter
	// A lister/getter of resource quota objects
	rqLister corelisters.ResourceQuotaLister
	// A list of functions that return true when their caches have synced
	informerSyncedFuncs []cache.InformerSynced
	// ResourceQuota objects that need to be synchronized
	queue workqueue.TypedRateLimitingInterface[string]
	// missingUsageQueue holds objects that are missing the initial usage information
	missingUsageQueue workqueue.TypedRateLimitingInterface[string]
	// To allow injection of syncUsage for testing.
	syncHandler func(ctx context.Context, key string) error
	// function that controls full recalculation of quota usage
	resyncPeriod controller.ResyncPeriodFunc
	// knows how to calculate usage
	registry quota.Registry
	// knows how to monitor all the resources tracked by quota and trigger replenishment
	quotaMonitor *QuotaMonitor
	// controls the workers that process quotas
	// this lock is acquired to control write access to the monitors and ensures that all
	// monitors are synced before the controller can process quotas.
	workerLock sync.RWMutex
}

// NewController creates a quota controller with specified options
func NewController(ctx context.Context, options *ControllerOptions) (*Controller, error) {
	// build the resource quota controller
	rq := &Controller{
		rqClient:            options.QuotaClient,
		rqLister:            options.ResourceQuotaInformer.Lister(),
		informerSyncedFuncs: []cache.InformerSynced{options.ResourceQuotaInformer.Informer().HasSynced},
		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
			workqueue.DefaultTypedControllerRateLimiter[string](),
			workqueue.TypedRateLimitingQueueConfig[string]{Name: "resourcequota_primary"},
		),
		missingUsageQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
			workqueue.DefaultTypedControllerRateLimiter[string](),
			workqueue.TypedRateLimitingQueueConfig[string]{Name: "resourcequota_priority"},
		),
		resyncPeriod: options.ResyncPeriod,
		registry:     options.Registry,
	}
	// set the synchronization handler
	rq.syncHandler = rq.syncResourceQuotaFromKey

	logger := klog.FromContext(ctx)

	options.ResourceQuotaInformer.Informer().AddEventHandlerWithResyncPeriod(
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				rq.addQuota(logger, obj)
			},
			UpdateFunc: func(old, cur interface{}) {
				// We are only interested in observing updates to quota.spec to drive updates to quota.status.
				// We ignore all updates to quota.Status because they are all driven by this controller.
				// IMPORTANT:
				// We do not use this function to queue up a full quota recalculation. To do so would require
				// us to enqueue all quota.Status updates, and since quota.Status updates involve additional queries
				// that cannot be backed by a cache and result in a full query of a namespace's content, we do not
				// want to pay the price on spurious status updates. As a result, we have a separate routine that is
				// responsible for enqueueing all resource quotas when doing a full resync (enqueueAll).
				oldResourceQuota := old.(*v1.ResourceQuota)
				curResourceQuota := cur.(*v1.ResourceQuota)
				if quota.Equals(oldResourceQuota.Spec.Hard, curResourceQuota.Spec.Hard) {
					return
				}
				rq.addQuota(logger, curResourceQuota)
			},
			// This will enter the sync loop and no-op, because the resource quota has been deleted from the store.
			DeleteFunc: func(obj interface{}) {
				rq.enqueueResourceQuota(logger, obj)
			},
		},
		rq.resyncPeriod(),
	)

	if options.DiscoveryFunc != nil {
		qm := NewMonitor(
			options.InformersStarted,
			options.InformerFactory,
			options.IgnoredResourcesFunc(),
			options.ReplenishmentResyncPeriod,
			rq.replenishQuota,
			rq.registry,
			options.UpdateFilter,
		)

		rq.quotaMonitor = qm

		// do initial quota monitor setup. If we have a discovery failure here, it's ok. We'll discover more resources when a later sync happens.
		resources, err := GetQuotableResources(options.DiscoveryFunc)
		if discovery.IsGroupDiscoveryFailedError(err) {
			utilruntime.HandleError(fmt.Errorf("initial discovery check failure, continuing and counting on future sync update: %v", err))
		} else if err != nil {
			return nil, err
		}

		if err = qm.SyncMonitors(ctx, resources); err != nil {
			utilruntime.HandleError(fmt.Errorf("initial monitor sync has error: %v", err))
		}

		// only start quota once all informers synced
		rq.informerSyncedFuncs = append(rq.informerSyncedFuncs, func() bool {
			return qm.IsSynced(ctx)
		})
	}

	return rq, nil
}

// enqueueAll is called at the fullResyncPeriod interval to force a full recalculation of quota usage statistics
func (rq *Controller) enqueueAll(ctx context.Context) {
	logger := klog.FromContext(ctx)
	defer logger.V(4).Info("Resource quota controller queued all resource quota for full calculation of usage")
	rqs, err := rq.rqLister.List(labels.Everything())
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("unable to enqueue all - error listing resource quotas: %v", err))
		return
	}
	for i := range rqs {
		key, err := controller.KeyFunc(rqs[i])
		if err != nil {
			utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", rqs[i], err))
			continue
		}
		rq.queue.Add(key)
	}
}
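// Queue keys produced by controller.KeyFunc above take the form
// "<namespace>/<name>" (illustrative value, not from this file):
//
//	key, _ := controller.KeyFunc(resourceQuota) // e.g. "demo/compute-resources"
//
// syncResourceQuotaFromKey later reverses this with cache.SplitMetaNamespaceKey.
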
// obj could be an *v1.ResourceQuota, or a DeletionFinalStateUnknown marker item.
func (rq *Controller) enqueueResourceQuota(logger klog.Logger, obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		logger.Error(err, "Couldn't get key", "object", obj)
		return
	}
	rq.queue.Add(key)
}

func (rq *Controller) addQuota(logger klog.Logger, obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		logger.Error(err, "Couldn't get key", "object", obj)
		return
	}

	resourceQuota := obj.(*v1.ResourceQuota)

	// if we declared an intent that is not yet captured in status, prioritize it
	if !apiequality.Semantic.DeepEqual(resourceQuota.Spec.Hard, resourceQuota.Status.Hard) {
		rq.missingUsageQueue.Add(key)
		return
	}

	// if we declared a constraint that has no usage (which this controller can calculate), prioritize it
	for constraint := range resourceQuota.Status.Hard {
		if _, usageFound := resourceQuota.Status.Used[constraint]; !usageFound {
			matchedResources := []v1.ResourceName{constraint}
			for _, evaluator := range rq.registry.List() {
				if intersection := evaluator.MatchingResources(matchedResources); len(intersection) > 0 {
					rq.missingUsageQueue.Add(key)
					return
				}
			}
		}
	}

	// no special priority, go in the normal recalc queue
	rq.queue.Add(key)
}

// worker runs a worker thread that just dequeues items, processes them, and marks them done.
func (rq *Controller) worker(queue workqueue.TypedRateLimitingInterface[string]) func(context.Context) {
	workFunc := func(ctx context.Context) bool {
		key, quit := queue.Get()
		if quit {
			return true
		}
		defer queue.Done(key)

		rq.workerLock.RLock()
		defer rq.workerLock.RUnlock()

		logger := klog.FromContext(ctx)
		logger = klog.LoggerWithValues(logger, "queueKey", key)
		ctx = klog.NewContext(ctx, logger)

		err := rq.syncHandler(ctx, key)
		if err == nil {
			queue.Forget(key)
			return false
		}

		utilruntime.HandleError(err)
		queue.AddRateLimited(key)

		return false
	}

	return func(ctx context.Context) {
		for {
			if quit := workFunc(ctx); quit {
				klog.FromContext(ctx).Info("resource quota controller worker shutting down")
				return
			}
		}
	}
}

// Run begins the quota controller using the specified number of workers
func (rq *Controller) Run(ctx context.Context, workers int) {
	defer utilruntime.HandleCrash()
	defer rq.queue.ShutDown()
	defer rq.missingUsageQueue.ShutDown()

	logger := klog.FromContext(ctx)

	logger.Info("Starting resource quota controller")
	defer logger.Info("Shutting down resource quota controller")

	if rq.quotaMonitor != nil {
		go rq.quotaMonitor.Run(ctx)
	}

	if !cache.WaitForNamedCacheSync("resource quota", ctx.Done(), rq.informerSyncedFuncs...) {
		return
	}

	// the workers that chug through the quota calculation backlog
	for i := 0; i < workers; i++ {
		go wait.UntilWithContext(ctx, rq.worker(rq.queue), time.Second)
		go wait.UntilWithContext(ctx, rq.worker(rq.missingUsageQueue), time.Second)
	}
	// the timer for how often we do a full recalculation across all quotas
	if rq.resyncPeriod() > 0 {
		go wait.UntilWithContext(ctx, rq.enqueueAll, rq.resyncPeriod())
	} else {
		logger.Info("periodic quota controller resync disabled")
	}
	<-ctx.Done()
}

// syncResourceQuotaFromKey syncs a quota key
func (rq *Controller) syncResourceQuotaFromKey(ctx context.Context, key string) (err error) {
	startTime := time.Now()

	logger := klog.FromContext(ctx)
	logger = klog.LoggerWithValues(logger, "key", key)

	defer func() {
		logger.V(4).Info("Finished syncing resource quota", "key", key, "duration", time.Since(startTime))
	}()

	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	resourceQuota, err := rq.rqLister.ResourceQuotas(namespace).Get(name)
	if errors.IsNotFound(err) {
		logger.Info("Resource quota has been deleted", "key", key)
		return nil
	}
	if err != nil {
		logger.Error(err, "Unable to retrieve resource quota from store", "key", key)
		return err
	}
	return rq.syncResourceQuota(ctx, resourceQuota)
}
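// The sync below leans on helpers from k8s.io/apiserver/pkg/quota/v1. An
// illustrative sketch of the masking step (hypothetical values; resource is
// k8s.io/apimachinery/pkg/api/resource, which this file does not import):
//
//	used := v1.ResourceList{"pods": resource.MustParse("3"), "secrets": resource.MustParse("5")}
//	hard := v1.ResourceList{"pods": resource.MustParse("10")}
//	quota.Mask(used, quota.ResourceNames(hard)) // {"pods": "3"}; "secrets" is dropped
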
// syncResourceQuota runs a complete sync of resource quota status across all known kinds
func (rq *Controller) syncResourceQuota(ctx context.Context, resourceQuota *v1.ResourceQuota) (err error) {
	// quota is dirty if any part of spec hard limits differs from the status hard limits
	statusLimitsDirty := !apiequality.Semantic.DeepEqual(resourceQuota.Spec.Hard, resourceQuota.Status.Hard)

	// dirty tracks if the usage status differs from the previous sync;
	// if so, we send a new usage with the latest status.
	// if this is our first sync, it will be dirty by default, since we need to track usage
	dirty := statusLimitsDirty || resourceQuota.Status.Hard == nil || resourceQuota.Status.Used == nil

	used := v1.ResourceList{}
	if resourceQuota.Status.Used != nil {
		used = quota.Add(v1.ResourceList{}, resourceQuota.Status.Used)
	}
	hardLimits := quota.Add(v1.ResourceList{}, resourceQuota.Spec.Hard)

	var errs []error

	newUsage, err := quota.CalculateUsage(resourceQuota.Namespace, resourceQuota.Spec.Scopes, hardLimits, rq.registry, resourceQuota.Spec.ScopeSelector)
	if err != nil {
		// if err is non-nil, remember it to return, but continue updating status with any resources in newUsage
		errs = append(errs, err)
	}
	for key, value := range newUsage {
		used[key] = value
	}

	// ensure set of used values match those that have hard constraints
	hardResources := quota.ResourceNames(hardLimits)
	used = quota.Mask(used, hardResources)

	// Create a usage object that is based on the quota resource version that will handle updates.
	// by default, we preserve the past usage observation, and set hard to the current spec
	usage := resourceQuota.DeepCopy()
	usage.Status = v1.ResourceQuotaStatus{
		Hard: hardLimits,
		Used: used,
	}

	dirty = dirty || !quota.Equals(usage.Status.Used, resourceQuota.Status.Used)

	// there was a change observed by this controller that requires we update quota
	if dirty {
		_, err = rq.rqClient.ResourceQuotas(usage.Namespace).UpdateStatus(ctx, usage, metav1.UpdateOptions{})
		if err != nil {
			errs = append(errs, err)
		}
	}
	return utilerrors.NewAggregate(errs)
}

// replenishQuota is a replenishment function invoked by a controller to notify that a quota should be recalculated
func (rq *Controller) replenishQuota(ctx context.Context, groupResource schema.GroupResource, namespace string) {
	// check if the quota controller can evaluate this groupResource; if not, ignore it altogether...
	evaluator := rq.registry.Get(groupResource)
	if evaluator == nil {
		return
	}

	// check if this namespace even has a quota...
	resourceQuotas, err := rq.rqLister.ResourceQuotas(namespace).List(labels.Everything())
	if errors.IsNotFound(err) {
		utilruntime.HandleError(fmt.Errorf("quota controller could not find ResourceQuota associated with namespace: %s, could take up to %v before a quota replenishes", namespace, rq.resyncPeriod()))
		return
	}
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("error checking to see if namespace %s has any ResourceQuota associated with it: %v", namespace, err))
		return
	}
	if len(resourceQuotas) == 0 {
		return
	}

	logger := klog.FromContext(ctx)

	// only queue those quotas that are tracking a resource associated with this kind.
	for i := range resourceQuotas {
		resourceQuota := resourceQuotas[i]
		resourceQuotaResources := quota.ResourceNames(resourceQuota.Status.Hard)
		if intersection := evaluator.MatchingResources(resourceQuotaResources); len(intersection) > 0 {
			// TODO: make this support targeted replenishment to a specific kind, right now it does a full recalc on that quota.
			rq.enqueueResourceQuota(logger, resourceQuota)
		}
	}
}
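// Sync (below) runs as its own goroutine alongside Run. A hypothetical
// launch sequence, roughly how a controller manager wires it up (the worker
// count and 30s period are illustrative, not prescribed by this file):
//
//	go rq.Run(ctx, 5)
//	go rq.Sync(ctx, discoveryFunc, 30*time.Second)
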
// Sync periodically resyncs the controller when new resources are observed from discovery.
func (rq *Controller) Sync(ctx context.Context, discoveryFunc NamespacedResourcesFunc, period time.Duration) {
	// track the set of resources seen from discovery so changes can be detected.
	oldResources := make(map[schema.GroupVersionResource]struct{})
	wait.UntilWithContext(ctx, func(ctx context.Context) {
		// Get the current resource list from discovery.
		newResources, err := GetQuotableResources(discoveryFunc)
		if err != nil {
			utilruntime.HandleError(err)

			if groupLookupFailures, isLookupFailure := discovery.GroupDiscoveryFailedErrorGroups(err); isLookupFailure && len(newResources) > 0 {
				// In partial discovery cases, preserve existing informers for resources in the failed groups, so resyncMonitors will only add informers for newly seen resources
				for k, v := range oldResources {
					if _, failed := groupLookupFailures[k.GroupVersion()]; failed {
						newResources[k] = v
					}
				}
			} else {
				// short circuit in non-discovery error cases or if discovery returned zero resources
				return
			}
		}

		logger := klog.FromContext(ctx)

		// Decide whether discovery has reported a change.
		if reflect.DeepEqual(oldResources, newResources) {
			logger.V(4).Info("no resource updates from discovery, skipping resource quota sync")
			return
		}

		// Ensure workers are paused to avoid processing events before informers
		// have resynced.
		rq.workerLock.Lock()
		defer rq.workerLock.Unlock()

		// Something has changed, so track the new state and perform a sync.
		if loggerV := logger.V(2); loggerV.Enabled() {
			loggerV.Info("syncing resource quota controller with updated resources from discovery", "diff", printDiff(oldResources, newResources))
		}

		// Perform the monitor resync and wait for controllers to report cache sync.
		if err := rq.resyncMonitors(ctx, newResources); err != nil {
			utilruntime.HandleError(fmt.Errorf("failed to sync resource monitors: %v", err))
			return
		}

		// at this point, we've synced the new resources to our monitors, so record that fact.
		oldResources = newResources

		// wait for caches to fill for a while (our sync period).
		// this protects us from deadlocks where available resources changed and one of our informer caches will never fill.
		// informers keep attempting to sync in the background, so retrying doesn't interrupt them.
		// the call to resyncMonitors on the reattempt will no-op for resources that still exist.
		if rq.quotaMonitor != nil &&
			!cache.WaitForNamedCacheSync(
				"resource quota",
				waitForStopOrTimeout(ctx.Done(), period),
				func() bool { return rq.quotaMonitor.IsSynced(ctx) },
			) {
			utilruntime.HandleError(fmt.Errorf("timed out waiting for quota monitor sync"))
			return
		}

		logger.V(2).Info("synced quota controller")
	}, period)
}

// printDiff returns a human-readable summary of what resources were added and removed
func printDiff(oldResources, newResources map[schema.GroupVersionResource]struct{}) string {
	removed := sets.NewString()
	for oldResource := range oldResources {
		if _, ok := newResources[oldResource]; !ok {
			removed.Insert(fmt.Sprintf("%+v", oldResource))
		}
	}
	added := sets.NewString()
	for newResource := range newResources {
		if _, ok := oldResources[newResource]; !ok {
			added.Insert(fmt.Sprintf("%+v", newResource))
		}
	}
	return fmt.Sprintf("added: %v, removed: %v", added.List(), removed.List())
}

// waitForStopOrTimeout returns a stop channel that closes when the provided stop channel closes or when the specified timeout is reached
func waitForStopOrTimeout(stopCh <-chan struct{}, timeout time.Duration) <-chan struct{} {
	stopChWithTimeout := make(chan struct{})
	go func() {
		defer close(stopChWithTimeout)
		select {
		case <-stopCh:
		case <-time.After(timeout):
		}
	}()
	return stopChWithTimeout
}

// resyncMonitors starts or stops quota monitors as needed to ensure that all
// (and only) those resources present in the map are monitored.
func (rq *Controller) resyncMonitors(ctx context.Context, resources map[schema.GroupVersionResource]struct{}) error {
	if rq.quotaMonitor == nil {
		return nil
	}

	if err := rq.quotaMonitor.SyncMonitors(ctx, resources); err != nil {
		return err
	}
	rq.quotaMonitor.StartMonitors(ctx)
	return nil
}
// GetQuotableResources returns all resources that the quota system should recognize.
// It requires a resource to support the following verbs: 'create', 'list', 'watch', 'delete'.
// This function may return both results and an error. If that happens, it means that the discovery calls were only
// partially successful. A decision about whether to proceed or not is left to the caller.
func GetQuotableResources(discoveryFunc NamespacedResourcesFunc) (map[schema.GroupVersionResource]struct{}, error) {
	possibleResources, discoveryErr := discoveryFunc()
	if discoveryErr != nil && len(possibleResources) == 0 {
		return nil, fmt.Errorf("failed to discover resources: %v", discoveryErr)
	}
	quotableResources := discovery.FilteredBy(discovery.SupportsAllVerbs{Verbs: []string{"create", "list", "watch", "delete"}}, possibleResources)
	quotableGroupVersionResources, err := discovery.GroupVersionResources(quotableResources)
	if err != nil {
		return nil, fmt.Errorf("failed to parse resources: %v", err)
	}
	// return the original discovery error (if any) in addition to the list
	return quotableGroupVersionResources, discoveryErr
}
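// Callers typically treat a partial discovery failure as non-fatal, mirroring
// the pattern in NewController above (a sketch, not an additional API):
//
//	resources, err := GetQuotableResources(discoveryFunc)
//	if discovery.IsGroupDiscoveryFailedError(err) {
//		// partial result: continue with the resources that were discovered
//	} else if err != nil {
//		return err // total discovery failure
//	}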