sigs.k8s.io/kueue@v0.6.2/pkg/cache/cache.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cache

import (
	"context"
	"errors"
	"fmt"
	"sort"
	"sync"

	"github.com/go-logr/logr"
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
	utilindexer "sigs.k8s.io/kueue/pkg/controller/core/indexer"
	"sigs.k8s.io/kueue/pkg/metrics"
	"sigs.k8s.io/kueue/pkg/workload"
)

var (
	errCqNotFound          = errors.New("cluster queue not found")
	errQNotFound           = errors.New("queue not found")
	errWorkloadNotAdmitted = errors.New("workload not admitted by a ClusterQueue")
)

const (
	pending     = metrics.CQStatusPending
	active      = metrics.CQStatusActive
	terminating = metrics.CQStatusTerminating
)

type options struct {
	podsReadyTracking bool
}

// Option configures the cache.
type Option func(*options)

// WithPodsReadyTracking indicates the cache controller tracks the PodsReady
// condition for admitted workloads, and allows blocking admission of new
// workloads until all admitted workloads are in the PodsReady condition.
func WithPodsReadyTracking(f bool) Option {
	return func(o *options) {
		o.podsReadyTracking = f
	}
}

var defaultOptions = options{}

// Cache keeps track of the Workloads that got admitted through ClusterQueues.
type Cache struct {
	sync.RWMutex
	podsReadyCond sync.Cond

	client            client.Client
	clusterQueues     map[string]*ClusterQueue
	cohorts           map[string]*Cohort
	assumedWorkloads  map[string]string
	resourceFlavors   map[kueue.ResourceFlavorReference]*kueue.ResourceFlavor
	podsReadyTracking bool
	admissionChecks   map[string]AdmissionCheck
}

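// New creates a Cache with the given client, applying any provided options.
//
// A minimal usage sketch (assuming a controller-runtime manager mgr and a
// context ctx; the variable names are illustrative):
//
//	cqCache := cache.New(mgr.GetClient(), cache.WithPodsReadyTracking(true))
//	go cqCache.CleanUpOnContext(ctx)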
func New(client client.Client, opts ...Option) *Cache {
	options := defaultOptions
	for _, opt := range opts {
		opt(&options)
	}
	c := &Cache{
		client:            client,
		clusterQueues:     make(map[string]*ClusterQueue),
		cohorts:           make(map[string]*Cohort),
		assumedWorkloads:  make(map[string]string),
		resourceFlavors:   make(map[kueue.ResourceFlavorReference]*kueue.ResourceFlavor),
		admissionChecks:   make(map[string]AdmissionCheck),
		podsReadyTracking: options.podsReadyTracking,
	}
	c.podsReadyCond.L = &c.RWMutex
	return c
}

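// newClusterQueue builds the internal representation of a ClusterQueue from
// the API object, using the currently cached flavors and admission checks.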
func (c *Cache) newClusterQueue(cq *kueue.ClusterQueue) (*ClusterQueue, error) {
	cqImpl := &ClusterQueue{
		Name:              cq.Name,
		Workloads:         make(map[string]*workload.Info),
		WorkloadsNotReady: sets.New[string](),
		localQueues:       make(map[string]*queue),
		podsReadyTracking: c.podsReadyTracking,
	}
	if err := cqImpl.update(cq, c.resourceFlavors, c.admissionChecks); err != nil {
		return nil, err
	}

	return cqImpl, nil
}

// WaitForPodsReady waits for all admitted workloads to be in the PodsReady
// condition if podsReadyTracking is enabled; otherwise it returns immediately.
func (c *Cache) WaitForPodsReady(ctx context.Context) {
	if !c.podsReadyTracking {
		return
	}

	c.Lock()
	defer c.Unlock()

	log := ctrl.LoggerFrom(ctx)
	for {
		if c.podsReadyForAllAdmittedWorkloads(log) {
			return
		}
		log.V(3).Info("Blocking admission as not all workloads are in the PodsReady condition")
		select {
		case <-ctx.Done():
			log.V(5).Info("Context cancelled when waiting for pods to be ready; returning")
			return
		default:
			// Wait releases the lock and reacquires it when awakened.
			c.podsReadyCond.Wait()
		}
	}
}

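// PodsReadyForAllAdmittedWorkloads returns true if podsReadyTracking is
// disabled, or if all admitted workloads in all ClusterQueues are in the
// PodsReady condition.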
func (c *Cache) PodsReadyForAllAdmittedWorkloads(log logr.Logger) bool {
	if !c.podsReadyTracking {
		return true
	}
	c.Lock()
	defer c.Unlock()
	return c.podsReadyForAllAdmittedWorkloads(log)
}

func (c *Cache) podsReadyForAllAdmittedWorkloads(log logr.Logger) bool {
	for _, cq := range c.clusterQueues {
		if len(cq.WorkloadsNotReady) > 0 {
			log.V(3).Info("There is a ClusterQueue with not ready workloads", "clusterQueue", klog.KRef("", cq.Name))
			return false
		}
	}
	log.V(5).Info("All workloads are in the PodsReady condition")
	return true
}

// CleanUpOnContext tracks the context. When the context is done, it wakes
// routines waiting on the podsReady condition. It should be called before
// doing any calls to cache.WaitForPodsReady.
func (c *Cache) CleanUpOnContext(ctx context.Context) {
	<-ctx.Done()
	c.Lock()
	defer c.Unlock()
	c.podsReadyCond.Broadcast()
}

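// updateClusterQueues reevaluates all ClusterQueues against the cached
// flavors and admission checks, and returns the names of those that
// transitioned from pending to active.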
func (c *Cache) updateClusterQueues() sets.Set[string] {
	cqs := sets.New[string]()

	for _, cq := range c.clusterQueues {
		prevStatus := cq.Status
		// We call update on all ClusterQueues irrespective of which CQs actually
		// use this flavor or check, because it is inexpensive to do so, and not
		// worth tracking which ClusterQueues use which flavors.
		cq.UpdateWithFlavors(c.resourceFlavors)
		cq.updateWithAdmissionChecks(c.admissionChecks)
		curStatus := cq.Status
		if prevStatus == pending && curStatus == active {
			cqs.Insert(cq.Name)
		}
	}
	return cqs
}

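// AddOrUpdateResourceFlavor stores the ResourceFlavor in the cache and
// returns the names of the ClusterQueues that transitioned from pending to
// active as a result.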
func (c *Cache) AddOrUpdateResourceFlavor(rf *kueue.ResourceFlavor) sets.Set[string] {
	c.Lock()
	defer c.Unlock()
	c.resourceFlavors[kueue.ResourceFlavorReference(rf.Name)] = rf
	return c.updateClusterQueues()
}

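// DeleteResourceFlavor removes the ResourceFlavor from the cache and returns
// the names of the ClusterQueues that transitioned from pending to active as
// a result.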
func (c *Cache) DeleteResourceFlavor(rf *kueue.ResourceFlavor) sets.Set[string] {
	c.Lock()
	defer c.Unlock()
	delete(c.resourceFlavors, kueue.ResourceFlavorReference(rf.Name))
	return c.updateClusterQueues()
}

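// AddOrUpdateAdmissionCheck stores the AdmissionCheck's active state in the
// cache and returns the names of the ClusterQueues that transitioned from
// pending to active as a result.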
func (c *Cache) AddOrUpdateAdmissionCheck(ac *kueue.AdmissionCheck) sets.Set[string] {
	c.Lock()
	defer c.Unlock()
	c.admissionChecks[ac.Name] = AdmissionCheck{
		Active: apimeta.IsStatusConditionTrue(ac.Status.Conditions, kueue.AdmissionCheckActive),
	}

	return c.updateClusterQueues()
}

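// DeleteAdmissionCheck removes the AdmissionCheck from the cache and returns
// the names of the ClusterQueues that transitioned from pending to active as
// a result.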
func (c *Cache) DeleteAdmissionCheck(ac *kueue.AdmissionCheck) sets.Set[string] {
	c.Lock()
	defer c.Unlock()
	delete(c.admissionChecks, ac.Name)
	return c.updateClusterQueues()
}

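// ClusterQueueActive returns true if the ClusterQueue exists and is in the
// active status.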
func (c *Cache) ClusterQueueActive(name string) bool {
	return c.clusterQueueInStatus(name, active)
}

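// ClusterQueueTerminating returns true if the ClusterQueue exists and is in
// the terminating status.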
func (c *Cache) ClusterQueueTerminating(name string) bool {
	return c.clusterQueueInStatus(name, terminating)
}

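// ClusterQueueReadiness returns a condition status, reason and message
// describing whether the ClusterQueue can admit new workloads.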
func (c *Cache) ClusterQueueReadiness(name string) (metav1.ConditionStatus, string, string) {
	c.RLock()
	defer c.RUnlock()
	cq := c.clusterQueues[name]
	if cq == nil {
		return metav1.ConditionFalse, "NotFound", "ClusterQueue not found"
	}
	if cq.Status == active {
		return metav1.ConditionTrue, "Ready", "Can admit new workloads"
	}
	reason, msg := cq.inactiveReason()
	return metav1.ConditionFalse, reason, msg
}

func (c *Cache) clusterQueueInStatus(name string, status metrics.ClusterQueueStatus) bool {
	c.RLock()
	defer c.RUnlock()

	cq, exists := c.clusterQueues[name]
	if !exists {
		return false
	}
	return cq.Status == status
}

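// TerminateClusterQueue marks the ClusterQueue as terminating, if it exists,
// and reports the status change in metrics.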
func (c *Cache) TerminateClusterQueue(name string) {
	c.Lock()
	defer c.Unlock()
	if cq, exists := c.clusterQueues[name]; exists {
		cq.Status = terminating
		metrics.ReportClusterQueueStatus(cq.Name, cq.Status)
	}
}

// ClusterQueueEmpty returns true if the ClusterQueue has no admitted
// workloads, or if the ClusterQueue doesn't exist.
func (c *Cache) ClusterQueueEmpty(name string) bool {
	c.RLock()
	defer c.RUnlock()
	cq, exists := c.clusterQueues[name]
	if !exists {
		return true
	}
	return len(cq.Workloads) == 0
}

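// AddClusterQueue adds a ClusterQueue to the cache, along with the existing
// LocalQueues and admitted Workloads that point to it.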
func (c *Cache) AddClusterQueue(ctx context.Context, cq *kueue.ClusterQueue) error {
	c.Lock()
	defer c.Unlock()

	if _, ok := c.clusterQueues[cq.Name]; ok {
		return errors.New("ClusterQueue already exists")
	}
	cqImpl, err := c.newClusterQueue(cq)
	if err != nil {
		return err
	}
	c.addClusterQueueToCohort(cqImpl, cq.Spec.Cohort)
	c.clusterQueues[cq.Name] = cqImpl

	// On controller restart, an add ClusterQueue event may come after the add
	// events for its queues and workloads, so here we explicitly list and add
	// the existing queues and workloads.
	var queues kueue.LocalQueueList
	if err := c.client.List(ctx, &queues, client.MatchingFields{utilindexer.QueueClusterQueueKey: cq.Name}); err != nil {
		return fmt.Errorf("listing queues that match the clusterQueue: %w", err)
	}
	for _, q := range queues.Items {
		qKey := queueKey(&q)
		qImpl := &queue{
			key:                qKey,
			reservingWorkloads: 0,
			admittedWorkloads:  0,
			// TODO: rename this to better distinguish between reserved and in use quantities
			usage:         make(FlavorResourceQuantities),
			admittedUsage: make(FlavorResourceQuantities),
		}
		if err = qImpl.resetFlavorsAndResources(cqImpl.Usage, cqImpl.AdmittedUsage); err != nil {
			return err
		}
		cqImpl.localQueues[qKey] = qImpl
	}
	var workloads kueue.WorkloadList
	if err := c.client.List(ctx, &workloads, client.MatchingFields{utilindexer.WorkloadClusterQueueKey: cq.Name}); err != nil {
		return fmt.Errorf("listing workloads that match the queue: %w", err)
	}
	for i, w := range workloads.Items {
		if !workload.HasQuotaReservation(&w) || workload.IsFinished(&w) {
			continue
		}
		c.addOrUpdateWorkload(&workloads.Items[i])
	}

	return nil
}

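// UpdateClusterQueue updates the cached ClusterQueue from the API object,
// refreshing the usage of its LocalQueues and its cohort membership.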
func (c *Cache) UpdateClusterQueue(cq *kueue.ClusterQueue) error {
	c.Lock()
	defer c.Unlock()
	cqImpl, ok := c.clusterQueues[cq.Name]
	if !ok {
		return errCqNotFound
	}
	if err := cqImpl.update(cq, c.resourceFlavors, c.admissionChecks); err != nil {
		return err
	}
	for _, qImpl := range cqImpl.localQueues {
		if qImpl == nil {
			return errQNotFound
		}
		if err := qImpl.resetFlavorsAndResources(cqImpl.Usage, cqImpl.AdmittedUsage); err != nil {
			return err
		}
	}

	if cqImpl.Cohort == nil {
		c.addClusterQueueToCohort(cqImpl, cq.Spec.Cohort)
		return nil
	}

	if cqImpl.Cohort.Name != cq.Spec.Cohort {
		c.deleteClusterQueueFromCohort(cqImpl)
		c.addClusterQueueToCohort(cqImpl, cq.Spec.Cohort)
	}
	return nil
}

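// DeleteClusterQueue removes the ClusterQueue from the cache and clears its
// metrics.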
func (c *Cache) DeleteClusterQueue(cq *kueue.ClusterQueue) {
	c.Lock()
	defer c.Unlock()
	cqImpl, ok := c.clusterQueues[cq.Name]
	if !ok {
		return
	}
	c.deleteClusterQueueFromCohort(cqImpl)
	delete(c.clusterQueues, cq.Name)
	metrics.ClearCacheMetrics(cq.Name)
}

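// AddLocalQueue registers the LocalQueue under its ClusterQueue, if that
// ClusterQueue is in the cache.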
func (c *Cache) AddLocalQueue(q *kueue.LocalQueue) error {
	c.Lock()
	defer c.Unlock()
	cq, ok := c.clusterQueues[string(q.Spec.ClusterQueue)]
	if !ok {
		return nil
	}
	return cq.addLocalQueue(q)
}

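// DeleteLocalQueue removes the LocalQueue from its ClusterQueue, if that
// ClusterQueue is in the cache.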
func (c *Cache) DeleteLocalQueue(q *kueue.LocalQueue) {
	c.Lock()
	defer c.Unlock()
	cq, ok := c.clusterQueues[string(q.Spec.ClusterQueue)]
	if !ok {
		return
	}
	cq.deleteLocalQueue(q)
}

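// UpdateLocalQueue moves the LocalQueue between ClusterQueues when its
// .spec.clusterQueue field changes.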
func (c *Cache) UpdateLocalQueue(oldQ, newQ *kueue.LocalQueue) error {
	if oldQ.Spec.ClusterQueue == newQ.Spec.ClusterQueue {
		return nil
	}
	c.Lock()
	defer c.Unlock()
	cq, ok := c.clusterQueues[string(oldQ.Spec.ClusterQueue)]
	if ok {
		cq.deleteLocalQueue(oldQ)
	}
	cq, ok = c.clusterQueues[string(newQ.Spec.ClusterQueue)]
	if ok {
		return cq.addLocalQueue(newQ)
	}
	return nil
}

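// AddOrUpdateWorkload adds or updates the workload's accounting in the
// ClusterQueue that admitted it. It returns false if the workload has no
// quota reservation or its ClusterQueue is not in the cache.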
func (c *Cache) AddOrUpdateWorkload(w *kueue.Workload) bool {
	c.Lock()
	defer c.Unlock()
	return c.addOrUpdateWorkload(w)
}

func (c *Cache) addOrUpdateWorkload(w *kueue.Workload) bool {
	if !workload.HasQuotaReservation(w) {
		return false
	}

	clusterQueue, ok := c.clusterQueues[string(w.Status.Admission.ClusterQueue)]
	if !ok {
		return false
	}

	c.cleanupAssumedState(w)

	if _, exist := clusterQueue.Workloads[workload.Key(w)]; exist {
		clusterQueue.deleteWorkload(w)
	}

	if c.podsReadyTracking {
		c.podsReadyCond.Broadcast()
	}
	return clusterQueue.addWorkload(w) == nil
}

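// UpdateWorkload moves the workload's accounting from the old object's
// ClusterQueue to the new object's ClusterQueue.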
func (c *Cache) UpdateWorkload(oldWl, newWl *kueue.Workload) error {
	c.Lock()
	defer c.Unlock()
	if workload.HasQuotaReservation(oldWl) {
		cq, ok := c.clusterQueues[string(oldWl.Status.Admission.ClusterQueue)]
		if !ok {
			return errors.New("old ClusterQueue doesn't exist")
		}
		cq.deleteWorkload(oldWl)
	}
	c.cleanupAssumedState(oldWl)

	if !workload.HasQuotaReservation(newWl) {
		return nil
	}
	cq, ok := c.clusterQueues[string(newWl.Status.Admission.ClusterQueue)]
	if !ok {
		return errors.New("new ClusterQueue doesn't exist")
	}
	if c.podsReadyTracking {
		c.podsReadyCond.Broadcast()
	}
	return cq.addWorkload(newWl)
}

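// DeleteWorkload removes the workload from its ClusterQueue's accounting and
// from the assumed state.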
func (c *Cache) DeleteWorkload(w *kueue.Workload) error {
	c.Lock()
	defer c.Unlock()

	cq := c.clusterQueueForWorkload(w)
	if cq == nil {
		return errCqNotFound
	}

	c.cleanupAssumedState(w)

	cq.deleteWorkload(w)
	if c.podsReadyTracking {
		c.podsReadyCond.Broadcast()
	}
	return nil
}

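// IsAssumedOrAdmittedWorkload returns true if the workload is assumed or is
// already stored in its ClusterQueue.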
func (c *Cache) IsAssumedOrAdmittedWorkload(w workload.Info) bool {
	c.RLock()
	defer c.RUnlock()

	k := workload.Key(w.Obj)
	if _, assumed := c.assumedWorkloads[k]; assumed {
		return true
	}
	if cq, exists := c.clusterQueues[w.ClusterQueue]; exists {
		if _, admitted := cq.Workloads[k]; admitted {
			return true
		}
	}
	return false
}

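// AssumeWorkload adds the workload to the ClusterQueue recorded in its
// admission and marks it as assumed.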
func (c *Cache) AssumeWorkload(w *kueue.Workload) error {
	c.Lock()
	defer c.Unlock()

	if !workload.HasQuotaReservation(w) {
		return errWorkloadNotAdmitted
	}

	k := workload.Key(w)
	assumedCq, assumed := c.assumedWorkloads[k]
	if assumed {
		return fmt.Errorf("the workload is already assumed to ClusterQueue %q", assumedCq)
	}

	cq, ok := c.clusterQueues[string(w.Status.Admission.ClusterQueue)]
	if !ok {
		return errCqNotFound
	}

	if err := cq.addWorkload(w); err != nil {
		return err
	}
	c.assumedWorkloads[k] = string(w.Status.Admission.ClusterQueue)
	return nil
}

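// ForgetWorkload removes a previously assumed workload from the cache.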
func (c *Cache) ForgetWorkload(w *kueue.Workload) error {
	c.Lock()
	defer c.Unlock()

	if _, assumed := c.assumedWorkloads[workload.Key(w)]; !assumed {
		return errors.New("the workload is not assumed")
	}
	c.cleanupAssumedState(w)

	if !workload.HasQuotaReservation(w) {
		return errWorkloadNotAdmitted
	}

	cq, ok := c.clusterQueues[string(w.Status.Admission.ClusterQueue)]
	if !ok {
		return errCqNotFound
	}
	cq.deleteWorkload(w)
	if c.podsReadyTracking {
		c.podsReadyCond.Broadcast()
	}
	return nil
}

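// ClusterQueueUsageStats holds the reserved and admitted resource usage of a
// ClusterQueue, along with the number of workloads holding each.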
type ClusterQueueUsageStats struct {
	ReservedResources  []kueue.FlavorUsage
	ReservingWorkloads int
	AdmittedResources  []kueue.FlavorUsage
	AdmittedWorkloads  int
}

// Usage reports the reserved and admitted resources and the number of
// workloads holding them in the ClusterQueue.
func (c *Cache) Usage(cqObj *kueue.ClusterQueue) (*ClusterQueueUsageStats, error) {
	c.RLock()
	defer c.RUnlock()

	cq := c.clusterQueues[cqObj.Name]
	if cq == nil {
		return nil, errCqNotFound
	}

	return &ClusterQueueUsageStats{
		ReservedResources:  getUsage(cq.Usage, cq.ResourceGroups, cq.Cohort),
		ReservingWorkloads: len(cq.Workloads),
		AdmittedResources:  getUsage(cq.AdmittedUsage, cq.ResourceGroups, cq.Cohort),
		AdmittedWorkloads:  cq.admittedWorkloadsCount,
	}, nil
}

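// getUsage converts the cached quantities into the API FlavorUsage
// representation, computing borrowed amounts when the ClusterQueue belongs
// to a cohort.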
func getUsage(frq FlavorResourceQuantities, rgs []ResourceGroup, cohort *Cohort) []kueue.FlavorUsage {
	usage := make([]kueue.FlavorUsage, 0, len(frq))
	for _, rg := range rgs {
		for _, flvQuotas := range rg.Flavors {
			flvUsage := frq[flvQuotas.Name]
			outFlvUsage := kueue.FlavorUsage{
				Name:      flvQuotas.Name,
				Resources: make([]kueue.ResourceUsage, 0, len(flvQuotas.Resources)),
			}
			for rName, rQuota := range flvQuotas.Resources {
				used := flvUsage[rName]
				rUsage := kueue.ResourceUsage{
					Name:  rName,
					Total: workload.ResourceQuantity(rName, used),
				}
				// Enforce `borrowed=0` if the clusterQueue doesn't belong to a cohort.
				if cohort != nil {
					borrowed := used - rQuota.Nominal
					if borrowed > 0 {
						rUsage.Borrowed = workload.ResourceQuantity(rName, borrowed)
					}
				}
				outFlvUsage.Resources = append(outFlvUsage.Resources, rUsage)
			}
			// The resourceUsages should be in a stable order to avoid endless creation of update events.
			sort.Slice(outFlvUsage.Resources, func(i, j int) bool {
				return outFlvUsage.Resources[i].Name < outFlvUsage.Resources[j].Name
			})
			usage = append(usage, outFlvUsage)
		}
	}
	return usage
}

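// LocalQueueUsageStats holds the reserved and admitted resource usage of a
// LocalQueue, along with the number of workloads holding each.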
type LocalQueueUsageStats struct {
	ReservedResources  []kueue.LocalQueueFlavorUsage
	ReservingWorkloads int
	AdmittedResources  []kueue.LocalQueueFlavorUsage
	AdmittedWorkloads  int
}

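// LocalQueueUsage reports the reserved and admitted resources and the number
// of workloads holding them in the LocalQueue.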
func (c *Cache) LocalQueueUsage(qObj *kueue.LocalQueue) (*LocalQueueUsageStats, error) {
	c.RLock()
	defer c.RUnlock()

	cqImpl, ok := c.clusterQueues[string(qObj.Spec.ClusterQueue)]
	if !ok {
		return &LocalQueueUsageStats{}, nil
	}
	qImpl, ok := cqImpl.localQueues[queueKey(qObj)]
	if !ok {
		return nil, errQNotFound
	}

	return &LocalQueueUsageStats{
		ReservedResources:  filterLocalQueueUsage(qImpl.usage, cqImpl.ResourceGroups),
		ReservingWorkloads: qImpl.reservingWorkloads,
		AdmittedResources:  filterLocalQueueUsage(qImpl.admittedUsage, cqImpl.ResourceGroups),
		AdmittedWorkloads:  qImpl.admittedWorkloads,
	}, nil
}

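// filterLocalQueueUsage converts the cached quantities into the API
// LocalQueueFlavorUsage representation, keeping only the flavors and
// resources defined in the ClusterQueue's resource groups.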
func filterLocalQueueUsage(orig FlavorResourceQuantities, resourceGroups []ResourceGroup) []kueue.LocalQueueFlavorUsage {
	qFlvUsages := make([]kueue.LocalQueueFlavorUsage, 0, len(orig))
	for _, rg := range resourceGroups {
		for _, flvQuotas := range rg.Flavors {
			flvUsage := orig[flvQuotas.Name]
			outFlvUsage := kueue.LocalQueueFlavorUsage{
				Name:      flvQuotas.Name,
				Resources: make([]kueue.LocalQueueResourceUsage, 0, len(flvQuotas.Resources)),
			}
			for rName := range flvQuotas.Resources {
				outFlvUsage.Resources = append(outFlvUsage.Resources, kueue.LocalQueueResourceUsage{
					Name:  rName,
					Total: workload.ResourceQuantity(rName, flvUsage[rName]),
				})
			}
			// The resourceUsages should be in a stable order to avoid endless creation of update events.
			sort.Slice(outFlvUsage.Resources, func(i, j int) bool {
				return outFlvUsage.Resources[i].Name < outFlvUsage.Resources[j].Name
			})
			qFlvUsages = append(qFlvUsages, outFlvUsage)
		}
	}
	return qFlvUsages
}

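// cleanupAssumedState removes the workload from the assumed map. If the
// workload's assigned ClusterQueue differs from the assumed one, the usage
// is also removed from the assumed ClusterQueue.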
func (c *Cache) cleanupAssumedState(w *kueue.Workload) {
	k := workload.Key(w)
	assumedCQName, assumed := c.assumedWorkloads[k]
	if assumed {
		// If the workload's assigned ClusterQueue is different from the assumed
		// one, then we should also clean up the assumed one.
		if workload.HasQuotaReservation(w) && assumedCQName != string(w.Status.Admission.ClusterQueue) {
			if assumedCQ, exist := c.clusterQueues[assumedCQName]; exist {
				assumedCQ.deleteWorkload(w)
			}
		}
		delete(c.assumedWorkloads, k)
	}
}

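// clusterQueueForWorkload returns the ClusterQueue that holds the workload,
// first via its admission, then by searching all ClusterQueues.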
func (c *Cache) clusterQueueForWorkload(w *kueue.Workload) *ClusterQueue {
	if workload.HasQuotaReservation(w) {
		return c.clusterQueues[string(w.Status.Admission.ClusterQueue)]
	}
	wKey := workload.Key(w)
	for _, cq := range c.clusterQueues {
		if cq.Workloads[wKey] != nil {
			return cq
		}
	}
	return nil
}

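// addClusterQueueToCohort inserts the ClusterQueue into the named cohort,
// creating the cohort if it doesn't exist yet.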
func (c *Cache) addClusterQueueToCohort(cq *ClusterQueue, cohortName string) {
	if cohortName == "" {
		return
	}
	cohort, ok := c.cohorts[cohortName]
	if !ok {
		cohort = newCohort(cohortName, 1)
		c.cohorts[cohortName] = cohort
	}
	cohort.Members.Insert(cq)
	cq.Cohort = cohort
}

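// deleteClusterQueueFromCohort removes the ClusterQueue from its cohort,
// deleting the cohort when it becomes empty.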
func (c *Cache) deleteClusterQueueFromCohort(cq *ClusterQueue) {
	if cq.Cohort == nil {
		return
	}
	cq.Cohort.Members.Delete(cq)
	if cq.Cohort.Members.Len() == 0 {
		delete(c.cohorts, cq.Cohort.Name)
	}
	cq.Cohort = nil
}

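// ClusterQueuesUsingFlavor returns the names of the ClusterQueues that
// reference the given flavor.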
func (c *Cache) ClusterQueuesUsingFlavor(flavor string) []string {
	c.RLock()
	defer c.RUnlock()
	var cqs []string

	for _, cq := range c.clusterQueues {
		if cq.flavorInUse(flavor) {
			cqs = append(cqs, cq.Name)
		}
	}
	return cqs
}

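// ClusterQueuesUsingAdmissionCheck returns the names of the ClusterQueues
// that reference the given AdmissionCheck.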
func (c *Cache) ClusterQueuesUsingAdmissionCheck(ac string) []string {
	c.RLock()
	defer c.RUnlock()
	var cqs []string

	for _, cq := range c.clusterQueues {
		if cq.AdmissionChecks.Has(ac) {
			cqs = append(cqs, cq.Name)
		}
	}
	return cqs
}

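// MatchingClusterQueues returns the set of ClusterQueues whose namespace
// selector matches the given namespace labels.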
func (c *Cache) MatchingClusterQueues(nsLabels map[string]string) sets.Set[string] {
	c.RLock()
	defer c.RUnlock()

	cqs := sets.New[string]()
	for _, cq := range c.clusterQueues {
		if cq.NamespaceSelector.Matches(labels.Set(nsLabels)) {
			cqs.Insert(cq.Name)
		}
	}
	return cqs
}

// queueKey returns the key used to index the queue, in the format
// namespace/name.
func queueKey(q *kueue.LocalQueue) string {
	return fmt.Sprintf("%s/%s", q.Namespace, q.Name)
}