sigs.k8s.io/kueue@v0.6.2/pkg/cache/snapshot.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cache
    18  
    19  import (
    20  	"github.com/go-logr/logr"
    21  	corev1 "k8s.io/api/core/v1"
    22  	"k8s.io/apimachinery/pkg/util/sets"
    23  	"k8s.io/klog/v2"
    24  
    25  	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
    26  	"sigs.k8s.io/kueue/pkg/features"
    27  	"sigs.k8s.io/kueue/pkg/util/maps"
    28  	"sigs.k8s.io/kueue/pkg/workload"
    29  )
    30  
// Snapshot is a copy of the cache contents produced by Cache.Snapshot.
type Snapshot struct {
	// ClusterQueues holds the copies of the active ClusterQueues, keyed by
	// ClusterQueue name.
	ClusterQueues            map[string]*ClusterQueue
	// ResourceFlavors maps each flavor reference to the ResourceFlavor object
	// held by the cache (the pointers are shared, not deep copied).
	ResourceFlavors          map[kueue.ResourceFlavorReference]*kueue.ResourceFlavor
	// InactiveClusterQueueSets contains the names of the ClusterQueues that
	// were not active when the snapshot was taken; those queues are not
	// present in ClusterQueues.
	InactiveClusterQueueSets sets.Set[string]
}
    36  
    37  // RemoveWorkload removes a workload from its corresponding ClusterQueue and
    38  // updates resource usage.
    39  func (s *Snapshot) RemoveWorkload(wl *workload.Info) {
    40  	cq := s.ClusterQueues[wl.ClusterQueue]
    41  	delete(cq.Workloads, workload.Key(wl.Obj))
    42  	updateUsage(wl, cq.Usage, -1)
    43  	if cq.Cohort != nil {
    44  		if features.Enabled(features.LendingLimit) {
    45  			updateCohortUsage(wl, cq, -1)
    46  		} else {
    47  			updateUsage(wl, cq.Cohort.Usage, -1)
    48  		}
    49  	}
    50  }
    51  
    52  // AddWorkload adds a workload from its corresponding ClusterQueue and
    53  // updates resource usage.
    54  func (s *Snapshot) AddWorkload(wl *workload.Info) {
    55  	cq := s.ClusterQueues[wl.ClusterQueue]
    56  	cq.Workloads[workload.Key(wl.Obj)] = wl
    57  	updateUsage(wl, cq.Usage, 1)
    58  	if cq.Cohort != nil {
    59  		if features.Enabled(features.LendingLimit) {
    60  			updateCohortUsage(wl, cq, 1)
    61  		} else {
    62  			updateUsage(wl, cq.Cohort.Usage, 1)
    63  		}
    64  	}
    65  }
    66  
    67  func (s *Snapshot) Log(log logr.Logger) {
    68  	cohorts := make(map[string]*Cohort)
    69  	for name, cq := range s.ClusterQueues {
    70  		cohortName := "<none>"
    71  		if cq.Cohort != nil {
    72  			cohorts[cq.Name] = cq.Cohort
    73  			cohortName = cq.Cohort.Name
    74  		}
    75  
    76  		log.Info("Found ClusterQueue",
    77  			"clusterQueue", klog.KRef("", name),
    78  			"cohort", cohortName,
    79  			"resourceGroups", cq.ResourceGroups,
    80  			"usage", cq.Usage,
    81  			"workloads", maps.Keys(cq.Workloads),
    82  		)
    83  	}
    84  	for name, cohort := range cohorts {
    85  		log.Info("Found cohort",
    86  			"cohort", name,
    87  			"resources", cohort.RequestableResources,
    88  			"usage", cohort.Usage,
    89  		)
    90  	}
    91  }
    92  
    93  func (c *Cache) Snapshot() Snapshot {
    94  	c.RLock()
    95  	defer c.RUnlock()
    96  
    97  	snap := Snapshot{
    98  		ClusterQueues:            make(map[string]*ClusterQueue, len(c.clusterQueues)),
    99  		ResourceFlavors:          make(map[kueue.ResourceFlavorReference]*kueue.ResourceFlavor, len(c.resourceFlavors)),
   100  		InactiveClusterQueueSets: sets.New[string](),
   101  	}
   102  	for _, cq := range c.clusterQueues {
   103  		if !cq.Active() {
   104  			snap.InactiveClusterQueueSets.Insert(cq.Name)
   105  			continue
   106  		}
   107  		snap.ClusterQueues[cq.Name] = cq.snapshot()
   108  	}
   109  	for name, rf := range c.resourceFlavors {
   110  		// Shallow copy is enough
   111  		snap.ResourceFlavors[name] = rf
   112  	}
   113  	for _, cohort := range c.cohorts {
   114  		cohortCopy := newCohort(cohort.Name, cohort.Members.Len())
   115  		cohortCopy.AllocatableResourceGeneration = 0
   116  		for cq := range cohort.Members {
   117  			if cq.Active() {
   118  				cqCopy := snap.ClusterQueues[cq.Name]
   119  				cqCopy.accumulateResources(cohortCopy)
   120  				cqCopy.Cohort = cohortCopy
   121  				cohortCopy.Members.Insert(cqCopy)
   122  				cohortCopy.AllocatableResourceGeneration += cqCopy.AllocatableResourceGeneration
   123  			}
   124  		}
   125  	}
   126  	return snap
   127  }
   128  
   129  // snapshot creates a copy of ClusterQueue that includes references to immutable
   130  // objects and deep copies of changing ones. A reference to the cohort is not included.
   131  func (c *ClusterQueue) snapshot() *ClusterQueue {
   132  	cc := &ClusterQueue{
   133  		Name:                          c.Name,
   134  		ResourceGroups:                c.ResourceGroups, // Shallow copy is enough.
   135  		RGByResource:                  c.RGByResource,   // Shallow copy is enough.
   136  		FlavorFungibility:             c.FlavorFungibility,
   137  		AllocatableResourceGeneration: c.AllocatableResourceGeneration,
   138  		Usage:                         make(FlavorResourceQuantities, len(c.Usage)),
   139  		Workloads:                     make(map[string]*workload.Info, len(c.Workloads)),
   140  		Preemption:                    c.Preemption,
   141  		NamespaceSelector:             c.NamespaceSelector,
   142  		Status:                        c.Status,
   143  		AdmissionChecks:               c.AdmissionChecks.Clone(),
   144  	}
   145  	for fName, rUsage := range c.Usage {
   146  		rUsageCopy := make(map[corev1.ResourceName]int64, len(rUsage))
   147  		for k, v := range rUsage {
   148  			rUsageCopy[k] = v
   149  		}
   150  		cc.Usage[fName] = rUsageCopy
   151  	}
   152  	for k, v := range c.Workloads {
   153  		// Shallow copy is enough.
   154  		cc.Workloads[k] = v
   155  	}
   156  
   157  	if features.Enabled(features.LendingLimit) {
   158  		cc.GuaranteedQuota = c.GuaranteedQuota
   159  	}
   160  
   161  	return cc
   162  }
   163  
   164  func (c *ClusterQueue) accumulateResources(cohort *Cohort) {
   165  	if cohort.RequestableResources == nil {
   166  		cohort.RequestableResources = make(FlavorResourceQuantities, len(c.ResourceGroups))
   167  	}
   168  	for _, rg := range c.ResourceGroups {
   169  		for _, flvQuotas := range rg.Flavors {
   170  			res := cohort.RequestableResources[flvQuotas.Name]
   171  			if res == nil {
   172  				res = make(map[corev1.ResourceName]int64, len(flvQuotas.Resources))
   173  				cohort.RequestableResources[flvQuotas.Name] = res
   174  			}
   175  			for rName, rQuota := range flvQuotas.Resources {
   176  				// When feature LendingLimit enabled, cohort.RequestableResources indicates
   177  				// the sum of cq.NominalQuota and other cqs' LendingLimit (if not nil).
   178  				// If LendingLimit is not nil, we should count the lendingLimit as the requestable
   179  				// resource because we can't borrow more quota than lendingLimit.
   180  				if features.Enabled(features.LendingLimit) && rQuota.LendingLimit != nil {
   181  					res[rName] += *rQuota.LendingLimit
   182  				} else {
   183  					res[rName] += rQuota.Nominal
   184  				}
   185  			}
   186  		}
   187  	}
   188  	if cohort.Usage == nil {
   189  		cohort.Usage = make(FlavorResourceQuantities, len(c.Usage))
   190  	}
   191  	for fName, resUsages := range c.Usage {
   192  		used := cohort.Usage[fName]
   193  		if used == nil {
   194  			used = make(map[corev1.ResourceName]int64, len(resUsages))
   195  			cohort.Usage[fName] = used
   196  		}
   197  		for res, val := range resUsages {
   198  			// Similar to cohort.RequestableResources, we accumulate the usage above the guaranteed resources,
   199  			// here we should remove the guaranteed quota as well for that part can not be borrowed.
   200  			val -= c.guaranteedQuota(fName, res)
   201  			// if val < 0, it means the cq is not using any quota belongs to LendingLimit
   202  			if val < 0 {
   203  				val = 0
   204  			}
   205  			used[res] += val
   206  		}
   207  	}
   208  }