sigs.k8s.io/kueue@v0.6.2/apis/kueue/v1beta1/clusterqueue_types.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package v1beta1
    18  
    19  import (
    20  	corev1 "k8s.io/api/core/v1"
    21  	"k8s.io/apimachinery/pkg/api/resource"
    22  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    23  )
    24  
    25  // ClusterQueueSpec defines the desired state of ClusterQueue.
    26  type ClusterQueueSpec struct {
    27  	// resourceGroups describes groups of resources.
    28  	// Each resource group defines the list of resources and a list of flavors
    29  	// that provide quotas for these resources.
    30  	// Each resource and each flavor can only form part of one resource group.
    31  	// There can be up to 16 resourceGroups.
    32  	// +listType=atomic
    33  	// +kubebuilder:validation:MaxItems=16
    34  	ResourceGroups []ResourceGroup `json:"resourceGroups,omitempty"`
    35  
    36  	// cohort that this ClusterQueue belongs to. CQs that belong to the
    37  	// same cohort can borrow unused resources from each other.
    38  	//
    39  	// A CQ can be a member of a single borrowing cohort. A workload submitted
    40  	// to a queue referencing this CQ can borrow quota from any CQ in the cohort.
    41  	// Only quota for the [resource, flavor] pairs listed in the CQ can be
    42  	// borrowed.
    43  	// If empty, this ClusterQueue cannot borrow from any other ClusterQueue and
    44  	// vice versa.
    45  	//
    46  	// A cohort is a name that links CQs together, but it doesn't reference any
    47  	// object.
    48  	//
    49  	// Validation of a cohort name is equivalent to that of object names:
    50  	// subdomain in DNS (RFC 1123).
    51  	Cohort string `json:"cohort,omitempty"`
    52  
    53  	// queueingStrategy indicates the queueing strategy of the workloads
    54  	// across the queues in this ClusterQueue. This field is immutable.
    55  	// Currently supported strategies:
    56  	//
    57  	// - StrictFIFO: workloads are ordered strictly by creation time.
    58  	// Older workloads that can't be admitted will block admitting newer
    59  	// workloads even if they fit available quota.
    60  	// - BestEffortFIFO: workloads are ordered by creation time,
    61  	// however older workloads that can't be admitted will not block
    62  	// admitting newer workloads that fit existing quota.
    63  	//
    64  	// +kubebuilder:default=BestEffortFIFO
    65  	// +kubebuilder:validation:Enum=StrictFIFO;BestEffortFIFO
    66  	QueueingStrategy QueueingStrategy `json:"queueingStrategy,omitempty"`
    67  
    68  	// namespaceSelector defines which namespaces are allowed to submit workloads to
    69  	// this clusterQueue. Beyond this basic support for policy, a policy agent like
    70  	// Gatekeeper should be used to enforce more advanced policies.
    71  	// Defaults to null which is a nothing selector (no namespaces eligible).
    72  	// If set to an empty selector `{}`, then all namespaces are eligible.
    73  	NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty"`
    74  
    75  	// flavorFungibility defines whether a workload should try the next flavor
    76  	// before borrowing or preempting in the flavor being evaluated.
    77  	FlavorFungibility *FlavorFungibility `json:"flavorFungibility,omitempty"`
    78  
    79  	// preemption describes policies to preempt Workloads from this ClusterQueue
    80  	// or the ClusterQueue's cohort.
    81  	//
    82  	// Preemption can happen in two scenarios:
    83  	//
    84  	// - When a Workload fits within the nominal quota of the ClusterQueue, but
    85  	//   the quota is currently borrowed by other ClusterQueues in the cohort.
    86  	//   Preempting Workloads in other ClusterQueues allows this ClusterQueue to
    87  	//   reclaim its nominal quota.
    88  	// - When a Workload doesn't fit within the nominal quota of the ClusterQueue
    89  	//   and there are admitted Workloads in the ClusterQueue with lower priority.
    90  	//
    91  	// The preemption algorithm tries to find a minimal set of Workloads to
    92  	// preempt to accommodate the pending Workload, preempting Workloads with
    93  	// lower priority first.
    94  	Preemption *ClusterQueuePreemption `json:"preemption,omitempty"`
    95  
    96  	// admissionChecks lists the AdmissionChecks required by this ClusterQueue
    97  	// +optional
    98  	AdmissionChecks []string `json:"admissionChecks,omitempty"`
    99  
   100  	// stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being
   101  	// made.
   102  	//
   103  	// Depending on its value, its associated workloads will:
   104  	//
   105  	// - None - Workloads are admitted
   106  	// - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation.
   107  	// - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation.
   108  	//
   109  	// +optional
   110  	// +kubebuilder:validation:Enum=None;Hold;HoldAndDrain
   111  	// +kubebuilder:default="None"
   112  	StopPolicy *StopPolicy `json:"stopPolicy,omitempty"`
   113  }
   114  
        // QueueingStrategy names the strategy used to order the workloads across the
        // queues in a ClusterQueue.
   115  type QueueingStrategy string
   116  
   117  const (
   118  	// StrictFIFO means that workloads of the same priority are ordered strictly by creation time.
   119  	// Older workloads that can't be admitted will block admitting newer
   120  	// workloads even if they fit available quota.
   121  	StrictFIFO QueueingStrategy = "StrictFIFO"
   122  
   123  	// BestEffortFIFO means that workloads of the same priority are ordered by creation time,
   124  	// however older workloads that can't be admitted will not block
   125  	// admitting newer workloads that fit existing quota.
   126  	BestEffortFIFO QueueingStrategy = "BestEffortFIFO"
   127  )
   128  
        // StopPolicy names what happens to the workloads of a ClusterQueue. Any value
        // other than None makes the ClusterQueue Inactive, with no new reservations
        // being made:
        //
        //   - None: workloads are admitted.
        //   - HoldAndDrain: admitted workloads are evicted and reserving workloads
        //     cancel the reservation.
        //   - Hold: admitted workloads run to completion and reserving workloads
        //     cancel the reservation.
   129  type StopPolicy string
   130  
   131  const (
   132  	None         StopPolicy = "None"
   133  	HoldAndDrain StopPolicy = "HoldAndDrain"
   134  	Hold         StopPolicy = "Hold"
   135  )
   136  
        // ResourceGroup defines a list of resources and the list of flavors that
        // provide quotas for these resources.
   137  type ResourceGroup struct {
   138  	// coveredResources is the list of resources covered by the flavors in this
   139  	// group.
   140  	// Examples: cpu, memory, vendor.com/gpu.
   141  	// The list cannot be empty and it can contain up to 16 resources.
   142  	// +kubebuilder:validation:MinItems=1
   143  	// +kubebuilder:validation:MaxItems=16
   144  	CoveredResources []corev1.ResourceName `json:"coveredResources"`
   145  
   146  	// flavors is the list of flavors that provide the resources of this group.
   147  	// Typically, different flavors represent different hardware models
   148  	// (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot
   149  	// cpus).
   150  	// Each flavor MUST list all the resources listed for this group in the same
   151  	// order as the .coveredResources field.
   152  	// The list cannot be empty and it can contain up to 16 flavors.
   153  	// +listType=map
   154  	// +listMapKey=name
   155  	// +kubebuilder:validation:MinItems=1
   156  	// +kubebuilder:validation:MaxItems=16
   157  	Flavors []FlavorQuotas `json:"flavors"`
   158  }
   159  
        // FlavorQuotas holds the quotas that a single flavor provides, one entry per
        // resource.
   160  type FlavorQuotas struct {
   161  	// name of this flavor. The name should match the .metadata.name of a
   162  	// ResourceFlavor. If a matching ResourceFlavor does not exist, the
   163  	// ClusterQueue will have an Active condition set to False.
   164  	Name ResourceFlavorReference `json:"name"`
   165  
   166  	// resources is the list of quotas for this flavor per resource.
   167  	// The list cannot be empty and it can contain up to 16 resources.
   168  	// +listType=map
   169  	// +listMapKey=name
   170  	// +kubebuilder:validation:MinItems=1
   171  	// +kubebuilder:validation:MaxItems=16
   172  	Resources []ResourceQuota `json:"resources"`
   173  }
   174  
        // ResourceQuota defines the quota of a single resource within a flavor: its
        // nominal quota plus the optional borrowing and lending limits.
   175  type ResourceQuota struct {
   176  	// name of this resource.
   177  	Name corev1.ResourceName `json:"name"`
   178  
   179  	// nominalQuota is the quantity of this resource that is available for
   180  	// Workloads admitted by this ClusterQueue at a point in time.
   181  	// The nominalQuota must be non-negative.
   182  	// nominalQuota should represent the resources in the cluster available for
   183  	// running jobs (after discounting resources consumed by system components
   184  	// and pods not managed by kueue). In an autoscaled cluster, nominalQuota
   185  	// should account for resources that can be provided by a component such as
   186  	// Kubernetes cluster-autoscaler.
   187  	//
   188  	// If the ClusterQueue belongs to a cohort, the sum of the quotas for each
   189  	// (flavor, resource) combination defines the maximum quantity that can be
   190  	// allocated by a ClusterQueue in the cohort.
   191  	NominalQuota resource.Quantity `json:"nominalQuota"`
   192  
   193  	// borrowingLimit is the maximum amount of quota for the [flavor, resource]
   194  	// combination that this ClusterQueue is allowed to borrow from the unused
   195  	// quota of other ClusterQueues in the same cohort.
   196  	// In total, at a given time, Workloads in a ClusterQueue can consume a
   197  	// quantity of quota equal to nominalQuota+borrowingLimit, assuming the other
   198  	// ClusterQueues in the cohort have enough unused quota.
   199  	// If null, it means that there is no borrowing limit.
   200  	// If not null, it must be non-negative.
   201  	// borrowingLimit must be null if spec.cohort is empty.
   202  	// +optional
   203  	BorrowingLimit *resource.Quantity `json:"borrowingLimit,omitempty"`
   204  
   205  	// lendingLimit is the maximum amount of unused quota for the [flavor, resource]
   206  	// combination that this ClusterQueue can lend to other ClusterQueues in the same cohort.
   207  	// In total, at a given time, ClusterQueue reserves for its exclusive use
   208  	// a quantity of quota equal to nominalQuota - lendingLimit.
   209  	// If null, it means that there is no lending limit, meaning that
   210  	// all the nominalQuota can be borrowed by other clusterQueues in the cohort.
   211  	// If not null, it must be non-negative.
   212  	// lendingLimit must be null if spec.cohort is empty.
   213  	// This field is in alpha stage. To be able to use this field,
   214  	// enable the feature gate LendingLimit, which is disabled by default.
   215  	// +optional
   216  	LendingLimit *resource.Quantity `json:"lendingLimit,omitempty"`
   217  }
   218  
   219  // ResourceFlavorReference is the name of the ResourceFlavor.
        // It is the type of the name field of FlavorQuotas and FlavorUsage.
   220  type ResourceFlavorReference string
   221  
   222  // ClusterQueueStatus defines the observed state of ClusterQueue.
   223  type ClusterQueueStatus struct {
   224  	// flavorsReservation are the reserved quotas, by flavor, currently in use by the
   225  	// workloads assigned to this ClusterQueue.
   226  	// +listType=map
   227  	// +listMapKey=name
   228  	// +kubebuilder:validation:MaxItems=16
   229  	// +optional
   230  	FlavorsReservation []FlavorUsage `json:"flavorsReservation"`
   231  
   232  	// flavorsUsage are the used quotas, by flavor, currently in use by the
   233  	// workloads admitted in this ClusterQueue.
   234  	// +listType=map
   235  	// +listMapKey=name
   236  	// +kubebuilder:validation:MaxItems=16
   237  	// +optional
   238  	FlavorsUsage []FlavorUsage `json:"flavorsUsage"`
   239  
   240  	// pendingWorkloads is the number of workloads currently waiting to be
   241  	// admitted to this clusterQueue.
   242  	// +optional
   243  	PendingWorkloads int32 `json:"pendingWorkloads"`
   244  
   245  	// reservingWorkloads is the number of workloads currently reserving quota in this
   246  	// clusterQueue.
   247  	// +optional
   248  	ReservingWorkloads int32 `json:"reservingWorkloads"`
   249  
   250  	// admittedWorkloads is the number of workloads currently admitted to this
   251  	// clusterQueue that haven't finished yet.
   252  	// +optional
   253  	AdmittedWorkloads int32 `json:"admittedWorkloads"`
   254  
   255  	// conditions holds the latest available observations of the ClusterQueue's
   256  	// current state.
   257  	// +optional
   258  	// +listType=map
   259  	// +listMapKey=type
   260  	// +patchStrategy=merge
   261  	// +patchMergeKey=type
   262  	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
   263  
   264  	// pendingWorkloadsStatus contains the information exposed about the current
   265  	// status of the pending workloads in the cluster queue.
   266  	// +optional
   267  	PendingWorkloadsStatus *ClusterQueuePendingWorkloadsStatus `json:"pendingWorkloadsStatus"`
   268  }
   269  
        // ClusterQueuePendingWorkloadsStatus contains the information exposed about
        // the current status of the pending workloads in the cluster queue.
   270  type ClusterQueuePendingWorkloadsStatus struct {
   271  	// Head contains the list of top pending workloads.
   272  	// +listType=atomic
   273  	// +optional
   274  	Head []ClusterQueuePendingWorkload `json:"clusterQueuePendingWorkload"`
   275  
   276  	// LastChangeTime indicates the time of the last change of the structure.
   277  	LastChangeTime metav1.Time `json:"lastChangeTime"`
   278  }
   279  
   280  // ClusterQueuePendingWorkload contains the information identifying a pending workload
   281  // in the cluster queue.
   282  type ClusterQueuePendingWorkload struct {
   283  	// Name indicates the name of the pending workload.
   284  	Name string `json:"name"`
   285  
   286  	// Namespace indicates the namespace of the pending workload.
   287  	Namespace string `json:"namespace"`
   288  }
   289  
        // FlavorUsage reports the quota usage of a single flavor, one entry per
        // resource.
   290  type FlavorUsage struct {
   291  	// name of the flavor.
   292  	Name ResourceFlavorReference `json:"name"`
   293  
   294  	// resources lists the quota usage for the resources in this flavor.
   295  	// +listType=map
   296  	// +listMapKey=name
   297  	// +kubebuilder:validation:MaxItems=16
   298  	Resources []ResourceUsage `json:"resources"`
   299  }
   300  
        // ResourceUsage reports the used quota of a single resource, split into the
        // total amount and the part of it that is borrowed from the cohort.
   301  type ResourceUsage struct {
   302  	// name of the resource.
   303  	Name corev1.ResourceName `json:"name"`
   304  
   305  	// total is the total quantity of used quota, including the amount borrowed
   306  	// from the cohort.
   307  	Total resource.Quantity `json:"total,omitempty"`
   308  
   309  	// Borrowed is the quantity of quota that is borrowed from the cohort. In other
   310  	// words, it's the used quota that is over the nominalQuota.
   311  	Borrowed resource.Quantity `json:"borrowed,omitempty"`
   312  }
   313  
   314  const (
   315  	// ClusterQueueActive is the condition type indicating that the ClusterQueue can
   316  	// admit new workloads and that its quota can be borrowed by other ClusterQueues
        	// in the same cohort.
   317  	ClusterQueueActive string = "Active"
   318  )
   319  
        // PreemptionPolicy names a rule for choosing the Workloads that a pending
        // Workload is allowed to preempt. The values declared below are:
        //
        //   - Never: do not preempt Workloads.
        //   - Any: preempt any Workload, irrespective of priority.
        //   - LowerPriority: only preempt Workloads that have lower priority than
        //     the pending Workload.
        //   - LowerOrNewerEqualPriority: only preempt Workloads that either have a
        //     lower priority than the pending Workload or equal priority and are
        //     newer than the pending Workload.
        //
        // Each field of this type restricts the accepted values with its own Enum
        // validation marker.
   320  type PreemptionPolicy string
   321  
   322  const (
   323  	PreemptionPolicyNever                     PreemptionPolicy = "Never"
   324  	PreemptionPolicyAny                       PreemptionPolicy = "Any"
   325  	PreemptionPolicyLowerPriority             PreemptionPolicy = "LowerPriority"
   326  	PreemptionPolicyLowerOrNewerEqualPriority PreemptionPolicy = "LowerOrNewerEqualPriority"
   327  )
   328  
        // FlavorFungibilityPolicy names whether a workload should try the next flavor
        // before borrowing or preempting in the current flavor:
        //
        //   - Borrow: allocate in the current flavor if borrowing is possible.
        //   - Preempt: allocate in the current flavor if it's possible to preempt
        //     some workloads.
        //   - TryNextFlavor: try the next flavor even if the current flavor could
        //     be used by borrowing or preempting.
   329  type FlavorFungibilityPolicy string
   330  
   331  const (
   332  	Borrow        FlavorFungibilityPolicy = "Borrow"
   333  	Preempt       FlavorFungibilityPolicy = "Preempt"
   334  	TryNextFlavor FlavorFungibilityPolicy = "TryNextFlavor"
   335  )
   336  
   337  // FlavorFungibility determines whether a workload should try the next flavor
   338  // before borrowing or preempting in current flavor.
   339  type FlavorFungibility struct {
   340  	// whenCanBorrow determines whether a workload should try the next flavor
   341  	// before borrowing in current flavor. The possible values are:
   342  	//
   343  	// - `Borrow` (default): allocate in current flavor if borrowing
   344  	//   is possible.
   345  	// - `TryNextFlavor`: try next flavor even if the current
   346  	//   flavor has enough resources to borrow.
   347  	//
   348  	// +kubebuilder:validation:Enum={Borrow,TryNextFlavor}
   349  	// +kubebuilder:default="Borrow"
   350  	WhenCanBorrow FlavorFungibilityPolicy `json:"whenCanBorrow,omitempty"`
   351  	// whenCanPreempt determines whether a workload should try the next flavor
   352  	// before preempting in current flavor. The possible values are:
   353  	//
   354  	// - `Preempt`: allocate in current flavor if it's possible to preempt some workloads.
   355  	// - `TryNextFlavor` (default): try next flavor even if there are enough
   356  	//   candidates for preemption in the current flavor.
   357  	//
   358  	// +kubebuilder:validation:Enum={Preempt,TryNextFlavor}
   359  	// +kubebuilder:default="TryNextFlavor"
   360  	WhenCanPreempt FlavorFungibilityPolicy `json:"whenCanPreempt,omitempty"`
   361  }
   362  
   363  // ClusterQueuePreemption contains policies to preempt Workloads from this
   364  // ClusterQueue or the ClusterQueue's cohort.
   365  type ClusterQueuePreemption struct {
   366  	// reclaimWithinCohort determines whether a pending Workload can preempt
   367  	// Workloads from other ClusterQueues in the cohort that are using more than
   368  	// their nominal quota. The possible values are:
   369  	//
   370  	// - `Never` (default): do not preempt Workloads in the cohort.
   371  	// - `LowerPriority`: if the pending Workload fits within the nominal
   372  	//   quota of its ClusterQueue, only preempt Workloads in the cohort that have
   373  	//   lower priority than the pending Workload.
   374  	// - `Any`: if the pending Workload fits within the nominal quota of its
   375  	//   ClusterQueue, preempt any Workload in the cohort, irrespective of
   376  	//   priority.
   377  	//
   378  	// +kubebuilder:default=Never
   379  	// +kubebuilder:validation:Enum=Never;LowerPriority;Any
   380  	ReclaimWithinCohort PreemptionPolicy `json:"reclaimWithinCohort,omitempty"`
   381  
   382  	// borrowWithinCohort provides configuration to allow preemption within
   383  	// cohort while borrowing.
   384  	BorrowWithinCohort *BorrowWithinCohort `json:"borrowWithinCohort,omitempty"`
   385  
   386  	// withinClusterQueue determines whether a pending Workload that doesn't fit
   387  	// within the nominal quota for its ClusterQueue can preempt active Workloads in
   388  	// the ClusterQueue. The possible values are:
   389  	//
   390  	// - `Never` (default): do not preempt Workloads in the ClusterQueue.
   391  	// - `LowerPriority`: only preempt Workloads in the ClusterQueue that have
   392  	//   lower priority than the pending Workload.
   393  	// - `LowerOrNewerEqualPriority`: only preempt Workloads in the ClusterQueue that
   394  	//   either have a lower priority than the pending workload or equal priority
   395  	//   and are newer than the pending workload.
   396  	//
   397  	// +kubebuilder:default=Never
   398  	// +kubebuilder:validation:Enum=Never;LowerPriority;LowerOrNewerEqualPriority
   399  	WithinClusterQueue PreemptionPolicy `json:"withinClusterQueue,omitempty"`
   400  }
   401  
        // BorrowWithinCohortPolicy names the policy for preemption within the cohort
        // on behalf of a borrowing workload:
        //
        //   - Never: do not allow preemption, in other ClusterQueues within the
        //     cohort, for a borrowing workload.
        //   - LowerPriority: allow such preemption, but only if the preempted
        //     workloads are of lower priority.
   402  type BorrowWithinCohortPolicy string
   403  
   404  const (
   405  	BorrowWithinCohortPolicyNever         BorrowWithinCohortPolicy = "Never"
   406  	BorrowWithinCohortPolicyLowerPriority BorrowWithinCohortPolicy = "LowerPriority"
   407  )
   408  
   409  // BorrowWithinCohort contains configuration which allows preempting workloads
   410  // within cohort while borrowing.
   411  type BorrowWithinCohort struct {
   412  	// policy determines the policy for preemption to reclaim quota within cohort while borrowing.
   413  	// Possible values are:
   414  	// - `Never` (default): do not allow for preemption, in other
   415  	//    ClusterQueues within the cohort, for a borrowing workload.
   416  	// - `LowerPriority`: allow preemption, in other ClusterQueues
   417  	//    within the cohort, for a borrowing workload, but only if
   418  	//    the preempted workloads are of lower priority.
   419  	//
   420  	// +kubebuilder:default=Never
   421  	// +kubebuilder:validation:Enum=Never;LowerPriority
   422  	Policy BorrowWithinCohortPolicy `json:"policy,omitempty"`
   423  
   424  	// maxPriorityThreshold allows restricting the set of workloads which
   425  	// might be preempted by a borrowing workload, to only workloads with
   426  	// priority less than or equal to the specified threshold priority.
   427  	// When the threshold is not specified, then any workload satisfying the
   428  	// policy can be preempted by the borrowing workload.
   429  	//
   430  	// +optional
   431  	MaxPriorityThreshold *int32 `json:"maxPriorityThreshold,omitempty"`
   432  }
   433  
   434  // +genclient
   435  // +genclient:nonNamespaced
   436  // +kubebuilder:object:root=true
   437  // +kubebuilder:storageversion
   438  // +kubebuilder:resource:scope=Cluster
   439  // +kubebuilder:subresource:status
   440  // +kubebuilder:printcolumn:name="Cohort",JSONPath=".spec.cohort",type=string,description="Cohort that this ClusterQueue belongs to"
   441  // +kubebuilder:printcolumn:name="Strategy",JSONPath=".spec.queueingStrategy",type=string,description="The queueing strategy used to prioritize workloads",priority=1
   442  // +kubebuilder:printcolumn:name="Pending Workloads",JSONPath=".status.pendingWorkloads",type=integer,description="Number of pending workloads"
   443  // +kubebuilder:printcolumn:name="Admitted Workloads",JSONPath=".status.admittedWorkloads",type=integer,description="Number of admitted workloads that haven't finished yet",priority=1
   444  
   445  // ClusterQueue is the Schema for the clusterQueue API.
        // It is a cluster-scoped resource with a status subresource.
   446  type ClusterQueue struct {
   447  	metav1.TypeMeta   `json:",inline"`
   448  	metav1.ObjectMeta `json:"metadata,omitempty"`
   449  
   450  	Spec   ClusterQueueSpec   `json:"spec,omitempty"`
   451  	Status ClusterQueueStatus `json:"status,omitempty"`
   452  }
   453  
   454  // +kubebuilder:object:root=true
   455  
   456  // ClusterQueueList contains a list of ClusterQueue objects.
   457  type ClusterQueueList struct {
   458  	metav1.TypeMeta `json:",inline"`
   459  	metav1.ListMeta `json:"metadata,omitempty"`
   460  	Items           []ClusterQueue `json:"items"`
   461  }
   462  
   463  func init() {
        	// Register the ClusterQueue and ClusterQueueList types with SchemeBuilder,
        	// which is not declared in this file.
   464  	SchemeBuilder.Register(&ClusterQueue{}, &ClusterQueueList{})
   465  }