sigs.k8s.io/kueue@v0.6.2/apis/kueue/v1beta1/clusterqueue_types.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package v1beta1 18 19 import ( 20 corev1 "k8s.io/api/core/v1" 21 "k8s.io/apimachinery/pkg/api/resource" 22 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 ) 24 25 // ClusterQueueSpec defines the desired state of ClusterQueue 26 type ClusterQueueSpec struct { 27 // resourceGroups describes groups of resources. 28 // Each resource group defines the list of resources and a list of flavors 29 // that provide quotas for these resources. 30 // Each resource and each flavor can only form part of one resource group. 31 // resourceGroups can be up to 16. 32 // +listType=atomic 33 // +kubebuilder:validation:MaxItems=16 34 ResourceGroups []ResourceGroup `json:"resourceGroups,omitempty"` 35 36 // cohort that this ClusterQueue belongs to. CQs that belong to the 37 // same cohort can borrow unused resources from each other. 38 // 39 // A CQ can be a member of a single borrowing cohort. A workload submitted 40 // to a queue referencing this CQ can borrow quota from any CQ in the cohort. 41 // Only quota for the [resource, flavor] pairs listed in the CQ can be 42 // borrowed. 43 // If empty, this ClusterQueue cannot borrow from any other ClusterQueue and 44 // vice versa. 45 // 46 // A cohort is a name that links CQs together, but it doesn't reference any 47 // object. 48 // 49 // Validation of a cohort name is equivalent to that of object names: 50 // subdomain in DNS (RFC 1123). 51 Cohort string `json:"cohort,omitempty"` 52 53 // QueueingStrategy indicates the queueing strategy of the workloads 54 // across the queues in this ClusterQueue. This field is immutable. 55 // Current Supported Strategies: 56 // 57 // - StrictFIFO: workloads are ordered strictly by creation time. 58 // Older workloads that can't be admitted will block admitting newer 59 // workloads even if they fit available quota. 60 // - BestEffortFIFO: workloads are ordered by creation time, 61 // however older workloads that can't be admitted will not block 62 // admitting newer workloads that fit existing quota. 63 // 64 // +kubebuilder:default=BestEffortFIFO 65 // +kubebuilder:validation:Enum=StrictFIFO;BestEffortFIFO 66 QueueingStrategy QueueingStrategy `json:"queueingStrategy,omitempty"` 67 68 // namespaceSelector defines which namespaces are allowed to submit workloads to 69 // this clusterQueue. Beyond this basic support for policy, a policy agent like 70 // Gatekeeper should be used to enforce more advanced policies. 71 // Defaults to null which is a nothing selector (no namespaces eligible). 72 // If set to an empty selector `{}`, then all namespaces are eligible. 73 NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty"` 74 75 // flavorFungibility defines whether a workload should try the next flavor 76 // before borrowing or preempting in the flavor being evaluated. 77 FlavorFungibility *FlavorFungibility `json:"flavorFungibility,omitempty"` 78 79 // preemption describes policies to preempt Workloads from this ClusterQueue 80 // or the ClusterQueue's cohort. 81 // 82 // Preemption can happen in two scenarios: 83 // 84 // - When a Workload fits within the nominal quota of the ClusterQueue, but 85 // the quota is currently borrowed by other ClusterQueues in the cohort. 86 // Preempting Workloads in other ClusterQueues allows this ClusterQueue to 87 // reclaim its nominal quota. 88 // - When a Workload doesn't fit within the nominal quota of the ClusterQueue 89 // and there are admitted Workloads in the ClusterQueue with lower priority. 90 // 91 // The preemption algorithm tries to find a minimal set of Workloads to 92 // preempt to accomomdate the pending Workload, preempting Workloads with 93 // lower priority first. 94 Preemption *ClusterQueuePreemption `json:"preemption,omitempty"` 95 96 // admissionChecks lists the AdmissionChecks required by this ClusterQueue 97 // +optional 98 AdmissionChecks []string `json:"admissionChecks,omitempty"` 99 100 // stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being 101 // made. 102 // 103 // Depending on its value, its associated workloads will: 104 // 105 // - None - Workloads are admitted 106 // - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. 107 // - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. 108 // 109 // +optional 110 // +kubebuilder:validation:Enum=None;Hold;HoldAndDrain 111 // +kubebuilder:default="None" 112 StopPolicy *StopPolicy `json:"stopPolicy,omitempty"` 113 } 114 115 type QueueingStrategy string 116 117 const ( 118 // StrictFIFO means that workloads of the same priority are ordered strictly by creation time. 119 // Older workloads that can't be admitted will block admitting newer 120 // workloads even if they fit available quota. 121 StrictFIFO QueueingStrategy = "StrictFIFO" 122 123 // BestEffortFIFO means that workloads of the same priority are ordered by creation time, 124 // however older workloads that can't be admitted will not block 125 // admitting newer workloads that fit existing quota. 126 BestEffortFIFO QueueingStrategy = "BestEffortFIFO" 127 ) 128 129 type StopPolicy string 130 131 const ( 132 None StopPolicy = "None" 133 HoldAndDrain StopPolicy = "HoldAndDrain" 134 Hold StopPolicy = "Hold" 135 ) 136 137 type ResourceGroup struct { 138 // coveredResources is the list of resources covered by the flavors in this 139 // group. 140 // Examples: cpu, memory, vendor.com/gpu. 141 // The list cannot be empty and it can contain up to 16 resources. 142 // +kubebuilder:validation:MinItems=1 143 // +kubebuilder:validation:MaxItems=16 144 CoveredResources []corev1.ResourceName `json:"coveredResources"` 145 146 // flavors is the list of flavors that provide the resources of this group. 147 // Typically, different flavors represent different hardware models 148 // (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot 149 // cpus). 150 // Each flavor MUST list all the resources listed for this group in the same 151 // order as the .resources field. 152 // The list cannot be empty and it can contain up to 16 flavors. 153 // +listType=map 154 // +listMapKey=name 155 // +kubebuilder:validation:MinItems=1 156 // +kubebuilder:validation:MaxItems=16 157 Flavors []FlavorQuotas `json:"flavors"` 158 } 159 160 type FlavorQuotas struct { 161 // name of this flavor. The name should match the .metadata.name of a 162 // ResourceFlavor. If a matching ResourceFlavor does not exist, the 163 // ClusterQueue will have an Active condition set to False. 164 Name ResourceFlavorReference `json:"name"` 165 166 // resources is the list of quotas for this flavor per resource. 167 // There could be up to 16 resources. 168 // +listType=map 169 // +listMapKey=name 170 // +kubebuilder:validation:MinItems=1 171 // +kubebuilder:validation:MaxItems=16 172 Resources []ResourceQuota `json:"resources"` 173 } 174 175 type ResourceQuota struct { 176 // name of this resource. 177 Name corev1.ResourceName `json:"name"` 178 179 // nominalQuota is the quantity of this resource that is available for 180 // Workloads admitted by this ClusterQueue at a point in time. 181 // The nominalQuota must be non-negative. 182 // nominalQuota should represent the resources in the cluster available for 183 // running jobs (after discounting resources consumed by system components 184 // and pods not managed by kueue). In an autoscaled cluster, nominalQuota 185 // should account for resources that can be provided by a component such as 186 // Kubernetes cluster-autoscaler. 187 // 188 // If the ClusterQueue belongs to a cohort, the sum of the quotas for each 189 // (flavor, resource) combination defines the maximum quantity that can be 190 // allocated by a ClusterQueue in the cohort. 191 NominalQuota resource.Quantity `json:"nominalQuota"` 192 193 // borrowingLimit is the maximum amount of quota for the [flavor, resource] 194 // combination that this ClusterQueue is allowed to borrow from the unused 195 // quota of other ClusterQueues in the same cohort. 196 // In total, at a given time, Workloads in a ClusterQueue can consume a 197 // quantity of quota equal to nominalQuota+borrowingLimit, assuming the other 198 // ClusterQueues in the cohort have enough unused quota. 199 // If null, it means that there is no borrowing limit. 200 // If not null, it must be non-negative. 201 // borrowingLimit must be null if spec.cohort is empty. 202 // +optional 203 BorrowingLimit *resource.Quantity `json:"borrowingLimit,omitempty"` 204 205 // lendingLimit is the maximum amount of unused quota for the [flavor, resource] 206 // combination that this ClusterQueue can lend to other ClusterQueues in the same cohort. 207 // In total, at a given time, ClusterQueue reserves for its exclusive use 208 // a quantity of quota equals to nominalQuota - lendingLimit. 209 // If null, it means that there is no lending limit, meaning that 210 // all the nominalQuota can be borrowed by other clusterQueues in the cohort. 211 // If not null, it must be non-negative. 212 // lendingLimit must be null if spec.cohort is empty. 213 // This field is in alpha stage. To be able to use this field, 214 // enable the feature gate LendingLimit, which is disabled by default. 215 // +optional 216 LendingLimit *resource.Quantity `json:"lendingLimit,omitempty"` 217 } 218 219 // ResourceFlavorReference is the name of the ResourceFlavor. 220 type ResourceFlavorReference string 221 222 // ClusterQueueStatus defines the observed state of ClusterQueue 223 type ClusterQueueStatus struct { 224 // flavorsReservation are the reserved quotas, by flavor, currently in use by the 225 // workloads assigned to this ClusterQueue. 226 // +listType=map 227 // +listMapKey=name 228 // +kubebuilder:validation:MaxItems=16 229 // +optional 230 FlavorsReservation []FlavorUsage `json:"flavorsReservation"` 231 232 // flavorsUsage are the used quotas, by flavor, currently in use by the 233 // workloads admitted in this ClusterQueue. 234 // +listType=map 235 // +listMapKey=name 236 // +kubebuilder:validation:MaxItems=16 237 // +optional 238 FlavorsUsage []FlavorUsage `json:"flavorsUsage"` 239 240 // pendingWorkloads is the number of workloads currently waiting to be 241 // admitted to this clusterQueue. 242 // +optional 243 PendingWorkloads int32 `json:"pendingWorkloads"` 244 245 // reservingWorkloads is the number of workloads currently reserving quota in this 246 // clusterQueue. 247 // +optional 248 ReservingWorkloads int32 `json:"reservingWorkloads"` 249 250 // admittedWorkloads is the number of workloads currently admitted to this 251 // clusterQueue and haven't finished yet. 252 // +optional 253 AdmittedWorkloads int32 `json:"admittedWorkloads"` 254 255 // conditions hold the latest available observations of the ClusterQueue 256 // current state. 257 // +optional 258 // +listType=map 259 // +listMapKey=type 260 // +patchStrategy=merge 261 // +patchMergeKey=type 262 Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` 263 264 // PendingWorkloadsStatus contains the information exposed about the current 265 // status of the pending workloads in the cluster queue. 266 // +optional 267 PendingWorkloadsStatus *ClusterQueuePendingWorkloadsStatus `json:"pendingWorkloadsStatus"` 268 } 269 270 type ClusterQueuePendingWorkloadsStatus struct { 271 // Head contains the list of top pending workloads. 272 // +listType=atomic 273 // +optional 274 Head []ClusterQueuePendingWorkload `json:"clusterQueuePendingWorkload"` 275 276 // LastChangeTime indicates the time of the last change of the structure. 277 LastChangeTime metav1.Time `json:"lastChangeTime"` 278 } 279 280 // ClusterQueuePendingWorkload contains the information identifying a pending workload 281 // in the cluster queue. 282 type ClusterQueuePendingWorkload struct { 283 // Name indicates the name of the pending workload. 284 Name string `json:"name"` 285 286 // Namespace indicates the name of the pending workload. 287 Namespace string `json:"namespace"` 288 } 289 290 type FlavorUsage struct { 291 // name of the flavor. 292 Name ResourceFlavorReference `json:"name"` 293 294 // resources lists the quota usage for the resources in this flavor. 295 // +listType=map 296 // +listMapKey=name 297 // +kubebuilder:validation:MaxItems=16 298 Resources []ResourceUsage `json:"resources"` 299 } 300 301 type ResourceUsage struct { 302 // name of the resource 303 Name corev1.ResourceName `json:"name"` 304 305 // total is the total quantity of used quota, including the amount borrowed 306 // from the cohort. 307 Total resource.Quantity `json:"total,omitempty"` 308 309 // Borrowed is quantity of quota that is borrowed from the cohort. In other 310 // words, it's the used quota that is over the nominalQuota. 311 Borrowed resource.Quantity `json:"borrowed,omitempty"` 312 } 313 314 const ( 315 // ClusterQueueActive indicates that the ClusterQueue can admit new workloads and its quota 316 // can be borrowed by other ClusterQueues in the same cohort. 317 ClusterQueueActive string = "Active" 318 ) 319 320 type PreemptionPolicy string 321 322 const ( 323 PreemptionPolicyNever PreemptionPolicy = "Never" 324 PreemptionPolicyAny PreemptionPolicy = "Any" 325 PreemptionPolicyLowerPriority PreemptionPolicy = "LowerPriority" 326 PreemptionPolicyLowerOrNewerEqualPriority PreemptionPolicy = "LowerOrNewerEqualPriority" 327 ) 328 329 type FlavorFungibilityPolicy string 330 331 const ( 332 Borrow FlavorFungibilityPolicy = "Borrow" 333 Preempt FlavorFungibilityPolicy = "Preempt" 334 TryNextFlavor FlavorFungibilityPolicy = "TryNextFlavor" 335 ) 336 337 // FlavorFungibility determines whether a workload should try the next flavor 338 // before borrowing or preempting in current flavor. 339 type FlavorFungibility struct { 340 // whenCanBorrow determines whether a workload should try the next flavor 341 // before borrowing in current flavor. The possible values are: 342 // 343 // - `Borrow` (default): allocate in current flavor if borrowing 344 // is possible. 345 // - `TryNextFlavor`: try next flavor even if the current 346 // flavor has enough resources to borrow. 347 // 348 // +kubebuilder:validation:Enum={Borrow,TryNextFlavor} 349 // +kubebuilder:default="Borrow" 350 WhenCanBorrow FlavorFungibilityPolicy `json:"whenCanBorrow,omitempty"` 351 // whenCanPreempt determines whether a workload should try the next flavor 352 // before borrowing in current flavor. The possible values are: 353 // 354 // - `Preempt`: allocate in current flavor if it's possible to preempt some workloads. 355 // - `TryNextFlavor` (default): try next flavor even if there are enough 356 // candidates for preemption in the current flavor. 357 // 358 // +kubebuilder:validation:Enum={Preempt,TryNextFlavor} 359 // +kubebuilder:default="TryNextFlavor" 360 WhenCanPreempt FlavorFungibilityPolicy `json:"whenCanPreempt,omitempty"` 361 } 362 363 // ClusterQueuePreemption contains policies to preempt Workloads from this 364 // ClusterQueue or the ClusterQueue's cohort. 365 type ClusterQueuePreemption struct { 366 // reclaimWithinCohort determines whether a pending Workload can preempt 367 // Workloads from other ClusterQueues in the cohort that are using more than 368 // their nominal quota. The possible values are: 369 // 370 // - `Never` (default): do not preempt Workloads in the cohort. 371 // - `LowerPriority`: if the pending Workload fits within the nominal 372 // quota of its ClusterQueue, only preempt Workloads in the cohort that have 373 // lower priority than the pending Workload. 374 // - `Any`: if the pending Workload fits within the nominal quota of its 375 // ClusterQueue, preempt any Workload in the cohort, irrespective of 376 // priority. 377 // 378 // +kubebuilder:default=Never 379 // +kubebuilder:validation:Enum=Never;LowerPriority;Any 380 ReclaimWithinCohort PreemptionPolicy `json:"reclaimWithinCohort,omitempty"` 381 382 // borrowWithinCohort provides configuration to allow preemption within 383 // cohort while borrowing. 384 BorrowWithinCohort *BorrowWithinCohort `json:"borrowWithinCohort,omitempty"` 385 386 // withinClusterQueue determines whether a pending Workload that doesn't fit 387 // within the nominal quota for its ClusterQueue, can preempt active Workloads in 388 // the ClusterQueue. The possible values are: 389 // 390 // - `Never` (default): do not preempt Workloads in the ClusterQueue. 391 // - `LowerPriority`: only preempt Workloads in the ClusterQueue that have 392 // lower priority than the pending Workload. 393 // - `LowerOrNewerEqualPriority`: only preempt Workloads in the ClusterQueue that 394 // either have a lower priority than the pending workload or equal priority 395 // and are newer than the pending workload. 396 // 397 // +kubebuilder:default=Never 398 // +kubebuilder:validation:Enum=Never;LowerPriority;LowerOrNewerEqualPriority 399 WithinClusterQueue PreemptionPolicy `json:"withinClusterQueue,omitempty"` 400 } 401 402 type BorrowWithinCohortPolicy string 403 404 const ( 405 BorrowWithinCohortPolicyNever BorrowWithinCohortPolicy = "Never" 406 BorrowWithinCohortPolicyLowerPriority BorrowWithinCohortPolicy = "LowerPriority" 407 ) 408 409 // BorrowWithinCohort contains configuration which allows to preempt workloads 410 // within cohort while borrowing. 411 type BorrowWithinCohort struct { 412 // policy determines the policy for preemption to reclaim quota within cohort while borrowing. 413 // Possible values are: 414 // - `Never` (default): do not allow for preemption, in other 415 // ClusterQueues within the cohort, for a borrowing workload. 416 // - `LowerPriority`: allow preemption, in other ClusterQueues 417 // within the cohort, for a borrowing workload, but only if 418 // the preempted workloads are of lower priority. 419 // 420 // +kubebuilder:default=Never 421 // +kubebuilder:validation:Enum=Never;LowerPriority 422 Policy BorrowWithinCohortPolicy `json:"policy,omitempty"` 423 424 // maxPriorityThreshold allows to restrict the set of workloads which 425 // might be preempted by a borrowing workload, to only workloads with 426 // priority less than or equal to the specified threshold priority. 427 // When the threshold is not specified, then any workload satisfying the 428 // policy can be preempted by the borrowing workload. 429 // 430 // +optional 431 MaxPriorityThreshold *int32 `json:"maxPriorityThreshold,omitempty"` 432 } 433 434 // +genclient 435 // +genclient:nonNamespaced 436 // +kubebuilder:object:root=true 437 // +kubebuilder:storageversion 438 // +kubebuilder:resource:scope=Cluster 439 // +kubebuilder:subresource:status 440 // +kubebuilder:printcolumn:name="Cohort",JSONPath=".spec.cohort",type=string,description="Cohort that this ClusterQueue belongs to" 441 // +kubebuilder:printcolumn:name="Strategy",JSONPath=".spec.queueingStrategy",type=string,description="The queueing strategy used to prioritize workloads",priority=1 442 // +kubebuilder:printcolumn:name="Pending Workloads",JSONPath=".status.pendingWorkloads",type=integer,description="Number of pending workloads" 443 // +kubebuilder:printcolumn:name="Admitted Workloads",JSONPath=".status.admittedWorkloads",type=integer,description="Number of admitted workloads that haven't finished yet",priority=1 444 445 // ClusterQueue is the Schema for the clusterQueue API. 446 type ClusterQueue struct { 447 metav1.TypeMeta `json:",inline"` 448 metav1.ObjectMeta `json:"metadata,omitempty"` 449 450 Spec ClusterQueueSpec `json:"spec,omitempty"` 451 Status ClusterQueueStatus `json:"status,omitempty"` 452 } 453 454 // +kubebuilder:object:root=true 455 456 // ClusterQueueList contains a list of ClusterQueue 457 type ClusterQueueList struct { 458 metav1.TypeMeta `json:",inline"` 459 metav1.ListMeta `json:"metadata,omitempty"` 460 Items []ClusterQueue `json:"items"` 461 } 462 463 func init() { 464 SchemeBuilder.Register(&ClusterQueue{}, &ClusterQueueList{}) 465 }