sigs.k8s.io/kueue@v0.6.2/apis/config/v1beta1/configuration_types.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package v1beta1 18 19 import ( 20 "time" 21 22 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 configv1alpha1 "k8s.io/component-base/config/v1alpha1" 24 ) 25 26 // +k8s:defaulter-gen=true 27 // +kubebuilder:object:root=true 28 29 // Configuration is the Schema for the kueueconfigurations API 30 type Configuration struct { 31 metav1.TypeMeta `json:",inline"` 32 33 // Namespace is the namespace in which kueue is deployed. It is used as part of DNSName of the webhook Service. 34 // If not set, the value is set from the file /var/run/secrets/kubernetes.io/serviceaccount/namespace 35 // If the file doesn't exist, default value is kueue-system. 36 Namespace *string `json:"namespace,omitempty"` 37 38 // ControllerManager returns the configurations for controllers 39 ControllerManager `json:",inline"` 40 41 // ManageJobsWithoutQueueName controls whether or not Kueue reconciles 42 // batch/v1.Jobs that don't set the annotation kueue.x-k8s.io/queue-name. 43 // If set to true, then those jobs will be suspended and never started unless 44 // they are assigned a queue and eventually admitted. This also applies to 45 // jobs created before starting the kueue controller. 46 // Defaults to false; therefore, those jobs are not managed and if they are created 47 // unsuspended, they will start immediately. 48 ManageJobsWithoutQueueName bool `json:"manageJobsWithoutQueueName"` 49 50 // InternalCertManagement is configuration for internalCertManagement 51 InternalCertManagement *InternalCertManagement `json:"internalCertManagement,omitempty"` 52 53 // WaitForPodsReady is configuration to provide simple all-or-nothing 54 // scheduling semantics for jobs to ensure they get resources assigned. 55 // This is achieved by blocking the start of new jobs until the previously 56 // started job has all pods running (ready). 57 WaitForPodsReady *WaitForPodsReady `json:"waitForPodsReady,omitempty"` 58 59 // ClientConnection provides additional configuration options for Kubernetes 60 // API server client. 61 ClientConnection *ClientConnection `json:"clientConnection,omitempty"` 62 63 // Integrations provide configuration options for AI/ML/Batch frameworks 64 // integrations (including K8S job). 65 Integrations *Integrations `json:"integrations,omitempty"` 66 67 // QueueVisibility is configuration to expose the information about the top 68 // pending workloads. 69 QueueVisibility *QueueVisibility `json:"queueVisibility,omitempty"` 70 71 // MultiKueue controls the behaviour of the MultiKueue AdmissionCheck Controller. 72 MultiKueue *MultiKueue `json:"multiKueue,omitempty"` 73 } 74 75 type ControllerManager struct { 76 // Webhook contains the controllers webhook configuration 77 // +optional 78 Webhook ControllerWebhook `json:"webhook,omitempty"` 79 80 // LeaderElection is the LeaderElection config to be used when configuring 81 // the manager.Manager leader election 82 // +optional 83 LeaderElection *configv1alpha1.LeaderElectionConfiguration `json:"leaderElection,omitempty"` 84 85 // Metrics contains the controller metrics configuration 86 // +optional 87 Metrics ControllerMetrics `json:"metrics,omitempty"` 88 89 // Health contains the controller health configuration 90 // +optional 91 Health ControllerHealth `json:"health,omitempty"` 92 93 // PprofBindAddress is the TCP address that the controller should bind to 94 // for serving pprof. 95 // It can be set to "" or "0" to disable the pprof serving. 96 // Since pprof may contain sensitive information, make sure to protect it 97 // before exposing it to public. 98 // +optional 99 PprofBindAddress string `json:"pprofBindAddress,omitempty"` 100 101 // Controller contains global configuration options for controllers 102 // registered within this manager. 103 // +optional 104 Controller *ControllerConfigurationSpec `json:"controller,omitempty"` 105 } 106 107 // ControllerWebhook defines the webhook server for the controller. 108 type ControllerWebhook struct { 109 // Port is the port that the webhook server serves at. 110 // It is used to set webhook.Server.Port. 111 // +optional 112 Port *int `json:"port,omitempty"` 113 114 // Host is the hostname that the webhook server binds to. 115 // It is used to set webhook.Server.Host. 116 // +optional 117 Host string `json:"host,omitempty"` 118 119 // CertDir is the directory that contains the server key and certificate. 120 // if not set, webhook server would look up the server key and certificate in 121 // {TempDir}/k8s-webhook-server/serving-certs. The server key and certificate 122 // must be named tls.key and tls.crt, respectively. 123 // +optional 124 CertDir string `json:"certDir,omitempty"` 125 } 126 127 // ControllerMetrics defines the metrics configs. 128 type ControllerMetrics struct { 129 // BindAddress is the TCP address that the controller should bind to 130 // for serving prometheus metrics. 131 // It can be set to "0" to disable the metrics serving. 132 // +optional 133 BindAddress string `json:"bindAddress,omitempty"` 134 135 // EnableClusterQueueResources, if true the cluster queue resource usage and quotas 136 // metrics will be reported. 137 // +optional 138 EnableClusterQueueResources bool `json:"enableClusterQueueResources,omitempty"` 139 } 140 141 // ControllerHealth defines the health configs. 142 type ControllerHealth struct { 143 // HealthProbeBindAddress is the TCP address that the controller should bind to 144 // for serving health probes 145 // It can be set to "0" or "" to disable serving the health probe. 146 // +optional 147 HealthProbeBindAddress string `json:"healthProbeBindAddress,omitempty"` 148 149 // ReadinessEndpointName, defaults to "readyz" 150 // +optional 151 ReadinessEndpointName string `json:"readinessEndpointName,omitempty"` 152 153 // LivenessEndpointName, defaults to "healthz" 154 // +optional 155 LivenessEndpointName string `json:"livenessEndpointName,omitempty"` 156 } 157 158 // ControllerConfigurationSpec defines the global configuration for 159 // controllers registered with the manager. 160 type ControllerConfigurationSpec struct { 161 // GroupKindConcurrency is a map from a Kind to the number of concurrent reconciliation 162 // allowed for that controller. 163 // 164 // When a controller is registered within this manager using the builder utilities, 165 // users have to specify the type the controller reconciles in the For(...) call. 166 // If the object's kind passed matches one of the keys in this map, the concurrency 167 // for that controller is set to the number specified. 168 // 169 // The key is expected to be consistent in form with GroupKind.String(), 170 // e.g. ReplicaSet in apps group (regardless of version) would be `ReplicaSet.apps`. 171 // 172 // +optional 173 GroupKindConcurrency map[string]int `json:"groupKindConcurrency,omitempty"` 174 175 // CacheSyncTimeout refers to the time limit set to wait for syncing caches. 176 // Defaults to 2 minutes if not set. 177 // +optional 178 CacheSyncTimeout *time.Duration `json:"cacheSyncTimeout,omitempty"` 179 } 180 181 type WaitForPodsReady struct { 182 // Enable when true, indicates that each admitted workload 183 // blocks the admission of all other workloads from all queues until it is in the 184 // `PodsReady` condition. If false, all workloads start as soon as they are 185 // admitted and do not block admission of other workloads. The PodsReady 186 // condition is only added if this setting is enabled. It defaults to false. 187 Enable bool `json:"enable,omitempty"` 188 189 // Timeout defines the time for an admitted workload to reach the 190 // PodsReady=true condition. When the timeout is reached, the workload admission 191 // is cancelled and requeued in the same cluster queue. Defaults to 5min. 192 // +optional 193 Timeout *metav1.Duration `json:"timeout,omitempty"` 194 195 // BlockAdmission when true, cluster queue will block admissions for all subsequent jobs 196 // until the jobs reach the PodsReady=true condition. It defaults to false if Enable is false 197 // and defaults to true otherwise. 198 BlockAdmission *bool `json:"blockAdmission,omitempty"` 199 200 // RequeuingStrategy defines the strategy for requeuing a Workload. 201 // +optional 202 RequeuingStrategy *RequeuingStrategy `json:"requeuingStrategy,omitempty"` 203 } 204 205 type MultiKueue struct { 206 // GCInterval defines the time interval between two consecutive garbage collection runs. 207 // Defaults to 1min. If 0, the garbage collection is disabled. 208 // +optional 209 GCInterval *metav1.Duration `json:"gcInterval"` 210 211 // Origin defines a label value used to track the creator of workloads in the worker 212 // clusters. 213 // This is used by multikueue in components like its garbage collector to identify 214 // remote objects that ware created by this multikueue manager cluster and delete 215 // them if their local counterpart no longer exists. 216 // +optional 217 Origin *string `json:"origin,omitempty"` 218 } 219 220 type RequeuingStrategy struct { 221 // Timestamp defines the timestamp used for re-queuing a Workload 222 // that was evicted due to Pod readiness. The possible values are: 223 // 224 // - `Eviction` (default) indicates from Workload `Evicted` condition with `PodsReadyTimeout` reason. 225 // - `Creation` indicates from Workload .metadata.creationTimestamp. 226 // 227 // +optional 228 Timestamp *RequeuingTimestamp `json:"timestamp,omitempty"` 229 230 // BackoffLimitCount defines the maximum number of re-queuing retries. 231 // Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). 232 // When it is null, the workloads will repeatedly and endless re-queueing. 233 // 234 // Every backoff duration is about "1.41284738^(n-1)+Rand" where the "n" represents the "workloadStatus.requeueState.count", 235 // and the "Rand" represents the random jitter. During this time, the workload is taken as an inadmissible and 236 // other workloads will have a chance to be admitted. 237 // For example, when the "waitForPodsReady.timeout" is the default, the workload deactivation time is as follows: 238 // {backoffLimitCount, workloadDeactivationSeconds} 239 // ~= {1, 601}, {2, 902}, ...,{5, 1811}, ...,{10, 3374}, ...,{20, 8730}, ...,{30, 86400(=24 hours)}, ... 240 // 241 // Defaults to null. 242 // +optional 243 BackoffLimitCount *int32 `json:"backoffLimitCount,omitempty"` 244 } 245 246 type RequeuingTimestamp string 247 248 const ( 249 // CreationTimestamp timestamp (from Workload .metadata.creationTimestamp). 250 CreationTimestamp RequeuingTimestamp = "Creation" 251 252 // EvictionTimestamp timestamp (from Workload .status.conditions). 253 EvictionTimestamp RequeuingTimestamp = "Eviction" 254 ) 255 256 type InternalCertManagement struct { 257 // Enable controls whether to enable internal cert management or not. 258 // Defaults to true. If you want to use a third-party management, e.g. cert-manager, 259 // set it to false. See the user guide for more information. 260 Enable *bool `json:"enable,omitempty"` 261 262 // WebhookServiceName is the name of the Service used as part of the DNSName. 263 // Defaults to kueue-webhook-service. 264 WebhookServiceName *string `json:"webhookServiceName,omitempty"` 265 266 // WebhookSecretName is the name of the Secret used to store CA and server certs. 267 // Defaults to kueue-webhook-server-cert. 268 WebhookSecretName *string `json:"webhookSecretName,omitempty"` 269 } 270 271 type ClientConnection struct { 272 // QPS controls the number of queries per second allowed for K8S api server 273 // connection. 274 QPS *float32 `json:"qps,omitempty"` 275 276 // Burst allows extra queries to accumulate when a client is exceeding its rate. 277 Burst *int32 `json:"burst,omitempty"` 278 } 279 280 type Integrations struct { 281 // List of framework names to be enabled. 282 // Possible options: 283 // - "batch/job" 284 // - "kubeflow.org/mpijob" 285 // - "ray.io/rayjob" 286 // - "ray.io/raycluster" 287 // - "jobset.x-k8s.io/jobset" 288 // - "kubeflow.org/mxjob" 289 // - "kubeflow.org/paddlejob" 290 // - "kubeflow.org/pytorchjob" 291 // - "kubeflow.org/tfjob" 292 // - "kubeflow.org/xgboostjob" 293 // - "pod" 294 Frameworks []string `json:"frameworks,omitempty"` 295 // PodOptions defines kueue controller behaviour for pod objects 296 PodOptions *PodIntegrationOptions `json:"podOptions,omitempty"` 297 } 298 299 type PodIntegrationOptions struct { 300 // NamespaceSelector can be used to omit some namespaces from pod reconciliation 301 NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty"` 302 // PodSelector can be used to choose what pods to reconcile 303 PodSelector *metav1.LabelSelector `json:"podSelector,omitempty"` 304 } 305 306 type QueueVisibility struct { 307 // ClusterQueues is configuration to expose the information 308 // about the top pending workloads in the cluster queue. 309 ClusterQueues *ClusterQueueVisibility `json:"clusterQueues,omitempty"` 310 311 // UpdateIntervalSeconds specifies the time interval for updates to the structure 312 // of the top pending workloads in the queues. 313 // The minimum value is 1. 314 // Defaults to 5. 315 UpdateIntervalSeconds int32 `json:"updateIntervalSeconds,omitempty"` 316 } 317 318 type ClusterQueueVisibility struct { 319 // MaxCount indicates the maximal number of pending workloads exposed in the 320 // cluster queue status. When the value is set to 0, then ClusterQueue 321 // visibility updates are disabled. 322 // The maximal value is 4000. 323 // Defaults to 10. 324 MaxCount int32 `json:"maxCount,omitempty"` 325 }