sigs.k8s.io/kueue@v0.6.2/pkg/controller/core/localqueue_controller.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package core 18 19 import ( 20 "context" 21 22 "github.com/go-logr/logr" 23 "k8s.io/apimachinery/pkg/api/equality" 24 apierrors "k8s.io/apimachinery/pkg/api/errors" 25 "k8s.io/apimachinery/pkg/api/meta" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/types" 28 "k8s.io/client-go/util/workqueue" 29 "k8s.io/klog/v2" 30 "k8s.io/utils/ptr" 31 ctrl "sigs.k8s.io/controller-runtime" 32 "sigs.k8s.io/controller-runtime/pkg/client" 33 "sigs.k8s.io/controller-runtime/pkg/controller" 34 "sigs.k8s.io/controller-runtime/pkg/event" 35 "sigs.k8s.io/controller-runtime/pkg/reconcile" 36 "sigs.k8s.io/controller-runtime/pkg/source" 37 38 config "sigs.k8s.io/kueue/apis/config/v1beta1" 39 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 40 "sigs.k8s.io/kueue/pkg/cache" 41 "sigs.k8s.io/kueue/pkg/constants" 42 "sigs.k8s.io/kueue/pkg/controller/core/indexer" 43 "sigs.k8s.io/kueue/pkg/queue" 44 ) 45 46 const ( 47 queueIsInactiveMsg = "Can't submit new workloads to clusterQueue" 48 failedUpdateLqStatusMsg = "Failed to retrieve localQueue status" 49 ) 50 51 // LocalQueueReconciler reconciles a LocalQueue object 52 type LocalQueueReconciler struct { 53 client client.Client 54 log logr.Logger 55 queues *queue.Manager 56 cache *cache.Cache 57 wlUpdateCh chan event.GenericEvent 58 } 59 60 func NewLocalQueueReconciler(client client.Client, queues *queue.Manager, cache *cache.Cache) *LocalQueueReconciler { 61 return &LocalQueueReconciler{ 62 log: ctrl.Log.WithName("localqueue-reconciler"), 63 queues: queues, 64 cache: cache, 65 client: client, 66 wlUpdateCh: make(chan event.GenericEvent, updateChBuffer), 67 } 68 } 69 70 func (r *LocalQueueReconciler) NotifyWorkloadUpdate(oldWl, newWl *kueue.Workload) { 71 if oldWl != nil { 72 r.wlUpdateCh <- event.GenericEvent{Object: oldWl} 73 if newWl != nil && oldWl.Spec.QueueName != newWl.Spec.QueueName { 74 r.wlUpdateCh <- event.GenericEvent{Object: newWl} 75 } 76 return 77 } 78 if newWl != nil { 79 r.wlUpdateCh <- event.GenericEvent{Object: newWl} 80 } 81 } 82 83 // +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch 84 // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=localqueues,verbs=get;list;watch;create;update;patch;delete 85 // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=localqueues/status,verbs=get;update;patch 86 // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=localqueues/finalizers,verbs=update 87 88 func (r *LocalQueueReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 89 var queueObj kueue.LocalQueue 90 if err := r.client.Get(ctx, req.NamespacedName, &queueObj); err != nil { 91 // we'll ignore not-found errors, since there is nothing to do. 92 return ctrl.Result{}, client.IgnoreNotFound(err) 93 } 94 log := ctrl.LoggerFrom(ctx).WithValues("localQueue", klog.KObj(&queueObj)) 95 ctx = ctrl.LoggerInto(ctx, log) 96 log.V(2).Info("Reconciling LocalQueue") 97 98 var cq kueue.ClusterQueue 99 err := r.client.Get(ctx, client.ObjectKey{Name: string(queueObj.Spec.ClusterQueue)}, &cq) 100 if err != nil { 101 if apierrors.IsNotFound(err) { 102 err = r.UpdateStatusIfChanged(ctx, &queueObj, metav1.ConditionFalse, "ClusterQueueDoesNotExist", queueIsInactiveMsg) 103 } 104 return ctrl.Result{}, client.IgnoreNotFound(err) 105 } 106 if meta.IsStatusConditionTrue(cq.Status.Conditions, kueue.ClusterQueueActive) { 107 err = r.UpdateStatusIfChanged(ctx, &queueObj, metav1.ConditionTrue, "Ready", "Can submit new workloads to clusterQueue") 108 return ctrl.Result{}, client.IgnoreNotFound(err) 109 } 110 err = r.UpdateStatusIfChanged(ctx, &queueObj, metav1.ConditionFalse, "ClusterQueueIsInactive", queueIsInactiveMsg) 111 return ctrl.Result{}, client.IgnoreNotFound(err) 112 } 113 114 func (r *LocalQueueReconciler) Create(e event.CreateEvent) bool { 115 q, match := e.Object.(*kueue.LocalQueue) 116 if !match { 117 // No need to interact with the queue manager for other objects. 118 return true 119 } 120 log := r.log.WithValues("localQueue", klog.KObj(q)) 121 log.V(2).Info("LocalQueue create event") 122 ctx := logr.NewContext(context.Background(), log) 123 if err := r.queues.AddLocalQueue(ctx, q); err != nil { 124 log.Error(err, "Failed to add localQueue to the queueing system") 125 } 126 if err := r.cache.AddLocalQueue(q); err != nil { 127 log.Error(err, "Failed to add localQueue to the cache") 128 } 129 return true 130 } 131 132 func (r *LocalQueueReconciler) Delete(e event.DeleteEvent) bool { 133 q, match := e.Object.(*kueue.LocalQueue) 134 if !match { 135 // No need to interact with the queue manager for other objects. 136 return true 137 } 138 r.log.V(2).Info("LocalQueue delete event", "localQueue", klog.KObj(q)) 139 r.queues.DeleteLocalQueue(q) 140 r.cache.DeleteLocalQueue(q) 141 return true 142 } 143 144 func (r *LocalQueueReconciler) Update(e event.UpdateEvent) bool { 145 q, match := e.ObjectNew.(*kueue.LocalQueue) 146 if !match { 147 // No need to interact with the queue manager for other objects. 148 return true 149 } 150 log := r.log.WithValues("localQueue", klog.KObj(q)) 151 log.V(2).Info("Queue update event") 152 if err := r.queues.UpdateLocalQueue(q); err != nil { 153 log.Error(err, "Failed to update queue in the queueing system") 154 } 155 oldQ := e.ObjectOld.(*kueue.LocalQueue) 156 if err := r.cache.UpdateLocalQueue(oldQ, q); err != nil { 157 log.Error(err, "Failed to update localQueue in the cache") 158 } 159 return true 160 } 161 162 func (r *LocalQueueReconciler) Generic(e event.GenericEvent) bool { 163 r.log.V(3).Info("Got Workload event", "workload", klog.KObj(e.Object)) 164 return true 165 } 166 167 // qWorkloadHandler signals the controller to reconcile the Queue associated 168 // to the workload in the event. 169 // Since the events come from a channel Source, only the Generic handler will 170 // receive events. 171 type qWorkloadHandler struct{} 172 173 func (h *qWorkloadHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) { 174 } 175 176 func (h *qWorkloadHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) { 177 } 178 179 func (h *qWorkloadHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) { 180 } 181 182 func (h *qWorkloadHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { 183 w := e.Object.(*kueue.Workload) 184 if w.Name == "" { 185 return 186 } 187 req := reconcile.Request{ 188 NamespacedName: types.NamespacedName{ 189 Name: w.Spec.QueueName, 190 Namespace: w.Namespace, 191 }, 192 } 193 q.AddAfter(req, constants.UpdatesBatchPeriod) 194 } 195 196 // qCQHandler signals the controller to reconcile the Queue associated 197 // to the workload in the event. 198 type qCQHandler struct { 199 client client.Client 200 } 201 202 func (h *qCQHandler) Create(ctx context.Context, e event.CreateEvent, wq workqueue.RateLimitingInterface) { 203 cq, ok := e.Object.(*kueue.ClusterQueue) 204 if !ok { 205 return 206 } 207 h.addLocalQueueToWorkQueue(ctx, cq, wq) 208 } 209 210 func (h *qCQHandler) Update(ctx context.Context, e event.UpdateEvent, wq workqueue.RateLimitingInterface) { 211 newCq, ok := e.ObjectNew.(*kueue.ClusterQueue) 212 if !ok { 213 return 214 } 215 oldCq, ok := e.ObjectOld.(*kueue.ClusterQueue) 216 if !ok { 217 return 218 } 219 // Iff .status.conditions of the clusterQueue is updated, 220 // this handler sends all queues related to the clusterQueue to workqueue. 221 if equality.Semantic.DeepEqual(oldCq.Status.Conditions, newCq.Status.Conditions) { 222 return 223 } 224 h.addLocalQueueToWorkQueue(ctx, newCq, wq) 225 } 226 227 func (h *qCQHandler) Delete(ctx context.Context, e event.DeleteEvent, wq workqueue.RateLimitingInterface) { 228 cq, ok := e.Object.(*kueue.ClusterQueue) 229 if !ok { 230 return 231 } 232 h.addLocalQueueToWorkQueue(ctx, cq, wq) 233 } 234 235 func (h *qCQHandler) Generic(context.Context, event.GenericEvent, workqueue.RateLimitingInterface) { 236 } 237 238 func (h *qCQHandler) addLocalQueueToWorkQueue(ctx context.Context, cq *kueue.ClusterQueue, wq workqueue.RateLimitingInterface) { 239 log := ctrl.LoggerFrom(ctx).WithValues("clusterQueue", klog.KObj(cq)) 240 ctx = ctrl.LoggerInto(ctx, log) 241 242 var queues kueue.LocalQueueList 243 err := h.client.List(ctx, &queues, client.MatchingFields{indexer.QueueClusterQueueKey: cq.Name}) 244 if err != nil { 245 log.Error(err, "Could not list queues that match the clusterQueue") 246 return 247 } 248 for _, q := range queues.Items { 249 wq.Add(reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&q)}) 250 } 251 } 252 253 // SetupWithManager sets up the controller with the Manager. 254 func (r *LocalQueueReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Configuration) error { 255 queueCQHandler := qCQHandler{ 256 client: r.client, 257 } 258 return ctrl.NewControllerManagedBy(mgr). 259 For(&kueue.LocalQueue{}). 260 WithOptions(controller.Options{NeedLeaderElection: ptr.To(false)}). 261 WatchesRawSource(&source.Channel{Source: r.wlUpdateCh}, &qWorkloadHandler{}). 262 Watches(&kueue.ClusterQueue{}, &queueCQHandler). 263 WithEventFilter(r). 264 Complete(WithLeadingManager(mgr, r, &kueue.LocalQueue{}, cfg)) 265 } 266 267 func (r *LocalQueueReconciler) UpdateStatusIfChanged( 268 ctx context.Context, 269 queue *kueue.LocalQueue, 270 conditionStatus metav1.ConditionStatus, 271 reason, msg string, 272 ) error { 273 oldStatus := queue.Status.DeepCopy() 274 pendingWls, err := r.queues.PendingWorkloads(queue) 275 if err != nil { 276 r.log.Error(err, failedUpdateLqStatusMsg) 277 return err 278 } 279 stats, err := r.cache.LocalQueueUsage(queue) 280 if err != nil { 281 r.log.Error(err, failedUpdateLqStatusMsg) 282 return err 283 } 284 queue.Status.PendingWorkloads = pendingWls 285 queue.Status.ReservingWorkloads = int32(stats.ReservingWorkloads) 286 queue.Status.AdmittedWorkloads = int32(stats.AdmittedWorkloads) 287 queue.Status.FlavorsReservation = stats.ReservedResources 288 queue.Status.FlavorUsage = stats.AdmittedResources 289 if len(conditionStatus) != 0 && len(reason) != 0 && len(msg) != 0 { 290 meta.SetStatusCondition(&queue.Status.Conditions, metav1.Condition{ 291 Type: kueue.LocalQueueActive, 292 Status: conditionStatus, 293 Reason: reason, 294 Message: msg, 295 }) 296 } 297 if !equality.Semantic.DeepEqual(oldStatus, queue.Status) { 298 return r.client.Status().Update(ctx, queue) 299 } 300 return nil 301 }