sigs.k8s.io/kueue@v0.6.2/pkg/controller/core/resourceflavor_controller.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package core 18 19 import ( 20 "context" 21 22 "github.com/go-logr/logr" 23 "k8s.io/apimachinery/pkg/types" 24 "k8s.io/apimachinery/pkg/util/sets" 25 "k8s.io/client-go/util/workqueue" 26 "k8s.io/klog/v2" 27 "k8s.io/utils/ptr" 28 ctrl "sigs.k8s.io/controller-runtime" 29 "sigs.k8s.io/controller-runtime/pkg/client" 30 "sigs.k8s.io/controller-runtime/pkg/controller" 31 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 32 "sigs.k8s.io/controller-runtime/pkg/event" 33 "sigs.k8s.io/controller-runtime/pkg/reconcile" 34 "sigs.k8s.io/controller-runtime/pkg/source" 35 36 config "sigs.k8s.io/kueue/apis/config/v1beta1" 37 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 38 "sigs.k8s.io/kueue/pkg/cache" 39 "sigs.k8s.io/kueue/pkg/queue" 40 ) 41 42 type ResourceFlavorUpdateWatcher interface { 43 NotifyResourceFlavorUpdate(oldRF, newRF *kueue.ResourceFlavor) 44 } 45 46 // ResourceFlavorReconciler reconciles a ResourceFlavor object 47 type ResourceFlavorReconciler struct { 48 log logr.Logger 49 qManager *queue.Manager 50 cache *cache.Cache 51 client client.Client 52 cqUpdateCh chan event.GenericEvent 53 watchers []ResourceFlavorUpdateWatcher 54 } 55 56 func NewResourceFlavorReconciler( 57 client client.Client, 58 qMgr *queue.Manager, 59 cache *cache.Cache, 60 ) *ResourceFlavorReconciler { 61 return &ResourceFlavorReconciler{ 62 log: ctrl.Log.WithName("resourceflavor-reconciler"), 63 cache: cache, 64 client: client, 65 qManager: qMgr, 66 cqUpdateCh: make(chan event.GenericEvent, updateChBuffer), 67 } 68 } 69 70 // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch;update;delete 71 // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors/finalizers,verbs=update 72 73 func (r *ResourceFlavorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 74 var flavor kueue.ResourceFlavor 75 if err := r.client.Get(ctx, req.NamespacedName, &flavor); err != nil { 76 // we'll ignore not-found errors, since there is nothing to do. 77 return ctrl.Result{}, client.IgnoreNotFound(err) 78 } 79 log := ctrl.LoggerFrom(ctx).WithValues("resourceFlavor", klog.KObj(&flavor)) 80 ctx = ctrl.LoggerInto(ctx, log) 81 log.V(2).Info("Reconciling ResourceFlavor") 82 83 if flavor.DeletionTimestamp.IsZero() { 84 // Although we'll add the finalizer via webhook mutation now, this is still useful 85 // as a fallback. 86 if controllerutil.AddFinalizer(&flavor, kueue.ResourceInUseFinalizerName) { 87 if err := r.client.Update(ctx, &flavor); err != nil { 88 return ctrl.Result{}, err 89 } 90 log.V(5).Info("Added finalizer") 91 } 92 } else { 93 if controllerutil.ContainsFinalizer(&flavor, kueue.ResourceInUseFinalizerName) { 94 if cqs := r.cache.ClusterQueuesUsingFlavor(flavor.Name); len(cqs) != 0 { 95 log.V(3).Info("resourceFlavor is still in use", "ClusterQueues", cqs) 96 // We avoid to return error here to prevent backoff requeue, which is passive and wasteful. 97 // Instead, we drive the removal of finalizer by ClusterQueue Update/Delete events 98 // when resourceFlavor is no longer in use. 99 return ctrl.Result{}, nil 100 } 101 102 controllerutil.RemoveFinalizer(&flavor, kueue.ResourceInUseFinalizerName) 103 if err := r.client.Update(ctx, &flavor); err != nil { 104 return ctrl.Result{}, err 105 } 106 log.V(5).Info("Removed finalizer") 107 } 108 } 109 110 return ctrl.Result{}, nil 111 } 112 113 func (r *ResourceFlavorReconciler) AddUpdateWatcher(watchers ...ResourceFlavorUpdateWatcher) { 114 r.watchers = watchers 115 } 116 117 func (r *ResourceFlavorReconciler) notifyWatchers(oldRF, newRF *kueue.ResourceFlavor) { 118 for _, w := range r.watchers { 119 w.NotifyResourceFlavorUpdate(oldRF, newRF) 120 } 121 } 122 123 func (r *ResourceFlavorReconciler) Create(e event.CreateEvent) bool { 124 flv, match := e.Object.(*kueue.ResourceFlavor) 125 if !match { 126 return false 127 } 128 defer r.notifyWatchers(nil, flv) 129 130 log := r.log.WithValues("resourceFlavor", klog.KObj(flv)) 131 log.V(2).Info("ResourceFlavor create event") 132 133 // As long as one clusterQueue becomes active, 134 // we should inform clusterQueue controller to broadcast the event. 135 if cqNames := r.cache.AddOrUpdateResourceFlavor(flv.DeepCopy()); len(cqNames) > 0 { 136 r.qManager.QueueInadmissibleWorkloads(context.Background(), cqNames) 137 // If at least one CQ becomes active, then those CQs should now get evaluated by the scheduler; 138 // note that the workloads in those CQs are not necessarily "inadmissible", and hence we trigger a 139 // broadcast here in all cases. 140 r.qManager.Broadcast() 141 } 142 return true 143 } 144 145 func (r *ResourceFlavorReconciler) Delete(e event.DeleteEvent) bool { 146 flv, match := e.Object.(*kueue.ResourceFlavor) 147 if !match { 148 return false 149 } 150 defer r.notifyWatchers(flv, nil) 151 152 log := r.log.WithValues("resourceFlavor", klog.KObj(flv)) 153 log.V(2).Info("ResourceFlavor delete event") 154 155 if cqNames := r.cache.DeleteResourceFlavor(flv); len(cqNames) > 0 { 156 r.qManager.QueueInadmissibleWorkloads(context.Background(), cqNames) 157 } 158 return false 159 } 160 161 func (r *ResourceFlavorReconciler) Update(e event.UpdateEvent) bool { 162 oldFlv, match := e.ObjectOld.(*kueue.ResourceFlavor) 163 if !match { 164 return false 165 } 166 newFlv, match := e.ObjectNew.(*kueue.ResourceFlavor) 167 if !match { 168 return false 169 } 170 defer r.notifyWatchers(oldFlv, newFlv) 171 172 log := r.log.WithValues("resourceFlavor", klog.KObj(newFlv)) 173 log.V(2).Info("ResourceFlavor update event") 174 175 if newFlv.DeletionTimestamp != nil { 176 return true 177 } 178 179 if cqNames := r.cache.AddOrUpdateResourceFlavor(newFlv.DeepCopy()); len(cqNames) > 0 { 180 r.qManager.QueueInadmissibleWorkloads(context.Background(), cqNames) 181 } 182 return false 183 } 184 185 func (r *ResourceFlavorReconciler) Generic(e event.GenericEvent) bool { 186 r.log.V(2).Info("Got generic event", "obj", klog.KObj(e.Object), "kind", e.Object.GetObjectKind().GroupVersionKind()) 187 return true 188 } 189 190 // NotifyClusterQueueUpdate will listen for the update/delete events of clusterQueues to help 191 // verifying whether resourceFlavors are no longer in use by clusterQueues. There are mainly 192 // two reasons for this, 1) a clusterQueue is deleted 2) a clusterQueue is updated with 193 // the resourceFlavors in use. 194 func (r *ResourceFlavorReconciler) NotifyClusterQueueUpdate(oldCQ, newCQ *kueue.ClusterQueue) { 195 // if oldCQ is nil, it's a create event. 196 if oldCQ == nil { 197 return 198 } 199 200 // if newCQ is nil, it's a delete event. 201 if newCQ == nil { 202 r.cqUpdateCh <- event.GenericEvent{Object: oldCQ} 203 return 204 } 205 206 oldFlavors := resourceFlavors(oldCQ) 207 newFlavors := resourceFlavors(newCQ) 208 if !oldFlavors.Equal(newFlavors) { 209 r.cqUpdateCh <- event.GenericEvent{Object: oldCQ} 210 } 211 } 212 213 // cqHandler signals the controller to reconcile the resourceFlavor 214 // associated to the clusterQueue in the event. 215 // Since the events come from a channel Source, only the Generic handler will 216 // receive events. 217 type cqHandler struct { 218 cache *cache.Cache 219 } 220 221 func (h *cqHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) { 222 } 223 224 func (h *cqHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) { 225 } 226 227 func (h *cqHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) { 228 } 229 230 // Generic accepts update/delete events from clusterQueue via channel. 231 // For update events, we only check the old obj to see whether old resourceFlavors 232 // are still in use since new resourceFlavors are always in use. 233 // For delete events, we check the original obj since new obj is nil. 234 func (h *cqHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { 235 cq := e.Object.(*kueue.ClusterQueue) 236 if cq.Name == "" { 237 return 238 } 239 240 for _, rg := range cq.Spec.ResourceGroups { 241 for _, flavor := range rg.Flavors { 242 if cqs := h.cache.ClusterQueuesUsingFlavor(string(flavor.Name)); len(cqs) == 0 { 243 req := reconcile.Request{ 244 NamespacedName: types.NamespacedName{ 245 Name: string(flavor.Name), 246 }, 247 } 248 q.Add(req) 249 } 250 } 251 } 252 } 253 254 // SetupWithManager sets up the controller with the Manager. 255 func (r *ResourceFlavorReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Configuration) error { 256 handler := cqHandler{ 257 cache: r.cache, 258 } 259 return ctrl.NewControllerManagedBy(mgr). 260 For(&kueue.ResourceFlavor{}). 261 WithOptions(controller.Options{NeedLeaderElection: ptr.To(false)}). 262 WatchesRawSource(&source.Channel{Source: r.cqUpdateCh}, &handler). 263 WithEventFilter(r). 264 Complete(WithLeadingManager(mgr, r, &kueue.ResourceFlavor{}, cfg)) 265 } 266 267 func resourceFlavors(cq *kueue.ClusterQueue) sets.Set[kueue.ResourceFlavorReference] { 268 flavors := sets.New[kueue.ResourceFlavorReference]() 269 for _, rg := range cq.Spec.ResourceGroups { 270 for _, flavor := range rg.Flavors { 271 flavors.Insert(flavor.Name) 272 } 273 } 274 return flavors 275 }