sigs.k8s.io/kueue@v0.6.2/pkg/controller/core/clusterqueue_controller.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package core
    18  
    19  import (
    20  	"context"
    21  	"time"
    22  
    23  	"github.com/go-logr/logr"
    24  	corev1 "k8s.io/api/core/v1"
    25  	"k8s.io/apimachinery/pkg/api/equality"
    26  	"k8s.io/apimachinery/pkg/api/meta"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/types"
    29  	"k8s.io/apimachinery/pkg/util/sets"
    30  	"k8s.io/apimachinery/pkg/util/wait"
    31  	"k8s.io/client-go/util/workqueue"
    32  	"k8s.io/klog/v2"
    33  	"k8s.io/utils/ptr"
    34  	ctrl "sigs.k8s.io/controller-runtime"
    35  	"sigs.k8s.io/controller-runtime/pkg/client"
    36  	"sigs.k8s.io/controller-runtime/pkg/controller"
    37  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    38  	"sigs.k8s.io/controller-runtime/pkg/event"
    39  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    40  	"sigs.k8s.io/controller-runtime/pkg/source"
    41  
    42  	config "sigs.k8s.io/kueue/apis/config/v1beta1"
    43  	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
    44  	"sigs.k8s.io/kueue/pkg/cache"
    45  	"sigs.k8s.io/kueue/pkg/constants"
    46  	"sigs.k8s.io/kueue/pkg/features"
    47  	"sigs.k8s.io/kueue/pkg/metrics"
    48  	"sigs.k8s.io/kueue/pkg/queue"
    49  	"sigs.k8s.io/kueue/pkg/util/resource"
    50  	"sigs.k8s.io/kueue/pkg/util/slices"
    51  	"sigs.k8s.io/kueue/pkg/workload"
    52  )
    53  
// snapshotWorkers is the number of goroutines that Start launches to drain
// the snapshots queue.
const snapshotWorkers = 5
    55  
// ClusterQueueUpdateWatcher is implemented by components that want to be told
// about ClusterQueue events observed by the ClusterQueueReconciler.
type ClusterQueueUpdateWatcher interface {
	// NotifyClusterQueueUpdate receives the old and the new ClusterQueue, in
	// that order. The reconciler passes a nil old object for create events and
	// a nil new object for delete events.
	NotifyClusterQueueUpdate(*kueue.ClusterQueue, *kueue.ClusterQueue)
}
    59  
// ClusterQueueReconciler reconciles a ClusterQueue object
type ClusterQueueReconciler struct {
	// client reads ClusterQueues and writes their finalizers and status.
	client                               client.Client
	// log is the base logger used by the event-filter methods.
	log                                  logr.Logger
	qManager                             *queue.Manager
	cache                                *cache.Cache
	// snapshotsQueue holds the names of the ClusterQueues whose pending
	// workloads snapshot is waiting to be refreshed.
	snapshotsQueue                       workqueue.Interface
	// The channels below feed generic events into the controller; each one is
	// registered as a channel source in SetupWithManager.
	wlUpdateCh                           chan event.GenericEvent
	rfUpdateCh                           chan event.GenericEvent
	acUpdateCh                           chan event.GenericEvent
	snapUpdateCh                         chan event.GenericEvent
	// watchers are notified of every ClusterQueue create, update and delete event.
	watchers                             []ClusterQueueUpdateWatcher
	// reportResourceMetrics enables quota, reservation and usage metrics on
	// create and update events.
	reportResourceMetrics                bool
	// queueVisibilityUpdateInterval is the period of the snapshot workers.
	queueVisibilityUpdateInterval        time.Duration
	// queueVisibilityClusterQueuesMaxCount is handed to the queue manager when
	// a snapshot is refreshed; a value of zero or less disables queue visibility.
	queueVisibilityClusterQueuesMaxCount int32
}
    76  
// ClusterQueueReconcilerOptions collects the optional settings applied by
// NewClusterQueueReconciler. Each field is copied into the matching
// unexported field of the ClusterQueueReconciler.
type ClusterQueueReconcilerOptions struct {
	Watchers                             []ClusterQueueUpdateWatcher
	ReportResourceMetrics                bool
	QueueVisibilityUpdateInterval        time.Duration
	QueueVisibilityClusterQueuesMaxCount int32
}

// ClusterQueueReconcilerOption configures the reconciler.
// It mutates the options struct it is given.
type ClusterQueueReconcilerOption func(*ClusterQueueReconcilerOptions)
    86  
    87  func WithWatchers(watchers ...ClusterQueueUpdateWatcher) ClusterQueueReconcilerOption {
    88  	return func(o *ClusterQueueReconcilerOptions) {
    89  		o.Watchers = watchers
    90  	}
    91  }
    92  
    93  func WithReportResourceMetrics(report bool) ClusterQueueReconcilerOption {
    94  	return func(o *ClusterQueueReconcilerOptions) {
    95  		o.ReportResourceMetrics = report
    96  	}
    97  }
    98  
    99  // WithQueueVisibilityUpdateInterval specifies the time interval for updates to the structure
   100  // of the top pending workloads in the queues.
   101  func WithQueueVisibilityUpdateInterval(interval time.Duration) ClusterQueueReconcilerOption {
   102  	return func(o *ClusterQueueReconcilerOptions) {
   103  		o.QueueVisibilityUpdateInterval = interval
   104  	}
   105  }
   106  
   107  // WithQueueVisibilityClusterQueuesMaxCount indicates the maximal number of pending workloads exposed in the
   108  // cluster queue status
   109  func WithQueueVisibilityClusterQueuesMaxCount(value int32) ClusterQueueReconcilerOption {
   110  	return func(o *ClusterQueueReconcilerOptions) {
   111  		o.QueueVisibilityClusterQueuesMaxCount = value
   112  	}
   113  }
   114  
// defaultCQOptions is the zero-valued starting point that
// NewClusterQueueReconciler copies before applying the caller's options.
var defaultCQOptions = ClusterQueueReconcilerOptions{}
   116  
   117  func NewClusterQueueReconciler(
   118  	client client.Client,
   119  	qMgr *queue.Manager,
   120  	cache *cache.Cache,
   121  	opts ...ClusterQueueReconcilerOption,
   122  ) *ClusterQueueReconciler {
   123  	options := defaultCQOptions
   124  	for _, opt := range opts {
   125  		opt(&options)
   126  	}
   127  	return &ClusterQueueReconciler{
   128  		client:                               client,
   129  		log:                                  ctrl.Log.WithName("cluster-queue-reconciler"),
   130  		qManager:                             qMgr,
   131  		cache:                                cache,
   132  		snapshotsQueue:                       workqueue.New(),
   133  		wlUpdateCh:                           make(chan event.GenericEvent, updateChBuffer),
   134  		rfUpdateCh:                           make(chan event.GenericEvent, updateChBuffer),
   135  		acUpdateCh:                           make(chan event.GenericEvent, updateChBuffer),
   136  		snapUpdateCh:                         make(chan event.GenericEvent, updateChBuffer),
   137  		watchers:                             options.Watchers,
   138  		reportResourceMetrics:                options.ReportResourceMetrics,
   139  		queueVisibilityUpdateInterval:        options.QueueVisibilityUpdateInterval,
   140  		queueVisibilityClusterQueuesMaxCount: options.QueueVisibilityClusterQueuesMaxCount,
   141  	}
   142  }
   143  
   144  // +kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch
   145  // +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch
   146  // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=clusterqueues,verbs=get;list;watch;create;update;patch;delete
   147  // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=clusterqueues/status,verbs=get;update;patch
   148  // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=clusterqueues/finalizers,verbs=update
   149  
   150  func (r *ClusterQueueReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
   151  	var cqObj kueue.ClusterQueue
   152  	if err := r.client.Get(ctx, req.NamespacedName, &cqObj); err != nil {
   153  		// we'll ignore not-found errors, since there is nothing to do.
   154  		return ctrl.Result{}, client.IgnoreNotFound(err)
   155  	}
   156  	log := ctrl.LoggerFrom(ctx).WithValues("clusterQueue", klog.KObj(&cqObj))
   157  	ctx = ctrl.LoggerInto(ctx, log)
   158  	log.V(2).Info("Reconciling ClusterQueue")
   159  
   160  	if cqObj.ObjectMeta.DeletionTimestamp.IsZero() {
   161  		// Although we'll add the finalizer via webhook mutation now, this is still useful
   162  		// as a fallback.
   163  		if !controllerutil.ContainsFinalizer(&cqObj, kueue.ResourceInUseFinalizerName) {
   164  			controllerutil.AddFinalizer(&cqObj, kueue.ResourceInUseFinalizerName)
   165  			if err := r.client.Update(ctx, &cqObj); err != nil {
   166  				return ctrl.Result{}, client.IgnoreNotFound(err)
   167  			}
   168  		}
   169  	} else {
   170  		if !r.cache.ClusterQueueTerminating(cqObj.Name) {
   171  			r.cache.TerminateClusterQueue(cqObj.Name)
   172  		}
   173  
   174  		if controllerutil.ContainsFinalizer(&cqObj, kueue.ResourceInUseFinalizerName) {
   175  			// The clusterQueue is being deleted, remove the finalizer only if
   176  			// there are no active reserving workloads.
   177  			if r.cache.ClusterQueueEmpty(cqObj.Name) {
   178  				controllerutil.RemoveFinalizer(&cqObj, kueue.ResourceInUseFinalizerName)
   179  				if err := r.client.Update(ctx, &cqObj); err != nil {
   180  					return ctrl.Result{}, client.IgnoreNotFound(err)
   181  				}
   182  			}
   183  			return ctrl.Result{}, nil
   184  		}
   185  	}
   186  
   187  	newCQObj := cqObj.DeepCopy()
   188  	cqCondition, reason, msg := r.cache.ClusterQueueReadiness(newCQObj.Name)
   189  	if err := r.updateCqStatusIfChanged(ctx, newCQObj, cqCondition, reason, msg); err != nil {
   190  		return ctrl.Result{}, client.IgnoreNotFound(err)
   191  	}
   192  	return ctrl.Result{}, nil
   193  }
   194  
   195  func (r *ClusterQueueReconciler) NotifyWorkloadUpdate(oldWl, newWl *kueue.Workload) {
   196  	if oldWl != nil {
   197  		r.wlUpdateCh <- event.GenericEvent{Object: oldWl}
   198  		if newWl != nil && oldWl.Spec.QueueName != newWl.Spec.QueueName {
   199  			r.wlUpdateCh <- event.GenericEvent{Object: newWl}
   200  		}
   201  		return
   202  	}
   203  	if newWl != nil {
   204  		r.wlUpdateCh <- event.GenericEvent{Object: newWl}
   205  	}
   206  }
   207  
   208  func (r *ClusterQueueReconciler) notifyWatchers(oldCQ, newCQ *kueue.ClusterQueue) {
   209  	for _, w := range r.watchers {
   210  		w.NotifyClusterQueueUpdate(oldCQ, newCQ)
   211  	}
   212  }
   213  
   214  // NotifyResourceFlavorUpdate ignores updates since they have no impact on the ClusterQueue's readiness.
   215  func (r *ClusterQueueReconciler) NotifyResourceFlavorUpdate(oldRF, newRF *kueue.ResourceFlavor) {
   216  	// if oldRF is nil, it's a create event.
   217  	if oldRF == nil {
   218  		r.rfUpdateCh <- event.GenericEvent{Object: newRF}
   219  		return
   220  	}
   221  
   222  	// if newRF is nil, it's a delete event.
   223  	if newRF == nil {
   224  		r.rfUpdateCh <- event.GenericEvent{Object: oldRF}
   225  		return
   226  	}
   227  }
   228  
   229  func (r *ClusterQueueReconciler) NotifyAdmissionCheckUpdate(oldAc, newAc *kueue.AdmissionCheck) {
   230  	switch {
   231  	case oldAc != nil:
   232  		r.acUpdateCh <- event.GenericEvent{Object: oldAc}
   233  	case newAc != nil:
   234  		r.acUpdateCh <- event.GenericEvent{Object: newAc}
   235  	}
   236  }
   237  
   238  // Event handlers return true to signal the controller to reconcile the
   239  // ClusterQueue associated with the event.
   240  
   241  func (r *ClusterQueueReconciler) Create(e event.CreateEvent) bool {
   242  	cq, match := e.Object.(*kueue.ClusterQueue)
   243  	if !match {
   244  		// No need to interact with the cache for other objects.
   245  		return true
   246  	}
   247  	defer r.notifyWatchers(nil, cq)
   248  
   249  	log := r.log.WithValues("clusterQueue", klog.KObj(cq))
   250  	log.V(2).Info("ClusterQueue create event")
   251  	ctx := ctrl.LoggerInto(context.Background(), log)
   252  	if err := r.cache.AddClusterQueue(ctx, cq); err != nil {
   253  		log.Error(err, "Failed to add clusterQueue to cache")
   254  	}
   255  
   256  	if err := r.qManager.AddClusterQueue(ctx, cq); err != nil {
   257  		log.Error(err, "Failed to add clusterQueue to queue manager")
   258  	}
   259  
   260  	if r.reportResourceMetrics {
   261  		recordResourceMetrics(cq)
   262  	}
   263  
   264  	return true
   265  }
   266  
   267  func (r *ClusterQueueReconciler) Delete(e event.DeleteEvent) bool {
   268  	cq, match := e.Object.(*kueue.ClusterQueue)
   269  	if !match {
   270  		// No need to interact with the cache for other objects.
   271  		return true
   272  	}
   273  	defer r.notifyWatchers(cq, nil)
   274  
   275  	r.log.V(2).Info("ClusterQueue delete event", "clusterQueue", klog.KObj(cq))
   276  	r.cache.DeleteClusterQueue(cq)
   277  	r.qManager.DeleteClusterQueue(cq)
   278  	r.qManager.DeleteSnapshot(cq)
   279  
   280  	metrics.ClearClusterQueueResourceMetrics(cq.Name)
   281  	r.log.V(2).Info("Cleared resource metrics for deleted ClusterQueue.", "clusterQueue", klog.KObj(cq))
   282  
   283  	return true
   284  }
   285  
   286  func (r *ClusterQueueReconciler) Update(e event.UpdateEvent) bool {
   287  	oldCq, match := e.ObjectOld.(*kueue.ClusterQueue)
   288  	if !match {
   289  		// No need to interact with the cache for other objects.
   290  		return true
   291  	}
   292  	newCq, match := e.ObjectNew.(*kueue.ClusterQueue)
   293  	if !match {
   294  		// No need to interact with the cache for other objects.
   295  		return true
   296  	}
   297  
   298  	log := r.log.WithValues("clusterQueue", klog.KObj(newCq))
   299  	log.V(2).Info("ClusterQueue update event")
   300  
   301  	if newCq.DeletionTimestamp != nil {
   302  		return true
   303  	}
   304  	defer r.notifyWatchers(oldCq, newCq)
   305  	specUpdated := !equality.Semantic.DeepEqual(oldCq.Spec, newCq.Spec)
   306  
   307  	if err := r.cache.UpdateClusterQueue(newCq); err != nil {
   308  		log.Error(err, "Failed to update clusterQueue in cache")
   309  	}
   310  	if err := r.qManager.UpdateClusterQueue(context.Background(), newCq, specUpdated); err != nil {
   311  		log.Error(err, "Failed to update clusterQueue in queue manager")
   312  	}
   313  
   314  	if r.reportResourceMetrics {
   315  		updateResourceMetrics(oldCq, newCq)
   316  	}
   317  	return true
   318  }
   319  
   320  func (r *ClusterQueueReconciler) Generic(e event.GenericEvent) bool {
   321  	r.log.V(2).Info("Got generic event", "obj", klog.KObj(e.Object), "kind", e.Object.GetObjectKind().GroupVersionKind())
   322  	return true
   323  }
   324  
   325  func recordResourceMetrics(cq *kueue.ClusterQueue) {
   326  	for rgi := range cq.Spec.ResourceGroups {
   327  		rg := &cq.Spec.ResourceGroups[rgi]
   328  		for fqi := range rg.Flavors {
   329  			fq := &rg.Flavors[fqi]
   330  			for ri := range fq.Resources {
   331  				r := &fq.Resources[ri]
   332  				nominal := resource.QuantityToFloat(&r.NominalQuota)
   333  				borrow := resource.QuantityToFloat(r.BorrowingLimit)
   334  				lend := resource.QuantityToFloat(r.LendingLimit)
   335  				metrics.ReportClusterQueueQuotas(cq.Spec.Cohort, cq.Name, string(fq.Name), string(r.Name), nominal, borrow, lend)
   336  			}
   337  		}
   338  	}
   339  
   340  	for fri := range cq.Status.FlavorsReservation {
   341  		fr := &cq.Status.FlavorsReservation[fri]
   342  		for ri := range fr.Resources {
   343  			r := &fr.Resources[ri]
   344  			metrics.ReportClusterQueueResourceReservations(cq.Spec.Cohort, cq.Name, string(fr.Name), string(r.Name), resource.QuantityToFloat(&r.Total))
   345  		}
   346  	}
   347  
   348  	for fui := range cq.Status.FlavorsUsage {
   349  		fu := &cq.Status.FlavorsUsage[fui]
   350  		for ri := range fu.Resources {
   351  			r := &fu.Resources[ri]
   352  			metrics.ReportClusterQueueResourceUsage(cq.Spec.Cohort, cq.Name, string(fu.Name), string(r.Name), resource.QuantityToFloat(&r.Total))
   353  		}
   354  	}
   355  }
   356  
   357  func updateResourceMetrics(oldCq, newCq *kueue.ClusterQueue) {
   358  	// if the cohort changed, drop all the old metrics
   359  	if oldCq.Spec.Cohort != newCq.Spec.Cohort {
   360  		metrics.ClearClusterQueueResourceMetrics(oldCq.Name)
   361  	} else {
   362  		// selective remove
   363  		clearOldResourceQuotas(oldCq, newCq)
   364  	}
   365  	recordResourceMetrics(newCq)
   366  }
   367  
   368  func clearOldResourceQuotas(oldCq, newCq *kueue.ClusterQueue) {
   369  	for rgi := range oldCq.Spec.ResourceGroups {
   370  		oldRG := &oldCq.Spec.ResourceGroups[rgi]
   371  		newFlavors := map[kueue.ResourceFlavorReference]*kueue.FlavorQuotas{}
   372  		if rgi < len(newCq.Spec.ResourceGroups) && len(newCq.Spec.ResourceGroups[rgi].Flavors) > 0 {
   373  			newFlavors = slices.ToRefMap(newCq.Spec.ResourceGroups[rgi].Flavors, func(f *kueue.FlavorQuotas) kueue.ResourceFlavorReference { return f.Name })
   374  		}
   375  
   376  		for fi := range oldRG.Flavors {
   377  			flavor := &oldRG.Flavors[fi]
   378  			if newFlavor, found := newFlavors[flavor.Name]; !found || len(newFlavor.Resources) == 0 {
   379  				metrics.ClearClusterQueueResourceQuotas(oldCq.Name, string(flavor.Name), "")
   380  			} else {
   381  				// check all resources
   382  				newResources := slices.ToRefMap(newFlavor.Resources, func(r *kueue.ResourceQuota) corev1.ResourceName { return r.Name })
   383  				for ri := range flavor.Resources {
   384  					rname := flavor.Resources[ri].Name
   385  					if _, found := newResources[rname]; !found {
   386  						metrics.ClearClusterQueueResourceQuotas(oldCq.Name, string(flavor.Name), string(rname))
   387  					}
   388  				}
   389  			}
   390  		}
   391  	}
   392  
   393  	// reservation metrics
   394  	if len(oldCq.Status.FlavorsReservation) > 0 {
   395  		newFlavors := map[kueue.ResourceFlavorReference]*kueue.FlavorUsage{}
   396  		if len(newCq.Status.FlavorsReservation) > 0 {
   397  			newFlavors = slices.ToRefMap(newCq.Status.FlavorsReservation, func(f *kueue.FlavorUsage) kueue.ResourceFlavorReference { return f.Name })
   398  		}
   399  		for fi := range oldCq.Status.FlavorsReservation {
   400  			flavor := &oldCq.Status.FlavorsReservation[fi]
   401  			if newFlavor, found := newFlavors[flavor.Name]; !found || len(newFlavor.Resources) == 0 {
   402  				metrics.ClearClusterQueueResourceReservations(oldCq.Name, string(flavor.Name), "")
   403  			} else {
   404  				newResources := slices.ToRefMap(newFlavor.Resources, func(r *kueue.ResourceUsage) corev1.ResourceName { return r.Name })
   405  				for ri := range flavor.Resources {
   406  					rname := flavor.Resources[ri].Name
   407  					if _, found := newResources[rname]; !found {
   408  						metrics.ClearClusterQueueResourceReservations(oldCq.Name, string(flavor.Name), string(rname))
   409  					}
   410  				}
   411  			}
   412  		}
   413  	}
   414  
   415  	// usage metrics
   416  	if len(oldCq.Status.FlavorsUsage) > 0 {
   417  		newFlavors := map[kueue.ResourceFlavorReference]*kueue.FlavorUsage{}
   418  		if len(newCq.Status.FlavorsUsage) > 0 {
   419  			newFlavors = slices.ToRefMap(newCq.Status.FlavorsUsage, func(f *kueue.FlavorUsage) kueue.ResourceFlavorReference { return f.Name })
   420  		}
   421  		for fi := range oldCq.Status.FlavorsUsage {
   422  			flavor := &oldCq.Status.FlavorsUsage[fi]
   423  			if newFlavor, found := newFlavors[flavor.Name]; !found || len(newFlavor.Resources) == 0 {
   424  				metrics.ClearClusterQueueResourceUsage(oldCq.Name, string(flavor.Name), "")
   425  			} else {
   426  				newResources := slices.ToRefMap(newFlavor.Resources, func(r *kueue.ResourceUsage) corev1.ResourceName { return r.Name })
   427  				for ri := range flavor.Resources {
   428  					rname := flavor.Resources[ri].Name
   429  					if _, found := newResources[rname]; !found {
   430  						metrics.ClearClusterQueueResourceUsage(oldCq.Name, string(flavor.Name), string(rname))
   431  					}
   432  				}
   433  			}
   434  		}
   435  	}
   436  }
   437  
// cqWorkloadHandler signals the controller to reconcile the ClusterQueue
// associated to the workload in the event.
// Since the events come from a channel Source, only the Generic handler will
// receive events.
type cqWorkloadHandler struct {
	// qManager resolves the ClusterQueue of workloads without a quota reservation.
	qManager *queue.Manager
}

// Create is a no-op; see the type comment.
func (h *cqWorkloadHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) {
}

// Update is a no-op; see the type comment.
func (h *cqWorkloadHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) {
}

// Delete is a no-op; see the type comment.
func (h *cqWorkloadHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}
   454  
   455  func (h *cqWorkloadHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
   456  	w := e.Object.(*kueue.Workload)
   457  	req := h.requestForWorkloadClusterQueue(w)
   458  	if req != nil {
   459  		q.AddAfter(*req, constants.UpdatesBatchPeriod)
   460  	}
   461  }
   462  
   463  func (h *cqWorkloadHandler) requestForWorkloadClusterQueue(w *kueue.Workload) *reconcile.Request {
   464  	var name string
   465  	if workload.HasQuotaReservation(w) {
   466  		name = string(w.Status.Admission.ClusterQueue)
   467  	} else {
   468  		var ok bool
   469  		name, ok = h.qManager.ClusterQueueForWorkload(w)
   470  		if !ok {
   471  			return nil
   472  		}
   473  	}
   474  	return &reconcile.Request{
   475  		NamespacedName: types.NamespacedName{
   476  			Name: name,
   477  		},
   478  	}
   479  }
   480  
// cqNamespaceHandler handles namespace update events.
// Create, Delete and Generic events are ignored.
type cqNamespaceHandler struct {
	// qManager re-queues inadmissible workloads of the affected ClusterQueues.
	qManager *queue.Manager
	// cache resolves which ClusterQueues match a set of namespace labels.
	cache    *cache.Cache
}

// Create is a no-op: only namespace updates are handled.
func (h *cqNamespaceHandler) Create(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) {
}
   489  
   490  func (h *cqNamespaceHandler) Update(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) {
   491  	oldNs := e.ObjectOld.(*corev1.Namespace)
   492  	oldMatchingCqs := h.cache.MatchingClusterQueues(oldNs.Labels)
   493  	newNs := e.ObjectNew.(*corev1.Namespace)
   494  	newMatchingCqs := h.cache.MatchingClusterQueues(newNs.Labels)
   495  	cqs := sets.New[string]()
   496  	for cq := range newMatchingCqs {
   497  		if !oldMatchingCqs.Has(cq) {
   498  			cqs.Insert(cq)
   499  		}
   500  	}
   501  	h.qManager.QueueInadmissibleWorkloads(ctx, cqs)
   502  }
   503  
// Delete is a no-op: only namespace updates are handled.
func (h *cqNamespaceHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}

// Generic is a no-op: only namespace updates are handled.
func (h *cqNamespaceHandler) Generic(context.Context, event.GenericEvent, workqueue.RateLimitingInterface) {
}
   509  
// cqResourceFlavorHandler enqueues the ClusterQueues that use a ResourceFlavor
// when a generic event for that flavor arrives. Only Generic does any work.
type cqResourceFlavorHandler struct {
	// cache resolves which ClusterQueues use a given flavor.
	cache *cache.Cache
}

// Create is a no-op: only generic events are handled.
func (h *cqResourceFlavorHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) {
}

// Update is a no-op: only generic events are handled.
func (h *cqResourceFlavorHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) {
}

// Delete is a no-op: only generic events are handled.
func (h *cqResourceFlavorHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}
   522  
   523  func (h *cqResourceFlavorHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
   524  	rf, ok := e.Object.(*kueue.ResourceFlavor)
   525  	if !ok {
   526  		return
   527  	}
   528  
   529  	if cqs := h.cache.ClusterQueuesUsingFlavor(rf.Name); len(cqs) != 0 {
   530  		for _, cq := range cqs {
   531  			req := reconcile.Request{
   532  				NamespacedName: types.NamespacedName{
   533  					Name: cq,
   534  				}}
   535  			q.Add(req)
   536  		}
   537  	}
   538  }
   539  
// cqAdmissionCheckHandler enqueues the ClusterQueues that use an
// AdmissionCheck when a generic event for that check arrives. Only Generic
// does any work.
type cqAdmissionCheckHandler struct {
	// cache resolves which ClusterQueues use a given admission check.
	cache *cache.Cache
}

// cqSnapshotHandler enqueues a ClusterQueue when a generic event reports that
// its pending workloads snapshot changed. Only Generic does any work.
type cqSnapshotHandler struct {
	// queueVisibilityUpdateInterval is used by Generic to compute how long to
	// delay the reconcile request.
	queueVisibilityUpdateInterval time.Duration
}

// Create is a no-op: only generic events are handled.
func (h *cqAdmissionCheckHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) {
}

// Update is a no-op: only generic events are handled.
func (h *cqAdmissionCheckHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) {
}

// Delete is a no-op: only generic events are handled.
func (h *cqAdmissionCheckHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}
   556  
   557  func (h *cqAdmissionCheckHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
   558  	ac, isAc := e.Object.(*kueue.AdmissionCheck)
   559  	if !isAc {
   560  		return
   561  	}
   562  
   563  	if cqs := h.cache.ClusterQueuesUsingAdmissionCheck(ac.Name); len(cqs) != 0 {
   564  		for _, cq := range cqs {
   565  			req := reconcile.Request{
   566  				NamespacedName: types.NamespacedName{
   567  					Name: cq,
   568  				}}
   569  			q.Add(req)
   570  		}
   571  	}
   572  }
   573  
// Create is a no-op: only generic events are handled.
func (h *cqSnapshotHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) {
}

// Update is a no-op: only generic events are handled.
func (h *cqSnapshotHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) {
}

// Delete is a no-op: only generic events are handled.
func (h *cqSnapshotHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}
   582  
   583  func (h *cqSnapshotHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
   584  	cq, isCq := e.Object.(*kueue.ClusterQueue)
   585  	if !isCq {
   586  		return
   587  	}
   588  	remainingTime := constants.UpdatesBatchPeriod
   589  	if cq.Status.PendingWorkloadsStatus != nil {
   590  		remainingTime = h.queueVisibilityUpdateInterval - time.Since(cq.Status.PendingWorkloadsStatus.LastChangeTime.Time)
   591  		if remainingTime <= constants.UpdatesBatchPeriod {
   592  			remainingTime = constants.UpdatesBatchPeriod
   593  		}
   594  	}
   595  	q.AddAfter(reconcile.Request{
   596  		NamespacedName: types.NamespacedName{
   597  			Name: cq.Name,
   598  		}}, remainingTime)
   599  }
   600  
   601  // SetupWithManager sets up the controller with the Manager.
   602  func (r *ClusterQueueReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Configuration) error {
   603  	wHandler := cqWorkloadHandler{
   604  		qManager: r.qManager,
   605  	}
   606  	nsHandler := cqNamespaceHandler{
   607  		qManager: r.qManager,
   608  		cache:    r.cache,
   609  	}
   610  	rfHandler := cqResourceFlavorHandler{
   611  		cache: r.cache,
   612  	}
   613  	acHandler := cqAdmissionCheckHandler{
   614  		cache: r.cache,
   615  	}
   616  	snapHandler := cqSnapshotHandler{
   617  		queueVisibilityUpdateInterval: r.queueVisibilityUpdateInterval,
   618  	}
   619  	return ctrl.NewControllerManagedBy(mgr).
   620  		For(&kueue.ClusterQueue{}).
   621  		WithOptions(controller.Options{NeedLeaderElection: ptr.To(false)}).
   622  		Watches(&corev1.Namespace{}, &nsHandler).
   623  		WatchesRawSource(&source.Channel{Source: r.wlUpdateCh}, &wHandler).
   624  		WatchesRawSource(&source.Channel{Source: r.rfUpdateCh}, &rfHandler).
   625  		WatchesRawSource(&source.Channel{Source: r.acUpdateCh}, &acHandler).
   626  		WatchesRawSource(&source.Channel{Source: r.snapUpdateCh}, &snapHandler).
   627  		WithEventFilter(r).
   628  		Complete(WithLeadingManager(mgr, r, &kueue.ClusterQueue{}, cfg))
   629  }
   630  
   631  func (r *ClusterQueueReconciler) updateCqStatusIfChanged(
   632  	ctx context.Context,
   633  	cq *kueue.ClusterQueue,
   634  	conditionStatus metav1.ConditionStatus,
   635  	reason, msg string,
   636  ) error {
   637  	oldStatus := cq.Status.DeepCopy()
   638  	pendingWorkloads := r.qManager.Pending(cq)
   639  	stats, err := r.cache.Usage(cq)
   640  	if err != nil {
   641  		r.log.Error(err, "Failed getting usage from cache")
   642  		// This is likely because the cluster queue was recently removed,
   643  		// but we didn't process that event yet.
   644  		return err
   645  	}
   646  	cq.Status.FlavorsReservation = stats.ReservedResources
   647  	cq.Status.FlavorsUsage = stats.AdmittedResources
   648  	cq.Status.ReservingWorkloads = int32(stats.ReservingWorkloads)
   649  	cq.Status.AdmittedWorkloads = int32(stats.AdmittedWorkloads)
   650  	cq.Status.PendingWorkloads = int32(pendingWorkloads)
   651  	cq.Status.PendingWorkloadsStatus = r.getWorkloadsStatus(cq)
   652  	meta.SetStatusCondition(&cq.Status.Conditions, metav1.Condition{
   653  		Type:    kueue.ClusterQueueActive,
   654  		Status:  conditionStatus,
   655  		Reason:  reason,
   656  		Message: msg,
   657  	})
   658  	if !equality.Semantic.DeepEqual(cq.Status, oldStatus) {
   659  		return r.client.Status().Update(ctx, cq)
   660  	}
   661  	return nil
   662  }
   663  
   664  // Taking snapshot of cluster queue is enabled when maxcount non-zero
   665  func (r *ClusterQueueReconciler) isVisibilityEnabled() bool {
   666  	return features.Enabled(features.QueueVisibility) && r.queueVisibilityClusterQueuesMaxCount > 0
   667  }
   668  
   669  func (r *ClusterQueueReconciler) getWorkloadsStatus(cq *kueue.ClusterQueue) *kueue.ClusterQueuePendingWorkloadsStatus {
   670  	if !r.isVisibilityEnabled() {
   671  		return nil
   672  	}
   673  	pendingWorkloads := r.qManager.GetSnapshot(cq.Name)
   674  	if cq.Status.PendingWorkloadsStatus == nil ||
   675  		cq.Status.PendingWorkloadsStatus.Head == nil ||
   676  		!equality.Semantic.DeepEqual(cq.Status.PendingWorkloadsStatus.Head, pendingWorkloads) {
   677  		return &kueue.ClusterQueuePendingWorkloadsStatus{
   678  			Head:           pendingWorkloads,
   679  			LastChangeTime: metav1.Time{Time: time.Now()},
   680  		}
   681  	}
   682  	return cq.Status.PendingWorkloadsStatus
   683  }
   684  
   685  func (r *ClusterQueueReconciler) Start(ctx context.Context) error {
   686  	if !r.isVisibilityEnabled() {
   687  		return nil
   688  	}
   689  
   690  	defer r.snapshotsQueue.ShutDown()
   691  
   692  	for i := 0; i < snapshotWorkers; i++ {
   693  		go wait.UntilWithContext(ctx, r.takeSnapshot, r.queueVisibilityUpdateInterval)
   694  	}
   695  
   696  	go wait.UntilWithContext(ctx, r.enqueueTakeSnapshot, r.queueVisibilityUpdateInterval)
   697  
   698  	<-ctx.Done()
   699  
   700  	return nil
   701  }
   702  
   703  func (r *ClusterQueueReconciler) enqueueTakeSnapshot(ctx context.Context) {
   704  	for _, cq := range r.qManager.GetClusterQueueNames() {
   705  		r.snapshotsQueue.Add(cq)
   706  	}
   707  }
   708  
   709  func (r *ClusterQueueReconciler) takeSnapshot(ctx context.Context) {
   710  	for r.processNextSnapshot(ctx) {
   711  	}
   712  }
   713  
   714  func (r *ClusterQueueReconciler) processNextSnapshot(ctx context.Context) bool {
   715  	log := ctrl.LoggerFrom(ctx).WithName("processNextSnapshot")
   716  
   717  	key, quit := r.snapshotsQueue.Get()
   718  	if quit {
   719  		return false
   720  	}
   721  
   722  	startTime := time.Now()
   723  	defer func() {
   724  		log.V(5).Info("Finished snapshot job", "key", key, "elapsed", time.Since(startTime))
   725  	}()
   726  
   727  	defer r.snapshotsQueue.Done(key)
   728  
   729  	cqName := key.(string)
   730  	if r.qManager.UpdateSnapshot(cqName, r.queueVisibilityClusterQueuesMaxCount) {
   731  		log.V(5).Info("Triggering CQ update due to snapshot change", "clusterQueue", klog.KRef("", cqName))
   732  		r.snapUpdateCh <- event.GenericEvent{Object: &kueue.ClusterQueue{
   733  			ObjectMeta: metav1.ObjectMeta{
   734  				Name: cqName,
   735  			},
   736  		}}
   737  	}
   738  	return true
   739  }