sigs.k8s.io/kueue@v0.6.2/pkg/controller/core/localqueue_controller.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package core
    18  
    19  import (
    20  	"context"
    21  
    22  	"github.com/go-logr/logr"
    23  	"k8s.io/apimachinery/pkg/api/equality"
    24  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    25  	"k8s.io/apimachinery/pkg/api/meta"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/apimachinery/pkg/types"
    28  	"k8s.io/client-go/util/workqueue"
    29  	"k8s.io/klog/v2"
    30  	"k8s.io/utils/ptr"
    31  	ctrl "sigs.k8s.io/controller-runtime"
    32  	"sigs.k8s.io/controller-runtime/pkg/client"
    33  	"sigs.k8s.io/controller-runtime/pkg/controller"
    34  	"sigs.k8s.io/controller-runtime/pkg/event"
    35  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    36  	"sigs.k8s.io/controller-runtime/pkg/source"
    37  
    38  	config "sigs.k8s.io/kueue/apis/config/v1beta1"
    39  	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
    40  	"sigs.k8s.io/kueue/pkg/cache"
    41  	"sigs.k8s.io/kueue/pkg/constants"
    42  	"sigs.k8s.io/kueue/pkg/controller/core/indexer"
    43  	"sigs.k8s.io/kueue/pkg/queue"
    44  )
    45  
// Messages shared by the LocalQueue reconciler:
//   - queueIsInactiveMsg is the condition message used when the LocalQueue is
//     marked inactive (ClusterQueue missing or not active).
//   - failedUpdateLqStatusMsg is the log message emitted when the data needed
//     to build the LocalQueue status cannot be obtained.
const (
	queueIsInactiveMsg      = "Can't submit new workloads to clusterQueue"
	failedUpdateLqStatusMsg = "Failed to retrieve localQueue status"
)
    50  
// LocalQueueReconciler reconciles a LocalQueue object.
//
// It also acts as the controller's event filter (Create/Delete/Update/Generic),
// keeping the queue manager and the cache in sync with LocalQueue events.
//
// Fields:
//   - client: API client used to read LocalQueues/ClusterQueues and to write
//     the LocalQueue status.
//   - log: base logger used by the event-filter methods.
//   - queues: queue manager that is told about LocalQueue add/update/delete and
//     is asked for the number of pending workloads.
//   - cache: cache that is told about LocalQueue add/update/delete and is asked
//     for the LocalQueue usage statistics.
//   - wlUpdateCh: buffered channel through which NotifyWorkloadUpdate feeds
//     workload events to the controller.
type LocalQueueReconciler struct {
	client     client.Client
	log        logr.Logger
	queues     *queue.Manager
	cache      *cache.Cache
	wlUpdateCh chan event.GenericEvent
}
    59  
    60  func NewLocalQueueReconciler(client client.Client, queues *queue.Manager, cache *cache.Cache) *LocalQueueReconciler {
    61  	return &LocalQueueReconciler{
    62  		log:        ctrl.Log.WithName("localqueue-reconciler"),
    63  		queues:     queues,
    64  		cache:      cache,
    65  		client:     client,
    66  		wlUpdateCh: make(chan event.GenericEvent, updateChBuffer),
    67  	}
    68  }
    69  
    70  func (r *LocalQueueReconciler) NotifyWorkloadUpdate(oldWl, newWl *kueue.Workload) {
    71  	if oldWl != nil {
    72  		r.wlUpdateCh <- event.GenericEvent{Object: oldWl}
    73  		if newWl != nil && oldWl.Spec.QueueName != newWl.Spec.QueueName {
    74  			r.wlUpdateCh <- event.GenericEvent{Object: newWl}
    75  		}
    76  		return
    77  	}
    78  	if newWl != nil {
    79  		r.wlUpdateCh <- event.GenericEvent{Object: newWl}
    80  	}
    81  }
    82  
    83  // +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch
    84  // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=localqueues,verbs=get;list;watch;create;update;patch;delete
    85  // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=localqueues/status,verbs=get;update;patch
    86  // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=localqueues/finalizers,verbs=update
    87  
    88  func (r *LocalQueueReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    89  	var queueObj kueue.LocalQueue
    90  	if err := r.client.Get(ctx, req.NamespacedName, &queueObj); err != nil {
    91  		// we'll ignore not-found errors, since there is nothing to do.
    92  		return ctrl.Result{}, client.IgnoreNotFound(err)
    93  	}
    94  	log := ctrl.LoggerFrom(ctx).WithValues("localQueue", klog.KObj(&queueObj))
    95  	ctx = ctrl.LoggerInto(ctx, log)
    96  	log.V(2).Info("Reconciling LocalQueue")
    97  
    98  	var cq kueue.ClusterQueue
    99  	err := r.client.Get(ctx, client.ObjectKey{Name: string(queueObj.Spec.ClusterQueue)}, &cq)
   100  	if err != nil {
   101  		if apierrors.IsNotFound(err) {
   102  			err = r.UpdateStatusIfChanged(ctx, &queueObj, metav1.ConditionFalse, "ClusterQueueDoesNotExist", queueIsInactiveMsg)
   103  		}
   104  		return ctrl.Result{}, client.IgnoreNotFound(err)
   105  	}
   106  	if meta.IsStatusConditionTrue(cq.Status.Conditions, kueue.ClusterQueueActive) {
   107  		err = r.UpdateStatusIfChanged(ctx, &queueObj, metav1.ConditionTrue, "Ready", "Can submit new workloads to clusterQueue")
   108  		return ctrl.Result{}, client.IgnoreNotFound(err)
   109  	}
   110  	err = r.UpdateStatusIfChanged(ctx, &queueObj, metav1.ConditionFalse, "ClusterQueueIsInactive", queueIsInactiveMsg)
   111  	return ctrl.Result{}, client.IgnoreNotFound(err)
   112  }
   113  
   114  func (r *LocalQueueReconciler) Create(e event.CreateEvent) bool {
   115  	q, match := e.Object.(*kueue.LocalQueue)
   116  	if !match {
   117  		// No need to interact with the queue manager for other objects.
   118  		return true
   119  	}
   120  	log := r.log.WithValues("localQueue", klog.KObj(q))
   121  	log.V(2).Info("LocalQueue create event")
   122  	ctx := logr.NewContext(context.Background(), log)
   123  	if err := r.queues.AddLocalQueue(ctx, q); err != nil {
   124  		log.Error(err, "Failed to add localQueue to the queueing system")
   125  	}
   126  	if err := r.cache.AddLocalQueue(q); err != nil {
   127  		log.Error(err, "Failed to add localQueue to the cache")
   128  	}
   129  	return true
   130  }
   131  
   132  func (r *LocalQueueReconciler) Delete(e event.DeleteEvent) bool {
   133  	q, match := e.Object.(*kueue.LocalQueue)
   134  	if !match {
   135  		// No need to interact with the queue manager for other objects.
   136  		return true
   137  	}
   138  	r.log.V(2).Info("LocalQueue delete event", "localQueue", klog.KObj(q))
   139  	r.queues.DeleteLocalQueue(q)
   140  	r.cache.DeleteLocalQueue(q)
   141  	return true
   142  }
   143  
   144  func (r *LocalQueueReconciler) Update(e event.UpdateEvent) bool {
   145  	q, match := e.ObjectNew.(*kueue.LocalQueue)
   146  	if !match {
   147  		// No need to interact with the queue manager for other objects.
   148  		return true
   149  	}
   150  	log := r.log.WithValues("localQueue", klog.KObj(q))
   151  	log.V(2).Info("Queue update event")
   152  	if err := r.queues.UpdateLocalQueue(q); err != nil {
   153  		log.Error(err, "Failed to update queue in the queueing system")
   154  	}
   155  	oldQ := e.ObjectOld.(*kueue.LocalQueue)
   156  	if err := r.cache.UpdateLocalQueue(oldQ, q); err != nil {
   157  		log.Error(err, "Failed to update localQueue in the cache")
   158  	}
   159  	return true
   160  }
   161  
   162  func (r *LocalQueueReconciler) Generic(e event.GenericEvent) bool {
   163  	r.log.V(3).Info("Got Workload event", "workload", klog.KObj(e.Object))
   164  	return true
   165  }
   166  
// qWorkloadHandler signals the controller to reconcile the LocalQueue
// referenced by the workload carried in the event.
// Since the events come from a channel Source, only the Generic handler will
// receive events; the other handlers are intentionally empty.
type qWorkloadHandler struct{}
   172  
// Create is a no-op; this handler only acts on generic events.
func (h *qWorkloadHandler) Create(context.Context, event.CreateEvent, workqueue.RateLimitingInterface) {
}
   175  
// Update is a no-op; this handler only acts on generic events.
func (h *qWorkloadHandler) Update(context.Context, event.UpdateEvent, workqueue.RateLimitingInterface) {
}
   178  
// Delete is a no-op; this handler only acts on generic events.
func (h *qWorkloadHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}
   181  
   182  func (h *qWorkloadHandler) Generic(_ context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
   183  	w := e.Object.(*kueue.Workload)
   184  	if w.Name == "" {
   185  		return
   186  	}
   187  	req := reconcile.Request{
   188  		NamespacedName: types.NamespacedName{
   189  			Name:      w.Spec.QueueName,
   190  			Namespace: w.Namespace,
   191  		},
   192  	}
   193  	q.AddAfter(req, constants.UpdatesBatchPeriod)
   194  }
   195  
// qCQHandler signals the controller to reconcile the LocalQueues that
// reference the ClusterQueue in the event.
// The client is used to list those LocalQueues.
type qCQHandler struct {
	client client.Client
}
   201  
   202  func (h *qCQHandler) Create(ctx context.Context, e event.CreateEvent, wq workqueue.RateLimitingInterface) {
   203  	cq, ok := e.Object.(*kueue.ClusterQueue)
   204  	if !ok {
   205  		return
   206  	}
   207  	h.addLocalQueueToWorkQueue(ctx, cq, wq)
   208  }
   209  
   210  func (h *qCQHandler) Update(ctx context.Context, e event.UpdateEvent, wq workqueue.RateLimitingInterface) {
   211  	newCq, ok := e.ObjectNew.(*kueue.ClusterQueue)
   212  	if !ok {
   213  		return
   214  	}
   215  	oldCq, ok := e.ObjectOld.(*kueue.ClusterQueue)
   216  	if !ok {
   217  		return
   218  	}
   219  	// Iff .status.conditions of the clusterQueue is updated,
   220  	// this handler sends all queues related to the clusterQueue to workqueue.
   221  	if equality.Semantic.DeepEqual(oldCq.Status.Conditions, newCq.Status.Conditions) {
   222  		return
   223  	}
   224  	h.addLocalQueueToWorkQueue(ctx, newCq, wq)
   225  }
   226  
   227  func (h *qCQHandler) Delete(ctx context.Context, e event.DeleteEvent, wq workqueue.RateLimitingInterface) {
   228  	cq, ok := e.Object.(*kueue.ClusterQueue)
   229  	if !ok {
   230  		return
   231  	}
   232  	h.addLocalQueueToWorkQueue(ctx, cq, wq)
   233  }
   234  
// Generic is a no-op; generic events are not acted upon by this handler.
func (h *qCQHandler) Generic(context.Context, event.GenericEvent, workqueue.RateLimitingInterface) {
}
   237  
   238  func (h *qCQHandler) addLocalQueueToWorkQueue(ctx context.Context, cq *kueue.ClusterQueue, wq workqueue.RateLimitingInterface) {
   239  	log := ctrl.LoggerFrom(ctx).WithValues("clusterQueue", klog.KObj(cq))
   240  	ctx = ctrl.LoggerInto(ctx, log)
   241  
   242  	var queues kueue.LocalQueueList
   243  	err := h.client.List(ctx, &queues, client.MatchingFields{indexer.QueueClusterQueueKey: cq.Name})
   244  	if err != nil {
   245  		log.Error(err, "Could not list queues that match the clusterQueue")
   246  		return
   247  	}
   248  	for _, q := range queues.Items {
   249  		wq.Add(reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&q)})
   250  	}
   251  }
   252  
   253  // SetupWithManager sets up the controller with the Manager.
   254  func (r *LocalQueueReconciler) SetupWithManager(mgr ctrl.Manager, cfg *config.Configuration) error {
   255  	queueCQHandler := qCQHandler{
   256  		client: r.client,
   257  	}
   258  	return ctrl.NewControllerManagedBy(mgr).
   259  		For(&kueue.LocalQueue{}).
   260  		WithOptions(controller.Options{NeedLeaderElection: ptr.To(false)}).
   261  		WatchesRawSource(&source.Channel{Source: r.wlUpdateCh}, &qWorkloadHandler{}).
   262  		Watches(&kueue.ClusterQueue{}, &queueCQHandler).
   263  		WithEventFilter(r).
   264  		Complete(WithLeadingManager(mgr, r, &kueue.LocalQueue{}, cfg))
   265  }
   266  
   267  func (r *LocalQueueReconciler) UpdateStatusIfChanged(
   268  	ctx context.Context,
   269  	queue *kueue.LocalQueue,
   270  	conditionStatus metav1.ConditionStatus,
   271  	reason, msg string,
   272  ) error {
   273  	oldStatus := queue.Status.DeepCopy()
   274  	pendingWls, err := r.queues.PendingWorkloads(queue)
   275  	if err != nil {
   276  		r.log.Error(err, failedUpdateLqStatusMsg)
   277  		return err
   278  	}
   279  	stats, err := r.cache.LocalQueueUsage(queue)
   280  	if err != nil {
   281  		r.log.Error(err, failedUpdateLqStatusMsg)
   282  		return err
   283  	}
   284  	queue.Status.PendingWorkloads = pendingWls
   285  	queue.Status.ReservingWorkloads = int32(stats.ReservingWorkloads)
   286  	queue.Status.AdmittedWorkloads = int32(stats.AdmittedWorkloads)
   287  	queue.Status.FlavorsReservation = stats.ReservedResources
   288  	queue.Status.FlavorUsage = stats.AdmittedResources
   289  	if len(conditionStatus) != 0 && len(reason) != 0 && len(msg) != 0 {
   290  		meta.SetStatusCondition(&queue.Status.Conditions, metav1.Condition{
   291  			Type:    kueue.LocalQueueActive,
   292  			Status:  conditionStatus,
   293  			Reason:  reason,
   294  			Message: msg,
   295  		})
   296  	}
   297  	if !equality.Semantic.DeepEqual(oldStatus, queue.Status) {
   298  		return r.client.Status().Update(ctx, queue)
   299  	}
   300  	return nil
   301  }