sigs.k8s.io/kueue@v0.6.2/pkg/queue/cluster_queue_impl.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package queue
    18  
    19  import (
    20  	"context"
    21  	"sort"
    22  	"sync"
    23  
    24  	corev1 "k8s.io/api/core/v1"
    25  	"k8s.io/apimachinery/pkg/api/equality"
    26  	apimeta "k8s.io/apimachinery/pkg/api/meta"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/labels"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/utils/clock"
    31  	"sigs.k8s.io/controller-runtime/pkg/client"
    32  
    33  	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
    34  	"sigs.k8s.io/kueue/pkg/util/heap"
    35  	"sigs.k8s.io/kueue/pkg/workload"
    36  )
    37  
// clusterQueueBase is an incomplete base implementation of ClusterQueue
// interface. It can be inherited and overwritten by other types.
type clusterQueueBase struct {
	// heap holds the admissible pending workloads, ordered by lessFunc.
	heap heap.Heap
	// cohort is the cohort name from the ClusterQueue spec.
	cohort string
	// namespaceSelector restricts which namespaces may submit to this queue.
	namespaceSelector labels.Selector
	// active mirrors the ClusterQueueActive status condition.
	active bool

	// inadmissibleWorkloads are workloads that have been tried at least once and couldn't be admitted.
	inadmissibleWorkloads map[string]*workload.Info

	// popCycle identifies the last call to Pop. It's incremented when calling Pop.
	// popCycle and queueInadmissibleCycle are used to track when there is a requeuing
	// of inadmissible workloads while a workload is being scheduled.
	popCycle int64

	// queueInadmissibleCycle stores the popId at the time when
	// QueueInadmissibleWorkloads is called.
	queueInadmissibleCycle int64

	// lessFunc orders two *workload.Info items; shared with the heap and Snapshot.
	lessFunc func(a, b interface{}) bool

	// rwm guards all mutable fields above.
	rwm sync.RWMutex

	// clock is injected for testability of backoff expiry checks.
	clock clock.Clock
}
    64  
    65  func newClusterQueueImpl(
    66  	keyFunc func(obj interface{}) string,
    67  	lessFunc func(a, b interface{}) bool,
    68  	clock clock.Clock,
    69  ) *clusterQueueBase {
    70  	return &clusterQueueBase{
    71  		heap:                   heap.New(keyFunc, lessFunc),
    72  		inadmissibleWorkloads:  make(map[string]*workload.Info),
    73  		queueInadmissibleCycle: -1,
    74  		lessFunc:               lessFunc,
    75  		rwm:                    sync.RWMutex{},
    76  		clock:                  clock,
    77  	}
    78  }
    79  
    80  func (c *clusterQueueBase) Update(apiCQ *kueue.ClusterQueue) error {
    81  	c.rwm.Lock()
    82  	defer c.rwm.Unlock()
    83  	c.cohort = apiCQ.Spec.Cohort
    84  	nsSelector, err := metav1.LabelSelectorAsSelector(apiCQ.Spec.NamespaceSelector)
    85  	if err != nil {
    86  		return err
    87  	}
    88  	c.namespaceSelector = nsSelector
    89  	c.active = apimeta.IsStatusConditionTrue(apiCQ.Status.Conditions, kueue.ClusterQueueActive)
    90  	return nil
    91  }
    92  
    93  func (c *clusterQueueBase) Cohort() string {
    94  	return c.cohort
    95  }
    96  
    97  func (c *clusterQueueBase) AddFromLocalQueue(q *LocalQueue) bool {
    98  	c.rwm.Lock()
    99  	defer c.rwm.Unlock()
   100  	added := false
   101  	for _, info := range q.items {
   102  		if c.heap.PushIfNotPresent(info) {
   103  			added = true
   104  		}
   105  	}
   106  	return added
   107  }
   108  
// PushOrUpdate adds wInfo to the queue, or refreshes the stored copy if the
// workload is already tracked, deciding whether it belongs in the heap or
// in the inadmissible set.
func (c *clusterQueueBase) PushOrUpdate(wInfo *workload.Info) {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	key := workload.Key(wInfo.Obj)
	oldInfo := c.inadmissibleWorkloads[key]
	if oldInfo != nil {
		// update in place if the workload was inadmissible and didn't change
		// to potentially become admissible, unless the Eviction status changed
		// which can affect the workloads order in the queue.
		if equality.Semantic.DeepEqual(oldInfo.Obj.Spec, wInfo.Obj.Spec) &&
			equality.Semantic.DeepEqual(oldInfo.Obj.Status.ReclaimablePods, wInfo.Obj.Status.ReclaimablePods) &&
			equality.Semantic.DeepEqual(apimeta.FindStatusCondition(oldInfo.Obj.Status.Conditions, kueue.WorkloadEvicted),
				apimeta.FindStatusCondition(wInfo.Obj.Status.Conditions, kueue.WorkloadEvicted)) {
			c.inadmissibleWorkloads[key] = wInfo
			return
		}
		// otherwise move or update in place in the queue.
		delete(c.inadmissibleWorkloads, key)
	}
	// A workload that is not already in the heap and whose requeue backoff
	// has not yet expired is parked in the inadmissible set rather than
	// pushed, so it won't be scheduled before its requeueAt time.
	if c.heap.GetByKey(key) == nil && !c.backoffWaitingTimeExpired(wInfo) {
		c.inadmissibleWorkloads[key] = wInfo
		return
	}
	c.heap.PushOrUpdate(wInfo)
}
   134  
   135  // backoffWaitingTimeExpired returns true if the current time is after the requeueAt.
   136  func (c *clusterQueueBase) backoffWaitingTimeExpired(wInfo *workload.Info) bool {
   137  	if wInfo.Obj.Status.RequeueState == nil || wInfo.Obj.Status.RequeueState.RequeueAt == nil {
   138  		return true
   139  	}
   140  	if _, evictedByTimeout := workload.IsEvictedByPodsReadyTimeout(wInfo.Obj); !evictedByTimeout {
   141  		return true
   142  	}
   143  	// It needs to verify the requeueAt by "Equal" function
   144  	// since the "After" function evaluates the nanoseconds despite the metav1.Time is seconds level precision.
   145  	return c.clock.Now().After(wInfo.Obj.Status.RequeueState.RequeueAt.Time) ||
   146  		c.clock.Now().Equal(wInfo.Obj.Status.RequeueState.RequeueAt.Time)
   147  }
   148  
// Delete removes the workload from both the inadmissible set and the heap.
// NOTE(review): this method mutates shared state without acquiring c.rwm;
// DeleteFromLocalQueue holds the write lock when calling it, so locking
// here would self-deadlock — confirm that all other callers synchronize
// before invoking Delete.
func (c *clusterQueueBase) Delete(w *kueue.Workload) {
	key := workload.Key(w)
	delete(c.inadmissibleWorkloads, key)
	c.heap.Delete(key)
}
   154  
   155  func (c *clusterQueueBase) DeleteFromLocalQueue(q *LocalQueue) {
   156  	c.rwm.Lock()
   157  	defer c.rwm.Unlock()
   158  	for _, w := range q.items {
   159  		key := workload.Key(w.Obj)
   160  		if wl := c.inadmissibleWorkloads[key]; wl != nil {
   161  			delete(c.inadmissibleWorkloads, key)
   162  		}
   163  	}
   164  	for _, w := range q.items {
   165  		c.Delete(w.Obj)
   166  	}
   167  }
   168  
// requeueIfNotPresent inserts a workload that cannot be admitted into
// ClusterQueue, unless it is already in the queue. If immediate is true
// or if there was a call to QueueInadmissibleWorkloads after a call to Pop,
// the workload will be pushed back to heap directly. Otherwise, the workload
// will be put into the inadmissibleWorkloads.
func (c *clusterQueueBase) requeueIfNotPresent(wInfo *workload.Info, immediate bool) bool {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	key := workload.Key(wInfo.Obj)
	// Push straight back to the heap when the backoff expired AND either the
	// caller asked for it, inadmissible workloads were requeued since the
	// last Pop, or flavors are still pending.
	// NOTE(review): wInfo.LastAssignment is dereferenced without a nil check;
	// presumably callers guarantee it is set (or immediate short-circuits) —
	// confirm against call sites.
	if c.backoffWaitingTimeExpired(wInfo) &&
		(immediate || c.queueInadmissibleCycle >= c.popCycle || wInfo.LastAssignment.PendingFlavors()) {
		// If the workload was inadmissible, move it back into the queue.
		inadmissibleWl := c.inadmissibleWorkloads[key]
		if inadmissibleWl != nil {
			wInfo = inadmissibleWl
			delete(c.inadmissibleWorkloads, key)
		}
		return c.heap.PushIfNotPresent(wInfo)
	}

	// Already parked as inadmissible: nothing to do.
	if c.inadmissibleWorkloads[key] != nil {
		return false
	}

	// Already in the heap: nothing to do.
	if data := c.heap.GetByKey(key); data != nil {
		return false
	}

	// Park the workload until the next QueueInadmissibleWorkloads call.
	c.inadmissibleWorkloads[key] = wInfo

	return true
}
   201  
// QueueInadmissibleWorkloads moves all workloads from inadmissibleWorkloads to heap.
// If at least one workload is moved, returns true, otherwise returns false.
func (c *clusterQueueBase) QueueInadmissibleWorkloads(ctx context.Context, client client.Client) bool {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	// Record the cycle so requeueIfNotPresent knows a requeue happened while
	// a popped workload was being scheduled.
	c.queueInadmissibleCycle = c.popCycle
	if len(c.inadmissibleWorkloads) == 0 {
		return false
	}

	// Workloads that still can't be moved are collected into a fresh map.
	inadmissibleWorkloads := make(map[string]*workload.Info)
	moved := false
	for key, wInfo := range c.inadmissibleWorkloads {
		ns := corev1.Namespace{}
		err := client.Get(ctx, types.NamespacedName{Name: wInfo.Obj.Namespace}, &ns)
		// Keep the workload inadmissible if the namespace can't be fetched,
		// no longer matches the selector, or its backoff hasn't expired.
		if err != nil || !c.namespaceSelector.Matches(labels.Set(ns.Labels)) || !c.backoffWaitingTimeExpired(wInfo) {
			inadmissibleWorkloads[key] = wInfo
		} else {
			moved = c.heap.PushIfNotPresent(wInfo) || moved
		}
	}

	c.inadmissibleWorkloads = inadmissibleWorkloads
	return moved
}
   227  
   228  func (c *clusterQueueBase) Pending() int {
   229  	c.rwm.RLock()
   230  	defer c.rwm.RUnlock()
   231  	return c.PendingActive() + c.PendingInadmissible()
   232  }
   233  
// PendingActive returns the number of workloads currently in the heap.
// NOTE(review): reads the heap without holding c.rwm; Pending invokes it
// under the read lock, but any other caller must synchronize itself.
func (c *clusterQueueBase) PendingActive() int {
	return c.heap.Len()
}
   237  
// PendingInadmissible returns the number of workloads parked as inadmissible.
// NOTE(review): reads the map without holding c.rwm; Pending invokes it
// under the read lock, but any other caller must synchronize itself.
func (c *clusterQueueBase) PendingInadmissible() int {
	return len(c.inadmissibleWorkloads)
}
   241  
   242  func (c *clusterQueueBase) Pop() *workload.Info {
   243  	c.rwm.Lock()
   244  	defer c.rwm.Unlock()
   245  	c.popCycle++
   246  	if c.heap.Len() == 0 {
   247  		return nil
   248  	}
   249  
   250  	info := c.heap.Pop()
   251  	return info.(*workload.Info)
   252  }
   253  
   254  func (c *clusterQueueBase) Dump() ([]string, bool) {
   255  	c.rwm.RLock()
   256  	defer c.rwm.RUnlock()
   257  	if c.heap.Len() == 0 {
   258  		return nil, false
   259  	}
   260  	elements := make([]string, c.heap.Len())
   261  	for i, e := range c.heap.List() {
   262  		info := e.(*workload.Info)
   263  		elements[i] = workload.Key(info.Obj)
   264  	}
   265  	return elements, true
   266  }
   267  
   268  func (c *clusterQueueBase) DumpInadmissible() ([]string, bool) {
   269  	c.rwm.RLock()
   270  	defer c.rwm.RUnlock()
   271  	if len(c.inadmissibleWorkloads) == 0 {
   272  		return nil, false
   273  	}
   274  	elements := make([]string, 0, len(c.inadmissibleWorkloads))
   275  	for _, info := range c.inadmissibleWorkloads {
   276  		elements = append(elements, workload.Key(info.Obj))
   277  	}
   278  	return elements, true
   279  }
   280  
   281  func (c *clusterQueueBase) Snapshot() []*workload.Info {
   282  	elements := c.totalElements()
   283  	sort.Slice(elements, func(i, j int) bool {
   284  		return c.lessFunc(elements[i], elements[j])
   285  	})
   286  	return elements
   287  }
   288  
   289  func (c *clusterQueueBase) Info(key string) *workload.Info {
   290  	c.rwm.RLock()
   291  	defer c.rwm.RUnlock()
   292  	info := c.heap.GetByKey(key)
   293  	if info == nil {
   294  		return nil
   295  	}
   296  	return info.(*workload.Info)
   297  }
   298  
   299  func (c *clusterQueueBase) totalElements() []*workload.Info {
   300  	c.rwm.RLock()
   301  	defer c.rwm.RUnlock()
   302  	totalLen := c.heap.Len() + len(c.inadmissibleWorkloads)
   303  	elements := make([]*workload.Info, 0, totalLen)
   304  	for _, e := range c.heap.List() {
   305  		info := e.(*workload.Info)
   306  		elements = append(elements, info)
   307  	}
   308  	for _, e := range c.inadmissibleWorkloads {
   309  		elements = append(elements, e)
   310  	}
   311  	return elements
   312  }
   313  
   314  func (c *clusterQueueBase) Active() bool {
   315  	c.rwm.RLock()
   316  	defer c.rwm.RUnlock()
   317  	return c.active
   318  }