sigs.k8s.io/kueue@v0.6.2/pkg/queue/cluster_queue_impl.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"context"
	"sort"
	"sync"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/equality"
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/utils/clock"
	"sigs.k8s.io/controller-runtime/pkg/client"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
	"sigs.k8s.io/kueue/pkg/util/heap"
	"sigs.k8s.io/kueue/pkg/workload"
)

// clusterQueueBase is an incomplete base implementation of the ClusterQueue
// interface. It can be embedded in and overridden by other types.
type clusterQueueBase struct {
	heap              heap.Heap
	cohort            string
	namespaceSelector labels.Selector
	active            bool

	// inadmissibleWorkloads are workloads that have been tried at least once and couldn't be admitted.
	inadmissibleWorkloads map[string]*workload.Info

	// popCycle identifies the last call to Pop. It's incremented when calling Pop.
	// popCycle and queueInadmissibleCycle are used to track whether there was a
	// requeuing of inadmissible workloads while a workload was being scheduled.
	popCycle int64

	// queueInadmissibleCycle stores the popCycle value at the time when
	// QueueInadmissibleWorkloads is called.
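	// When a workload is requeued, queueInadmissibleCycle >= popCycle means that
	// QueueInadmissibleWorkloads ran after the workload was popped, so the
	// workload is pushed straight back into the heap instead of being parked in
	// inadmissibleWorkloads (see requeueIfNotPresent).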
	queueInadmissibleCycle int64

	lessFunc func(a, b interface{}) bool

	rwm sync.RWMutex

	clock clock.Clock
}

func newClusterQueueImpl(
	keyFunc func(obj interface{}) string,
	lessFunc func(a, b interface{}) bool,
	clock clock.Clock,
) *clusterQueueBase {
	return &clusterQueueBase{
		heap:                   heap.New(keyFunc, lessFunc),
		inadmissibleWorkloads:  make(map[string]*workload.Info),
		queueInadmissibleCycle: -1,
		lessFunc:               lessFunc,
		rwm:                    sync.RWMutex{},
		clock:                  clock,
	}
}

func (c *clusterQueueBase) Update(apiCQ *kueue.ClusterQueue) error {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	c.cohort = apiCQ.Spec.Cohort
	nsSelector, err := metav1.LabelSelectorAsSelector(apiCQ.Spec.NamespaceSelector)
	if err != nil {
		return err
	}
	c.namespaceSelector = nsSelector
	c.active = apimeta.IsStatusConditionTrue(apiCQ.Status.Conditions, kueue.ClusterQueueActive)
	return nil
}

func (c *clusterQueueBase) Cohort() string {
	return c.cohort
}

func (c *clusterQueueBase) AddFromLocalQueue(q *LocalQueue) bool {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	added := false
	for _, info := range q.items {
		if c.heap.PushIfNotPresent(info) {
			added = true
		}
	}
	return added
}

func (c *clusterQueueBase) PushOrUpdate(wInfo *workload.Info) {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	key := workload.Key(wInfo.Obj)
	oldInfo := c.inadmissibleWorkloads[key]
	if oldInfo != nil {
		// Update in place if the workload was inadmissible and didn't change in a
		// way that could make it admissible, unless the Evicted condition changed,
		// which can affect the workload's order in the queue.
		if equality.Semantic.DeepEqual(oldInfo.Obj.Spec, wInfo.Obj.Spec) &&
			equality.Semantic.DeepEqual(oldInfo.Obj.Status.ReclaimablePods, wInfo.Obj.Status.ReclaimablePods) &&
			equality.Semantic.DeepEqual(apimeta.FindStatusCondition(oldInfo.Obj.Status.Conditions, kueue.WorkloadEvicted),
				apimeta.FindStatusCondition(wInfo.Obj.Status.Conditions, kueue.WorkloadEvicted)) {
			c.inadmissibleWorkloads[key] = wInfo
			return
		}
		// Otherwise move it to the heap, or update it in place there.
		delete(c.inadmissibleWorkloads, key)
	}
	if c.heap.GetByKey(key) == nil && !c.backoffWaitingTimeExpired(wInfo) {
		c.inadmissibleWorkloads[key] = wInfo
		return
	}
	c.heap.PushOrUpdate(wInfo)
}

// backoffWaitingTimeExpired returns true if the current time is at or after requeueAt.
func (c *clusterQueueBase) backoffWaitingTimeExpired(wInfo *workload.Info) bool {
	if wInfo.Obj.Status.RequeueState == nil || wInfo.Obj.Status.RequeueState.RequeueAt == nil {
		return true
	}
	if _, evictedByTimeout := workload.IsEvictedByPodsReadyTimeout(wInfo.Obj); !evictedByTimeout {
		return true
	}
	// requeueAt must also be checked with "Equal" because "After" compares at
	// nanosecond granularity, while metav1.Time only has second-level precision.
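	// For example, with requeueAt stored as 12:00:05 (truncated to whole seconds)
	// and the clock reading exactly 12:00:05.000000000, "After" alone would still
	// report the backoff as not expired; the "Equal" check treats that boundary
	// instant as expired.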
	return c.clock.Now().After(wInfo.Obj.Status.RequeueState.RequeueAt.Time) ||
		c.clock.Now().Equal(wInfo.Obj.Status.RequeueState.RequeueAt.Time)
}

func (c *clusterQueueBase) Delete(w *kueue.Workload) {
	key := workload.Key(w)
	delete(c.inadmissibleWorkloads, key)
	c.heap.Delete(key)
}

func (c *clusterQueueBase) DeleteFromLocalQueue(q *LocalQueue) {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	for _, w := range q.items {
		key := workload.Key(w.Obj)
		if wl := c.inadmissibleWorkloads[key]; wl != nil {
			delete(c.inadmissibleWorkloads, key)
		}
	}
	for _, w := range q.items {
		c.Delete(w.Obj)
	}
}

// requeueIfNotPresent inserts a workload that couldn't be admitted into the
// ClusterQueue, unless it is already present in the queue. If immediate is
// true, or if QueueInadmissibleWorkloads was called after the last call to
// Pop, the workload is pushed back into the heap directly (provided its
// backoff waiting time, if any, has expired). Otherwise, the workload is put
// into inadmissibleWorkloads.
func (c *clusterQueueBase) requeueIfNotPresent(wInfo *workload.Info, immediate bool) bool {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	key := workload.Key(wInfo.Obj)
	if c.backoffWaitingTimeExpired(wInfo) &&
		(immediate || c.queueInadmissibleCycle >= c.popCycle || wInfo.LastAssignment.PendingFlavors()) {
		// If the workload was inadmissible, move it back into the queue.
		inadmissibleWl := c.inadmissibleWorkloads[key]
		if inadmissibleWl != nil {
			wInfo = inadmissibleWl
			delete(c.inadmissibleWorkloads, key)
		}
		return c.heap.PushIfNotPresent(wInfo)
	}

	if c.inadmissibleWorkloads[key] != nil {
		return false
	}

	if data := c.heap.GetByKey(key); data != nil {
		return false
	}

	c.inadmissibleWorkloads[key] = wInfo

	return true
}

// QueueInadmissibleWorkloads moves all workloads from inadmissibleWorkloads to
// the heap. It returns true if at least one workload was moved, false otherwise.
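// Workloads whose namespace can no longer be read or no longer matches the
// ClusterQueue's namespaceSelector, or whose backoff waiting time has not yet
// expired, stay in inadmissibleWorkloads.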
func (c *clusterQueueBase) QueueInadmissibleWorkloads(ctx context.Context, client client.Client) bool {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	c.queueInadmissibleCycle = c.popCycle
	if len(c.inadmissibleWorkloads) == 0 {
		return false
	}

	inadmissibleWorkloads := make(map[string]*workload.Info)
	moved := false
	for key, wInfo := range c.inadmissibleWorkloads {
		ns := corev1.Namespace{}
		err := client.Get(ctx, types.NamespacedName{Name: wInfo.Obj.Namespace}, &ns)
		if err != nil || !c.namespaceSelector.Matches(labels.Set(ns.Labels)) || !c.backoffWaitingTimeExpired(wInfo) {
			inadmissibleWorkloads[key] = wInfo
		} else {
			moved = c.heap.PushIfNotPresent(wInfo) || moved
		}
	}

	c.inadmissibleWorkloads = inadmissibleWorkloads
	return moved
}

// Pending returns the total number of pending workloads: those in the heap
// plus those parked as inadmissible.
func (c *clusterQueueBase) Pending() int {
	c.rwm.RLock()
	defer c.rwm.RUnlock()
	return c.PendingActive() + c.PendingInadmissible()
}

func (c *clusterQueueBase) PendingActive() int {
	return c.heap.Len()
}

func (c *clusterQueueBase) PendingInadmissible() int {
	return len(c.inadmissibleWorkloads)
}

// Pop removes and returns the next workload in the heap, or nil if the heap is
// empty. Every call advances popCycle, even when nothing is returned.
func (c *clusterQueueBase) Pop() *workload.Info {
	c.rwm.Lock()
	defer c.rwm.Unlock()
	c.popCycle++
	if c.heap.Len() == 0 {
		return nil
	}

	info := c.heap.Pop()
	return info.(*workload.Info)
}

func (c *clusterQueueBase) Dump() ([]string, bool) {
	c.rwm.RLock()
	defer c.rwm.RUnlock()
	if c.heap.Len() == 0 {
		return nil, false
	}
	elements := make([]string, c.heap.Len())
	for i, e := range c.heap.List() {
		info := e.(*workload.Info)
		elements[i] = workload.Key(info.Obj)
	}
	return elements, true
}

func (c *clusterQueueBase) DumpInadmissible() ([]string, bool) {
	c.rwm.RLock()
	defer c.rwm.RUnlock()
	if len(c.inadmissibleWorkloads) == 0 {
		return nil, false
	}
	elements := make([]string, 0, len(c.inadmissibleWorkloads))
	for _, info := range c.inadmissibleWorkloads {
		elements = append(elements, workload.Key(info.Obj))
	}
	return elements, true
}

// Snapshot returns all pending workloads, both active and inadmissible, sorted
// by the queue's lessFunc.
func (c *clusterQueueBase) Snapshot() []*workload.Info {
	elements := c.totalElements()
	sort.Slice(elements, func(i, j int) bool {
		return c.lessFunc(elements[i], elements[j])
	})
	return elements
}

func (c *clusterQueueBase) Info(key string) *workload.Info {
	c.rwm.RLock()
	defer c.rwm.RUnlock()
	info := c.heap.GetByKey(key)
	if info == nil {
		return nil
	}
	return info.(*workload.Info)
}

func (c *clusterQueueBase) totalElements() []*workload.Info {
	c.rwm.RLock()
	defer c.rwm.RUnlock()
	totalLen := c.heap.Len() + len(c.inadmissibleWorkloads)
	elements := make([]*workload.Info, 0, totalLen)
	for _, e := range c.heap.List() {
		info := e.(*workload.Info)
		elements = append(elements, info)
	}
	for _, e := range c.inadmissibleWorkloads {
		elements = append(elements, e)
	}
	return elements
}

func (c *clusterQueueBase) Active() bool {
	c.rwm.RLock()
	defer c.rwm.RUnlock()
	return c.active
}
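
// illustrativeSchedulingPass is a minimal sketch, not part of the upstream
// kueue file, of how a scheduler-like caller might drive a clusterQueueBase:
// pop pending workloads and requeue the ones it cannot admit. The tryAdmit
// callback is a hypothetical stand-in for the real admission logic.
func illustrativeSchedulingPass(cq *clusterQueueBase, tryAdmit func(*workload.Info) bool) {
	for wInfo := cq.Pop(); wInfo != nil; wInfo = cq.Pop() {
		if tryAdmit(wInfo) {
			continue
		}
		// Not admitted: requeue it. With immediate=false the workload is parked in
		// inadmissibleWorkloads unless QueueInadmissibleWorkloads ran since this
		// Pop or its last assignment still has pending flavors.
		cq.requeueIfNotPresent(wInfo, false)
	}
}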