k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/internal/queue/scheduling_queue.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This file contains structures that implement scheduling queue types.
// Scheduling queues hold pods waiting to be scheduled. This file implements a
// priority queue which has two sub queues and an additional data structure,
// namely: activeQ, backoffQ and unschedulablePods.
//   - activeQ holds pods that are being considered for scheduling.
//   - backoffQ holds pods that moved from unschedulablePods and will move to
//     activeQ when their backoff periods complete.
//   - unschedulablePods holds pods that were already attempted for scheduling and
//     are currently determined to be unschedulable.

package queue

import (
	"container/list"
	"context"
	"fmt"
	"math/rand"
	"reflect"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/informers"
	listersv1 "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
	"k8s.io/kubernetes/pkg/scheduler/internal/heap"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
	"k8s.io/kubernetes/pkg/scheduler/util"
	"k8s.io/utils/clock"
)

const (
	// DefaultPodMaxInUnschedulablePodsDuration is the default value for the maximum
	// time a pod can stay in unschedulablePods. If a pod stays in unschedulablePods
	// for longer than this value, the pod will be moved from unschedulablePods to
	// backoffQ or activeQ. If this value is empty, the default value (5min)
	// will be used.
	DefaultPodMaxInUnschedulablePodsDuration time.Duration = 5 * time.Minute
	// Scheduling queue names
	activeQ           = "Active"
	backoffQ          = "Backoff"
	unschedulablePods = "Unschedulable"

	preEnqueue = "PreEnqueue"
)

const (
	// DefaultPodInitialBackoffDuration is the default value for the initial backoff duration
	// for unschedulable pods. To change the default podInitialBackoffDurationSeconds used by the
	// scheduler, update the ComponentConfig value in defaults.go.
	DefaultPodInitialBackoffDuration time.Duration = 1 * time.Second
	// DefaultPodMaxBackoffDuration is the default value for the max backoff duration
	// for unschedulable pods. To change the default podMaxBackoffDurationSeconds used by the
	// scheduler, update the ComponentConfig value in defaults.go.
	DefaultPodMaxBackoffDuration time.Duration = 10 * time.Second
)

// PreEnqueueCheck is a function type. It's used to build functions that run a
// check against a Pod so the caller can decide, based on the result, whether
// to enqueue or skip the Pod.
type PreEnqueueCheck func(pod *v1.Pod) bool
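// For example, a minimal PreEnqueueCheck (an illustrative sketch, not code
// from this package) that only admits pods without scheduling gates:
//
//	check := PreEnqueueCheck(func(pod *v1.Pod) bool {
//		return len(pod.Spec.SchedulingGates) == 0
//	})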
// SchedulingQueue is an interface for a queue to store pods waiting to be scheduled.
// The interface follows a pattern similar to cache.FIFO and cache.Heap and
// makes it easy to use those data structures as a SchedulingQueue.
type SchedulingQueue interface {
	framework.PodNominator
	Add(logger klog.Logger, pod *v1.Pod) error
	// Activate moves the given pods to activeQ iff they're in unschedulablePods or backoffQ.
	// The passed-in pods are originally compiled from plugins that want to activate Pods,
	// by injecting the pods through a reserved CycleState struct (PodsToActivate).
	Activate(logger klog.Logger, pods map[string]*v1.Pod)
	// AddUnschedulableIfNotPresent adds an unschedulable pod back to the scheduling queue.
	// The podSchedulingCycle represents the current scheduling cycle number, which can be
	// obtained by calling SchedulingCycle().
	AddUnschedulableIfNotPresent(logger klog.Logger, pod *framework.QueuedPodInfo, podSchedulingCycle int64) error
	// SchedulingCycle returns the current scheduling cycle number, which is
	// cached by the scheduling queue. Normally, incrementing this number whenever
	// a pod is popped (e.g. via Pop()) is enough.
	SchedulingCycle() int64
	// Pop removes the head of the queue and returns it. It blocks if the
	// queue is empty and waits until a new item is added to the queue.
	Pop(logger klog.Logger) (*framework.QueuedPodInfo, error)
	// Done must be called for a pod returned by Pop. This allows the queue to
	// keep track of which pods are currently being processed.
	Done(types.UID)
	Update(logger klog.Logger, oldPod, newPod *v1.Pod) error
	Delete(pod *v1.Pod) error
	// TODO(sanposhiho): move all PreEnqueueCheck to Requeue and delete it from this parameter eventually.
	// Some PreEnqueueCheck include event filtering logic based on some in-tree plugins,
	// which badly affects other plugins.
	// See https://github.com/kubernetes/kubernetes/issues/110175
	MoveAllToActiveOrBackoffQueue(logger klog.Logger, event framework.ClusterEvent, oldObj, newObj interface{}, preCheck PreEnqueueCheck)
	AssignedPodAdded(logger klog.Logger, pod *v1.Pod)
	AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod)
	PendingPods() ([]*v1.Pod, string)
	PodsInActiveQ() []*v1.Pod
	// Close closes the SchedulingQueue so that the goroutine which is
	// waiting to pop items can exit gracefully.
	Close()
	// Run starts the goroutines managing the queue.
	Run(logger klog.Logger)
}

// NewSchedulingQueue initializes a priority queue as a new scheduling queue.
func NewSchedulingQueue(
	lessFn framework.LessFunc,
	informerFactory informers.SharedInformerFactory,
	opts ...Option) SchedulingQueue {
	return NewPriorityQueue(lessFn, informerFactory, opts...)
}

// NominatedNodeName returns the nominated node name of a Pod.
func NominatedNodeName(pod *v1.Pod) string {
	return pod.Status.NominatedNodeName
}
// PriorityQueue implements a scheduling queue.
// The head of PriorityQueue is the highest priority pending pod. This structure
// has two sub queues and an additional data structure, namely: activeQ,
// backoffQ and unschedulablePods.
//   - activeQ holds pods that are being considered for scheduling.
//   - backoffQ holds pods that moved from unschedulablePods and will move to
//     activeQ when their backoff periods complete.
//   - unschedulablePods holds pods that were already attempted for scheduling and
//     are currently determined to be unschedulable.
type PriorityQueue struct {
	*nominator

	stop  chan struct{}
	clock clock.Clock

	// pod initial backoff duration.
	podInitialBackoffDuration time.Duration
	// pod maximum backoff duration.
	podMaxBackoffDuration time.Duration
	// the maximum time a pod can stay in unschedulablePods.
	podMaxInUnschedulablePodsDuration time.Duration

	cond sync.Cond

	// inFlightPods holds the UID of all pods which have been popped out for which Done
	// hasn't been called yet - in other words, all pods that are currently being
	// processed (being scheduled, in permit, or in the binding cycle).
	//
	// The values in the map are the entry of each pod in the inFlightEvents list.
	// The value of that entry is the *v1.Pod at the time that scheduling of that
	// pod started, which can be useful for logging or debugging.
	inFlightPods map[types.UID]*list.Element

	// inFlightEvents holds the events received by the scheduling queue
	// (entry value is clusterEvent) together with in-flight pods (entry
	// value is *v1.Pod). Entries get added at the end while the mutex is
	// locked, so they get serialized.
	//
	// The pod entries are added in Pop and used to track which events
	// occurred after the pod scheduling attempt for that pod started.
	// They get removed when the scheduling attempt is done, at which
	// point all events that occurred in the meantime are processed.
	//
	// After removal of a pod, events at the start of the list are no
	// longer needed because all of the other in-flight pods started
	// later. Those events can be removed.
	inFlightEvents *list.List

	// activeQ is a heap structure that the scheduler actively looks at to find pods to
	// schedule. The head of the heap is the highest priority pod.
	activeQ *heap.Heap
	// podBackoffQ is a heap ordered by backoff expiry. Pods which have completed backoff
	// are popped from this heap before the scheduler looks at activeQ.
	podBackoffQ *heap.Heap
	// unschedulablePods holds pods that have been tried and determined unschedulable.
	unschedulablePods *UnschedulablePods
	// schedulingCycle represents the sequence number of the scheduling cycle and is incremented
	// when a pod is popped.
	schedulingCycle int64
	// moveRequestCycle caches the sequence number of the scheduling cycle when we
	// received a move request. Unschedulable pods in and before this scheduling
	// cycle will be put back to activeQ if we were trying to schedule them
	// when we received the move request.
	// TODO: this will be removed after SchedulingQueueHint goes to stable and the feature gate is removed.
	moveRequestCycle int64

	// preEnqueuePluginMap is keyed with profile name, valued with registered preEnqueue plugins.
	preEnqueuePluginMap map[string][]framework.PreEnqueuePlugin
	// queueingHintMap is keyed with profile name, valued with registered queueing hint functions.
	queueingHintMap QueueingHintMapPerProfile

	// closed indicates that the queue is closed.
	// It is mainly used to let Pop() exit its control loop while waiting for an item.
	closed bool

	nsLister listersv1.NamespaceLister

	metricsRecorder metrics.MetricAsyncRecorder
	// pluginMetricsSamplePercent is the percentage of plugin metrics to be sampled.
	pluginMetricsSamplePercent int

	// isSchedulingQueueHintEnabled indicates whether the SchedulerQueueingHints feature gate is enabled.
	isSchedulingQueueHintEnabled bool
}

// QueueingHintFunction is the wrapper of QueueingHintFn that has PluginName.
type QueueingHintFunction struct {
	PluginName     string
	QueueingHintFn framework.QueueingHintFn
}

// clusterEvent has the event and involved objects.
type clusterEvent struct {
	event framework.ClusterEvent
	// oldObj is the old object involved in this event.
	oldObj interface{}
	// newObj is the new object involved in this event.
	newObj interface{}
}

type priorityQueueOptions struct {
	clock                             clock.Clock
	podInitialBackoffDuration         time.Duration
	podMaxBackoffDuration             time.Duration
	podMaxInUnschedulablePodsDuration time.Duration
	podLister                         listersv1.PodLister
	metricsRecorder                   metrics.MetricAsyncRecorder
	pluginMetricsSamplePercent        int
	preEnqueuePluginMap               map[string][]framework.PreEnqueuePlugin
	queueingHintMap                   QueueingHintMapPerProfile
}

// Option configures a PriorityQueue.
type Option func(*priorityQueueOptions)

// WithClock sets the clock for PriorityQueue; the default clock is clock.RealClock.
func WithClock(clock clock.Clock) Option {
	return func(o *priorityQueueOptions) {
		o.clock = clock
	}
}

// WithPodInitialBackoffDuration sets the pod initial backoff duration for PriorityQueue.
func WithPodInitialBackoffDuration(duration time.Duration) Option {
	return func(o *priorityQueueOptions) {
		o.podInitialBackoffDuration = duration
	}
}

// WithPodMaxBackoffDuration sets the pod max backoff duration for PriorityQueue.
func WithPodMaxBackoffDuration(duration time.Duration) Option {
	return func(o *priorityQueueOptions) {
		o.podMaxBackoffDuration = duration
	}
}

// WithPodLister sets the pod lister for PriorityQueue.
func WithPodLister(pl listersv1.PodLister) Option {
	return func(o *priorityQueueOptions) {
		o.podLister = pl
	}
}

// WithPodMaxInUnschedulablePodsDuration sets podMaxInUnschedulablePodsDuration for PriorityQueue.
func WithPodMaxInUnschedulablePodsDuration(duration time.Duration) Option {
	return func(o *priorityQueueOptions) {
		o.podMaxInUnschedulablePodsDuration = duration
	}
}

// QueueingHintMapPerProfile is keyed with profile name, valued with queueing hint map registered for the profile.
type QueueingHintMapPerProfile map[string]QueueingHintMap

// QueueingHintMap is keyed with ClusterEvent, valued with queueing hint functions registered for the event.
type QueueingHintMap map[framework.ClusterEvent][]*QueueingHintFunction

// WithQueueingHintMapPerProfile sets queueingHintMap for PriorityQueue.
func WithQueueingHintMapPerProfile(m QueueingHintMapPerProfile) Option {
	return func(o *priorityQueueOptions) {
		o.queueingHintMap = m
	}
}

// WithPreEnqueuePluginMap sets preEnqueuePluginMap for PriorityQueue.
func WithPreEnqueuePluginMap(m map[string][]framework.PreEnqueuePlugin) Option {
	return func(o *priorityQueueOptions) {
		o.preEnqueuePluginMap = m
	}
}
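// The options above follow the functional-options pattern. An illustrative
// construction (lessFn and informerFactory are the caller's own values):
//
//	pq := NewPriorityQueue(lessFn, informerFactory,
//		WithPodInitialBackoffDuration(2*time.Second),
//		WithPodMaxBackoffDuration(30*time.Second),
//		WithPodMaxInUnschedulablePodsDuration(10*time.Minute),
//	)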
// WithMetricsRecorder sets the metrics recorder.
func WithMetricsRecorder(recorder metrics.MetricAsyncRecorder) Option {
	return func(o *priorityQueueOptions) {
		o.metricsRecorder = recorder
	}
}

// WithPluginMetricsSamplePercent sets the percentage of plugin metrics to be sampled.
func WithPluginMetricsSamplePercent(percent int) Option {
	return func(o *priorityQueueOptions) {
		o.pluginMetricsSamplePercent = percent
	}
}

var defaultPriorityQueueOptions = priorityQueueOptions{
	clock:                             clock.RealClock{},
	podInitialBackoffDuration:         DefaultPodInitialBackoffDuration,
	podMaxBackoffDuration:             DefaultPodMaxBackoffDuration,
	podMaxInUnschedulablePodsDuration: DefaultPodMaxInUnschedulablePodsDuration,
}

// Making sure that PriorityQueue implements SchedulingQueue.
var _ SchedulingQueue = &PriorityQueue{}

// newQueuedPodInfoForLookup builds a QueuedPodInfo object for a lookup in the queue.
func newQueuedPodInfoForLookup(pod *v1.Pod, plugins ...string) *framework.QueuedPodInfo {
	// Since this is only used for a lookup in the queue, we only need to set the Pod,
	// and so we avoid creating a full PodInfo, which is expensive to instantiate frequently.
	return &framework.QueuedPodInfo{
		PodInfo:              &framework.PodInfo{Pod: pod},
		UnschedulablePlugins: sets.New(plugins...),
	}
}

// NewPriorityQueue creates a PriorityQueue object.
func NewPriorityQueue(
	lessFn framework.LessFunc,
	informerFactory informers.SharedInformerFactory,
	opts ...Option,
) *PriorityQueue {
	options := defaultPriorityQueueOptions
	if options.podLister == nil {
		options.podLister = informerFactory.Core().V1().Pods().Lister()
	}
	for _, opt := range opts {
		opt(&options)
	}

	comp := func(podInfo1, podInfo2 interface{}) bool {
		pInfo1 := podInfo1.(*framework.QueuedPodInfo)
		pInfo2 := podInfo2.(*framework.QueuedPodInfo)
		return lessFn(pInfo1, pInfo2)
	}

	pq := &PriorityQueue{
		nominator:                         newPodNominator(options.podLister),
		clock:                             options.clock,
		stop:                              make(chan struct{}),
		podInitialBackoffDuration:         options.podInitialBackoffDuration,
		podMaxBackoffDuration:             options.podMaxBackoffDuration,
		podMaxInUnschedulablePodsDuration: options.podMaxInUnschedulablePodsDuration,
		activeQ:                           heap.NewWithRecorder(podInfoKeyFunc, comp, metrics.NewActivePodsRecorder()),
		unschedulablePods:                 newUnschedulablePods(metrics.NewUnschedulablePodsRecorder(), metrics.NewGatedPodsRecorder()),
		inFlightPods:                      make(map[types.UID]*list.Element),
		inFlightEvents:                    list.New(),
		preEnqueuePluginMap:               options.preEnqueuePluginMap,
		queueingHintMap:                   options.queueingHintMap,
		metricsRecorder:                   options.metricsRecorder,
		pluginMetricsSamplePercent:        options.pluginMetricsSamplePercent,
		moveRequestCycle:                  -1,
		isSchedulingQueueHintEnabled:      utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints),
	}
	pq.cond.L = &pq.lock
	pq.podBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, pq.podsCompareBackoffCompleted, metrics.NewBackoffPodsRecorder())
	pq.nsLister = informerFactory.Core().V1().Namespaces().Lister()

	return pq
}

// Run starts the goroutines that pump pods from podBackoffQ to activeQ and
// flush stale pods out of unschedulablePods.
func (p *PriorityQueue) Run(logger klog.Logger) {
	go wait.Until(func() {
		p.flushBackoffQCompleted(logger)
	}, 1.0*time.Second, p.stop)
	go wait.Until(func() {
		p.flushUnschedulablePodsLeftover(logger)
	}, 30*time.Second, p.stop)
}
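// Note: the flush goroutines started by Run are stopped via the p.stop
// channel, which Close() closes, so a queue that has been Run should
// eventually be Close()d to avoid leaking the two goroutines.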
// queueingStrategy indicates how the scheduling queue should enqueue the Pod from the unschedulable pod pool.
type queueingStrategy int

const (
	// queueSkip indicates that the scheduling queue should skip requeuing the Pod to activeQ/backoffQ.
	queueSkip queueingStrategy = iota
	// queueAfterBackoff indicates that the scheduling queue should requeue the Pod after backoff is completed.
	queueAfterBackoff
	// queueImmediately indicates that the scheduling queue should skip backoff and requeue the Pod immediately to activeQ.
	queueImmediately
)

// isEventOfInterest returns true if the event is of interest to some plugins.
func (p *PriorityQueue) isEventOfInterest(logger klog.Logger, event framework.ClusterEvent) bool {
	if event.IsWildCard() {
		return true
	}

	for _, hintMap := range p.queueingHintMap {
		for eventToMatch := range hintMap {
			if eventToMatch.Match(event) {
				// Some plugin is interested in this event.
				return true
			}
		}
	}

	logger.V(6).Info("received an event that no enabled plugin is interested in", "event", event)

	return false
}

// isPodWorthRequeuing calls the QueueingHintFn of only the plugins registered in pInfo.UnschedulablePlugins and pInfo.PendingPlugins.
//
// If any of pInfo.PendingPlugins return Queue,
// the scheduling queue is supposed to enqueue this Pod to activeQ, skipping backoffQ.
// If any of pInfo.UnschedulablePlugins return Queue,
// the scheduling queue is supposed to enqueue this Pod to activeQ/backoffQ depending on the remaining backoff time of the Pod.
// If all QueueingHintFns return Skip, the scheduling queue enqueues the Pod back to the unschedulable Pod pool
// because no plugin changes the scheduling result via the event.
func (p *PriorityQueue) isPodWorthRequeuing(logger klog.Logger, pInfo *framework.QueuedPodInfo, event framework.ClusterEvent, oldObj, newObj interface{}) queueingStrategy {
	rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
	if rejectorPlugins.Len() == 0 {
		logger.V(6).Info("Worth requeuing because no failed plugins", "pod", klog.KObj(pInfo.Pod))
		return queueAfterBackoff
	}

	if event.IsWildCard() {
		// The wildcard event is a special one: someone wants to force all Pods to move to activeQ/backoffQ.
		// We return queueAfterBackoff in this case, while resetting all blocked plugins.
		logger.V(6).Info("Worth requeuing because the event is wildcard", "pod", klog.KObj(pInfo.Pod))
		return queueAfterBackoff
	}

	hintMap, ok := p.queueingHintMap[pInfo.Pod.Spec.SchedulerName]
	if !ok {
		// shouldn't reach here unless there is a bug.
		logger.Error(nil, "No QueueingHintMap is registered for this profile", "profile", pInfo.Pod.Spec.SchedulerName, "pod", klog.KObj(pInfo.Pod))
		return queueAfterBackoff
	}

	pod := pInfo.Pod
	queueStrategy := queueSkip
	for eventToMatch, hintfns := range hintMap {
		if !eventToMatch.Match(event) {
			continue
		}

		for _, hintfn := range hintfns {
			if !rejectorPlugins.Has(hintfn.PluginName) {
				// skip if the hintfn is not from one of the rejectorPlugins.
				continue
			}

			hint, err := hintfn.QueueingHintFn(logger, pod, oldObj, newObj)
			if err != nil {
				// If the QueueingHintFn returned an error, we should treat the event as Queue so that we can prevent
				// the Pod from being stuck in the unschedulable pod pool.
				oldObjMeta, newObjMeta, asErr := util.As[klog.KMetadata](oldObj, newObj)
				if asErr != nil {
					logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod))
				} else {
					logger.Error(err, "QueueingHintFn returns error", "event", event, "plugin", hintfn.PluginName, "pod", klog.KObj(pod), "oldObj", klog.KObj(oldObjMeta), "newObj", klog.KObj(newObjMeta))
				}
				hint = framework.Queue
			}
			if hint == framework.QueueSkip {
				continue
			}

			if pInfo.PendingPlugins.Has(hintfn.PluginName) {
				// We interpret Queue from a Pending plugin as queueImmediately.
				// We can return immediately because queueImmediately is the highest priority.
				return queueImmediately
			}

			// We interpret Queue from an unschedulable plugin as queueAfterBackoff.

			if pInfo.PendingPlugins.Len() == 0 {
				// We can return immediately because this Pod has no Pending plugins registered,
				// which are the only ones that can yield queueImmediately,
				// and queueAfterBackoff is the second highest priority.
				return queueAfterBackoff
			}

			// We can't return immediately because there are some Pending plugins registered for this Pod.
			// We need to check whether those plugins return Queue, and if they do, we return queueImmediately.
			queueStrategy = queueAfterBackoff
		}
	}

	return queueStrategy
}

// runPreEnqueuePlugins calls the PreEnqueue function of each registered PreEnqueuePlugin.
// It returns true if all PreEnqueue functions run successfully; otherwise returns false
// upon the first failure.
// Note: we need to associate the failed plugin to `pInfo`, so that the pod can be moved back
// to activeQ by a related cluster event.
func (p *PriorityQueue) runPreEnqueuePlugins(ctx context.Context, pInfo *framework.QueuedPodInfo) bool {
	logger := klog.FromContext(ctx)
	var s *framework.Status
	pod := pInfo.Pod
	startTime := p.clock.Now()
	defer func() {
		metrics.FrameworkExtensionPointDuration.WithLabelValues(preEnqueue, s.Code().String(), pod.Spec.SchedulerName).Observe(metrics.SinceInSeconds(startTime))
	}()

	shouldRecordMetric := rand.Intn(100) < p.pluginMetricsSamplePercent
	for _, pl := range p.preEnqueuePluginMap[pod.Spec.SchedulerName] {
		s = p.runPreEnqueuePlugin(ctx, pl, pod, shouldRecordMetric)
		if s.IsSuccess() {
			continue
		}
		pInfo.UnschedulablePlugins.Insert(pl.Name())
		metrics.UnschedulableReason(pl.Name(), pod.Spec.SchedulerName).Inc()
		if s.Code() == framework.Error {
			logger.Error(s.AsError(), "Unexpected error running PreEnqueue plugin", "pod", klog.KObj(pod), "plugin", pl.Name())
		} else {
			logger.V(4).Info("Status after running PreEnqueue plugin", "pod", klog.KObj(pod), "plugin", pl.Name(), "status", s)
		}
		return false
	}
	return true
}

func (p *PriorityQueue) runPreEnqueuePlugin(ctx context.Context, pl framework.PreEnqueuePlugin, pod *v1.Pod, shouldRecordMetric bool) *framework.Status {
	if !shouldRecordMetric {
		return pl.PreEnqueue(ctx, pod)
	}
	startTime := p.clock.Now()
	s := pl.PreEnqueue(ctx, pod)
	p.metricsRecorder.ObservePluginDurationAsync(preEnqueue, pl.Name(), s.Code().String(), p.clock.Since(startTime).Seconds())
	return s
}
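// For orientation, a minimal PreEnqueuePlugin (an illustrative sketch; the
// holdPlugin type and the "example.com/hold" annotation are hypothetical)
// would satisfy the interface used above like this:
//
//	type holdPlugin struct{}
//
//	func (h *holdPlugin) Name() string { return "ExampleHold" }
//
//	func (h *holdPlugin) PreEnqueue(ctx context.Context, pod *v1.Pod) *framework.Status {
//		if pod.Annotations["example.com/hold"] == "true" {
//			return framework.NewStatus(framework.UnschedulableAndUnresolvable, "held by annotation")
//		}
//		return nil
//	}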
// addToActiveQ tries to add the pod to the active queue. It returns 2 parameters:
// 1. a boolean flag to indicate whether the pod is added successfully.
// 2. an error for the caller to act on.
func (p *PriorityQueue) addToActiveQ(logger klog.Logger, pInfo *framework.QueuedPodInfo) (bool, error) {
	pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
	if pInfo.Gated {
		// Add the Pod to unschedulablePods if it's not passing PreEnqueuePlugins.
		p.unschedulablePods.addOrUpdate(pInfo)
		return false, nil
	}
	if pInfo.InitialAttemptTimestamp == nil {
		now := p.clock.Now()
		pInfo.InitialAttemptTimestamp = &now
	}
	if err := p.activeQ.Add(pInfo); err != nil {
		logger.Error(err, "Error adding pod to the active queue", "pod", klog.KObj(pInfo.Pod))
		return false, err
	}
	return true, nil
}

// Add adds a pod to the active queue. It should be called only when a new pod
// is added, so there is no chance the pod is already in the active/unschedulable/backoff queues.
func (p *PriorityQueue) Add(logger klog.Logger, pod *v1.Pod) error {
	p.lock.Lock()
	defer p.lock.Unlock()

	pInfo := p.newQueuedPodInfo(pod)
	gated := pInfo.Gated
	if added, err := p.addToActiveQ(logger, pInfo); !added {
		return err
	}
	if p.unschedulablePods.get(pod) != nil {
		logger.Error(nil, "Error: pod is already in the unschedulable queue", "pod", klog.KObj(pod))
		p.unschedulablePods.delete(pod, gated)
	}
	// Delete the pod from backoffQ if it is backing off.
	if err := p.podBackoffQ.Delete(pInfo); err == nil {
		logger.Error(nil, "Error: pod is already in the podBackoff queue", "pod", klog.KObj(pod))
	}
	logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", PodAdd, "queue", activeQ)
	metrics.SchedulerQueueIncomingPods.WithLabelValues("active", PodAdd).Inc()
	p.addNominatedPodUnlocked(logger, pInfo.PodInfo, nil)
	p.cond.Broadcast()

	return nil
}

// Activate moves the given pods to activeQ iff they're in unschedulablePods or backoffQ.
func (p *PriorityQueue) Activate(logger klog.Logger, pods map[string]*v1.Pod) {
	p.lock.Lock()
	defer p.lock.Unlock()

	activated := false
	for _, pod := range pods {
		if p.activate(logger, pod) {
			activated = true
		}
	}

	if activated {
		p.cond.Broadcast()
	}
}

func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
	// Verify if the pod is present in activeQ.
	if _, exists, _ := p.activeQ.Get(newQueuedPodInfoForLookup(pod)); exists {
		// No need to activate if it's already present in activeQ.
		return false
	}
	var pInfo *framework.QueuedPodInfo
	// Verify if the pod is present in unschedulablePods or backoffQ.
	if pInfo = p.unschedulablePods.get(pod); pInfo == nil {
		// If the pod doesn't belong to unschedulablePods or backoffQ, don't activate it.
		if obj, exists, _ := p.podBackoffQ.Get(newQueuedPodInfoForLookup(pod)); !exists {
			logger.Error(nil, "To-activate pod does not exist in unschedulablePods or backoffQ", "pod", klog.KObj(pod))
			return false
		} else {
			pInfo = obj.(*framework.QueuedPodInfo)
		}
	}

	if pInfo == nil {
		// Redundant safety check. We shouldn't reach here.
		logger.Error(nil, "Internal error: cannot obtain pInfo")
		return false
	}

	gated := pInfo.Gated
	if added, _ := p.addToActiveQ(logger, pInfo); !added {
		return false
	}
	p.unschedulablePods.delete(pInfo.Pod, gated)
	p.podBackoffQ.Delete(pInfo)
	metrics.SchedulerQueueIncomingPods.WithLabelValues("active", ForceActivate).Inc()
	p.addNominatedPodUnlocked(logger, pInfo.PodInfo, nil)
	return true
}

// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
// If this returns true, the pod should not be re-tried.
func (p *PriorityQueue) isPodBackingoff(podInfo *framework.QueuedPodInfo) bool {
	if podInfo.Gated {
		return false
	}
	boTime := p.getBackoffTime(podInfo)
	return boTime.After(p.clock.Now())
}

// SchedulingCycle returns the current scheduling cycle.
func (p *PriorityQueue) SchedulingCycle() int64 {
	p.lock.RLock()
	defer p.lock.RUnlock()
	return p.schedulingCycle
}

// determineSchedulingHintForInFlightPod looks at the unschedulable plugins of the given Pod
// and determines the scheduling hint for this Pod while checking the events that happened while it was in flight.
func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) queueingStrategy {
	logger.V(5).Info("Checking events for in-flight pod", "pod", klog.KObj(pInfo.Pod), "unschedulablePlugins", pInfo.UnschedulablePlugins, "inFlightEventsSize", p.inFlightEvents.Len(), "inFlightPodsSize", len(p.inFlightPods))

	// AddUnschedulableIfNotPresent is called with the Pod at the end of scheduling or binding,
	// so the given pInfo should have been Pop()ed before,
	// and we can assume pInfo must be recorded in inFlightPods and thus inFlightEvents.
	inFlightPod, ok := p.inFlightPods[pInfo.Pod.UID]
	if !ok {
		// This can happen while updating a pod. In that case pInfo.UnschedulablePlugins should
		// be empty. If it is not, we may have a problem.
		if len(pInfo.UnschedulablePlugins) != 0 {
			logger.Error(nil, "In flight Pod isn't found in the scheduling queue. If you see this error log, it's likely a bug in the scheduler.", "pod", klog.KObj(pInfo.Pod))
			return queueAfterBackoff
		}
		if p.inFlightEvents.Len() > len(p.inFlightPods) {
			return queueAfterBackoff
		}
		return queueSkip
	}

	rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
	if len(rejectorPlugins) == 0 {
		// No failed plugins are associated with this Pod,
		// meaning something unusual (a temporary failure on kube-apiserver, etc.) happened and this Pod gets moved back to the queue.
		// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
		return queueAfterBackoff
	}

	// Check if there is an event that makes this Pod schedulable based on pInfo.UnschedulablePlugins.
	queueingStrategy := queueSkip
	for event := inFlightPod.Next(); event != nil; event = event.Next() {
		e, ok := event.Value.(*clusterEvent)
		if !ok {
			// Must be another in-flight Pod (*v1.Pod). Can be ignored.
			continue
		}
		logger.V(5).Info("Checking event for in-flight pod", "pod", klog.KObj(pInfo.Pod), "event", e.event.Label)

		switch p.isPodWorthRequeuing(logger, pInfo, e.event, e.oldObj, e.newObj) {
		case queueSkip:
			continue
		case queueImmediately:
			// queueImmediately is the highest priority.
			// No need to go through the rest of the events.
			return queueImmediately
		case queueAfterBackoff:
			// replace schedulingHint with queueAfterBackoff
			queueingStrategy = queueAfterBackoff
			if pInfo.PendingPlugins.Len() == 0 {
				// We can return immediately because this Pod has no Pending plugins registered,
				// which are the only ones that can yield queueImmediately,
				// and queueAfterBackoff is the second highest priority.
				return queueAfterBackoff
			}
		}
	}
	return queueingStrategy
}

// addUnschedulableWithoutQueueingHint inserts a pod that cannot be scheduled into
// the queue, unless it is already in the queue. Normally, PriorityQueue puts
// unschedulable pods in `unschedulablePods`. But if there has been a recent move
// request, then the pod is put in `podBackoffQ`.
// TODO: This function is called only when p.isSchedulingQueueHintEnabled is false,
// and it will be removed after SchedulingQueueHint goes to stable and the feature gate is removed.
func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, podSchedulingCycle int64) error {
	pod := pInfo.Pod
	// Refresh the timestamp since the pod is re-added.
	pInfo.Timestamp = p.clock.Now()

	// When the queueing hint is enabled, UnschedulablePlugins and PendingPlugins are used differently.
	// But when the queueing hint isn't enabled, we use all of them as UnschedulablePlugins so that we don't break the old behaviour.
	rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)

	// If a move request has been received, move it to the BackoffQ, otherwise move
	// it to unschedulablePods.
	for plugin := range rejectorPlugins {
		metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Inc()
	}
	if p.moveRequestCycle >= podSchedulingCycle || len(rejectorPlugins) == 0 {
		// Two cases to move a Pod to the active/backoff queue:
		//   - The Pod is rejected by some plugins, but a move request was received after this Pod's scheduling cycle started.
		//     In this case, the received event may make the Pod schedulable and we should retry scheduling it.
		//   - No unschedulable plugins are associated with this Pod,
		//     meaning something unusual (a temporary failure on kube-apiserver, etc.) happened and this Pod gets moved back to the queue.
		//     In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
		if err := p.podBackoffQ.Add(pInfo); err != nil {
			return fmt.Errorf("error adding pod %v to the backoff queue: %v", klog.KObj(pod), err)
		}
		logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", ScheduleAttemptFailure, "queue", backoffQ)
		metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", ScheduleAttemptFailure).Inc()
	} else {
		p.unschedulablePods.addOrUpdate(pInfo)
		logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", ScheduleAttemptFailure, "queue", unschedulablePods)
		metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", ScheduleAttemptFailure).Inc()
	}

	p.addNominatedPodUnlocked(logger, pInfo.PodInfo, nil)
	return nil
}
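// A typical call pattern from the scheduling loop (an illustrative sketch;
// q and podInfo are the caller's own variables) pairs the cycle captured
// before the attempt with the re-add:
//
//	cycle := q.SchedulingCycle()
//	// ... a scheduling attempt fails ...
//	if err := q.AddUnschedulableIfNotPresent(logger, podInfo, cycle); err != nil {
//		// handle the error
//	}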
// AddUnschedulableIfNotPresent inserts a pod that cannot be scheduled into
// the queue, unless it is already in the queue. Normally, PriorityQueue puts
// unschedulable pods in `unschedulablePods`. But if there has been a recent move
// request, then the pod is put in `podBackoffQ`.
func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *framework.QueuedPodInfo, podSchedulingCycle int64) error {
	p.lock.Lock()
	defer p.lock.Unlock()

	// In any case, this Pod will be moved back to the queue and we should call Done.
	defer p.done(pInfo.Pod.UID)

	pod := pInfo.Pod
	if p.unschedulablePods.get(pod) != nil {
		return fmt.Errorf("Pod %v is already present in unschedulable queue", klog.KObj(pod))
	}

	if _, exists, _ := p.activeQ.Get(pInfo); exists {
		return fmt.Errorf("Pod %v is already present in the active queue", klog.KObj(pod))
	}
	if _, exists, _ := p.podBackoffQ.Get(pInfo); exists {
		return fmt.Errorf("Pod %v is already present in the backoff queue", klog.KObj(pod))
	}

	if !p.isSchedulingQueueHintEnabled {
		// fall back to the old behavior which doesn't depend on the queueing hint.
		return p.addUnschedulableWithoutQueueingHint(logger, pInfo, podSchedulingCycle)
	}

	// Refresh the timestamp since the pod is re-added.
	pInfo.Timestamp = p.clock.Now()

	// If a move request has been received, move it to the BackoffQ, otherwise move
	// it to unschedulablePods.
	rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
	for plugin := range rejectorPlugins {
		metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Inc()
	}

	// We check whether this Pod may change its scheduling result by any of the events that happened during scheduling.
	schedulingHint := p.determineSchedulingHintForInFlightPod(logger, pInfo)

	// In this case, we try to requeue this Pod to activeQ/backoffQ.
	queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, ScheduleAttemptFailure)
	logger.V(3).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", ScheduleAttemptFailure, "queue", queue, "schedulingCycle", podSchedulingCycle, "hint", schedulingHint, "unschedulable plugins", rejectorPlugins)
	if queue == activeQ {
		// When the Pod is moved to activeQ, we need to let p.cond know so that the Pod will be Pop()ed out.
		p.cond.Broadcast()
	}

	p.addNominatedPodUnlocked(logger, pInfo.PodInfo, nil)
	return nil
}

// flushBackoffQCompleted moves all pods from backoffQ which have completed their backoff into activeQ.
func (p *PriorityQueue) flushBackoffQCompleted(logger klog.Logger) {
	p.lock.Lock()
	defer p.lock.Unlock()
	activated := false
	for {
		rawPodInfo := p.podBackoffQ.Peek()
		if rawPodInfo == nil {
			break
		}
		pInfo := rawPodInfo.(*framework.QueuedPodInfo)
		pod := pInfo.Pod
		if p.isPodBackingoff(pInfo) {
			break
		}
		_, err := p.podBackoffQ.Pop()
		if err != nil {
			logger.Error(err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
			break
		}
		if added, _ := p.addToActiveQ(logger, pInfo); added {
			logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", BackoffComplete, "queue", activeQ)
			metrics.SchedulerQueueIncomingPods.WithLabelValues("active", BackoffComplete).Inc()
			activated = true
		}
	}

	if activated {
		p.cond.Broadcast()
	}
}
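// Note: the flush helper above and the one below are the loops driven by Run:
// flushBackoffQCompleted runs every second, and flushUnschedulablePodsLeftover
// every 30 seconds, until the stop channel is closed.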
// flushUnschedulablePodsLeftover moves pods which stay in unschedulablePods
// longer than podMaxInUnschedulablePodsDuration to backoffQ or activeQ.
func (p *PriorityQueue) flushUnschedulablePodsLeftover(logger klog.Logger) {
	p.lock.Lock()
	defer p.lock.Unlock()

	var podsToMove []*framework.QueuedPodInfo
	currentTime := p.clock.Now()
	for _, pInfo := range p.unschedulablePods.podInfoMap {
		lastScheduleTime := pInfo.Timestamp
		if currentTime.Sub(lastScheduleTime) > p.podMaxInUnschedulablePodsDuration {
			podsToMove = append(podsToMove, pInfo)
		}
	}

	if len(podsToMove) > 0 {
		p.movePodsToActiveOrBackoffQueue(logger, podsToMove, UnschedulableTimeout, nil, nil)
	}
}

// Pop removes the head of the active queue and returns it. It blocks if the
// activeQ is empty and waits until a new item is added to the queue. It
// increments the scheduling cycle when a pod is popped.
func (p *PriorityQueue) Pop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
	p.lock.Lock()
	defer p.lock.Unlock()
	for p.activeQ.Len() == 0 {
		// When the queue is empty, invocation of Pop() is blocked until a new item is enqueued.
		// When Close() is called, p.closed is set and the condition is broadcast,
		// which causes this loop to continue and return from Pop().
		if p.closed {
			logger.V(2).Info("Scheduling queue is closed")
			return nil, nil
		}
		p.cond.Wait()
	}
	obj, err := p.activeQ.Pop()
	if err != nil {
		return nil, err
	}
	pInfo := obj.(*framework.QueuedPodInfo)
	pInfo.Attempts++
	p.schedulingCycle++
	// In flight, no concurrent events yet.
	if p.isSchedulingQueueHintEnabled {
		p.inFlightPods[pInfo.Pod.UID] = p.inFlightEvents.PushBack(pInfo.Pod)
	}

	// Update metrics and reset the set of unschedulable plugins for the next attempt.
	for plugin := range pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins) {
		metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Dec()
	}
	pInfo.UnschedulablePlugins.Clear()
	pInfo.PendingPlugins.Clear()

	return pInfo, nil
}

// Done must be called for a pod returned by Pop. This allows the queue to
// keep track of which pods are currently being processed.
func (p *PriorityQueue) Done(pod types.UID) {
	p.lock.Lock()
	defer p.lock.Unlock()

	p.done(pod)
}

func (p *PriorityQueue) done(pod types.UID) {
	if !p.isSchedulingQueueHintEnabled {
		// do nothing if schedulingQueueHint is disabled.
		// In that case, we don't have inFlightPods and inFlightEvents.
		return
	}
	inFlightPod, ok := p.inFlightPods[pod]
	if !ok {
		// This Pod is already done()ed.
		return
	}
	delete(p.inFlightPods, pod)

	// Remove the pod from the list.
	p.inFlightEvents.Remove(inFlightPod)

	// Remove events which are only referred to by this Pod
	// so that the inFlightEvents list doesn't grow infinitely.
	// If the pod was at the head of the list, then all
	// events between it and the next pod are no longer needed
	// and can be removed.
	for {
		e := p.inFlightEvents.Front()
		if e == nil {
			// Empty list.
			break
		}
		if _, ok := e.Value.(*clusterEvent); !ok {
			// A pod, must stop pruning.
			break
		}
		p.inFlightEvents.Remove(e)
	}
}
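// In-flight lifecycle, end to end (illustrative; q is the caller's queue,
// with SchedulerQueueingHints enabled):
//
//	podInfo, _ := q.Pop(logger)   // the pod is recorded in inFlightPods/inFlightEvents
//	// ... scheduling attempt runs; concurrent cluster events are appended
//	// to inFlightEvents behind the pod's entry ...
//	q.Done(podInfo.Pod.UID)       // the pod's entry and no-longer-needed events are pruned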
// isPodUpdated checks if the pod is updated in a way that it may have become
// schedulable. It drops the status of the pod and compares it with the old version,
// except for pod.status.resourceClaimStatuses: changing that may have an
// effect on scheduling.
func isPodUpdated(oldPod, newPod *v1.Pod) bool {
	strip := func(pod *v1.Pod) *v1.Pod {
		p := pod.DeepCopy()
		p.ResourceVersion = ""
		p.Generation = 0
		p.Status = v1.PodStatus{
			ResourceClaimStatuses: pod.Status.ResourceClaimStatuses,
		}
		p.ManagedFields = nil
		p.Finalizers = nil
		return p
	}
	return !reflect.DeepEqual(strip(oldPod), strip(newPod))
}
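// For example: an update that only bumps metadata.resourceVersion or adds a
// status condition compares equal after strip and returns false, while a
// change to labels, the spec, or status.resourceClaimStatuses returns true.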
// Update updates a pod in the active or backoff queue if present. Otherwise, it removes
// the item from the unschedulable queue if the pod is updated in a way that it may
// become schedulable and adds the updated one to the active queue.
// If the pod is not present in any of the queues, it is added to the active queue.
func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) error {
	p.lock.Lock()
	defer p.lock.Unlock()

	if oldPod != nil {
		oldPodInfo := newQueuedPodInfoForLookup(oldPod)
		// If the pod is already in the active queue, just update it there.
		if oldPodInfo, exists, _ := p.activeQ.Get(oldPodInfo); exists {
			pInfo := updatePod(oldPodInfo, newPod)
			p.updateNominatedPodUnlocked(logger, oldPod, pInfo.PodInfo)
			return p.activeQ.Update(pInfo)
		}

		// If the pod is in the backoff queue, update it there.
		if oldPodInfo, exists, _ := p.podBackoffQ.Get(oldPodInfo); exists {
			pInfo := updatePod(oldPodInfo, newPod)
			p.updateNominatedPodUnlocked(logger, oldPod, pInfo.PodInfo)
			return p.podBackoffQ.Update(pInfo)
		}
	}

	// If the pod is in the unschedulable queue, updating it may make it schedulable.
	if usPodInfo := p.unschedulablePods.get(newPod); usPodInfo != nil {
		pInfo := updatePod(usPodInfo, newPod)
		p.updateNominatedPodUnlocked(logger, oldPod, pInfo.PodInfo)
		if isPodUpdated(oldPod, newPod) {
			gated := usPodInfo.Gated
			if p.isPodBackingoff(usPodInfo) {
				if err := p.podBackoffQ.Add(pInfo); err != nil {
					return err
				}
				p.unschedulablePods.delete(usPodInfo.Pod, gated)
				logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", PodUpdate, "queue", backoffQ)
			} else {
				if added, err := p.addToActiveQ(logger, pInfo); !added {
					return err
				}
				p.unschedulablePods.delete(usPodInfo.Pod, gated)
				logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", BackoffComplete, "queue", activeQ)
				p.cond.Broadcast()
			}
		} else {
			// Pod update didn't make it schedulable, keep it in the unschedulable queue.
			p.unschedulablePods.addOrUpdate(pInfo)
		}

		return nil
	}
	// If the pod is not in any of the queues, we put it in the active queue.
	pInfo := p.newQueuedPodInfo(newPod)
	if added, err := p.addToActiveQ(logger, pInfo); !added {
		return err
	}
	p.addNominatedPodUnlocked(logger, pInfo.PodInfo, nil)
	logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", PodUpdate, "queue", activeQ)
	p.cond.Broadcast()
	return nil
}

// Delete deletes the item from either of the two queues. It assumes the pod is
// only in one queue.
func (p *PriorityQueue) Delete(pod *v1.Pod) error {
	p.lock.Lock()
	defer p.lock.Unlock()
	p.deleteNominatedPodIfExistsUnlocked(pod)
	pInfo := newQueuedPodInfoForLookup(pod)
	if err := p.activeQ.Delete(pInfo); err != nil {
		// The item was probably not found in the activeQ.
		p.podBackoffQ.Delete(pInfo)
		if pInfo = p.unschedulablePods.get(pod); pInfo != nil {
			p.unschedulablePods.delete(pod, pInfo.Gated)
		}
	}
	return nil
}

// AssignedPodAdded is called when a bound pod is added. Creation of this pod
// may make pending pods with matching affinity terms schedulable.
func (p *PriorityQueue) AssignedPodAdded(logger klog.Logger, pod *v1.Pod) {
	p.lock.Lock()
	p.movePodsToActiveOrBackoffQueue(logger, p.getUnschedulablePodsWithMatchingAffinityTerm(logger, pod), AssignedPodAdd, nil, pod)
	p.lock.Unlock()
}

// isPodResourcesResizedDown returns true if a pod CPU and/or memory resize request has been
// admitted by kubelet, is 'InProgress', and results in a net sizing down of updated resources.
// It returns false if either CPU or memory resource is net resized up, or if no resize is in progress.
func isPodResourcesResizedDown(pod *v1.Pod) bool {
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
		// TODO(vinaykul,wangchen615,InPlacePodVerticalScaling): Fix this to determine when a
		// pod is truly resized down (might need oldPod if we cannot determine from Status alone)
		if pod.Status.Resize == v1.PodResizeStatusInProgress {
			return true
		}
	}
	return false
}

// AssignedPodUpdated is called when a bound pod is updated. Change of labels
// may make pending pods with matching affinity terms schedulable.
func (p *PriorityQueue) AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod) {
	p.lock.Lock()
	if isPodResourcesResizedDown(newPod) {
		p.moveAllToActiveOrBackoffQueue(logger, AssignedPodUpdate, oldPod, newPod, nil)
	} else {
		p.movePodsToActiveOrBackoffQueue(logger, p.getUnschedulablePodsWithMatchingAffinityTerm(logger, newPod), AssignedPodUpdate, oldPod, newPod)
	}
	p.lock.Unlock()
}

// NOTE: this function assumes a lock has been acquired in the caller.
// moveAllToActiveOrBackoffQueue moves all pods from unschedulablePods to activeQ or backoffQ.
// This function adds all pods and then signals the condition variable to ensure that
// if Pop() is waiting for an item, it receives the signal after all the pods are in the
// queue and the head is the highest priority pod.
func (p *PriorityQueue) moveAllToActiveOrBackoffQueue(logger klog.Logger, event framework.ClusterEvent, oldObj, newObj interface{}, preCheck PreEnqueueCheck) {
	if !p.isEventOfInterest(logger, event) {
		// No plugin is interested in this event.
		// Return early before iterating all pods in unschedulablePods for preCheck.
		return
	}

	unschedulablePods := make([]*framework.QueuedPodInfo, 0, len(p.unschedulablePods.podInfoMap))
	for _, pInfo := range p.unschedulablePods.podInfoMap {
		if preCheck == nil || preCheck(pInfo.Pod) {
			unschedulablePods = append(unschedulablePods, pInfo)
		}
	}
	p.movePodsToActiveOrBackoffQueue(logger, unschedulablePods, event, oldObj, newObj)
}
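// An illustrative call of the exported variant below, with a preCheck that
// limits the move to pods in one namespace (the event literal is a sketch,
// not a predefined variable in this file):
//
//	q.MoveAllToActiveOrBackoffQueue(logger,
//		framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add},
//		nil, newNode,
//		func(pod *v1.Pod) bool { return pod.Namespace == "batch" })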
// MoveAllToActiveOrBackoffQueue moves all pods from unschedulablePods to activeQ or backoffQ.
// This function adds all pods and then signals the condition variable to ensure that
// if Pop() is waiting for an item, it receives the signal after all the pods are in the
// queue and the head is the highest priority pod.
func (p *PriorityQueue) MoveAllToActiveOrBackoffQueue(logger klog.Logger, event framework.ClusterEvent, oldObj, newObj interface{}, preCheck PreEnqueueCheck) {
	p.lock.Lock()
	defer p.lock.Unlock()
	p.moveAllToActiveOrBackoffQueue(logger, event, oldObj, newObj, preCheck)
}

// requeuePodViaQueueingHint tries to requeue the Pod to activeQ, backoffQ or the unschedulable pod pool based on schedulingHint.
// It returns the name of the queue the Pod goes to.
//
// NOTE: this function assumes the lock has been acquired in the caller.
func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, strategy queueingStrategy, event string) string {
	if strategy == queueSkip {
		p.unschedulablePods.addOrUpdate(pInfo)
		metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", event).Inc()
		return unschedulablePods
	}

	pod := pInfo.Pod
	if strategy == queueAfterBackoff && p.isPodBackingoff(pInfo) {
		if err := p.podBackoffQ.Add(pInfo); err != nil {
			logger.Error(err, "Error adding pod to the backoff queue, queue this Pod to unschedulable pod pool", "pod", klog.KObj(pod))
			p.unschedulablePods.addOrUpdate(pInfo)
			return unschedulablePods
		}

		metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
		return backoffQ
	}

	// We reach here if the strategy is queueImmediately, or it is queueAfterBackoff but the pod is not backing off.

	added, err := p.addToActiveQ(logger, pInfo)
	if err != nil {
		logger.Error(err, "Error adding pod to the active queue, queue this Pod to unschedulable pod pool", "pod", klog.KObj(pod))
	}
	if added {
		metrics.SchedulerQueueIncomingPods.WithLabelValues("active", event).Inc()
		return activeQ
	}
	if pInfo.Gated {
		// In case the pod is gated, the Pod is pushed back to the unschedulable Pods pool in addToActiveQ.
		return unschedulablePods
	}

	p.unschedulablePods.addOrUpdate(pInfo)
	metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", ScheduleAttemptFailure).Inc()
	return unschedulablePods
}

// NOTE: this function assumes the lock has been acquired in the caller.
func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podInfoList []*framework.QueuedPodInfo, event framework.ClusterEvent, oldObj, newObj interface{}) {
	if !p.isEventOfInterest(logger, event) {
		// No plugin is interested in this event.
		return
	}

	activated := false
	for _, pInfo := range podInfoList {
		// Since there may be many gated pods and they will not move from the
		// unschedulable pool, we skip calling the expensive isPodWorthRequeuing.
		if pInfo.Gated {
			continue
		}
		schedulingHint := p.isPodWorthRequeuing(logger, pInfo, event, oldObj, newObj)
		if schedulingHint == queueSkip {
			// QueueingHintFn determined that this Pod isn't worth putting to activeQ or backoffQ by this event.
			logger.V(5).Info("Event is not making pod schedulable", "pod", klog.KObj(pInfo.Pod), "event", event.Label)
			continue
		}

		p.unschedulablePods.delete(pInfo.Pod, pInfo.Gated)
		queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, event.Label)
		logger.V(4).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event.Label, "queue", queue, "hint", schedulingHint)
		if queue == activeQ {
			activated = true
		}
	}

	p.moveRequestCycle = p.schedulingCycle

	if p.isSchedulingQueueHintEnabled && len(p.inFlightPods) != 0 {
		logger.V(5).Info("Event received while pods are in flight", "event", event.Label, "numPods", len(p.inFlightPods))
		// AddUnschedulableIfNotPresent might get called for in-flight Pods later, and in
		// AddUnschedulableIfNotPresent we need to know whether events were
		// observed while scheduling them.
		p.inFlightEvents.PushBack(&clusterEvent{
			event:  event,
			oldObj: oldObj,
			newObj: newObj,
		})
	}

	if activated {
		p.cond.Broadcast()
	}
}

// getUnschedulablePodsWithMatchingAffinityTerm returns unschedulable pods which have
// any affinity term that matches "pod".
// NOTE: this function assumes the lock has been acquired in the caller.
func (p *PriorityQueue) getUnschedulablePodsWithMatchingAffinityTerm(logger klog.Logger, pod *v1.Pod) []*framework.QueuedPodInfo {
	nsLabels := interpodaffinity.GetNamespaceLabelsSnapshot(logger, pod.Namespace, p.nsLister)

	var podsToMove []*framework.QueuedPodInfo
	for _, pInfo := range p.unschedulablePods.podInfoMap {
		for _, term := range pInfo.RequiredAffinityTerms {
			if term.Matches(pod, nsLabels) {
				podsToMove = append(podsToMove, pInfo)
				break
			}
		}
	}
	return podsToMove
}

// PodsInActiveQ returns all the Pods in the activeQ.
// This function is only used in tests.
func (p *PriorityQueue) PodsInActiveQ() []*v1.Pod {
	p.lock.RLock()
	defer p.lock.RUnlock()
	var result []*v1.Pod
	for _, pInfo := range p.activeQ.List() {
		result = append(result, pInfo.(*framework.QueuedPodInfo).Pod)
	}
	return result
}

var pendingPodsSummary = "activeQ:%v; backoffQ:%v; unschedulablePods:%v"

// PendingPods returns all the pending pods in the queue, accompanied by a debugging
// string showing the number of pods in each queue respectively.
// This function is used for debugging purposes in the scheduler cache dumper and comparer.
func (p *PriorityQueue) PendingPods() ([]*v1.Pod, string) {
	p.lock.RLock()
	defer p.lock.RUnlock()
	var result []*v1.Pod
	for _, pInfo := range p.activeQ.List() {
		result = append(result, pInfo.(*framework.QueuedPodInfo).Pod)
	}
	for _, pInfo := range p.podBackoffQ.List() {
		result = append(result, pInfo.(*framework.QueuedPodInfo).Pod)
	}
	for _, pInfo := range p.unschedulablePods.podInfoMap {
		result = append(result, pInfo.Pod)
	}
	return result, fmt.Sprintf(pendingPodsSummary, p.activeQ.Len(), p.podBackoffQ.Len(), len(p.unschedulablePods.podInfoMap))
}

// Close closes the priority queue.
func (p *PriorityQueue) Close() {
	p.lock.Lock()
	defer p.lock.Unlock()
	close(p.stop)
	p.closed = true
	p.cond.Broadcast()
}
// DeleteNominatedPodIfExists deletes <pod> from nominatedPods.
func (npm *nominator) DeleteNominatedPodIfExists(pod *v1.Pod) {
	npm.lock.Lock()
	npm.deleteNominatedPodIfExistsUnlocked(pod)
	npm.lock.Unlock()
}

func (npm *nominator) deleteNominatedPodIfExistsUnlocked(pod *v1.Pod) {
	npm.delete(pod)
}

// AddNominatedPod adds a pod to the nominated pods of the given node.
// This is called during the preemption process after a node is nominated to run
// the pod. We update the structure before sending a request to update the pod
// object to avoid races with the following scheduling cycles.
func (npm *nominator) AddNominatedPod(logger klog.Logger, pi *framework.PodInfo, nominatingInfo *framework.NominatingInfo) {
	npm.lock.Lock()
	npm.addNominatedPodUnlocked(logger, pi, nominatingInfo)
	npm.lock.Unlock()
}

// NominatedPodsForNode returns a copy of pods that are nominated to run on the given node,
// but they are waiting for other pods to be removed from the node.
func (npm *nominator) NominatedPodsForNode(nodeName string) []*framework.PodInfo {
	npm.lock.RLock()
	defer npm.lock.RUnlock()
	// Make a copy of the nominated Pods so the caller can mutate safely.
	pods := make([]*framework.PodInfo, len(npm.nominatedPods[nodeName]))
	for i := 0; i < len(pods); i++ {
		pods[i] = npm.nominatedPods[nodeName][i].DeepCopy()
	}
	return pods
}

func (p *PriorityQueue) podsCompareBackoffCompleted(podInfo1, podInfo2 interface{}) bool {
	pInfo1 := podInfo1.(*framework.QueuedPodInfo)
	pInfo2 := podInfo2.(*framework.QueuedPodInfo)
	bo1 := p.getBackoffTime(pInfo1)
	bo2 := p.getBackoffTime(pInfo2)
	return bo1.Before(bo2)
}

// newQueuedPodInfo builds a QueuedPodInfo object.
func (p *PriorityQueue) newQueuedPodInfo(pod *v1.Pod, plugins ...string) *framework.QueuedPodInfo {
	now := p.clock.Now()
	// ignore this err since apiserver doesn't properly validate affinity terms
	// and we can't fix the validation for backwards compatibility.
	podInfo, _ := framework.NewPodInfo(pod)
	return &framework.QueuedPodInfo{
		PodInfo:                 podInfo,
		Timestamp:               now,
		InitialAttemptTimestamp: nil,
		UnschedulablePlugins:    sets.New(plugins...),
	}
}

// getBackoffTime returns the time that podInfo completes backoff.
func (p *PriorityQueue) getBackoffTime(podInfo *framework.QueuedPodInfo) time.Time {
	duration := p.calculateBackoffDuration(podInfo)
	backoffTime := podInfo.Timestamp.Add(duration)
	return backoffTime
}

// calculateBackoffDuration is a helper function for calculating the backoffDuration
// based on the number of attempts the pod has made.
func (p *PriorityQueue) calculateBackoffDuration(podInfo *framework.QueuedPodInfo) time.Duration {
	duration := p.podInitialBackoffDuration
	for i := 1; i < podInfo.Attempts; i++ {
		// Use subtraction instead of addition or multiplication to avoid overflow.
		if duration > p.podMaxBackoffDuration-duration {
			return p.podMaxBackoffDuration
		}
		duration += duration
	}
	return duration
}
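// The duration doubles per attempt and is capped at podMaxBackoffDuration.
// With the defaults (1s initial, 10s max), successive attempts back off for
// 1s, 2s, 4s, 8s, 10s, 10s, ...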
type UnschedulablePods struct {
	// podInfoMap is a map keyed by a pod's full name, with the value being a pointer to the QueuedPodInfo.
	podInfoMap map[string]*framework.QueuedPodInfo
	keyFunc    func(*v1.Pod) string
	// unschedulableRecorder/gatedRecorder update the counters when elements of podInfoMap
	// get added or removed; they do nothing if nil.
	unschedulableRecorder, gatedRecorder metrics.MetricRecorder
}

// addOrUpdate adds a pod to the unschedulable podInfoMap, or updates the existing entry.
func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo) {
	podID := u.keyFunc(pInfo.Pod)
	if _, exists := u.podInfoMap[podID]; !exists {
		if pInfo.Gated && u.gatedRecorder != nil {
			u.gatedRecorder.Inc()
		} else if !pInfo.Gated && u.unschedulableRecorder != nil {
			u.unschedulableRecorder.Inc()
		}
	}
	u.podInfoMap[podID] = pInfo
}

// delete deletes a pod from the unschedulable podInfoMap.
// The `gated` parameter is used to figure out which metric should be decreased.
func (u *UnschedulablePods) delete(pod *v1.Pod, gated bool) {
	podID := u.keyFunc(pod)
	if _, exists := u.podInfoMap[podID]; exists {
		if gated && u.gatedRecorder != nil {
			u.gatedRecorder.Dec()
		} else if !gated && u.unschedulableRecorder != nil {
			u.unschedulableRecorder.Dec()
		}
	}
	delete(u.podInfoMap, podID)
}

// get returns the QueuedPodInfo whose key matches that of the given "pod",
// or nil if none is found in the map.
func (u *UnschedulablePods) get(pod *v1.Pod) *framework.QueuedPodInfo {
	podKey := u.keyFunc(pod)
	if pInfo, exists := u.podInfoMap[podKey]; exists {
		return pInfo
	}
	return nil
}

// clear removes all the entries from the unschedulable podInfoMap.
func (u *UnschedulablePods) clear() {
	u.podInfoMap = make(map[string]*framework.QueuedPodInfo)
	if u.unschedulableRecorder != nil {
		u.unschedulableRecorder.Clear()
	}
	if u.gatedRecorder != nil {
		u.gatedRecorder.Clear()
	}
}

// newUnschedulablePods initializes a new UnschedulablePods object.
func newUnschedulablePods(unschedulableRecorder, gatedRecorder metrics.MetricRecorder) *UnschedulablePods {
	return &UnschedulablePods{
		podInfoMap:            make(map[string]*framework.QueuedPodInfo),
		keyFunc:               util.GetPodFullName,
		unschedulableRecorder: unschedulableRecorder,
		gatedRecorder:         gatedRecorder,
	}
}

// nominator is a structure that stores pods nominated to run on nodes.
// It exists because the nominatedNodeName of pod objects stored in the structure
// may be different from what the scheduler has here. We should be able to find pods
// by their UID and update/delete them.
type nominator struct {
	// podLister is used to verify if the given pod is alive.
	podLister listersv1.PodLister
	// nominatedPods is a map keyed by a node name, with the value being a list of
	// pods which are nominated to run on the node. These are pods which can be in
	// the activeQ or unschedulablePods.
	nominatedPods map[string][]*framework.PodInfo
	// nominatedPodToNode is a map keyed by a Pod's UID, with the value being the
	// node name where it is nominated.
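	// It is intended to stay in sync with nominatedPods: a UID present here
	// should also appear in the slice stored under the corresponding node name
	// (see addNominatedPodUnlocked and delete below).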
	nominatedPodToNode map[types.UID]string

	lock sync.RWMutex
}

func (npm *nominator) addNominatedPodUnlocked(logger klog.Logger, pi *framework.PodInfo, nominatingInfo *framework.NominatingInfo) {
	// Always delete the pod if it already exists, to ensure we never store more than
	// one instance of the pod.
	npm.delete(pi.Pod)

	var nodeName string
	if nominatingInfo.Mode() == framework.ModeOverride {
		nodeName = nominatingInfo.NominatedNodeName
	} else if nominatingInfo.Mode() == framework.ModeNoop {
		if pi.Pod.Status.NominatedNodeName == "" {
			return
		}
		nodeName = pi.Pod.Status.NominatedNodeName
	}

	if npm.podLister != nil {
		// If the pod was removed or if it was already scheduled, don't nominate it.
		updatedPod, err := npm.podLister.Pods(pi.Pod.Namespace).Get(pi.Pod.Name)
		if err != nil {
			logger.V(4).Info("Pod doesn't exist in podLister, aborted adding it to the nominator", "pod", klog.KObj(pi.Pod))
			return
		}
		if updatedPod.Spec.NodeName != "" {
			logger.V(4).Info("Pod is already scheduled to a node, aborted adding it to the nominator", "pod", klog.KObj(pi.Pod), "node", updatedPod.Spec.NodeName)
			return
		}
	}

	npm.nominatedPodToNode[pi.Pod.UID] = nodeName
	for _, npi := range npm.nominatedPods[nodeName] {
		if npi.Pod.UID == pi.Pod.UID {
			logger.V(4).Info("Pod already exists in the nominator", "pod", klog.KObj(npi.Pod))
			return
		}
	}
	npm.nominatedPods[nodeName] = append(npm.nominatedPods[nodeName], pi)
}

func (npm *nominator) delete(p *v1.Pod) {
	nnn, ok := npm.nominatedPodToNode[p.UID]
	if !ok {
		return
	}
	for i, np := range npm.nominatedPods[nnn] {
		if np.Pod.UID == p.UID {
			npm.nominatedPods[nnn] = append(npm.nominatedPods[nnn][:i], npm.nominatedPods[nnn][i+1:]...)
			if len(npm.nominatedPods[nnn]) == 0 {
				delete(npm.nominatedPods, nnn)
			}
			break
		}
	}
	delete(npm.nominatedPodToNode, p.UID)
}

// UpdateNominatedPod updates the <oldPod> with <newPodInfo>.
func (npm *nominator) UpdateNominatedPod(logger klog.Logger, oldPod *v1.Pod, newPodInfo *framework.PodInfo) {
	npm.lock.Lock()
	defer npm.lock.Unlock()
	npm.updateNominatedPodUnlocked(logger, oldPod, newPodInfo)
}

func (npm *nominator) updateNominatedPodUnlocked(logger klog.Logger, oldPod *v1.Pod, newPodInfo *framework.PodInfo) {
	// In some cases, an Update event with no "NominatedNode" present is received right
	// after a node ("NominatedNode") is reserved for this pod in memory.
	// In this case, we need to keep reserving the NominatedNode when updating the pod pointer.
	var nominatingInfo *framework.NominatingInfo
	// We won't fall into the `if` block below if the Update event represents:
	// (1) NominatedNode info being added
	// (2) NominatedNode info being updated
	// (3) NominatedNode info being removed
	if NominatedNodeName(oldPod) == "" && NominatedNodeName(newPodInfo.Pod) == "" {
		if nnn, ok := npm.nominatedPodToNode[oldPod.UID]; ok {
			// This is the only case in which we should keep reserving the NominatedNode.
			nominatingInfo = &framework.NominatingInfo{
				NominatingMode:    framework.ModeOverride,
				NominatedNodeName: nnn,
			}
		}
	}
	// We update regardless of whether the nominatedNodeName changed, to ensure
	// that the pod pointer is updated.
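	// Deleting first and then re-adding guarantees that the entry ends up under
	// the (possibly recomputed) node name with the fresh PodInfo pointer.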
	npm.delete(oldPod)
	npm.addNominatedPodUnlocked(logger, newPodInfo, nominatingInfo)
}

// NewPodNominator creates a nominator as the backing implementation of framework.PodNominator.
// A podLister is passed in to check whether the pod still exists
// before adding its nominatedNode info.
func NewPodNominator(podLister listersv1.PodLister) framework.PodNominator {
	return newPodNominator(podLister)
}

func newPodNominator(podLister listersv1.PodLister) *nominator {
	return &nominator{
		podLister:          podLister,
		nominatedPods:      make(map[string][]*framework.PodInfo),
		nominatedPodToNode: make(map[types.UID]string),
	}
}

func podInfoKeyFunc(obj interface{}) (string, error) {
	return cache.MetaNamespaceKeyFunc(obj.(*framework.QueuedPodInfo).Pod)
}
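// Worked example (illustrative only, not part of the package API): with the
// package defaults (DefaultPodInitialBackoffDuration = 1s and
// DefaultPodMaxBackoffDuration = 10s), calculateBackoffDuration yields an
// exponential, capped sequence:
//
//	Attempts == 1 ->  1s
//	Attempts == 2 ->  2s
//	Attempts == 3 ->  4s
//	Attempts == 4 ->  8s
//	Attempts == 5 -> 10s (capped at podMaxBackoffDuration)
//
// The guard `duration > p.podMaxBackoffDuration-duration` is equivalent to
// `2*duration > p.podMaxBackoffDuration`, written with subtraction so that the
// doubling can never overflow time.Duration.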