k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package defaultpreemption 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "math/rand" 24 "sort" 25 "strings" 26 "testing" 27 "time" 28 29 "github.com/google/go-cmp/cmp" 30 v1 "k8s.io/api/core/v1" 31 policy "k8s.io/api/policy/v1" 32 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 "k8s.io/apimachinery/pkg/runtime" 34 "k8s.io/apimachinery/pkg/util/sets" 35 "k8s.io/client-go/informers" 36 clientsetfake "k8s.io/client-go/kubernetes/fake" 37 clienttesting "k8s.io/client-go/testing" 38 "k8s.io/client-go/tools/events" 39 "k8s.io/klog/v2/ktesting" 40 kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1" 41 extenderv1 "k8s.io/kube-scheduler/extender/v1" 42 "k8s.io/kubernetes/pkg/scheduler/apis/config" 43 configv1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1" 44 "k8s.io/kubernetes/pkg/scheduler/framework" 45 "k8s.io/kubernetes/pkg/scheduler/framework/parallelize" 46 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" 47 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" 48 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" 49 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/names" 50 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources" 51 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread" 52 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort" 53 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration" 54 "k8s.io/kubernetes/pkg/scheduler/framework/preemption" 55 frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" 56 internalcache "k8s.io/kubernetes/pkg/scheduler/internal/cache" 57 internalqueue "k8s.io/kubernetes/pkg/scheduler/internal/queue" 58 st "k8s.io/kubernetes/pkg/scheduler/testing" 59 tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" 60 ) 61 62 var ( 63 negPriority, lowPriority, midPriority, highPriority, veryHighPriority = int32(-100), int32(0), int32(100), int32(1000), int32(10000) 64 65 smallRes = map[v1.ResourceName]string{ 66 v1.ResourceCPU: "100m", 67 v1.ResourceMemory: "100", 68 } 69 mediumRes = map[v1.ResourceName]string{ 70 v1.ResourceCPU: "200m", 71 v1.ResourceMemory: "200", 72 } 73 largeRes = map[v1.ResourceName]string{ 74 v1.ResourceCPU: "300m", 75 v1.ResourceMemory: "300", 76 } 77 veryLargeRes = map[v1.ResourceName]string{ 78 v1.ResourceCPU: "500m", 79 v1.ResourceMemory: "500", 80 } 81 82 epochTime = metav1.NewTime(time.Unix(0, 0)) 83 epochTime1 = metav1.NewTime(time.Unix(0, 1)) 84 epochTime2 = metav1.NewTime(time.Unix(0, 2)) 85 epochTime3 = metav1.NewTime(time.Unix(0, 3)) 86 epochTime4 = metav1.NewTime(time.Unix(0, 4)) 87 epochTime5 = metav1.NewTime(time.Unix(0, 5)) 88 epochTime6 = metav1.NewTime(time.Unix(0, 6)) 89 ) 90 91 func getDefaultDefaultPreemptionArgs() *config.DefaultPreemptionArgs { 92 v1dpa := &kubeschedulerconfigv1.DefaultPreemptionArgs{} 93 configv1.SetDefaults_DefaultPreemptionArgs(v1dpa) 94 dpa := &config.DefaultPreemptionArgs{} 95 configv1.Convert_v1_DefaultPreemptionArgs_To_config_DefaultPreemptionArgs(v1dpa, dpa, nil) 96 return dpa 97 } 98 99 var nodeResourcesFitFunc = frameworkruntime.FactoryAdapter(feature.Features{}, noderesources.NewFit) 100 var podTopologySpreadFunc = frameworkruntime.FactoryAdapter(feature.Features{}, podtopologyspread.New) 101 102 // TestPlugin returns Error status when trying to `AddPod` or `RemovePod` on the nodes which have the {k,v} label pair defined on the nodes. 103 type TestPlugin struct { 104 name string 105 } 106 107 func newTestPlugin(_ context.Context, injArgs runtime.Object, f framework.Handle) (framework.Plugin, error) { 108 return &TestPlugin{name: "test-plugin"}, nil 109 } 110 111 func (pl *TestPlugin) AddPod(ctx context.Context, state *framework.CycleState, podToSchedule *v1.Pod, podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status { 112 if nodeInfo.Node().GetLabels()["error"] == "true" { 113 return framework.AsStatus(fmt.Errorf("failed to add pod: %v", podToSchedule.Name)) 114 } 115 return nil 116 } 117 118 func (pl *TestPlugin) RemovePod(ctx context.Context, state *framework.CycleState, podToSchedule *v1.Pod, podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status { 119 if nodeInfo.Node().GetLabels()["error"] == "true" { 120 return framework.AsStatus(fmt.Errorf("failed to remove pod: %v", podToSchedule.Name)) 121 } 122 return nil 123 } 124 125 func (pl *TestPlugin) Name() string { 126 return pl.name 127 } 128 129 func (pl *TestPlugin) PreFilterExtensions() framework.PreFilterExtensions { 130 return pl 131 } 132 133 func (pl *TestPlugin) PreFilter(ctx context.Context, state *framework.CycleState, p *v1.Pod) (*framework.PreFilterResult, *framework.Status) { 134 return nil, nil 135 } 136 137 func (pl *TestPlugin) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { 138 return nil 139 } 140 141 func TestPostFilter(t *testing.T) { 142 onePodRes := map[v1.ResourceName]string{v1.ResourcePods: "1"} 143 nodeRes := map[v1.ResourceName]string{v1.ResourceCPU: "200m", v1.ResourceMemory: "400"} 144 tests := []struct { 145 name string 146 pod *v1.Pod 147 pods []*v1.Pod 148 nodes []*v1.Node 149 filteredNodesStatuses framework.NodeToStatusMap 150 extender framework.Extender 151 wantResult *framework.PostFilterResult 152 wantStatus *framework.Status 153 }{ 154 { 155 name: "pod with higher priority can be made schedulable", 156 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Obj(), 157 pods: []*v1.Pod{ 158 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 159 }, 160 nodes: []*v1.Node{ 161 st.MakeNode().Name("node1").Capacity(onePodRes).Obj(), 162 }, 163 filteredNodesStatuses: framework.NodeToStatusMap{ 164 "node1": framework.NewStatus(framework.Unschedulable), 165 }, 166 wantResult: framework.NewPostFilterResultWithNominatedNode("node1"), 167 wantStatus: framework.NewStatus(framework.Success), 168 }, 169 { 170 name: "pod with tied priority is still unschedulable", 171 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Obj(), 172 pods: []*v1.Pod{ 173 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 174 }, 175 nodes: []*v1.Node{ 176 st.MakeNode().Name("node1").Capacity(onePodRes).Obj(), 177 }, 178 filteredNodesStatuses: framework.NodeToStatusMap{ 179 "node1": framework.NewStatus(framework.Unschedulable), 180 }, 181 wantResult: framework.NewPostFilterResultWithNominatedNode(""), 182 wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/1 nodes are available: 1 No preemption victims found for incoming pod."), 183 }, 184 { 185 name: "preemption should respect filteredNodesStatuses", 186 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Obj(), 187 pods: []*v1.Pod{ 188 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 189 }, 190 nodes: []*v1.Node{ 191 st.MakeNode().Name("node1").Capacity(onePodRes).Obj(), 192 }, 193 filteredNodesStatuses: framework.NodeToStatusMap{ 194 "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable), 195 }, 196 wantResult: framework.NewPostFilterResultWithNominatedNode(""), 197 wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling."), 198 }, 199 { 200 name: "pod can be made schedulable on one node", 201 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(midPriority).Obj(), 202 pods: []*v1.Pod{ 203 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Priority(highPriority).Node("node1").Obj(), 204 st.MakePod().Name("p2").UID("p2").Namespace(v1.NamespaceDefault).Priority(lowPriority).Node("node2").Obj(), 205 }, 206 nodes: []*v1.Node{ 207 st.MakeNode().Name("node1").Capacity(onePodRes).Obj(), 208 st.MakeNode().Name("node2").Capacity(onePodRes).Obj(), 209 }, 210 filteredNodesStatuses: framework.NodeToStatusMap{ 211 "node1": framework.NewStatus(framework.Unschedulable), 212 "node2": framework.NewStatus(framework.Unschedulable), 213 }, 214 wantResult: framework.NewPostFilterResultWithNominatedNode("node2"), 215 wantStatus: framework.NewStatus(framework.Success), 216 }, 217 { 218 name: "preemption result filtered out by extenders", 219 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Obj(), 220 pods: []*v1.Pod{ 221 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 222 st.MakePod().Name("p2").UID("p2").Namespace(v1.NamespaceDefault).Node("node2").Obj(), 223 }, 224 nodes: []*v1.Node{ 225 st.MakeNode().Name("node1").Capacity(onePodRes).Obj(), 226 st.MakeNode().Name("node2").Capacity(onePodRes).Obj(), 227 }, 228 filteredNodesStatuses: framework.NodeToStatusMap{ 229 "node1": framework.NewStatus(framework.Unschedulable), 230 "node2": framework.NewStatus(framework.Unschedulable), 231 }, 232 extender: &tf.FakeExtender{ 233 ExtenderName: "FakeExtender1", 234 Predicates: []tf.FitPredicate{tf.Node1PredicateExtender}, 235 }, 236 wantResult: framework.NewPostFilterResultWithNominatedNode("node1"), 237 wantStatus: framework.NewStatus(framework.Success), 238 }, 239 { 240 name: "no candidate nodes found, no enough resource after removing low priority pods", 241 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(largeRes).Obj(), 242 pods: []*v1.Pod{ 243 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 244 st.MakePod().Name("p2").UID("p2").Namespace(v1.NamespaceDefault).Node("node2").Obj(), 245 }, 246 nodes: []*v1.Node{ 247 st.MakeNode().Name("node1").Capacity(nodeRes).Obj(), // no enough CPU resource 248 st.MakeNode().Name("node2").Capacity(nodeRes).Obj(), // no enough CPU resource 249 }, 250 filteredNodesStatuses: framework.NodeToStatusMap{ 251 "node1": framework.NewStatus(framework.Unschedulable), 252 "node2": framework.NewStatus(framework.Unschedulable), 253 }, 254 wantResult: framework.NewPostFilterResultWithNominatedNode(""), 255 wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/2 nodes are available: 2 Insufficient cpu."), 256 }, 257 { 258 name: "no candidate nodes found with mixed reasons, no lower priority pod and no enough CPU resource", 259 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(largeRes).Obj(), 260 pods: []*v1.Pod{ 261 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Priority(highPriority).Obj(), 262 st.MakePod().Name("p2").UID("p2").Namespace(v1.NamespaceDefault).Node("node2").Obj(), 263 st.MakePod().Name("p3").UID("p3").Namespace(v1.NamespaceDefault).Node("node3").Priority(highPriority).Obj(), 264 }, 265 nodes: []*v1.Node{ 266 st.MakeNode().Name("node1").Capacity(onePodRes).Obj(), // no pod will be preempted 267 st.MakeNode().Name("node2").Capacity(nodeRes).Obj(), // no enough CPU resource 268 st.MakeNode().Name("node3").Capacity(onePodRes).Obj(), // no pod will be preempted 269 }, 270 filteredNodesStatuses: framework.NodeToStatusMap{ 271 "node1": framework.NewStatus(framework.Unschedulable), 272 "node2": framework.NewStatus(framework.Unschedulable), 273 "node3": framework.NewStatus(framework.Unschedulable), 274 }, 275 wantResult: framework.NewPostFilterResultWithNominatedNode(""), 276 wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/3 nodes are available: 1 Insufficient cpu, 2 No preemption victims found for incoming pod."), 277 }, 278 { 279 name: "no candidate nodes found with mixed reason, 2 UnschedulableAndUnresolvable nodes and 2 nodes don't have enough CPU resource", 280 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(largeRes).Obj(), 281 pods: []*v1.Pod{ 282 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 283 st.MakePod().Name("p2").UID("p2").Namespace(v1.NamespaceDefault).Node("node2").Obj(), 284 }, 285 nodes: []*v1.Node{ 286 st.MakeNode().Name("node1").Capacity(nodeRes).Obj(), 287 st.MakeNode().Name("node2").Capacity(nodeRes).Obj(), 288 st.MakeNode().Name("node3").Capacity(nodeRes).Obj(), 289 st.MakeNode().Name("node4").Capacity(nodeRes).Obj(), 290 }, 291 filteredNodesStatuses: framework.NodeToStatusMap{ 292 "node3": framework.NewStatus(framework.UnschedulableAndUnresolvable), 293 "node4": framework.NewStatus(framework.UnschedulableAndUnresolvable), 294 }, 295 wantResult: framework.NewPostFilterResultWithNominatedNode(""), 296 wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/4 nodes are available: 2 Insufficient cpu, 2 Preemption is not helpful for scheduling."), 297 }, 298 { 299 name: "only one node but failed with TestPlugin", 300 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(largeRes).Obj(), 301 pods: []*v1.Pod{ 302 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 303 }, 304 // label the node with key as "error" so that the TestPlugin will fail with error. 305 nodes: []*v1.Node{st.MakeNode().Name("node1").Capacity(largeRes).Label("error", "true").Obj()}, 306 filteredNodesStatuses: framework.NodeToStatusMap{"node1": framework.NewStatus(framework.Unschedulable)}, 307 wantResult: nil, 308 wantStatus: framework.AsStatus(errors.New("preemption: running RemovePod on PreFilter plugin \"test-plugin\": failed to remove pod: p")), 309 }, 310 { 311 name: "one failed with TestPlugin and the other pass", 312 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(largeRes).Obj(), 313 pods: []*v1.Pod{ 314 st.MakePod().Name("p1").UID("p1").Namespace(v1.NamespaceDefault).Node("node1").Obj(), 315 st.MakePod().Name("p2").UID("p2").Namespace(v1.NamespaceDefault).Node("node2").Req(mediumRes).Obj(), 316 }, 317 // even though node1 will fail with error but node2 will still be returned as a valid nominated node. 318 nodes: []*v1.Node{ 319 st.MakeNode().Name("node1").Capacity(largeRes).Label("error", "true").Obj(), 320 st.MakeNode().Name("node2").Capacity(largeRes).Obj(), 321 }, 322 filteredNodesStatuses: framework.NodeToStatusMap{ 323 "node1": framework.NewStatus(framework.Unschedulable), 324 "node2": framework.NewStatus(framework.Unschedulable), 325 }, 326 wantResult: framework.NewPostFilterResultWithNominatedNode("node2"), 327 wantStatus: framework.NewStatus(framework.Success), 328 }, 329 } 330 331 for _, tt := range tests { 332 t.Run(tt.name, func(t *testing.T) { 333 // index the potential victim pods in the fake client so that the victims deletion logic does not fail 334 podItems := []v1.Pod{} 335 for _, pod := range tt.pods { 336 podItems = append(podItems, *pod) 337 } 338 cs := clientsetfake.NewSimpleClientset(&v1.PodList{Items: podItems}) 339 informerFactory := informers.NewSharedInformerFactory(cs, 0) 340 podInformer := informerFactory.Core().V1().Pods().Informer() 341 podInformer.GetStore().Add(tt.pod) 342 for i := range tt.pods { 343 podInformer.GetStore().Add(tt.pods[i]) 344 } 345 // Register NodeResourceFit as the Filter & PreFilter plugin. 346 registeredPlugins := []tf.RegisterPluginFunc{ 347 tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), 348 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 349 tf.RegisterPluginAsExtensions("test-plugin", newTestPlugin, "PreFilter"), 350 tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), 351 } 352 var extenders []framework.Extender 353 if tt.extender != nil { 354 extenders = append(extenders, tt.extender) 355 } 356 logger, ctx := ktesting.NewTestContext(t) 357 ctx, cancel := context.WithCancel(ctx) 358 defer cancel() 359 f, err := tf.NewFramework(ctx, registeredPlugins, "", 360 frameworkruntime.WithClientSet(cs), 361 frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), 362 frameworkruntime.WithInformerFactory(informerFactory), 363 frameworkruntime.WithPodNominator(internalqueue.NewPodNominator(informerFactory.Core().V1().Pods().Lister())), 364 frameworkruntime.WithExtenders(extenders), 365 frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.pods, tt.nodes)), 366 frameworkruntime.WithLogger(logger), 367 ) 368 if err != nil { 369 t.Fatal(err) 370 } 371 p := DefaultPreemption{ 372 fh: f, 373 podLister: informerFactory.Core().V1().Pods().Lister(), 374 pdbLister: getPDBLister(informerFactory), 375 args: *getDefaultDefaultPreemptionArgs(), 376 } 377 378 state := framework.NewCycleState() 379 // Ensure <state> is populated. 380 if _, status := f.RunPreFilterPlugins(ctx, state, tt.pod); !status.IsSuccess() { 381 t.Errorf("Unexpected PreFilter Status: %v", status) 382 } 383 384 gotResult, gotStatus := p.PostFilter(ctx, state, tt.pod, tt.filteredNodesStatuses) 385 // As we cannot compare two errors directly due to miss the equal method for how to compare two errors, so just need to compare the reasons. 386 if gotStatus.Code() == framework.Error { 387 if diff := cmp.Diff(tt.wantStatus.Reasons(), gotStatus.Reasons()); diff != "" { 388 t.Errorf("Unexpected status (-want, +got):\n%s", diff) 389 } 390 } else { 391 if diff := cmp.Diff(tt.wantStatus, gotStatus); diff != "" { 392 t.Errorf("Unexpected status (-want, +got):\n%s", diff) 393 } 394 } 395 if diff := cmp.Diff(tt.wantResult, gotResult); diff != "" { 396 t.Errorf("Unexpected postFilterResult (-want, +got):\n%s", diff) 397 } 398 }) 399 } 400 } 401 402 type candidate struct { 403 victims *extenderv1.Victims 404 name string 405 } 406 407 func TestDryRunPreemption(t *testing.T) { 408 tests := []struct { 409 name string 410 args *config.DefaultPreemptionArgs 411 nodeNames []string 412 testPods []*v1.Pod 413 initPods []*v1.Pod 414 registerPlugins []tf.RegisterPluginFunc 415 pdbs []*policy.PodDisruptionBudget 416 fakeFilterRC framework.Code // return code for fake filter plugin 417 disableParallelism bool 418 expected [][]candidate 419 expectedNumFilterCalled []int32 420 }{ 421 { 422 name: "a pod that does not fit on any node", 423 registerPlugins: []tf.RegisterPluginFunc{ 424 tf.RegisterFilterPlugin("FalseFilter", tf.NewFalseFilterPlugin), 425 }, 426 nodeNames: []string{"node1", "node2"}, 427 testPods: []*v1.Pod{ 428 st.MakePod().Name("p").UID("p").Priority(highPriority).Obj(), 429 }, 430 initPods: []*v1.Pod{ 431 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Obj(), 432 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Obj(), 433 }, 434 expected: [][]candidate{{}}, 435 expectedNumFilterCalled: []int32{2}, 436 }, 437 { 438 name: "a pod that fits with no preemption", 439 registerPlugins: []tf.RegisterPluginFunc{ 440 tf.RegisterFilterPlugin("TrueFilter", tf.NewTrueFilterPlugin), 441 }, 442 nodeNames: []string{"node1", "node2"}, 443 testPods: []*v1.Pod{ 444 st.MakePod().Name("p").UID("p").Priority(highPriority).Obj(), 445 }, 446 initPods: []*v1.Pod{ 447 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Obj(), 448 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Obj(), 449 }, 450 expected: [][]candidate{{}}, 451 fakeFilterRC: framework.Unschedulable, 452 expectedNumFilterCalled: []int32{2}, 453 }, 454 { 455 name: "a pod that fits on one node with no preemption", 456 registerPlugins: []tf.RegisterPluginFunc{ 457 tf.RegisterFilterPlugin("MatchFilter", tf.NewMatchFilterPlugin), 458 }, 459 nodeNames: []string{"node1", "node2"}, 460 testPods: []*v1.Pod{ 461 // Name the pod as "node1" to fit "MatchFilter" plugin. 462 st.MakePod().Name("node1").UID("node1").Priority(highPriority).Obj(), 463 }, 464 initPods: []*v1.Pod{ 465 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Obj(), 466 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Obj(), 467 }, 468 expected: [][]candidate{{}}, 469 fakeFilterRC: framework.Unschedulable, 470 expectedNumFilterCalled: []int32{2}, 471 }, 472 { 473 name: "a pod that fits on both nodes when lower priority pods are preempted", 474 registerPlugins: []tf.RegisterPluginFunc{ 475 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 476 }, 477 nodeNames: []string{"node1", "node2"}, 478 testPods: []*v1.Pod{ 479 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 480 }, 481 initPods: []*v1.Pod{ 482 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 483 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(), 484 }, 485 expected: [][]candidate{ 486 { 487 candidate{ 488 victims: &extenderv1.Victims{ 489 Pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj()}, 490 }, 491 name: "node1", 492 }, 493 candidate{ 494 victims: &extenderv1.Victims{ 495 Pods: []*v1.Pod{st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj()}, 496 }, 497 name: "node2", 498 }, 499 }, 500 }, 501 expectedNumFilterCalled: []int32{4}, 502 }, 503 { 504 name: "a pod that would fit on the nodes, but other pods running are higher priority, no preemption would happen", 505 registerPlugins: []tf.RegisterPluginFunc{ 506 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 507 }, 508 nodeNames: []string{"node1", "node2"}, 509 testPods: []*v1.Pod{ 510 st.MakePod().Name("p").UID("p").Priority(lowPriority).Req(largeRes).Obj(), 511 }, 512 initPods: []*v1.Pod{ 513 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 514 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(), 515 }, 516 expected: [][]candidate{{}}, 517 expectedNumFilterCalled: []int32{0}, 518 }, 519 { 520 name: "medium priority pod is preempted, but lower priority one stays as it is small", 521 registerPlugins: []tf.RegisterPluginFunc{ 522 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 523 }, 524 nodeNames: []string{"node1", "node2"}, 525 testPods: []*v1.Pod{ 526 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 527 }, 528 initPods: []*v1.Pod{ 529 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 530 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 531 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(), 532 }, 533 expected: [][]candidate{ 534 { 535 candidate{ 536 victims: &extenderv1.Victims{ 537 Pods: []*v1.Pod{st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(largeRes).Obj()}, 538 }, 539 name: "node1", 540 }, 541 candidate{ 542 victims: &extenderv1.Victims{ 543 Pods: []*v1.Pod{st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj()}, 544 }, 545 name: "node2", 546 }, 547 }, 548 }, 549 expectedNumFilterCalled: []int32{5}, 550 }, 551 { 552 name: "mixed priority pods are preempted", 553 registerPlugins: []tf.RegisterPluginFunc{ 554 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 555 }, 556 nodeNames: []string{"node1", "node2"}, 557 testPods: []*v1.Pod{ 558 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 559 }, 560 initPods: []*v1.Pod{ 561 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(smallRes).Obj(), 562 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 563 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(midPriority).Req(mediumRes).Obj(), 564 st.MakePod().Name("p1.4").UID("p1.4").Node("node1").Priority(highPriority).Req(smallRes).Obj(), 565 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(highPriority).Req(largeRes).Obj(), 566 }, 567 expected: [][]candidate{ 568 { 569 candidate{ 570 victims: &extenderv1.Victims{ 571 Pods: []*v1.Pod{ 572 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 573 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(midPriority).Req(mediumRes).Obj(), 574 }, 575 }, 576 name: "node1", 577 }, 578 }, 579 }, 580 expectedNumFilterCalled: []int32{4}, 581 }, 582 { 583 name: "mixed priority pods are preempted, pick later StartTime one when priorities are equal", 584 registerPlugins: []tf.RegisterPluginFunc{ 585 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 586 }, 587 nodeNames: []string{"node1", "node2"}, 588 testPods: []*v1.Pod{ 589 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 590 }, 591 initPods: []*v1.Pod{ 592 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(lowPriority).Req(smallRes).StartTime(epochTime5).Obj(), 593 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).StartTime(epochTime4).Obj(), 594 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime3).Obj(), 595 st.MakePod().Name("p1.4").UID("p1.4").Node("node1").Priority(highPriority).Req(smallRes).StartTime(epochTime2).Obj(), 596 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(highPriority).Req(largeRes).StartTime(epochTime1).Obj(), 597 }, 598 expected: [][]candidate{ 599 { 600 candidate{ 601 victims: &extenderv1.Victims{ 602 Pods: []*v1.Pod{ 603 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(lowPriority).Req(smallRes).StartTime(epochTime5).Obj(), 604 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime3).Obj(), 605 }, 606 }, 607 name: "node1", 608 }, 609 }, 610 }, 611 expectedNumFilterCalled: []int32{4}, // no preemption would happen on node2 and no filter call is counted. 612 }, 613 { 614 name: "pod with anti-affinity is preempted", 615 registerPlugins: []tf.RegisterPluginFunc{ 616 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 617 tf.RegisterPluginAsExtensions(interpodaffinity.Name, interpodaffinity.New, "Filter", "PreFilter"), 618 }, 619 nodeNames: []string{"node1", "node2"}, 620 testPods: []*v1.Pod{ 621 st.MakePod().Name("p").UID("p").Label("foo", "").Priority(highPriority).Req(smallRes).Obj(), 622 }, 623 initPods: []*v1.Pod{ 624 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("foo", "").Priority(lowPriority).Req(smallRes). 625 PodAntiAffinityExists("foo", "hostname", st.PodAntiAffinityWithRequiredReq).Obj(), 626 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(smallRes).Obj(), 627 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(highPriority).Req(smallRes).Obj(), 628 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(highPriority).Req(smallRes).Obj(), 629 }, 630 expected: [][]candidate{ 631 { 632 candidate{ 633 victims: &extenderv1.Victims{ 634 Pods: []*v1.Pod{ 635 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("foo", "").Priority(lowPriority).Req(smallRes). 636 PodAntiAffinityExists("foo", "hostname", st.PodAntiAffinityWithRequiredReq).Obj(), 637 }, 638 }, 639 name: "node1", 640 }, 641 }, 642 }, 643 expectedNumFilterCalled: []int32{3}, // no preemption would happen on node2 and no filter call is counted. 644 }, 645 { 646 name: "preemption to resolve pod topology spread filter failure", 647 registerPlugins: []tf.RegisterPluginFunc{ 648 tf.RegisterPluginAsExtensions(podtopologyspread.Name, podTopologySpreadFunc, "PreFilter", "Filter"), 649 }, 650 nodeNames: []string{"node-a/zone1", "node-b/zone1", "node-x/zone2"}, 651 testPods: []*v1.Pod{ 652 st.MakePod().Name("p").UID("p").Label("foo", "").Priority(highPriority). 653 SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("foo").Obj(), nil, nil, nil, nil). 654 SpreadConstraint(1, "hostname", v1.DoNotSchedule, st.MakeLabelSelector().Exists("foo").Obj(), nil, nil, nil, nil). 655 Obj(), 656 }, 657 initPods: []*v1.Pod{ 658 st.MakePod().Name("pod-a1").UID("pod-a1").Node("node-a").Label("foo", "").Priority(midPriority).Obj(), 659 st.MakePod().Name("pod-a2").UID("pod-a2").Node("node-a").Label("foo", "").Priority(lowPriority).Obj(), 660 st.MakePod().Name("pod-b1").UID("pod-b1").Node("node-b").Label("foo", "").Priority(lowPriority).Obj(), 661 st.MakePod().Name("pod-x1").UID("pod-x1").Node("node-x").Label("foo", "").Priority(highPriority).Obj(), 662 st.MakePod().Name("pod-x2").UID("pod-x2").Node("node-x").Label("foo", "").Priority(highPriority).Obj(), 663 }, 664 expected: [][]candidate{ 665 { 666 candidate{ 667 victims: &extenderv1.Victims{ 668 Pods: []*v1.Pod{st.MakePod().Name("pod-a2").UID("pod-a2").Node("node-a").Label("foo", "").Priority(lowPriority).Obj()}, 669 }, 670 name: "node-a", 671 }, 672 candidate{ 673 victims: &extenderv1.Victims{ 674 Pods: []*v1.Pod{st.MakePod().Name("pod-b1").UID("pod-b1").Node("node-b").Label("foo", "").Priority(lowPriority).Obj()}, 675 }, 676 name: "node-b", 677 }, 678 }, 679 }, 680 expectedNumFilterCalled: []int32{5}, // node-a (3), node-b (2), node-x (0) 681 }, 682 { 683 name: "get Unschedulable in the preemption phase when the filter plugins filtering the nodes", 684 registerPlugins: []tf.RegisterPluginFunc{ 685 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 686 }, 687 nodeNames: []string{"node1", "node2"}, 688 testPods: []*v1.Pod{ 689 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 690 }, 691 initPods: []*v1.Pod{ 692 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 693 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(), 694 }, 695 fakeFilterRC: framework.Unschedulable, 696 expected: [][]candidate{{}}, 697 expectedNumFilterCalled: []int32{2}, 698 }, 699 { 700 name: "preemption with violation of same pdb", 701 registerPlugins: []tf.RegisterPluginFunc{ 702 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 703 }, 704 nodeNames: []string{"node1"}, 705 testPods: []*v1.Pod{ 706 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 707 }, 708 initPods: []*v1.Pod{ 709 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 710 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 711 }, 712 pdbs: []*policy.PodDisruptionBudget{ 713 { 714 Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, 715 Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1}, 716 }, 717 }, 718 expected: [][]candidate{ 719 { 720 candidate{ 721 victims: &extenderv1.Victims{ 722 Pods: []*v1.Pod{ 723 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 724 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 725 }, 726 NumPDBViolations: 1, 727 }, 728 name: "node1", 729 }, 730 }, 731 }, 732 expectedNumFilterCalled: []int32{3}, 733 }, 734 { 735 name: "preemption with violation of the pdb with pod whose eviction was processed, the victim doesn't belong to DisruptedPods", 736 registerPlugins: []tf.RegisterPluginFunc{ 737 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 738 }, 739 nodeNames: []string{"node1"}, 740 testPods: []*v1.Pod{ 741 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 742 }, 743 initPods: []*v1.Pod{ 744 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 745 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 746 }, 747 pdbs: []*policy.PodDisruptionBudget{ 748 { 749 Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, 750 Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1, DisruptedPods: map[string]metav1.Time{"p2": {Time: time.Now()}}}, 751 }, 752 }, 753 expected: [][]candidate{ 754 { 755 candidate{ 756 victims: &extenderv1.Victims{ 757 Pods: []*v1.Pod{ 758 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 759 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 760 }, 761 NumPDBViolations: 1, 762 }, 763 name: "node1", 764 }, 765 }, 766 }, 767 expectedNumFilterCalled: []int32{3}, 768 }, 769 { 770 name: "preemption with violation of the pdb with pod whose eviction was processed, the victim belongs to DisruptedPods", 771 registerPlugins: []tf.RegisterPluginFunc{ 772 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 773 }, 774 nodeNames: []string{"node1"}, 775 testPods: []*v1.Pod{ 776 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 777 }, 778 initPods: []*v1.Pod{ 779 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 780 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 781 }, 782 pdbs: []*policy.PodDisruptionBudget{ 783 { 784 Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, 785 Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1, DisruptedPods: map[string]metav1.Time{"p1.2": {Time: time.Now()}}}, 786 }, 787 }, 788 expected: [][]candidate{ 789 { 790 candidate{ 791 victims: &extenderv1.Victims{ 792 Pods: []*v1.Pod{ 793 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 794 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 795 }, 796 NumPDBViolations: 0, 797 }, 798 name: "node1", 799 }, 800 }, 801 }, 802 expectedNumFilterCalled: []int32{3}, 803 }, 804 { 805 name: "preemption with violation of the pdb with pod whose eviction was processed, the victim which belongs to DisruptedPods is treated as 'nonViolating'", 806 registerPlugins: []tf.RegisterPluginFunc{ 807 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 808 }, 809 nodeNames: []string{"node1"}, 810 testPods: []*v1.Pod{ 811 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 812 }, 813 initPods: []*v1.Pod{ 814 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 815 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 816 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 817 }, 818 pdbs: []*policy.PodDisruptionBudget{ 819 { 820 Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, 821 Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1, DisruptedPods: map[string]metav1.Time{"p1.3": {Time: time.Now()}}}, 822 }, 823 }, 824 expected: [][]candidate{ 825 { 826 candidate{ 827 victims: &extenderv1.Victims{ 828 Pods: []*v1.Pod{ 829 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 830 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 831 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Label("app", "foo").Priority(midPriority).Req(mediumRes).Obj(), 832 }, 833 NumPDBViolations: 1, 834 }, 835 name: "node1", 836 }, 837 }, 838 }, 839 expectedNumFilterCalled: []int32{4}, 840 }, 841 { 842 name: "all nodes are possible candidates, but DefaultPreemptionArgs limits to 2", 843 args: &config.DefaultPreemptionArgs{MinCandidateNodesPercentage: 40, MinCandidateNodesAbsolute: 1}, 844 registerPlugins: []tf.RegisterPluginFunc{ 845 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 846 }, 847 nodeNames: []string{"node1", "node2", "node3", "node4", "node5"}, 848 testPods: []*v1.Pod{ 849 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 850 }, 851 initPods: []*v1.Pod{ 852 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 853 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(), 854 st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj(), 855 st.MakePod().Name("p4").UID("p4").Node("node4").Priority(midPriority).Req(largeRes).Obj(), 856 st.MakePod().Name("p5").UID("p5").Node("node5").Priority(midPriority).Req(largeRes).Obj(), 857 }, 858 disableParallelism: true, 859 expected: [][]candidate{ 860 { 861 // cycle=0 => offset=4 => node5 (yes), node1 (yes) 862 candidate{ 863 name: "node1", 864 victims: &extenderv1.Victims{ 865 Pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj()}, 866 }, 867 }, 868 candidate{ 869 name: "node5", 870 victims: &extenderv1.Victims{ 871 Pods: []*v1.Pod{st.MakePod().Name("p5").UID("p5").Node("node5").Priority(midPriority).Req(largeRes).Obj()}, 872 }, 873 }, 874 }, 875 }, 876 expectedNumFilterCalled: []int32{4}, 877 }, 878 { 879 name: "some nodes are not possible candidates, DefaultPreemptionArgs limits to 2", 880 args: &config.DefaultPreemptionArgs{MinCandidateNodesPercentage: 40, MinCandidateNodesAbsolute: 1}, 881 registerPlugins: []tf.RegisterPluginFunc{ 882 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 883 }, 884 nodeNames: []string{"node1", "node2", "node3", "node4", "node5"}, 885 testPods: []*v1.Pod{ 886 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 887 }, 888 initPods: []*v1.Pod{ 889 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 890 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(veryHighPriority).Req(largeRes).Obj(), 891 st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj(), 892 st.MakePod().Name("p4").UID("p4").Node("node4").Priority(midPriority).Req(largeRes).Obj(), 893 st.MakePod().Name("p5").UID("p5").Node("node5").Priority(veryHighPriority).Req(largeRes).Obj(), 894 }, 895 disableParallelism: true, 896 expected: [][]candidate{ 897 { 898 // cycle=0 => offset=4 => node5 (no), node1 (yes), node2 (no), node3 (yes) 899 candidate{ 900 name: "node1", 901 victims: &extenderv1.Victims{ 902 Pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj()}, 903 }, 904 }, 905 candidate{ 906 name: "node3", 907 victims: &extenderv1.Victims{ 908 Pods: []*v1.Pod{st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj()}, 909 }, 910 }, 911 }, 912 }, 913 expectedNumFilterCalled: []int32{4}, 914 }, 915 { 916 name: "preemption offset across multiple scheduling cycles and wrap around", 917 args: &config.DefaultPreemptionArgs{MinCandidateNodesPercentage: 40, MinCandidateNodesAbsolute: 1}, 918 registerPlugins: []tf.RegisterPluginFunc{ 919 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 920 }, 921 nodeNames: []string{"node1", "node2", "node3", "node4", "node5"}, 922 testPods: []*v1.Pod{ 923 st.MakePod().Name("tp1").UID("tp1").Priority(highPriority).Req(largeRes).Obj(), 924 st.MakePod().Name("tp2").UID("tp2").Priority(highPriority).Req(largeRes).Obj(), 925 st.MakePod().Name("tp3").UID("tp3").Priority(highPriority).Req(largeRes).Obj(), 926 }, 927 initPods: []*v1.Pod{ 928 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj(), 929 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(), 930 st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj(), 931 st.MakePod().Name("p4").UID("p4").Node("node4").Priority(midPriority).Req(largeRes).Obj(), 932 st.MakePod().Name("p5").UID("p5").Node("node5").Priority(midPriority).Req(largeRes).Obj(), 933 }, 934 disableParallelism: true, 935 expected: [][]candidate{ 936 { 937 // cycle=0 => offset=4 => node5 (yes), node1 (yes) 938 candidate{ 939 name: "node1", 940 victims: &extenderv1.Victims{ 941 Pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).Obj()}, 942 }, 943 }, 944 candidate{ 945 name: "node5", 946 victims: &extenderv1.Victims{ 947 Pods: []*v1.Pod{st.MakePod().Name("p5").UID("p5").Node("node5").Priority(midPriority).Req(largeRes).Obj()}, 948 }, 949 }, 950 }, 951 { 952 // cycle=1 => offset=1 => node2 (yes), node3 (yes) 953 candidate{ 954 name: "node2", 955 victims: &extenderv1.Victims{ 956 Pods: []*v1.Pod{st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj()}, 957 }, 958 }, 959 candidate{ 960 name: "node3", 961 victims: &extenderv1.Victims{ 962 Pods: []*v1.Pod{st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj()}, 963 }, 964 }, 965 }, 966 { 967 // cycle=2 => offset=3 => node4 (yes), node5 (yes) 968 candidate{ 969 name: "node4", 970 victims: &extenderv1.Victims{ 971 Pods: []*v1.Pod{st.MakePod().Name("p4").UID("p4").Node("node4").Priority(midPriority).Req(largeRes).Obj()}, 972 }, 973 }, 974 candidate{ 975 name: "node5", 976 victims: &extenderv1.Victims{ 977 Pods: []*v1.Pod{st.MakePod().Name("p5").UID("p5").Node("node5").Priority(midPriority).Req(largeRes).Obj()}, 978 }, 979 }, 980 }, 981 }, 982 expectedNumFilterCalled: []int32{4, 4, 4}, 983 }, 984 { 985 name: "preemption looks past numCandidates until a non-PDB violating node is found", 986 args: &config.DefaultPreemptionArgs{MinCandidateNodesPercentage: 40, MinCandidateNodesAbsolute: 2}, 987 registerPlugins: []tf.RegisterPluginFunc{ 988 tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 989 }, 990 nodeNames: []string{"node1", "node2", "node3", "node4", "node5"}, 991 testPods: []*v1.Pod{ 992 st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 993 }, 994 initPods: []*v1.Pod{ 995 st.MakePod().Name("p1").UID("p1").Node("node1").Label("app", "foo").Priority(midPriority).Req(largeRes).Obj(), 996 st.MakePod().Name("p2").UID("p2").Node("node2").Label("app", "foo").Priority(midPriority).Req(largeRes).Obj(), 997 st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj(), 998 st.MakePod().Name("p4").UID("p4").Node("node4").Priority(midPriority).Req(largeRes).Obj(), 999 st.MakePod().Name("p5").UID("p5").Node("node5").Label("app", "foo").Priority(midPriority).Req(largeRes).Obj(), 1000 }, 1001 pdbs: []*policy.PodDisruptionBudget{ 1002 { 1003 Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, 1004 Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 0}, 1005 }, 1006 }, 1007 disableParallelism: true, 1008 expected: [][]candidate{ 1009 { 1010 // Even though the DefaultPreemptionArgs constraints suggest that the 1011 // minimum number of candidates is 2, we get three candidates here 1012 // because we're okay with being a little over (in production, if a 1013 // non-PDB violating candidate isn't found close to the offset, the 1014 // number of additional candidates returned will be at most 1015 // approximately equal to the parallelism in dryRunPreemption). 1016 // cycle=0 => offset=4 => node5 (yes, pdb), node1 (yes, pdb), node2 (no, pdb), node3 (yes) 1017 candidate{ 1018 name: "node1", 1019 victims: &extenderv1.Victims{ 1020 Pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Node("node1").Label("app", "foo").Priority(midPriority).Req(largeRes).Obj()}, 1021 NumPDBViolations: 1, 1022 }, 1023 }, 1024 candidate{ 1025 name: "node3", 1026 victims: &extenderv1.Victims{ 1027 Pods: []*v1.Pod{st.MakePod().Name("p3").UID("p3").Node("node3").Priority(midPriority).Req(largeRes).Obj()}, 1028 }, 1029 }, 1030 candidate{ 1031 name: "node5", 1032 victims: &extenderv1.Victims{ 1033 Pods: []*v1.Pod{st.MakePod().Name("p5").UID("p5").Node("node5").Label("app", "foo").Priority(midPriority).Req(largeRes).Obj()}, 1034 NumPDBViolations: 1, 1035 }, 1036 }, 1037 }, 1038 }, 1039 expectedNumFilterCalled: []int32{8}, 1040 }, 1041 } 1042 1043 labelKeys := []string{"hostname", "zone", "region"} 1044 for _, tt := range tests { 1045 t.Run(tt.name, func(t *testing.T) { 1046 nodes := make([]*v1.Node, len(tt.nodeNames)) 1047 fakeFilterRCMap := make(map[string]framework.Code, len(tt.nodeNames)) 1048 for i, nodeName := range tt.nodeNames { 1049 nodeWrapper := st.MakeNode().Capacity(veryLargeRes) 1050 // Split node name by '/' to form labels in a format of 1051 // {"hostname": tpKeys[0], "zone": tpKeys[1], "region": tpKeys[2]} 1052 tpKeys := strings.Split(nodeName, "/") 1053 nodeWrapper.Name(tpKeys[0]) 1054 for i, labelVal := range strings.Split(nodeName, "/") { 1055 nodeWrapper.Label(labelKeys[i], labelVal) 1056 } 1057 nodes[i] = nodeWrapper.Obj() 1058 fakeFilterRCMap[nodeName] = tt.fakeFilterRC 1059 } 1060 snapshot := internalcache.NewSnapshot(tt.initPods, nodes) 1061 1062 // For each test, register a FakeFilterPlugin along with essential plugins and tt.registerPlugins. 1063 fakePlugin := tf.FakeFilterPlugin{ 1064 FailedNodeReturnCodeMap: fakeFilterRCMap, 1065 } 1066 registeredPlugins := append([]tf.RegisterPluginFunc{ 1067 tf.RegisterFilterPlugin( 1068 "FakeFilter", 1069 func(_ context.Context, _ runtime.Object, fh framework.Handle) (framework.Plugin, error) { 1070 return &fakePlugin, nil 1071 }, 1072 )}, 1073 tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), 1074 tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), 1075 ) 1076 registeredPlugins = append(registeredPlugins, tt.registerPlugins...) 1077 var objs []runtime.Object 1078 for _, p := range append(tt.testPods, tt.initPods...) { 1079 objs = append(objs, p) 1080 } 1081 for _, n := range nodes { 1082 objs = append(objs, n) 1083 } 1084 informerFactory := informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(objs...), 0) 1085 parallelism := parallelize.DefaultParallelism 1086 if tt.disableParallelism { 1087 // We need disableParallelism because of the non-deterministic nature 1088 // of the results of tests that set custom minCandidateNodesPercentage 1089 // or minCandidateNodesAbsolute. This is only done in a handful of tests. 1090 parallelism = 1 1091 } 1092 1093 logger, ctx := ktesting.NewTestContext(t) 1094 ctx, cancel := context.WithCancel(ctx) 1095 defer cancel() 1096 fwk, err := tf.NewFramework( 1097 ctx, 1098 registeredPlugins, "", 1099 frameworkruntime.WithPodNominator(internalqueue.NewPodNominator(informerFactory.Core().V1().Pods().Lister())), 1100 frameworkruntime.WithSnapshotSharedLister(snapshot), 1101 frameworkruntime.WithInformerFactory(informerFactory), 1102 frameworkruntime.WithParallelism(parallelism), 1103 frameworkruntime.WithLogger(logger), 1104 ) 1105 if err != nil { 1106 t.Fatal(err) 1107 } 1108 1109 informerFactory.Start(ctx.Done()) 1110 informerFactory.WaitForCacheSync(ctx.Done()) 1111 1112 nodeInfos, err := snapshot.NodeInfos().List() 1113 if err != nil { 1114 t.Fatal(err) 1115 } 1116 sort.Slice(nodeInfos, func(i, j int) bool { 1117 return nodeInfos[i].Node().Name < nodeInfos[j].Node().Name 1118 }) 1119 1120 if tt.args == nil { 1121 tt.args = getDefaultDefaultPreemptionArgs() 1122 } 1123 pl := &DefaultPreemption{ 1124 fh: fwk, 1125 podLister: informerFactory.Core().V1().Pods().Lister(), 1126 pdbLister: getPDBLister(informerFactory), 1127 args: *tt.args, 1128 } 1129 1130 // Using 4 as a seed source to test getOffsetAndNumCandidates() deterministically. 1131 // However, we need to do it after informerFactory.WaitforCacheSync() which might 1132 // set a seed. 1133 rand.Seed(4) 1134 var prevNumFilterCalled int32 1135 for cycle, pod := range tt.testPods { 1136 state := framework.NewCycleState() 1137 // Some tests rely on PreFilter plugin to compute its CycleState. 1138 if _, status := fwk.RunPreFilterPlugins(ctx, state, pod); !status.IsSuccess() { 1139 t.Errorf("cycle %d: Unexpected PreFilter Status: %v", cycle, status) 1140 } 1141 pe := preemption.Evaluator{ 1142 PluginName: names.DefaultPreemption, 1143 Handler: pl.fh, 1144 PodLister: pl.podLister, 1145 PdbLister: pl.pdbLister, 1146 State: state, 1147 Interface: pl, 1148 } 1149 offset, numCandidates := pl.GetOffsetAndNumCandidates(int32(len(nodeInfos))) 1150 got, _, _ := pe.DryRunPreemption(ctx, pod, nodeInfos, tt.pdbs, offset, numCandidates) 1151 // Sort the values (inner victims) and the candidate itself (by its NominatedNodeName). 1152 for i := range got { 1153 victims := got[i].Victims().Pods 1154 sort.Slice(victims, func(i, j int) bool { 1155 return victims[i].Name < victims[j].Name 1156 }) 1157 } 1158 sort.Slice(got, func(i, j int) bool { 1159 return got[i].Name() < got[j].Name() 1160 }) 1161 candidates := []candidate{} 1162 for i := range got { 1163 candidates = append(candidates, candidate{victims: got[i].Victims(), name: got[i].Name()}) 1164 } 1165 if fakePlugin.NumFilterCalled-prevNumFilterCalled != tt.expectedNumFilterCalled[cycle] { 1166 t.Errorf("cycle %d: got NumFilterCalled=%d, want %d", cycle, fakePlugin.NumFilterCalled-prevNumFilterCalled, tt.expectedNumFilterCalled[cycle]) 1167 } 1168 prevNumFilterCalled = fakePlugin.NumFilterCalled 1169 if diff := cmp.Diff(tt.expected[cycle], candidates, cmp.AllowUnexported(candidate{})); diff != "" { 1170 t.Errorf("cycle %d: unexpected candidates (-want, +got): %s", cycle, diff) 1171 } 1172 } 1173 }) 1174 } 1175 } 1176 1177 func TestSelectBestCandidate(t *testing.T) { 1178 tests := []struct { 1179 name string 1180 registerPlugin tf.RegisterPluginFunc 1181 nodeNames []string 1182 pod *v1.Pod 1183 pods []*v1.Pod 1184 expected []string // any of the items is valid 1185 }{ 1186 { 1187 name: "a pod that fits on both nodes when lower priority pods are preempted", 1188 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1189 nodeNames: []string{"node1", "node2"}, 1190 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), 1191 pods: []*v1.Pod{ 1192 st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1193 st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1194 }, 1195 expected: []string{"node1", "node2"}, 1196 }, 1197 { 1198 name: "node with min highest priority pod is picked", 1199 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1200 nodeNames: []string{"node1", "node2", "node3"}, 1201 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1202 pods: []*v1.Pod{ 1203 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1204 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1205 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1206 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(lowPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1207 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(lowPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1208 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(lowPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1209 }, 1210 expected: []string{"node3"}, 1211 }, 1212 { 1213 name: "when highest priorities are the same, minimum sum of priorities is picked", 1214 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1215 nodeNames: []string{"node1", "node2", "node3"}, 1216 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1217 pods: []*v1.Pod{ 1218 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1219 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1220 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1221 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(lowPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1222 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1223 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1224 }, 1225 expected: []string{"node2"}, 1226 }, 1227 { 1228 name: "when highest priority and sum are the same, minimum number of pods is picked", 1229 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1230 nodeNames: []string{"node1", "node2", "node3"}, 1231 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1232 pods: []*v1.Pod{ 1233 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), 1234 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1235 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), 1236 st.MakePod().Name("p1.4").UID("p1.4").Node("node1").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1237 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1238 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(negPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1239 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1240 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1241 st.MakePod().Name("p3.3").UID("p3.3").Node("node3").Priority(lowPriority).Req(smallRes).StartTime(epochTime).Obj(), 1242 }, 1243 expected: []string{"node2"}, 1244 }, 1245 { 1246 // pickOneNodeForPreemption adjusts pod priorities when finding the sum of the victims. This 1247 // test ensures that the logic works correctly. 1248 name: "sum of adjusted priorities is considered", 1249 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1250 nodeNames: []string{"node1", "node2", "node3"}, 1251 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1252 pods: []*v1.Pod{ 1253 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), 1254 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1255 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1256 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(), 1257 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(negPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1258 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1259 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1260 st.MakePod().Name("p3.3").UID("p3.3").Node("node3").Priority(lowPriority).Req(smallRes).StartTime(epochTime).Obj(), 1261 }, 1262 expected: []string{"node2"}, 1263 }, 1264 { 1265 name: "non-overlapping lowest high priority, sum priorities, and number of pods", 1266 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1267 nodeNames: []string{"node1", "node2", "node3", "node4"}, 1268 pod: st.MakePod().Name("p").UID("p").Priority(veryHighPriority).Req(veryLargeRes).Obj(), 1269 pods: []*v1.Pod{ 1270 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), 1271 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).StartTime(epochTime).Obj(), 1272 st.MakePod().Name("p1.3").UID("p1.3").Node("node1").Priority(lowPriority).Req(smallRes).StartTime(epochTime).Obj(), 1273 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(highPriority).Req(largeRes).StartTime(epochTime).Obj(), 1274 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1275 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(lowPriority).Req(smallRes).StartTime(epochTime).Obj(), 1276 st.MakePod().Name("p3.3").UID("p3.3").Node("node3").Priority(lowPriority).Req(smallRes).StartTime(epochTime).Obj(), 1277 st.MakePod().Name("p3.4").UID("p3.4").Node("node3").Priority(lowPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1278 st.MakePod().Name("p4.1").UID("p4.1").Node("node4").Priority(midPriority).Req(mediumRes).StartTime(epochTime).Obj(), 1279 st.MakePod().Name("p4.2").UID("p4.2").Node("node4").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), 1280 st.MakePod().Name("p4.3").UID("p4.3").Node("node4").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), 1281 st.MakePod().Name("p4.4").UID("p4.4").Node("node4").Priority(negPriority).Req(smallRes).StartTime(epochTime).Obj(), 1282 }, 1283 expected: []string{"node1"}, 1284 }, 1285 { 1286 name: "same priority, same number of victims, different start time for each node's pod", 1287 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1288 nodeNames: []string{"node1", "node2", "node3"}, 1289 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1290 pods: []*v1.Pod{ 1291 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime2).Obj(), 1292 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime2).Obj(), 1293 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(mediumRes).StartTime(epochTime3).Obj(), 1294 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(midPriority).Req(mediumRes).StartTime(epochTime3).Obj(), 1295 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime1).Obj(), 1296 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime1).Obj(), 1297 }, 1298 expected: []string{"node2"}, 1299 }, 1300 { 1301 name: "same priority, same number of victims, different start time for all pods", 1302 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1303 nodeNames: []string{"node1", "node2", "node3"}, 1304 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1305 pods: []*v1.Pod{ 1306 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime4).Obj(), 1307 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime2).Obj(), 1308 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(mediumRes).StartTime(epochTime5).Obj(), 1309 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(midPriority).Req(mediumRes).StartTime(epochTime1).Obj(), 1310 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime3).Obj(), 1311 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime6).Obj(), 1312 }, 1313 expected: []string{"node3"}, 1314 }, 1315 { 1316 name: "different priority, same number of victims, different start time for all pods", 1317 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1318 nodeNames: []string{"node1", "node2", "node3"}, 1319 pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).Obj(), 1320 pods: []*v1.Pod{ 1321 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(lowPriority).Req(mediumRes).StartTime(epochTime4).Obj(), 1322 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(midPriority).Req(mediumRes).StartTime(epochTime2).Obj(), 1323 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(mediumRes).StartTime(epochTime6).Obj(), 1324 st.MakePod().Name("p2.2").UID("p2.2").Node("node2").Priority(lowPriority).Req(mediumRes).StartTime(epochTime1).Obj(), 1325 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(lowPriority).Req(mediumRes).StartTime(epochTime3).Obj(), 1326 st.MakePod().Name("p3.2").UID("p3.2").Node("node3").Priority(midPriority).Req(mediumRes).StartTime(epochTime5).Obj(), 1327 }, 1328 expected: []string{"node2"}, 1329 }, 1330 } 1331 for _, tt := range tests { 1332 t.Run(tt.name, func(t *testing.T) { 1333 rand.Seed(4) 1334 nodes := make([]*v1.Node, len(tt.nodeNames)) 1335 for i, nodeName := range tt.nodeNames { 1336 nodes[i] = st.MakeNode().Name(nodeName).Capacity(veryLargeRes).Obj() 1337 } 1338 1339 var objs []runtime.Object 1340 objs = append(objs, tt.pod) 1341 for _, pod := range tt.pods { 1342 objs = append(objs, pod) 1343 } 1344 cs := clientsetfake.NewSimpleClientset(objs...) 1345 informerFactory := informers.NewSharedInformerFactory(cs, 0) 1346 snapshot := internalcache.NewSnapshot(tt.pods, nodes) 1347 logger, ctx := ktesting.NewTestContext(t) 1348 ctx, cancel := context.WithCancel(ctx) 1349 defer cancel() 1350 fwk, err := tf.NewFramework( 1351 ctx, 1352 []tf.RegisterPluginFunc{ 1353 tt.registerPlugin, 1354 tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), 1355 tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), 1356 }, 1357 "", 1358 frameworkruntime.WithPodNominator(internalqueue.NewPodNominator(informerFactory.Core().V1().Pods().Lister())), 1359 frameworkruntime.WithSnapshotSharedLister(snapshot), 1360 frameworkruntime.WithLogger(logger), 1361 ) 1362 if err != nil { 1363 t.Fatal(err) 1364 } 1365 1366 state := framework.NewCycleState() 1367 // Some tests rely on PreFilter plugin to compute its CycleState. 1368 if _, status := fwk.RunPreFilterPlugins(ctx, state, tt.pod); !status.IsSuccess() { 1369 t.Errorf("Unexpected PreFilter Status: %v", status) 1370 } 1371 nodeInfos, err := snapshot.NodeInfos().List() 1372 if err != nil { 1373 t.Fatal(err) 1374 } 1375 1376 pl := &DefaultPreemption{ 1377 fh: fwk, 1378 podLister: informerFactory.Core().V1().Pods().Lister(), 1379 pdbLister: getPDBLister(informerFactory), 1380 args: *getDefaultDefaultPreemptionArgs(), 1381 } 1382 pe := preemption.Evaluator{ 1383 PluginName: names.DefaultPreemption, 1384 Handler: pl.fh, 1385 PodLister: pl.podLister, 1386 PdbLister: pl.pdbLister, 1387 State: state, 1388 Interface: pl, 1389 } 1390 offset, numCandidates := pl.GetOffsetAndNumCandidates(int32(len(nodeInfos))) 1391 candidates, _, _ := pe.DryRunPreemption(ctx, tt.pod, nodeInfos, nil, offset, numCandidates) 1392 s := pe.SelectCandidate(ctx, candidates) 1393 if s == nil || len(s.Name()) == 0 { 1394 return 1395 } 1396 found := false 1397 for _, nodeName := range tt.expected { 1398 if nodeName == s.Name() { 1399 found = true 1400 break 1401 } 1402 } 1403 if !found { 1404 t.Errorf("expect any node in %v, but got %v", tt.expected, s.Name()) 1405 } 1406 }) 1407 } 1408 } 1409 1410 func TestPodEligibleToPreemptOthers(t *testing.T) { 1411 tests := []struct { 1412 name string 1413 fts feature.Features 1414 pod *v1.Pod 1415 pods []*v1.Pod 1416 nodes []string 1417 nominatedNodeStatus *framework.Status 1418 expected bool 1419 }{ 1420 { 1421 name: "Pod with nominated node", 1422 pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), 1423 pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, 1424 nodes: []string{"node1"}, 1425 nominatedNodeStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, tainttoleration.ErrReasonNotMatch), 1426 expected: true, 1427 }, 1428 { 1429 name: "Pod with nominated node, but without nominated node status", 1430 pod: st.MakePod().Name("p_without_status").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), 1431 pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, 1432 nodes: []string{"node1"}, 1433 nominatedNodeStatus: nil, 1434 expected: false, 1435 }, 1436 { 1437 name: "Pod without nominated node", 1438 pod: st.MakePod().Name("p_without_nominated_node").UID("p").Priority(highPriority).Obj(), 1439 pods: []*v1.Pod{}, 1440 nodes: []string{}, 1441 nominatedNodeStatus: nil, 1442 expected: true, 1443 }, 1444 { 1445 name: "Pod with 'PreemptNever' preemption policy", 1446 pod: st.MakePod().Name("p_with_preempt_never_policy").UID("p").Priority(highPriority).PreemptionPolicy(v1.PreemptNever).Obj(), 1447 pods: []*v1.Pod{}, 1448 nodes: []string{}, 1449 nominatedNodeStatus: nil, 1450 expected: false, 1451 }, 1452 { 1453 name: "victim Pods terminating, feature PodDisruptionConditions is enabled", 1454 fts: feature.Features{EnablePodDisruptionConditions: true}, 1455 pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), 1456 pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating(). 1457 Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()}, 1458 nodes: []string{"node1"}, 1459 expected: false, 1460 }, 1461 { 1462 name: "non-victim Pods terminating, feature PodDisruptionConditions is enabled", 1463 fts: feature.Features{EnablePodDisruptionConditions: true}, 1464 pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), 1465 pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, 1466 nodes: []string{"node1"}, 1467 expected: true, 1468 }, 1469 { 1470 name: "victim Pods terminating, feature PodDisruptionConditions is disabled", 1471 fts: feature.Features{EnablePodDisruptionConditions: false}, 1472 pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), 1473 pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating(). 1474 Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()}, 1475 nodes: []string{"node1"}, 1476 expected: false, 1477 }, 1478 { 1479 name: "non-victim Pods terminating, feature PodDisruptionConditions is disabled", 1480 fts: feature.Features{EnablePodDisruptionConditions: false}, 1481 pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), 1482 pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, 1483 nodes: []string{"node1"}, 1484 expected: false, 1485 }, 1486 } 1487 1488 for _, test := range tests { 1489 t.Run(test.name, func(t *testing.T) { 1490 logger, ctx := ktesting.NewTestContext(t) 1491 ctx, cancel := context.WithCancel(ctx) 1492 defer cancel() 1493 var nodes []*v1.Node 1494 for _, n := range test.nodes { 1495 nodes = append(nodes, st.MakeNode().Name(n).Obj()) 1496 } 1497 registeredPlugins := []tf.RegisterPluginFunc{ 1498 tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), 1499 tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), 1500 } 1501 f, err := tf.NewFramework(ctx, registeredPlugins, "", 1502 frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(test.pods, nodes)), 1503 frameworkruntime.WithLogger(logger), 1504 ) 1505 if err != nil { 1506 t.Fatal(err) 1507 } 1508 pl := DefaultPreemption{fh: f, fts: test.fts} 1509 if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected { 1510 t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name) 1511 } 1512 }) 1513 } 1514 } 1515 func TestPreempt(t *testing.T) { 1516 tests := []struct { 1517 name string 1518 pod *v1.Pod 1519 pods []*v1.Pod 1520 extenders []*tf.FakeExtender 1521 nodeNames []string 1522 registerPlugin tf.RegisterPluginFunc 1523 want *framework.PostFilterResult 1524 expectedPods []string // list of preempted pods 1525 }{ 1526 { 1527 name: "basic preemption logic", 1528 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(), 1529 pods: []*v1.Pod{ 1530 st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1531 st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1532 st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(highPriority).Req(largeRes).Obj(), 1533 st.MakePod().Name("p3.1").UID("p3.1").Node("node3").Priority(midPriority).Req(mediumRes).Obj(), 1534 }, 1535 nodeNames: []string{"node1", "node2", "node3"}, 1536 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1537 want: framework.NewPostFilterResultWithNominatedNode("node1"), 1538 expectedPods: []string{"p1.1", "p1.2"}, 1539 }, 1540 { 1541 name: "preemption for topology spread constraints", 1542 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Label("foo", "").Priority(highPriority). 1543 SpreadConstraint(1, "zone", v1.DoNotSchedule, st.MakeLabelSelector().Exists("foo").Obj(), nil, nil, nil, nil). 1544 SpreadConstraint(1, "hostname", v1.DoNotSchedule, st.MakeLabelSelector().Exists("foo").Obj(), nil, nil, nil, nil). 1545 Obj(), 1546 pods: []*v1.Pod{ 1547 st.MakePod().Name("p-a1").UID("p-a1").Namespace(v1.NamespaceDefault).Node("node-a").Label("foo", "").Priority(highPriority).Obj(), 1548 st.MakePod().Name("p-a2").UID("p-a2").Namespace(v1.NamespaceDefault).Node("node-a").Label("foo", "").Priority(highPriority).Obj(), 1549 st.MakePod().Name("p-b1").UID("p-b1").Namespace(v1.NamespaceDefault).Node("node-b").Label("foo", "").Priority(lowPriority).Obj(), 1550 st.MakePod().Name("p-x1").UID("p-x1").Namespace(v1.NamespaceDefault).Node("node-x").Label("foo", "").Priority(highPriority).Obj(), 1551 st.MakePod().Name("p-x2").UID("p-x2").Namespace(v1.NamespaceDefault).Node("node-x").Label("foo", "").Priority(highPriority).Obj(), 1552 }, 1553 nodeNames: []string{"node-a/zone1", "node-b/zone1", "node-x/zone2"}, 1554 registerPlugin: tf.RegisterPluginAsExtensions(podtopologyspread.Name, podTopologySpreadFunc, "PreFilter", "Filter"), 1555 want: framework.NewPostFilterResultWithNominatedNode("node-b"), 1556 expectedPods: []string{"p-b1"}, 1557 }, 1558 { 1559 name: "Scheduler extenders allow only node1, otherwise node3 would have been chosen", 1560 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(), 1561 pods: []*v1.Pod{ 1562 st.MakePod().Name("p1.1").UID("p1.1").Namespace(v1.NamespaceDefault).Node("node1").Priority(midPriority).Req(smallRes).Obj(), 1563 st.MakePod().Name("p1.2").UID("p1.2").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1564 st.MakePod().Name("p2.1").UID("p2.1").Namespace(v1.NamespaceDefault).Node("node3").Priority(midPriority).Req(largeRes).Obj(), 1565 }, 1566 nodeNames: []string{"node1", "node2", "node3"}, 1567 extenders: []*tf.FakeExtender{ 1568 { 1569 ExtenderName: "FakeExtender1", 1570 Predicates: []tf.FitPredicate{tf.TruePredicateExtender}, 1571 }, 1572 { 1573 ExtenderName: "FakeExtender2", 1574 Predicates: []tf.FitPredicate{tf.Node1PredicateExtender}, 1575 }, 1576 }, 1577 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1578 want: framework.NewPostFilterResultWithNominatedNode("node1"), 1579 expectedPods: []string{"p1.1", "p1.2"}, 1580 }, 1581 { 1582 name: "Scheduler extenders do not allow any preemption", 1583 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(), 1584 pods: []*v1.Pod{ 1585 st.MakePod().Name("p1.1").UID("p1.1").Namespace(v1.NamespaceDefault).Node("node1").Priority(midPriority).Req(smallRes).Obj(), 1586 st.MakePod().Name("p1.2").UID("p1.2").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1587 st.MakePod().Name("p2.1").UID("p2.1").Namespace(v1.NamespaceDefault).Node("node2").Priority(midPriority).Req(largeRes).Obj(), 1588 }, 1589 nodeNames: []string{"node1", "node2", "node3"}, 1590 extenders: []*tf.FakeExtender{ 1591 { 1592 ExtenderName: "FakeExtender1", 1593 Predicates: []tf.FitPredicate{tf.FalsePredicateExtender}, 1594 }, 1595 }, 1596 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1597 want: nil, 1598 expectedPods: []string{}, 1599 }, 1600 { 1601 name: "One scheduler extender allows only node1, the other returns error but ignorable. Only node1 would be chosen", 1602 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(), 1603 pods: []*v1.Pod{ 1604 st.MakePod().Name("p1.1").UID("p1.1").Namespace(v1.NamespaceDefault).Node("node1").Priority(midPriority).Req(smallRes).Obj(), 1605 st.MakePod().Name("p1.2").UID("p1.2").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1606 st.MakePod().Name("p2.1").UID("p2.1").Namespace(v1.NamespaceDefault).Node("node2").Priority(midPriority).Req(largeRes).Obj(), 1607 }, 1608 nodeNames: []string{"node1", "node2", "node3"}, 1609 extenders: []*tf.FakeExtender{ 1610 { 1611 Predicates: []tf.FitPredicate{tf.ErrorPredicateExtender}, 1612 Ignorable: true, 1613 ExtenderName: "FakeExtender1", 1614 }, 1615 { 1616 Predicates: []tf.FitPredicate{tf.Node1PredicateExtender}, 1617 ExtenderName: "FakeExtender2", 1618 }, 1619 }, 1620 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1621 want: framework.NewPostFilterResultWithNominatedNode("node1"), 1622 expectedPods: []string{"p1.1", "p1.2"}, 1623 }, 1624 { 1625 name: "One scheduler extender allows only node1, but it is not interested in given pod, otherwise node1 would have been chosen", 1626 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(), 1627 pods: []*v1.Pod{ 1628 st.MakePod().Name("p1.1").UID("p1.1").Namespace(v1.NamespaceDefault).Node("node1").Priority(midPriority).Req(smallRes).Obj(), 1629 st.MakePod().Name("p1.2").UID("p1.2").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1630 st.MakePod().Name("p2.1").UID("p2.1").Namespace(v1.NamespaceDefault).Node("node2").Priority(midPriority).Req(largeRes).Obj(), 1631 }, 1632 nodeNames: []string{"node1", "node2"}, 1633 extenders: []*tf.FakeExtender{ 1634 { 1635 ExtenderName: "FakeExtender1", 1636 Predicates: []tf.FitPredicate{tf.Node1PredicateExtender}, 1637 UnInterested: true, 1638 }, 1639 { 1640 ExtenderName: "FakeExtender2", 1641 Predicates: []tf.FitPredicate{tf.TruePredicateExtender}, 1642 }, 1643 }, 1644 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1645 // sum of priorities of all victims on node1 is larger than node2, node2 is chosen. 1646 want: framework.NewPostFilterResultWithNominatedNode("node2"), 1647 expectedPods: []string{"p2.1"}, 1648 }, 1649 { 1650 name: "no preempting in pod", 1651 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptNever).Obj(), 1652 pods: []*v1.Pod{ 1653 st.MakePod().Name("p1.1").UID("p1.1").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1654 st.MakePod().Name("p1.2").UID("p1.2").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1655 st.MakePod().Name("p2.1").UID("p2.1").Namespace(v1.NamespaceDefault).Node("node2").Priority(highPriority).Req(largeRes).Obj(), 1656 st.MakePod().Name("p3.1").UID("p3.1").Namespace(v1.NamespaceDefault).Node("node3").Priority(midPriority).Req(mediumRes).Obj(), 1657 }, 1658 nodeNames: []string{"node1", "node2", "node3"}, 1659 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1660 want: nil, 1661 expectedPods: nil, 1662 }, 1663 { 1664 name: "PreemptionPolicy is nil", 1665 pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Req(veryLargeRes).Obj(), 1666 pods: []*v1.Pod{ 1667 st.MakePod().Name("p1.1").UID("p1.1").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1668 st.MakePod().Name("p1.2").UID("p1.2").Namespace(v1.NamespaceDefault).Node("node1").Priority(lowPriority).Req(smallRes).Obj(), 1669 st.MakePod().Name("p2.1").UID("p2.1").Namespace(v1.NamespaceDefault).Node("node2").Priority(highPriority).Req(largeRes).Obj(), 1670 st.MakePod().Name("p3.1").UID("p3.1").Namespace(v1.NamespaceDefault).Node("node3").Priority(midPriority).Req(mediumRes).Obj(), 1671 }, 1672 nodeNames: []string{"node1", "node2", "node3"}, 1673 registerPlugin: tf.RegisterPluginAsExtensions(noderesources.Name, nodeResourcesFitFunc, "Filter", "PreFilter"), 1674 want: framework.NewPostFilterResultWithNominatedNode("node1"), 1675 expectedPods: []string{"p1.1", "p1.2"}, 1676 }, 1677 } 1678 1679 labelKeys := []string{"hostname", "zone", "region"} 1680 for _, test := range tests { 1681 t.Run(test.name, func(t *testing.T) { 1682 client := clientsetfake.NewSimpleClientset() 1683 informerFactory := informers.NewSharedInformerFactory(client, 0) 1684 podInformer := informerFactory.Core().V1().Pods().Informer() 1685 podInformer.GetStore().Add(test.pod) 1686 for i := range test.pods { 1687 podInformer.GetStore().Add(test.pods[i]) 1688 } 1689 1690 deletedPodNames := sets.New[string]() 1691 patchedPodNames := sets.New[string]() 1692 client.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) { 1693 patchedPodNames.Insert(action.(clienttesting.PatchAction).GetName()) 1694 return true, nil, nil 1695 }) 1696 client.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) { 1697 deletedPodNames.Insert(action.(clienttesting.DeleteAction).GetName()) 1698 return true, nil, nil 1699 }) 1700 1701 logger, ctx := ktesting.NewTestContext(t) 1702 ctx, cancel := context.WithCancel(ctx) 1703 defer cancel() 1704 1705 cache := internalcache.New(ctx, time.Duration(0)) 1706 for _, pod := range test.pods { 1707 cache.AddPod(logger, pod) 1708 } 1709 cachedNodeInfoMap := map[string]*framework.NodeInfo{} 1710 nodes := make([]*v1.Node, len(test.nodeNames)) 1711 for i, name := range test.nodeNames { 1712 node := st.MakeNode().Name(name).Capacity(veryLargeRes).Obj() 1713 // Split node name by '/' to form labels in a format of 1714 // {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]} 1715 node.ObjectMeta.Labels = make(map[string]string) 1716 for i, label := range strings.Split(node.Name, "/") { 1717 node.ObjectMeta.Labels[labelKeys[i]] = label 1718 } 1719 node.Name = node.ObjectMeta.Labels["hostname"] 1720 cache.AddNode(logger, node) 1721 nodes[i] = node 1722 1723 // Set nodeInfo to extenders to mock extenders' cache for preemption. 1724 cachedNodeInfo := framework.NewNodeInfo() 1725 cachedNodeInfo.SetNode(node) 1726 cachedNodeInfoMap[node.Name] = cachedNodeInfo 1727 } 1728 var extenders []framework.Extender 1729 for _, extender := range test.extenders { 1730 // Set nodeInfoMap as extenders cached node information. 1731 extender.CachedNodeNameToInfo = cachedNodeInfoMap 1732 extenders = append(extenders, extender) 1733 } 1734 fwk, err := tf.NewFramework( 1735 ctx, 1736 []tf.RegisterPluginFunc{ 1737 test.registerPlugin, 1738 tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), 1739 tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), 1740 }, 1741 "", 1742 frameworkruntime.WithClientSet(client), 1743 frameworkruntime.WithEventRecorder(&events.FakeRecorder{}), 1744 frameworkruntime.WithExtenders(extenders), 1745 frameworkruntime.WithPodNominator(internalqueue.NewPodNominator(informerFactory.Core().V1().Pods().Lister())), 1746 frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(test.pods, nodes)), 1747 frameworkruntime.WithInformerFactory(informerFactory), 1748 frameworkruntime.WithLogger(logger), 1749 ) 1750 if err != nil { 1751 t.Fatal(err) 1752 } 1753 1754 state := framework.NewCycleState() 1755 // Some tests rely on PreFilter plugin to compute its CycleState. 1756 if _, s := fwk.RunPreFilterPlugins(ctx, state, test.pod); !s.IsSuccess() { 1757 t.Errorf("Unexpected preFilterStatus: %v", s) 1758 } 1759 // Call preempt and check the expected results. 1760 pl := DefaultPreemption{ 1761 fh: fwk, 1762 podLister: informerFactory.Core().V1().Pods().Lister(), 1763 pdbLister: getPDBLister(informerFactory), 1764 args: *getDefaultDefaultPreemptionArgs(), 1765 } 1766 1767 pe := preemption.Evaluator{ 1768 PluginName: names.DefaultPreemption, 1769 Handler: pl.fh, 1770 PodLister: pl.podLister, 1771 PdbLister: pl.pdbLister, 1772 State: state, 1773 Interface: &pl, 1774 } 1775 res, status := pe.Preempt(ctx, test.pod, make(framework.NodeToStatusMap)) 1776 if !status.IsSuccess() && !status.IsRejected() { 1777 t.Errorf("unexpected error in preemption: %v", status.AsError()) 1778 } 1779 if diff := cmp.Diff(test.want, res); diff != "" { 1780 t.Errorf("Unexpected status (-want, +got):\n%s", diff) 1781 } 1782 if len(deletedPodNames) != len(test.expectedPods) { 1783 t.Errorf("expected %v pods, got %v.", len(test.expectedPods), len(deletedPodNames)) 1784 } 1785 if diff := cmp.Diff(sets.List(patchedPodNames), sets.List(deletedPodNames)); diff != "" { 1786 t.Errorf("unexpected difference in the set of patched and deleted pods: %s", diff) 1787 } 1788 for victimName := range deletedPodNames { 1789 found := false 1790 for _, expPod := range test.expectedPods { 1791 if expPod == victimName { 1792 found = true 1793 break 1794 } 1795 } 1796 if !found { 1797 t.Errorf("pod %v is not expected to be a victim.", victimName) 1798 } 1799 } 1800 if res != nil && res.NominatingInfo != nil { 1801 test.pod.Status.NominatedNodeName = res.NominatedNodeName 1802 } 1803 1804 // Manually set the deleted Pods' deletionTimestamp to non-nil. 1805 for _, pod := range test.pods { 1806 if deletedPodNames.Has(pod.Name) { 1807 now := metav1.Now() 1808 pod.DeletionTimestamp = &now 1809 deletedPodNames.Delete(pod.Name) 1810 } 1811 } 1812 1813 // Call preempt again and make sure it doesn't preempt any more pods. 1814 res, status = pe.Preempt(ctx, test.pod, make(framework.NodeToStatusMap)) 1815 if !status.IsSuccess() && !status.IsRejected() { 1816 t.Errorf("unexpected error in preemption: %v", status.AsError()) 1817 } 1818 if res != nil && res.NominatingInfo != nil && len(deletedPodNames) > 0 { 1819 t.Errorf("didn't expect any more preemption. Node %v is selected for preemption.", res.NominatedNodeName) 1820 } 1821 }) 1822 } 1823 }