k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/integration/scheduler/queue_test.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package scheduler 18 19 import ( 20 "context" 21 "fmt" 22 "testing" 23 "time" 24 25 v1 "k8s.io/api/core/v1" 26 apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" 27 apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 28 "k8s.io/apimachinery/pkg/api/errors" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 31 "k8s.io/apimachinery/pkg/runtime" 32 "k8s.io/apimachinery/pkg/runtime/schema" 33 "k8s.io/apimachinery/pkg/types" 34 "k8s.io/apimachinery/pkg/util/sets" 35 "k8s.io/apimachinery/pkg/util/uuid" 36 "k8s.io/apimachinery/pkg/util/wait" 37 utilfeature "k8s.io/apiserver/pkg/util/feature" 38 "k8s.io/client-go/dynamic" 39 "k8s.io/client-go/kubernetes" 40 featuregatetesting "k8s.io/component-base/featuregate/testing" 41 "k8s.io/klog/v2" 42 configv1 "k8s.io/kube-scheduler/config/v1" 43 apiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing" 44 "k8s.io/kubernetes/pkg/features" 45 "k8s.io/kubernetes/pkg/scheduler" 46 configtesting "k8s.io/kubernetes/pkg/scheduler/apis/config/testing" 47 "k8s.io/kubernetes/pkg/scheduler/framework" 48 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" 49 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/names" 50 frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" 51 st "k8s.io/kubernetes/pkg/scheduler/testing" 52 testfwk "k8s.io/kubernetes/test/integration/framework" 53 testutils "k8s.io/kubernetes/test/integration/util" 54 imageutils "k8s.io/kubernetes/test/utils/image" 55 "k8s.io/utils/pointer" 56 ) 57 58 func TestSchedulingGates(t *testing.T) { 59 tests := []struct { 60 name string 61 pods []*v1.Pod 62 schedule []string 63 delete []string 64 rmGates []string 65 }{ 66 { 67 name: "regular pods", 68 pods: []*v1.Pod{ 69 st.MakePod().Name("p1").Container("pause").Obj(), 70 st.MakePod().Name("p2").Container("pause").Obj(), 71 }, 72 schedule: []string{"p1", "p2"}, 73 }, 74 { 75 name: "one pod carrying scheduling gates", 76 pods: []*v1.Pod{ 77 st.MakePod().Name("p1").SchedulingGates([]string{"foo"}).Container("pause").Obj(), 78 st.MakePod().Name("p2").Container("pause").Obj(), 79 }, 80 schedule: []string{"p2"}, 81 }, 82 { 83 name: "two pod carrying scheduling gates, and remove gates of one pod", 84 pods: []*v1.Pod{ 85 st.MakePod().Name("p1").SchedulingGates([]string{"foo"}).Container("pause").Obj(), 86 st.MakePod().Name("p2").SchedulingGates([]string{"bar"}).Container("pause").Obj(), 87 st.MakePod().Name("p3").Container("pause").Obj(), 88 }, 89 schedule: []string{"p3"}, 90 rmGates: []string{"p2"}, 91 }, 92 { 93 name: "gated pod schedulable after deleting the scheduled pod and removing gate", 94 pods: []*v1.Pod{ 95 st.MakePod().Name("p1").SchedulingGates([]string{"foo"}).Container("pause").Obj(), 96 st.MakePod().Name("p2").Container("pause").Obj(), 97 }, 98 schedule: []string{"p2"}, 99 delete: []string{"p2"}, 100 rmGates: []string{"p1"}, 101 }, 102 } 103 104 for _, tt := range tests { 105 t.Run(tt.name, func(t *testing.T) { 106 // Use zero backoff seconds to bypass backoffQ. 107 // It's intended to not start the scheduler's queue, and hence to 108 // not start any flushing logic. We will pop and schedule the Pods manually later. 109 testCtx := testutils.InitTestSchedulerWithOptions( 110 t, 111 testutils.InitTestAPIServer(t, "pod-scheduling-gates", nil), 112 0, 113 scheduler.WithPodInitialBackoffSeconds(0), 114 scheduler.WithPodMaxBackoffSeconds(0), 115 ) 116 testutils.SyncSchedulerInformerFactory(testCtx) 117 118 cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx 119 120 // Create node, so we can schedule pods. 121 node := st.MakeNode().Name("node").Obj() 122 if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil { 123 t.Fatal("Failed to create node") 124 125 } 126 127 // Create pods. 128 for _, p := range tt.pods { 129 p.Namespace = ns 130 if _, err := cs.CoreV1().Pods(ns).Create(ctx, p, metav1.CreateOptions{}); err != nil { 131 t.Fatalf("Failed to create Pod %q: %v", p.Name, err) 132 } 133 } 134 135 // Wait for the pods to be present in the scheduling queue. 136 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) { 137 pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods() 138 return len(pendingPods) == len(tt.pods), nil 139 }); err != nil { 140 t.Fatal(err) 141 } 142 143 // Schedule pods. 144 for _, podName := range tt.schedule { 145 testCtx.Scheduler.ScheduleOne(testCtx.Ctx) 146 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, ns, podName)); err != nil { 147 t.Fatalf("Failed to schedule %s", podName) 148 } 149 } 150 151 // Delete pods, which triggers AssignedPodDelete event in the scheduling queue. 152 for _, podName := range tt.delete { 153 if err := cs.CoreV1().Pods(ns).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil { 154 t.Fatalf("Error calling Delete on %s", podName) 155 } 156 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, testutils.PodDeleted(ctx, cs, ns, podName)); err != nil { 157 t.Fatalf("Failed to delete %s", podName) 158 } 159 } 160 161 // Ensure gated pods are not in ActiveQ 162 if len(testCtx.Scheduler.SchedulingQueue.PodsInActiveQ()) > 0 { 163 t.Fatal("Expected no schedulable pods") 164 } 165 166 // Remove scheduling gates from the pod spec. 167 for _, podName := range tt.rmGates { 168 patch := `{"spec": {"schedulingGates": null}}` 169 if _, err := cs.CoreV1().Pods(ns).Patch(ctx, podName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}); err != nil { 170 t.Fatalf("Failed to patch pod %v: %v", podName, err) 171 } 172 } 173 174 // Schedule pods which no longer have gates. 175 for _, podName := range tt.rmGates { 176 testCtx.Scheduler.ScheduleOne(testCtx.Ctx) 177 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, ns, podName)); err != nil { 178 t.Fatalf("Failed to schedule %s", podName) 179 } 180 } 181 }) 182 } 183 } 184 185 // TestCoreResourceEnqueue verify Pods failed by in-tree default plugins can be 186 // moved properly upon their registered events. 187 func TestCoreResourceEnqueue(t *testing.T) { 188 tests := []struct { 189 name string 190 // initialNode is the Node to be created at first. 191 initialNode *v1.Node 192 // initialPod is the Pod to be created at first if it's not empty. 193 initialPod *v1.Pod 194 // pods are the list of Pods to be created. 195 // All of them are expected to be unschedulable at first. 196 pods []*v1.Pod 197 // triggerFn is the function that triggers the event to move Pods. 198 triggerFn func(testCtx *testutils.TestContext) error 199 // wantRequeuedPods is the map of Pods that are expected to be requeued after triggerFn. 200 wantRequeuedPods sets.Set[string] 201 }{ 202 { 203 name: "Pod without a required toleration to a node isn't requeued to activeQ", 204 initialNode: st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj(), 205 pods: []*v1.Pod{ 206 // - Pod1 doesn't have the required toleration and will be rejected by the TaintToleration plugin. 207 // (TaintToleration plugin is evaluated before NodeResourcesFit plugin.) 208 // - Pod2 has the required toleration, but requests a large amount of CPU - will be rejected by the NodeResourcesFit plugin. 209 st.MakePod().Name("pod1").Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(), 210 st.MakePod().Name("pod2").Toleration(v1.TaintNodeNotReady).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Container("image").Obj(), 211 }, 212 triggerFn: func(testCtx *testutils.TestContext) error { 213 // Trigger a NodeChange event by increasing CPU capacity. 214 // It makes Pod2 schedulable. 215 // Pod1 is not requeued because the Node is still unready and it doesn't have the required toleration. 216 if _, err := testCtx.ClientSet.CoreV1().Nodes().UpdateStatus(testCtx.Ctx, st.MakeNode().Name("fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj(), metav1.UpdateOptions{}); err != nil { 217 return fmt.Errorf("failed to update the node: %w", err) 218 } 219 return nil 220 }, 221 wantRequeuedPods: sets.New("pod2"), 222 }, 223 { 224 name: "Pod rejected by the PodAffinity plugin is requeued when a new Node is created and turned to ready", 225 initialNode: st.MakeNode().Name("fake-node").Label("node", "fake-node").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj(), 226 initialPod: st.MakePod().Label("anti", "anti").Name("pod1").PodAntiAffinityExists("anti", "node", st.PodAntiAffinityWithRequiredReq).Container("image").Node("fake-node").Obj(), 227 pods: []*v1.Pod{ 228 // - Pod2 will be rejected by the PodAffinity plugin. 229 st.MakePod().Label("anti", "anti").Name("pod2").PodAntiAffinityExists("anti", "node", st.PodAntiAffinityWithRequiredReq).Container("image").Obj(), 230 }, 231 triggerFn: func(testCtx *testutils.TestContext) error { 232 // Trigger a NodeCreated event. 233 // Note that this Node has a un-ready taint and pod2 should be requeued ideally because unschedulable plugins registered for pod2 is PodAffinity. 234 // However, due to preCheck, it's not requeueing pod2 to activeQ. 235 // It'll be fixed by the removal of preCheck in the future. 236 // https://github.com/kubernetes/kubernetes/issues/110175 237 node := st.MakeNode().Name("fake-node2").Label("node", "fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Taints([]v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoSchedule}}).Obj() 238 if _, err := testCtx.ClientSet.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil { 239 return fmt.Errorf("failed to create a new node: %w", err) 240 } 241 242 // As a mitigation of an issue described above, all plugins subscribing Node/Add event register UpdateNodeTaint too. 243 // So, this removal of taint moves pod2 to activeQ. 244 node.Spec.Taints = nil 245 if _, err := testCtx.ClientSet.CoreV1().Nodes().Update(testCtx.Ctx, node, metav1.UpdateOptions{}); err != nil { 246 return fmt.Errorf("failed to remove taints off the node: %w", err) 247 } 248 return nil 249 }, 250 wantRequeuedPods: sets.New("pod2"), 251 }, 252 } 253 254 for _, featureEnabled := range []bool{false, true} { 255 for _, tt := range tests { 256 t.Run(fmt.Sprintf("%s [SchedulerQueueingHints enabled: %v]", tt.name, featureEnabled), func(t *testing.T) { 257 featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, featureEnabled) 258 259 // Use zero backoff seconds to bypass backoffQ. 260 // It's intended to not start the scheduler's queue, and hence to 261 // not start any flushing logic. We will pop and schedule the Pods manually later. 262 testCtx := testutils.InitTestSchedulerWithOptions( 263 t, 264 testutils.InitTestAPIServer(t, "core-res-enqueue", nil), 265 0, 266 scheduler.WithPodInitialBackoffSeconds(0), 267 scheduler.WithPodMaxBackoffSeconds(0), 268 ) 269 testutils.SyncSchedulerInformerFactory(testCtx) 270 271 defer testCtx.Scheduler.SchedulingQueue.Close() 272 273 cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx 274 // Create one Node with a taint. 275 if _, err := cs.CoreV1().Nodes().Create(ctx, tt.initialNode, metav1.CreateOptions{}); err != nil { 276 t.Fatalf("Failed to create an initial Node %q: %v", tt.initialNode.Name, err) 277 } 278 279 if tt.initialPod != nil { 280 if _, err := cs.CoreV1().Pods(ns).Create(ctx, tt.initialPod, metav1.CreateOptions{}); err != nil { 281 t.Fatalf("Failed to create an initial Pod %q: %v", tt.initialPod.Name, err) 282 } 283 } 284 285 for _, pod := range tt.pods { 286 if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil { 287 t.Fatalf("Failed to create Pod %q: %v", pod.Name, err) 288 } 289 } 290 291 // Wait for the tt.pods to be present in the scheduling queue. 292 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) { 293 pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods() 294 return len(pendingPods) == len(tt.pods), nil 295 }); err != nil { 296 t.Fatal(err) 297 } 298 299 t.Log("Confirmed Pods in the scheduling queue, starting to schedule them") 300 301 // Pop all pods out. They should be unschedulable. 302 for i := 0; i < len(tt.pods); i++ { 303 testCtx.Scheduler.ScheduleOne(testCtx.Ctx) 304 } 305 // Wait for the tt.pods to be still present in the scheduling queue. 306 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) { 307 pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods() 308 return len(pendingPods) == len(tt.pods), nil 309 }); err != nil { 310 t.Fatal(err) 311 } 312 313 t.Log("finished initial schedulings for all Pods, will trigger triggerFn") 314 315 err := tt.triggerFn(testCtx) 316 if err != nil { 317 t.Fatalf("Failed to trigger the event: %v", err) 318 } 319 320 t.Log("triggered tt.triggerFn, will check if tt.requeuedPods are requeued") 321 322 // Wait for the tt.pods to be still present in the scheduling queue. 323 var requeuedPods sets.Set[string] 324 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) { 325 requeuedPods = sets.Set[string]{} // reset 326 for _, requeuedPod := range testCtx.Scheduler.SchedulingQueue.PodsInActiveQ() { 327 requeuedPods.Insert(requeuedPod.Name) 328 } 329 330 return requeuedPods.Equal(tt.wantRequeuedPods), nil 331 }); err != nil { 332 t.Fatalf("Expect Pods %v to be requeued, but %v are requeued actually", tt.wantRequeuedPods, requeuedPods) 333 } 334 }) 335 } 336 } 337 } 338 339 var _ framework.FilterPlugin = &fakeCRPlugin{} 340 var _ framework.EnqueueExtensions = &fakeCRPlugin{} 341 342 type fakeCRPlugin struct{} 343 344 func (f *fakeCRPlugin) Name() string { 345 return "fakeCRPlugin" 346 } 347 348 func (f *fakeCRPlugin) Filter(_ context.Context, _ *framework.CycleState, _ *v1.Pod, _ *framework.NodeInfo) *framework.Status { 349 return framework.NewStatus(framework.Unschedulable, "always fail") 350 } 351 352 // EventsToRegister returns the possible events that may make a Pod 353 // failed by this plugin schedulable. 354 func (f *fakeCRPlugin) EventsToRegister() []framework.ClusterEventWithHint { 355 return []framework.ClusterEventWithHint{ 356 {Event: framework.ClusterEvent{Resource: "foos.v1.example.com", ActionType: framework.All}}, 357 } 358 } 359 360 // TestCustomResourceEnqueue constructs a fake plugin that registers custom resources 361 // to verify Pods failed by this plugin can be moved properly upon CR events. 362 func TestCustomResourceEnqueue(t *testing.T) { 363 // Start API Server with apiextensions supported. 364 server := apiservertesting.StartTestServerOrDie( 365 t, apiservertesting.NewDefaultTestServerOptions(), 366 []string{"--disable-admission-plugins=ServiceAccount,TaintNodesByCondition", "--runtime-config=api/all=true"}, 367 testfwk.SharedEtcd(), 368 ) 369 testCtx := &testutils.TestContext{} 370 ctx, cancel := context.WithCancel(context.Background()) 371 testCtx.Ctx = ctx 372 testCtx.CloseFn = func() { 373 cancel() 374 server.TearDownFn() 375 } 376 377 apiExtensionClient := apiextensionsclient.NewForConfigOrDie(server.ClientConfig) 378 dynamicClient := dynamic.NewForConfigOrDie(server.ClientConfig) 379 380 // Create a Foo CRD. 381 fooCRD := &apiextensionsv1.CustomResourceDefinition{ 382 ObjectMeta: metav1.ObjectMeta{ 383 Name: "foos.example.com", 384 }, 385 Spec: apiextensionsv1.CustomResourceDefinitionSpec{ 386 Group: "example.com", 387 Scope: apiextensionsv1.NamespaceScoped, 388 Names: apiextensionsv1.CustomResourceDefinitionNames{ 389 Plural: "foos", 390 Kind: "Foo", 391 }, 392 Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ 393 { 394 Name: "v1", 395 Served: true, 396 Storage: true, 397 Schema: &apiextensionsv1.CustomResourceValidation{ 398 OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{ 399 Type: "object", 400 Properties: map[string]apiextensionsv1.JSONSchemaProps{ 401 "field": {Type: "string"}, 402 }, 403 }, 404 }, 405 }, 406 }, 407 }, 408 } 409 var err error 410 fooCRD, err = apiExtensionClient.ApiextensionsV1().CustomResourceDefinitions().Create(testCtx.Ctx, fooCRD, metav1.CreateOptions{}) 411 if err != nil { 412 t.Fatal(err) 413 } 414 415 registry := frameworkruntime.Registry{ 416 "fakeCRPlugin": func(_ context.Context, _ runtime.Object, fh framework.Handle) (framework.Plugin, error) { 417 return &fakeCRPlugin{}, nil 418 }, 419 } 420 cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{ 421 Profiles: []configv1.KubeSchedulerProfile{{ 422 SchedulerName: pointer.String(v1.DefaultSchedulerName), 423 Plugins: &configv1.Plugins{ 424 Filter: configv1.PluginSet{ 425 Enabled: []configv1.Plugin{ 426 {Name: "fakeCRPlugin"}, 427 }, 428 }, 429 }, 430 }}}) 431 432 testCtx.KubeConfig = server.ClientConfig 433 testCtx.ClientSet = kubernetes.NewForConfigOrDie(server.ClientConfig) 434 testCtx.NS, err = testCtx.ClientSet.CoreV1().Namespaces().Create(testCtx.Ctx, &v1.Namespace{ 435 ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("cr-enqueue-%v", string(uuid.NewUUID()))}}, metav1.CreateOptions{}) 436 if err != nil && !errors.IsAlreadyExists(err) { 437 t.Fatalf("Failed to integration test ns: %v", err) 438 } 439 440 // Use zero backoff seconds to bypass backoffQ. 441 // It's intended to not start the scheduler's queue, and hence to 442 // not start any flushing logic. We will pop and schedule the Pods manually later. 443 testCtx = testutils.InitTestSchedulerWithOptions( 444 t, 445 testCtx, 446 0, 447 scheduler.WithProfiles(cfg.Profiles...), 448 scheduler.WithFrameworkOutOfTreeRegistry(registry), 449 scheduler.WithPodInitialBackoffSeconds(0), 450 scheduler.WithPodMaxBackoffSeconds(0), 451 ) 452 testutils.SyncSchedulerInformerFactory(testCtx) 453 454 defer testutils.CleanupTest(t, testCtx) 455 456 cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx 457 logger := klog.FromContext(ctx) 458 // Create one Node. 459 node := st.MakeNode().Name("fake-node").Obj() 460 if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil { 461 t.Fatalf("Failed to create Node %q: %v", node.Name, err) 462 } 463 464 // Create a testing Pod. 465 pause := imageutils.GetPauseImageName() 466 pod := st.MakePod().Namespace(ns).Name("fake-pod").Container(pause).Obj() 467 if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil { 468 t.Fatalf("Failed to create Pod %q: %v", pod.Name, err) 469 } 470 471 // Wait for the testing Pod to be present in the scheduling queue. 472 if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) { 473 pendingPods, _ := testCtx.Scheduler.SchedulingQueue.PendingPods() 474 return len(pendingPods) == 1, nil 475 }); err != nil { 476 t.Fatal(err) 477 } 478 479 // Pop fake-pod out. It should be unschedulable. 480 podInfo := testutils.NextPodOrDie(t, testCtx) 481 fwk, ok := testCtx.Scheduler.Profiles[podInfo.Pod.Spec.SchedulerName] 482 if !ok { 483 t.Fatalf("Cannot find the profile for Pod %v", podInfo.Pod.Name) 484 } 485 // Schedule the Pod manually. 486 _, fitError := testCtx.Scheduler.SchedulePod(ctx, fwk, framework.NewCycleState(), podInfo.Pod) 487 // The fitError is expected to be non-nil as it failed the fakeCRPlugin plugin. 488 if fitError == nil { 489 t.Fatalf("Expect Pod %v to fail at scheduling.", podInfo.Pod.Name) 490 } 491 testCtx.Scheduler.FailureHandler(ctx, fwk, podInfo, framework.NewStatus(framework.Unschedulable).WithError(fitError), nil, time.Now()) 492 493 // Scheduling cycle is incremented from 0 to 1 after NextPod() is called, so 494 // pass a number larger than 1 to move Pod to unschedulablePods. 495 testCtx.Scheduler.SchedulingQueue.AddUnschedulableIfNotPresent(logger, podInfo, 10) 496 497 // Trigger a Custom Resource event. 498 // We expect this event to trigger moving the test Pod from unschedulablePods to activeQ. 499 crdGVR := schema.GroupVersionResource{Group: fooCRD.Spec.Group, Version: fooCRD.Spec.Versions[0].Name, Resource: "foos"} 500 crClient := dynamicClient.Resource(crdGVR).Namespace(ns) 501 if _, err := crClient.Create(ctx, &unstructured.Unstructured{ 502 Object: map[string]interface{}{ 503 "apiVersion": "example.com/v1", 504 "kind": "Foo", 505 "metadata": map[string]interface{}{"name": "foo1"}, 506 }, 507 }, metav1.CreateOptions{}); err != nil { 508 t.Fatalf("Unable to create cr: %v", err) 509 } 510 511 // Now we should be able to pop the Pod from activeQ again. 512 podInfo = testutils.NextPodOrDie(t, testCtx) 513 if podInfo.Attempts != 2 { 514 t.Errorf("Expected the Pod to be attempted 2 times, but got %v", podInfo.Attempts) 515 } 516 } 517 518 // TestRequeueByBindFailure verify Pods failed by bind plugin are 519 // put back to the queue regardless of whether event happens or not. 520 func TestRequeueByBindFailure(t *testing.T) { 521 fakeBind := &firstFailBindPlugin{} 522 registry := frameworkruntime.Registry{ 523 "firstFailBindPlugin": func(ctx context.Context, o runtime.Object, fh framework.Handle) (framework.Plugin, error) { 524 binder, err := defaultbinder.New(ctx, nil, fh) 525 if err != nil { 526 return nil, err 527 } 528 529 fakeBind.defaultBinderPlugin = binder.(framework.BindPlugin) 530 return fakeBind, nil 531 }, 532 } 533 534 cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{ 535 Profiles: []configv1.KubeSchedulerProfile{{ 536 SchedulerName: pointer.String(v1.DefaultSchedulerName), 537 Plugins: &configv1.Plugins{ 538 MultiPoint: configv1.PluginSet{ 539 Enabled: []configv1.Plugin{ 540 {Name: "firstFailBindPlugin"}, 541 }, 542 Disabled: []configv1.Plugin{ 543 {Name: names.DefaultBinder}, 544 }, 545 }, 546 }, 547 }}}) 548 549 // Use zero backoff seconds to bypass backoffQ. 550 testCtx := testutils.InitTestSchedulerWithOptions( 551 t, 552 testutils.InitTestAPIServer(t, "core-res-enqueue", nil), 553 0, 554 scheduler.WithPodInitialBackoffSeconds(0), 555 scheduler.WithPodMaxBackoffSeconds(0), 556 scheduler.WithProfiles(cfg.Profiles...), 557 scheduler.WithFrameworkOutOfTreeRegistry(registry), 558 ) 559 testutils.SyncSchedulerInformerFactory(testCtx) 560 561 go testCtx.Scheduler.Run(testCtx.Ctx) 562 563 cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx 564 node := st.MakeNode().Name("fake-node").Obj() 565 if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil { 566 t.Fatalf("Failed to create Node %q: %v", node.Name, err) 567 } 568 // create a pod. 569 pod := st.MakePod().Namespace(ns).Name("pod-1").Container(imageutils.GetPauseImageName()).Obj() 570 if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil { 571 t.Fatalf("Failed to create Pod %q: %v", pod.Name, err) 572 } 573 574 // 1. first binding try should fail. 575 // 2. The pod should be enqueued to activeQ/backoffQ without any event. 576 // 3. The pod should be scheduled in the second binding try. 577 // Here, waiting until (3). 578 err := wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, testutils.PodScheduled(cs, ns, pod.Name)) 579 if err != nil { 580 t.Fatalf("Expect pod-1 to be scheduled by the bind plugin: %v", err) 581 } 582 583 // Make sure the first binding trial was failed, and this pod is scheduled at the second trial. 584 if fakeBind.counter != 1 { 585 t.Fatalf("Expect pod-1 to be scheduled by the bind plugin in the second binding try: %v", err) 586 } 587 } 588 589 // firstFailBindPlugin rejects the Pod in the first Bind call. 590 type firstFailBindPlugin struct { 591 counter int 592 defaultBinderPlugin framework.BindPlugin 593 } 594 595 func (*firstFailBindPlugin) Name() string { 596 return "firstFailBindPlugin" 597 } 598 599 func (p *firstFailBindPlugin) Bind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodename string) *framework.Status { 600 if p.counter == 0 { 601 // fail in the first Bind call. 602 p.counter++ 603 return framework.NewStatus(framework.Error, "firstFailBindPlugin rejects the Pod") 604 } 605 606 return p.defaultBinderPlugin.Bind(ctx, state, pod, nodename) 607 } 608 609 // TestRequeueByPermitRejection verify Pods failed by permit plugins in the binding cycle are 610 // put back to the queue, according to the correct scheduling cycle number. 611 func TestRequeueByPermitRejection(t *testing.T) { 612 featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, true) 613 queueingHintCalledCounter := 0 614 fakePermit := &fakePermitPlugin{} 615 registry := frameworkruntime.Registry{ 616 fakePermitPluginName: func(ctx context.Context, o runtime.Object, fh framework.Handle) (framework.Plugin, error) { 617 fakePermit = &fakePermitPlugin{ 618 frameworkHandler: fh, 619 schedulingHint: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) { 620 queueingHintCalledCounter++ 621 return framework.Queue, nil 622 }, 623 } 624 return fakePermit, nil 625 }, 626 } 627 cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{ 628 Profiles: []configv1.KubeSchedulerProfile{{ 629 SchedulerName: pointer.String(v1.DefaultSchedulerName), 630 Plugins: &configv1.Plugins{ 631 MultiPoint: configv1.PluginSet{ 632 Enabled: []configv1.Plugin{ 633 {Name: fakePermitPluginName}, 634 }, 635 }, 636 }, 637 }}}) 638 639 // Use zero backoff seconds to bypass backoffQ. 640 testCtx := testutils.InitTestSchedulerWithOptions( 641 t, 642 testutils.InitTestAPIServer(t, "core-res-enqueue", nil), 643 0, 644 scheduler.WithPodInitialBackoffSeconds(0), 645 scheduler.WithPodMaxBackoffSeconds(0), 646 scheduler.WithProfiles(cfg.Profiles...), 647 scheduler.WithFrameworkOutOfTreeRegistry(registry), 648 ) 649 testutils.SyncSchedulerInformerFactory(testCtx) 650 651 go testCtx.Scheduler.Run(testCtx.Ctx) 652 653 cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx 654 node := st.MakeNode().Name("fake-node").Obj() 655 if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil { 656 t.Fatalf("Failed to create Node %q: %v", node.Name, err) 657 } 658 // create a pod. 659 pod := st.MakePod().Namespace(ns).Name("pod-1").Container(imageutils.GetPauseImageName()).Obj() 660 if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil { 661 t.Fatalf("Failed to create Pod %q: %v", pod.Name, err) 662 } 663 664 // update node label. (causes the NodeUpdate event) 665 node.Labels = map[string]string{"updated": ""} 666 if _, err := cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil { 667 t.Fatalf("Failed to add labels to the node: %v", err) 668 } 669 670 // create a pod to increment the scheduling cycle number in the scheduling queue. 671 // We can make sure NodeUpdate event, that has happened in the previous scheduling cycle, makes Pod to be enqueued to activeQ via the scheduling queue. 672 pod = st.MakePod().Namespace(ns).Name("pod-2").Container(imageutils.GetPauseImageName()).Obj() 673 if _, err := cs.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}); err != nil { 674 t.Fatalf("Failed to create Pod %q: %v", pod.Name, err) 675 } 676 677 // reject pod-1 to simulate the failure in Permit plugins. 678 // This pod-1 should be enqueued to activeQ because the NodeUpdate event has happened. 679 fakePermit.frameworkHandler.IterateOverWaitingPods(func(wp framework.WaitingPod) { 680 if wp.GetPod().Name == "pod-1" { 681 wp.Reject(fakePermitPluginName, "fakePermitPlugin rejects the Pod") 682 return 683 } 684 }) 685 686 // Wait for pod-2 to be scheduled. 687 err := wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (done bool, err error) { 688 fakePermit.frameworkHandler.IterateOverWaitingPods(func(wp framework.WaitingPod) { 689 if wp.GetPod().Name == "pod-2" { 690 wp.Allow(fakePermitPluginName) 691 } 692 }) 693 694 return testutils.PodScheduled(cs, ns, "pod-2")(ctx) 695 }) 696 if err != nil { 697 t.Fatalf("Expect pod-2 to be scheduled") 698 } 699 700 err = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (done bool, err error) { 701 pod1Found := false 702 fakePermit.frameworkHandler.IterateOverWaitingPods(func(wp framework.WaitingPod) { 703 if wp.GetPod().Name == "pod-1" { 704 pod1Found = true 705 wp.Allow(fakePermitPluginName) 706 } 707 }) 708 return pod1Found, nil 709 }) 710 if err != nil { 711 t.Fatal("Expect pod-1 to be scheduled again") 712 } 713 714 if queueingHintCalledCounter != 1 { 715 t.Fatalf("Expected the scheduling hint to be called 1 time, but %v", queueingHintCalledCounter) 716 } 717 } 718 719 type fakePermitPlugin struct { 720 frameworkHandler framework.Handle 721 schedulingHint framework.QueueingHintFn 722 } 723 724 const fakePermitPluginName = "fakePermitPlugin" 725 726 func (p *fakePermitPlugin) Name() string { 727 return fakePermitPluginName 728 } 729 730 func (p *fakePermitPlugin) Permit(ctx context.Context, state *framework.CycleState, _ *v1.Pod, _ string) (*framework.Status, time.Duration) { 731 return framework.NewStatus(framework.Wait), wait.ForeverTestTimeout 732 } 733 734 func (p *fakePermitPlugin) EventsToRegister() []framework.ClusterEventWithHint { 735 return []framework.ClusterEventWithHint{ 736 {Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.UpdateNodeLabel}, QueueingHintFn: p.schedulingHint}, 737 } 738 }