k8s.io/kubernetes@v1.29.3/pkg/scheduler/internal/cache/cache_test.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cache

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	st "k8s.io/kubernetes/pkg/scheduler/testing"
	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)

func deepEqualWithoutGeneration(actual *nodeInfoListItem, expected *framework.NodeInfo) error {
	if (actual == nil) != (expected == nil) {
		return errors.New("one of the actual or expected is nil and the other is not")
	}
	// Ignore generation field.
	if actual != nil {
		actual.info.Generation = 0
	}
	if expected != nil {
		expected.Generation = 0
	}
	if actual != nil {
		if diff := cmp.Diff(expected, actual.info, cmp.AllowUnexported(framework.NodeInfo{})); diff != "" {
			return fmt.Errorf("Unexpected node info (-want,+got):\n%s", diff)
		}
	}
	return nil
}

type hostPortInfoParam struct {
	protocol, ip string
	port         int32
}

type hostPortInfoBuilder struct {
	inputs []hostPortInfoParam
}

func newHostPortInfoBuilder() *hostPortInfoBuilder {
	return &hostPortInfoBuilder{}
}

func (b *hostPortInfoBuilder) add(protocol, ip string, port int32) *hostPortInfoBuilder {
	b.inputs = append(b.inputs, hostPortInfoParam{protocol, ip, port})
	return b
}

func (b *hostPortInfoBuilder) build() framework.HostPortInfo {
	res := make(framework.HostPortInfo)
	for _, param := range b.inputs {
		res.Add(param.ip, param.protocol, param.port)
	}
	return res
}

func newNodeInfo(requestedResource *framework.Resource,
	nonzeroRequest *framework.Resource,
	pods []*v1.Pod,
	usedPorts framework.HostPortInfo,
	imageStates map[string]*framework.ImageStateSummary,
) *framework.NodeInfo {
	nodeInfo := framework.NewNodeInfo(pods...)
	nodeInfo.Requested = requestedResource
	nodeInfo.NonZeroRequested = nonzeroRequest
	nodeInfo.UsedPorts = usedPorts
	nodeInfo.ImageStates = imageStates
	return nodeInfo
}

// TestAssumePodScheduled tests that after a pod is assumed, its information is aggregated
// on node level.
func TestAssumePodScheduled(t *testing.T) {
	nodeName := "node"
	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test-resource-request-and-port-0", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-resource-request-and-port-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-resource-request-and-port-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-nonzero-request", "", "", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-extended-resource-1", "100m", "500", "example.com/foo:3", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-extended-resource-2", "200m", "1Ki", "example.com/foo:5", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-extended-key", "100m", "500", "random-invalid-extended-key:100", []v1.ContainerPort{{}}),
	}

	tests := []struct {
		name string
		pods []*v1.Pod

		wNodeInfo *framework.NodeInfo
	}{{
		name: "assumed one pod with resource request and used ports",
		pods: []*v1.Pod{testPods[0]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{testPods[0]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}, {
		name: "node requested resource are equal to the sum of the assumed pods requested resource, node contains host ports defined by pods",
		pods: []*v1.Pod{testPods[1], testPods[2]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU: 300,
				Memory:   1524,
			},
			&framework.Resource{
				MilliCPU: 300,
				Memory:   1524,
			},
			[]*v1.Pod{testPods[1], testPods[2]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).add("TCP", "127.0.0.1", 8080).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}, { // test non-zero request
		name: "assumed pod without resource request",
		pods: []*v1.Pod{testPods[3]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU: 0,
				Memory:   0,
			},
			&framework.Resource{
				MilliCPU: schedutil.DefaultMilliCPURequest,
				Memory:   schedutil.DefaultMemoryRequest,
			},
			[]*v1.Pod{testPods[3]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}, {
		name: "assumed one pod with extended resource",
		pods: []*v1.Pod{testPods[4]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU:        100,
				Memory:          500,
				ScalarResources: map[v1.ResourceName]int64{"example.com/foo": 3},
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{testPods[4]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}, {
		name: "assumed two pods with extended resources",
		pods: []*v1.Pod{testPods[4], testPods[5]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU:        300,
				Memory:          1524,
				ScalarResources: map[v1.ResourceName]int64{"example.com/foo": 8},
			},
			&framework.Resource{
				MilliCPU: 300,
				Memory:   1524,
			},
			[]*v1.Pod{testPods[4], testPods[5]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).add("TCP", "127.0.0.1", 8080).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}, {
		name: "assumed pod with random invalid extended resource key",
		pods: []*v1.Pod{testPods[6]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{testPods[6]},
			newHostPortInfoBuilder().build(),
			make(map[string]*framework.ImageStateSummary),
		),
	},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			logger, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			cache := newCache(ctx, time.Second, time.Second)
			for _, pod := range tc.pods {
				if err := cache.AssumePod(logger, pod); err != nil {
					t.Fatalf("AssumePod failed: %v", err)
				}
				// pod already in cache so can't be assumed
				if err := cache.AssumePod(logger, pod); err == nil {
					t.Error("expected error, no error found")
				}
			}
			n := cache.nodes[nodeName]
			if err := deepEqualWithoutGeneration(n, tc.wNodeInfo); err != nil {
				t.Error(err)
			}

			for _, pod := range tc.pods {
				if err := cache.ForgetPod(logger, pod); err != nil {
					t.Fatalf("ForgetPod failed: %v", err)
				}
				if err := isForgottenFromCache(pod, cache); err != nil {
					t.Errorf("pod %s: %v", pod.Name, err)
				}
			}
		})
	}
}

type testExpirePodStruct struct {
	pod         *v1.Pod
	finishBind  bool
	assumedTime time.Time
}

func assumeAndFinishBinding(logger klog.Logger, cache *cacheImpl, pod *v1.Pod, assumedTime time.Time) error {
	if err := cache.AssumePod(logger, pod); err != nil {
		return err
	}
	return cache.finishBinding(logger, pod, assumedTime)
}

// TestExpirePod tests that assumed pods will be removed if expired.
// The removal will be reflected in node info.
func TestExpirePod(t *testing.T) {
	nodeName := "node"
	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-3", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
	}
	now := time.Now()
	defaultTTL := 10 * time.Second
	tests := []struct {
		name        string
		pods        []*testExpirePodStruct
		cleanupTime time.Time
		ttl         time.Duration
		wNodeInfo   *framework.NodeInfo
	}{
		{
			name: "assumed pod would expire",
			pods: []*testExpirePodStruct{
				{pod: testPods[0], finishBind: true, assumedTime: now},
			},
			cleanupTime: now.Add(2 * defaultTTL),
			wNodeInfo:   nil,
			ttl:         defaultTTL,
		},
		{
			name: "first one would expire, second and third would not",
			pods: []*testExpirePodStruct{
				{pod: testPods[0], finishBind: true, assumedTime: now},
				{pod: testPods[1], finishBind: true, assumedTime: now.Add(3 * defaultTTL / 2)},
				{pod: testPods[2]},
			},
			cleanupTime: now.Add(2 * defaultTTL),
			wNodeInfo: newNodeInfo(
				&framework.Resource{
					MilliCPU: 400,
					Memory:   2048,
				},
				&framework.Resource{
					MilliCPU: 400,
					Memory:   2048,
				},
				// Order gets altered when removing pods.
				[]*v1.Pod{testPods[2], testPods[1]},
				newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
				make(map[string]*framework.ImageStateSummary),
			),
			ttl: defaultTTL,
		},
		{
			name: "assumed pod would never expire",
			pods: []*testExpirePodStruct{
				{pod: testPods[0], finishBind: true, assumedTime: now},
			},
			cleanupTime: now.Add(3 * defaultTTL),
			wNodeInfo: newNodeInfo(
				&framework.Resource{
					MilliCPU: 100,
					Memory:   500,
				},
				&framework.Resource{
					MilliCPU: 100,
					Memory:   500,
				},
				[]*v1.Pod{testPods[0]},
				newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
				make(map[string]*framework.ImageStateSummary),
			),
			ttl: time.Duration(0),
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			logger, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			cache := newCache(ctx, tc.ttl, time.Second)

			for _, pod := range tc.pods {
				if err := cache.AssumePod(logger, pod.pod); err != nil {
					t.Fatal(err)
				}
				if !pod.finishBind {
					continue
				}
				if err := cache.finishBinding(logger, pod.pod, pod.assumedTime); err != nil {
					t.Fatal(err)
				}
			}
			// pods that got bound and have assumedTime + ttl < cleanupTime will get
			// expired and removed
			cache.cleanupAssumedPods(logger, tc.cleanupTime)
			n := cache.nodes[nodeName]
			if err := deepEqualWithoutGeneration(n, tc.wNodeInfo); err != nil {
				t.Error(err)
			}
		})
	}
}

// TestAddPodWillConfirm tests that a pod being Add()ed will be confirmed if assumed.
// The pod info should still exist after manually expiring unconfirmed pods.
func TestAddPodWillConfirm(t *testing.T) {
	nodeName := "node"
	now := time.Now()
	ttl := 10 * time.Second

	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
	}
	test := struct {
		podsToAssume []*v1.Pod
		podsToAdd    []*v1.Pod

		wNodeInfo *framework.NodeInfo
	}{ // two pods were assumed at the same time, but only the first one is Add()ed and gets confirmed.
		podsToAssume: []*v1.Pod{testPods[0], testPods[1]},
		podsToAdd:    []*v1.Pod{testPods[0]},
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{testPods[0]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}

	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
	}
	for _, podToAdd := range test.podsToAdd {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
		// pod already in added state
		if err := cache.AddPod(logger, podToAdd); err == nil {
			t.Error("expected error, no error found")
		}
	}
	cache.cleanupAssumedPods(logger, now.Add(2*ttl))
	// check after expiration. confirmed pods shouldn't be expired.
	n := cache.nodes[nodeName]
	if err := deepEqualWithoutGeneration(n, test.wNodeInfo); err != nil {
		t.Error(err)
	}
}

func TestDump(t *testing.T) {
	nodeName := "node"
	now := time.Now()
	ttl := 10 * time.Second

	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
	}
	test := struct {
		podsToAssume []*v1.Pod
		podsToAdd    []*v1.Pod
	}{ // two pods were assumed at the same time, but only the first one is Add()ed and gets confirmed.
		podsToAssume: []*v1.Pod{testPods[0], testPods[1]},
		podsToAdd:    []*v1.Pod{testPods[0]},
	}

	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Errorf("assumePod failed: %v", err)
		}
	}
	for _, podToAdd := range test.podsToAdd {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Errorf("AddPod failed: %v", err)
		}
	}

	snapshot := cache.Dump()
	if len(snapshot.Nodes) != len(cache.nodes) {
		t.Errorf("Unequal number of nodes in the cache and its snapshot. expected: %v, got: %v", len(cache.nodes), len(snapshot.Nodes))
	}
	for name, ni := range snapshot.Nodes {
		nItem := cache.nodes[name]
		if diff := cmp.Diff(nItem.info, ni, cmp.AllowUnexported(framework.NodeInfo{})); diff != "" {
			t.Errorf("Unexpected node info (-want,+got):\n%s", diff)
		}
	}
	if diff := cmp.Diff(cache.assumedPods, snapshot.AssumedPods); diff != "" {
		t.Errorf("Unexpected assumedPods (-want,+got):\n%s", diff)
	}

}

// TestAddPodAlwaysUpdatesPodInfoInNodeInfo tests that the AddPod method always updates PodInfo in NodeInfo,
// even when the Pod is an assumed one.
func TestAddPodAlwaysUpdatesPodInfoInNodeInfo(t *testing.T) {
	ttl := 10 * time.Second
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	now := time.Now()
	p1 := makeBasePod(t, "node1", "test-1", "100m", "500", "", []v1.ContainerPort{{HostPort: 80}})

	p2 := p1.DeepCopy()
	p2.Status.Conditions = append(p1.Status.Conditions, v1.PodCondition{
		Type:   v1.PodScheduled,
		Status: v1.ConditionTrue,
	})

	test := struct {
		podsToAssume         []*v1.Pod
		podsToAddAfterAssume []*v1.Pod
		nodeInfo             map[string]*framework.NodeInfo
	}{
		podsToAssume:         []*v1.Pod{p1},
		podsToAddAfterAssume: []*v1.Pod{p2},
		nodeInfo: map[string]*framework.NodeInfo{
			"node1": newNodeInfo(
				&framework.Resource{
					MilliCPU: 100,
					Memory:   500,
				},
				&framework.Resource{
					MilliCPU: 100,
					Memory:   500,
				},
				[]*v1.Pod{p2},
				newHostPortInfoBuilder().add("TCP", "0.0.0.0", 80).build(),
				make(map[string]*framework.ImageStateSummary),
			),
		},
	}

	cache := newCache(ctx, ttl, time.Second)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
	}
	for _, podToAdd := range test.podsToAddAfterAssume {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
	}
	for nodeName, expected := range test.nodeInfo {
		n := cache.nodes[nodeName]
		if err := deepEqualWithoutGeneration(n, expected); err != nil {
			t.Errorf("node %q: %v", nodeName, err)
		}
	}
}

// TestAddPodWillReplaceAssumed tests that a pod being Add()ed will replace any assumed pod.
func TestAddPodWillReplaceAssumed(t *testing.T) {
	now := time.Now()
	ttl := 10 * time.Second

	assumedPod := makeBasePod(t, "assumed-node-1", "test-1", "100m", "500", "", []v1.ContainerPort{{HostPort: 80}})
	addedPod := makeBasePod(t, "actual-node", "test-1", "100m", "500", "", []v1.ContainerPort{{HostPort: 80}})
	updatedPod := makeBasePod(t, "actual-node", "test-1", "200m", "500", "", []v1.ContainerPort{{HostPort: 90}})

	test := struct {
		podsToAssume []*v1.Pod
		podsToAdd    []*v1.Pod
		podsToUpdate [][]*v1.Pod

		wNodeInfo map[string]*framework.NodeInfo
	}{
		podsToAssume: []*v1.Pod{assumedPod.DeepCopy()},
		podsToAdd:    []*v1.Pod{addedPod.DeepCopy()},
		podsToUpdate: [][]*v1.Pod{{addedPod.DeepCopy(), updatedPod.DeepCopy()}},
		wNodeInfo: map[string]*framework.NodeInfo{
			"assumed-node": nil,
			"actual-node": newNodeInfo(
				&framework.Resource{
					MilliCPU: 200,
					Memory:   500,
				},
				&framework.Resource{
					MilliCPU: 200,
					Memory:   500,
				},
				[]*v1.Pod{updatedPod.DeepCopy()},
				newHostPortInfoBuilder().add("TCP", "0.0.0.0", 90).build(),
				make(map[string]*framework.ImageStateSummary),
			),
		},
	}

	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
	}
	for _, podToAdd := range test.podsToAdd {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
	}
	for _, podToUpdate := range test.podsToUpdate {
		if err := cache.UpdatePod(logger, podToUpdate[0], podToUpdate[1]); err != nil {
			t.Fatalf("UpdatePod failed: %v", err)
		}
	}
	for nodeName, expected := range test.wNodeInfo {
		n := cache.nodes[nodeName]
		if err := deepEqualWithoutGeneration(n, expected); err != nil {
			t.Errorf("node %q: %v", nodeName, err)
		}
	}
}

// TestAddPodAfterExpiration tests that a pod being Add()ed will be added back if expired.
func TestAddPodAfterExpiration(t *testing.T) {
	nodeName := "node"
	ttl := 10 * time.Second
	basePod := makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
	test := struct {
		pod       *v1.Pod
		wNodeInfo *framework.NodeInfo
	}{
		pod: basePod,
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{basePod},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		),
	}

	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	now := time.Now()
	cache := newCache(ctx, ttl, time.Second)
	if err := assumeAndFinishBinding(logger, cache, test.pod, now); err != nil {
		t.Fatalf("assumePod failed: %v", err)
	}
	cache.cleanupAssumedPods(logger, now.Add(2*ttl))
	// It should be expired and removed.
	if err := isForgottenFromCache(test.pod, cache); err != nil {
		t.Error(err)
	}
	if err := cache.AddPod(logger, test.pod); err != nil {
		t.Fatalf("AddPod failed: %v", err)
	}
	// check after expiration. confirmed pods shouldn't be expired.
	n := cache.nodes[nodeName]
	if err := deepEqualWithoutGeneration(n, test.wNodeInfo); err != nil {
		t.Error(err)
	}
}

// TestUpdatePod tests that a pod will be updated if added before.
func TestUpdatePod(t *testing.T) {
	nodeName := "node"
	ttl := 10 * time.Second
	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
	}
	test := struct {
		podsToAdd    []*v1.Pod
		podsToUpdate []*v1.Pod

		wNodeInfo []*framework.NodeInfo
	}{ // add a pod and then update it twice
		podsToAdd:    []*v1.Pod{testPods[0]},
		podsToUpdate: []*v1.Pod{testPods[0], testPods[1], testPods[0]},
		wNodeInfo: []*framework.NodeInfo{newNodeInfo(
			&framework.Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			&framework.Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			[]*v1.Pod{testPods[1]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
			make(map[string]*framework.ImageStateSummary),
		), newNodeInfo(
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{testPods[0]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		)},
	}

	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, ttl, time.Second)
	for _, podToAdd := range test.podsToAdd {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
	}

	for j := range test.podsToUpdate {
		if j == 0 {
			continue
		}
		if err := cache.UpdatePod(logger, test.podsToUpdate[j-1], test.podsToUpdate[j]); err != nil {
			t.Fatalf("UpdatePod failed: %v", err)
		}
		// check after expiration. confirmed pods shouldn't be expired.
		n := cache.nodes[nodeName]
		if err := deepEqualWithoutGeneration(n, test.wNodeInfo[j-1]); err != nil {
			t.Errorf("update %d: %v", j, err)
		}
	}
}

// TestUpdatePodAndGet tests that GetPod always returns the latest pod state.
func TestUpdatePodAndGet(t *testing.T) {
	nodeName := "node"
	ttl := 10 * time.Second
	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
	}
	tests := []struct {
		name        string
		pod         *v1.Pod
		podToUpdate *v1.Pod
		handler     func(logger klog.Logger, cache Cache, pod *v1.Pod) error
		assumePod   bool
	}{
		{
			name:        "do not update pod when pod information has not changed",
			pod:         testPods[0],
			podToUpdate: testPods[0],
			handler: func(logger klog.Logger, cache Cache, pod *v1.Pod) error {
				return cache.AssumePod(logger, pod)
			},
			assumePod: true,
		},
		{
			name:        "update pod when pod information changed",
			pod:         testPods[0],
			podToUpdate: testPods[1],
			handler: func(logger klog.Logger, cache Cache, pod *v1.Pod) error {
				return cache.AddPod(logger, pod)
			},
			assumePod: false,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			logger, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			cache := newCache(ctx, ttl, time.Second)
			// trying to get an unknown pod should return an error
			// podToUpdate has not been added yet
			if _, err := cache.GetPod(tc.podToUpdate); err == nil {
				t.Error("expected error, no error found")
			}

			// trying to update an unknown pod should return an error
			// pod has not been added yet
			if err := cache.UpdatePod(logger, tc.pod, tc.podToUpdate); err == nil {
				t.Error("expected error, no error found")
			}

			if err := tc.handler(logger, cache, tc.pod); err != nil {
				t.Fatalf("unexpected err: %v", err)
			}

			if !tc.assumePod {
				if err := cache.UpdatePod(logger, tc.pod, tc.podToUpdate); err != nil {
					t.Fatalf("UpdatePod failed: %v", err)
				}
			}

			cachedPod, err := cache.GetPod(tc.pod)
			if err != nil {
				t.Fatalf("GetPod failed: %v", err)
			}
			if diff := cmp.Diff(tc.podToUpdate, cachedPod); diff != "" {
				t.Fatalf("Unexpected pod (-want, +got):\n%s", diff)
			}
		})
	}
}

// TestExpireAddUpdatePod tests the sequence in which a pod is expired, added, then updated.
func TestExpireAddUpdatePod(t *testing.T) {
	nodeName := "node"
	ttl := 10 * time.Second
	testPods := []*v1.Pod{
		makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
		makeBasePod(t, nodeName, "test", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
	}
	test := struct {
		podsToAssume []*v1.Pod
		podsToAdd    []*v1.Pod
		podsToUpdate []*v1.Pod

		wNodeInfo []*framework.NodeInfo
	}{ // Pod is assumed, expired, and added. Then it would be updated twice.
		podsToAssume: []*v1.Pod{testPods[0]},
		podsToAdd:    []*v1.Pod{testPods[0]},
		podsToUpdate: []*v1.Pod{testPods[0], testPods[1], testPods[0]},
		wNodeInfo: []*framework.NodeInfo{newNodeInfo(
			&framework.Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			&framework.Resource{
				MilliCPU: 200,
				Memory:   1024,
			},
			[]*v1.Pod{testPods[1]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
			make(map[string]*framework.ImageStateSummary),
		), newNodeInfo(
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			&framework.Resource{
				MilliCPU: 100,
				Memory:   500,
			},
			[]*v1.Pod{testPods[0]},
			newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
			make(map[string]*framework.ImageStateSummary),
		)},
	}

	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	now := time.Now()
	cache := newCache(ctx, ttl, time.Second)
	for _, podToAssume := range test.podsToAssume {
		if err := assumeAndFinishBinding(logger, cache, podToAssume, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
	}
	cache.cleanupAssumedPods(logger, now.Add(2*ttl))

	for _, podToAdd := range test.podsToAdd {
		if err := cache.AddPod(logger, podToAdd); err != nil {
			t.Fatalf("AddPod failed: %v", err)
		}
	}

	for j := range test.podsToUpdate {
		if j == 0 {
			continue
		}
		if err := cache.UpdatePod(logger, test.podsToUpdate[j-1], test.podsToUpdate[j]); err != nil {
			t.Fatalf("UpdatePod failed: %v", err)
		}
		// check after expiration. confirmed pods shouldn't be expired.
		n := cache.nodes[nodeName]
		if err := deepEqualWithoutGeneration(n, test.wNodeInfo[j-1]); err != nil {
			t.Errorf("update %d: %v", j, err)
		}
	}
}

func makePodWithEphemeralStorage(nodeName, ephemeralStorage string) *v1.Pod {
	return st.MakePod().Name("pod-with-ephemeral-storage").Namespace("default-namespace").UID("pod-with-ephemeral-storage").Req(
		map[v1.ResourceName]string{
			v1.ResourceEphemeralStorage: ephemeralStorage,
		},
	).Node(nodeName).Obj()
}

func TestEphemeralStorageResource(t *testing.T) {
	nodeName := "node"
	podE := makePodWithEphemeralStorage(nodeName, "500")
	test := struct {
		pod       *v1.Pod
		wNodeInfo *framework.NodeInfo
	}{
		pod: podE,
		wNodeInfo: newNodeInfo(
			&framework.Resource{
				EphemeralStorage: 500,
			},
			&framework.Resource{
				MilliCPU: schedutil.DefaultMilliCPURequest,
				Memory:   schedutil.DefaultMemoryRequest,
			},
			[]*v1.Pod{podE},
			framework.HostPortInfo{},
			make(map[string]*framework.ImageStateSummary),
		),
	}
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	if err := cache.AddPod(logger, test.pod); err != nil {
		t.Fatalf("AddPod failed: %v", err)
	}
	n := cache.nodes[nodeName]
	if err := deepEqualWithoutGeneration(n, test.wNodeInfo); err != nil {
		t.Error(err)
	}

	if err := cache.RemovePod(logger, test.pod); err != nil {
		t.Fatalf("RemovePod failed: %v", err)
	}
	if _, err := cache.GetPod(test.pod); err == nil {
		t.Errorf("pod was not deleted")
	}
}

// TestRemovePod tests that after an added pod is removed, its information is also subtracted from the node info.
func TestRemovePod(t *testing.T) {
	pod := makeBasePod(t, "node-1", "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
	nodes := []*v1.Node{
		{
			ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
		},
		{
			ObjectMeta: metav1.ObjectMeta{Name: "node-2"},
		},
	}
	wNodeInfo := newNodeInfo(
		&framework.Resource{
			MilliCPU: 100,
			Memory:   500,
		},
		&framework.Resource{
			MilliCPU: 100,
			Memory:   500,
		},
		[]*v1.Pod{pod},
		newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
		make(map[string]*framework.ImageStateSummary),
	)
	tests := map[string]struct {
		assume bool
	}{
		"bound":   {},
		"assumed": {assume: true},
	}

	for name, tt := range tests {
		t.Run(name, func(t *testing.T) {
			logger, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			nodeName := pod.Spec.NodeName
			cache := newCache(ctx, time.Second, time.Second)
			// Add/Assume pod succeeds even before adding the nodes.
			if tt.assume {
				if err := cache.AddPod(logger, pod); err != nil {
					t.Fatalf("AddPod failed: %v", err)
				}
			} else {
				if err := cache.AssumePod(logger, pod); err != nil {
					t.Fatalf("AssumePod failed: %v", err)
				}
			}
			n := cache.nodes[nodeName]
			if err := deepEqualWithoutGeneration(n, wNodeInfo); err != nil {
				t.Error(err)
			}
			for _, n := range nodes {
				cache.AddNode(logger, n)
			}

			if err := cache.RemovePod(logger, pod); err != nil {
				t.Fatalf("RemovePod failed: %v", err)
			}

			if _, err := cache.GetPod(pod); err == nil {
				t.Errorf("pod was not deleted")
			}

			// trying to remove a pod already removed should return an error
			if err := cache.RemovePod(logger, pod); err == nil {
				t.Error("expected error, no error found")
			}

			// Node that owned the Pod should be at the head of the list.
			if cache.headNode.info.Node().Name != nodeName {
				t.Errorf("node %q is not at the head of the list", nodeName)
			}
		})
	}
}

func TestForgetPod(t *testing.T) {
	nodeName := "node"
	basePod := makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
	pods := []*v1.Pod{basePod}
	now := time.Now()
	ttl := 10 * time.Second
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	cache := newCache(ctx, ttl, time.Second)
	for _, pod := range pods {
		if err := assumeAndFinishBinding(logger, cache, pod, now); err != nil {
			t.Fatalf("assumePod failed: %v", err)
		}
		isAssumed, err := cache.IsAssumedPod(pod)
		if err != nil {
			t.Fatalf("IsAssumedPod failed: %v.", err)
		}
		if !isAssumed {
			t.Fatalf("Pod is expected to be assumed.")
		}
		assumedPod, err := cache.GetPod(pod)
		if err != nil {
			t.Fatalf("GetPod failed: %v.", err)
		}
		if assumedPod.Namespace != pod.Namespace {
			t.Errorf("assumedPod.Namespace != pod.Namespace (%s != %s)", assumedPod.Namespace, pod.Namespace)
		}
		if assumedPod.Name != pod.Name {
			t.Errorf("assumedPod.Name != pod.Name (%s != %s)", assumedPod.Name, pod.Name)
		}
	}
	for _, pod := range pods {
		if err := cache.ForgetPod(logger, pod); err != nil {
			t.Fatalf("ForgetPod failed: %v", err)
		}
		if err := isForgottenFromCache(pod, cache); err != nil {
			t.Errorf("pod %q: %v", pod.Name, err)
		}
		// trying to forget a pod already forgotten should return an error
		if err := cache.ForgetPod(logger, pod); err == nil {
			t.Error("expected error, no error found")
		}
	}
}

// buildNodeInfo creates a NodeInfo by simulating node operations in cache.
func buildNodeInfo(node *v1.Node, pods []*v1.Pod, imageStates map[string]*framework.ImageStateSummary) *framework.NodeInfo {
	expected := framework.NewNodeInfo()
	expected.SetNode(node)
	expected.Allocatable = framework.NewResource(node.Status.Allocatable)
	expected.Generation++
	for _, pod := range pods {
		expected.AddPod(pod)
	}
	for _, image := range node.Status.Images {
		for _, name := range image.Names {
			if state, ok := imageStates[name]; ok {
				expected.ImageStates[name] = state
			}
		}
	}
	return expected
}

// buildImageStates creates ImageStateSummary of image from nodes that will be added in cache.
func buildImageStates(nodes []*v1.Node) map[string]*framework.ImageStateSummary {
	imageStates := make(map[string]*framework.ImageStateSummary)
	for _, item := range nodes {
		for _, image := range item.Status.Images {
			for _, name := range image.Names {
				if state, ok := imageStates[name]; !ok {
					state = &framework.ImageStateSummary{
						Size:  image.SizeBytes,
						Nodes: sets.New[string](item.Name),
					}
					imageStates[name] = state
				} else {
					state.Nodes.Insert(item.Name)
				}
			}
		}
	}
	return imageStates
}

// TestNodeOperators tests node operations of cache, including add, update
// and remove.
func TestNodeOperators(t *testing.T) {
	// Test data
	cpuHalf := resource.MustParse("500m")
	mem50m := resource.MustParse("50m")
	resourceList1 := map[v1.ResourceName]string{
		v1.ResourceCPU:                     "1000m",
		v1.ResourceMemory:                  "100m",
		v1.ResourceName("example.com/foo"): "1",
	}
	resourceList2 := map[v1.ResourceName]string{
		v1.ResourceCPU:                     "500m",
		v1.ResourceMemory:                  "50m",
		v1.ResourceName("example.com/foo"): "2",
	}
	taints := []v1.Taint{
		{
			Key:    "test-key",
			Value:  "test-value",
			Effect: v1.TaintEffectPreferNoSchedule,
		},
	}
	imageStatus1 := map[string]int64{
		"gcr.io/80:latest":  80 * mb,
		"gcr.io/80:v1":      80 * mb,
		"gcr.io/300:latest": 300 * mb,
		"gcr.io/300:v1":     300 * mb,
	}
	imageStatus2 := map[string]int64{
		"gcr.io/600:latest": 600 * mb,
		"gcr.io/80:latest":  80 * mb,
		"gcr.io/900:latest": 900 * mb,
	}
	tests := []struct {
		name  string
		nodes []*v1.Node
		pods  []*v1.Pod
	}{
		{
			name: "operate the node with one pod",
			nodes: []*v1.Node{
				&st.MakeNode().Name("test-node-1").Capacity(resourceList1).Taints(taints).Images(imageStatus1).Node,
				&st.MakeNode().Name("test-node-2").Capacity(resourceList2).Taints(taints).Images(imageStatus2).Node,
				&st.MakeNode().Name("test-node-3").Capacity(resourceList1).Taints(taints).Images(imageStatus1).Node,
				&st.MakeNode().Name("test-node-4").Capacity(resourceList2).Taints(taints).Images(imageStatus2).Node,
			},
			pods: []*v1.Pod{
				{
					ObjectMeta: metav1.ObjectMeta{
						Name: "pod1",
						UID:  types.UID("pod1"),
					},
					Spec: v1.PodSpec{
						NodeName: "test-node-1",
						Containers: []v1.Container{
							{
								Resources: v1.ResourceRequirements{
									Requests: v1.ResourceList{
										v1.ResourceCPU:    cpuHalf,
										v1.ResourceMemory: mem50m,
									},
								},
								Ports: []v1.ContainerPort{
									{
										Name:          "http",
										HostPort:      80,
										ContainerPort: 80,
									},
								},
							},
						},
					},
				},
			},
		},
		{
			name: "operate the node with two pods",
			nodes: []*v1.Node{
				&st.MakeNode().Name("test-node-1").Capacity(resourceList1).Taints(taints).Images(imageStatus1).Node,
				&st.MakeNode().Name("test-node-2").Capacity(resourceList2).Taints(taints).Images(imageStatus2).Node,
				&st.MakeNode().Name("test-node-3").Capacity(resourceList1).Taints(taints).Images(imageStatus1).Node,
			},
			pods: []*v1.Pod{
				{
					ObjectMeta: metav1.ObjectMeta{
						Name: "pod1",
						UID:  types.UID("pod1"),
					},
					Spec: v1.PodSpec{
						NodeName: "test-node-1",
						Containers: []v1.Container{
							{
								Resources: v1.ResourceRequirements{
									Requests: v1.ResourceList{
										v1.ResourceCPU:    cpuHalf,
										v1.ResourceMemory: mem50m,
									},
								},
							},
						},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{
						Name: "pod2",
						UID:  types.UID("pod2"),
					},
					Spec: v1.PodSpec{
						NodeName: "test-node-1",
						Containers: []v1.Container{
							{
								Resources: v1.ResourceRequirements{
									Requests: v1.ResourceList{
										v1.ResourceCPU:    cpuHalf,
										v1.ResourceMemory: mem50m,
									},
								},
							},
						},
					},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			logger, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			node := tc.nodes[0]

			imageStates := buildImageStates(tc.nodes)
			expected := buildNodeInfo(node, tc.pods, imageStates)

			cache := newCache(ctx, time.Second, time.Second)
			for _, nodeItem := range tc.nodes {
				cache.AddNode(logger, nodeItem)
			}
			for _, pod := range tc.pods {
				if err := cache.AddPod(logger, pod); err != nil {
					t.Fatal(err)
				}
			}
			nodes := map[string]*framework.NodeInfo{}
			for nodeItem := cache.headNode; nodeItem != nil; nodeItem = nodeItem.next {
				nodes[nodeItem.info.Node().Name] = nodeItem.info
			}

			// Step 1: the node was added into cache successfully.
			got, found := cache.nodes[node.Name]
			if !found {
				t.Errorf("Failed to find node %v in internalcache.", node.Name)
			}
			nodesList, err := cache.nodeTree.list()
			if err != nil {
				t.Fatal(err)
			}
			if cache.nodeTree.numNodes != len(tc.nodes) || len(nodesList) != len(tc.nodes) {
				t.Errorf("cache.nodeTree is not updated correctly after adding node got: %d, expected: %d",
					cache.nodeTree.numNodes, len(tc.nodes))
			}

			// Generations are globally unique. We check in our unit tests that they are incremented correctly.
			expected.Generation = got.info.Generation
			if diff := cmp.Diff(expected, got.info, cmp.AllowUnexported(framework.NodeInfo{})); diff != "" {
				t.Errorf("Failed to add node into scheduler cache (-want,+got):\n%s", diff)
			}

			// check imageState of NodeInfo with specific image when node added
			if !checkImageStateSummary(nodes, "gcr.io/80:latest", "gcr.io/300:latest") {
				t.Error("image have different ImageStateSummary")
			}

			// Step 2: dump cached nodes successfully.
			cachedNodes := NewEmptySnapshot()
			if err := cache.UpdateSnapshot(logger, cachedNodes); err != nil {
				t.Error(err)
			}
			newNode, found := cachedNodes.nodeInfoMap[node.Name]
			if !found || len(cachedNodes.nodeInfoMap) != len(tc.nodes) {
				t.Errorf("failed to dump cached nodes:\n got: %v \nexpected: %v", cachedNodes.nodeInfoMap, tc.nodes)
			}
			expected.Generation = newNode.Generation
			if diff := cmp.Diff(newNode, expected.Snapshot(), cmp.AllowUnexported(framework.NodeInfo{})); diff != "" {
				t.Errorf("Failed to clone node:\n%s", diff)
			}
			// check imageState of NodeInfo with specific image when update snapshot
			if !checkImageStateSummary(cachedNodes.nodeInfoMap, "gcr.io/80:latest", "gcr.io/300:latest") {
				t.Error("image have different ImageStateSummary")
			}

			// Step 3: update node attribute successfully.
			node.Status.Allocatable[v1.ResourceMemory] = mem50m
			expected.Allocatable.Memory = mem50m.Value()

			cache.UpdateNode(logger, nil, node)
			got, found = cache.nodes[node.Name]
			if !found {
				t.Errorf("Failed to find node %v in schedulertypes after UpdateNode.", node.Name)
			}
			if got.info.Generation <= expected.Generation {
				t.Errorf("Generation is not incremented. got: %v, expected: %v", got.info.Generation, expected.Generation)
			}
			expected.Generation = got.info.Generation

			if diff := cmp.Diff(expected, got.info, cmp.AllowUnexported(framework.NodeInfo{})); diff != "" {
				t.Errorf("Unexpected schedulertypes after updating node (-want, +got):\n%s", diff)
			}
			// check imageState of NodeInfo with specific image when update node
			if !checkImageStateSummary(nodes, "gcr.io/80:latest", "gcr.io/300:latest") {
				t.Error("image have different ImageStateSummary")
			}
			// Check nodeTree after update
			nodesList, err = cache.nodeTree.list()
			if err != nil {
				t.Fatal(err)
			}
			if cache.nodeTree.numNodes != len(tc.nodes) || len(nodesList) != len(tc.nodes) {
				t.Errorf("unexpected cache.nodeTree after updating node")
			}

			// Step 4: the node can be removed even if it still has pods.
			if err := cache.RemoveNode(logger, node); err != nil {
				t.Error(err)
			}
			if n, err := cache.getNodeInfo(node.Name); err != nil {
				t.Errorf("The node %v should still have a ghost entry: %v", node.Name, err)
			} else if n != nil {
				t.Errorf("The node object for %v should be nil", node.Name)
			}

			// trying to remove a node already removed should return an error
			if err := cache.RemoveNode(logger, node); err == nil {
				t.Error("expected error, no error found")
			}

			// Check node is removed from nodeTree as well.
			nodesList, err = cache.nodeTree.list()
			if err != nil {
				t.Fatal(err)
			}
			if cache.nodeTree.numNodes != len(tc.nodes)-1 || len(nodesList) != len(tc.nodes)-1 {
				t.Errorf("unexpected cache.nodeTree after removing node: %v", node.Name)
			}
			// check imageState of NodeInfo with specific image when delete node
			if !checkImageStateSummary(nodes, "gcr.io/80:latest", "gcr.io/300:latest") {
				t.Error("image have different ImageStateSummary after removing node")
			}
			// Pods are still in the pods cache.
			for _, p := range tc.pods {
				if _, err := cache.GetPod(p); err != nil {
					t.Error(err)
				}
			}

			// Step 5: removing pods for the removed node still succeeds.
			for _, p := range tc.pods {
				if err := cache.RemovePod(logger, p); err != nil {
					t.Error(err)
				}
				if _, err := cache.GetPod(p); err == nil {
					t.Errorf("pod %q still in cache", p.Name)
				}
			}
		})
	}
}

func TestSchedulerCache_UpdateSnapshot(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)

	// Create a few nodes to be used in tests.
	var nodes []*v1.Node
	for i := 0; i < 10; i++ {
		node := &v1.Node{
			ObjectMeta: metav1.ObjectMeta{
				Name: fmt.Sprintf("test-node%v", i),
			},
			Status: v1.NodeStatus{
				Allocatable: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse("1000m"),
					v1.ResourceMemory: resource.MustParse("100m"),
				},
			},
		}
		nodes = append(nodes, node)
	}
	// Create a few nodes as updated versions of the above nodes
	var updatedNodes []*v1.Node
	for _, n := range nodes {
		updatedNode := n.DeepCopy()
		updatedNode.Status.Allocatable = v1.ResourceList{
			v1.ResourceCPU:    resource.MustParse("2000m"),
			v1.ResourceMemory: resource.MustParse("500m"),
		}
		updatedNodes = append(updatedNodes, updatedNode)
	}

	// Create a few pods for tests.
	var pods []*v1.Pod
	for i := 0; i < 20; i++ {
		pod := st.MakePod().Name(fmt.Sprintf("test-pod%v", i)).Namespace("test-ns").UID(fmt.Sprintf("test-puid%v", i)).
			Node(fmt.Sprintf("test-node%v", i%10)).Obj()
		pods = append(pods, pod)
	}

	// Create a few pods as updated versions of the above pods.
	var updatedPods []*v1.Pod
	for _, p := range pods {
		updatedPod := p.DeepCopy()
		priority := int32(1000)
		updatedPod.Spec.Priority = &priority
		updatedPods = append(updatedPods, updatedPod)
	}

	// Add a couple of pods with affinity, on the first and second nodes.
	var podsWithAffinity []*v1.Pod
	for i := 0; i < 2; i++ {
		pod := st.MakePod().Name(fmt.Sprintf("p-affinity-%v", i)).Namespace("test-ns").UID(fmt.Sprintf("puid-affinity-%v", i)).
			PodAffinityExists("foo", "", st.PodAffinityWithRequiredReq).Node(fmt.Sprintf("test-node%v", i)).Obj()
		podsWithAffinity = append(podsWithAffinity, pod)
	}

	// Add a few pods with PVC.
	var podsWithPVC []*v1.Pod
	for i := 0; i < 8; i++ {
		pod := st.MakePod().Name(fmt.Sprintf("p-pvc-%v", i)).Namespace("test-ns").UID(fmt.Sprintf("puid-pvc-%v", i)).
			PVC(fmt.Sprintf("test-pvc%v", i%4)).Node(fmt.Sprintf("test-node%v", i%2)).Obj()
		podsWithPVC = append(podsWithPVC, pod)
	}

	var cache *cacheImpl
	var snapshot *Snapshot
	type operation = func(t *testing.T)

	addNode := func(i int) operation {
		return func(t *testing.T) {
			cache.AddNode(logger, nodes[i])
		}
	}
	removeNode := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.RemoveNode(logger, nodes[i]); err != nil {
				t.Error(err)
			}
		}
	}
	updateNode := func(i int) operation {
		return func(t *testing.T) {
			cache.UpdateNode(logger, nodes[i], updatedNodes[i])
		}
	}
	addPod := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.AddPod(logger, pods[i]); err != nil {
				t.Error(err)
			}
		}
	}
	addPodWithAffinity := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.AddPod(logger, podsWithAffinity[i]); err != nil {
				t.Error(err)
			}
		}
	}
	addPodWithPVC := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.AddPod(logger, podsWithPVC[i]); err != nil {
				t.Error(err)
			}
		}
	}
	removePod := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.RemovePod(logger, pods[i]); err != nil {
				t.Error(err)
			}
		}
	}
	removePodWithAffinity := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.RemovePod(logger, podsWithAffinity[i]); err != nil {
				t.Error(err)
			}
		}
	}
	removePodWithPVC := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.RemovePod(logger, podsWithPVC[i]); err != nil {
				t.Error(err)
			}
		}
	}
	updatePod := func(i int) operation {
		return func(t *testing.T) {
			if err := cache.UpdatePod(logger, pods[i], updatedPods[i]); err != nil {
				t.Error(err)
			}
		}
	}
	updateSnapshot := func() operation {
		return func(t *testing.T) {
			cache.UpdateSnapshot(logger, snapshot)
			if err := compareCacheWithNodeInfoSnapshot(t, cache, snapshot); err != nil {
				t.Error(err)
			}
		}
	}

	tests := []struct {
		name       string
		operations []operation
		expected   []*v1.Node
		expectedHavePodsWithAffinity int
		expectedUsedPVCSet           sets.Set[string]
	}{
		{
			name:               "Empty cache",
			operations:         []operation{},
			expected:           []*v1.Node{},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name:               "Single node",
			operations:         []operation{addNode(1)},
			expected:           []*v1.Node{nodes[1]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add node, remove it, add it again",
			operations: []operation{
				addNode(1), updateSnapshot(), removeNode(1), addNode(1),
			},
			expected:           []*v1.Node{nodes[1]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add node and remove it in the same cycle, add it again",
			operations: []operation{
				addNode(1), updateSnapshot(), addNode(2), removeNode(1),
			},
			expected:           []*v1.Node{nodes[2]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add a few nodes, and snapshot in the middle",
			operations: []operation{
				addNode(0), updateSnapshot(), addNode(1), updateSnapshot(), addNode(2),
				updateSnapshot(), addNode(3),
			},
			expected:           []*v1.Node{nodes[3], nodes[2], nodes[1], nodes[0]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add a few nodes, and snapshot in the end",
			operations: []operation{
				addNode(0), addNode(2), addNode(5), addNode(6),
			},
			expected:           []*v1.Node{nodes[6], nodes[5], nodes[2], nodes[0]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Update some nodes",
			operations: []operation{
				addNode(0), addNode(1), addNode(5), updateSnapshot(), updateNode(1),
			},
			expected:           []*v1.Node{nodes[1], nodes[5], nodes[0]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add a few nodes, and remove all of them",
			operations: []operation{
				addNode(0), addNode(2), addNode(5), addNode(6), updateSnapshot(),
				removeNode(0), removeNode(2), removeNode(5), removeNode(6),
			},
			expected:           []*v1.Node{},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add a few nodes, and remove some of them",
			operations: []operation{
				addNode(0), addNode(2), addNode(5), addNode(6), updateSnapshot(),
				removeNode(0), removeNode(6),
			},
			expected:           []*v1.Node{nodes[5], nodes[2]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add a few nodes, remove all of them, and add more",
			operations: []operation{
				addNode(2), addNode(5), addNode(6), updateSnapshot(),
				removeNode(2), removeNode(5), removeNode(6), updateSnapshot(),
				addNode(7), addNode(9),
			},
			expected:           []*v1.Node{nodes[9], nodes[7]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Update nodes in particular order",
			operations: []operation{
				addNode(8), updateNode(2), updateNode(8), updateSnapshot(),
				addNode(1),
			},
			expected:           []*v1.Node{nodes[1], nodes[8], nodes[2]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add some nodes and some pods",
			operations: []operation{
				addNode(0), addNode(2), addNode(8), updateSnapshot(),
				addPod(8), addPod(2),
			},
			expected:           []*v1.Node{nodes[2], nodes[8], nodes[0]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Updating a pod moves its node to the head",
			operations: []operation{
				addNode(0), addPod(0), addNode(2), addNode(4), updatePod(0),
			},
			expected: []*v1.Node{nodes[0], nodes[4], nodes[2]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add pod before its node",
			operations: []operation{
				addNode(0), addPod(1), updatePod(1), addNode(1),
			},
			expected:           []*v1.Node{nodes[1], nodes[0]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Remove node before its pods",
			operations: []operation{
				addNode(0), addNode(1), addPod(1), addPod(11), updateSnapshot(),
				removeNode(1), updateSnapshot(),
				updatePod(1), updatePod(11), removePod(1), removePod(11),
			},
			expected:           []*v1.Node{nodes[0]},
			expectedUsedPVCSet: sets.New[string](),
		},
		{
			name: "Add Pods with affinity",
			operations: []operation{
				addNode(0), addPodWithAffinity(0), updateSnapshot(), addNode(1),
			},
			expected:                     []*v1.Node{nodes[1], nodes[0]},
			expectedHavePodsWithAffinity: 1,
			expectedUsedPVCSet:           sets.New[string](),
		},
		{
			name: "Add Pods with PVC",
			operations: []operation{
				addNode(0), addPodWithPVC(0), updateSnapshot(), addNode(1),
			},
			expected:           []*v1.Node{nodes[1], nodes[0]},
			expectedUsedPVCSet: sets.New("test-ns/test-pvc0"),
		},
		{
			name: "Add multiple nodes with pods with affinity",
			operations: []operation{
				addNode(0), addPodWithAffinity(0), updateSnapshot(), addNode(1), addPodWithAffinity(1), updateSnapshot(),
			},
			expected:                     []*v1.Node{nodes[1], nodes[0]},
			expectedHavePodsWithAffinity: 2,
			expectedUsedPVCSet:           sets.New[string](),
		},
		{
			name: "Add multiple nodes with pods with PVC",
			operations: []operation{
				addNode(0), addPodWithPVC(0), updateSnapshot(), addNode(1), addPodWithPVC(1), updateSnapshot(),
			},
			expected:           []*v1.Node{nodes[1], nodes[0]},
			expectedUsedPVCSet: sets.New("test-ns/test-pvc0", "test-ns/test-pvc1"),
		},
		{
			name: "Add then Remove pods with affinity",
			operations: []operation{
				addNode(0), addNode(1), addPodWithAffinity(0), updateSnapshot(), removePodWithAffinity(0), updateSnapshot(),
			},
			expected:                     []*v1.Node{nodes[0], nodes[1]},
			expectedHavePodsWithAffinity: 0,
			expectedUsedPVCSet:           sets.New[string](),
		},
		{
			name: "Add then Remove pod with PVC",
			operations: []operation{
				addNode(0), addPodWithPVC(0), updateSnapshot(), removePodWithPVC(0), addPodWithPVC(2), updateSnapshot(),
			},
			expected:           []*v1.Node{nodes[0]},
			expectedUsedPVCSet: sets.New("test-ns/test-pvc2"),
		},
		{
			name: "Add then Remove pod with PVC and add same pod again",
			operations: []operation{
				addNode(0), addPodWithPVC(0), updateSnapshot(), removePodWithPVC(0), addPodWithPVC(0), updateSnapshot(),
			},
			expected:           []*v1.Node{nodes[0]},
			expectedUsedPVCSet: sets.New("test-ns/test-pvc0"),
		},
		{
			name: "Add and Remove multiple pods with PVC with same ref count length different content",
			operations: []operation{
				addNode(0), addNode(1), addPodWithPVC(0), addPodWithPVC(1), updateSnapshot(),
				removePodWithPVC(0), removePodWithPVC(1), addPodWithPVC(2), addPodWithPVC(3), updateSnapshot(),
			},
			expected:           []*v1.Node{nodes[1], nodes[0]},
			expectedUsedPVCSet: sets.New("test-ns/test-pvc2", "test-ns/test-pvc3"),
		},
		{
			name: "Add and Remove multiple pods with PVC",
			operations: []operation{
				addNode(0), addNode(1), addPodWithPVC(0), addPodWithPVC(1), addPodWithPVC(2), updateSnapshot(),
				removePodWithPVC(0), removePodWithPVC(1), updateSnapshot(), addPodWithPVC(0), updateSnapshot(),
				addPodWithPVC(3), addPodWithPVC(4), addPodWithPVC(5), updateSnapshot(),
				removePodWithPVC(0), removePodWithPVC(3), removePodWithPVC(4), updateSnapshot(),
			},
			expected:           []*v1.Node{nodes[0], nodes[1]},
			expectedUsedPVCSet: sets.New("test-ns/test-pvc1", "test-ns/test-pvc2"),
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			cache = newCache(ctx, time.Second, time.Second)
			snapshot = NewEmptySnapshot()

			for _, op := range test.operations {
				op(t)
			}

			if len(test.expected) != len(cache.nodes) {
				t.Errorf("unexpected number of nodes. Expected: %v, got: %v", len(test.expected), len(cache.nodes))
			}
			var i int
			// Check that cache is in the expected state.
			for node := cache.headNode; node != nil; node = node.next {
				if node.info.Node() != nil && node.info.Node().Name != test.expected[i].Name {
					t.Errorf("unexpected node. Expected: %v, got: %v, index: %v", test.expected[i].Name, node.info.Node().Name, i)
				}
				i++
			}
			// Make sure we visited all the cached nodes in the above for loop.
			if i != len(cache.nodes) {
				t.Errorf("Not all the nodes were visited by following the NodeInfo linked list. Expected to see %v nodes, saw %v.", len(cache.nodes), i)
			}

			// Check number of nodes with pods with affinity
			if len(snapshot.havePodsWithAffinityNodeInfoList) != test.expectedHavePodsWithAffinity {
				t.Errorf("unexpected number of HavePodsWithAffinity nodes. Expected: %v, got: %v", test.expectedHavePodsWithAffinity, len(snapshot.havePodsWithAffinityNodeInfoList))
			}

			// Compare content of the used PVC set
			if diff := cmp.Diff(test.expectedUsedPVCSet, snapshot.usedPVCSet); diff != "" {
				t.Errorf("Unexpected usedPVCSet (-want +got):\n%s", diff)
			}

			// Always update the snapshot at the end of operations and compare it.
			if err := cache.UpdateSnapshot(logger, snapshot); err != nil {
				t.Error(err)
			}
			if err := compareCacheWithNodeInfoSnapshot(t, cache, snapshot); err != nil {
				t.Error(err)
			}
		})
	}
}

func compareCacheWithNodeInfoSnapshot(t *testing.T, cache *cacheImpl, snapshot *Snapshot) error {
	// Compare the map.
	if len(snapshot.nodeInfoMap) != cache.nodeTree.numNodes {
		return fmt.Errorf("unexpected number of nodes in the snapshot. Expected: %v, got: %v", cache.nodeTree.numNodes, len(snapshot.nodeInfoMap))
	}
	for name, ni := range cache.nodes {
		want := ni.info
		if want.Node() == nil {
			want = nil
		}
		if diff := cmp.Diff(want, snapshot.nodeInfoMap[name], cmp.AllowUnexported(framework.NodeInfo{})); diff != "" {
			return fmt.Errorf("Unexpected node info for node (-want, +got):\n%s", diff)
		}
	}

	// Compare the lists.
	if len(snapshot.nodeInfoList) != cache.nodeTree.numNodes {
		return fmt.Errorf("unexpected number of nodes in NodeInfoList. Expected: %v, got: %v", cache.nodeTree.numNodes, len(snapshot.nodeInfoList))
	expectedNodeInfoList := make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
	expectedHavePodsWithAffinityNodeInfoList := make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
	expectedUsedPVCSet := sets.New[string]()
	nodesList, err := cache.nodeTree.list()
	if err != nil {
		t.Fatal(err)
	}
	for _, nodeName := range nodesList {
		if n := snapshot.nodeInfoMap[nodeName]; n != nil {
			expectedNodeInfoList = append(expectedNodeInfoList, n)
			if len(n.PodsWithAffinity) > 0 {
				expectedHavePodsWithAffinityNodeInfoList = append(expectedHavePodsWithAffinityNodeInfoList, n)
			}
			for key := range n.PVCRefCounts {
				expectedUsedPVCSet.Insert(key)
			}
		} else {
			return fmt.Errorf("node %q exists in nodeTree but not in NodeInfoMap, this should not happen", nodeName)
		}
	}

	for i, expected := range expectedNodeInfoList {
		got := snapshot.nodeInfoList[i]
		if expected != got {
			return fmt.Errorf("unexpected NodeInfo pointer in NodeInfoList. Expected: %p, got: %p", expected, got)
		}
	}

	for i, expected := range expectedHavePodsWithAffinityNodeInfoList {
		got := snapshot.havePodsWithAffinityNodeInfoList[i]
		if expected != got {
			return fmt.Errorf("unexpected NodeInfo pointer in HavePodsWithAffinityNodeInfoList. Expected: %p, got: %p", expected, got)
		}
	}

	for key := range expectedUsedPVCSet {
		if !snapshot.usedPVCSet.Has(key) {
			return fmt.Errorf("expected PVC %s to exist in UsedPVCSet but it is not found", key)
		}
	}

	return nil
}

func TestSchedulerCache_updateNodeInfoSnapshotList(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)

	// Create a few nodes to be used in tests.
	var nodes []*v1.Node
	i := 0
	// Number of nodes per zone: zone 0 -> 2, zone 1 -> 6.
	for zone, nb := range []int{2, 6} {
		for j := 0; j < nb; j++ {
			nodes = append(nodes, &v1.Node{
				ObjectMeta: metav1.ObjectMeta{
					Name: fmt.Sprintf("node-%d", i),
					Labels: map[string]string{
						v1.LabelTopologyRegion: fmt.Sprintf("region-%d", zone),
						v1.LabelTopologyZone:   fmt.Sprintf("zone-%d", zone),
					},
				},
			})
			i++
		}
	}

	var cache *cacheImpl
	var snapshot *Snapshot

	addNode := func(t *testing.T, i int) {
		cache.AddNode(logger, nodes[i])
		_, ok := snapshot.nodeInfoMap[nodes[i].Name]
		if !ok {
			snapshot.nodeInfoMap[nodes[i].Name] = cache.nodes[nodes[i].Name].info
		}
	}

	updateSnapshot := func(t *testing.T) {
		cache.updateNodeInfoSnapshotList(logger, snapshot, true)
		if err := compareCacheWithNodeInfoSnapshot(t, cache, snapshot); err != nil {
			t.Error(err)
		}
	}

	tests := []struct {
		name       string
		operations func(t *testing.T)
		expected   []string
	}{
		{
			name:       "Empty cache",
			operations: func(t *testing.T) {},
			expected:   []string{},
		},
		{
			name: "Single node",
			operations: func(t *testing.T) {
				addNode(t, 0)
			},
			expected: []string{"node-0"},
		},
		{
			name: "Two nodes",
			operations: func(t *testing.T) {
				addNode(t, 0)
				updateSnapshot(t)
				addNode(t, 1)
			},
			expected: []string{"node-0", "node-1"},
		},
		{
			name: "bug 91601, two nodes, update the snapshot and add two nodes in different zones",
			operations: func(t *testing.T) {
				addNode(t, 2)
				addNode(t, 3)
				updateSnapshot(t)
				addNode(t, 4)
				addNode(t, 0)
			},
			expected: []string{"node-2", "node-0", "node-3", "node-4"},
		},
		{
			name: "bug 91601, 6 nodes, one in a different zone",
			operations: func(t *testing.T) {
				addNode(t, 2)
				addNode(t, 3)
				addNode(t, 4)
				addNode(t, 5)
				updateSnapshot(t)
				addNode(t, 6)
				addNode(t, 0)
			},
			expected: []string{"node-2", "node-0", "node-3", "node-4", "node-5", "node-6"},
		},
		{
			name: "bug 91601, 7 nodes, two in a different zone",
			operations: func(t *testing.T) {
				addNode(t, 2)
				updateSnapshot(t)
				addNode(t, 3)
				addNode(t, 4)
				updateSnapshot(t)
				addNode(t, 5)
				addNode(t, 6)
				addNode(t, 0)
				addNode(t, 1)
			},
			expected: []string{"node-2", "node-0", "node-3", "node-1", "node-4", "node-5", "node-6"},
		},
		{
			name: "bug 91601, 7 nodes, two in a different zone, different zone order",
			operations: func(t *testing.T) {
				addNode(t, 2)
				addNode(t, 1)
				updateSnapshot(t)
				addNode(t, 3)
				addNode(t, 4)
				updateSnapshot(t)
				addNode(t, 5)
				addNode(t, 6)
				addNode(t, 0)
			},
			expected: []string{"node-2", "node-1", "node-3", "node-0", "node-4", "node-5", "node-6"},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()
			cache = newCache(ctx, time.Second, time.Second)
			snapshot = NewEmptySnapshot()

			test.operations(t)

			// Always update the snapshot at the end of operations and compare it.
			cache.updateNodeInfoSnapshotList(logger, snapshot, true)
			if err := compareCacheWithNodeInfoSnapshot(t, cache, snapshot); err != nil {
				t.Error(err)
			}
			nodeNames := make([]string, len(snapshot.nodeInfoList))
			for i, nodeInfo := range snapshot.nodeInfoList {
				nodeNames[i] = nodeInfo.Node().Name
			}
			if diff := cmp.Diff(test.expected, nodeNames); diff != "" {
				t.Errorf("Unexpected nodeInfoList (-want, +got):\n%s", diff)
			}
		})
	}
}

func BenchmarkUpdate1kNodes30kPods(b *testing.B) {
	logger, _ := ktesting.NewTestContext(b)
	cache := setupCacheOf1kNodes30kPods(b)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		cachedNodes := NewEmptySnapshot()
		cache.UpdateSnapshot(logger, cachedNodes)
	}
}

func BenchmarkExpirePods(b *testing.B) {
	podNums := []int{
		100,
		1000,
		10000,
	}
	for _, podNum := range podNums {
		name := fmt.Sprintf("%dPods", podNum)
		b.Run(name, func(b *testing.B) {
			benchmarkExpire(b, podNum)
		})
	}
}

func benchmarkExpire(b *testing.B, podNum int) {
	logger, _ := ktesting.NewTestContext(b)
	now := time.Now()
	for n := 0; n < b.N; n++ {
		b.StopTimer()
		cache := setupCacheWithAssumedPods(b, podNum, now)
		b.StartTimer()
		cache.cleanupAssumedPods(logger, now.Add(2*time.Second))
	}
}

type testingMode interface {
	Fatalf(format string, args ...interface{})
}

func makeBasePod(t testingMode, nodeName, objName, cpu, mem, extended string, ports []v1.ContainerPort) *v1.Pod {
	req := make(map[v1.ResourceName]string)
	if cpu != "" {
		req[v1.ResourceCPU] = cpu
		req[v1.ResourceMemory] = mem

		if extended != "" {
			parts := strings.Split(extended, ":")
			if len(parts) != 2 {
				t.Fatalf("Invalid extended resource string: \"%s\"", extended)
			}
			req[v1.ResourceName(parts[0])] = parts[1]
		}
	}
	podWrapper := st.MakePod().Name(objName).Namespace("node_info_cache_test").UID(objName).Node(nodeName).Containers([]v1.Container{
		st.MakeContainer().Name("container").Image("pause").Resources(req).ContainerPort(ports).Obj(),
	})
	return podWrapper.Obj()
}

// checkImageStateSummary collects the ImageStateSummary of each given image across the nodes;
// every node that has the image must report an identical summary for this to return true.
// (An illustrative call is sketched in checkImageStateSummaryUsageSketch below.)
func checkImageStateSummary(nodes map[string]*framework.NodeInfo, imageNames ...string) bool {
	for _, imageName := range imageNames {
		var imageState *framework.ImageStateSummary
		for _, node := range nodes {
			state, ok := node.ImageStates[imageName]
			if !ok {
				continue
			}
			if imageState == nil {
				imageState = state
				continue
			}
			if diff := cmp.Diff(imageState, state); diff != "" {
				return false
			}
		}
	}
	return true
}

func setupCacheOf1kNodes30kPods(b *testing.B) Cache {
	logger, ctx := ktesting.NewTestContext(b)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	for i := 0; i < 1000; i++ {
		nodeName := fmt.Sprintf("node-%d", i)
		for j := 0; j < 30; j++ {
			objName := fmt.Sprintf("%s-pod-%d", nodeName, j)
			pod := makeBasePod(b, nodeName, objName, "0", "0", "", nil)

			if err := cache.AddPod(logger, pod); err != nil {
				b.Fatalf("AddPod failed: %v", err)
			}
		}
	}
	return cache
}
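
// checkImageStateSummaryUsageSketch is a minimal, illustrative usage sketch and is not wired into
// any test above. It only shows the intended call shape of checkImageStateSummary: after mutating
// the cache and filling a snapshot, pass the snapshot's per-node NodeInfo map together with the
// image names of interest and fail the test if any two nodes disagree on a summary. The image
// name "registry.example/app:v1" is hypothetical.
func checkImageStateSummaryUsageSketch(t *testing.T, snapshot *Snapshot) {
	if !checkImageStateSummary(snapshot.nodeInfoMap, "registry.example/app:v1") {
		t.Error("ImageStateSummary for registry.example/app:v1 differs between nodes")
	}
}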
func setupCacheWithAssumedPods(b *testing.B, podNum int, assumedTime time.Time) *cacheImpl {
	logger, ctx := ktesting.NewTestContext(b)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	cache := newCache(ctx, time.Second, time.Second)
	for i := 0; i < podNum; i++ {
		nodeName := fmt.Sprintf("node-%d", i/10)
		objName := fmt.Sprintf("%s-pod-%d", nodeName, i%10)
		pod := makeBasePod(b, nodeName, objName, "0", "0", "", nil)

		err := assumeAndFinishBinding(logger, cache, pod, assumedTime)
		if err != nil {
			b.Fatalf("assumePod failed: %v", err)
		}
	}
	return cache
}

func isForgottenFromCache(p *v1.Pod, c *cacheImpl) error {
	if assumed, err := c.IsAssumedPod(p); err != nil {
		return err
	} else if assumed {
		return errors.New("still assumed")
	}
	if _, err := c.GetPod(p); err == nil {
		return errors.New("still in cache")
	}
	return nil
}

// getNodeInfo returns cached data for the node name.
func (cache *cacheImpl) getNodeInfo(nodeName string) (*v1.Node, error) {
	cache.mu.RLock()
	defer cache.mu.RUnlock()

	n, ok := cache.nodes[nodeName]
	if !ok {
		return nil, fmt.Errorf("node %q not found in cache", nodeName)
	}

	return n.info.Node(), nil
}
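
// getNodeInfoUsageSketch is a minimal, illustrative sketch (not part of the test suite above) of
// how the getNodeInfo accessor behaves: a node added through AddNode becomes retrievable by name,
// while an unknown name yields an error. The node names used here are hypothetical.
func getNodeInfoUsageSketch(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	cache := newCache(ctx, time.Second, time.Second)
	cache.AddNode(logger, &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "sketch-node"}})

	if n, err := cache.getNodeInfo("sketch-node"); err != nil || n == nil || n.Name != "sketch-node" {
		t.Errorf("expected to find node %q in the cache, got node: %v, err: %v", "sketch-node", n, err)
	}
	if _, err := cache.getNodeInfo("absent-node"); err == nil {
		t.Error("expected an error for a node that was never added to the cache")
	}
}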