/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package disruption

import (
	"context"
	"fmt"
	"runtime/debug"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	apps "k8s.io/api/apps/v1"
	autoscalingapi "k8s.io/api/autoscaling/v1"
	v1 "k8s.io/api/core/v1"
	policy "k8s.io/api/policy/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	"k8s.io/apimachinery/pkg/api/meta/testrestmapper"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	discoveryfake "k8s.io/client-go/discovery/fake"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	scalefake "k8s.io/client-go/scale/fake"
	core "k8s.io/client-go/testing"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	_ "k8s.io/kubernetes/pkg/apis/core/install"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/test/utils/ktesting"
	clocktesting "k8s.io/utils/clock/testing"
	"k8s.io/utils/pointer"
)

// pdbStates captures every PDB status the controller writes, keyed by
// namespace/name. It stands in for the real API-server status updater so
// tests can assert on the last written status (see Set / Get below).
type pdbStates map[string]policy.PodDisruptionBudget

// alwaysReady replaces the informer HasSynced checks so tests never block
// waiting for caches.
var alwaysReady = func() bool { return true }

// Set records a deep copy of pdb under its namespace/name key. It is wired
// in as the controller's status updater via dc.getUpdater.
func (ps *pdbStates) Set(ctx context.Context, pdb *policy.PodDisruptionBudget) error {
	key, err := controller.KeyFunc(pdb)
	if err != nil {
		return err
	}
	(*ps)[key] = *pdb.DeepCopy()
	return nil
}

// Get returns the last status-carrying PDB written for key (zero value if none).
func (ps *pdbStates) Get(key string) policy.PodDisruptionBudget {
	return (*ps)[key]
}

// VerifyPdbStatus fails the test unless the last-written status for key
// matches the expected counts exactly, and the DisruptionAllowed condition
// is True iff disruptionsAllowed > 0.
func (ps *pdbStates) VerifyPdbStatus(t *testing.T, key string, disruptionsAllowed, currentHealthy, desiredHealthy, expectedPods int32, disruptedPodMap map[string]metav1.Time) {
	t.Helper()
	actualPDB := ps.Get(key)
	// Conditions are checked separately below; remove them so the cmp.Diff
	// only compares the numeric status fields.
	actualConditions := actualPDB.Status.Conditions
	actualPDB.Status.Conditions = nil
	expectedStatus := policy.PodDisruptionBudgetStatus{
		DisruptionsAllowed: disruptionsAllowed,
		CurrentHealthy:     currentHealthy,
		DesiredHealthy:     desiredHealthy,
		ExpectedPods:       expectedPods,
		DisruptedPods:      disruptedPodMap,
		ObservedGeneration: actualPDB.Generation,
	}
	actualStatus := actualPDB.Status
	if diff := cmp.Diff(expectedStatus, actualStatus, cmpopts.EquateEmpty()); diff != "" {
		t.Fatalf("PDB %q status mismatch (-want,+got):\n%s", key, diff)
	}

	cond := apimeta.FindStatusCondition(actualConditions, policy.DisruptionAllowedCondition)
	if cond == nil {
		t.Fatalf("Expected condition %q, but didn't find it", policy.DisruptionAllowedCondition)
	}
	if disruptionsAllowed > 0 {
		if cond.Status != metav1.ConditionTrue {
			t.Fatalf("Expected condition %q to have status %q, but was %q",
				policy.DisruptionAllowedCondition, metav1.ConditionTrue, cond.Status)
		}
	} else {
		if cond.Status != metav1.ConditionFalse {
			t.Fatalf("Expected condition %q to have status %q, but was %q",
				policy.DisruptionAllowedCondition, metav1.ConditionFalse, cond.Status)
		}
	}
}

// VerifyDisruptionAllowed fails the test unless the last-written status for
// key reports exactly disruptionsAllowed allowed disruptions.
func (ps *pdbStates) VerifyDisruptionAllowed(t *testing.T, key string, disruptionsAllowed int32) {
	pdb := ps.Get(key)
	if pdb.Status.DisruptionsAllowed != disruptionsAllowed {
		debug.PrintStack()
		t.Fatalf("PodDisruptionAllowed mismatch for PDB %q. Expected %v but got %v.", key, disruptionsAllowed, pdb.Status.DisruptionsAllowed)
	}
}

// VerifyNoStatusError fails the test if any status condition indicates the
// sync failed because pods without a controller ref were found — unmanaged
// pods must not be treated as a sync error.
func (ps *pdbStates) VerifyNoStatusError(t *testing.T, key string) {
	pdb := ps.Get(key)
	for _, condition := range pdb.Status.Conditions {
		if strings.Contains(condition.Message, "found no controller ref") && condition.Reason == policy.SyncFailedReason {
			t.Fatalf("PodDisruption Controller should not error when unmanaged pods are found but it failed for %q", key)
		}
	}
}

// disruptionController bundles the controller under test with the fake
// clients and the informer stores that tests populate directly.
type disruptionController struct {
	*DisruptionController

	podStore cache.Store
	pdbStore cache.Store
	rcStore  cache.Store
	rsStore  cache.Store
	dStore   cache.Store
	ssStore  cache.Store

	coreClient      *fake.Clientset
	scaleClient     *scalefake.FakeScaleClient
	discoveryClient *discoveryfake.FakeDiscovery
	informerFactory informers.SharedInformerFactory
}

// customGVK is an arbitrary non-built-in kind used to exercise the
// scale-subresource lookup path (see TestScaleResource).
var customGVK = schema.GroupVersionKind{
	Group:   "custom.k8s.io",
	Version: "v1",
	Kind:    "customresource",
}

// newFakeDisruptionController builds a controller whose fake clock starts at
// the current wall time.
func newFakeDisruptionController(ctx context.Context) (*disruptionController, *pdbStates) {
	return newFakeDisruptionControllerWithTime(ctx, time.Now())
}

// newFakeDisruptionControllerWithTime wires a DisruptionController to fake
// clients, fake discovery/scale, a fake clock set to now, and a pdbStates
// recorder in place of the real status updater. It returns the bundled
// controller plus the recorder for assertions.
func newFakeDisruptionControllerWithTime(ctx context.Context, now time.Time) (*disruptionController, *pdbStates) {
	ps := &pdbStates{}

	coreClient := fake.NewSimpleClientset()
	informerFactory := informers.NewSharedInformerFactory(coreClient, controller.NoResyncPeriodFunc())

	scheme := runtime.NewScheme()
	scheme.AddKnownTypeWithName(customGVK, &v1.Service{})
	fakeScaleClient := &scalefake.FakeScaleClient{}
	fakeDiscovery := &discoveryfake.FakeDiscovery{
		Fake: &core.Fake{},
	}
	fakeClock := clocktesting.NewFakeClock(now)

	dc := NewDisruptionControllerInternal(
		ctx,
		informerFactory.Core().V1().Pods(),
		informerFactory.Policy().V1().PodDisruptionBudgets(),
		informerFactory.Core().V1().ReplicationControllers(),
		informerFactory.Apps().V1().ReplicaSets(),
		informerFactory.Apps().V1().Deployments(),
		informerFactory.Apps().V1().StatefulSets(),
		coreClient,
		testrestmapper.TestOnlyStaticRESTMapper(scheme),
		fakeScaleClient,
		fakeDiscovery,
		fakeClock,
		stalePodDisruptionTimeout,
	)
	// Route status writes into ps and bypass all cache-sync waits.
	dc.getUpdater = func() updater { return ps.Set }
	dc.podListerSynced = alwaysReady
	dc.pdbListerSynced = alwaysReady
	dc.rcListerSynced = alwaysReady
	dc.rsListerSynced = alwaysReady
	dc.dListerSynced = alwaysReady
	dc.ssListerSynced = alwaysReady
	dc.recorder = record.NewFakeRecorder(100)
	informerFactory.Start(ctx.Done())
	informerFactory.WaitForCacheSync(ctx.Done())

	return &disruptionController{
		dc,
		informerFactory.Core().V1().Pods().Informer().GetStore(),
		informerFactory.Policy().V1().PodDisruptionBudgets().Informer().GetStore(),
		informerFactory.Core().V1().ReplicationControllers().Informer().GetStore(),
		informerFactory.Apps().V1().ReplicaSets().Informer().GetStore(),
		informerFactory.Apps().V1().Deployments().Informer().GetStore(),
		informerFactory.Apps().V1().StatefulSets().Informer().GetStore(),
		coreClient,
		fakeScaleClient,
		fakeDiscovery,
		informerFactory,
	}, ps
}

// fooBar returns the canonical label set shared by test pods and PDB selectors.
func fooBar() map[string]string {
	return map[string]string{"foo": "bar"}
}

// newSel wraps a plain label map in a LabelSelector.
func newSel(labels map[string]string) *metav1.LabelSelector {
	return &metav1.LabelSelector{MatchLabels: labels}
}

// newSelFooBar is the selector matching pods created by newPod.
func newSelFooBar() *metav1.LabelSelector {
	return newSel(map[string]string{"foo": "bar"})
}

// newMinAvailablePodDisruptionBudget returns a foo=bar PDB with the given
// MinAvailable, plus its namespace/name key.
func newMinAvailablePodDisruptionBudget(t *testing.T, minAvailable intstr.IntOrString) (*policy.PodDisruptionBudget, string) {

	pdb := &policy.PodDisruptionBudget{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
		},
		Spec: policy.PodDisruptionBudgetSpec{
			MinAvailable: &minAvailable,
			Selector:     newSelFooBar(),
		},
	}

	pdbName, err := controller.KeyFunc(pdb)
	if err != nil {
		t.Fatalf("Unexpected error naming pdb %q: %v", pdb.Name, err)
	}

	return pdb, pdbName
}

// newMaxUnavailablePodDisruptionBudget returns a foo=bar PDB with the given
// MaxUnavailable, plus its namespace/name key.
func newMaxUnavailablePodDisruptionBudget(t *testing.T, maxUnavailable intstr.IntOrString) (*policy.PodDisruptionBudget, string) {
	pdb := &policy.PodDisruptionBudget{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
		},
		Spec: policy.PodDisruptionBudgetSpec{
			MaxUnavailable: &maxUnavailable,
			Selector:       newSelFooBar(),
		},
	}

	pdbName, err := controller.KeyFunc(pdb)
	if err != nil {
		t.Fatalf("Unexpected error naming pdb %q: %v", pdb.Name, err)
	}

	return pdb, pdbName
}

// updatePodOwnerToRc appends a controller owner reference pointing at rc.
func updatePodOwnerToRc(t *testing.T, pod *v1.Pod, rc *v1.ReplicationController) {
	var controllerReference metav1.OwnerReference
	var trueVar = true
	controllerReference = metav1.OwnerReference{UID: rc.UID, APIVersion: controllerKindRC.GroupVersion().String(), Kind: controllerKindRC.Kind, Name: rc.Name, Controller: &trueVar}
	pod.OwnerReferences = append(pod.OwnerReferences, controllerReference)
}

// updatePodOwnerToRs appends a controller owner reference pointing at rs.
func updatePodOwnerToRs(t *testing.T, pod *v1.Pod, rs *apps.ReplicaSet) {
	var controllerReference metav1.OwnerReference
	var trueVar = true
	controllerReference = metav1.OwnerReference{UID: rs.UID, APIVersion: controllerKindRS.GroupVersion().String(), Kind: controllerKindRS.Kind, Name: rs.Name, Controller: &trueVar}
	pod.OwnerReferences = append(pod.OwnerReferences, controllerReference)
}

// updatePodOwnerToSs appends a controller owner reference pointing at ss.
func updatePodOwnerToSs(t *testing.T, pod *v1.Pod, ss *apps.StatefulSet) {
	var controllerReference metav1.OwnerReference
	var trueVar = true
	controllerReference = metav1.OwnerReference{UID: ss.UID, APIVersion: controllerKindSS.GroupVersion().String(), Kind: controllerKindSS.Kind, Name: ss.Name, Controller: &trueVar}
	pod.OwnerReferences = append(pod.OwnerReferences, controllerReference)
}

// newPod returns a Ready pod labeled foo=bar plus its namespace/name key.
func newPod(t *testing.T, name string) (*v1.Pod, string) {
	pod := &v1.Pod{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Annotations:     make(map[string]string),
			Name:            name,
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
			Labels:          fooBar(),
		},
		Spec: v1.PodSpec{},
		Status: v1.PodStatus{
			Conditions: []v1.PodCondition{
				{Type: v1.PodReady, Status: v1.ConditionTrue},
			},
		},
	}

	podName, err := controller.KeyFunc(pod)
	if err != nil {
		t.Fatalf("Unexpected error naming pod %q: %v", pod.Name, err)
	}

	return pod, podName
}

// newReplicationController returns an RC of the given size selecting foo=bar,
// plus its namespace/name key.
func newReplicationController(t *testing.T, size int32) (*v1.ReplicationController, string) {
	rc := &v1.ReplicationController{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
			Labels:          fooBar(),
		},
		Spec: v1.ReplicationControllerSpec{
			Replicas: &size,
			Selector: fooBar(),
		},
	}

	rcName, err := controller.KeyFunc(rc)
	if err != nil {
		t.Fatalf("Unexpected error naming RC %q", rc.Name)
	}

	return rc, rcName
}

// newDeployment returns a Deployment of the given size selecting foo=bar,
// plus its namespace/name key.
func newDeployment(t *testing.T, size int32) (*apps.Deployment, string) {
	d := &apps.Deployment{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
			Labels:          fooBar(),
		},
		Spec: apps.DeploymentSpec{
			Replicas: &size,
			Selector: newSelFooBar(),
		},
	}

	dName, err := controller.KeyFunc(d)
	if err != nil {
		t.Fatalf("Unexpected error naming Deployment %q: %v", d.Name, err)
	}

	return d, dName
}

// newReplicaSet returns a ReplicaSet of the given size selecting foo=bar,
// plus its namespace/name key.
func newReplicaSet(t *testing.T, size int32) (*apps.ReplicaSet, string) {
	rs := &apps.ReplicaSet{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
			Labels:          fooBar(),
		},
		Spec: apps.ReplicaSetSpec{
			Replicas: &size,
			Selector: newSelFooBar(),
		},
	}

	rsName, err := controller.KeyFunc(rs)
	if err != nil {
		t.Fatalf("Unexpected error naming ReplicaSet %q: %v", rs.Name, err)
	}

	return rs, rsName
}

// newStatefulSet returns a StatefulSet of the given size selecting foo=bar,
// plus its namespace/name key.
func newStatefulSet(t *testing.T, size int32) (*apps.StatefulSet, string) {
	ss := &apps.StatefulSet{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
			Labels:          fooBar(),
		},
		Spec: apps.StatefulSetSpec{
			Replicas: &size,
			Selector: newSelFooBar(),
		},
	}

	ssName, err := controller.KeyFunc(ss)
	if err != nil {
		t.Fatalf("Unexpected error naming StatefulSet %q: %v", ss.Name, err)
	}

	return ss, ssName
}

// update replaces obj in store, failing the test on error.
// NOTE(review): the error message says "add" — looks copy-pasted from add().
func update(t *testing.T, store cache.Store, obj interface{}) {
	if err := store.Update(obj); err != nil {
		t.Fatalf("Could not add %+v to %+v: %v", obj, store, err)
	}
}

// add inserts obj into store, failing the test on error.
func add(t *testing.T, store cache.Store, obj interface{}) {
	if err := store.Add(obj); err != nil {
		t.Fatalf("Could not add %+v to %+v: %v", obj, store, err)
	}
}

// Create one with no selector.
Verify it matches all pods 425 func TestNoSelector(t *testing.T) { 426 _, ctx := ktesting.NewTestContext(t) 427 dc, ps := newFakeDisruptionController(ctx) 428 429 pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromInt32(3)) 430 pdb.Spec.Selector = &metav1.LabelSelector{} 431 pod, _ := newPod(t, "yo-yo-yo") 432 433 add(t, dc.pdbStore, pdb) 434 dc.sync(ctx, pdbName) 435 ps.VerifyPdbStatus(t, pdbName, 0, 0, 3, 0, map[string]metav1.Time{}) 436 437 add(t, dc.podStore, pod) 438 dc.sync(ctx, pdbName) 439 ps.VerifyPdbStatus(t, pdbName, 0, 1, 3, 1, map[string]metav1.Time{}) 440 } 441 442 // Verify that available/expected counts go up as we add pods, then verify that 443 // available count goes down when we make a pod unavailable. 444 func TestUnavailable(t *testing.T) { 445 _, ctx := ktesting.NewTestContext(t) 446 dc, ps := newFakeDisruptionController(ctx) 447 448 pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromInt32(3)) 449 add(t, dc.pdbStore, pdb) 450 dc.sync(ctx, pdbName) 451 452 // Add three pods, verifying that the counts go up at each step. 453 pods := []*v1.Pod{} 454 for i := int32(0); i < 4; i++ { 455 ps.VerifyPdbStatus(t, pdbName, 0, i, 3, i, map[string]metav1.Time{}) 456 pod, _ := newPod(t, fmt.Sprintf("yo-yo-yo %d", i)) 457 pods = append(pods, pod) 458 add(t, dc.podStore, pod) 459 dc.sync(ctx, pdbName) 460 } 461 ps.VerifyPdbStatus(t, pdbName, 1, 4, 3, 4, map[string]metav1.Time{}) 462 463 // Now set one pod as unavailable 464 pods[0].Status.Conditions = []v1.PodCondition{} 465 update(t, dc.podStore, pods[0]) 466 dc.sync(ctx, pdbName) 467 468 // Verify expected update 469 ps.VerifyPdbStatus(t, pdbName, 0, 3, 3, 4, map[string]metav1.Time{}) 470 } 471 472 // Verify that an integer MaxUnavailable won't 473 // allow a disruption for pods with no controller. 
474 func TestIntegerMaxUnavailable(t *testing.T) { 475 _, ctx := ktesting.NewTestContext(t) 476 dc, ps := newFakeDisruptionController(ctx) 477 478 pdb, pdbName := newMaxUnavailablePodDisruptionBudget(t, intstr.FromInt32(1)) 479 add(t, dc.pdbStore, pdb) 480 dc.sync(ctx, pdbName) 481 // This verifies that when a PDB has 0 pods, disruptions are not allowed. 482 ps.VerifyDisruptionAllowed(t, pdbName, 0) 483 484 pod, _ := newPod(t, "naked") 485 add(t, dc.podStore, pod) 486 dc.sync(ctx, pdbName) 487 488 ps.VerifyDisruptionAllowed(t, pdbName, 0) 489 verifyEventEmitted(t, dc, "UnmanagedPods") 490 491 } 492 493 // Verify that an integer MaxUnavailable will recompute allowed disruptions when the scale of 494 // the selected pod's controller is modified. 495 func TestIntegerMaxUnavailableWithScaling(t *testing.T) { 496 _, ctx := ktesting.NewTestContext(t) 497 dc, ps := newFakeDisruptionController(ctx) 498 499 pdb, pdbName := newMaxUnavailablePodDisruptionBudget(t, intstr.FromInt32(2)) 500 add(t, dc.pdbStore, pdb) 501 502 rs, _ := newReplicaSet(t, 7) 503 add(t, dc.rsStore, rs) 504 505 pod, _ := newPod(t, "pod") 506 updatePodOwnerToRs(t, pod, rs) 507 add(t, dc.podStore, pod) 508 dc.sync(ctx, pdbName) 509 ps.VerifyPdbStatus(t, pdbName, 0, 1, 5, 7, map[string]metav1.Time{}) 510 511 // Update scale of ReplicaSet and check PDB 512 rs.Spec.Replicas = pointer.Int32(5) 513 update(t, dc.rsStore, rs) 514 515 dc.sync(ctx, pdbName) 516 ps.VerifyPdbStatus(t, pdbName, 0, 1, 3, 5, map[string]metav1.Time{}) 517 } 518 519 // Verify that an percentage MaxUnavailable will recompute allowed disruptions when the scale of 520 // the selected pod's controller is modified. 
func TestPercentageMaxUnavailableWithScaling(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMaxUnavailablePodDisruptionBudget(t, intstr.FromString("30%"))
	add(t, dc.pdbStore, pdb)

	rs, _ := newReplicaSet(t, 7)
	add(t, dc.rsStore, rs)

	pod, _ := newPod(t, "pod")
	updatePodOwnerToRs(t, pod, rs)
	add(t, dc.podStore, pod)
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 0, 1, 4, 7, map[string]metav1.Time{})

	// Update scale of ReplicaSet and check PDB
	rs.Spec.Replicas = pointer.Int32(3)
	update(t, dc.rsStore, rs)

	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 0, 1, 2, 3, map[string]metav1.Time{})
}

// Create a pod with no controller, and verify that a PDB with a percentage
// specified won't allow a disruption.
func TestNakedPod(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	dc.sync(ctx, pdbName)
	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	pod, _ := newPod(t, "naked")
	add(t, dc.podStore, pod)
	dc.sync(ctx, pdbName)

	ps.VerifyDisruptionAllowed(t, pdbName, 0)
	verifyEventEmitted(t, dc, "UnmanagedPods")
}

// Create a pod with unsupported controller, and verify that a PDB with a percentage
// specified won't allow a disruption.
func TestUnsupportedControllerPod(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	dc.sync(ctx, pdbName)
	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	// The owner kind below is not registered with the RESTMapper, so the
	// expected-pod-count calculation should fail and emit an event.
	pod, _ := newPod(t, "naked")
	isController := true
	pod.OwnerReferences = append(pod.OwnerReferences, metav1.OwnerReference{
		APIVersion: "apps.test.io/v1",
		Kind:       "TestWorkload",
		Name:       "fake-controller",
		UID:        "b7329742-8daa-493a-8881-6ca07139172b",
		Controller: &isController,
	})

	add(t, dc.podStore, pod)
	dc.sync(ctx, pdbName)

	ps.VerifyDisruptionAllowed(t, pdbName, 0)
	verifyEventEmitted(t, dc, "CalculateExpectedPodCountFailed")
}

// Verify that disruption controller is not erroring when unmanaged pods are found
func TestStatusForUnmanagedPod(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	dc.sync(ctx, pdbName)
	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	pod, _ := newPod(t, "unmanaged")
	add(t, dc.podStore, pod)
	dc.sync(ctx, pdbName)
	ps.VerifyNoStatusError(t, pdbName)
	verifyEventEmitted(t, dc, "UnmanagedPods")
}

// Check if the unmanaged pods are correctly collected or not
func TestTotalUnmanagedPods(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	dc.sync(ctx, pdbName)
	// This verifies that when a PDB has 0 pods, disruptions are not allowed.
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	pod, _ := newPod(t, "unmanaged")
	add(t, dc.podStore, pod)
	dc.sync(ctx, pdbName)
	var pods []*v1.Pod
	pods = append(pods, pod)
	// getExpectedScale should report the controller-less pod as unmanaged.
	_, unmanagedPods, _ := dc.getExpectedScale(ctx, pdb, pods)
	if len(unmanagedPods) != 1 {
		t.Fatalf("expected one pod to be unmanaged pod but found %d", len(unmanagedPods))
	}
	ps.VerifyNoStatusError(t, pdbName)
	verifyEventEmitted(t, dc, "UnmanagedPods")
}

// Verify that we count the scale of a ReplicaSet even when it has no Deployment.
func TestReplicaSet(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("20%"))
	add(t, dc.pdbStore, pdb)

	rs, _ := newReplicaSet(t, 10)
	add(t, dc.rsStore, rs)
	pod, _ := newPod(t, "pod")
	updatePodOwnerToRs(t, pod, rs)
	add(t, dc.podStore, pod)
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 0, 1, 2, 10, map[string]metav1.Time{})
}

// TestScaleResource verifies that the expected pod count for pods owned by a
// custom resource is read from the owner's scale subresource via the fake
// scale client.
func TestScaleResource(t *testing.T) {
	customResourceUID := uuid.NewUUID()
	replicas := int32(10)
	pods := int32(4)
	maxUnavailable := int32(5)

	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	// Any "get" on the custom resource's scale returns the configured replicas.
	dc.scaleClient.AddReactor("get", "customresources", func(action core.Action) (handled bool, ret runtime.Object, err error) {
		obj := &autoscalingapi.Scale{
			ObjectMeta: metav1.ObjectMeta{
				Namespace: metav1.NamespaceDefault,
				UID:       customResourceUID,
			},
			Spec: autoscalingapi.ScaleSpec{
				Replicas: replicas,
			},
		}
		return true, obj, nil
	})

	pdb, pdbName := newMaxUnavailablePodDisruptionBudget(t, intstr.FromInt32(maxUnavailable))
	add(t, dc.pdbStore, pdb)

	trueVal := true
	for i := 0; i < int(pods); i++ {
		pod, _ := newPod(t, fmt.Sprintf("pod-%d", i))
		pod.SetOwnerReferences([]metav1.OwnerReference{
			{
				Kind:       customGVK.Kind,
				APIVersion: customGVK.GroupVersion().String(),
				Controller: &trueVal,
				UID:        customResourceUID,
			},
		})
		add(t, dc.podStore, pod)
	}
	dc.sync(ctx, pdbName)
	disruptionsAllowed := int32(0)
	if replicas-pods < maxUnavailable {
		disruptionsAllowed = maxUnavailable - (replicas - pods)
	}
	ps.VerifyPdbStatus(t, pdbName, disruptionsAllowed, pods, replicas-maxUnavailable, replicas, map[string]metav1.Time{})
}

// TestScaleFinderNoResource checks getScaleController when the scale "get"
// returns NotFound: whether that is an error depends on whether discovery
// advertises a scale subresource for the owner's resource.
func TestScaleFinderNoResource(t *testing.T) {
	resourceName := "customresources"
	testCases := map[string]struct {
		apiResources []metav1.APIResource
		expectError  bool
	}{
		"resource implements scale": {
			apiResources: []metav1.APIResource{
				{
					Kind: customGVK.Kind,
					Name: resourceName + "/status",
				},
				{
					Kind:    "Scale",
					Group:   autoscalingapi.GroupName,
					Version: "v1",
					Name:    resourceName + "/scale",
				},
				{
					Kind: customGVK.Kind,
					Name: resourceName,
				},
			},
			expectError: false,
		},
		"resource implements unsupported data format for scale subresource": {
			apiResources: []metav1.APIResource{
				{
					Kind: customGVK.Kind,
					Name: resourceName,
				},
				{
					Kind: customGVK.Kind,
					Name: resourceName + "/scale",
				},
			},
			expectError: true,
		},
		"resource does not implement scale": {
			apiResources: []metav1.APIResource{
				{
					Kind: customGVK.Kind,
					Name: resourceName,
				},
			},
			expectError: true,
		},
	}

	for tn, tc := range testCases {
		t.Run(tn, func(t *testing.T) {
			customResourceUID := uuid.NewUUID()

			_, ctx := ktesting.NewTestContext(t)
			dc, _ := newFakeDisruptionController(ctx)

			dc.scaleClient.AddReactor("get", resourceName, func(action core.Action) (handled bool, ret runtime.Object, err error) {
				gr := schema.GroupResource{
					Group:    customGVK.Group,
					Resource: resourceName,
				}
				return true, nil, errors.NewNotFound(gr, "name")
			})
			dc.discoveryClient.Resources = []*metav1.APIResourceList{
				{
					GroupVersion: customGVK.GroupVersion().String(),
					APIResources: tc.apiResources,
				},
			}

			trueVal := true
			ownerRef := &metav1.OwnerReference{
				Kind:       customGVK.Kind,
				APIVersion: customGVK.GroupVersion().String(),
				Controller: &trueVal,
				UID:        customResourceUID,
			}

			_, err := dc.getScaleController(ctx, ownerRef, "default")

			if tc.expectError && err == nil {
				t.Error("expected error, but didn't get one")
			}

			if !tc.expectError && err != nil {
				t.Errorf("did not expect error, but got %v", err)
			}
		})
	}
}

// Verify that multiple controllers doesn't allow the PDB to be set true.
func TestMultipleControllers(t *testing.T) {
	const podCount = 2
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("1%"))
	add(t, dc.pdbStore, pdb)

	pods := []*v1.Pod{}
	for i := 0; i < podCount; i++ {
		pod, _ := newPod(t, fmt.Sprintf("pod %d", i))
		pods = append(pods, pod)
		add(t, dc.podStore, pod)
	}
	dc.sync(ctx, pdbName)

	// No controllers yet => no disruption allowed
	ps.VerifyDisruptionAllowed(t, pdbName, 0)

	rc, _ := newReplicationController(t, 1)
	rc.Name = "rc 1"
	for i := 0; i < podCount; i++ {
		updatePodOwnerToRc(t, pods[i], rc)
	}
	add(t, dc.rcStore, rc)
	dc.sync(ctx, pdbName)
	// One RC and 200%>1% healthy => disruption allowed
	ps.VerifyDisruptionAllowed(t, pdbName, 1)

	rc, _ = newReplicationController(t, 1)
	rc.Name = "rc 2"
	for i := 0; i < podCount; i++ {
		updatePodOwnerToRc(t, pods[i], rc)
	}
	add(t, dc.rcStore, rc)
	dc.sync(ctx, pdbName)

	// 100%>1% healthy BUT two RCs => no disruption allowed
	// TODO: Find out if this assert is still needed
	//ps.VerifyDisruptionAllowed(t, pdbName, 0)
}

func TestReplicationController(t *testing.T) {
	// The budget in this test matches foo=bar, but the RC and its pods match
	// {foo=bar, baz=quux}.  Later, when we add a rogue pod with only a foo=bar
	// label, it will match the budget but have no controllers, which should
	// trigger the controller to set PodDisruptionAllowed to false.
	labels := map[string]string{
		"foo": "bar",
		"baz": "quux",
	}
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	// 34% should round up to 2
	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("34%"))
	add(t, dc.pdbStore, pdb)
	rc, _ := newReplicationController(t, 3)
	rc.Spec.Selector = labels
	add(t, dc.rcStore, rc)
	dc.sync(ctx, pdbName)

	// It starts out at 0 expected because, with no pods, the PDB doesn't know
	// about the RC.  This is a known bug.  TODO(mml): file issue
	ps.VerifyPdbStatus(t, pdbName, 0, 0, 0, 0, map[string]metav1.Time{})

	for i := int32(0); i < 3; i++ {
		pod, _ := newPod(t, fmt.Sprintf("foobar %d", i))
		updatePodOwnerToRc(t, pod, rc)
		pod.Labels = labels
		add(t, dc.podStore, pod)
		dc.sync(ctx, pdbName)
		if i < 2 {
			ps.VerifyPdbStatus(t, pdbName, 0, i+1, 2, 3, map[string]metav1.Time{})
		} else {
			ps.VerifyPdbStatus(t, pdbName, 1, 3, 2, 3, map[string]metav1.Time{})
		}
	}

	rogue, _ := newPod(t, "rogue")
	add(t, dc.podStore, rogue)
	dc.sync(ctx, pdbName)
	ps.VerifyDisruptionAllowed(t, pdbName, 2)
}

func TestStatefulSetController(t *testing.T) {
	labels := map[string]string{
		"foo": "bar",
		"baz": "quux",
	}

	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	// 34% should round up to 2
	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("34%"))
	add(t, dc.pdbStore, pdb)
	ss, _ := newStatefulSet(t, 3)
	add(t, dc.ssStore, ss)
	dc.sync(ctx, pdbName)

	// It starts out at 0 expected because, with no pods, the PDB doesn't know
	// about the SS.  This is a known bug.  TODO(mml): file issue
	ps.VerifyPdbStatus(t, pdbName, 0, 0, 0, 0, map[string]metav1.Time{})

	for i := int32(0); i < 3; i++ {
		pod, _ := newPod(t, fmt.Sprintf("foobar %d", i))
		updatePodOwnerToSs(t, pod, ss)
		pod.Labels = labels
		add(t, dc.podStore, pod)
		dc.sync(ctx, pdbName)
		if i < 2 {
			ps.VerifyPdbStatus(t, pdbName, 0, i+1, 2, 3, map[string]metav1.Time{})
		} else {
			ps.VerifyPdbStatus(t, pdbName, 1, 3, 2, 3, map[string]metav1.Time{})
		}
	}
}

func TestTwoControllers(t *testing.T) {
	// Most of this test is in verifying intermediate cases as we define the
	// three controllers and create the pods.
	rcLabels := map[string]string{
		"foo": "bar",
		"baz": "quux",
	}
	dLabels := map[string]string{
		"foo": "bar",
		"baz": "quuux",
	}
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)

	// These constants are related, but I avoid calculating the correct values in
	// code.  If you update a parameter here, recalculate the correct values for
	// all of them.  Further down in the test, we use these to control loops, and
	// that level of logic is enough complexity for me.
	const collectionSize int32 = 11 // How big each collection is
	const minimumOne int32 = 4      // integer minimum with one controller
	const minimumTwo int32 = 7      // integer minimum with two controllers

	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromString("28%"))
	add(t, dc.pdbStore, pdb)
	rc, _ := newReplicationController(t, collectionSize)
	rc.Spec.Selector = rcLabels
	add(t, dc.rcStore, rc)
	dc.sync(ctx, pdbName)

	ps.VerifyPdbStatus(t, pdbName, 0, 0, 0, 0, map[string]metav1.Time{})

	pods := []*v1.Pod{}

	unavailablePods := collectionSize - minimumOne - 1
	for i := int32(1); i <= collectionSize; i++ {
		pod, _ := newPod(t, fmt.Sprintf("quux %d", i))
		updatePodOwnerToRc(t, pod, rc)
		pods = append(pods, pod)
		pod.Labels = rcLabels
		if i <= unavailablePods {
			pod.Status.Conditions = []v1.PodCondition{}
		}
		add(t, dc.podStore, pod)
		dc.sync(ctx, pdbName)
		if i <= unavailablePods {
			ps.VerifyPdbStatus(t, pdbName, 0, 0, minimumOne, collectionSize, map[string]metav1.Time{})
		} else if i-unavailablePods <= minimumOne {
			ps.VerifyPdbStatus(t, pdbName, 0, i-unavailablePods, minimumOne, collectionSize, map[string]metav1.Time{})
		} else {
			ps.VerifyPdbStatus(t, pdbName, 1, i-unavailablePods, minimumOne, collectionSize, map[string]metav1.Time{})
		}
	}

	d, _ := newDeployment(t, collectionSize)
	d.Spec.Selector = newSel(dLabels)
	add(t, dc.dStore, d)
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 1, minimumOne+1, minimumOne, collectionSize, map[string]metav1.Time{})

	rs, _ := newReplicaSet(t, collectionSize)
	rs.Spec.Selector = newSel(dLabels)
	rs.Labels = dLabels
	add(t, dc.rsStore, rs)
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 1, minimumOne+1, minimumOne, collectionSize, map[string]metav1.Time{})

	// By the end of this loop, the number of ready pods should be N+2 (hence minimumTwo+2).
	unavailablePods = 2*collectionSize - (minimumTwo + 2) - unavailablePods
	for i := int32(1); i <= collectionSize; i++ {
		pod, _ := newPod(t, fmt.Sprintf("quuux %d", i))
		updatePodOwnerToRs(t, pod, rs)
		pods = append(pods, pod)
		pod.Labels = dLabels
		if i <= unavailablePods {
			pod.Status.Conditions = []v1.PodCondition{}
		}
		add(t, dc.podStore, pod)
		dc.sync(ctx, pdbName)
		if i <= unavailablePods {
			ps.VerifyPdbStatus(t, pdbName, 0, minimumOne+1, minimumTwo, 2*collectionSize, map[string]metav1.Time{})
		} else if i-unavailablePods <= minimumTwo-(minimumOne+1) {
			ps.VerifyPdbStatus(t, pdbName, 0, (minimumOne+1)+(i-unavailablePods), minimumTwo, 2*collectionSize, map[string]metav1.Time{})
		} else {
			ps.VerifyPdbStatus(t, pdbName, i-unavailablePods-(minimumTwo-(minimumOne+1)),
				(minimumOne+1)+(i-unavailablePods), minimumTwo, 2*collectionSize, map[string]metav1.Time{})
		}
	}

	// Now we verify we can bring down 1 pod and a disruption is still permitted,
	// but if we bring down two, it's not.  Then we make the pod ready again and
	// verify that a disruption is permitted again.
	ps.VerifyPdbStatus(t, pdbName, 2, 2+minimumTwo, minimumTwo, 2*collectionSize, map[string]metav1.Time{})
	pods[collectionSize-1].Status.Conditions = []v1.PodCondition{}
	update(t, dc.podStore, pods[collectionSize-1])
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 1, 1+minimumTwo, minimumTwo, 2*collectionSize, map[string]metav1.Time{})

	pods[collectionSize-2].Status.Conditions = []v1.PodCondition{}
	update(t, dc.podStore, pods[collectionSize-2])
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 0, minimumTwo, minimumTwo, 2*collectionSize, map[string]metav1.Time{})

	pods[collectionSize-1].Status.Conditions = []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue}}
	update(t, dc.podStore, pods[collectionSize-1])
	dc.sync(ctx, pdbName)
	ps.VerifyPdbStatus(t, pdbName, 1, 1+minimumTwo, minimumTwo, 2*collectionSize, map[string]metav1.Time{})
}

// Test pdb doesn't exist
func TestPDBNotExist(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, _ := newFakeDisruptionController(ctx)
	pdb, _ := newMinAvailablePodDisruptionBudget(t, intstr.FromString("67%"))
	add(t, dc.pdbStore, pdb)
	// Syncing a key with no matching PDB must not be treated as an error.
	if err := dc.sync(ctx, "notExist"); err != nil {
		t.Errorf("Unexpected error: %v, expect nil", err)
	}
}

func TestUpdateDisruptedPods(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	dc, ps := newFakeDisruptionController(ctx)
	dc.recheckQueue = workqueue.NewNamedDelayingQueue("pdb_queue")
	pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromInt32(1))
	currentTime := dc.clock.Now()
	pdb.Status.DisruptedPods = map[string]metav1.Time{
		"p1": {Time: currentTime},                       // Should be removed, pod deletion started.
		"p2": {Time: currentTime.Add(-3 * time.Minute)}, // Should be removed, expired.
		"p3": {Time: currentTime.Add(-time.Minute)},     // Should remain, pod untouched.
1039 "notthere": {Time: currentTime}, // Should be removed, pod deleted. 1040 } 1041 add(t, dc.pdbStore, pdb) 1042 1043 pod1, _ := newPod(t, "p1") 1044 pod1.DeletionTimestamp = &metav1.Time{Time: dc.clock.Now()} 1045 pod2, _ := newPod(t, "p2") 1046 pod3, _ := newPod(t, "p3") 1047 1048 add(t, dc.podStore, pod1) 1049 add(t, dc.podStore, pod2) 1050 add(t, dc.podStore, pod3) 1051 1052 dc.sync(ctx, pdbName) 1053 1054 ps.VerifyPdbStatus(t, pdbName, 0, 1, 1, 3, map[string]metav1.Time{"p3": {Time: currentTime.Add(-time.Minute)}}) 1055 } 1056 1057 func TestBasicFinderFunctions(t *testing.T) { 1058 _, ctx := ktesting.NewTestContext(t) 1059 dc, _ := newFakeDisruptionController(ctx) 1060 1061 rs, _ := newReplicaSet(t, 10) 1062 add(t, dc.rsStore, rs) 1063 rc, _ := newReplicationController(t, 12) 1064 add(t, dc.rcStore, rc) 1065 ss, _ := newStatefulSet(t, 14) 1066 add(t, dc.ssStore, ss) 1067 1068 testCases := map[string]struct { 1069 finderFunc podControllerFinder 1070 apiVersion string 1071 kind string 1072 name string 1073 uid types.UID 1074 findsScale bool 1075 expectedScale int32 1076 }{ 1077 "replicaset controller with apps group": { 1078 finderFunc: dc.getPodReplicaSet, 1079 apiVersion: "apps/v1", 1080 kind: controllerKindRS.Kind, 1081 name: rs.Name, 1082 uid: rs.UID, 1083 findsScale: true, 1084 expectedScale: 10, 1085 }, 1086 "replicaset controller with invalid group": { 1087 finderFunc: dc.getPodReplicaSet, 1088 apiVersion: "invalid/v1", 1089 kind: controllerKindRS.Kind, 1090 name: rs.Name, 1091 uid: rs.UID, 1092 findsScale: false, 1093 }, 1094 "replicationcontroller with empty group": { 1095 finderFunc: dc.getPodReplicationController, 1096 apiVersion: "/v1", 1097 kind: controllerKindRC.Kind, 1098 name: rc.Name, 1099 uid: rc.UID, 1100 findsScale: true, 1101 expectedScale: 12, 1102 }, 1103 "replicationcontroller with invalid group": { 1104 finderFunc: dc.getPodReplicationController, 1105 apiVersion: "apps/v1", 1106 kind: controllerKindRC.Kind, 1107 name: rc.Name, 1108 
uid: rc.UID, 1109 findsScale: false, 1110 }, 1111 "statefulset controller with extensions group": { 1112 finderFunc: dc.getPodStatefulSet, 1113 apiVersion: "apps/v1", 1114 kind: controllerKindSS.Kind, 1115 name: ss.Name, 1116 uid: ss.UID, 1117 findsScale: true, 1118 expectedScale: 14, 1119 }, 1120 "statefulset controller with invalid kind": { 1121 finderFunc: dc.getPodStatefulSet, 1122 apiVersion: "apps/v1", 1123 kind: controllerKindRS.Kind, 1124 name: ss.Name, 1125 uid: ss.UID, 1126 findsScale: false, 1127 }, 1128 } 1129 1130 for tn, tc := range testCases { 1131 t.Run(tn, func(t *testing.T) { 1132 controllerRef := &metav1.OwnerReference{ 1133 APIVersion: tc.apiVersion, 1134 Kind: tc.kind, 1135 Name: tc.name, 1136 UID: tc.uid, 1137 } 1138 1139 controllerAndScale, _ := tc.finderFunc(ctx, controllerRef, metav1.NamespaceDefault) 1140 1141 if controllerAndScale == nil { 1142 if tc.findsScale { 1143 t.Error("Expected scale, but got nil") 1144 } 1145 return 1146 } 1147 1148 if got, want := controllerAndScale.scale, tc.expectedScale; got != want { 1149 t.Errorf("Expected scale %d, but got %d", want, got) 1150 } 1151 1152 if got, want := controllerAndScale.UID, tc.uid; got != want { 1153 t.Errorf("Expected uid %s, but got %s", want, got) 1154 } 1155 }) 1156 } 1157 } 1158 1159 func TestDeploymentFinderFunction(t *testing.T) { 1160 labels := map[string]string{ 1161 "foo": "bar", 1162 } 1163 1164 testCases := map[string]struct { 1165 rsApiVersion string 1166 rsKind string 1167 depApiVersion string 1168 depKind string 1169 findsScale bool 1170 expectedScale int32 1171 }{ 1172 "happy path": { 1173 rsApiVersion: "apps/v1", 1174 rsKind: controllerKindRS.Kind, 1175 depApiVersion: "extensions/v1", 1176 depKind: controllerKindDep.Kind, 1177 findsScale: true, 1178 expectedScale: 10, 1179 }, 1180 "invalid rs apiVersion": { 1181 rsApiVersion: "invalid/v1", 1182 rsKind: controllerKindRS.Kind, 1183 depApiVersion: "apps/v1", 1184 depKind: controllerKindDep.Kind, 1185 findsScale: false, 
1186 }, 1187 "invalid rs kind": { 1188 rsApiVersion: "apps/v1", 1189 rsKind: "InvalidKind", 1190 depApiVersion: "apps/v1", 1191 depKind: controllerKindDep.Kind, 1192 findsScale: false, 1193 }, 1194 "invalid deployment apiVersion": { 1195 rsApiVersion: "extensions/v1", 1196 rsKind: controllerKindRS.Kind, 1197 depApiVersion: "deployment/v1", 1198 depKind: controllerKindDep.Kind, 1199 findsScale: false, 1200 }, 1201 "invalid deployment kind": { 1202 rsApiVersion: "apps/v1", 1203 rsKind: controllerKindRS.Kind, 1204 depApiVersion: "extensions/v1", 1205 depKind: "InvalidKind", 1206 findsScale: false, 1207 }, 1208 } 1209 1210 for tn, tc := range testCases { 1211 t.Run(tn, func(t *testing.T) { 1212 _, ctx := ktesting.NewTestContext(t) 1213 dc, _ := newFakeDisruptionController(ctx) 1214 1215 dep, _ := newDeployment(t, 10) 1216 dep.Spec.Selector = newSel(labels) 1217 add(t, dc.dStore, dep) 1218 1219 rs, _ := newReplicaSet(t, 5) 1220 rs.Labels = labels 1221 trueVal := true 1222 rs.OwnerReferences = append(rs.OwnerReferences, metav1.OwnerReference{ 1223 APIVersion: tc.depApiVersion, 1224 Kind: tc.depKind, 1225 Name: dep.Name, 1226 UID: dep.UID, 1227 Controller: &trueVal, 1228 }) 1229 add(t, dc.rsStore, rs) 1230 1231 controllerRef := &metav1.OwnerReference{ 1232 APIVersion: tc.rsApiVersion, 1233 Kind: tc.rsKind, 1234 Name: rs.Name, 1235 UID: rs.UID, 1236 } 1237 1238 controllerAndScale, _ := dc.getPodDeployment(ctx, controllerRef, metav1.NamespaceDefault) 1239 1240 if controllerAndScale == nil { 1241 if tc.findsScale { 1242 t.Error("Expected scale, but got nil") 1243 } 1244 return 1245 } 1246 1247 if got, want := controllerAndScale.scale, tc.expectedScale; got != want { 1248 t.Errorf("Expected scale %d, but got %d", want, got) 1249 } 1250 1251 if got, want := controllerAndScale.UID, dep.UID; got != want { 1252 t.Errorf("Expected uid %s, but got %s", want, got) 1253 } 1254 }) 1255 } 1256 } 1257 1258 // This test checks that the disruption controller does not write stale data to 
1259 // a PDB status during race conditions with the eviction handler. Specifically, 1260 // failed updates due to ResourceVersion conflict should not cause a stale value 1261 // of DisruptionsAllowed to be written. 1262 // 1263 // In this test, DisruptionsAllowed starts at 2. 1264 // (A) We will delete 1 pod and trigger DisruptionController to set 1265 // DisruptionsAllowed to 1. 1266 // (B) As the DisruptionController attempts this write, we will evict the 1267 // remaining 2 pods and update DisruptionsAllowed to 0. (The real eviction 1268 // handler would allow this because it still sees DisruptionsAllowed=2.) 1269 // (C) If the DisruptionController writes DisruptionsAllowed=1 despite the 1270 // resource conflict error, then there is a bug. 1271 func TestUpdatePDBStatusRetries(t *testing.T) { 1272 _, ctx := ktesting.NewTestContext(t) 1273 dc, _ := newFakeDisruptionController(ctx) 1274 // Inject the production code over our fake impl 1275 dc.getUpdater = func() updater { return dc.writePdbStatus } 1276 // Create a PDB and 3 pods that match it. 1277 pdb, pdbKey := newMinAvailablePodDisruptionBudget(t, intstr.FromInt32(1)) 1278 pdb, err := dc.coreClient.PolicyV1().PodDisruptionBudgets(pdb.Namespace).Create(ctx, pdb, metav1.CreateOptions{}) 1279 if err != nil { 1280 t.Fatalf("Failed to create PDB: %v", err) 1281 } 1282 podNames := []string{"moe", "larry", "curly"} 1283 for _, name := range podNames { 1284 pod, _ := newPod(t, name) 1285 _, err := dc.coreClient.CoreV1().Pods(pod.Namespace).Create(ctx, pod, metav1.CreateOptions{}) 1286 if err != nil { 1287 t.Fatalf("Failed to create pod: %v", err) 1288 } 1289 } 1290 1291 // Block until the fake clientset writes are observable in the informer caches. 1292 // FUN FACT: This guarantees that the informer caches have updated, but it does 1293 // not guarantee that informer event handlers have completed. 
Fortunately, 1294 // DisruptionController does most of its logic by reading from informer 1295 // listers, so this guarantee is sufficient. 1296 if err := waitForCacheCount(dc.pdbStore, 1); err != nil { 1297 t.Fatalf("Failed to verify PDB in informer cache: %v", err) 1298 } 1299 if err := waitForCacheCount(dc.podStore, len(podNames)); err != nil { 1300 t.Fatalf("Failed to verify pods in informer cache: %v", err) 1301 } 1302 1303 // Sync DisruptionController once to update PDB status. 1304 if err := dc.sync(ctx, pdbKey); err != nil { 1305 t.Fatalf("Failed initial sync: %v", err) 1306 } 1307 1308 // Evict simulates the visible effects of eviction in our fake client. 1309 evict := func(podNames ...string) { 1310 // These GVRs are copied from the generated fake code because they are not exported. 1311 var ( 1312 podsResource = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} 1313 poddisruptionbudgetsResource = schema.GroupVersionResource{Group: "policy", Version: "v1", Resource: "poddisruptionbudgets"} 1314 ) 1315 1316 // Bypass the coreClient.Fake and write directly to the ObjectTracker, because 1317 // this helper will be called while the Fake is holding a lock. 
1318 obj, err := dc.coreClient.Tracker().Get(poddisruptionbudgetsResource, pdb.Namespace, pdb.Name) 1319 if err != nil { 1320 t.Fatalf("Failed to get PDB: %v", err) 1321 } 1322 updatedPDB := obj.(*policy.PodDisruptionBudget) 1323 // Each eviction, 1324 // - decrements DisruptionsAllowed 1325 // - adds the pod to DisruptedPods 1326 // - deletes the pod 1327 updatedPDB.Status.DisruptionsAllowed -= int32(len(podNames)) 1328 updatedPDB.Status.DisruptedPods = make(map[string]metav1.Time) 1329 for _, name := range podNames { 1330 updatedPDB.Status.DisruptedPods[name] = metav1.NewTime(dc.clock.Now()) 1331 } 1332 if err := dc.coreClient.Tracker().Update(poddisruptionbudgetsResource, updatedPDB, updatedPDB.Namespace); err != nil { 1333 t.Fatalf("Eviction (PDB update) failed: %v", err) 1334 } 1335 for _, name := range podNames { 1336 if err := dc.coreClient.Tracker().Delete(podsResource, "default", name); err != nil { 1337 t.Fatalf("Eviction (pod delete) failed: %v", err) 1338 } 1339 } 1340 } 1341 1342 // The fake kube client does not update ResourceVersion or check for conflicts. 1343 // Instead, we add a reactor that returns a conflict error on the first PDB 1344 // update and success after that. 1345 var failOnce sync.Once 1346 dc.coreClient.Fake.PrependReactor("update", "poddisruptionbudgets", func(a core.Action) (handled bool, obj runtime.Object, err error) { 1347 failOnce.Do(func() { 1348 // (B) Evict two pods and fail this update. 
1349 evict(podNames[1], podNames[2]) 1350 handled = true 1351 err = errors.NewConflict(a.GetResource().GroupResource(), pdb.Name, fmt.Errorf("conflict")) 1352 }) 1353 return handled, obj, err 1354 }) 1355 1356 // (A) Delete one pod 1357 if err := dc.coreClient.CoreV1().Pods("default").Delete(ctx, podNames[0], metav1.DeleteOptions{}); err != nil { 1358 t.Fatal(err) 1359 } 1360 if err := waitForCacheCount(dc.podStore, len(podNames)-1); err != nil { 1361 t.Fatalf("Failed to verify pods in informer cache: %v", err) 1362 } 1363 1364 // The sync() function should either write a correct status which takes the 1365 // evictions into account, or re-queue the PDB for another sync (by returning 1366 // an error) 1367 if err := dc.sync(ctx, pdbKey); err != nil { 1368 t.Logf("sync() returned with error: %v", err) 1369 } else { 1370 t.Logf("sync() returned with no error") 1371 } 1372 1373 // (C) Whether or not sync() returned an error, the PDB status should reflect 1374 // the evictions that took place. 
1375 finalPDB, err := dc.coreClient.PolicyV1().PodDisruptionBudgets("default").Get(ctx, pdb.Name, metav1.GetOptions{}) 1376 if err != nil { 1377 t.Fatalf("Failed to get PDB: %v", err) 1378 } 1379 if expected, actual := int32(0), finalPDB.Status.DisruptionsAllowed; expected != actual { 1380 t.Errorf("DisruptionsAllowed should be %d, got %d", expected, actual) 1381 } 1382 } 1383 1384 func TestInvalidSelectors(t *testing.T) { 1385 testCases := map[string]struct { 1386 labelSelector *metav1.LabelSelector 1387 }{ 1388 "illegal value key": { 1389 labelSelector: &metav1.LabelSelector{ 1390 MatchLabels: map[string]string{ 1391 "k8s.io/too/many/slashes": "value", 1392 }, 1393 }, 1394 }, 1395 "illegal operator": { 1396 labelSelector: &metav1.LabelSelector{ 1397 MatchExpressions: []metav1.LabelSelectorRequirement{ 1398 { 1399 Key: "foo", 1400 Operator: metav1.LabelSelectorOperator("illegal"), 1401 Values: []string{"bar"}, 1402 }, 1403 }, 1404 }, 1405 }, 1406 } 1407 1408 for tn, tc := range testCases { 1409 t.Run(tn, func(t *testing.T) { 1410 _, ctx := ktesting.NewTestContext(t) 1411 1412 dc, ps := newFakeDisruptionController(ctx) 1413 1414 pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromInt32(3)) 1415 pdb.Spec.Selector = tc.labelSelector 1416 1417 add(t, dc.pdbStore, pdb) 1418 dc.sync(ctx, pdbName) 1419 ps.VerifyPdbStatus(t, pdbName, 0, 0, 0, 0, map[string]metav1.Time{}) 1420 }) 1421 } 1422 } 1423 1424 func TestStalePodDisruption(t *testing.T) { 1425 now := time.Now() 1426 cases := map[string]struct { 1427 pod *v1.Pod 1428 timePassed time.Duration 1429 wantConditions []v1.PodCondition 1430 }{ 1431 "stale pod disruption": { 1432 pod: &v1.Pod{ 1433 ObjectMeta: metav1.ObjectMeta{ 1434 Name: "foo", 1435 Namespace: metav1.NamespaceDefault, 1436 }, 1437 Status: v1.PodStatus{ 1438 Conditions: []v1.PodCondition{ 1439 { 1440 Type: v1.DisruptionTarget, 1441 Status: v1.ConditionTrue, 1442 LastTransitionTime: metav1.Time{Time: now}, 1443 }, 1444 }, 1445 }, 1446 }, 1447 
timePassed: 2*time.Minute + time.Second, 1448 wantConditions: []v1.PodCondition{ 1449 { 1450 Type: v1.DisruptionTarget, 1451 Status: v1.ConditionFalse, 1452 }, 1453 }, 1454 }, 1455 "pod disruption in progress": { 1456 pod: &v1.Pod{ 1457 ObjectMeta: metav1.ObjectMeta{ 1458 Name: "foo", 1459 Namespace: metav1.NamespaceDefault, 1460 }, 1461 Status: v1.PodStatus{ 1462 Conditions: []v1.PodCondition{ 1463 { 1464 Type: v1.DisruptionTarget, 1465 Status: v1.ConditionTrue, 1466 LastTransitionTime: metav1.Time{Time: now}, 1467 }, 1468 }, 1469 }, 1470 }, 1471 timePassed: 2*time.Minute - time.Second, 1472 wantConditions: []v1.PodCondition{ 1473 { 1474 Type: v1.DisruptionTarget, 1475 Status: v1.ConditionTrue, 1476 }, 1477 }, 1478 }, 1479 "pod disruption actuated": { 1480 pod: &v1.Pod{ 1481 ObjectMeta: metav1.ObjectMeta{ 1482 Name: "foo", 1483 Namespace: metav1.NamespaceDefault, 1484 DeletionTimestamp: &metav1.Time{Time: now}, 1485 }, 1486 Status: v1.PodStatus{ 1487 Conditions: []v1.PodCondition{ 1488 { 1489 Type: v1.DisruptionTarget, 1490 Status: v1.ConditionTrue, 1491 LastTransitionTime: metav1.Time{Time: now}, 1492 }, 1493 }, 1494 }, 1495 }, 1496 timePassed: 2*time.Minute + time.Second, 1497 wantConditions: []v1.PodCondition{ 1498 { 1499 Type: v1.DisruptionTarget, 1500 Status: v1.ConditionTrue, 1501 }, 1502 }, 1503 }, 1504 "no pod disruption": { 1505 pod: &v1.Pod{ 1506 ObjectMeta: metav1.ObjectMeta{ 1507 Name: "foo", 1508 Namespace: metav1.NamespaceDefault, 1509 DeletionTimestamp: &metav1.Time{Time: now}, 1510 }, 1511 }, 1512 timePassed: 2*time.Minute + time.Second, 1513 }, 1514 "pod disruption cleared": { 1515 pod: &v1.Pod{ 1516 ObjectMeta: metav1.ObjectMeta{ 1517 Name: "foo", 1518 Namespace: metav1.NamespaceDefault, 1519 DeletionTimestamp: &metav1.Time{Time: now}, 1520 }, 1521 Status: v1.PodStatus{ 1522 Conditions: []v1.PodCondition{ 1523 { 1524 Type: v1.DisruptionTarget, 1525 Status: v1.ConditionFalse, 1526 }, 1527 }, 1528 }, 1529 }, 1530 timePassed: 2*time.Minute + 
time.Second, 1531 wantConditions: []v1.PodCondition{ 1532 { 1533 Type: v1.DisruptionTarget, 1534 Status: v1.ConditionFalse, 1535 }, 1536 }, 1537 }, 1538 } 1539 for name, tc := range cases { 1540 t.Run(name, func(t *testing.T) { 1541 _, ctx := ktesting.NewTestContext(t) 1542 ctx, cancel := context.WithCancel(ctx) 1543 defer cancel() 1544 dc, _ := newFakeDisruptionControllerWithTime(ctx, now) 1545 go dc.Run(ctx) 1546 if _, err := dc.coreClient.CoreV1().Pods(tc.pod.Namespace).Create(ctx, tc.pod, metav1.CreateOptions{}); err != nil { 1547 t.Fatalf("Failed to create pod: %v", err) 1548 } 1549 dc.clock.Sleep(tc.timePassed) 1550 if err := dc.informerFactory.Core().V1().Pods().Informer().GetIndexer().Add(tc.pod); err != nil { 1551 t.Fatalf("Failed adding pod to indexer: %v", err) 1552 } 1553 diff := "" 1554 if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) { 1555 pod, err := dc.kubeClient.CoreV1().Pods(tc.pod.Namespace).Get(ctx, tc.pod.Name, metav1.GetOptions{}) 1556 if err != nil { 1557 t.Fatalf("Failed getting updated pod: %v", err) 1558 } 1559 diff = cmp.Diff(tc.wantConditions, pod.Status.Conditions, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")) 1560 return diff == "", nil 1561 }); err != nil { 1562 t.Fatalf("Failed waiting for worker to sync: %v, (-want,+got):\n%s", err, diff) 1563 } 1564 }) 1565 } 1566 } 1567 1568 func TestKeepExistingPDBConditionDuringSync(t *testing.T) { 1569 _, ctx := ktesting.NewTestContext(t) 1570 dc, ps := newFakeDisruptionController(ctx) 1571 1572 pdb, pdbName := newMinAvailablePodDisruptionBudget(t, intstr.FromInt32(3)) 1573 pdb.Spec.Selector = &metav1.LabelSelector{} 1574 1575 pdb.Status.Conditions = append(pdb.Status.Conditions, metav1.Condition{ 1576 Type: "ExistingTestCondition", 1577 Status: metav1.ConditionTrue, 1578 Message: "This is a test condition", 1579 Reason: "Test", 1580 LastTransitionTime: metav1.Now(), 1581 }) 1582 1583 add(t, dc.pdbStore, pdb) 1584 if err := dc.sync(ctx, 
pdbName); err != nil { 1585 t.Fatalf("Failed to sync PDB: %v", err) 1586 } 1587 ps.VerifyPdbStatus(t, pdbName, 0, 0, 3, 0, map[string]metav1.Time{}) 1588 1589 actualPDB := ps.Get(pdbName) 1590 condition := apimeta.FindStatusCondition(actualPDB.Status.Conditions, "ExistingTestCondition") 1591 if len(actualPDB.Status.Conditions) != 2 { 1592 t.Fatalf("Expected 2 conditions, but got %d", len(actualPDB.Status.Conditions)) 1593 } 1594 if condition == nil { 1595 t.Fatalf("Expected ExistingTestCondition condition, but didn't find it") 1596 } 1597 } 1598 1599 // waitForCacheCount blocks until the given cache store has the desired number 1600 // of items in it. This will return an error if the condition is not met after a 1601 // 10 second timeout. 1602 func waitForCacheCount(store cache.Store, n int) error { 1603 return wait.Poll(10*time.Millisecond, 10*time.Second, func() (bool, error) { 1604 return len(store.List()) == n, nil 1605 }) 1606 } 1607 1608 func verifyEventEmitted(t *testing.T, dc *disruptionController, expectedEvent string) { 1609 ticker := time.NewTicker(500 * time.Millisecond) 1610 for { 1611 select { 1612 case e := <-dc.recorder.(*record.FakeRecorder).Events: 1613 if strings.Contains(e, expectedEvent) { 1614 return 1615 } 1616 case <-ticker.C: 1617 t.Fatalf("Timed out: expected event not generated: %v", expectedEvent) 1618 } 1619 } 1620 }