sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package machinehealthcheck

import (
	"context"
	"errors"
	"fmt"
	"sort"
	"testing"
	"time"

	"github.com/go-logr/logr"
	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/tools/record"
	"k8s.io/utils/ptr"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
	"sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/remote"
	capierrors "sigs.k8s.io/cluster-api/errors"
	"sigs.k8s.io/cluster-api/internal/test/builder"
	"sigs.k8s.io/cluster-api/internal/webhooks"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
)

func TestMachineHealthCheck_Reconcile(t *testing.T) {
	ns, err := env.CreateNamespace(ctx, "test-mhc")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := env.Delete(ctx, ns); err != nil {
			t.Fatal(err)
		}
	}()

	t.Run("it should ensure the correct cluster-name label when no existing labels exist", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Labels = map[string]string{}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() map[string]string {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetLabels()
		}).Should(HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name))
	})

	t.Run("it should ensure the correct cluster-name label when the label has the wrong value", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Labels = map[string]string{
			clusterv1.ClusterNameLabel: "wrong-cluster",
		}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() map[string]string {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetLabels()
		}).Should(HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name))
	})

	t.Run("it should ensure the correct cluster-name label when other labels are present", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Labels = map[string]string{
			"extra-label": "1",
		}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() map[string]string {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetLabels()
		}).Should(And(
			HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name),
			HaveKeyWithValue("extra-label", "1"),
			HaveLen(2),
		))
	})

	t.Run("it should ensure an owner reference is present when no existing ones exist", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.OwnerReferences = []metav1.OwnerReference{}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() []metav1.OwnerReference {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				fmt.Printf("error retrieving mhc: %v", err)
				return nil
			}
			return mhc.GetOwnerReferences()
		}, timeout, 100*time.Millisecond).Should(And(
			HaveLen(1),
			ContainElement(ownerReferenceForCluster(ctx, g, cluster)),
		))
	})

	t.Run("it should ensure an owner reference is present when modifying existing ones", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.OwnerReferences = []metav1.OwnerReference{
			{Kind: "Foo", APIVersion: "foo.bar.baz/v1", Name: "Bar", UID: "12345"},
		}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() []metav1.OwnerReference {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetOwnerReferences()
		}, timeout, 100*time.Millisecond).Should(And(
			ContainElements(
				metav1.OwnerReference{Kind: "Foo", APIVersion: "foo.bar.baz/v1", Name: "Bar", UID: "12345"},
				ownerReferenceForCluster(ctx, g, cluster)),
			HaveLen(2),
		))
	})

	t.Run("it ignores Machines not matching the label selector", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines matching the MHC's label selector.
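		// Note: createMachinesWithNodes is a test helper defined later in this file; the
		// functional options passed to it (count, nodeStatus, machineLabels, and so on)
		// control how many Machine/Node pairs are created and what state they start in.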
		_, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Healthy nodes and machines NOT matching the MHC's label selector.
		_, _, cleanup2 := createMachinesWithNodes(g, cluster,
			count(2),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
		)
		defer cleanup2()

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}, 5*time.Second, 100*time.Millisecond).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    2,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it doesn't mark anything unhealthy when cluster infrastructure is not ready", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		patchHelper, err := patch.NewHelper(cluster, env.Client)
		g.Expect(err).ToNot(HaveOccurred())

		conditions.MarkFalse(cluster, clusterv1.InfrastructureReadyCondition, "SomeReason", clusterv1.ConditionSeverityError, "")
		g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed())

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    2,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it doesn't mark anything unhealthy when all Machines are healthy", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    2,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks unhealthy machines for remediation when there is one unhealthy Machine", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks unhealthy machines for remediation when a Machine has a failure reason", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Machine with failure reason.
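		// Note: machineFailureReason sets the Machine's status.failureReason. The MHC treats a
		// Machine with a failure reason (or failure message) as unhealthy even though its Node
		// reports Ready=True above, which is what this subtest exercises.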
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
			machineFailureReason("some failure"),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks unhealthy machines for remediation when a Machine has a failure message", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Machine with failure message.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
			machineFailureMessage("some failure"),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it blocks remediation when the unhealthy Machines exceed MaxUnhealthy", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		maxUnhealthy := intstr.Parse("40%")
		mhc.Spec.MaxUnhealthy = &maxUnhealthy

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

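		// Note: with 3 target Machines and maxUnhealthy of 40%, the controller tolerates at
		// most 1 unhealthy Machine (40% of 3, rounded down). The two unhealthy Machines created
		// below exceed that limit, so remediation is expected to be blocked (RemediationsAllowed: 0
		// and a False RemediationAllowed condition with reason TooManyUnhealthy).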
		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(2),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      1,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:     clusterv1.RemediationAllowedCondition,
					Status:   corev1.ConditionFalse,
					Severity: clusterv1.ConditionSeverityWarning,
					Reason:   clusterv1.TooManyUnhealthyReason,
					Message:  "Remediation is not allowed, the number of not started or unhealthy machines exceeds maxUnhealthy (total: 3, unhealthy: 2, maxUnhealthy: 40%)",
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(2))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsTrue(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("it marks unhealthy machines for remediation when the number of unhealthy machines is within unhealthyRange", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		unhealthyRange := "[1-3]"
		mhc.Spec.UnhealthyRange = &unhealthyRange

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
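		// Note: unhealthyRange "[1-3]" allows remediation while the number of unhealthy Machines
		// is between 1 and 3 inclusive; the single unhealthy Machine created below falls inside
		// that range, so remediation stays allowed.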
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it blocks remediation when the number of unhealthy Machines is not within UnhealthyRange", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		unhealthyRange := "[3-5]"
		mhc.Spec.UnhealthyRange = &unhealthyRange

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(2),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      1,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:     clusterv1.RemediationAllowedCondition,
					Status:   corev1.ConditionFalse,
					Severity: clusterv1.ConditionSeverityWarning,
					Reason:   clusterv1.TooManyUnhealthyReason,
					Message:  "Remediation is not allowed, the number of not started or unhealthy machines does not fall within the range (total: 3, unhealthy: 2, unhealthyRange: [3-5])",
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
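		// Note: even when remediation is blocked, the MHC still marks each unhealthy target with
		// MachineHealthCheckSucceeded=False; the separate MachineOwnerRemediated condition
		// (checked right after) is what signals the owning controller to actually remediate.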
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(2))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("when a Machine has no Node ref for less than the NodeStartupTimeout", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// After the cluster exists, we have to set the infrastructure ready condition; otherwise, MachineHealthChecks
		// will never fail when nodeStartupTimeout is exceeded.
		patchHelper, err := patch.NewHelper(cluster, env.GetClient())
		g.Expect(err).ToNot(HaveOccurred())

		conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition)
		g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed())

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: 5 * time.Hour}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(false),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
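		// Note: the Machine without a Node ref is not counted as healthy (CurrentHealthy is 2 of
		// 3 above), but with a 5 hour NodeStartupTimeout it has not yet timed out either, so no
		// Machine is expected to carry MachineHealthCheckSucceeded=False here.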
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(0))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsTrue(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("when a Machine has no Node ref for longer than the NodeStartupTimeout", func(t *testing.T) {
		// FIXME: Resolve flaky/failing test
		t.Skip("skipping until made stable")
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: time.Second}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(false),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)

		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the MHC status matches. We have two healthy machines and
		// one unhealthy.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				fmt.Printf("error retrieving mhc: %v", err)
				return nil
			}
			return &mhc.Status
		}, timeout, 100*time.Millisecond).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				fmt.Printf("error retrieving list: %v", err)
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))
	})

	t.Run("when a Machine's Node has gone away", func(t *testing.T) {
		// FIXME: Resolve flaky/failing test
		t.Skip("skipping until made stable")
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(3),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Forcibly remove the last machine's node.
		g.Eventually(func() bool {
			nodeToBeRemoved := nodes[2]
			if err := env.Delete(ctx, nodeToBeRemoved); err != nil {
				return apierrors.IsNotFound(err)
			}
			return apierrors.IsNotFound(env.Get(ctx, util.ObjectKey(nodeToBeRemoved), nodeToBeRemoved))
		}).Should(BeTrue())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))
	})

	t.Run("should react when a Node transitions to unhealthy", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      1,
			RemediationsAllowed: 1,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Transition the node to unhealthy.
		node := nodes[0]
		nodePatch := client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionUnknown,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:   1,
			CurrentHealthy:     0,
			ObservedGeneration: 1,
			Targets:            targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Calculate how many Machines have been marked for remediation.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) {
					remediated++
				}
			}
			return
		}).Should(Equal(1))
	})

	t.Run("when in a MachineSet, unhealthy machines should be deleted", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// Create 1 control plane machine so MHC can proceed.
		_, _, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
		)
		defer cleanup()

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		// Create infrastructure template resource.
		infraResource := map[string]interface{}{
			"kind":       "GenericInfrastructureMachine",
			"apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1",
			"metadata":   map[string]interface{}{},
			"spec": map[string]interface{}{
				"size": "3xlarge",
			},
		}
		infraTmpl := &unstructured.Unstructured{
			Object: map[string]interface{}{
				"spec": map[string]interface{}{
					"template": infraResource,
				},
			},
		}
		infraTmpl.SetKind("GenericInfrastructureMachineTemplate")
		infraTmpl.SetAPIVersion("infrastructure.cluster.x-k8s.io/v1beta1")
		infraTmpl.SetGenerateName("mhc-ms-template-")
		infraTmpl.SetNamespace(mhc.Namespace)

		g.Expect(env.Create(ctx, infraTmpl)).To(Succeed())

		machineSet := &clusterv1.MachineSet{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "mhc-ms-",
				Namespace:    mhc.Namespace,
			},
			Spec: clusterv1.MachineSetSpec{
				ClusterName: cluster.Name,
				Replicas:    ptr.To[int32](1),
				Selector:    mhc.Spec.Selector,
				Template: clusterv1.MachineTemplateSpec{
					ObjectMeta: clusterv1.ObjectMeta{
						Labels: mhc.Spec.Selector.MatchLabels,
					},
					Spec: clusterv1.MachineSpec{
						ClusterName: cluster.Name,
						Bootstrap: clusterv1.Bootstrap{
							DataSecretName: ptr.To("test-data-secret-name"),
						},
						InfrastructureRef: corev1.ObjectReference{
							APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1",
							Kind:       "GenericInfrastructureMachineTemplate",
							Name:       infraTmpl.GetName(),
						},
					},
				},
			},
		}

		reqCtx := admission.NewContextWithRequest(ctx, admission.Request{})
		g.Expect((&webhooks.MachineSet{}).Default(reqCtx, machineSet)).Should(Succeed())
		g.Expect(env.Create(ctx, machineSet)).To(Succeed())

		// Ensure machines have been created.
		g.Eventually(func() int {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}
			return len(machines.Items)
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Create the MachineHealthCheck instance.
		mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: time.Second}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		// Defer cleanup for all the objects that have been created.
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc, infraTmpl, machineSet)

		// Pause the MachineSet reconciler to delay the deletion of the
		// Machine, because the MachineSet controller deletes the Machine when
		// it is marked unhealthy by MHC.
		machineSetPatch := client.MergeFrom(machineSet.DeepCopy())
		machineSet.Annotations = map[string]string{
			clusterv1.PausedAnnotation: "",
		}
		g.Expect(env.Patch(ctx, machineSet, machineSetPatch)).To(Succeed())

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Calculate how many Machines should be remediated.
		var unhealthyMachine *clusterv1.Machine
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					unhealthyMachine = machines.Items[i].DeepCopy()
					remediated++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Unpause the MachineSet reconciler.
		machineSetPatch = client.MergeFrom(machineSet.DeepCopy())
		delete(machineSet.Annotations, clusterv1.PausedAnnotation)
		g.Expect(env.Patch(ctx, machineSet, machineSetPatch)).To(Succeed())

		// Make sure the Machine gets deleted.
		g.Eventually(func() bool {
			machine := unhealthyMachine.DeepCopy()
			err := env.Get(ctx, util.ObjectKey(unhealthyMachine), machine)
			return apierrors.IsNotFound(err) || !machine.DeletionTimestamp.IsZero()
		}, timeout, 100*time.Millisecond).Should(BeTrue())
	})

	t.Run("when a machine is paused", func(t *testing.T) {
		// FIXME: Resolve flaky/failing test
		t.Skip("skipping until made stable")
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:   1,
			CurrentHealthy:     1,
			ObservedGeneration: 1,
			Targets:            targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Pause the machine.
		machinePatch := client.MergeFrom(machines[0].DeepCopy())
		machines[0].Annotations = map[string]string{
			clusterv1.PausedAnnotation: "",
		}
		g.Expect(env.Patch(ctx, machines[0], machinePatch)).To(Succeed())

		// Transition the node to unhealthy.
		node := nodes[0]
		nodePatch := client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionUnknown,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      0,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Calculate how many Machines have been remediated.
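		// Note: the Machine was paused before its Node went unhealthy, so while the health check
		// itself still fails (one MachineHealthCheckSucceeded=False above), no
		// MachineOwnerRemediatedCondition is expected to be set on the paused Machine below.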
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("When remediationTemplate is set and node transitions to unhealthy, new Remediation Request should be created", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// Create remediation template resource.
		infraRemediationResource := map[string]interface{}{
			"kind":       "GenericExternalRemediation",
			"apiVersion": builder.RemediationGroupVersion.String(),
			"metadata":   map[string]interface{}{},
			"spec": map[string]interface{}{
				"size": "3xlarge",
			},
		}
		infraRemediationTmpl := &unstructured.Unstructured{
			Object: map[string]interface{}{
				"spec": map[string]interface{}{
					"template": infraRemediationResource,
				},
			},
		}
		infraRemediationTmpl.SetKind("GenericExternalRemediationTemplate")
		infraRemediationTmpl.SetAPIVersion(builder.RemediationGroupVersion.String())
		infraRemediationTmpl.SetGenerateName("remediation-template-name-")
		infraRemediationTmpl.SetNamespace(cluster.Namespace)
		g.Expect(env.Create(ctx, infraRemediationTmpl)).To(Succeed())

		remediationTemplate := &corev1.ObjectReference{
			APIVersion: builder.RemediationGroupVersion.String(),
			Kind:       "GenericExternalRemediationTemplate",
			Name:       infraRemediationTmpl.GetName(),
		}

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Spec.RemediationTemplate = remediationTemplate
		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc, infraRemediationTmpl)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      1,
			RemediationsAllowed: 1,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Transition the node to unhealthy.
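		// Note: backdating LastTransitionTime by 10 minutes makes the Ready=Unknown condition
		// immediately exceed the unhealthy-condition timeout configured by the
		// newMachineHealthCheck helper, so the target fails the health check on the next
		// reconcile instead of after a real wait.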
		node := nodes[0]
		nodePatch := client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionUnknown,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      0,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		ref := corev1.ObjectReference{
			APIVersion: builder.RemediationGroupVersion.String(),
			Kind:       "GenericExternalRemediation",
		}

		obj := util.ObjectReferenceToUnstructured(ref)
		// Make sure the Remediation Request is created.
		g.Eventually(func() *unstructured.Unstructured {
			key := client.ObjectKey{
				Namespace: machines[0].Namespace,
				Name:      machines[0].Name,
			}
			err := env.Get(ctx, key, obj)
			if err != nil {
				return nil
			}
			return obj
		}, timeout, 100*time.Millisecond).ShouldNot(BeNil())
		g.Expect(obj.GetOwnerReferences()).To(HaveLen(1))
		g.Expect(obj.GetOwnerReferences()[0].Name).To(Equal(machines[0].Name))
	})

	t.Run("When remediationTemplate is set and node transitions back to healthy, new Remediation Request should be deleted", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// Create remediation template resource.
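		// Note: as in the previous subtest, the MHC is expected to clone this template into a
		// "GenericExternalRemediation" request named after the unhealthy Machine; this subtest
		// additionally verifies that the request is deleted once the Node reports healthy again.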
		infraRemediationResource := map[string]interface{}{
			"kind":       "GenericExternalRemediation",
			"apiVersion": builder.RemediationGroupVersion.String(),
			"metadata":   map[string]interface{}{},
			"spec": map[string]interface{}{
				"size": "3xlarge",
			},
		}
		infraRemediationTmpl := &unstructured.Unstructured{
			Object: map[string]interface{}{
				"spec": map[string]interface{}{
					"template": infraRemediationResource,
				},
			},
		}
		infraRemediationTmpl.SetKind("GenericExternalRemediationTemplate")
		infraRemediationTmpl.SetAPIVersion(builder.RemediationGroupVersion.String())
		infraRemediationTmpl.SetGenerateName("remediation-template-name-")
		infraRemediationTmpl.SetNamespace(cluster.Namespace)
		g.Expect(env.Create(ctx, infraRemediationTmpl)).To(Succeed())

		remediationTemplate := &corev1.ObjectReference{
			APIVersion: builder.RemediationGroupVersion.String(),
			Kind:       "GenericExternalRemediationTemplate",
			Name:       infraRemediationTmpl.GetName(),
		}

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Spec.RemediationTemplate = remediationTemplate
		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc, infraRemediationTmpl)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      1,
			RemediationsAllowed: 1,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Transition the node to unhealthy.
		node := nodes[0]
		nodePatch := client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionUnknown,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      0,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Transition the node back to healthy.
		node = nodes[0]
		nodePatch = client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionTrue,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      1,
			RemediationsAllowed: 1,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(0))

		ref := corev1.ObjectReference{
			APIVersion: builder.RemediationGroupVersion.String(),
			Kind:       "GenericExternalRemediation",
		}

		obj := util.ObjectReferenceToUnstructured(ref)
		// Make sure the Remediation Request is deleted.
		g.Eventually(func() *unstructured.Unstructured {
			key := client.ObjectKey{
				Namespace: machines[0].Namespace,
				Name:      machines[0].Name,
			}
			err := env.Get(ctx, key, obj)
			if err != nil {
				return nil
			}
			return obj
		}, timeout, 100*time.Millisecond).Should(BeNil())
	})
}

func TestClusterToMachineHealthCheck(t *testing.T) {
	fakeClient := fake.NewClientBuilder().Build()

	r := &Reconciler{
		Client: fakeClient,
	}

	namespace := metav1.NamespaceDefault
	clusterName := testClusterName
	labels := make(map[string]string)

	mhc1 := newMachineHealthCheckWithLabels("mhc1", namespace, clusterName, labels)
	mhc1Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc1.Namespace, Name: mhc1.Name}}
	mhc2 := newMachineHealthCheckWithLabels("mhc2", namespace, clusterName, labels)
	mhc2Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc2.Namespace, Name: mhc2.Name}}
	mhc3 := newMachineHealthCheckWithLabels("mhc3", namespace, "othercluster", labels)
	mhc4 := newMachineHealthCheckWithLabels("mhc4", "othernamespace", clusterName, labels)
	cluster1 := &clusterv1.Cluster{
		ObjectMeta: metav1.ObjectMeta{
			Name:      clusterName,
			Namespace: namespace,
		},
	}

	testCases := []struct {
		name     string
		toCreate []clusterv1.MachineHealthCheck
		object   client.Object
		expected []reconcile.Request
	}{
		{
			name:     "when a MachineHealthCheck exists for the Cluster in the same namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc1},
			object:   cluster1,
			expected: []reconcile.Request{mhc1Req},
		},
		{
			name:     "when 2 MachineHealthChecks exist for the Cluster in the same namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc1, *mhc2},
			object:   cluster1,
			expected: []reconcile.Request{mhc1Req, mhc2Req},
		},
		{
			name:     "when a MachineHealthCheck exists for another Cluster in the same namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc3},
			object:   cluster1,
			expected: []reconcile.Request{},
		},
		{
			name:     "when a MachineHealthCheck exists for another Cluster in another namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc4},
			object:   cluster1,
			expected: []reconcile.Request{},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			gs := NewWithT(t)

			for _, obj := range tc.toCreate {
				o := obj
				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
				defer func() {
					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
				}()
				// Check the cache is populated
				getObj := func() error {
					return r.Client.Get(ctx, util.ObjectKey(&o), &clusterv1.MachineHealthCheck{})
				}
				gs.Eventually(getObj).Should(Succeed())
			}

			got := r.clusterToMachineHealthCheck(ctx, tc.object)
			gs.Expect(got).To(ConsistOf(tc.expected))
		})
	}
}

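// TestMachineToMachineHealthCheck exercises the Machine -> MHC mapping used
// to route watch events: a Machine should enqueue every MachineHealthCheck
// in its own namespace whose selector matches the Machine's labels. In the
// table below, mhc3 selects on a different "nodepool" label value and mhc4
// lives in another namespace, so neither should be enqueued for machine1.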
func TestMachineToMachineHealthCheck(t *testing.T) {
	fakeClient := fake.NewClientBuilder().Build()

	r := &Reconciler{
		Client: fakeClient,
	}

	namespace := metav1.NamespaceDefault
	clusterName := testClusterName
	nodeName := "node1"
	labels := map[string]string{"cluster": "foo", "nodepool": "bar"}

	mhc1 := newMachineHealthCheckWithLabels("mhc1", namespace, clusterName, labels)
	mhc1Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc1.Namespace, Name: mhc1.Name}}
	mhc2 := newMachineHealthCheckWithLabels("mhc2", namespace, clusterName, labels)
	mhc2Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc2.Namespace, Name: mhc2.Name}}
	mhc3 := newMachineHealthCheckWithLabels("mhc3", namespace, clusterName, map[string]string{"cluster": "foo", "nodepool": "other"})
	mhc4 := newMachineHealthCheckWithLabels("mhc4", "othernamespace", clusterName, labels)
	machine1 := newTestMachine("machine1", namespace, clusterName, nodeName, labels)

	testCases := []struct {
		name     string
		toCreate []clusterv1.MachineHealthCheck
		object   client.Object
		expected []reconcile.Request
	}{
		{
			name:     "when a MachineHealthCheck matches labels for the Machine in the same namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc1},
			object:   machine1,
			expected: []reconcile.Request{mhc1Req},
		},
		{
			name:     "when 2 MachineHealthChecks match labels for the Machine in the same namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc1, *mhc2},
			object:   machine1,
			expected: []reconcile.Request{mhc1Req, mhc2Req},
		},
		{
			name:     "when a MachineHealthCheck does not match labels for the Machine in the same namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc3},
			object:   machine1,
			expected: []reconcile.Request{},
		},
		{
			name:     "when a MachineHealthCheck matches labels for the Machine in another namespace",
			toCreate: []clusterv1.MachineHealthCheck{*mhc4},
			object:   machine1,
			expected: []reconcile.Request{},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			gs := NewWithT(t)

			for _, obj := range tc.toCreate {
				o := obj
				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
				defer func() {
					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
				}()
				// Check the cache is populated
				getObj := func() error {
					return r.Client.Get(ctx, util.ObjectKey(&o), &clusterv1.MachineHealthCheck{})
				}
				gs.Eventually(getObj).Should(Succeed())
			}

			got := r.machineToMachineHealthCheck(ctx, tc.object)
			gs.Expect(got).To(ConsistOf(tc.expected))
		})
	}
}

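// TestNodeToMachineHealthCheck covers the Node -> MHC mapping. Resolving a
// Node back to its owning Machine goes through the MachineNodeNameField
// index, so the fake client below has to be built with the same index (and
// with status subresources, since NodeRef lives in .status) that the real
// manager registers; without it the lookup by node name would find nothing.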
func TestNodeToMachineHealthCheck(t *testing.T) {
	fakeClient := fake.NewClientBuilder().
		WithIndex(&clusterv1.Machine{}, index.MachineNodeNameField, index.MachineByNodeName).
		WithStatusSubresource(&clusterv1.MachineHealthCheck{}, &clusterv1.Machine{}).
		Build()

	r := &Reconciler{
		Client: fakeClient,
	}

	namespace := metav1.NamespaceDefault
	clusterName := testClusterName
	nodeName := "node1"
	labels := map[string]string{"cluster": "foo", "nodepool": "bar"}

	mhc1 := newMachineHealthCheckWithLabels("mhc1", namespace, clusterName, labels)
	mhc1Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc1.Namespace, Name: mhc1.Name}}
	mhc2 := newMachineHealthCheckWithLabels("mhc2", namespace, clusterName, labels)
	mhc2Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc2.Namespace, Name: mhc2.Name}}
	mhc3 := newMachineHealthCheckWithLabels("mhc3", namespace, "othercluster", labels)
	mhc4 := newMachineHealthCheckWithLabels("mhc4", "othernamespace", clusterName, labels)

	machine1 := newTestMachine("machine1", namespace, clusterName, nodeName, labels)
	machine2 := newTestMachine("machine2", namespace, clusterName, nodeName, labels)

	node1 := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: nodeName,
		},
	}

	testCases := []struct {
		name        string
		mhcToCreate []clusterv1.MachineHealthCheck
		mToCreate   []clusterv1.Machine
		object      client.Object
		expected    []reconcile.Request
	}{
		{
			name:        "when no Machine exists for the Node",
			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1},
			mToCreate:   []clusterv1.Machine{},
			object:      node1,
			expected:    []reconcile.Request{},
		},
		{
			name:        "when two Machines exist for the Node",
			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1},
			mToCreate:   []clusterv1.Machine{*machine1, *machine2},
			object:      node1,
			expected:    []reconcile.Request{},
		},
		{
			name:        "when no MachineHealthCheck exists for the Node in the Machine's namespace",
			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc4},
			mToCreate:   []clusterv1.Machine{*machine1},
			object:      node1,
			expected:    []reconcile.Request{},
		},
		{
			name:        "when a MachineHealthCheck exists for the Node in the Machine's namespace",
			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1},
			mToCreate:   []clusterv1.Machine{*machine1},
			object:      node1,
			expected:    []reconcile.Request{mhc1Req},
		},
		{
			name:        "when two MachineHealthChecks exist for the Node in the Machine's namespace",
			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1, *mhc2},
			mToCreate:   []clusterv1.Machine{*machine1},
			object:      node1,
			expected:    []reconcile.Request{mhc1Req, mhc2Req},
		},
		{
			name:        "when a MachineHealthCheck exists for the Node, but not in the Machine's cluster",
			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc3},
			mToCreate:   []clusterv1.Machine{*machine1},
			object:      node1,
			expected:    []reconcile.Request{},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			gs := NewWithT(t)

			for _, obj := range tc.mhcToCreate {
				o := obj
				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
				defer func() {
					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
				}()
				// Check the cache is populated
				key := util.ObjectKey(&o)
				getObj := func() error {
					return r.Client.Get(ctx, key, &clusterv1.MachineHealthCheck{})
				}
				gs.Eventually(getObj).Should(Succeed())
			}
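			// Machines are created in two steps because the fake client was
			// built with a Machine status subresource: Create drops .Status,
			// so NodeRef has to be pushed with a follow-up Status().Update
			// before the node-name index can resolve the Node to a Machine.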
			for _, obj := range tc.mToCreate {
				o := obj
				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
				defer func() {
					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
				}()
				// Ensure the status is set (required for matching node to machine)
				o.Status = obj.Status
				gs.Expect(r.Client.Status().Update(ctx, &o)).To(Succeed())

				// Check the cache is up to date with the status update
				key := util.ObjectKey(&o)
				checkStatus := func() clusterv1.MachineStatus {
					m := &clusterv1.Machine{}
					err := r.Client.Get(ctx, key, m)
					if err != nil {
						return clusterv1.MachineStatus{}
					}
					return m.Status
				}
				gs.Eventually(checkStatus).Should(BeComparableTo(o.Status))
			}

			got := r.nodeToMachineHealthCheck(ctx, tc.object)
			gs.Expect(got).To(ConsistOf(tc.expected))
		})
	}
}

func TestIsAllowedRemediation(t *testing.T) {
	testCases := []struct {
		name               string
		maxUnhealthy       *intstr.IntOrString
		expectedMachines   int32
		currentHealthy     int32
		allowed            bool
		observedGeneration int64
	}{
		{
			name:             "when maxUnhealthy is not set",
			maxUnhealthy:     nil,
			expectedMachines: int32(3),
			currentHealthy:   int32(0),
			allowed:          false,
		},
		{
			name:             "when maxUnhealthy is not an int or percentage",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "abcdef"},
			expectedMachines: int32(5),
			currentHealthy:   int32(2),
			allowed:          false,
		},
		{
			name:             "when maxUnhealthy is an int less than current unhealthy",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.Int, IntVal: int32(1)},
			expectedMachines: int32(3),
			currentHealthy:   int32(1),
			allowed:          false,
		},
		{
			name:             "when maxUnhealthy is an int equal to current unhealthy",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.Int, IntVal: int32(2)},
			expectedMachines: int32(3),
			currentHealthy:   int32(1),
			allowed:          true,
		},
		{
			name:             "when maxUnhealthy is an int greater than current unhealthy",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.Int, IntVal: int32(3)},
			expectedMachines: int32(3),
			currentHealthy:   int32(1),
			allowed:          true,
		},
		{
			name:             "when maxUnhealthy is a percentage less than current unhealthy",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "50%"},
			expectedMachines: int32(5),
			currentHealthy:   int32(2),
			allowed:          false,
		},
		{
			name:             "when maxUnhealthy is a percentage equal to current unhealthy",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "60%"},
			expectedMachines: int32(5),
			currentHealthy:   int32(2),
			allowed:          true,
		},
		{
			name:             "when maxUnhealthy is a percentage greater than current unhealthy",
			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "70%"},
			expectedMachines: int32(5),
			currentHealthy:   int32(2),
			allowed:          true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			g := NewWithT(t)

			mhc := &clusterv1.MachineHealthCheck{
				Spec: clusterv1.MachineHealthCheckSpec{
					MaxUnhealthy:       tc.maxUnhealthy,
					NodeStartupTimeout: &metav1.Duration{Duration: 1 * time.Millisecond},
				},
				Status: clusterv1.MachineHealthCheckStatus{
					ExpectedMachines:   tc.expectedMachines,
					CurrentHealthy:     tc.currentHealthy,
					ObservedGeneration: tc.observedGeneration,
				},
			}

			remediationAllowed, _, _ := isAllowedRemediation(mhc)
			g.Expect(remediationAllowed).To(Equal(tc.allowed))
		})
	}
}

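// A worked example of the arithmetic exercised above (a sketch of the
// intended semantics, not a restatement of isAllowedRemediation itself):
// unhealthy = ExpectedMachines - CurrentHealthy, and remediation is allowed
// while unhealthy <= maxUnhealthy scaled against ExpectedMachines. For the
// "60%" case: unhealthy = 5 - 2 = 3, and 60% of 5 rounds down to 3, so
// 3 <= 3 and remediation is allowed; at "50%" the budget is only 2, so
// remediation is blocked.
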
func TestGetMaxUnhealthy(t *testing.T) {
	testCases := []struct {
		name                 string
		maxUnhealthy         *intstr.IntOrString
		expectedMaxUnhealthy int
		actualMachineCount   int32
		expectedErr          error
	}{
		{
			name:                 "when maxUnhealthy is nil",
			maxUnhealthy:         nil,
			expectedMaxUnhealthy: 0,
			actualMachineCount:   7,
			expectedErr:          errors.New("spec.maxUnhealthy must be set"),
		},
		{
			name:                 "when maxUnhealthy is not an int or percentage",
			maxUnhealthy:         &intstr.IntOrString{Type: intstr.String, StrVal: "abcdef"},
			expectedMaxUnhealthy: 0,
			actualMachineCount:   3,
			expectedErr:          errors.New("invalid value for IntOrString: invalid type: string is not a percentage"),
		},
		{
			name:                 "when maxUnhealthy is an int",
			maxUnhealthy:         &intstr.IntOrString{Type: intstr.Int, IntVal: 3},
			actualMachineCount:   2,
			expectedMaxUnhealthy: 3,
			expectedErr:          nil,
		},
		{
			name:                 "when maxUnhealthy is 40% (of 5)",
			maxUnhealthy:         &intstr.IntOrString{Type: intstr.String, StrVal: "40%"},
			actualMachineCount:   5,
			expectedMaxUnhealthy: 2,
			expectedErr:          nil,
		},
		{
			name:                 "when maxUnhealthy is 60% (of 7)",
			maxUnhealthy:         &intstr.IntOrString{Type: intstr.String, StrVal: "60%"},
			actualMachineCount:   7,
			expectedMaxUnhealthy: 4,
			expectedErr:          nil,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			g := NewWithT(t)

			mhc := &clusterv1.MachineHealthCheck{
				Spec: clusterv1.MachineHealthCheckSpec{
					MaxUnhealthy: tc.maxUnhealthy,
				},
				Status: clusterv1.MachineHealthCheckStatus{
					ExpectedMachines: tc.actualMachineCount,
				},
			}

			maxUnhealthy, err := getMaxUnhealthy(mhc)
			if tc.expectedErr != nil {
				g.Expect(err).To(MatchError(tc.expectedErr.Error()))
			} else {
				g.Expect(err).ToNot(HaveOccurred())
			}
			g.Expect(maxUnhealthy).To(Equal(tc.expectedMaxUnhealthy))
		})
	}
}

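// The percentage expectations above assume round-down scaling, which is how
// intstr.GetScaledValueFromIntOrPercent behaves when asked not to round up:
// 40% of 5 machines is exactly 2, and 60% of 7 is 4.2, truncated to 4.
// Rounding down is the conservative choice for a remediation budget, since
// it never permits more disruption than the configured percentage.
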
func ownerReferenceForCluster(ctx context.Context, g *WithT, c *clusterv1.Cluster) metav1.OwnerReference {
	// Fetch the cluster to populate the UID
	cc := &clusterv1.Cluster{}
	g.Expect(env.Get(ctx, util.ObjectKey(c), cc)).To(Succeed())

	return metav1.OwnerReference{
		APIVersion: clusterv1.GroupVersion.String(),
		Kind:       "Cluster",
		Name:       cc.Name,
		UID:        cc.UID,
	}
}

// createCluster creates a Cluster and a KubeconfigSecret for that Cluster in the given namespace.
func createCluster(g *WithT, namespaceName string) *clusterv1.Cluster {
	cluster := &clusterv1.Cluster{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "test-cluster-",
			Namespace:    namespaceName,
		},
	}

	g.Expect(env.Create(ctx, cluster)).To(Succeed())

	// Make sure the cluster is in the cache before proceeding.
	g.Eventually(func() error {
		var cl clusterv1.Cluster
		return env.Get(ctx, util.ObjectKey(cluster), &cl)
	}, timeout, 100*time.Millisecond).Should(Succeed())

	// This is required for the MHC to perform checks.
	patchHelper, err := patch.NewHelper(cluster, env.Client)
	g.Expect(err).ToNot(HaveOccurred())
	conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition)
	g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed())

	// Wait for the cluster in the cache to be updated post-patch.
	g.Eventually(func() bool {
		err := env.Get(ctx, util.ObjectKey(cluster), cluster)
		if err != nil {
			return false
		}

		return conditions.IsTrue(cluster, clusterv1.InfrastructureReadyCondition)
	}, timeout, 100*time.Millisecond).Should(BeTrue())

	g.Expect(env.CreateKubeconfigSecret(ctx, cluster)).To(Succeed())

	return cluster
}

// newRunningMachine creates a Machine object with a Status.Phase == Running.
func newRunningMachine(c *clusterv1.Cluster, labels map[string]string) *clusterv1.Machine {
	return &clusterv1.Machine{
		TypeMeta: metav1.TypeMeta{
			APIVersion: clusterv1.GroupVersion.String(),
			Kind:       "Machine",
		},
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "test-mhc-machine-",
			Namespace:    c.Namespace,
			Labels:       labels,
		},
		Spec: clusterv1.MachineSpec{
			ClusterName: c.Name,
			Bootstrap: clusterv1.Bootstrap{
				DataSecretName: ptr.To("data-secret-name"),
			},
		},
		Status: clusterv1.MachineStatus{
			InfrastructureReady: true,
			BootstrapReady:      true,
			Phase:               string(clusterv1.MachinePhaseRunning),
			ObservedGeneration:  1,
		},
	}
}

func newInfraMachine(machine *clusterv1.Machine) (*unstructured.Unstructured, string) {
	providerID := fmt.Sprintf("test:////%v", uuid.NewUUID())
	return &unstructured.Unstructured{
		Object: map[string]interface{}{
			"apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1",
			"kind":       "GenericInfrastructureMachine",
			"metadata": map[string]interface{}{
				"generateName": "test-mhc-machine-infra-",
				"namespace":    machine.Namespace,
			},
			"spec": map[string]interface{}{
				"providerID": providerID,
			},
		},
	}, providerID
}

type machinesWithNodes struct {
	count                      int
	nodeStatus                 corev1.ConditionStatus
	createNodeRefForMachine    bool
	firstMachineAsControlPlane bool
	labels                     map[string]string
	failureReason              string
	failureMessage             string
}

type machineWithNodesOption func(m *machinesWithNodes)

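// The helpers below follow the functional options pattern: each constructor
// returns a closure that mutates the machinesWithNodes config, and
// createMachinesWithNodes applies them in order. A call site (with
// illustrative values) looks like:
//
//	createMachinesWithNodes(g, cluster,
//		count(3),
//		createNodeRefForMachine(true),
//		nodeStatus(corev1.ConditionTrue),
//	)
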
func count(n int) machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.count = n
	}
}

func firstMachineAsControlPlane() machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.firstMachineAsControlPlane = true
	}
}

func nodeStatus(s corev1.ConditionStatus) machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.nodeStatus = s
	}
}

func createNodeRefForMachine(b bool) machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.createNodeRefForMachine = b
	}
}

func machineLabels(l map[string]string) machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.labels = l
	}
}

func machineFailureReason(s string) machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.failureReason = s
	}
}

func machineFailureMessage(s string) machineWithNodesOption {
	return func(m *machinesWithNodes) {
		m.failureMessage = s
	}
}

func createMachinesWithNodes(
	g *WithT,
	c *clusterv1.Cluster,
	opts ...machineWithNodesOption,
) ([]*corev1.Node, []*clusterv1.Machine, func()) {
	o := &machinesWithNodes{}
	for _, op := range opts {
		op(o)
	}

	var (
		nodes         []*corev1.Node
		machines      []*clusterv1.Machine
		infraMachines []*unstructured.Unstructured
	)

	for i := 0; i < o.count; i++ {
		machine := newRunningMachine(c, o.labels)
		if i == 0 && o.firstMachineAsControlPlane {
			if machine.Labels == nil {
				machine.Labels = make(map[string]string)
			}
			machine.Labels[clusterv1.MachineControlPlaneLabel] = ""
		}
		infraMachine, providerID := newInfraMachine(machine)
		g.Expect(env.Create(ctx, infraMachine)).To(Succeed())
		infraMachines = append(infraMachines, infraMachine)
		fmt.Printf("inframachine created: %s\n", infraMachine.GetName())
		// Patch the status of the InfraMachine and mark it as ready.
		// NB. Status cannot be set during object creation so we need to patch
		// it separately.
		infraMachinePatch := client.MergeFrom(infraMachine.DeepCopy())
		g.Expect(unstructured.SetNestedField(infraMachine.Object, true, "status", "ready")).To(Succeed())
		g.Expect(env.Status().Patch(ctx, infraMachine, infraMachinePatch)).To(Succeed())

		machine.Spec.InfrastructureRef = corev1.ObjectReference{
			APIVersion: infraMachine.GetAPIVersion(),
			Kind:       infraMachine.GetKind(),
			Name:       infraMachine.GetName(),
		}
		g.Expect(env.Create(ctx, machine)).To(Succeed())
		fmt.Printf("machine created: %s\n", machine.GetName())

		// Before moving on we want to ensure that the machine has a valid
		// status. That is, LastUpdated should not be nil.
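		// LastUpdated is only filled in once the Machine has actually been
		// reconciled, so (assuming the test env runs the relevant
		// controllers) a non-nil value is a cheap signal that the Machine
		// has been observed at least once before we mutate its status below.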
		g.Eventually(func() *metav1.Time {
			k := client.ObjectKey{
				Name:      machine.GetName(),
				Namespace: machine.GetNamespace(),
			}
			err := env.Get(ctx, k, machine)
			if err != nil {
				return nil
			}
			return machine.Status.LastUpdated
		}, timeout, 100*time.Millisecond).ShouldNot(BeNil())

		machinePatchHelper, err := patch.NewHelper(machine, env.Client)
		g.Expect(err).ToNot(HaveOccurred())

		if o.createNodeRefForMachine {
			// Create node
			node := &corev1.Node{
				ObjectMeta: metav1.ObjectMeta{
					GenerateName: "test-mhc-node-",
				},
				Spec: corev1.NodeSpec{
					ProviderID: providerID,
				},
			}

			g.Expect(env.Create(ctx, node)).To(Succeed())
			fmt.Printf("node created: %s\n", node.GetName())

			// Patch node status
			nodePatchHelper, err := patch.NewHelper(node, env.Client)
			g.Expect(err).ToNot(HaveOccurred())

			node.Status.Conditions = []corev1.NodeCondition{
				{
					Type:               corev1.NodeReady,
					Status:             o.nodeStatus,
					LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
				},
			}

			g.Expect(nodePatchHelper.Patch(ctx, node)).To(Succeed())

			nodes = append(nodes, node)

			machine.Status.NodeRef = &corev1.ObjectReference{
				Name: node.Name,
			}
		}

		if o.failureReason != "" {
			failureReason := capierrors.MachineStatusError(o.failureReason)
			machine.Status.FailureReason = &failureReason
		}
		if o.failureMessage != "" {
			machine.Status.FailureMessage = ptr.To(o.failureMessage)
		}

		// Adding one second to ensure there is a difference from the
		// original time so that the patch works. That is, ensure the
		// precision isn't lost during conversions.
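		// metav1.Time round-trips through the API with one-second precision
		// (it serializes as RFC 3339 without fractional seconds), so a
		// sub-second bump could be lost in conversion and yield an empty
		// diff; adding a full second guarantees the patch carries a change.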
		lastUp := metav1.NewTime(machine.Status.LastUpdated.Add(time.Second))
		machine.Status.LastUpdated = &lastUp

		// Patch the machine to record the status changes.
		g.Expect(machinePatchHelper.Patch(ctx, machine)).To(Succeed())

		machines = append(machines, machine)
	}

	cleanup := func() {
		fmt.Println("Cleaning up nodes, machines and infra machines.")
		for _, n := range nodes {
			if err := env.Delete(ctx, n); !apierrors.IsNotFound(err) {
				g.Expect(err).ToNot(HaveOccurred())
			}
		}
		for _, m := range machines {
			g.Expect(env.Delete(ctx, m)).To(Succeed())
		}
		for _, im := range infraMachines {
			if err := env.Delete(ctx, im); !apierrors.IsNotFound(err) {
				g.Expect(err).ToNot(HaveOccurred())
			}
		}
	}

	return nodes, machines, cleanup
}

func newMachineHealthCheckWithLabels(name, namespace, cluster string, labels map[string]string) *clusterv1.MachineHealthCheck {
	l := make(map[string]string, len(labels))
	for k, v := range labels {
		l[k] = v
	}
	l[clusterv1.ClusterNameLabel] = cluster

	mhc := newMachineHealthCheck(namespace, cluster)
	mhc.SetName(name)
	mhc.Labels = l
	mhc.Spec.Selector.MatchLabels = l

	return mhc
}

func newMachineHealthCheck(namespace, clusterName string) *clusterv1.MachineHealthCheck {
	maxUnhealthy := intstr.FromString("100%")
	return &clusterv1.MachineHealthCheck{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "test-mhc-",
			Namespace:    namespace,
		},
		Spec: clusterv1.MachineHealthCheckSpec{
			ClusterName: clusterName,
			Selector: metav1.LabelSelector{
				MatchLabels: map[string]string{
					"selector": string(uuid.NewUUID()),
				},
			},
			MaxUnhealthy:       &maxUnhealthy,
			NodeStartupTimeout: &metav1.Duration{Duration: 1 * time.Millisecond},
			UnhealthyConditions: []clusterv1.UnhealthyCondition{
				{
					Type:    corev1.NodeReady,
					Status:  corev1.ConditionUnknown,
					Timeout: metav1.Duration{Duration: 5 * time.Minute},
				},
			},
		},
	}
}

func TestPatchTargets(t *testing.T) {
	g := NewWithT(t)

	namespace := metav1.NamespaceDefault
	clusterName := testClusterName
	defaultCluster := &clusterv1.Cluster{
		ObjectMeta: metav1.ObjectMeta{
			Name:      clusterName,
			Namespace: namespace,
		},
	}
	labels := map[string]string{"cluster": "foo", "nodepool": "bar"}

	mhc := newMachineHealthCheckWithLabels("mhc", namespace, clusterName, labels)
	machine1 := newTestMachine("machine1", namespace, clusterName, "nodeName", labels)
	machine1.ResourceVersion = "999"
	conditions.MarkTrue(machine1, clusterv1.MachineHealthCheckSucceededCondition)
	machine2 := machine1.DeepCopy()
	machine2.Name = "machine2"

	cl := fake.NewClientBuilder().WithObjects(
		machine1,
		machine2,
		mhc,
	).WithStatusSubresource(&clusterv1.MachineHealthCheck{}, &clusterv1.Machine{}).Build()
	r := &Reconciler{
		Client:   cl,
		recorder: record.NewFakeRecorder(32),
		Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), cl, cl, scheme.Scheme, client.ObjectKey{Name: clusterName, Namespace: namespace}, "machinehealthcheck-watchClusterNodes"),
	}

	// To make the patch fail, create patchHelper with a different client.
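	// patch.NewHelper snapshots the object it is given and later diffs
	// against that snapshot, sending the patch through the client it was
	// constructed with. Building the helper below around a throwaway fake
	// client that only knows about the copied "fake" machine means the
	// eventual patch of machine1 cannot succeed, which is exactly the
	// failure path this test wants to exercise.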
	fakeMachine := machine1.DeepCopy()
	fakeMachine.Name = "fake"
	patchHelper, err := patch.NewHelper(fakeMachine, fake.NewClientBuilder().WithObjects(fakeMachine).Build())
	g.Expect(err).ToNot(HaveOccurred())
	// healthCheckTarget with the fake patchHelper; the patch should fail on this target.
	target1 := healthCheckTarget{
		MHC:         mhc,
		Machine:     machine1,
		patchHelper: patchHelper,
		Node:        &corev1.Node{},
	}

	// healthCheckTarget with the correct patchHelper.
	patchHelper2, err := patch.NewHelper(machine2, cl)
	g.Expect(err).ToNot(HaveOccurred())
	target3 := healthCheckTarget{
		MHC:         mhc,
		Machine:     machine2,
		patchHelper: patchHelper2,
		Node:        &corev1.Node{},
	}

	// The target with the wrong patch helper fails, but the other one is patched.
	g.Expect(r.patchUnhealthyTargets(context.TODO(), logr.New(log.NullLogSink{}), []healthCheckTarget{target1, target3}, defaultCluster, mhc)).ToNot(BeEmpty())
	g.Expect(cl.Get(ctx, client.ObjectKey{Name: machine2.Name, Namespace: machine2.Namespace}, machine2)).ToNot(HaveOccurred())
	g.Expect(conditions.Get(machine2, clusterv1.MachineOwnerRemediatedCondition).Status).To(Equal(corev1.ConditionFalse))

	// The same holds for patching healthy targets: the wrong helper fails, the other succeeds.
	g.Expect(r.patchHealthyTargets(context.TODO(), logr.New(log.NullLogSink{}), []healthCheckTarget{target1, target3}, mhc)).ToNot(BeEmpty())
}