sigs.k8s.io/cluster-api@v1.6.3/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package machinehealthcheck

import (
	"context"
	"errors"
	"fmt"
	"sort"
	"testing"
	"time"

	"github.com/go-logr/logr"
	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/tools/record"
	"k8s.io/utils/pointer"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
	"sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/remote"
	capierrors "sigs.k8s.io/cluster-api/errors"
	"sigs.k8s.io/cluster-api/internal/test/builder"
	"sigs.k8s.io/cluster-api/internal/webhooks"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
)

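// TestMachineHealthCheck_Reconcile exercises the MachineHealthCheck controller
// end to end. It assumes env (an envtest-backed test environment), ctx, and the
// timeout variable are initialized by this package's suite setup, and that
// helpers such as createCluster, newMachineHealthCheck, and
// createMachinesWithNodes are defined elsewhere in this file.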
func TestMachineHealthCheck_Reconcile(t *testing.T) {
	ns, err := env.CreateNamespace(ctx, "test-mhc")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := env.Delete(ctx, ns); err != nil {
			t.Fatal(err)
		}
	}()

	t.Run("it should ensure the correct cluster-name label when no existing labels exist", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Labels = map[string]string{}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() map[string]string {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetLabels()
		}).Should(HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name))
	})

	t.Run("it should ensure the correct cluster-name label when the label has the wrong value", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Labels = map[string]string{
			clusterv1.ClusterNameLabel: "wrong-cluster",
		}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() map[string]string {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetLabels()
		}).Should(HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name))
	})

	t.Run("it should ensure the correct cluster-name label when other labels are present", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.Labels = map[string]string{
			"extra-label": "1",
		}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() map[string]string {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetLabels()
		}).Should(And(
			HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name),
			HaveKeyWithValue("extra-label", "1"),
			HaveLen(2),
		))
	})

	t.Run("it should ensure an owner reference is present when no existing ones exist", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.OwnerReferences = []metav1.OwnerReference{}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() []metav1.OwnerReference {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				fmt.Printf("error retrieving mhc: %v\n", err)
				return nil
			}
			return mhc.GetOwnerReferences()
		}, timeout, 100*time.Millisecond).Should(And(
			HaveLen(1),
			ContainElement(ownerReferenceForCluster(ctx, g, cluster)),
		))
	})

	t.Run("it should ensure an owner reference is present when modifying existing ones", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		mhc.OwnerReferences = []metav1.OwnerReference{
			{Kind: "Foo", APIVersion: "foo.bar.baz/v1", Name: "Bar", UID: "12345"},
		}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		g.Eventually(func() []metav1.OwnerReference {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return mhc.GetOwnerReferences()
		}, timeout, 100*time.Millisecond).Should(And(
			ContainElements(
				metav1.OwnerReference{Kind: "Foo", APIVersion: "foo.bar.baz/v1", Name: "Bar", UID: "12345"},
				ownerReferenceForCluster(ctx, g, cluster)),
			HaveLen(2),
		))
	})

	t.Run("it ignores Machines not matching the label selector", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

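		// createMachinesWithNodes is a test helper (defined elsewhere in this
		// file) that is assumed to create the requested number of Machines,
		// optionally with backing Nodes in the given Ready state, and to
		// return the Nodes, the Machines, and a cleanup func.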
		// Healthy nodes and machines matching the MHC's label selector.
		_, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Healthy nodes and machines NOT matching the MHC's label selector.
		_, _, cleanup2 := createMachinesWithNodes(g, cluster,
			count(2),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
		)
		defer cleanup2()

		// Make sure the status matches.
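		// MatchMachineHealthCheckStatus is assumed to be a custom gomega
		// matcher from this package that compares the relevant status fields
		// while ignoring volatile ones such as condition timestamps.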
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}, 5*time.Second, 100*time.Millisecond).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    2,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it doesn't mark anything unhealthy when cluster infrastructure is not ready", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		patchHelper, err := patch.NewHelper(cluster, env.Client)
		g.Expect(err).ToNot(HaveOccurred())

		conditions.MarkFalse(cluster, clusterv1.InfrastructureReadyCondition, "SomeReason", clusterv1.ConditionSeverityError, "")
		g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed())

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    2,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it doesn't mark anything unhealthy when all Machines are healthy", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    2,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks unhealthy machines for remediation when there is one unhealthy Machine", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks unhealthy machines for remediation when a Machine has a failure reason", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Machine with failure reason.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
			machineFailureReason("some failure"),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks unhealthy machines for remediation when a Machine has a failure message", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Machine with failure message.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
			machineFailureMessage("some failure"),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks machines unhealthy but blocks remediation when the unhealthy Machines exceed MaxUnhealthy", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
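		// With 3 target machines and maxUnhealthy at 40%, at most
		// floor(3 * 0.4) = 1 machine may be unhealthy before the MHC
		// short-circuits; the 2 unhealthy machines created below therefore
		// block remediation.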
		maxUnhealthy := intstr.Parse("40%")
		mhc.Spec.MaxUnhealthy = &maxUnhealthy

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(2),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      1,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:     clusterv1.RemediationAllowedCondition,
					Status:   corev1.ConditionFalse,
					Severity: clusterv1.ConditionSeverityWarning,
					Reason:   clusterv1.TooManyUnhealthyReason,
					Message:  "Remediation is not allowed, the number of not started or unhealthy machines exceeds maxUnhealthy (total: 3, unhealthy: 2, maxUnhealthy: 40%)",
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(2))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsTrue(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("it marks unhealthy machines for remediation when the number of unhealthy Machines is within the unhealthyRange", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
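		// unhealthyRange is an inclusive range: remediation is allowed only
		// while the unhealthy count falls inside it. Here 1 unhealthy machine
		// out of 3 is within [1-3], so remediation proceeds.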
		unhealthyRange := "[1-3]"
		mhc.Spec.UnhealthyRange = &unhealthyRange

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))
	})

	t.Run("it marks machines unhealthy but blocks remediation when the number of unhealthy Machines is not within the unhealthyRange", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
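		// 2 unhealthy machines fall outside the inclusive range [3-5], so the
		// MHC reports TooManyUnhealthy and marks machines unhealthy without
		// requesting remediation.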
		unhealthyRange := "[3-5]"
		mhc.Spec.UnhealthyRange = &unhealthyRange

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(2),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      1,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:     clusterv1.RemediationAllowedCondition,
					Status:   corev1.ConditionFalse,
					Severity: clusterv1.ConditionSeverityWarning,
					Reason:   clusterv1.TooManyUnhealthyReason,
					Message:  "Remediation is not allowed, the number of not started or unhealthy machines does not fall within the range (total: 3, unhealthy: 2, unhealthyRange: [3-5])",
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(2))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("when a Machine has no Node ref for less than the NodeStartupTimeout", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// After the cluster exists, we have to set the infrastructure ready condition; otherwise, MachineHealthChecks
		// will never fail when nodeStartupTimeout is exceeded.
		patchHelper, err := patch.NewHelper(cluster, env.GetClient())
		g.Expect(err).ToNot(HaveOccurred())

		conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition)
		g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed())

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
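		// A generous 5h startup timeout means the machine created below
		// without a node ref is still treated as starting up, so it is
		// neither marked unhealthy nor remediated.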
		mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: 5 * time.Hour}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(false),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(0))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsTrue(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("when a Machine has no Node ref for longer than the NodeStartupTimeout", func(t *testing.T) {
		// FIXME: Resolve flaky/failing test
		t.Skip("skipping until made stable")
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
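		// A 1s startup timeout is exceeded almost immediately, so the machine
		// created below without a node ref should be marked unhealthy and
		// remediated.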
		mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: time.Second}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		_, machines, cleanup1 := createMachinesWithNodes(g, cluster,
			count(2),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup1()
		// Unhealthy nodes and machines.
		_, unhealthyMachines, cleanup2 := createMachinesWithNodes(g, cluster,
			count(1),
			createNodeRefForMachine(false),
			nodeStatus(corev1.ConditionUnknown),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup2()
		machines = append(machines, unhealthyMachines...)

		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the MHC status matches. We have two healthy machines and
		// one unhealthy.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				fmt.Printf("error retrieving mhc: %v\n", err)
				return nil
			}
			return &mhc.Status
		}, timeout, 100*time.Millisecond).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				fmt.Printf("error retrieving list: %v\n", err)
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))
	})

	t.Run("when a Machine's Node has gone away", func(t *testing.T) {
		// FIXME: Resolve flaky/failing test
		t.Skip("skipping until made stable")
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(3),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Forcibly remove the last machine's node.
		g.Eventually(func() bool {
			nodeToBeRemoved := nodes[2]
			if err := env.Delete(ctx, nodeToBeRemoved); err != nil {
				return apierrors.IsNotFound(err)
			}
			return apierrors.IsNotFound(env.Get(ctx, util.ObjectKey(nodeToBeRemoved), nodeToBeRemoved))
		}).Should(BeTrue())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    3,
			CurrentHealthy:      2,
			RemediationsAllowed: 2,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))
	})

	t.Run("should react when a Node transitions to unhealthy", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      1,
			RemediationsAllowed: 1,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Transition the node to unhealthy.
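		// Backdating LastTransitionTime by 10 minutes is assumed to exceed the
		// MHC's configured unhealthy timeout for the Ready condition, so the
		// machine is reported unhealthy on the next reconcile.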
		node := nodes[0]
		nodePatch := client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionUnknown,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:   1,
			CurrentHealthy:     0,
			ObservedGeneration: 1,
			Targets:            targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Calculate how many Machines have been marked for remediation.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) {
					remediated++
				}
			}
			return
		}).Should(Equal(1))
	})

	t.Run("when in a MachineSet, unhealthy machines should be deleted", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// Create 1 control plane machine so the MHC can proceed.
		_, _, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
		)
		defer cleanup()

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
		// Create infrastructure template resource.
		infraResource := map[string]interface{}{
			"kind":       "GenericInfrastructureMachine",
			"apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1",
			"metadata":   map[string]interface{}{},
			"spec": map[string]interface{}{
				"size": "3xlarge",
			},
		}
		infraTmpl := &unstructured.Unstructured{
			Object: map[string]interface{}{
				"spec": map[string]interface{}{
					"template": infraResource,
				},
			},
		}
		infraTmpl.SetKind("GenericInfrastructureMachineTemplate")
		infraTmpl.SetAPIVersion("infrastructure.cluster.x-k8s.io/v1beta1")
		infraTmpl.SetGenerateName("mhc-ms-template-")
		infraTmpl.SetNamespace(mhc.Namespace)

		g.Expect(env.Create(ctx, infraTmpl)).To(Succeed())

		machineSet := &clusterv1.MachineSet{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "mhc-ms-",
				Namespace:    mhc.Namespace,
			},
			Spec: clusterv1.MachineSetSpec{
				ClusterName: cluster.Name,
				Replicas:    pointer.Int32(1),
				Selector:    mhc.Spec.Selector,
				Template: clusterv1.MachineTemplateSpec{
					ObjectMeta: clusterv1.ObjectMeta{
						Labels: mhc.Spec.Selector.MatchLabels,
					},
					Spec: clusterv1.MachineSpec{
						ClusterName: cluster.Name,
						Bootstrap: clusterv1.Bootstrap{
							DataSecretName: pointer.String("test-data-secret-name"),
						},
						InfrastructureRef: corev1.ObjectReference{
							APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1",
							Kind:       "GenericInfrastructureMachineTemplate",
							Name:       infraTmpl.GetName(),
						},
					},
				},
			},
		}
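		// Run the MachineSet defaulting webhook in-process so the object gets
		// the same defaults it would receive from the API server's admission
		// chain before it is created.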
		g.Expect((&webhooks.MachineSet{}).Default(ctx, machineSet)).Should(Succeed())
		g.Expect(env.Create(ctx, machineSet)).To(Succeed())

		// Ensure machines have been created.
		g.Eventually(func() int {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}
			return len(machines.Items)
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Create the MachineHealthCheck instance.
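		// The MachineSet's machine never gets a node ref in this test, so a
		// 1s NodeStartupTimeout marks it unhealthy almost immediately.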
		mhc.Spec.NodeStartupTimeout = &metav1.Duration{Duration: time.Second}

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		// Defer cleanup for all the objects that have been created.
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc, infraTmpl, machineSet)

		// Pause the MachineSet reconciler to delay the deletion of the
		// Machine, because the MachineSet controller deletes the Machine when
		// it is marked unhealthy by MHC.
		machineSetPatch := client.MergeFrom(machineSet.DeepCopy())
		machineSet.Annotations = map[string]string{
			clusterv1.PausedAnnotation: "",
		}
		g.Expect(env.Patch(ctx, machineSet, machineSetPatch)).To(Succeed())

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Calculate how many Machines should be remediated.
		var unhealthyMachine *clusterv1.Machine
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					unhealthyMachine = machines.Items[i].DeepCopy()
					remediated++
				}
			}
			return
		}, timeout, 100*time.Millisecond).Should(Equal(1))

		// Unpause the MachineSet reconciler.
		machineSetPatch = client.MergeFrom(machineSet.DeepCopy())
		delete(machineSet.Annotations, clusterv1.PausedAnnotation)
		g.Expect(env.Patch(ctx, machineSet, machineSetPatch)).To(Succeed())

		// Make sure the Machine gets deleted.
		g.Eventually(func() bool {
			machine := unhealthyMachine.DeepCopy()
			err := env.Get(ctx, util.ObjectKey(unhealthyMachine), machine)
			return apierrors.IsNotFound(err) || !machine.DeletionTimestamp.IsZero()
		}, timeout, 100*time.Millisecond).Should(BeTrue())
	})

	t.Run("when a machine is paused", func(t *testing.T) {
		// FIXME: Resolve flaky/failing test
		t.Skip("skipping until made stable")
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)

		g.Expect(env.Create(ctx, mhc)).To(Succeed())
		defer func(do ...client.Object) {
			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
		}(cluster, mhc)

		// Healthy nodes and machines.
		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
			count(1),
			firstMachineAsControlPlane(),
			createNodeRefForMachine(true),
			nodeStatus(corev1.ConditionTrue),
			machineLabels(mhc.Spec.Selector.MatchLabels),
		)
		defer cleanup()
		targetMachines := make([]string, len(machines))
		for i, m := range machines {
			targetMachines[i] = m.Name
		}
		sort.Strings(targetMachines)

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:   1,
			CurrentHealthy:     1,
			ObservedGeneration: 1,
			Targets:            targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Pause the machine.
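		// The paused annotation makes the MHC skip remediation for this
		// machine: it is still counted as unhealthy below, but no
		// MachineOwnerRemediated condition is set.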
		machinePatch := client.MergeFrom(machines[0].DeepCopy())
		machines[0].Annotations = map[string]string{
			clusterv1.PausedAnnotation: "",
		}
		g.Expect(env.Patch(ctx, machines[0], machinePatch)).To(Succeed())

		// Transition the node to unhealthy.
		node := nodes[0]
		nodePatch := client.MergeFrom(node.DeepCopy())
		node.Status.Conditions = []corev1.NodeCondition{
			{
				Type:               corev1.NodeReady,
				Status:             corev1.ConditionUnknown,
				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
			},
		}
		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())

		// Make sure the status matches.
		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
			if err != nil {
				return nil
			}
			return &mhc.Status
		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
			ExpectedMachines:    1,
			CurrentHealthy:      0,
			RemediationsAllowed: 0,
			ObservedGeneration:  1,
			Targets:             targetMachines,
			Conditions: clusterv1.Conditions{
				{
					Type:   clusterv1.RemediationAllowedCondition,
					Status: corev1.ConditionTrue,
				},
			},
		}))

		// Calculate how many Machines have health check succeeded = false.
		g.Eventually(func() (unhealthy int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
					unhealthy++
				}
			}
			return
		}).Should(Equal(1))

		// Calculate how many Machines have been remediated.
		g.Eventually(func() (remediated int) {
			machines := &clusterv1.MachineList{}
			err := env.List(ctx, machines, client.MatchingLabels{
				"selector": mhc.Spec.Selector.MatchLabels["selector"],
			})
			if err != nil {
				return -1
			}

			for i := range machines.Items {
				if conditions.Get(&machines.Items[i], clusterv1.MachineOwnerRemediatedCondition) != nil {
					remediated++
				}
			}
			return
		}).Should(Equal(0))
	})

	t.Run("When remediationTemplate is set and node transitions to unhealthy, new Remediation Request should be created", func(t *testing.T) {
		g := NewWithT(t)
		cluster := createCluster(g, ns.Name)

		// Create remediation template resource.
		infraRemediationResource := map[string]interface{}{
			"kind":       "GenericExternalRemediation",
			"apiVersion": builder.RemediationGroupVersion.String(),
			"metadata":   map[string]interface{}{},
			"spec": map[string]interface{}{
				"size": "3xlarge",
			},
		}
		infraRemediationTmpl := &unstructured.Unstructured{
			Object: map[string]interface{}{
				"spec": map[string]interface{}{
					"template": infraRemediationResource,
				},
			},
		}
		infraRemediationTmpl.SetKind("GenericExternalRemediationTemplate")
		infraRemediationTmpl.SetAPIVersion(builder.RemediationGroupVersion.String())
		infraRemediationTmpl.SetGenerateName("remediation-template-name-")
		infraRemediationTmpl.SetNamespace(cluster.Namespace)
		g.Expect(env.Create(ctx, infraRemediationTmpl)).To(Succeed())

		remediationTemplate := &corev1.ObjectReference{
			APIVersion: builder.RemediationGroupVersion.String(),
			Kind:       "GenericExternalRemediationTemplate",
			Name:       infraRemediationTmpl.GetName(),
		}

		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
  1494  		mhc.Spec.RemediationTemplate = remediationTemplate
  1495  		g.Expect(env.Create(ctx, mhc)).To(Succeed())
  1496  		defer func(do ...client.Object) {
  1497  			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
  1498  		}(cluster, mhc, infraRemediationTmpl)
  1499  
  1500  		// Healthy nodes and machines.
  1501  		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
  1502  			count(1),
  1503  			firstMachineAsControlPlane(),
  1504  			createNodeRefForMachine(true),
  1505  			nodeStatus(corev1.ConditionTrue),
  1506  			machineLabels(mhc.Spec.Selector.MatchLabels),
  1507  		)
  1508  		defer cleanup()
  1509  		targetMachines := make([]string, len(machines))
  1510  		for i, m := range machines {
  1511  			targetMachines[i] = m.Name
  1512  		}
  1513  		sort.Strings(targetMachines)
  1514  
  1515  		// Make sure the status matches.
  1516  		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
  1517  			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
  1518  			if err != nil {
  1519  				return nil
  1520  			}
  1521  			return &mhc.Status
  1522  		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
  1523  			ExpectedMachines:    1,
  1524  			CurrentHealthy:      1,
  1525  			RemediationsAllowed: 1,
  1526  			ObservedGeneration:  1,
  1527  			Targets:             targetMachines,
  1528  			Conditions: clusterv1.Conditions{
  1529  				{
  1530  					Type:   clusterv1.RemediationAllowedCondition,
  1531  					Status: corev1.ConditionTrue,
  1532  				},
  1533  			},
  1534  		}))
  1535  
  1536  		// Transition the node to unhealthy.
  1537  		node := nodes[0]
  1538  		nodePatch := client.MergeFrom(node.DeepCopy())
  1539  		node.Status.Conditions = []corev1.NodeCondition{
  1540  			{
  1541  				Type:               corev1.NodeReady,
  1542  				Status:             corev1.ConditionUnknown,
  1543  				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
  1544  			},
  1545  		}
  1546  		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())
  1547  
  1548  		// Make sure the status matches.
  1549  		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
  1550  			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
  1551  			if err != nil {
  1552  				return nil
  1553  			}
  1554  			return &mhc.Status
  1555  		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
  1556  			ExpectedMachines:    1,
  1557  			CurrentHealthy:      0,
  1558  			RemediationsAllowed: 0,
  1559  			ObservedGeneration:  1,
  1560  			Targets:             targetMachines,
  1561  			Conditions: clusterv1.Conditions{
  1562  				{
  1563  					Type:   clusterv1.RemediationAllowedCondition,
  1564  					Status: corev1.ConditionTrue,
  1565  				},
  1566  			},
  1567  		}))
  1568  
  1569  		// Count how many Machines have the HealthCheckSucceeded condition set to False.
  1570  		g.Eventually(func() (unhealthy int) {
  1571  			machines := &clusterv1.MachineList{}
  1572  			err := env.List(ctx, machines, client.MatchingLabels{
  1573  				"selector": mhc.Spec.Selector.MatchLabels["selector"],
  1574  			})
  1575  			if err != nil {
  1576  				return -1
  1577  			}
  1578  
  1579  			for i := range machines.Items {
  1580  				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
  1581  					unhealthy++
  1582  				}
  1583  			}
  1584  			return
  1585  		}).Should(Equal(1))
  1586  
  1587  		ref := corev1.ObjectReference{
  1588  			APIVersion: builder.RemediationGroupVersion.String(),
  1589  			Kind:       "GenericExternalRemediation",
  1590  		}
  1591  
  1592  		obj := util.ObjectReferenceToUnstructured(ref)
  1593  		// Make sure the Remediation Request is created.
  1594  		g.Eventually(func() *unstructured.Unstructured {
  1595  			key := client.ObjectKey{
  1596  				Namespace: machines[0].Namespace,
  1597  				Name:      machines[0].Name,
  1598  			}
  1599  			err := env.Get(ctx, key, obj)
  1600  			if err != nil {
  1601  				return nil
  1602  			}
  1603  			return obj
  1604  		}, timeout, 100*time.Millisecond).ShouldNot(BeNil())
  1605  		g.Expect(obj.GetOwnerReferences()).To(HaveLen(1))
  1606  		g.Expect(obj.GetOwnerReferences()[0].Name).To(Equal(machines[0].Name))
  1607  	})
  1608  
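        	// The request asserted above is expected to be stamped out of the
        	// template's spec.template, named after the unhealthy Machine and
        	// owned by it. A hedged sketch of that generation step (illustrative
        	// only; the controller uses its own helpers for external objects):
        	//
        	//	tmplSpec, _, _ := unstructured.NestedMap(infraRemediationTmpl.Object, "spec", "template")
        	//	request := &unstructured.Unstructured{Object: tmplSpec}
        	//	request.SetName(machines[0].Name)
        	//	request.SetNamespace(machines[0].Namespace)
        	//	request.SetOwnerReferences([]metav1.OwnerReference{{
        	//		APIVersion: clusterv1.GroupVersion.String(),
        	//		Kind:       "Machine",
        	//		Name:       machines[0].Name,
        	//		UID:        machines[0].UID,
        	//	}})
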
  1609  	t.Run("When remediationTemplate is set and node transitions back to healthy, new Remediation Request should be deleted", func(t *testing.T) {
  1610  		g := NewWithT(t)
  1611  		cluster := createCluster(g, ns.Name)
  1612  
  1613  		// Create remediation template resource.
  1614  		infraRemediationResource := map[string]interface{}{
  1615  			"kind":       "GenericExternalRemediation",
  1616  			"apiVersion": builder.RemediationGroupVersion.String(),
  1617  			"metadata":   map[string]interface{}{},
  1618  			"spec": map[string]interface{}{
  1619  				"size": "3xlarge",
  1620  			},
  1621  		}
  1622  		infraRemediationTmpl := &unstructured.Unstructured{
  1623  			Object: map[string]interface{}{
  1624  				"spec": map[string]interface{}{
  1625  					"template": infraRemediationResource,
  1626  				},
  1627  			},
  1628  		}
  1629  		infraRemediationTmpl.SetKind("GenericExternalRemediationTemplate")
  1630  		infraRemediationTmpl.SetAPIVersion(builder.RemediationGroupVersion.String())
  1631  		infraRemediationTmpl.SetGenerateName("remediation-template-name-")
  1632  		infraRemediationTmpl.SetNamespace(cluster.Namespace)
  1633  		g.Expect(env.Create(ctx, infraRemediationTmpl)).To(Succeed())
  1634  
  1635  		remediationTemplate := &corev1.ObjectReference{
  1636  			APIVersion: builder.RemediationGroupVersion.String(),
  1637  			Kind:       "GenericExternalRemediationTemplate",
  1638  			Name:       infraRemediationTmpl.GetName(),
  1639  		}
  1640  
  1641  		mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name)
  1642  		mhc.Spec.RemediationTemplate = remediationTemplate
  1643  		g.Expect(env.Create(ctx, mhc)).To(Succeed())
  1644  		defer func(do ...client.Object) {
  1645  			g.Expect(env.Cleanup(ctx, do...)).To(Succeed())
  1646  		}(cluster, mhc, infraRemediationTmpl)
  1647  
  1648  		// Healthy nodes and machines.
  1649  		nodes, machines, cleanup := createMachinesWithNodes(g, cluster,
  1650  			count(1),
  1651  			firstMachineAsControlPlane(),
  1652  			createNodeRefForMachine(true),
  1653  			nodeStatus(corev1.ConditionTrue),
  1654  			machineLabels(mhc.Spec.Selector.MatchLabels),
  1655  		)
  1656  		defer cleanup()
  1657  		targetMachines := make([]string, len(machines))
  1658  		for i, m := range machines {
  1659  			targetMachines[i] = m.Name
  1660  		}
  1661  		sort.Strings(targetMachines)
  1662  
  1663  		// Make sure the status matches.
  1664  		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
  1665  			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
  1666  			if err != nil {
  1667  				return nil
  1668  			}
  1669  			return &mhc.Status
  1670  		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
  1671  			ExpectedMachines:    1,
  1672  			CurrentHealthy:      1,
  1673  			RemediationsAllowed: 1,
  1674  			ObservedGeneration:  1,
  1675  			Targets:             targetMachines,
  1676  			Conditions: clusterv1.Conditions{
  1677  				{
  1678  					Type:   clusterv1.RemediationAllowedCondition,
  1679  					Status: corev1.ConditionTrue,
  1680  				},
  1681  			},
  1682  		}))
  1683  
  1684  		// Transition the node to unhealthy.
  1685  		node := nodes[0]
  1686  		nodePatch := client.MergeFrom(node.DeepCopy())
  1687  		node.Status.Conditions = []corev1.NodeCondition{
  1688  			{
  1689  				Type:               corev1.NodeReady,
  1690  				Status:             corev1.ConditionUnknown,
  1691  				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
  1692  			},
  1693  		}
  1694  		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())
  1695  
  1696  		// Make sure the status matches.
  1697  		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
  1698  			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
  1699  			if err != nil {
  1700  				return nil
  1701  			}
  1702  			return &mhc.Status
  1703  		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
  1704  			ExpectedMachines:    1,
  1705  			CurrentHealthy:      0,
  1706  			RemediationsAllowed: 0,
  1707  			ObservedGeneration:  1,
  1708  			Targets:             targetMachines,
  1709  			Conditions: clusterv1.Conditions{
  1710  				{
  1711  					Type:   clusterv1.RemediationAllowedCondition,
  1712  					Status: corev1.ConditionTrue,
  1713  				},
  1714  			},
  1715  		}))
  1716  
  1717  		// Count how many Machines have the HealthCheckSucceeded condition set to False.
  1718  		g.Eventually(func() (unhealthy int) {
  1719  			machines := &clusterv1.MachineList{}
  1720  			err := env.List(ctx, machines, client.MatchingLabels{
  1721  				"selector": mhc.Spec.Selector.MatchLabels["selector"],
  1722  			})
  1723  			if err != nil {
  1724  				return -1
  1725  			}
  1726  
  1727  			for i := range machines.Items {
  1728  				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
  1729  					unhealthy++
  1730  				}
  1731  			}
  1732  			return
  1733  		}).Should(Equal(1))
  1734  
  1735  		// Transition the node back to healthy.
  1736  		node = nodes[0]
  1737  		nodePatch = client.MergeFrom(node.DeepCopy())
  1738  		node.Status.Conditions = []corev1.NodeCondition{
  1739  			{
  1740  				Type:               corev1.NodeReady,
  1741  				Status:             corev1.ConditionTrue,
  1742  				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
  1743  			},
  1744  		}
  1745  		g.Expect(env.Status().Patch(ctx, node, nodePatch)).To(Succeed())
  1746  
  1747  		// Make sure the status matches.
  1748  		g.Eventually(func() *clusterv1.MachineHealthCheckStatus {
  1749  			err := env.Get(ctx, util.ObjectKey(mhc), mhc)
  1750  			if err != nil {
  1751  				return nil
  1752  			}
  1753  			return &mhc.Status
  1754  		}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
  1755  			ExpectedMachines:    1,
  1756  			CurrentHealthy:      1,
  1757  			RemediationsAllowed: 1,
  1758  			ObservedGeneration:  1,
  1759  			Targets:             targetMachines,
  1760  			Conditions: clusterv1.Conditions{
  1761  				{
  1762  					Type:   clusterv1.RemediationAllowedCondition,
  1763  					Status: corev1.ConditionTrue,
  1764  				},
  1765  			},
  1766  		}))
  1767  
  1768  		// Count how many Machines have the HealthCheckSucceeded condition set to False.
  1769  		g.Eventually(func() (unhealthy int) {
  1770  			machines := &clusterv1.MachineList{}
  1771  			err := env.List(ctx, machines, client.MatchingLabels{
  1772  				"selector": mhc.Spec.Selector.MatchLabels["selector"],
  1773  			})
  1774  			if err != nil {
  1775  				return -1
  1776  			}
  1777  
  1778  			for i := range machines.Items {
  1779  				if conditions.IsFalse(&machines.Items[i], clusterv1.MachineHealthCheckSucceededCondition) {
  1780  					unhealthy++
  1781  				}
  1782  			}
  1783  			return
  1784  		}).Should(Equal(0))
  1785  
  1786  		ref := corev1.ObjectReference{
  1787  			APIVersion: builder.RemediationGroupVersion.String(),
  1788  			Kind:       "GenericExternalRemediation",
  1789  		}
  1790  
  1791  		obj := util.ObjectReferenceToUnstructured(ref)
  1792  		// Make sure the Remediation Request is deleted.
  1793  		g.Eventually(func() *unstructured.Unstructured {
  1794  			key := client.ObjectKey{
  1795  				Namespace: machines[0].Namespace,
  1796  				Name:      machines[0].Name,
  1797  			}
  1798  			err := env.Get(ctx, key, obj)
  1799  			if err != nil {
  1800  				return nil
  1801  			}
  1802  			return obj
  1803  		}, timeout, 100*time.Millisecond).Should(BeNil())
  1804  	})
  1805  }
  1806  
  1807  func TestClusterToMachineHealthCheck(t *testing.T) {
  1808  	fakeClient := fake.NewClientBuilder().Build()
  1809  
  1810  	r := &Reconciler{
  1811  		Client: fakeClient,
  1812  	}
  1813  
  1814  	namespace := metav1.NamespaceDefault
  1815  	clusterName := testClusterName
  1816  	labels := make(map[string]string)
  1817  
  1818  	mhc1 := newMachineHealthCheckWithLabels("mhc1", namespace, clusterName, labels)
  1819  	mhc1Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc1.Namespace, Name: mhc1.Name}}
  1820  	mhc2 := newMachineHealthCheckWithLabels("mhc2", namespace, clusterName, labels)
  1821  	mhc2Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc2.Namespace, Name: mhc2.Name}}
  1822  	mhc3 := newMachineHealthCheckWithLabels("mhc3", namespace, "othercluster", labels)
  1823  	mhc4 := newMachineHealthCheckWithLabels("mhc4", "othernamespace", clusterName, labels)
  1824  	cluster1 := &clusterv1.Cluster{
  1825  		ObjectMeta: metav1.ObjectMeta{
  1826  			Name:      clusterName,
  1827  			Namespace: namespace,
  1828  		},
  1829  	}
  1830  
  1831  	testCases := []struct {
  1832  		name     string
  1833  		toCreate []clusterv1.MachineHealthCheck
  1834  		object   client.Object
  1835  		expected []reconcile.Request
  1836  	}{
  1837  		{
  1838  			name:     "when a MachineHealthCheck exists for the Cluster in the same namespace",
  1839  			toCreate: []clusterv1.MachineHealthCheck{*mhc1},
  1840  			object:   cluster1,
  1841  			expected: []reconcile.Request{mhc1Req},
  1842  		},
  1843  		{
  1844  			name:     "when 2 MachineHealthChecks exists for the Cluster in the same namespace",
  1845  			toCreate: []clusterv1.MachineHealthCheck{*mhc1, *mhc2},
  1846  			object:   cluster1,
  1847  			expected: []reconcile.Request{mhc1Req, mhc2Req},
  1848  		},
  1849  		{
  1850  			name:     "when a MachineHealthCheck exists for another Cluster in the same namespace",
  1851  			toCreate: []clusterv1.MachineHealthCheck{*mhc3},
  1852  			object:   cluster1,
  1853  			expected: []reconcile.Request{},
  1854  		},
  1855  		{
  1856  			name:     "when a MachineHealthCheck exists for another Cluster in another namespace",
  1857  			toCreate: []clusterv1.MachineHealthCheck{*mhc4},
  1858  			object:   cluster1,
  1859  			expected: []reconcile.Request{},
  1860  		},
  1861  	}
  1862  
  1863  	for _, tc := range testCases {
  1864  		t.Run(tc.name, func(t *testing.T) {
  1865  			gs := NewWithT(t)
  1866  
  1867  			for _, obj := range tc.toCreate {
  1868  				o := obj
  1869  				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
  1870  				defer func() {
  1871  					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
  1872  				}()
  1873  				// Check the cache is populated
  1874  				getObj := func() error {
  1875  					return r.Client.Get(ctx, util.ObjectKey(&o), &clusterv1.MachineHealthCheck{})
  1876  				}
  1877  				gs.Eventually(getObj).Should(Succeed())
  1878  			}
  1879  
  1880  			got := r.clusterToMachineHealthCheck(ctx, tc.object)
  1881  			gs.Expect(got).To(ConsistOf(tc.expected))
  1882  		})
  1883  	}
  1884  }
  1885  
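        // clusterToMachineHealthCheck, exercised above, maps a Cluster event to
        // reconcile.Requests for every MachineHealthCheck targeting that Cluster.
        // A minimal sketch of such a mapper, assuming a namespaced List plus an
        // in-memory ClusterName filter (the real mapper may differ in details):
        func sketchClusterToMHC(ctx context.Context, c client.Client, cluster *clusterv1.Cluster) []reconcile.Request {
        	mhcList := &clusterv1.MachineHealthCheckList{}
        	// A MachineHealthCheck can only target a Cluster in its own namespace.
        	if err := c.List(ctx, mhcList, client.InNamespace(cluster.Namespace)); err != nil {
        		return nil
        	}
        	var requests []reconcile.Request
        	for i := range mhcList.Items {
        		if mhcList.Items[i].Spec.ClusterName == cluster.Name {
        			requests = append(requests, reconcile.Request{NamespacedName: util.ObjectKey(&mhcList.Items[i])})
        		}
        	}
        	return requests
        }
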
  1886  func TestMachineToMachineHealthCheck(t *testing.T) {
  1887  	fakeClient := fake.NewClientBuilder().Build()
  1888  
  1889  	r := &Reconciler{
  1890  		Client: fakeClient,
  1891  	}
  1892  
  1893  	namespace := metav1.NamespaceDefault
  1894  	clusterName := testClusterName
  1895  	nodeName := "node1"
  1896  	labels := map[string]string{"cluster": "foo", "nodepool": "bar"}
  1897  
  1898  	mhc1 := newMachineHealthCheckWithLabels("mhc1", namespace, clusterName, labels)
  1899  	mhc1Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc1.Namespace, Name: mhc1.Name}}
  1900  	mhc2 := newMachineHealthCheckWithLabels("mhc2", namespace, clusterName, labels)
  1901  	mhc2Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc2.Namespace, Name: mhc2.Name}}
  1902  	mhc3 := newMachineHealthCheckWithLabels("mhc3", namespace, clusterName, map[string]string{"cluster": "foo", "nodepool": "other"})
  1903  	mhc4 := newMachineHealthCheckWithLabels("mhc4", "othernamespace", clusterName, labels)
  1904  	machine1 := newTestMachine("machine1", namespace, clusterName, nodeName, labels)
  1905  
  1906  	testCases := []struct {
  1907  		name     string
  1908  		toCreate []clusterv1.MachineHealthCheck
  1909  		object   client.Object
  1910  		expected []reconcile.Request
  1911  	}{
  1912  		{
  1913  			name:     "when a MachineHealthCheck matches labels for the Machine in the same namespace",
  1914  			toCreate: []clusterv1.MachineHealthCheck{*mhc1},
  1915  			object:   machine1,
  1916  			expected: []reconcile.Request{mhc1Req},
  1917  		},
  1918  		{
  1919  			name:     "when 2 MachineHealthChecks match labels for the Machine in the same namespace",
  1920  			toCreate: []clusterv1.MachineHealthCheck{*mhc1, *mhc2},
  1921  			object:   machine1,
  1922  			expected: []reconcile.Request{mhc1Req, mhc2Req},
  1923  		},
  1924  		{
  1925  			name:     "when a MachineHealthCheck does not match labels for the Machine in the same namespace",
  1926  			toCreate: []clusterv1.MachineHealthCheck{*mhc3},
  1927  			object:   machine1,
  1928  			expected: []reconcile.Request{},
  1929  		},
  1930  		{
  1931  			name:     "when a MachineHealthCheck matches labels for the Machine in another namespace",
  1932  			toCreate: []clusterv1.MachineHealthCheck{*mhc4},
  1933  			object:   machine1,
  1934  			expected: []reconcile.Request{},
  1935  		},
  1936  	}
  1937  
  1938  	for _, tc := range testCases {
  1939  		t.Run(tc.name, func(t *testing.T) {
  1940  			gs := NewWithT(t)
  1941  
  1942  			for _, obj := range tc.toCreate {
  1943  				o := obj
  1944  				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
  1945  				defer func() {
  1946  					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
  1947  				}()
  1948  				// Check the cache is populated
  1949  				getObj := func() error {
  1950  					return r.Client.Get(ctx, util.ObjectKey(&o), &clusterv1.MachineHealthCheck{})
  1951  				}
  1952  				gs.Eventually(getObj).Should(Succeed())
  1953  			}
  1954  
  1955  			got := r.machineToMachineHealthCheck(ctx, tc.object)
  1956  			gs.Expect(got).To(ConsistOf(tc.expected))
  1957  		})
  1958  	}
  1959  }
  1960  
  1961  func TestNodeToMachineHealthCheck(t *testing.T) {
  1962  	fakeClient := fake.NewClientBuilder().
  1963  		WithIndex(&clusterv1.Machine{}, index.MachineNodeNameField, index.MachineByNodeName).
  1964  		WithStatusSubresource(&clusterv1.MachineHealthCheck{}, &clusterv1.Machine{}).
  1965  		Build()
  1966  
  1967  	r := &Reconciler{
  1968  		Client: fakeClient,
  1969  	}
  1970  
  1971  	namespace := metav1.NamespaceDefault
  1972  	clusterName := testClusterName
  1973  	nodeName := "node1"
  1974  	labels := map[string]string{"cluster": "foo", "nodepool": "bar"}
  1975  
  1976  	mhc1 := newMachineHealthCheckWithLabels("mhc1", namespace, clusterName, labels)
  1977  	mhc1Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc1.Namespace, Name: mhc1.Name}}
  1978  	mhc2 := newMachineHealthCheckWithLabels("mhc2", namespace, clusterName, labels)
  1979  	mhc2Req := reconcile.Request{NamespacedName: types.NamespacedName{Namespace: mhc2.Namespace, Name: mhc2.Name}}
  1980  	mhc3 := newMachineHealthCheckWithLabels("mhc3", namespace, "othercluster", labels)
  1981  	mhc4 := newMachineHealthCheckWithLabels("mhc4", "othernamespace", clusterName, labels)
  1982  
  1983  	machine1 := newTestMachine("machine1", namespace, clusterName, nodeName, labels)
  1984  	machine2 := newTestMachine("machine2", namespace, clusterName, nodeName, labels)
  1985  
  1986  	node1 := &corev1.Node{
  1987  		ObjectMeta: metav1.ObjectMeta{
  1988  			Name: nodeName,
  1989  		},
  1990  	}
  1991  
  1992  	testCases := []struct {
  1993  		name        string
  1994  		mhcToCreate []clusterv1.MachineHealthCheck
  1995  		mToCreate   []clusterv1.Machine
  1996  		object      client.Object
  1997  		expected    []reconcile.Request
  1998  	}{
  1999  		{
  2000  			name:        "when no Machine exists for the Node",
  2001  			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1},
  2002  			mToCreate:   []clusterv1.Machine{},
  2003  			object:      node1,
  2004  			expected:    []reconcile.Request{},
  2005  		},
  2006  		{
  2007  			name:        "when two Machines exist for the Node",
  2008  			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1},
  2009  			mToCreate:   []clusterv1.Machine{*machine1, *machine2},
  2010  			object:      node1,
  2011  			expected:    []reconcile.Request{},
  2012  		},
  2013  		{
  2014  			name:        "when no MachineHealthCheck exists for the Node in the Machine's namespace",
  2015  			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc4},
  2016  			mToCreate:   []clusterv1.Machine{*machine1},
  2017  			object:      node1,
  2018  			expected:    []reconcile.Request{},
  2019  		},
  2020  		{
  2021  			name:        "when a MachineHealthCheck exists for the Node in the Machine's namespace",
  2022  			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1},
  2023  			mToCreate:   []clusterv1.Machine{*machine1},
  2024  			object:      node1,
  2025  			expected:    []reconcile.Request{mhc1Req},
  2026  		},
  2027  		{
  2028  			name:        "when two MachineHealthChecks exist for the Node in the Machine's namespace",
  2029  			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc1, *mhc2},
  2030  			mToCreate:   []clusterv1.Machine{*machine1},
  2031  			object:      node1,
  2032  			expected:    []reconcile.Request{mhc1Req, mhc2Req},
  2033  		},
  2034  		{
  2035  			name:        "when a MachineHealthCheck exists for the Node, but not in the Machine's cluster",
  2036  			mhcToCreate: []clusterv1.MachineHealthCheck{*mhc3},
  2037  			mToCreate:   []clusterv1.Machine{*machine1},
  2038  			object:      node1,
  2039  			expected:    []reconcile.Request{},
  2040  		},
  2041  	}
  2042  
  2043  	for _, tc := range testCases {
  2044  		t.Run(tc.name, func(t *testing.T) {
  2045  			gs := NewWithT(t)
  2046  
  2047  			for _, obj := range tc.mhcToCreate {
  2048  				o := obj
  2049  				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
  2050  				defer func() {
  2051  					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
  2052  				}()
  2053  				// Check the cache is populated
  2054  				key := util.ObjectKey(&o)
  2055  				getObj := func() error {
  2056  					return r.Client.Get(ctx, key, &clusterv1.MachineHealthCheck{})
  2057  				}
  2058  				gs.Eventually(getObj).Should(Succeed())
  2059  			}
  2060  			for _, obj := range tc.mToCreate {
  2061  				o := obj
  2062  				gs.Expect(r.Client.Create(ctx, &o)).To(Succeed())
  2063  				defer func() {
  2064  					gs.Expect(r.Client.Delete(ctx, &o)).To(Succeed())
  2065  				}()
  2066  				// Ensure the status is set (required for matching node to machine)
  2067  				o.Status = obj.Status
  2068  				gs.Expect(r.Client.Status().Update(ctx, &o)).To(Succeed())
  2069  
  2070  				// Check the cache is up to date with the status update
  2071  				key := util.ObjectKey(&o)
  2072  				checkStatus := func() clusterv1.MachineStatus {
  2073  					m := &clusterv1.Machine{}
  2074  					err := r.Client.Get(ctx, key, m)
  2075  					if err != nil {
  2076  						return clusterv1.MachineStatus{}
  2077  					}
  2078  					return m.Status
  2079  				}
  2080  				gs.Eventually(checkStatus).Should(BeComparableTo(o.Status))
  2081  			}
  2082  
  2083  			got := r.nodeToMachineHealthCheck(ctx, tc.object)
  2084  			gs.Expect(got).To(ConsistOf(tc.expected))
  2085  		})
  2086  	}
  2087  }
  2088  
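        // nodeToMachineHealthCheck, exercised above, first resolves the Node to
        // its Machine through the MachineNodeNameField index registered on the
        // client, then reuses the Machine-to-MHC mapping. A minimal sketch of the
        // index lookup (illustrative; note the ambiguous two-Machines case above
        // is expected to produce no requests):
        func sketchMachinesForNode(ctx context.Context, c client.Client, nodeName string) []clusterv1.Machine {
        	machineList := &clusterv1.MachineList{}
        	// The field selector only works if the index was registered, as done
        	// with WithIndex on the fake client above.
        	if err := c.List(ctx, machineList, client.MatchingFields{index.MachineNodeNameField: nodeName}); err != nil {
        		return nil
        	}
        	return machineList.Items
        }
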
  2089  func TestIsAllowedRemediation(t *testing.T) {
  2090  	testCases := []struct {
  2091  		name               string
  2092  		maxUnhealthy       *intstr.IntOrString
  2093  		expectedMachines   int32
  2094  		currentHealthy     int32
  2095  		allowed            bool
  2096  		observedGeneration int64
  2097  	}{
  2098  		{
  2099  			name:             "when maxUnhealthy is not set",
  2100  			maxUnhealthy:     nil,
  2101  			expectedMachines: int32(3),
  2102  			currentHealthy:   int32(0),
  2103  			allowed:          false,
  2104  		},
  2105  		{
  2106  			name:             "when maxUnhealthy is not an int or percentage",
  2107  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "abcdef"},
  2108  			expectedMachines: int32(5),
  2109  			currentHealthy:   int32(2),
  2110  			allowed:          false,
  2111  		},
  2112  		{
  2113  			name:             "when maxUnhealthy is an int less than current unhealthy",
  2114  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.Int, IntVal: int32(1)},
  2115  			expectedMachines: int32(3),
  2116  			currentHealthy:   int32(1),
  2117  			allowed:          false,
  2118  		},
  2119  		{
  2120  			name:             "when maxUnhealthy is an int equal to current unhealthy",
  2121  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.Int, IntVal: int32(2)},
  2122  			expectedMachines: int32(3),
  2123  			currentHealthy:   int32(1),
  2124  			allowed:          true,
  2125  		},
  2126  		{
  2127  			name:             "when maxUnhealthy is an int greater than current unhealthy",
  2128  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.Int, IntVal: int32(3)},
  2129  			expectedMachines: int32(3),
  2130  			currentHealthy:   int32(1),
  2131  			allowed:          true,
  2132  		},
  2133  		{
  2134  			name:             "when maxUnhealthy is a percentage less than current unhealthy",
  2135  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "50%"},
  2136  			expectedMachines: int32(5),
  2137  			currentHealthy:   int32(2),
  2138  			allowed:          false,
  2139  		},
  2140  		{
  2141  			name:             "when maxUnhealthy is a percentage equal to current unhealthy",
  2142  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "60%"},
  2143  			expectedMachines: int32(5),
  2144  			currentHealthy:   int32(2),
  2145  			allowed:          true,
  2146  		},
  2147  		{
  2148  			name:             "when maxUnhealthy is a percentage greater than current unhealthy",
  2149  			maxUnhealthy:     &intstr.IntOrString{Type: intstr.String, StrVal: "70%"},
  2150  			expectedMachines: int32(5),
  2151  			currentHealthy:   int32(2),
  2152  			allowed:          true,
  2153  		},
  2154  	}
  2155  
  2156  	for _, tc := range testCases {
  2157  		t.Run(tc.name, func(t *testing.T) {
  2158  			g := NewWithT(t)
  2159  
  2160  			mhc := &clusterv1.MachineHealthCheck{
  2161  				Spec: clusterv1.MachineHealthCheckSpec{
  2162  					MaxUnhealthy:       tc.maxUnhealthy,
  2163  					NodeStartupTimeout: &metav1.Duration{Duration: 1 * time.Millisecond},
  2164  				},
  2165  				Status: clusterv1.MachineHealthCheckStatus{
  2166  					ExpectedMachines:   tc.expectedMachines,
  2167  					CurrentHealthy:     tc.currentHealthy,
  2168  					ObservedGeneration: tc.observedGeneration,
  2169  				},
  2170  			}
  2171  
  2172  			remediationAllowed, _, _ := isAllowedRemediation(mhc)
  2173  			g.Expect(remediationAllowed).To(Equal(tc.allowed))
  2174  		})
  2175  	}
  2176  }
  2177  
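        // The table above pivots on a single comparison: remediation stays allowed
        // while the unhealthy count (expectedMachines - currentHealthy) does not
        // exceed maxUnhealthy. A hedged sketch of that arithmetic (the real
        // isAllowedRemediation returns additional values and handles more fields):
        func sketchAllowedRemediation(maxUnhealthy, expectedMachines, currentHealthy int) bool {
        	unhealthy := expectedMachines - currentHealthy
        	// Example from the cases above: "60%" of 5 expected machines scales to
        	// 3, and with currentHealthy=2 there are 3 unhealthy, so 3 <= 3 allows it.
        	return unhealthy <= maxUnhealthy
        }
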
  2178  func TestGetMaxUnhealthy(t *testing.T) {
  2179  	testCases := []struct {
  2180  		name                 string
  2181  		maxUnhealthy         *intstr.IntOrString
  2182  		expectedMaxUnhealthy int
  2183  		actualMachineCount   int32
  2184  		expectedErr          error
  2185  	}{
  2186  		{
  2187  			name:                 "when maxUnhealthy is nil",
  2188  			maxUnhealthy:         nil,
  2189  			expectedMaxUnhealthy: 0,
  2190  			actualMachineCount:   7,
  2191  			expectedErr:          errors.New("spec.maxUnhealthy must be set"),
  2192  		},
  2193  		{
  2194  			name:                 "when maxUnhealthy is not an int or percentage",
  2195  			maxUnhealthy:         &intstr.IntOrString{Type: intstr.String, StrVal: "abcdef"},
  2196  			expectedMaxUnhealthy: 0,
  2197  			actualMachineCount:   3,
  2198  			expectedErr:          errors.New("invalid value for IntOrString: invalid type: string is not a percentage"),
  2199  		},
  2200  		{
  2201  			name:                 "when maxUnhealthy is an int",
  2202  			maxUnhealthy:         &intstr.IntOrString{Type: intstr.Int, IntVal: 3},
  2203  			actualMachineCount:   2,
  2204  			expectedMaxUnhealthy: 3,
  2205  			expectedErr:          nil,
  2206  		},
  2207  		{
  2208  			name:                 "when maxUnhealthy is 40% (of 5)",
  2209  			maxUnhealthy:         &intstr.IntOrString{Type: intstr.String, StrVal: "40%"},
  2210  			actualMachineCount:   5,
  2211  			expectedMaxUnhealthy: 2,
  2212  			expectedErr:          nil,
  2213  		},
  2214  		{
  2215  			name:                 "when maxUnhealthy is 60% (of 7)",
  2216  			maxUnhealthy:         &intstr.IntOrString{Type: intstr.String, StrVal: "60%"},
  2217  			actualMachineCount:   7,
  2218  			expectedMaxUnhealthy: 4,
  2219  			expectedErr:          nil,
  2220  		},
  2221  	}
  2222  
  2223  	for _, tc := range testCases {
  2224  		t.Run(tc.name, func(t *testing.T) {
  2225  			g := NewWithT(t)
  2226  
  2227  			mhc := &clusterv1.MachineHealthCheck{
  2228  				Spec: clusterv1.MachineHealthCheckSpec{
  2229  					MaxUnhealthy: tc.maxUnhealthy,
  2230  				},
  2231  				Status: clusterv1.MachineHealthCheckStatus{
  2232  					ExpectedMachines: tc.actualMachineCount,
  2233  				},
  2234  			}
  2235  
  2236  			maxUnhealthy, err := getMaxUnhealthy(mhc)
  2237  			if tc.expectedErr != nil {
  2238  				g.Expect(err).To(MatchError(tc.expectedErr.Error()))
  2239  			} else {
  2240  				g.Expect(err).ToNot(HaveOccurred())
  2241  			}
  2242  			g.Expect(maxUnhealthy).To(Equal(tc.expectedMaxUnhealthy))
  2243  		})
  2244  	}
  2245  }
  2246  
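        // The percentage cases above round down: 40% of 5 is exactly 2, while 60%
        // of 7 scales to 4.2 and truncates to 4. A minimal sketch, assuming the
        // value is derived with the standard apimachinery helper (which also
        // produces the error strings asserted above):
        func sketchMaxUnhealthy(maxUnhealthy *intstr.IntOrString, expectedMachines int) (int, error) {
        	if maxUnhealthy == nil {
        		return 0, errors.New("spec.maxUnhealthy must be set")
        	}
        	// roundUp=false truncates percentage results, e.g. "60%" of 7 -> 4.
        	return intstr.GetScaledValueFromIntOrPercent(maxUnhealthy, expectedMachines, false)
        }
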
  2247  func ownerReferenceForCluster(ctx context.Context, g *WithT, c *clusterv1.Cluster) metav1.OwnerReference {
  2248  	// Fetch the cluster to populate the UID
  2249  	cc := &clusterv1.Cluster{}
  2250  	g.Expect(env.Get(ctx, util.ObjectKey(c), cc)).To(Succeed())
  2251  
  2252  	return metav1.OwnerReference{
  2253  		APIVersion: clusterv1.GroupVersion.String(),
  2254  		Kind:       "Cluster",
  2255  		Name:       cc.Name,
  2256  		UID:        cc.UID,
  2257  	}
  2258  }
  2259  
  2260  // createCluster creates a Cluster and a KubeconfigSecret for that Cluster in the given namespace.
  2261  func createCluster(g *WithT, namespaceName string) *clusterv1.Cluster {
  2262  	cluster := &clusterv1.Cluster{
  2263  		ObjectMeta: metav1.ObjectMeta{
  2264  			GenerateName: "test-cluster-",
  2265  			Namespace:    namespaceName,
  2266  		},
  2267  	}
  2268  
  2269  	g.Expect(env.Create(ctx, cluster)).To(Succeed())
  2270  
  2271  	// Make sure the cluster is in the cache before proceeding
  2272  	g.Eventually(func() error {
  2273  		var cl clusterv1.Cluster
  2274  		return env.Get(ctx, util.ObjectKey(cluster), &cl)
  2275  	}, timeout, 100*time.Millisecond).Should(Succeed())
  2276  
  2277  	// Marking the infrastructure ready is required for the MHC to perform health checks.
  2278  	patchHelper, err := patch.NewHelper(cluster, env.Client)
  2279  	g.Expect(err).ToNot(HaveOccurred())
  2280  	conditions.MarkTrue(cluster, clusterv1.InfrastructureReadyCondition)
  2281  	g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed())
  2282  
  2283  	// Wait for cluster in cache to be updated post-patch
  2284  	g.Eventually(func() bool {
  2285  		err := env.Get(ctx, util.ObjectKey(cluster), cluster)
  2286  		if err != nil {
  2287  			return false
  2288  		}
  2289  
  2290  		return conditions.IsTrue(cluster, clusterv1.InfrastructureReadyCondition)
  2291  	}, timeout, 100*time.Millisecond).Should(BeTrue())
  2292  
  2293  	g.Expect(env.CreateKubeconfigSecret(ctx, cluster)).To(Succeed())
  2294  
  2295  	return cluster
  2296  }
  2297  
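        // Note: the two Eventually waits above exist because env's client reads
        // from an informer cache; a Create or Patch is not immediately visible,
        // and later MHC checks would flake without the polling.
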
  2298  // newRunningMachine creates a Machine object whose Status.Phase is Running.
  2299  func newRunningMachine(c *clusterv1.Cluster, labels map[string]string) *clusterv1.Machine {
  2300  	return &clusterv1.Machine{
  2301  		TypeMeta: metav1.TypeMeta{
  2302  			APIVersion: clusterv1.GroupVersion.String(),
  2303  			Kind:       "Machine",
  2304  		},
  2305  		ObjectMeta: metav1.ObjectMeta{
  2306  			GenerateName: "test-mhc-machine-",
  2307  			Namespace:    c.Namespace,
  2308  			Labels:       labels,
  2309  		},
  2310  		Spec: clusterv1.MachineSpec{
  2311  			ClusterName: c.Name,
  2312  			Bootstrap: clusterv1.Bootstrap{
  2313  				DataSecretName: pointer.String("data-secret-name"),
  2314  			},
  2315  		},
  2316  		Status: clusterv1.MachineStatus{
  2317  			InfrastructureReady: true,
  2318  			BootstrapReady:      true,
  2319  			Phase:               string(clusterv1.MachinePhaseRunning),
  2320  			ObservedGeneration:  1,
  2321  		},
  2322  	}
  2323  }
  2324  
  2325  func newInfraMachine(machine *clusterv1.Machine) (*unstructured.Unstructured, string) {
  2326  	providerID := fmt.Sprintf("test:////%v", uuid.NewUUID())
  2327  	return &unstructured.Unstructured{
  2328  		Object: map[string]interface{}{
  2329  			"apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1",
  2330  			"kind":       "GenericInfrastructureMachine",
  2331  			"metadata": map[string]interface{}{
  2332  				"generateName": "test-mhc-machine-infra-",
  2333  				"namespace":    machine.Namespace,
  2334  			},
  2335  			"spec": map[string]interface{}{
  2336  				"providerID": providerID,
  2337  			},
  2338  		},
  2339  	}, providerID
  2340  }
  2341  
  2342  type machinesWithNodes struct {
  2343  	count                      int
  2344  	nodeStatus                 corev1.ConditionStatus
  2345  	createNodeRefForMachine    bool
  2346  	firstMachineAsControlPlane bool
  2347  	labels                     map[string]string
  2348  	failureReason              string
  2349  	failureMessage             string
  2350  }
  2351  
  2352  type machineWithNodesOption func(m *machinesWithNodes)
  2353  
  2354  func count(n int) machineWithNodesOption {
  2355  	return func(m *machinesWithNodes) {
  2356  		m.count = n
  2357  	}
  2358  }
  2359  
  2360  func firstMachineAsControlPlane() machineWithNodesOption {
  2361  	return func(m *machinesWithNodes) {
  2362  		m.firstMachineAsControlPlane = true
  2363  	}
  2364  }
  2365  
  2366  func nodeStatus(s corev1.ConditionStatus) machineWithNodesOption {
  2367  	return func(m *machinesWithNodes) {
  2368  		m.nodeStatus = s
  2369  	}
  2370  }
  2371  
  2372  func createNodeRefForMachine(b bool) machineWithNodesOption {
  2373  	return func(m *machinesWithNodes) {
  2374  		m.createNodeRefForMachine = b
  2375  	}
  2376  }
  2377  
  2378  func machineLabels(l map[string]string) machineWithNodesOption {
  2379  	return func(m *machinesWithNodes) {
  2380  		m.labels = l
  2381  	}
  2382  }
  2383  
  2384  func machineFailureReason(s string) machineWithNodesOption {
  2385  	return func(m *machinesWithNodes) {
  2386  		m.failureReason = s
  2387  	}
  2388  }
  2389  
  2390  func machineFailureMessage(s string) machineWithNodesOption {
  2391  	return func(m *machinesWithNodes) {
  2392  		m.failureMessage = s
  2393  	}
  2394  }
  2395  
  2396  func createMachinesWithNodes(
  2397  	g *WithT,
  2398  	c *clusterv1.Cluster,
  2399  	opts ...machineWithNodesOption,
  2400  ) ([]*corev1.Node, []*clusterv1.Machine, func()) {
  2401  	o := &machinesWithNodes{}
  2402  	for _, op := range opts {
  2403  		op(o)
  2404  	}
  2405  
  2406  	var (
  2407  		nodes         []*corev1.Node
  2408  		machines      []*clusterv1.Machine
  2409  		infraMachines []*unstructured.Unstructured
  2410  	)
  2411  
  2412  	for i := 0; i < o.count; i++ {
  2413  		machine := newRunningMachine(c, o.labels)
  2414  		if i == 0 && o.firstMachineAsControlPlane {
  2415  			if machine.Labels == nil {
  2416  				machine.Labels = make(map[string]string)
  2417  			}
  2418  			machine.Labels[clusterv1.MachineControlPlaneLabel] = ""
  2419  		}
  2420  		infraMachine, providerID := newInfraMachine(machine)
  2421  		g.Expect(env.Create(ctx, infraMachine)).To(Succeed())
  2422  		infraMachines = append(infraMachines, infraMachine)
  2423  		fmt.Printf("inframachine created: %s\n", infraMachine.GetName())
  2424  		// Patch the status of the InfraMachine and mark it as ready.
  2425  		// NB. Status cannot be set during object creation so we need to patch
  2426  		// it separately.
  2427  		infraMachinePatch := client.MergeFrom(infraMachine.DeepCopy())
  2428  		g.Expect(unstructured.SetNestedField(infraMachine.Object, true, "status", "ready")).To(Succeed())
  2429  		g.Expect(env.Status().Patch(ctx, infraMachine, infraMachinePatch)).To(Succeed())
  2430  
  2431  		machine.Spec.InfrastructureRef = corev1.ObjectReference{
  2432  			APIVersion: infraMachine.GetAPIVersion(),
  2433  			Kind:       infraMachine.GetKind(),
  2434  			Name:       infraMachine.GetName(),
  2435  		}
  2436  		g.Expect(env.Create(ctx, machine)).To(Succeed())
  2437  		fmt.Printf("machine created: %s\n", machine.GetName())
  2438  
  2439  		// Before moving on we want to ensure that the machine has a valid
  2440  		// status. That is, LastUpdated should not be nil.
  2441  		g.Eventually(func() *metav1.Time {
  2442  			k := client.ObjectKey{
  2443  				Name:      machine.GetName(),
  2444  				Namespace: machine.GetNamespace(),
  2445  			}
  2446  			err := env.Get(ctx, k, machine)
  2447  			if err != nil {
  2448  				return nil
  2449  			}
  2450  			return machine.Status.LastUpdated
  2451  		}, timeout, 100*time.Millisecond).ShouldNot(BeNil())
  2452  
  2453  		machinePatchHelper, err := patch.NewHelper(machine, env.Client)
  2454  		g.Expect(err).ToNot(HaveOccurred())
  2455  
  2456  		if o.createNodeRefForMachine {
  2457  			// Create node
  2458  			node := &corev1.Node{
  2459  				ObjectMeta: metav1.ObjectMeta{
  2460  					GenerateName: "test-mhc-node-",
  2461  				},
  2462  				Spec: corev1.NodeSpec{
  2463  					ProviderID: providerID,
  2464  				},
  2465  			}
  2466  
  2467  			g.Expect(env.Create(ctx, node)).To(Succeed())
  2468  			fmt.Printf("node created: %s\n", node.GetName())
  2469  
  2470  			// Patch node status
  2471  			nodePatchHelper, err := patch.NewHelper(node, env.Client)
  2472  			g.Expect(err).ToNot(HaveOccurred())
  2473  
  2474  			node.Status.Conditions = []corev1.NodeCondition{
  2475  				{
  2476  					Type:               corev1.NodeReady,
  2477  					Status:             o.nodeStatus,
  2478  					LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
  2479  				},
  2480  			}
  2481  
  2482  			g.Expect(nodePatchHelper.Patch(ctx, node)).To(Succeed())
  2483  
  2484  			nodes = append(nodes, node)
  2485  
  2486  			machine.Status.NodeRef = &corev1.ObjectReference{
  2487  				Name: node.Name,
  2488  			}
  2489  		}
  2490  
  2491  		if o.failureReason != "" {
  2492  			failureReason := capierrors.MachineStatusError(o.failureReason)
  2493  			machine.Status.FailureReason = &failureReason
  2494  		}
  2495  		if o.failureMessage != "" {
  2496  			machine.Status.FailureMessage = pointer.String(o.failureMessage)
  2497  		}
  2498  
  2499  		// Add one second to guarantee LastUpdated differs from the original
  2500  		// time; otherwise the patch could be dropped when sub-second precision
  2501  		// is lost during conversions.
  2502  		lastUp := metav1.NewTime(machine.Status.LastUpdated.Add(time.Second))
  2503  		machine.Status.LastUpdated = &lastUp
  2504  
  2505  		// Patch the machine to record the status changes
  2506  		g.Expect(machinePatchHelper.Patch(ctx, machine)).To(Succeed())
  2507  
  2508  		machines = append(machines, machine)
  2509  	}
  2510  
  2511  	cleanup := func() {
  2512  		fmt.Println("Cleaning up nodes, machines and infra machines.")
  2513  		for _, n := range nodes {
  2514  			if err := env.Delete(ctx, n); !apierrors.IsNotFound(err) {
  2515  				g.Expect(err).ToNot(HaveOccurred())
  2516  			}
  2517  		}
  2518  		for _, m := range machines {
  2519  			g.Expect(env.Delete(ctx, m)).To(Succeed())
  2520  		}
  2521  		for _, im := range infraMachines {
  2522  			if err := env.Delete(ctx, im); !apierrors.IsNotFound(err) {
  2523  				g.Expect(err).ToNot(HaveOccurred())
  2524  			}
  2525  		}
  2526  	}
  2527  
  2528  	return nodes, machines, cleanup
  2529  }
  2530  
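        // createMachinesWithNodes pairs the functional options above with a
        // cleanup closure, so each test declares only the fixture it needs and
        // tears it down with a single defer, e.g. (mirroring the tests above):
        //
        //	_, machines, cleanup := createMachinesWithNodes(g, cluster,
        //		count(2),
        //		createNodeRefForMachine(true),
        //		nodeStatus(corev1.ConditionTrue),
        //	)
        //	defer cleanup()
        //
        // Cleanup tolerates NotFound for Nodes and InfraMachines, since some
        // scenarios delete those objects as part of the test itself.
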
  2531  func newMachineHealthCheckWithLabels(name, namespace, cluster string, labels map[string]string) *clusterv1.MachineHealthCheck {
  2532  	l := make(map[string]string, len(labels))
  2533  	for k, v := range labels {
  2534  		l[k] = v
  2535  	}
  2536  	l[clusterv1.ClusterNameLabel] = cluster
  2537  
  2538  	mhc := newMachineHealthCheck(namespace, cluster)
  2539  	mhc.SetName(name)
  2540  	mhc.Labels = l
  2541  	mhc.Spec.Selector.MatchLabels = l
  2542  
  2543  	return mhc
  2544  }
  2545  
  2546  func newMachineHealthCheck(namespace, clusterName string) *clusterv1.MachineHealthCheck {
  2547  	maxUnhealthy := intstr.FromString("100%")
  2548  	return &clusterv1.MachineHealthCheck{
  2549  		ObjectMeta: metav1.ObjectMeta{
  2550  			GenerateName: "test-mhc-",
  2551  			Namespace:    namespace,
  2552  		},
  2553  		Spec: clusterv1.MachineHealthCheckSpec{
  2554  			ClusterName: clusterName,
  2555  			Selector: metav1.LabelSelector{
  2556  				MatchLabels: map[string]string{
  2557  					"selector": string(uuid.NewUUID()),
  2558  				},
  2559  			},
  2560  			MaxUnhealthy:       &maxUnhealthy,
  2561  			NodeStartupTimeout: &metav1.Duration{Duration: 1 * time.Millisecond},
  2562  			UnhealthyConditions: []clusterv1.UnhealthyCondition{
  2563  				{
  2564  					Type:    corev1.NodeReady,
  2565  					Status:  corev1.ConditionUnknown,
  2566  					Timeout: metav1.Duration{Duration: 5 * time.Minute},
  2567  				},
  2568  			},
  2569  		},
  2570  	}
  2571  }
  2572  
  2573  func TestPatchTargets(t *testing.T) {
  2574  	g := NewWithT(t)
  2575  
  2576  	namespace := metav1.NamespaceDefault
  2577  	clusterName := testClusterName
  2578  	defaultCluster := &clusterv1.Cluster{
  2579  		ObjectMeta: metav1.ObjectMeta{
  2580  			Name:      clusterName,
  2581  			Namespace: namespace,
  2582  		},
  2583  	}
  2584  	labels := map[string]string{"cluster": "foo", "nodepool": "bar"}
  2585  
  2586  	mhc := newMachineHealthCheckWithLabels("mhc", namespace, clusterName, labels)
  2587  	machine1 := newTestMachine("machine1", namespace, clusterName, "nodeName", labels)
  2588  	machine1.ResourceVersion = "999"
  2589  	conditions.MarkTrue(machine1, clusterv1.MachineHealthCheckSucceededCondition)
  2590  	machine2 := machine1.DeepCopy()
  2591  	machine2.Name = "machine2"
  2592  
  2593  	cl := fake.NewClientBuilder().WithObjects(
  2594  		machine1,
  2595  		machine2,
  2596  		mhc,
  2597  	).WithStatusSubresource(&clusterv1.MachineHealthCheck{}, &clusterv1.Machine{}).Build()
  2598  	r := &Reconciler{
  2599  		Client:   cl,
  2600  		recorder: record.NewFakeRecorder(32),
  2601  		Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), cl, scheme.Scheme, client.ObjectKey{Name: clusterName, Namespace: namespace}, "machinehealthcheck-watchClusterNodes"),
  2602  	}
  2603  
  2604  	// To make the patch fail, create patchHelper with a different client.
  2605  	fakeMachine := machine1.DeepCopy()
  2606  	fakeMachine.Name = "fake"
  2607  	patchHelper, _ := patch.NewHelper(fakeMachine, fake.NewClientBuilder().WithObjects(fakeMachine).Build())
  2608  	// healthCheckTarget with the fake patchHelper; patching this target should fail.
  2609  	target1 := healthCheckTarget{
  2610  		MHC:         mhc,
  2611  		Machine:     machine1,
  2612  		patchHelper: patchHelper,
  2613  		Node:        &corev1.Node{},
  2614  	}
  2615  
  2616  	// healthCheckTarget with correct patchHelper.
  2617  	patchHelper2, _ := patch.NewHelper(machine2, cl)
  2618  	target3 := healthCheckTarget{
  2619  		MHC:         mhc,
  2620  		Machine:     machine2,
  2621  		patchHelper: patchHelper2,
  2622  		Node:        &corev1.Node{},
  2623  	}
  2624  
  2625  	// The target with the wrong patch helper will fail, but the other one will be patched.
  2626  	g.Expect(r.patchUnhealthyTargets(context.TODO(), logr.New(log.NullLogSink{}), []healthCheckTarget{target1, target3}, defaultCluster, mhc)).ToNot(BeEmpty())
  2627  	g.Expect(cl.Get(ctx, client.ObjectKey{Name: machine2.Name, Namespace: machine2.Namespace}, machine2)).ToNot(HaveOccurred())
  2628  	g.Expect(conditions.Get(machine2, clusterv1.MachineOwnerRemediatedCondition).Status).To(Equal(corev1.ConditionFalse))
  2629  
  2630  	// Likewise for healthy targets: the one with the wrong patch helper fails, the other is patched.
  2631  	g.Expect(r.patchHealthyTargets(context.TODO(), logr.New(log.NullLogSink{}), []healthCheckTarget{target1, target3}, mhc)).ToNot(BeEmpty())
  2632  }