sigs.k8s.io/cluster-api@v1.7.1/controllers/remote/cluster_cache_healthcheck_test.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package remote

import (
	"context"
	"fmt"
	"net"
	"testing"
	"time"

	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
)

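// TestClusterCacheHealthCheck exercises ClusterCacheTracker.healthCheckCluster against
// a healthy cluster, an accessor that is still being created, an invalid health-check
// path, and an unreachable API server host.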
func TestClusterCacheHealthCheck(t *testing.T) {
	t.Run("when health checking clusters", func(t *testing.T) {
		var mgr manager.Manager
		var mgrContext context.Context
		var mgrCancel context.CancelFunc
		var k8sClient client.Client

		var testClusterKey client.ObjectKey
		var cct *ClusterCacheTracker
		var cc *stoppableCache

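		// Short intervals and a low unhealthy threshold keep the health-check behaviour
		// observable within the test timeouts used below.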
		var testPollInterval = 250 * time.Millisecond
		var testPollTimeout = 1 * time.Second
		var testUnhealthyThreshold = 3

		setup := func(t *testing.T, g *WithT) *corev1.Namespace {
			t.Helper()

			t.Log("Setting up a new manager")
			var err error
			mgr, err = manager.New(env.Config, manager.Options{
				Scheme: scheme.Scheme,
				Metrics: metricsserver.Options{
					BindAddress: "0",
				},
			})
			g.Expect(err).ToNot(HaveOccurred())

			mgrContext, mgrCancel = context.WithCancel(ctx)
			t.Log("Starting the manager")
			go func() {
				g.Expect(mgr.Start(mgrContext)).To(Succeed())
			}()
			<-env.Manager.Elected()

			k8sClient = mgr.GetClient()

			t.Log("Setting up a ClusterCacheTracker")
			cct, err = NewClusterCacheTracker(mgr, ClusterCacheTrackerOptions{
				Log:     &ctrl.Log,
				Indexes: []Index{NodeProviderIDIndex},
			})
			g.Expect(err).ToNot(HaveOccurred())

			t.Log("Creating a namespace for the test")
			ns, err := env.CreateNamespace(ctx, "cluster-cache-health-test")
			g.Expect(err).ToNot(HaveOccurred())

			t.Log("Creating a test cluster")
			testCluster := &clusterv1.Cluster{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "test-cluster",
					Namespace: ns.GetName(),
				},
			}
			g.Expect(k8sClient.Create(ctx, testCluster)).To(Succeed())
			conditions.MarkTrue(testCluster, clusterv1.ControlPlaneInitializedCondition)
			testCluster.Status.InfrastructureReady = true
			g.Expect(k8sClient.Status().Update(ctx, testCluster)).To(Succeed())

			t.Log("Creating a test cluster kubeconfig")
			g.Expect(env.CreateKubeconfigSecret(ctx, testCluster)).To(Succeed())

			testClusterKey = util.ObjectKey(testCluster)

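			// Register a pre-created accessor whose cache can be cancelled, so the health
			// check has an accessor to look up and, on failure, to tear down.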
			_, cancel := context.WithCancel(ctx)
			cc = &stoppableCache{cancelFunc: cancel}
			cct.clusterAccessors[testClusterKey] = &clusterAccessor{cache: cc}

			return ns
		}

		teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) {
			t.Helper()

			t.Log("Deleting any Secrets")
			g.Expect(cleanupTestSecrets(ctx, k8sClient)).To(Succeed())
			t.Log("Deleting any Clusters")
			g.Expect(cleanupTestClusters(ctx, k8sClient)).To(Succeed())
			t.Log("Deleting Namespace")
			g.Expect(env.Delete(ctx, ns)).To(Succeed())
			t.Log("Stopping the manager")
			cc.cancelFunc()
			mgrCancel()
		}

		t.Run("with a healthy cluster", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())

			go cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                env.Config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})

			// Make sure the accessor stays registered for at least a few seconds, to give
			// the health check goroutine time to run against the healthy cluster.
			g.Consistently(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeTrue())
		})

		t.Run("during creation of a new cluster accessor", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)
			// Create a context with a timeout to cancel the healthcheck after some time.
			ctx, cancel := context.WithTimeout(ctx, time.Second)
			defer cancel()
			// Delete the cluster accessor and lock the cluster to simulate creation of a new cluster accessor.
			cct.deleteAccessor(ctx, testClusterKey)
			g.Expect(cct.clusterLock.TryLock(testClusterKey)).To(BeTrue())
			startHealthCheck := time.Now()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                env.Config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})
			timeElapsedForHealthCheck := time.Since(startHealthCheck)
			// If the duration is shorter than the timeout, we know that the healthcheck wasn't requeued properly.
			g.Expect(timeElapsedForHealthCheck).Should(BeNumerically(">=", time.Second))
			// The healthcheck should be aborted by the timeout of the context.
			g.Expect(ctx.Done()).Should(BeClosed())
		})

		t.Run("with an invalid path", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			go cct.healthCheckCluster(ctx,
				&healthCheckInput{
					cluster:            testClusterKey,
					cfg:                env.Config,
					httpClient:         httpClient,
					interval:           testPollInterval,
					requestTimeout:     testPollTimeout,
					unhealthyThreshold: testUnhealthyThreshold,
					path:               "/clusterAccessor",
				})

			// The accessor should be removed once the unhealthy threshold of consecutive failed requests is reached.
			g.Eventually(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeFalse())
		})

		t.Run("with an invalid config", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			// Point the config at a free port on localhost where nothing is listening, so health checks fail.
			addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
			g.Expect(err).ToNot(HaveOccurred())
			l, err := net.ListenTCP("tcp", addr)
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(l.Close()).To(Succeed())

			config := rest.CopyConfig(env.Config)
			config.Host = fmt.Sprintf("http://127.0.0.1:%d", l.Addr().(*net.TCPAddr).Port)

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			go cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})

			// The accessor should be removed once the unhealthy threshold of consecutive failed requests is reached.
			g.Eventually(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeFalse())
		})
	})
}