sigs.k8s.io/cluster-api@v1.6.3/controllers/remote/cluster_cache_healthcheck_test.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package remote

import (
	"context"
	"fmt"
	"net"
	"testing"
	"time"

	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	"k8s.io/klog/v2/klogr"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
)

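// TestClusterCacheHealthCheck exercises ClusterCacheTracker.healthCheckCluster against the
// shared test environment: a healthy cluster keeps its accessor registered, while repeated
// health check failures eventually remove it.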
func TestClusterCacheHealthCheck(t *testing.T) {
	t.Run("when health checking clusters", func(t *testing.T) {
		var mgr manager.Manager
		var mgrContext context.Context
		var mgrCancel context.CancelFunc
		var k8sClient client.Client

		var testClusterKey client.ObjectKey
		var cct *ClusterCacheTracker
		var cc *stoppableCache

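		// Health check tuning used by every subtest: poll every 250ms, time out each request
		// after 1s, and treat 3 consecutive failures as unhealthy.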
		var testPollInterval = 250 * time.Millisecond
		var testPollTimeout = 1 * time.Second
		var testUnhealthyThreshold = 3

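		// setup starts a dedicated manager, creates a test Cluster together with its kubeconfig
		// Secret, and registers a cluster accessor for it in the tracker.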
		setup := func(t *testing.T, g *WithT) *corev1.Namespace {
			t.Helper()

			t.Log("Setting up a new manager")
			var err error
			mgr, err = manager.New(env.Config, manager.Options{
				Scheme: scheme.Scheme,
				Metrics: metricsserver.Options{
					BindAddress: "0",
				},
			})
			g.Expect(err).ToNot(HaveOccurred())

			mgrContext, mgrCancel = context.WithCancel(ctx)
			t.Log("Starting the manager")
			go func() {
				g.Expect(mgr.Start(mgrContext)).To(Succeed())
			}()
			<-env.Manager.Elected()

			k8sClient = mgr.GetClient()

			t.Log("Setting up a ClusterCacheTracker")
			log := klogr.New()
			cct, err = NewClusterCacheTracker(mgr, ClusterCacheTrackerOptions{
				Log:     &log,
				Indexes: []Index{NodeProviderIDIndex},
			})
			g.Expect(err).ToNot(HaveOccurred())

			t.Log("Creating a namespace for the test")
			ns, err := env.CreateNamespace(ctx, "cluster-cache-health-test")
			g.Expect(err).ToNot(HaveOccurred())

			t.Log("Creating a test cluster")
			testCluster := &clusterv1.Cluster{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "test-cluster",
					Namespace: ns.GetName(),
				},
			}
			g.Expect(k8sClient.Create(ctx, testCluster)).To(Succeed())
			conditions.MarkTrue(testCluster, clusterv1.ControlPlaneInitializedCondition)
			testCluster.Status.InfrastructureReady = true
			g.Expect(k8sClient.Status().Update(ctx, testCluster)).To(Succeed())

			t.Log("Creating a test cluster kubeconfig")
			g.Expect(env.CreateKubeconfigSecret(ctx, testCluster)).To(Succeed())

			testClusterKey = util.ObjectKey(testCluster)

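			// Register an accessor backed by a stoppable cache directly in the tracker, so
			// healthCheckCluster has an accessor to keep alive or remove.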
			_, cancel := context.WithCancel(ctx)
			cc = &stoppableCache{cancelFunc: cancel}
			cct.clusterAccessors[testClusterKey] = &clusterAccessor{cache: cc}

			return ns
		}

		teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) {
			t.Helper()

			t.Log("Deleting any Secrets")
			g.Expect(cleanupTestSecrets(ctx, k8sClient)).To(Succeed())
			t.Log("Deleting any Clusters")
			g.Expect(cleanupTestClusters(ctx, k8sClient)).To(Succeed())
			t.Log("Deleting Namespace")
			g.Expect(env.Delete(ctx, ns)).To(Succeed())
			t.Log("Stopping the manager")
			cc.cancelFunc()
			mgrCancel()
		}

		t.Run("with a healthy cluster", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())

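			// Health check the real test environment API server at "/"; the requests should keep
			// succeeding, so the accessor must stay registered.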
			go cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                env.Config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})

			// Make sure this passes for at least a few seconds, to give the health check goroutine time to run.
			g.Consistently(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeTrue())
		})

		t.Run("during creation of a new cluster accessor", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)
			// Create a context with a timeout to cancel the healthcheck after some time
			ctx, cancel := context.WithTimeout(ctx, time.Second)
			defer cancel()
			// Delete the cluster accessor and lock the cluster to simulate creation of a new cluster accessor
			cct.deleteAccessor(ctx, testClusterKey)
			g.Expect(cct.clusterLock.TryLock(testClusterKey)).To(BeTrue())
			startHealthCheck := time.Now()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                env.Config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})
			timeElapsedForHealthCheck := time.Since(startHealthCheck)
			// If the duration is shorter than the timeout, we know that the healthcheck wasn't requeued properly.
			g.Expect(timeElapsedForHealthCheck).Should(BeNumerically(">=", time.Second))
			// The healthcheck should be aborted by the timeout of the context
			g.Expect(ctx.Done()).Should(BeClosed())
		})

		t.Run("with an invalid path", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
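			// Probe a path that does not exist on the API server, so every request fails and the
			// accessor is removed once the unhealthy threshold is reached.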
			go cct.healthCheckCluster(ctx,
				&healthCheckInput{
					cluster:            testClusterKey,
					cfg:                env.Config,
					httpClient:         httpClient,
					interval:           testPollInterval,
					requestTimeout:     testPollTimeout,
					unhealthyThreshold: testUnhealthyThreshold,
					path:               "/clusterAccessor",
				})

			// The accessor should be removed after N consecutive failed requests.
			g.Eventually(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeFalse())
		})

		t.Run("with an invalid config", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			// Set the host to a random free port on localhost
			addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
			g.Expect(err).ToNot(HaveOccurred())
			l, err := net.ListenTCP("tcp", addr)
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(l.Close()).To(Succeed())

			config := rest.CopyConfig(env.Config)
			config.Host = fmt.Sprintf("http://127.0.0.1:%d", l.Addr().(*net.TCPAddr).Port)

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
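			// The copied config now points at a closed local port, so every health check fails
			// and the accessor is removed once the unhealthy threshold is reached.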
			go cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})

			// The accessor should be removed after N consecutive failed requests.
			g.Eventually(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeFalse())
		})
	})
}