sigs.k8s.io/cluster-api@v1.6.3/controllers/remote/cluster_cache_healthcheck_test.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package remote

import (
	"context"
	"fmt"
	"net"
	"testing"
	"time"

	. "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	"k8s.io/klog/v2/klogr"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
)

func TestClusterCacheHealthCheck(t *testing.T) {
	t.Run("when health checking clusters", func(t *testing.T) {
		var mgr manager.Manager
		var mgrContext context.Context
		var mgrCancel context.CancelFunc
		var k8sClient client.Client

		var testClusterKey client.ObjectKey
		var cct *ClusterCacheTracker
		var cc *stoppableCache

		var testPollInterval = 250 * time.Millisecond
		var testPollTimeout = 1 * time.Second
		var testUnhealthyThreshold = 3

		setup := func(t *testing.T, g *WithT) *corev1.Namespace {
			t.Helper()

			t.Log("Setting up a new manager")
			var err error
			mgr, err = manager.New(env.Config, manager.Options{
				Scheme: scheme.Scheme,
				Metrics: metricsserver.Options{
					BindAddress: "0",
				},
			})
			g.Expect(err).ToNot(HaveOccurred())

			mgrContext, mgrCancel = context.WithCancel(ctx)
			t.Log("Starting the manager")
			go func() {
				g.Expect(mgr.Start(mgrContext)).To(Succeed())
			}()
			<-env.Manager.Elected()

			k8sClient = mgr.GetClient()

			t.Log("Setting up a ClusterCacheTracker")
			log := klogr.New()
			cct, err = NewClusterCacheTracker(mgr, ClusterCacheTrackerOptions{
				Log:     &log,
				Indexes: []Index{NodeProviderIDIndex},
			})
			g.Expect(err).ToNot(HaveOccurred())

			t.Log("Creating a namespace for the test")
			ns, err := env.CreateNamespace(ctx, "cluster-cache-health-test")
			g.Expect(err).ToNot(HaveOccurred())

			t.Log("Creating a test cluster")
			testCluster := &clusterv1.Cluster{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "test-cluster",
					Namespace: ns.GetName(),
				},
			}
			g.Expect(k8sClient.Create(ctx, testCluster)).To(Succeed())
			conditions.MarkTrue(testCluster, clusterv1.ControlPlaneInitializedCondition)
			testCluster.Status.InfrastructureReady = true
			g.Expect(k8sClient.Status().Update(ctx, testCluster)).To(Succeed())

			t.Log("Creating a test cluster kubeconfig")
			g.Expect(env.CreateKubeconfigSecret(ctx, testCluster)).To(Succeed())

			testClusterKey = util.ObjectKey(testCluster)

			_, cancel := context.WithCancel(ctx)
			cc = &stoppableCache{cancelFunc: cancel}
			cct.clusterAccessors[testClusterKey] = &clusterAccessor{cache: cc}

			return ns
		}

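		// teardown deletes the Secrets, Clusters, and Namespace created by setup,
		// then stops the cluster cache and the manager.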
		teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) {
			t.Helper()

			t.Log("Deleting any Secrets")
			g.Expect(cleanupTestSecrets(ctx, k8sClient)).To(Succeed())
			t.Log("Deleting any Clusters")
			g.Expect(cleanupTestClusters(ctx, k8sClient)).To(Succeed())
			t.Log("Deleting Namespace")
			g.Expect(env.Delete(ctx, ns)).To(Succeed())
			t.Log("Stopping the manager")
			cc.cancelFunc()
			mgrCancel()
		}

		t.Run("with a healthy cluster", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())

			go cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                env.Config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})

			// Make sure this passes for at least a few seconds, to give the health check goroutine time to run.
			g.Consistently(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeTrue())
		})

		t.Run("during creation of a new cluster accessor", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)
			// Create a context with a timeout to cancel the health check after some time.
			ctx, cancel := context.WithTimeout(ctx, time.Second)
			defer cancel()
			// Delete the cluster accessor and lock the cluster to simulate creation of a new cluster accessor.
			cct.deleteAccessor(ctx, testClusterKey)
			g.Expect(cct.clusterLock.TryLock(testClusterKey)).To(BeTrue())
			startHealthCheck := time.Now()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                env.Config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})
			timeElapsedForHealthCheck := time.Since(startHealthCheck)
			// If the duration is shorter than the timeout, we know that the health check wasn't requeued properly.
			g.Expect(timeElapsedForHealthCheck).Should(BeNumerically(">=", time.Second))
			// The health check should be aborted by the timeout of the context.
			g.Expect(ctx.Done()).Should(BeClosed())
		})

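		// With a request path that does not exist on the apiserver, every health check fails
		// and the accessor should eventually be removed.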
		t.Run("with an invalid path", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			go cct.healthCheckCluster(ctx,
				&healthCheckInput{
					cluster:            testClusterKey,
					cfg:                env.Config,
					httpClient:         httpClient,
					interval:           testPollInterval,
					requestTimeout:     testPollTimeout,
					unhealthyThreshold: testUnhealthyThreshold,
					path:               "/clusterAccessor",
				})

			// The accessor should be removed after N consecutive failed requests.
			g.Eventually(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeFalse())
		})

		t.Run("with an invalid config", func(t *testing.T) {
			g := NewWithT(t)
			ns := setup(t, g)
			defer teardown(t, g, ns)

			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			// Set the host to a random free port on localhost.
			addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
			g.Expect(err).ToNot(HaveOccurred())
			l, err := net.ListenTCP("tcp", addr)
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(l.Close()).To(Succeed())

			config := rest.CopyConfig(env.Config)
			config.Host = fmt.Sprintf("http://127.0.0.1:%d", l.Addr().(*net.TCPAddr).Port)

			httpClient, err := rest.HTTPClientFor(env.Config)
			g.Expect(err).ToNot(HaveOccurred())
			go cct.healthCheckCluster(ctx, &healthCheckInput{
				cluster:            testClusterKey,
				cfg:                config,
				httpClient:         httpClient,
				interval:           testPollInterval,
				requestTimeout:     testPollTimeout,
				unhealthyThreshold: testUnhealthyThreshold,
				path:               "/",
			})

			// The accessor should be removed after N consecutive failed requests.
			g.Eventually(func() bool {
				_, ok := cct.loadAccessor(testClusterKey)
				return ok
			}, 5*time.Second, 1*time.Second).Should(BeFalse())
		})
	})
}