github.com/cilium/cilium@v1.16.2/test/k8s/chaos.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package k8sTest 5 6 import ( 7 "context" 8 "fmt" 9 10 . "github.com/onsi/gomega" 11 12 . "github.com/cilium/cilium/test/ginkgo-ext" 13 "github.com/cilium/cilium/test/helpers" 14 ) 15 16 // The 5.4 CI job is intended to catch BPF complexity regressions and as such 17 // doesn't need to execute this test suite. 18 var _ = SkipDescribeIf(helpers.RunsOn54Kernel, "K8sAgentChaosTest", func() { 19 20 var ( 21 kubectl *helpers.Kubectl 22 demoDSPath string 23 cnpPath string 24 ciliumFilename string 25 testDSService = "testds-service" 26 ) 27 28 BeforeAll(func() { 29 kubectl = helpers.CreateKubectl(helpers.K8s1VMName(), logger) 30 demoDSPath = helpers.ManifestGet(kubectl.BasePath(), "demo_ds.yaml") 31 cnpPath = helpers.ManifestGet(kubectl.BasePath(), "cnp-to-cidr-oneoneoneone.yaml") 32 33 ciliumFilename = helpers.TimestampFilename("cilium.yaml") 34 DeployCiliumAndDNS(kubectl, ciliumFilename) 35 }) 36 37 AfterFailed(func() { 38 kubectl.CiliumReport("cilium-dbg service list", "cilium-dbg endpoint list") 39 }) 40 41 JustAfterEach(func() { 42 kubectl.ValidateNoErrorsInLogs(CurrentGinkgoTestDescription().Duration) 43 }) 44 45 AfterAll(func() { 46 UninstallCiliumFromManifest(kubectl, ciliumFilename) 47 kubectl.CloseSSHClient() 48 }) 49 50 Context("Connectivity demo application", func() { 51 BeforeEach(func() { 52 kubectl.ApplyDefault(demoDSPath).ExpectSuccess("DS deployment cannot be applied") 53 // this NP needs to be a separate namespace; it is just to trigger CIDR allocation 54 kubectl.NamespaceCreate("cilium-test-unused").ExpectSuccess("Namespace cilium-test-unused could not be created") 55 kubectl.Apply(helpers.ApplyOptions{ 56 FilePath: cnpPath, 57 Namespace: "cilium-test-unused", 58 }).ExpectSuccess("CNP cannot be applied") 59 60 err := kubectl.WaitforPods( 61 helpers.DefaultNamespace, "-l zgroup=testDS", helpers.HelperTimeout) 62 Expect(err).Should(BeNil(), "Pods are not ready after timeout") 63 }) 64 65 AfterEach(func() { 66 kubectl.DeleteLong(demoDSPath).ExpectSuccess( 67 "%s deployment cannot be deleted", demoDSPath) 68 ExpectAllPodsTerminated(kubectl) 69 70 }) 71 72 // connectivityTest performs a few test inside: 73 // - tests connectivity of all client pods to the backend pods directly via ping 74 // - tests connectivity of all client pods to the ClusterIP of the test-ds service via curl 75 // - tests connectivity of all client pods to the DNS name for the test-ds service via curl 76 // - tests that CIDR identities are stable after restoration 77 connectivityTest := func() { 78 pods, err := kubectl.GetPodNames(helpers.DefaultNamespace, "zgroup=testDSClient") 79 Expect(err).To(BeNil(), "Cannot get pods names") 80 Expect(len(pods)).To(BeNumerically(">", 0), "No pods available to test connectivity") 81 82 dsPods, err := kubectl.GetPodsIPs(helpers.DefaultNamespace, "zgroup=testDS") 83 Expect(err).To(BeNil(), "Cannot get daemonset pods IPS") 84 Expect(len(dsPods)).To(BeNumerically(">", 0), "No pods available to test connectivity") 85 86 By("Waiting for kube-dns entry for service testds-service") 87 err = kubectl.WaitForKubeDNSEntry(testDSService, helpers.DefaultNamespace) 88 ExpectWithOffset(1, err).To(BeNil(), "DNS entry is not ready after timeout") 89 90 By("Getting ClusterIP For testds-service") 91 host, _, err := kubectl.GetServiceHostPort(helpers.DefaultNamespace, "testds-service") 92 ExpectWithOffset(1, err).To(BeNil(), "unable to get ClusterIP and port for service testds-service") 93 94 for _, pod := range pods { 95 for _, ip := range dsPods { 96 By("Pinging testds pod with IP %q from client pod %q", ip, pod) 97 res := kubectl.ExecPodCmd( 98 helpers.DefaultNamespace, pod, helpers.Ping(ip)) 99 log.Debugf("Pod %s ping %v", pod, ip) 100 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), 101 "Cannot ping from %q to %q", pod, ip) 102 } 103 104 By("Curling testds-service via ClusterIP %q", host) 105 res := kubectl.ExecPodCmd( 106 helpers.DefaultNamespace, pod, helpers.CurlFail("http://%s:80/", host)) 107 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), 108 "Cannot curl from %q to testds-service via ClusterIP", pod) 109 110 By("Curling testds-service via DNS hostname") 111 res = kubectl.ExecPodCmd( 112 helpers.DefaultNamespace, pod, helpers.CurlFail("http://%s:80/", testDSService)) 113 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), 114 "Cannot curl from %q to testds-service via DNS hostname", pod) 115 } 116 } 117 118 It("Endpoint can still connect while Cilium is not running", func() { 119 By("Waiting for deployed pods to be ready") 120 err := kubectl.WaitforPods( 121 helpers.DefaultNamespace, 122 "-l zgroup=testDSClient", helpers.HelperTimeout) 123 Expect(err).Should(BeNil(), "Pods are not ready after timeout") 124 125 err = kubectl.CiliumEndpointWaitReady() 126 Expect(err).To(BeNil(), "Endpoints are not ready after timeout") 127 128 By("Checking connectivity before restarting Cilium") 129 connectivityTest() 130 131 By("Determining the identity for an extra-cluster cidr") 132 ciliumPodK8s1, err := kubectl.GetCiliumPodOnNode(helpers.K8s1) 133 Expect(err).To(BeNil(), "Could not list Cilium pods") 134 origID, err := kubectl.GetCiliumIdentityForIP(ciliumPodK8s1, "1.1.1.1") 135 Expect(err).To(BeNil(), "Could not look up numeric identity for 1.1.1.1") 136 137 By("Deleting cilium pods") 138 res := kubectl.Exec(fmt.Sprintf("%s -n %s delete pods -l k8s-app=cilium", 139 helpers.KubectlCmd, helpers.CiliumNamespace)) 140 res.ExpectSuccess() 141 142 ExpectAllPodsTerminated(kubectl) 143 144 ExpectCiliumReady(kubectl) 145 err = kubectl.CiliumEndpointWaitReady() 146 Expect(err).To(BeNil(), "Endpoints are not ready after Cilium restarts") 147 148 By("Checking connectivity after restarting Cilium") 149 connectivityTest() 150 151 By("Determining the identity for an extra-cluster cidr") 152 ciliumPodK8s1, err = kubectl.GetCiliumPodOnNode(helpers.K8s1) 153 Expect(err).To(BeNil(), "Could not list Cilium pods") 154 newID, err := kubectl.GetCiliumIdentityForIP(ciliumPodK8s1, "1.1.1.1") 155 Expect(err).To(BeNil(), "Could not look up numeric identity for 1.1.1.1") 156 Expect(origID).To(Equal(newID), "Numeric identity for 1.1.1.1 should not change after restart.") 157 158 By("Uninstall cilium pods") 159 160 res = kubectl.DeleteResource( 161 "ds", fmt.Sprintf("-n %s cilium", helpers.CiliumNamespace)) 162 res.ExpectSuccess("Cilium DS cannot be deleted") 163 164 ExpectAllPodsTerminated(kubectl) 165 166 By("Checking connectivity after uninstalling Cilium") 167 connectivityTest() 168 169 By("Reinstall cilium DaemonSet") 170 err = kubectl.CiliumInstall(ciliumFilename, map[string]string{}) 171 Expect(err).To(BeNil(), "Cilium cannot be installed") 172 173 ExpectCiliumReady(kubectl) 174 175 err = kubectl.CiliumEndpointWaitReady() 176 Expect(err).To(BeNil(), "Endpoints are not ready after timeout") 177 178 By("Checking connectivity after reinstalling Cilium") 179 connectivityTest() 180 181 By("Determining the identity for an extra-cluster cidr") 182 ciliumPodK8s1, err = kubectl.GetCiliumPodOnNode(helpers.K8s1) 183 Expect(err).To(BeNil(), "Could not list Cilium pods") 184 newID, err = kubectl.GetCiliumIdentityForIP(ciliumPodK8s1, "1.1.1.1") 185 Expect(err).To(BeNil(), "Could not look up numeric identity for 1.1.1.1") 186 Expect(origID).To(Equal(newID), "Numeric identity for 1.1.1.1 should not change after restart.") 187 }) 188 }) 189 190 Context("Restart with long lived connections", func() { 191 192 var ( 193 netperfManifest string 194 netperfPolicy string 195 netperfServiceName = "netperf-service" 196 podsIps map[string]string 197 netperfClient = "netperf-client" 198 netperfServer = "netperf-server" 199 ) 200 201 BeforeAll(func() { 202 netperfManifest = helpers.ManifestGet(kubectl.BasePath(), "netperf-deployment.yaml") 203 netperfPolicy = helpers.ManifestGet(kubectl.BasePath(), "netperf-policy.yaml") 204 205 kubectl.ApplyDefault(netperfManifest).ExpectSuccess("Netperf cannot be deployed") 206 207 err := kubectl.WaitforPods( 208 helpers.DefaultNamespace, 209 "-l zgroup=testapp", helpers.HelperTimeout) 210 Expect(err).Should(BeNil(), "Pods are not ready after timeout") 211 212 podsIps, err = kubectl.GetPodsIPs(helpers.DefaultNamespace, "zgroup=testapp") 213 Expect(err).To(BeNil(), "Cannot get pods ips") 214 215 _, _, err = kubectl.GetServiceHostPort(helpers.DefaultNamespace, netperfServiceName) 216 Expect(err).To(BeNil(), "cannot get service netperf ip") 217 }) 218 219 AfterAll(func() { 220 _ = kubectl.Delete(netperfManifest) 221 ExpectAllPodsTerminated(kubectl) 222 }) 223 224 AfterEach(func() { 225 _ = kubectl.Delete(netperfPolicy) 226 }) 227 228 restartCilium := func() { 229 ciliumFilter := "k8s-app=cilium" 230 231 By("Deleting all cilium pods") 232 res := kubectl.Exec(fmt.Sprintf( 233 "%s -n %s delete pods -l %s", 234 helpers.KubectlCmd, helpers.CiliumNamespace, ciliumFilter)) 235 res.ExpectSuccess("Failed to delete cilium pods") 236 237 By("Waiting cilium pods to terminate") 238 ExpectAllPodsTerminated(kubectl) 239 240 By("Waiting for cilium pods to be ready") 241 err := kubectl.WaitforPods( 242 helpers.CiliumNamespace, fmt.Sprintf("-l %s", ciliumFilter), helpers.HelperTimeout) 243 Expect(err).Should(BeNil(), "Pods are not ready after timeout") 244 245 err = kubectl.CiliumEndpointWaitReady() 246 Expect(err).To(BeNil(), "Endpoints are not ready after timeout") 247 } 248 249 It("TCP connection is not dropped when cilium restarts", func() { 250 ctx, cancel := context.WithCancel(context.Background()) 251 defer cancel() 252 res := kubectl.ExecPodCmdBackground( 253 ctx, 254 helpers.DefaultNamespace, 255 netperfClient, "", 256 fmt.Sprintf("netperf -l 60 -t TCP_STREAM -H %s", podsIps[netperfServer])) 257 258 restartCilium() 259 260 By("Stopping netperf client test") 261 res.WaitUntilFinish() 262 res.ExpectSuccess("Failed while cilium was restarting") 263 }) 264 265 It("L3/L4 policies still work while Cilium is restarted", func() { 266 267 ctx, cancel := context.WithCancel(context.Background()) 268 defer cancel() 269 res := kubectl.ExecPodCmdBackground( 270 ctx, 271 helpers.DefaultNamespace, 272 netperfClient, "", 273 fmt.Sprintf("netperf -l 60 -t TCP_STREAM -H %s", podsIps[netperfServer])) 274 275 By("Installing the L3-L4 Policy") 276 _, err := kubectl.CiliumPolicyAction( 277 helpers.DefaultNamespace, netperfPolicy, helpers.KubectlApply, helpers.HelperTimeout) 278 Expect(err).Should(BeNil(), "Cannot install %q policy", netperfPolicy) 279 280 restartCilium() 281 282 By("Stopping netperf client test") 283 res.WaitUntilFinish() 284 res.ExpectSuccess("Failed while cilium was restarting") 285 }) 286 }) 287 })