github.com/cilium/cilium@v1.16.2/test/k8s/chaos.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package k8sTest
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  
    10  	. "github.com/onsi/gomega"
    11  
    12  	. "github.com/cilium/cilium/test/ginkgo-ext"
    13  	"github.com/cilium/cilium/test/helpers"
    14  )
    15  
    16  // The 5.4 CI job is intended to catch BPF complexity regressions and as such
    17  // doesn't need to execute this test suite.
    18  var _ = SkipDescribeIf(helpers.RunsOn54Kernel, "K8sAgentChaosTest", func() {
    19  
    20  	var (
    21  		kubectl        *helpers.Kubectl
    22  		demoDSPath     string
    23  		cnpPath        string
    24  		ciliumFilename string
    25  		testDSService  = "testds-service"
    26  	)
    27  
    28  	BeforeAll(func() {
    29  		kubectl = helpers.CreateKubectl(helpers.K8s1VMName(), logger)
    30  		demoDSPath = helpers.ManifestGet(kubectl.BasePath(), "demo_ds.yaml")
    31  		cnpPath = helpers.ManifestGet(kubectl.BasePath(), "cnp-to-cidr-oneoneoneone.yaml")
    32  
    33  		ciliumFilename = helpers.TimestampFilename("cilium.yaml")
    34  		DeployCiliumAndDNS(kubectl, ciliumFilename)
    35  	})
    36  
    37  	AfterFailed(func() {
    38  		kubectl.CiliumReport("cilium-dbg service list", "cilium-dbg endpoint list")
    39  	})
    40  
    41  	JustAfterEach(func() {
    42  		kubectl.ValidateNoErrorsInLogs(CurrentGinkgoTestDescription().Duration)
    43  	})
    44  
    45  	AfterAll(func() {
    46  		UninstallCiliumFromManifest(kubectl, ciliumFilename)
    47  		kubectl.CloseSSHClient()
    48  	})
    49  
    50  	Context("Connectivity demo application", func() {
    51  		BeforeEach(func() {
    52  			kubectl.ApplyDefault(demoDSPath).ExpectSuccess("DS deployment cannot be applied")
    53  			// this NP needs to be a separate namespace; it is just to trigger CIDR allocation
    54  			kubectl.NamespaceCreate("cilium-test-unused").ExpectSuccess("Namespace cilium-test-unused could not be created")
    55  			kubectl.Apply(helpers.ApplyOptions{
    56  				FilePath:  cnpPath,
    57  				Namespace: "cilium-test-unused",
    58  			}).ExpectSuccess("CNP cannot be applied")
    59  
    60  			err := kubectl.WaitforPods(
    61  				helpers.DefaultNamespace, "-l zgroup=testDS", helpers.HelperTimeout)
    62  			Expect(err).Should(BeNil(), "Pods are not ready after timeout")
    63  		})
    64  
    65  		AfterEach(func() {
    66  			kubectl.DeleteLong(demoDSPath).ExpectSuccess(
    67  				"%s deployment cannot be deleted", demoDSPath)
    68  			ExpectAllPodsTerminated(kubectl)
    69  
    70  		})
    71  
    72  		// connectivityTest  performs a few test inside:
    73  		// - tests connectivity of all client pods to the backend pods directly via ping
    74  		// - tests connectivity of all client pods to the ClusterIP of the test-ds service via curl
    75  		// - tests connectivity of all client pods to the DNS name for the test-ds service via curl
    76  		// - tests that CIDR identities are stable after restoration
    77  		connectivityTest := func() {
    78  			pods, err := kubectl.GetPodNames(helpers.DefaultNamespace, "zgroup=testDSClient")
    79  			Expect(err).To(BeNil(), "Cannot get pods names")
    80  			Expect(len(pods)).To(BeNumerically(">", 0), "No pods available to test connectivity")
    81  
    82  			dsPods, err := kubectl.GetPodsIPs(helpers.DefaultNamespace, "zgroup=testDS")
    83  			Expect(err).To(BeNil(), "Cannot get daemonset pods IPS")
    84  			Expect(len(dsPods)).To(BeNumerically(">", 0), "No pods available to test connectivity")
    85  
    86  			By("Waiting for kube-dns entry for service testds-service")
    87  			err = kubectl.WaitForKubeDNSEntry(testDSService, helpers.DefaultNamespace)
    88  			ExpectWithOffset(1, err).To(BeNil(), "DNS entry is not ready after timeout")
    89  
    90  			By("Getting ClusterIP For testds-service")
    91  			host, _, err := kubectl.GetServiceHostPort(helpers.DefaultNamespace, "testds-service")
    92  			ExpectWithOffset(1, err).To(BeNil(), "unable to get ClusterIP and port for service testds-service")
    93  
    94  			for _, pod := range pods {
    95  				for _, ip := range dsPods {
    96  					By("Pinging testds pod with IP %q from client pod %q", ip, pod)
    97  					res := kubectl.ExecPodCmd(
    98  						helpers.DefaultNamespace, pod, helpers.Ping(ip))
    99  					log.Debugf("Pod %s ping %v", pod, ip)
   100  					ExpectWithOffset(1, res).To(helpers.CMDSuccess(),
   101  						"Cannot ping from %q to %q", pod, ip)
   102  				}
   103  
   104  				By("Curling testds-service via ClusterIP %q", host)
   105  				res := kubectl.ExecPodCmd(
   106  					helpers.DefaultNamespace, pod, helpers.CurlFail("http://%s:80/", host))
   107  				ExpectWithOffset(1, res).To(helpers.CMDSuccess(),
   108  					"Cannot curl from %q to testds-service via ClusterIP", pod)
   109  
   110  				By("Curling testds-service via DNS hostname")
   111  				res = kubectl.ExecPodCmd(
   112  					helpers.DefaultNamespace, pod, helpers.CurlFail("http://%s:80/", testDSService))
   113  				ExpectWithOffset(1, res).To(helpers.CMDSuccess(),
   114  					"Cannot curl from %q to testds-service via DNS hostname", pod)
   115  			}
   116  		}
   117  
   118  		It("Endpoint can still connect while Cilium is not running", func() {
   119  			By("Waiting for deployed pods to be ready")
   120  			err := kubectl.WaitforPods(
   121  				helpers.DefaultNamespace,
   122  				"-l zgroup=testDSClient", helpers.HelperTimeout)
   123  			Expect(err).Should(BeNil(), "Pods are not ready after timeout")
   124  
   125  			err = kubectl.CiliumEndpointWaitReady()
   126  			Expect(err).To(BeNil(), "Endpoints are not ready after timeout")
   127  
   128  			By("Checking connectivity before restarting Cilium")
   129  			connectivityTest()
   130  
   131  			By("Determining the identity for an extra-cluster cidr")
   132  			ciliumPodK8s1, err := kubectl.GetCiliumPodOnNode(helpers.K8s1)
   133  			Expect(err).To(BeNil(), "Could not list Cilium pods")
   134  			origID, err := kubectl.GetCiliumIdentityForIP(ciliumPodK8s1, "1.1.1.1")
   135  			Expect(err).To(BeNil(), "Could not look up numeric identity for 1.1.1.1")
   136  
   137  			By("Deleting cilium pods")
   138  			res := kubectl.Exec(fmt.Sprintf("%s -n %s delete pods -l k8s-app=cilium",
   139  				helpers.KubectlCmd, helpers.CiliumNamespace))
   140  			res.ExpectSuccess()
   141  
   142  			ExpectAllPodsTerminated(kubectl)
   143  
   144  			ExpectCiliumReady(kubectl)
   145  			err = kubectl.CiliumEndpointWaitReady()
   146  			Expect(err).To(BeNil(), "Endpoints are not ready after Cilium restarts")
   147  
   148  			By("Checking connectivity after restarting Cilium")
   149  			connectivityTest()
   150  
   151  			By("Determining the identity for an extra-cluster cidr")
   152  			ciliumPodK8s1, err = kubectl.GetCiliumPodOnNode(helpers.K8s1)
   153  			Expect(err).To(BeNil(), "Could not list Cilium pods")
   154  			newID, err := kubectl.GetCiliumIdentityForIP(ciliumPodK8s1, "1.1.1.1")
   155  			Expect(err).To(BeNil(), "Could not look up numeric identity for 1.1.1.1")
   156  			Expect(origID).To(Equal(newID), "Numeric identity for 1.1.1.1 should not change after restart.")
   157  
   158  			By("Uninstall cilium pods")
   159  
   160  			res = kubectl.DeleteResource(
   161  				"ds", fmt.Sprintf("-n %s cilium", helpers.CiliumNamespace))
   162  			res.ExpectSuccess("Cilium DS cannot be deleted")
   163  
   164  			ExpectAllPodsTerminated(kubectl)
   165  
   166  			By("Checking connectivity after uninstalling Cilium")
   167  			connectivityTest()
   168  
   169  			By("Reinstall cilium DaemonSet")
   170  			err = kubectl.CiliumInstall(ciliumFilename, map[string]string{})
   171  			Expect(err).To(BeNil(), "Cilium cannot be installed")
   172  
   173  			ExpectCiliumReady(kubectl)
   174  
   175  			err = kubectl.CiliumEndpointWaitReady()
   176  			Expect(err).To(BeNil(), "Endpoints are not ready after timeout")
   177  
   178  			By("Checking connectivity after reinstalling Cilium")
   179  			connectivityTest()
   180  
   181  			By("Determining the identity for an extra-cluster cidr")
   182  			ciliumPodK8s1, err = kubectl.GetCiliumPodOnNode(helpers.K8s1)
   183  			Expect(err).To(BeNil(), "Could not list Cilium pods")
   184  			newID, err = kubectl.GetCiliumIdentityForIP(ciliumPodK8s1, "1.1.1.1")
   185  			Expect(err).To(BeNil(), "Could not look up numeric identity for 1.1.1.1")
   186  			Expect(origID).To(Equal(newID), "Numeric identity for 1.1.1.1 should not change after restart.")
   187  		})
   188  	})
   189  
   190  	Context("Restart with long lived connections", func() {
   191  
   192  		var (
   193  			netperfManifest    string
   194  			netperfPolicy      string
   195  			netperfServiceName = "netperf-service"
   196  			podsIps            map[string]string
   197  			netperfClient      = "netperf-client"
   198  			netperfServer      = "netperf-server"
   199  		)
   200  
   201  		BeforeAll(func() {
   202  			netperfManifest = helpers.ManifestGet(kubectl.BasePath(), "netperf-deployment.yaml")
   203  			netperfPolicy = helpers.ManifestGet(kubectl.BasePath(), "netperf-policy.yaml")
   204  
   205  			kubectl.ApplyDefault(netperfManifest).ExpectSuccess("Netperf cannot be deployed")
   206  
   207  			err := kubectl.WaitforPods(
   208  				helpers.DefaultNamespace,
   209  				"-l zgroup=testapp", helpers.HelperTimeout)
   210  			Expect(err).Should(BeNil(), "Pods are not ready after timeout")
   211  
   212  			podsIps, err = kubectl.GetPodsIPs(helpers.DefaultNamespace, "zgroup=testapp")
   213  			Expect(err).To(BeNil(), "Cannot get pods ips")
   214  
   215  			_, _, err = kubectl.GetServiceHostPort(helpers.DefaultNamespace, netperfServiceName)
   216  			Expect(err).To(BeNil(), "cannot get service netperf ip")
   217  		})
   218  
   219  		AfterAll(func() {
   220  			_ = kubectl.Delete(netperfManifest)
   221  			ExpectAllPodsTerminated(kubectl)
   222  		})
   223  
   224  		AfterEach(func() {
   225  			_ = kubectl.Delete(netperfPolicy)
   226  		})
   227  
   228  		restartCilium := func() {
   229  			ciliumFilter := "k8s-app=cilium"
   230  
   231  			By("Deleting all cilium pods")
   232  			res := kubectl.Exec(fmt.Sprintf(
   233  				"%s -n %s delete pods -l %s",
   234  				helpers.KubectlCmd, helpers.CiliumNamespace, ciliumFilter))
   235  			res.ExpectSuccess("Failed to delete cilium pods")
   236  
   237  			By("Waiting cilium pods to terminate")
   238  			ExpectAllPodsTerminated(kubectl)
   239  
   240  			By("Waiting for cilium pods to be ready")
   241  			err := kubectl.WaitforPods(
   242  				helpers.CiliumNamespace, fmt.Sprintf("-l %s", ciliumFilter), helpers.HelperTimeout)
   243  			Expect(err).Should(BeNil(), "Pods are not ready after timeout")
   244  
   245  			err = kubectl.CiliumEndpointWaitReady()
   246  			Expect(err).To(BeNil(), "Endpoints are not ready after timeout")
   247  		}
   248  
   249  		It("TCP connection is not dropped when cilium restarts", func() {
   250  			ctx, cancel := context.WithCancel(context.Background())
   251  			defer cancel()
   252  			res := kubectl.ExecPodCmdBackground(
   253  				ctx,
   254  				helpers.DefaultNamespace,
   255  				netperfClient, "",
   256  				fmt.Sprintf("netperf -l 60 -t TCP_STREAM -H %s", podsIps[netperfServer]))
   257  
   258  			restartCilium()
   259  
   260  			By("Stopping netperf client test")
   261  			res.WaitUntilFinish()
   262  			res.ExpectSuccess("Failed while cilium was restarting")
   263  		})
   264  
   265  		It("L3/L4 policies still work while Cilium is restarted", func() {
   266  
   267  			ctx, cancel := context.WithCancel(context.Background())
   268  			defer cancel()
   269  			res := kubectl.ExecPodCmdBackground(
   270  				ctx,
   271  				helpers.DefaultNamespace,
   272  				netperfClient, "",
   273  				fmt.Sprintf("netperf -l 60 -t TCP_STREAM -H %s", podsIps[netperfServer]))
   274  
   275  			By("Installing the L3-L4 Policy")
   276  			_, err := kubectl.CiliumPolicyAction(
   277  				helpers.DefaultNamespace, netperfPolicy, helpers.KubectlApply, helpers.HelperTimeout)
   278  			Expect(err).Should(BeNil(), "Cannot install %q policy", netperfPolicy)
   279  
   280  			restartCilium()
   281  
   282  			By("Stopping netperf client test")
   283  			res.WaitUntilFinish()
   284  			res.ExpectSuccess("Failed while cilium was restarting")
   285  		})
   286  	})
   287  })