github.com/zhyoulun/cilium@v1.6.12/test/k8sT/Updates.go (about)

     1  package k8sTest
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"strings"
     7  
     8  	. "github.com/cilium/cilium/test/ginkgo-ext"
     9  	"github.com/cilium/cilium/test/helpers"
    10  	. "github.com/onsi/gomega"
    11  )
    12  
    13  var (
    14  	demoPath         = helpers.ManifestGet("demo.yaml")
    15  	l7Policy         = helpers.ManifestGet("l7-policy.yaml")
    16  	migrateSVCClient = helpers.ManifestGet("migrate-svc-client.yaml")
    17  	migrateSVCServer = helpers.ManifestGet("migrate-svc-server.yaml")
    18  )
    19  
    20  var _ = Describe("K8sUpdates", func() {
    21  
    22  	// This test runs 8 steps as following:
    23  	// 1 - delete all pods. Clean cilium, this can be, and should be achieved by
    24  	// `clean-cilium-state: "true"` option that we have in configmap
    25  	// 2 - install cilium `cilium:v${LATEST_STABLE}`
    26  	// 3 - make endpoints talk with each other with policy
    27  	// 4 - upgrade cilium to `k8s1:5000/cilium/cilium-dev:latest`
    28  	// 5 - make endpoints talk with each other with policy
    29  	// 6 - downgrade cilium to `cilium:v${LATEST_STABLE}`
    30  	// 7 - make endpoints talk with each other with policy
    31  	// 8 - delete all pods. Clean cilium, this can be, and should be achieved by
    32  	// `clean-cilium-state: "true"` option that we have in configmap.
    33  	// This makes sure the upgrade tests won't affect any other test
    34  	// 9 - re install cilium:latest image for remaining tests.
    35  
    36  	var (
    37  		kubectl *helpers.Kubectl
    38  
    39  		cleanupCallback = func() { return }
    40  	)
    41  
    42  	BeforeAll(func() {
    43  		kubectl = helpers.CreateKubectl(helpers.K8s1VMName(), logger)
    44  
    45  		_ = kubectl.Delete(helpers.DNSDeployment())
    46  
    47  		kubectl.Delete(migrateSVCClient)
    48  		kubectl.Delete(migrateSVCServer)
    49  		kubectl.Delete(l7Policy)
    50  		kubectl.Delete(demoPath)
    51  
    52  		// Delete kube-dns because if not will be a restore the old endpoints
    53  		// from master instead of create the new ones.
    54  		_ = kubectl.DeleteResource(
    55  			"deploy", fmt.Sprintf("-n %s kube-dns", helpers.KubeSystemNamespace))
    56  
    57  		_ = kubectl.DeleteResource(
    58  			"deploy", fmt.Sprintf("-n %s cilium-operator", helpers.KubeSystemNamespace))
    59  		// Sometimes PolicyGen has a lot of pods running around without delete
    60  		// it. Using this we are sure that we delete before this test start
    61  		kubectl.Exec(fmt.Sprintf(
    62  			"%s delete --all pods,svc,cnp -n %s", helpers.KubectlCmd, helpers.DefaultNamespace))
    63  
    64  		kubectl.DeleteETCDOperator()
    65  
    66  		ExpectAllPodsTerminated(kubectl)
    67  	})
    68  
    69  	AfterAll(func() {
    70  		kubectl.CloseSSHClient()
    71  	})
    72  
    73  	AfterFailed(func() {
    74  		kubectl.CiliumReport(helpers.KubeSystemNamespace, "cilium endpoint list")
    75  	})
    76  
    77  	JustAfterEach(func() {
    78  		kubectl.ValidateNoErrorsInLogs(CurrentGinkgoTestDescription().Duration)
    79  	})
    80  
    81  	AfterEach(func() {
    82  		cleanupCallback()
    83  		ExpectAllPodsTerminated(kubectl)
    84  	})
    85  
    86  	It("Tests upgrade and downgrade from a Cilium stable image to master", func() {
    87  		var assertUpgradeSuccessful func()
    88  		assertUpgradeSuccessful, cleanupCallback =
    89  			InstallAndValidateCiliumUpgrades(kubectl, helpers.CiliumStableVersion, helpers.CiliumDeveloperImage)
    90  		assertUpgradeSuccessful()
    91  	})
    92  })
    93  
    94  // InstallAndValidateCiliumUpgrades installs and tests if the oldVersion can be
    95  // upgrade to the newVersion and if the newVersion can be downgraded to the
    96  // oldVersion.  It returns two callbacks, the first one is the assertfunction
    97  // that need to run, and the second one are the cleanup actions
    98  func InstallAndValidateCiliumUpgrades(kubectl *helpers.Kubectl, oldVersion, newVersion string) (func(), func()) {
    99  	canRun, err := helpers.CanRunK8sVersion(oldVersion, helpers.GetCurrentK8SEnv())
   100  	ExpectWithOffset(1, err).To(BeNil(), "Unable to get k8s constraints for %s", oldVersion)
   101  	if !canRun {
   102  		Skip(fmt.Sprintf(
   103  			"Cilium %q is not supported in K8s %q. Skipping upgrade/downgrade tests.",
   104  			oldVersion, helpers.GetCurrentK8SEnv()))
   105  		return func() {}, func() {}
   106  	}
   107  
   108  	SkipIfFlannel()
   109  
   110  	apps := []string{helpers.App1, helpers.App2, helpers.App3}
   111  	app1Service := "app1-service"
   112  
   113  	cleanupCallback := func() {
   114  		kubectl.Delete(migrateSVCClient)
   115  		kubectl.Delete(migrateSVCServer)
   116  		kubectl.Delete(l7Policy)
   117  		kubectl.Delete(demoPath)
   118  
   119  		kubectl.DeleteETCDOperator()
   120  
   121  		ExpectAllPodsTerminated(kubectl)
   122  
   123  		// make sure we clean everything up before doing any other test
   124  		err := kubectl.CiliumInstall([]string{
   125  			"--set global.cleanState=true",
   126  		})
   127  
   128  		ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be deployed", newVersion)
   129  		err = kubectl.WaitForCiliumInitContainerToFinish()
   130  		ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be clean up environment", newVersion)
   131  
   132  		if res := kubectl.Delete(helpers.DNSDeployment()); !res.WasSuccessful() {
   133  			log.Warningf("Unable to delete CoreDNS deployment: %s", res.OutputPrettyPrint())
   134  		}
   135  
   136  		if err := kubectl.CiliumUninstall([]string{}); err != nil {
   137  			log.WithError(err).Warning("Unable to uninstall Cilium")
   138  		}
   139  	}
   140  
   141  	testfunc := func() {
   142  		By("Deleting Cilium, CoreDNS, and etcd-operator...")
   143  		// Making sure that we deleted the  cilium ds. No assert
   144  		// message because maybe is not present
   145  		if res := kubectl.DeleteResource("ds", fmt.Sprintf("-n %s cilium", helpers.KubeSystemNamespace)); !res.WasSuccessful() {
   146  			log.Warningf("Unable to delete Cilium DaemonSet: %s", res.OutputPrettyPrint())
   147  		}
   148  
   149  		// Delete kube-dns because if not will be a restore the old
   150  		// endpoints from master instead of create the new ones.
   151  		if res := kubectl.Delete(helpers.DNSDeployment()); !res.WasSuccessful() {
   152  			log.Warningf("Unable to delete CoreDNS deployment: %s", res.OutputPrettyPrint())
   153  		}
   154  
   155  		// Delete all etcd pods otherwise they will be kept running but
   156  		// the bpf endpoints will be cleaned up when we restart cilium
   157  		// with a clean state a couple lines bellow
   158  		kubectl.DeleteETCDOperator()
   159  
   160  		By("Waiting for pods to be terminated..")
   161  		ExpectAllPodsTerminated(kubectl)
   162  
   163  		By("Cleaning Cilium state")
   164  		err = kubectl.CiliumInstallVersion(
   165  			"cilium-ds-clean-only.yaml",
   166  			"cilium-cm-patch-clean-cilium-state.yaml",
   167  			oldVersion,
   168  		)
   169  		Expect(err).To(BeNil(), "Cilium %q was not able to be deployed", oldVersion)
   170  
   171  		err := kubectl.WaitforPods(helpers.KubeSystemNamespace, "-l k8s-app=cilium", longTimeout)
   172  		ExpectWithOffset(1, err).Should(BeNil(), "Cleaning state did not complete in time")
   173  
   174  		By("Deploying Cilium")
   175  		err = kubectl.CiliumInstallVersion(
   176  			helpers.CiliumDefaultDSPatch,
   177  			"cilium-cm-patch.yaml",
   178  			oldVersion,
   179  		)
   180  		Expect(err).To(BeNil(), "Cilium %q was not able to be deployed", oldVersion)
   181  
   182  		By("Installing kube-dns")
   183  		_ = kubectl.ApplyDefault(helpers.DNSDeployment())
   184  
   185  		// Cilium is only ready if kvstore is ready, the kvstore is ready if
   186  		// kube-dns is running.
   187  		By("Cilium %q is installed and running", oldVersion)
   188  		ExpectCiliumReady(kubectl)
   189  
   190  		ExpectETCDOperatorReady(kubectl)
   191  		ExpectCiliumOperatorReady(kubectl)
   192  
   193  		By("Installing Microscope")
   194  		microscopeErr, microscopeCancel := kubectl.MicroscopeStart()
   195  		ExpectWithOffset(1, microscopeErr).To(BeNil(), "Microscope cannot be started")
   196  		defer microscopeCancel()
   197  
   198  		validatedImage := func(image string) {
   199  			By("Checking that installed image is %q", image)
   200  
   201  			filter := `{.items[*].status.containerStatuses[0].image}`
   202  			data, err := kubectl.GetPods(
   203  				helpers.KubeSystemNamespace, "-l k8s-app=cilium").Filter(filter)
   204  			ExpectWithOffset(1, err).To(BeNil(), "Cannot get cilium pods")
   205  
   206  			for _, val := range strings.Split(data.String(), " ") {
   207  				ExpectWithOffset(1, val).To(ContainSubstring(image), "Cilium image didn't update correctly")
   208  			}
   209  		}
   210  
   211  		validateEndpointsConnection := func() {
   212  			By("Validate that endpoints are ready before making any connection")
   213  			err := kubectl.CiliumEndpointWaitReady()
   214  			ExpectWithOffset(1, err).To(BeNil(), "Endpoints are not ready after timeout")
   215  
   216  			ExpectKubeDNSReady(kubectl)
   217  
   218  			err = kubectl.WaitForKubeDNSEntry(app1Service, helpers.DefaultNamespace)
   219  			ExpectWithOffset(1, err).To(BeNil(), "DNS entry is not ready after timeout")
   220  
   221  			appPods := helpers.GetAppPods(apps, helpers.DefaultNamespace, kubectl, "id")
   222  
   223  			err = kubectl.WaitForKubeDNSEntry(app1Service, helpers.DefaultNamespace)
   224  			ExpectWithOffset(1, err).To(BeNil(), "DNS entry is not ready after timeout")
   225  
   226  			By("Making L7 requests between endpoints")
   227  			res := kubectl.ExecPodCmd(
   228  				helpers.DefaultNamespace, appPods[helpers.App2],
   229  				helpers.CurlFail("http://%s/public", app1Service))
   230  			ExpectWithOffset(1, res).Should(helpers.CMDSuccess(), "Cannot curl app1-service")
   231  
   232  			res = kubectl.ExecPodCmd(
   233  				helpers.DefaultNamespace, appPods[helpers.App2],
   234  				helpers.CurlFail("http://%s/private", app1Service))
   235  			ExpectWithOffset(1, res).ShouldNot(helpers.CMDSuccess(), "Expect a 403 from app1-service")
   236  		}
   237  
   238  		// checkNoInteruptsInSVCFlows checks whether there are no
   239  		// interrupts in established connections to the migrate-svc service
   240  		// after Cilium has been upgraded / downgraded.
   241  		//
   242  		// The check is based on restart count of the Pods. We can do it so, because
   243  		// any interrupt in the flow makes a client to panic which makes the Pod
   244  		// to restart.
   245  		lastCount := -1
   246  		checkNoInteruptsInSVCFlows := func() {
   247  			By("No interrupts in migrated svc flows")
   248  
   249  			filter := `{.items[*].status.containerStatuses[0].restartCount}`
   250  			restartCount, err := kubectl.GetPods(helpers.DefaultNamespace,
   251  				"-l zgroup=migrate-svc").Filter(filter)
   252  			ExpectWithOffset(1, err).To(BeNil(), "Failed to query \"migrate-svc-server\" Pod")
   253  
   254  			currentCount := 0
   255  			for _, c := range strings.Split(restartCount.String(), " ") {
   256  				count, err := strconv.Atoi(c)
   257  				ExpectWithOffset(1, err).To(BeNil(), "Failed to convert count value")
   258  				currentCount += count
   259  			}
   260  			// The check is invoked for the first time
   261  			if lastCount == -1 {
   262  				lastCount = currentCount
   263  			}
   264  			Expect(lastCount).Should(BeIdenticalTo(currentCount),
   265  				"migrate-svc restart count values do not match")
   266  		}
   267  
   268  		By("Creating some endpoints and L7 policy")
   269  
   270  		res := kubectl.ApplyDefault(demoPath)
   271  		ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "cannot apply dempo application")
   272  
   273  		err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l zgroup=testapp", timeout)
   274  		Expect(err).Should(BeNil(), "Test pods are not ready after timeout")
   275  
   276  		ExpectKubeDNSReady(kubectl)
   277  
   278  		_, err = kubectl.CiliumPolicyAction(
   279  			helpers.DefaultNamespace, l7Policy, helpers.KubectlApply, timeout)
   280  		Expect(err).Should(BeNil(), "cannot import l7 policy: %v", l7Policy)
   281  
   282  		By("Creating service and clients for migration")
   283  
   284  		res = kubectl.ApplyDefault(migrateSVCServer)
   285  		ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "cannot apply migrate-svc-server")
   286  		err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l app=migrate-svc-server", timeout)
   287  		Expect(err).Should(BeNil(), "migrate-svc-server pods are not ready after timeout")
   288  
   289  		res = kubectl.ApplyDefault(migrateSVCClient)
   290  		ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "cannot apply migrate-svc-client")
   291  		err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l app=migrate-svc-client", timeout)
   292  		Expect(err).Should(BeNil(), "migrate-svc-client pods are not ready after timeout")
   293  
   294  		validateEndpointsConnection()
   295  		checkNoInteruptsInSVCFlows()
   296  
   297  		waitForUpdateImage := func(image string) func() bool {
   298  			return func() bool {
   299  				pods, err := kubectl.GetCiliumPods(helpers.KubeSystemNamespace)
   300  				if err != nil {
   301  					return false
   302  				}
   303  
   304  				filter := `{.items[*].status.containerStatuses[0].image}`
   305  				data, err := kubectl.GetPods(
   306  					helpers.KubeSystemNamespace, "-l k8s-app=cilium").Filter(filter)
   307  				if err != nil {
   308  					return false
   309  				}
   310  				number := strings.Count(data.String(), image)
   311  				if number == len(pods) {
   312  					return true
   313  				}
   314  				log.Infof("Only '%v' of '%v' cilium pods updated to the new image",
   315  					number, len(pods))
   316  				return false
   317  			}
   318  		}
   319  
   320  		By("Install Cilium pre-flight check DaemonSet")
   321  
   322  		res = kubectl.ExecMiddle("helm template " +
   323  			helpers.HelmTemplate + " " +
   324  			"--namespace=kube-system " +
   325  			"--set preflight.enabled=true " +
   326  			fmt.Sprintf("--set preflight.image=%s ", helpers.CiliumDeveloperImage) +
   327  			"--set agent.enabled=false " +
   328  			"--set config.enabled=false " +
   329  			"--set operator.enabled=false " +
   330  			"> cilium-preflight.yaml")
   331  		ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "Unable to generate preflight YAML")
   332  
   333  		res = kubectl.ApplyDefault("cilium-preflight.yaml")
   334  		ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "Unable to deploy preflight manifest")
   335  		ExpectCiliumPreFlightInstallReady(kubectl)
   336  
   337  		// Once they are installed we can remove it
   338  		By("Removing Cilium pre-flight check DaemonSet")
   339  		kubectl.Delete("cilium-preflight.yaml")
   340  
   341  		// Need to run using the kvstore-based allocator because upgrading from
   342  		// kvstore-based allocator to CRD-based allocator is not currently
   343  		// supported at this time.
   344  		By("Installing Cilium using kvstore-based allocator")
   345  		err = kubectl.CiliumInstall([]string{
   346  			"--set global.identityAllocationMode=kvstore",
   347  			"--set global.etcd.enabled=true",
   348  			"--set global.etcd.managed=true",
   349  		})
   350  		ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be deployed", newVersion)
   351  
   352  		err = helpers.WithTimeout(
   353  			waitForUpdateImage(newVersion),
   354  			"Cilium Pods are not updating correctly",
   355  			&helpers.TimeoutConfig{Timeout: timeout})
   356  		ExpectWithOffset(1, err).To(BeNil(), "Pods are not updating")
   357  
   358  		err = kubectl.WaitforPods(
   359  			helpers.KubeSystemNamespace, "-l k8s-app=cilium", timeout)
   360  		ExpectWithOffset(1, err).Should(BeNil(), "Cilium is not ready after timeout")
   361  
   362  		validatedImage(newVersion)
   363  		ExpectCiliumReady(kubectl)
   364  		ExpectCiliumOperatorReady(kubectl)
   365  
   366  		validateEndpointsConnection()
   367  		checkNoInteruptsInSVCFlows()
   368  
   369  		By("Downgrading cilium to %s image", oldVersion)
   370  
   371  		err = kubectl.CiliumInstallVersion(
   372  			helpers.CiliumDefaultDSPatch,
   373  			helpers.CiliumConfigMapPatch,
   374  			oldVersion,
   375  		)
   376  		ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be deployed", oldVersion)
   377  
   378  		err = helpers.WithTimeout(
   379  			waitForUpdateImage(oldVersion),
   380  			"Cilium Pods are not updating correctly",
   381  			&helpers.TimeoutConfig{Timeout: timeout})
   382  		ExpectWithOffset(1, err).To(BeNil(), "Pods are not updating")
   383  
   384  		err = kubectl.WaitforPods(
   385  			helpers.KubeSystemNamespace, "-l k8s-app=cilium", timeout)
   386  		ExpectWithOffset(1, err).Should(BeNil(), "Cilium is not ready after timeout")
   387  
   388  		validatedImage(oldVersion)
   389  		ExpectCiliumOperatorReady(kubectl)
   390  
   391  		validateEndpointsConnection()
   392  		checkNoInteruptsInSVCFlows()
   393  	}
   394  	return testfunc, cleanupCallback
   395  }