github.com/rancher/elemental/tests@v0.0.0-20240517125144-ae048c615b3f/e2e/upgrade_test.go (about)

     1  /*
     2  Copyright © 2022 - 2024 SUSE LLC
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7      http://www.apache.org/licenses/LICENSE-2.0
     8  Unless required by applicable law or agreed to in writing, software
     9  distributed under the License is distributed on an "AS IS" BASIS,
    10  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  See the License for the specific language governing permissions and
    12  limitations under the License.
    13  */
    14  
    15  package e2e_test
    16  
    17  import (
    18  	"os"
    19  	"os/exec"
    20  	"strconv"
    21  	"strings"
    22  	"sync"
    23  	"time"
    24  
    25  	. "github.com/onsi/ginkgo/v2"
    26  	. "github.com/onsi/gomega"
    27  	"github.com/rancher-sandbox/ele-testhelpers/kubectl"
    28  	"github.com/rancher-sandbox/ele-testhelpers/rancher"
    29  	"github.com/rancher-sandbox/ele-testhelpers/tools"
    30  	"github.com/rancher/elemental/tests/e2e/helpers/elemental"
    31  )
    32  
    33  var _ = Describe("E2E - Upgrading Elemental Operator", Label("upgrade-operator"), func() {
    34  	// Create kubectl context
    35  	// Default timeout is too small, so New() cannot be used
    36  	k := &kubectl.Kubectl{
    37  		Namespace:    "",
    38  		PollTimeout:  tools.SetTimeout(300 * time.Second),
    39  		PollInterval: 500 * time.Millisecond,
    40  	}
    41  
    42  	It("Upgrade operator", func() {
    43  		// Report to Qase
    44  		testCaseID = 71
    45  
    46  		// Check if CRDs chart is already installed (not always the case in older versions)
    47  		chartList, err := exec.Command("helm",
    48  			"list",
    49  			"--no-headers",
    50  			"--namespace", "cattle-elemental-system",
    51  		).CombinedOutput()
    52  		Expect(err).To(Not(HaveOccurred()))
    53  
    54  		upgradeOrder := []string{"elemental-operator-crds", "elemental-operator"}
    55  		if !strings.Contains(string(chartList), "-crds") {
    56  			upgradeOrder = []string{"elemental-operator", "elemental-operator-crds"}
    57  		}
    58  
    59  		for _, chart := range upgradeOrder {
    60  			RunHelmCmdWithRetry(
    61  				"upgrade", "--install", chart,
    62  				operatorUpgrade+"/"+chart+"-chart",
    63  				"--namespace", "cattle-elemental-system",
    64  				"--create-namespace",
    65  				"--wait", "--wait-for-jobs",
    66  			)
    67  
    68  			// Delay few seconds for all to be installed
    69  			time.Sleep(tools.SetTimeout(20 * time.Second))
    70  		}
    71  
    72  		// Wait for all pods to be started
    73  		Eventually(func() error {
    74  			return rancher.CheckPod(k, [][]string{{"cattle-elemental-system", "app=elemental-operator"}})
    75  		}, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil())
    76  	})
    77  })
    78  
    79  var _ = Describe("E2E - Upgrading Rancher Manager", Label("upgrade-rancher-manager"), func() {
    80  	// Create kubectl context
    81  	// Default timeout is too small, so New() cannot be used
    82  	k := &kubectl.Kubectl{
    83  		Namespace:    "",
    84  		PollTimeout:  tools.SetTimeout(300 * time.Second),
    85  		PollInterval: 500 * time.Millisecond,
    86  	}
    87  
    88  	It("Upgrade Rancher Manager", func() {
    89  		// Report to Qase
    90  		testCaseID = 72
    91  
    92  		// Get before-upgrade Rancher Manager version
    93  		getImageVersion := []string{
    94  			"get", "pod",
    95  			"--namespace", "cattle-system",
    96  			"-l", "app=rancher",
    97  			"-o", "jsonpath={.items[*].status.containerStatuses[*].image}",
    98  		}
    99  		versionBeforeUpgrade, err := kubectl.RunWithoutErr(getImageVersion...)
   100  		Expect(err).To(Not(HaveOccurred()))
   101  
   102  		// Upgrade Rancher Manager
   103  		// NOTE: Don't check the status, we can have false-positive here...
   104  		//       Better to check the rollout after the upgrade, it will fail if the upgrade failed
   105  		_ = rancher.DeployRancherManager(
   106  			rancherHostname,
   107  			rancherUpgradeChannel,
   108  			rancherUpgradeVersion,
   109  			rancherUpgradeHeadVersion,
   110  			caType,
   111  			proxy,
   112  		)
   113  
   114  		// Wait for Rancher Manager to be restarted
   115  		// NOTE: 1st or 2nd rollout command can sporadically fail, so better to use Eventually here
   116  		Eventually(func() string {
   117  			status, _ := kubectl.RunWithoutErr(
   118  				"rollout",
   119  				"--namespace", "cattle-system",
   120  				"status", "deployment/rancher",
   121  			)
   122  			return status
   123  		}, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(ContainSubstring("successfully rolled out"))
   124  
   125  		// Check that all Rancher Manager pods are running
   126  		Eventually(func() error {
   127  			checkList := [][]string{
   128  				{"cattle-system", "app=rancher"},
   129  				{"cattle-fleet-local-system", "app=fleet-agent"},
   130  				{"cattle-system", "app=rancher-webhook"},
   131  			}
   132  			return rancher.CheckPod(k, checkList)
   133  		}, tools.SetTimeout(3*time.Minute), 10*time.Second).Should(Not(HaveOccurred()))
   134  
   135  		// A bit dirty be better to wait a little here for all to be correctly started
   136  		time.Sleep(2 * time.Minute)
   137  
   138  		// Check that all pods are using the same version
   139  		Eventually(func() int {
   140  			out, _ := kubectl.RunWithoutErr(getImageVersion...)
   141  			return len(strings.Fields(out))
   142  		}, tools.SetTimeout(3*time.Minute), 10*time.Second).Should(Equal(1))
   143  
   144  		// Get after-upgrade Rancher Manager version
   145  		// and check that it's different to the before-upgrade version
   146  		versionAfterUpgrade, err := kubectl.RunWithoutErr(getImageVersion...)
   147  		Expect(err).To(Not(HaveOccurred()))
   148  		Expect(versionAfterUpgrade).To(Not(Equal(versionBeforeUpgrade)))
   149  	})
   150  })
   151  
   152  var _ = Describe("E2E - Upgrading node", Label("upgrade-node"), func() {
   153  	var (
   154  		value        string
   155  		valueToCheck string
   156  		wg           sync.WaitGroup
   157  	)
   158  
   159  	It("Upgrade node", func() {
   160  		// Report to Qase
   161  		testCaseID = 73
   162  
   163  		By("Checking if upgrade type is set", func() {
   164  			Expect(upgradeType).To(Not(BeEmpty()))
   165  		})
   166  
   167  		for index := vmIndex; index <= numberOfVMs; index++ {
   168  			// Set node hostname
   169  			hostName := elemental.SetHostname(vmNameRoot, index)
   170  			Expect(hostName).To(Not(BeEmpty()))
   171  
   172  			// Get node information
   173  			client, _ := GetNodeInfo(hostName)
   174  			Expect(client).To(Not(BeNil()))
   175  
   176  			// Execute node deployment in parallel
   177  			wg.Add(1)
   178  			go func(h string, cl *tools.Client) {
   179  				defer wg.Done()
   180  				defer GinkgoRecover()
   181  
   182  				By("Checking OS version on "+h+" before upgrade", func() {
   183  					out := RunSSHWithRetry(cl, "cat /etc/os-release")
   184  					GinkgoWriter.Printf("OS Version on %s:\n%s\n", h, out)
   185  				})
   186  			}(hostName, client)
   187  		}
   188  
   189  		// Wait for all parallel jobs
   190  		wg.Wait()
   191  
   192  		By("Triggering Upgrade in Rancher with "+upgradeType, func() {
   193  			// Set temporary file
   194  			upgradeTmp, err := tools.CreateTemp("upgrade")
   195  			Expect(err).To(Not(HaveOccurred()))
   196  			defer os.Remove(upgradeTmp)
   197  
   198  			if upgradeType == "managedOSVersionName" {
   199  				// Get OSVersion name
   200  				OSVersion, err := exec.Command(getOSScript, upgradeOSChannel).Output()
   201  				Expect(err).To(Not(HaveOccurred()))
   202  
   203  				// In case of sync failure OSVersion can be empty,
   204  				// so try to force the sync before aborting
   205  				if string(OSVersion) == "" {
   206  					const channel = "elemental-channel"
   207  
   208  					// Log the workaround, could be useful
   209  					GinkgoWriter.Printf("!! ManagedOSVersionChannel not synced !! Triggering a re-sync!\n")
   210  
   211  					// Get current syncInterval
   212  					syncValue, err := kubectl.RunWithoutErr("get", "managedOSVersionChannel",
   213  						"--namespace", clusterNS, channel,
   214  						"-o", "jsonpath={.spec.syncInterval}")
   215  					Expect(err).To(Not(HaveOccurred()))
   216  					Expect(syncValue).To(Not(BeEmpty()))
   217  
   218  					// Reduce syncInterval to force an update
   219  					_, err = kubectl.RunWithoutErr("patch", "managedOSVersionChannel",
   220  						"--namespace", clusterNS, channel,
   221  						"--type", "merge",
   222  						"--patch", "{\"spec\":{\"syncInterval\":\"1m\"}}")
   223  					Expect(err).To(Not(HaveOccurred()))
   224  
   225  					// Loop until sync is done
   226  					Eventually(func() string {
   227  						value, _ := exec.Command(getOSScript, upgradeOSChannel).Output()
   228  
   229  						return string(value)
   230  					}, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(Not(BeEmpty()))
   231  
   232  					// We should now have an OS version!
   233  					OSVersion, err = exec.Command(getOSScript, upgradeOSChannel).Output()
   234  					Expect(err).To(Not(HaveOccurred()))
   235  					Expect(OSVersion).To(Not(BeEmpty()))
   236  
   237  					// Re-patch syncInterval to the initial value
   238  					_, err = kubectl.RunWithoutErr("patch", "managedOSVersionChannel",
   239  						"--namespace", clusterNS, channel,
   240  						"--type", "merge",
   241  						"--patch", "{\"spec\":{\"syncInterval\":\""+syncValue+"\"}}")
   242  					Expect(err).To(Not(HaveOccurred()))
   243  				}
   244  
   245  				// Set OS image to use for upgrade
   246  				value = string(OSVersion)
   247  
   248  				// Extract the value to check after the upgrade
   249  				out, err := kubectl.RunWithoutErr("get", "ManagedOSVersion",
   250  					"--namespace", clusterNS, value,
   251  					"-o", "jsonpath={.spec.metadata.upgradeImage}")
   252  				Expect(err).To(Not(HaveOccurred()))
   253  				valueToCheck = tools.TrimStringFromChar(out, ":")
   254  			} else if upgradeType == "osImage" {
   255  				// Set OS image to use for upgrade
   256  				value = upgradeImage
   257  
   258  				// Extract the value to check after the upgrade
   259  				valueToCheck = tools.TrimStringFromChar(upgradeImage, ":")
   260  			}
   261  
   262  			// Add a nodeSelector if needed
   263  			if usedNodes == 1 {
   264  				// Set node hostname
   265  				hostName := elemental.SetHostname(vmNameRoot, vmIndex)
   266  				Expect(hostName).To(Not(BeEmpty()))
   267  
   268  				// Get node information
   269  				client, _ := GetNodeInfo(hostName)
   270  				Expect(client).To(Not(BeNil()))
   271  
   272  				// Get *REAL* hostname
   273  				hostname := RunSSHWithRetry(client, "hostname")
   274  				hostname = strings.Trim(hostname, "\n")
   275  
   276  				label := "kubernetes.io/hostname"
   277  				selector, err := elemental.AddSelector(label, hostname)
   278  				Expect(err).To(Not(HaveOccurred()), selector)
   279  
   280  				// Create new file for this specific upgrade
   281  				err = tools.AddDataToFile(upgradeSkelYaml, upgradeTmp, selector)
   282  				Expect(err).To(Not(HaveOccurred()))
   283  			} else {
   284  				// Use original file as-is
   285  				err := tools.CopyFile(upgradeSkelYaml, upgradeTmp)
   286  				Expect(err).To(Not(HaveOccurred()))
   287  			}
   288  
   289  			// Patterns to replace
   290  			patterns := []YamlPattern{
   291  				{
   292  					key:   "with-%UPGRADE_TYPE%",
   293  					value: strings.ToLower(upgradeType),
   294  				},
   295  				{
   296  					key:   "%UPGRADE_TYPE%",
   297  					value: upgradeType + ": " + value,
   298  				},
   299  				{
   300  					key:   "%CLUSTER_NAME%",
   301  					value: clusterName,
   302  				},
   303  				{
   304  					key:   "%FORCE_DOWNGRADE%",
   305  					value: strconv.FormatBool(forceDowngrade),
   306  				},
   307  			}
   308  
   309  			// Create Yaml file
   310  			for _, p := range patterns {
   311  				err := tools.Sed(p.key, p.value, upgradeTmp)
   312  				Expect(err).To(Not(HaveOccurred()))
   313  			}
   314  
   315  			// Apply the generated file
   316  			err = kubectl.Apply(clusterNS, upgradeTmp)
   317  			Expect(err).To(Not(HaveOccurred()))
   318  		})
   319  
   320  		for index := vmIndex; index <= numberOfVMs; index++ {
   321  			// Set node hostname
   322  			hostName := elemental.SetHostname(vmNameRoot, index)
   323  			Expect(hostName).To(Not(BeEmpty()))
   324  
   325  			// Get node information
   326  			client, _ := GetNodeInfo(hostName)
   327  			Expect(client).To(Not(BeNil()))
   328  
   329  			// Execute node deployment in parallel
   330  			wg.Add(1)
   331  			go func(h string, cl *tools.Client) {
   332  				defer wg.Done()
   333  				defer GinkgoRecover()
   334  
   335  				By("Checking VM upgrade on "+h, func() {
   336  					Eventually(func() string {
   337  						// Use grep here in case of comment in the file!
   338  						out, _ := cl.RunSSH("eval $(grep -v ^# /etc/os-release) && echo ${IMAGE}")
   339  
   340  						// This remove the version and keep only the repo, as in the file
   341  						// we have the exact version and we don't know it before the upgrade
   342  						return tools.TrimStringFromChar(strings.Trim(out, "\n"), ":")
   343  					}, tools.SetTimeout(5*time.Minute), 30*time.Second).Should(Equal(valueToCheck))
   344  				})
   345  
   346  				By("Checking OS version on "+h+" after upgrade", func() {
   347  					out := RunSSHWithRetry(cl, "cat /etc/os-release")
   348  					GinkgoWriter.Printf("OS Version on %s:\n%s\n", h, out)
   349  				})
   350  			}(hostName, client)
   351  		}
   352  
   353  		// Wait for all parallel jobs
   354  		wg.Wait()
   355  
   356  		By("Checking cluster state after upgrade", func() {
   357  			WaitCluster(clusterNS, clusterName)
   358  		})
   359  	})
   360  })