github.com/rancher/elemental/tests@v0.0.0-20240517125144-ae048c615b3f/e2e/suite_test.go (about)

     1  /*
     2  Copyright © 2022 - 2024 SUSE LLC
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7      http://www.apache.org/licenses/LICENSE-2.0
     8  Unless required by applicable law or agreed to in writing, software
     9  distributed under the License is distributed on an "AS IS" BASIS,
    10  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  See the License for the specific language governing permissions and
    12  limitations under the License.
    13  */
    14  
    15  package e2e_test
    16  
    17  import (
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  	"testing"
    22  	"time"
    23  
    24  	. "github.com/onsi/ginkgo/v2"
    25  	. "github.com/onsi/gomega"
    26  	"github.com/rancher-sandbox/ele-testhelpers/kubectl"
    27  	"github.com/rancher-sandbox/ele-testhelpers/rancher"
    28  	"github.com/rancher-sandbox/ele-testhelpers/tools"
    29  	. "github.com/rancher-sandbox/qase-ginkgo"
    30  	"github.com/rancher/elemental/tests/e2e/helpers/elemental"
    31  )
    32  
    33  const (
    34  	airgapBuildScript     = "../scripts/build-airgap"
    35  	appYaml               = "../assets/hello-world_app.yaml"
    36  	backupYaml            = "../assets/backup.yaml"
    37  	ciTokenYaml           = "../assets/local-kubeconfig-token-skel.yaml"
    38  	configPrivateCAScript = "../scripts/config-private-ca"
    39  	configRKE2Yaml        = "../assets/config_rke2.yaml"
    40  	dumbRegistrationYaml  = "../assets/dumb_machineRegistration.yaml"
    41  	emulateTPMYaml        = "../assets/emulateTPM.yaml"
    42  	getOSScript           = "../scripts/get-name-from-managedosversion"
    43  	httpSrv               = "http://192.168.122.1:8000"
    44  	installConfigYaml     = "../../install-config.yaml"
    45  	installHardenedScript = "../scripts/config-hardened"
    46  	installVMScript       = "../scripts/install-vm"
    47  	localKubeconfigYaml   = "../assets/local-kubeconfig-skel.yaml"
    48  	numberOfNodesMax      = 30
    49  	resetMachineInv       = "../assets/reset_machine_inventory.yaml"
    50  	restoreYaml           = "../assets/restore.yaml"
    51  	upgradeSkelYaml       = "../assets/upgrade_skel.yaml"
    52  	userName              = "root"
    53  	userPassword          = "r0s@pwd1"
    54  	vmNameRoot            = "node"
    55  )
    56  
    57  var (
    58  	backupRestoreVersion      string
    59  	caType                    string
    60  	certManagerVersion        string
    61  	clusterName               string
    62  	clusterNS                 string
    63  	clusterType               string
    64  	clusterYaml               string
    65  	elementalSupport          string
    66  	emulateTPM                bool
    67  	forceDowngrade            bool
    68  	isoBoot                   bool
    69  	k8sUpstreamVersion        string
    70  	k8sDownstreamVersion      string
    71  	netDefaultFileName        string
    72  	numberOfClusters          int
    73  	numberOfVMs               int
    74  	operatorUpgrade           string
    75  	operatorRepo              string
    76  	os2Test                   string
    77  	poolType                  string
    78  	proxy                     string
    79  	rancherChannel            string
    80  	rancherHeadVersion        string
    81  	rancherHostname           string
    82  	rancherLogCollector       string
    83  	rancherVersion            string
    84  	rancherUpgrade            string
    85  	rancherUpgradeChannel     string
    86  	rancherUpgradeHeadVersion string
    87  	rancherUpgradeVersion     string
    88  	rawBoot                   bool
    89  	registrationYaml          string
    90  	seedImageYaml             string
    91  	selectorYaml              string
    92  	sequential                bool
    93  	snapType                  string
    94  	testCaseID                int64
    95  	testType                  string
    96  	upgradeImage              string
    97  	upgradeOSChannel          string
    98  	upgradeType               string
    99  	usedNodes                 int
   100  	vmIndex                   int
   101  	vmName                    string
   102  )
   103  
   104  /*
   105  Wait for cluster to be in a stable state
   106    - @param ns Namespace where the cluster is deployed
   107    - @param cn Cluster resource name
   108    - @returns Nothing, the function will fail through Ginkgo in case of issue
   109  */
   110  func WaitCluster(ns, cn string) {
   111  	type state struct {
   112  		conditionStatus string
   113  		conditionType   string
   114  	}
   115  
   116  	// List of conditions to check
   117  	states := []state{
   118  		{
   119  			conditionStatus: "True",
   120  			conditionType:   "AgentDeployed",
   121  		},
   122  		{
   123  			conditionStatus: "True",
   124  			conditionType:   "NoDiskPressure",
   125  		},
   126  		{
   127  			conditionStatus: "True",
   128  			conditionType:   "NoMemoryPressure",
   129  		},
   130  		{
   131  			conditionStatus: "True",
   132  			conditionType:   "Provisioned",
   133  		},
   134  		{
   135  			conditionStatus: "True",
   136  			conditionType:   "Ready",
   137  		},
   138  		{
   139  			conditionStatus: "False",
   140  			conditionType:   "Reconciling",
   141  		},
   142  		{
   143  			conditionStatus: "False",
   144  			conditionType:   "Stalled",
   145  		},
   146  		{
   147  			conditionStatus: "True",
   148  			conditionType:   "Updated",
   149  		},
   150  		{
   151  			conditionStatus: "True",
   152  			conditionType:   "Waiting",
   153  		},
   154  	}
   155  
   156  	// Check that the cluster is in Ready state (this means that it has been created)
   157  	Eventually(func() string {
   158  		status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io",
   159  			"--namespace", ns, cn,
   160  			"-o", "jsonpath={.status.ready}")
   161  		return status
   162  	}, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal("true"))
   163  
   164  	// Check that all needed conditions are in the good state
   165  	for _, s := range states {
   166  		counter := 0
   167  
   168  		Eventually(func() string {
   169  			status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io",
   170  				"--namespace", ns, cn,
   171  				"-o", "jsonpath={.status.conditions[?(@.type==\""+s.conditionType+"\")].status}")
   172  
   173  			if status != s.conditionStatus {
   174  				// Show the status in case of issue, easier to debug (but log after 10 different issues)
   175  				// NOTE: it's not perfect but it's mainly a way to inform that the cluster took time to came up
   176  				counter++
   177  				if counter > 10 {
   178  					GinkgoWriter.Printf("!! Cluster status issue !! %s is %s instead of %s\n",
   179  						s.conditionType, status, s.conditionStatus)
   180  
   181  					// Reset counter
   182  					counter = 0
   183  				}
   184  
   185  				// Check if rancher-system-agent.service has some issue
   186  				if s.conditionType == "Provisioned" || s.conditionType == "Ready" || s.conditionType == "Updated" {
   187  					msg := "error applying plan -- check rancher-system-agent.service logs on node for more information"
   188  
   189  					// Extract the list of failed nodes
   190  					listIP, _ := kubectl.RunWithoutErr("get", "machine",
   191  						"--namespace", ns,
   192  						"-o", "jsonpath={.items[?(@.status.conditions[*].message==\""+msg+"\")].status.addresses[?(@.type==\"InternalIP\")].address}")
   193  
   194  					// We can try to restart the rancher-system-agent service on the failing node
   195  					// because sometimes it can fail just because of a sporadic/timeout issue and a restart can fix it!
   196  					for _, ip := range strings.Fields(listIP) {
   197  						if tools.IsIPv4(ip) {
   198  							// Set 'client' to be able to access the node through SSH
   199  							cl := &tools.Client{
   200  								Host:     ip + ":22",
   201  								Username: userName,
   202  								Password: userPassword,
   203  							}
   204  
   205  							// Log the workaround, could be useful
   206  							GinkgoWriter.Printf("!! rancher-system-agent issue !! Service has been restarted on node with IP %s\n", ip)
   207  
   208  							// Restart rancher-system-agent service on the node
   209  							// NOTE: wait a little to be sure that all is restarted before continuing
   210  							RunSSHWithRetry(cl, "systemctl restart rancher-system-agent.service")
   211  							time.Sleep(tools.SetTimeout(15 * time.Second))
   212  						}
   213  					}
   214  				}
   215  			}
   216  
   217  			return status
   218  		}, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal(s.conditionStatus))
   219  	}
   220  }
   221  
   222  /*
   223  Check that Cluster resource has been correctly created
   224    - @param ns Namespace where the cluster is deployed
   225    - @param cn Cluster resource name
   226    - @returns Nothing, the function will fail through Ginkgo in case of issue
   227  */
   228  func CheckCreatedCluster(ns, cn string) {
   229  	// Check that the cluster is correctly created
   230  	Eventually(func() string {
   231  		out, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io",
   232  			"--namespace", ns,
   233  			cn, "-o", "jsonpath={.metadata.name}")
   234  		return out
   235  	}, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(Equal(cn))
   236  }
   237  
   238  /*
   239  Check that Cluster resource has been correctly created
   240    - @param ns Namespace where the cluster is deployed
   241    - @param rn MachineRegistration resource name
   242    - @returns Nothing, the function will fail through Ginkgo in case of issue
   243  */
   244  func CheckCreatedRegistration(ns, rn string) {
   245  	Eventually(func() string {
   246  		out, _ := kubectl.RunWithoutErr("get", "MachineRegistration",
   247  			"--namespace", clusterNS,
   248  			"-o", "jsonpath={.items[*].metadata.name}")
   249  		return out
   250  	}, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring(rn))
   251  }
   252  
   253  /*
   254  Check that a SelectorTemplate resource has been correctly created
   255    - @param ns Namespace where the cluster is deployed
   256    - @param sn Selector name
   257    - @returns Nothing, the function will fail through Ginkgo in case of issue
   258  */
   259  func CheckCreatedSelectorTemplate(ns, sn string) {
   260  	Eventually(func() string {
   261  		out, _ := kubectl.RunWithoutErr("get", "MachineInventorySelectorTemplate",
   262  			"--namespace", ns,
   263  			"-o", "jsonpath={.items[*].metadata.name}")
   264  		return out
   265  	}, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring(sn))
   266  }
   267  
   268  /*
   269  Wait for OSVersion to be populated
   270    - @param ns Namespace where the cluster is deployed
   271    - @returns Nothing, the function will fail through Ginkgo in case of issue
   272  */
   273  func WaitForOSVersion(ns string) {
   274  	Eventually(func() string {
   275  		out, _ := kubectl.RunWithoutErr("get", "ManagedOSVersion",
   276  			"--namespace", ns,
   277  			"-o", "jsonpath={.items[*].metadata.name}")
   278  		return out
   279  	}, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(Not(BeEmpty()))
   280  }
   281  
   282  /*
   283  Check SSH connection
   284    - @param cl Client (node) informations
   285    - @returns Nothing, the function will fail through Ginkgo in case of issue
   286  */
   287  func CheckSSH(cl *tools.Client) {
   288  	Eventually(func() string {
   289  		out, _ := cl.RunSSH("echo SSH_OK")
   290  		return strings.Trim(out, "\n")
   291  	}, tools.SetTimeout(10*time.Minute), 5*time.Second).Should(Equal("SSH_OK"))
   292  }
   293  
   294  /*
   295  Download ISO built with SeedImage
   296    - @param ns Namespace where the cluster is deployed
   297    - @param seedName Name of the used SeedImage resource
   298    - @param filename Path and name of the file where to store the ISO
   299    - @returns Nothing, the function will fail through Ginkgo in case of issue
   300  */
   301  func DownloadBuiltISO(ns, seedName, filename string) {
   302  	// Set minimal ISO file to 500MB
   303  	const minimalISOSize = 500 * 1024 * 1024
   304  
   305  	// Check that the seed image is correctly created
   306  	Eventually(func() string {
   307  		out, _ := kubectl.RunWithoutErr("get", "SeedImage",
   308  			"--namespace", ns,
   309  			seedName,
   310  			"-o", "jsonpath={.status}")
   311  		return out
   312  	}, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring("downloadURL"))
   313  
   314  	// Get URL
   315  	seedImageURL, err := kubectl.RunWithoutErr("get", "SeedImage",
   316  		"--namespace", ns,
   317  		seedName,
   318  		"-o", "jsonpath={.status.downloadURL}")
   319  	Expect(err).To(Not(HaveOccurred()))
   320  
   321  	// ISO file size should be greater than 500MB
   322  	Eventually(func() int64 {
   323  		// No need to check download status, file size at the end is enough
   324  		_ = tools.GetFileFromURL(seedImageURL, filename, false)
   325  		file, _ := os.Stat(filename)
   326  		return file.Size()
   327  	}, tools.SetTimeout(2*time.Minute), 10*time.Second).Should(BeNumerically(">", minimalISOSize))
   328  }
   329  
   330  /*
   331  Get Elemental node information
   332    - @param hn Node hostname
   333    - @returns Client structure and MAC address
   334  */
   335  func GetNodeInfo(hn string) (*tools.Client, string) {
   336  	// Get network data
   337  	data, err := rancher.GetHostNetConfig(".*name=\""+hn+"\".*", netDefaultFileName)
   338  	Expect(err).To(Not(HaveOccurred()))
   339  
   340  	// Set 'client' to be able to access the node through SSH
   341  	c := &tools.Client{
   342  		Host:     string(data.IP) + ":22",
   343  		Username: userName,
   344  		Password: userPassword,
   345  	}
   346  
   347  	return c, data.Mac
   348  }
   349  
   350  /*
   351  Get Elemental node IP address
   352    - @param hn Node hostname
   353    - @returns IP address
   354  */
   355  func GetNodeIP(hn string) string {
   356  	// Get network data
   357  	data, err := rancher.GetHostNetConfig(".*name=\""+hn+"\".*", netDefaultFileName)
   358  	Expect(err).To(Not(HaveOccurred()))
   359  
   360  	return data.IP
   361  }
   362  
   363  /*
   364  Execute RunHelmBinaryWithCustomErr within a loop with timeout
   365    - @param s options to pass to RunHelmBinaryWithCustomErr command
   366    - @returns Nothing, the function will fail through Ginkgo in case of issue
   367  */
   368  func RunHelmCmdWithRetry(s ...string) {
   369  	Eventually(func() error {
   370  		return kubectl.RunHelmBinaryWithCustomErr(s...)
   371  	}, tools.SetTimeout(2*time.Minute), 20*time.Second).Should(Not(HaveOccurred()))
   372  }
   373  
   374  /*
   375  Execute SSH command with retry
   376    - @param cl Client (node) informations
   377    - @param cmd Command to execute
   378    - @returns result of the executed command
   379  */
   380  func RunSSHWithRetry(cl *tools.Client, cmd string) string {
   381  	var err error
   382  	var out string
   383  
   384  	Eventually(func() error {
   385  		out, err = cl.RunSSH(cmd)
   386  		return err
   387  	}, tools.SetTimeout(2*time.Minute), 20*time.Second).Should(Not(HaveOccurred()))
   388  
   389  	return out
   390  }
   391  
   392  func FailWithReport(message string, callerSkip ...int) {
   393  	// Ensures the correct line numbers are reported
   394  	Fail(message, callerSkip[0]+1)
   395  }
   396  
   397  func TestE2E(t *testing.T) {
   398  	RegisterFailHandler(FailWithReport)
   399  	RunSpecs(t, "Elemental End-To-End Test Suite")
   400  }
   401  
   402  // Use to modify yaml templates
   403  type YamlPattern struct {
   404  	key   string
   405  	value string
   406  }
   407  
   408  var _ = BeforeSuite(func() {
   409  	backupRestoreVersion = os.Getenv("BACKUP_RESTORE_VERSION")
   410  	bootTypeString := os.Getenv("BOOT_TYPE")
   411  	caType = os.Getenv("CA_TYPE")
   412  	certManagerVersion = os.Getenv("CERT_MANAGER_VERSION")
   413  	clusterName = os.Getenv("CLUSTER_NAME")
   414  	clusterNS = os.Getenv("CLUSTER_NS")
   415  	clusterType = os.Getenv("CLUSTER_TYPE")
   416  	elementalSupport = os.Getenv("ELEMENTAL_SUPPORT")
   417  	eTPM := os.Getenv("EMULATE_TPM")
   418  	forceDowngradeString := os.Getenv("FORCE_DOWNGRADE")
   419  	index := os.Getenv("VM_INDEX")
   420  	k8sDownstreamVersion = os.Getenv("K8S_DOWNSTREAM_VERSION")
   421  	k8sUpstreamVersion = os.Getenv("K8S_UPSTREAM_VERSION")
   422  	number := os.Getenv("VM_NUMBERS")
   423  	clusterNumber := os.Getenv("CLUSTER_NUMBER")
   424  	operatorUpgrade = os.Getenv("OPERATOR_UPGRADE")
   425  	operatorRepo = os.Getenv("OPERATOR_REPO")
   426  	os2Test = os.Getenv("OS_TO_TEST")
   427  	poolType = os.Getenv("POOL")
   428  	proxy = os.Getenv("PROXY")
   429  	rancherHostname = os.Getenv("PUBLIC_FQDN")
   430  	rancherLogCollector = os.Getenv("RANCHER_LOG_COLLECTOR")
   431  	rancherVersion = os.Getenv("RANCHER_VERSION")
   432  	rancherUpgrade = os.Getenv("RANCHER_UPGRADE")
   433  	seqString := os.Getenv("SEQUENTIAL")
   434  	snapType = os.Getenv("SNAP_TYPE")
   435  	testType = os.Getenv("TEST_TYPE")
   436  	upgradeImage = os.Getenv("UPGRADE_IMAGE")
   437  	upgradeOSChannel = os.Getenv("UPGRADE_OS_CHANNEL")
   438  	upgradeType = os.Getenv("UPGRADE_TYPE")
   439  
   440  	// Only if VM_INDEX is set
   441  	if index != "" {
   442  		var err error
   443  		vmIndex, err = strconv.Atoi(index)
   444  		Expect(err).To(Not(HaveOccurred()))
   445  
   446  		// Set default hostname
   447  		vmName = elemental.SetHostname(vmNameRoot, vmIndex)
   448  	} else {
   449  		// Default value for vmIndex
   450  		vmIndex = 0
   451  	}
   452  
   453  	// Only if VM_NUMBER is set
   454  	if number != "" {
   455  		var err error
   456  		numberOfVMs, err = strconv.Atoi(number)
   457  		Expect(err).To(Not(HaveOccurred()))
   458  	} else {
   459  		// By default set to vmIndex
   460  		numberOfVMs = vmIndex
   461  	}
   462  
   463  	// Set number of "used" nodes
   464  	// NOTE: could be the number added nodes or the number of nodes to use/upgrade
   465  	usedNodes = (numberOfVMs - vmIndex) + 1
   466  
   467  	// Force correct value for emulateTPM
   468  	switch eTPM {
   469  	case "true":
   470  		emulateTPM = true
   471  	default:
   472  		emulateTPM = false
   473  	}
   474  
   475  	// Force correct value for sequential
   476  	switch seqString {
   477  	case "true":
   478  		sequential = true
   479  	default:
   480  		sequential = false
   481  	}
   482  
   483  	// Define boot type
   484  	switch bootTypeString {
   485  	case "iso":
   486  		isoBoot = true
   487  	case "raw":
   488  		rawBoot = true
   489  	}
   490  
   491  	// Force correct value for forceDowngrade
   492  	switch forceDowngradeString {
   493  	case "true":
   494  		forceDowngrade = true
   495  	default:
   496  		forceDowngrade = false
   497  	}
   498  
   499  	// Extract Rancher Manager channel/version to install
   500  	if rancherVersion != "" {
   501  		// Split rancherVersion and reset it
   502  		s := strings.Split(rancherVersion, "/")
   503  		rancherVersion = ""
   504  
   505  		// Get needed informations
   506  		rancherChannel = s[0]
   507  		if len(s) > 1 {
   508  			rancherVersion = s[1]
   509  		}
   510  		if len(s) > 2 {
   511  			rancherHeadVersion = s[2]
   512  		}
   513  	}
   514  
   515  	// Extract Rancher Manager channel/version to upgrade
   516  	if rancherUpgrade != "" {
   517  		// Split rancherUpgrade and reset it
   518  		s := strings.Split(rancherUpgrade, "/")
   519  
   520  		// Get needed informations
   521  		rancherUpgradeChannel = s[0]
   522  		if len(s) > 1 {
   523  			rancherUpgradeVersion = s[1]
   524  		}
   525  		if len(s) > 2 {
   526  			rancherUpgradeHeadVersion = s[2]
   527  		}
   528  	}
   529  
   530  	switch testType {
   531  	case "airgap":
   532  		// Enable airgap support
   533  		clusterYaml = "../assets/cluster-airgap.yaml"
   534  		netDefaultFileName = "../assets/net-default-airgap.xml"
   535  		registrationYaml = "../assets/machineRegistration.yaml"
   536  		seedImageYaml = "../assets/seedImage.yaml"
   537  		selectorYaml = "../assets/selector.yaml"
   538  	case "multi":
   539  		// Enable multi-cluster support
   540  		if clusterNumber != "" {
   541  			var err error
   542  			numberOfClusters, err = strconv.Atoi(clusterNumber)
   543  			Expect(err).To(Not(HaveOccurred()))
   544  		}
   545  
   546  		clusterYaml = "../assets/cluster-multi.yaml"
   547  		netDefaultFileName = "../assets/net-default.xml"
   548  		registrationYaml = "../assets/machineRegistration-multi.yaml"
   549  		seedImageYaml = "../assets/seedImage-multi.yaml"
   550  		selectorYaml = "../assets/selector-multi.yaml"
   551  	default:
   552  		// Default cluster support
   553  		clusterYaml = "../assets/cluster.yaml"
   554  		netDefaultFileName = "../assets/net-default.xml"
   555  		registrationYaml = "../assets/machineRegistration.yaml"
   556  		seedImageYaml = "../assets/seedImage.yaml"
   557  		selectorYaml = "../assets/selector.yaml"
   558  	}
   559  
   560  	// Start HTTP server
   561  	tools.HTTPShare("../..", ":8000")
   562  })
   563  
   564  var _ = ReportBeforeEach(func(report SpecReport) {
   565  	// Reset case ID
   566  	testCaseID = -1
   567  })
   568  
   569  var _ = ReportAfterEach(func(report SpecReport) {
   570  	// Add result in Qase if asked
   571  	Qase(testCaseID, report)
   572  })