github.com/rancher/elemental/tests@v0.0.0-20240517125144-ae048c615b3f/e2e/bootstrap_test.go

     1  /*
     2  Copyright © 2022 - 2024 SUSE LLC
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7      http://www.apache.org/licenses/LICENSE-2.0
     8  Unless required by applicable law or agreed to in writing, software
     9  distributed under the License is distributed on an "AS IS" BASIS,
    10  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  See the License for the specific language governing permissions and
    12  limitations under the License.
    13  */
    14  
    15  package e2e_test
    16  
    17  import (
    18  	"os/exec"
    19  	"strings"
    20  	"sync"
    21  	"time"
    22  
    23  	. "github.com/onsi/ginkgo/v2"
    24  	. "github.com/onsi/gomega"
    25  	"github.com/rancher-sandbox/ele-testhelpers/kubectl"
    26  	"github.com/rancher-sandbox/ele-testhelpers/rancher"
    27  	"github.com/rancher-sandbox/ele-testhelpers/tools"
    28  	"github.com/rancher/elemental/tests/e2e/helpers/elemental"
    29  	"github.com/rancher/elemental/tests/e2e/helpers/misc"
    30  	"github.com/rancher/elemental/tests/e2e/helpers/network"
    31  )
    32  
    33  func checkClusterAgent(client *tools.Client) {
    34  	// The cattle-cluster-agent pod communicates with Rancher; wait for it to be running before continuing
    35  	Eventually(func() string {
    36  		out, _ := client.RunSSH("kubectl get pod -n cattle-system -l app=cattle-cluster-agent")
    37  		return out
    38  	}, tools.SetTimeout(5*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(ContainSubstring("Running"))
    39  }
    40  
    41  var _ = Describe("E2E - Bootstrapping node", Label("bootstrap"), func() {
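        	// Shared state for the spec below: 'bootstrappedNodes' tracks how many nodes
        	// have been started so that misc.WaitNodesBoot can pace the boots, while 'wg'
        	// waits for the per-node goroutines.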
    42  	var (
    43  		bootstrappedNodes int
    44  		wg                sync.WaitGroup
    45  	)
    46  
    47  	It("Provision the node", func() {
    48  		// Report to Qase
    49  		testCaseID = 9
    50  
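        		// Neither ISO nor raw image boot: an iPXE network installation is used, so
        		// fetch the registration config and prepare the boot script first.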
    51  		if !isoBoot && !rawBoot {
    52  			By("Downloading MachineRegistration file", func() {
    53  				// Download the new YAML installation config file
    54  				machineRegName := "machine-registration-" + poolType + "-" + clusterName
    55  				tokenURL, err := kubectl.RunWithoutErr("get", "MachineRegistration",
    56  					"--namespace", clusterNS, machineRegName,
    57  					"-o", "jsonpath={.status.registrationURL}")
    58  				Expect(err).To(Not(HaveOccurred()))
    59  
    60  				Eventually(func() error {
    61  					return tools.GetFileFromURL(tokenURL, installConfigYaml, false)
    62  				}, tools.SetTimeout(2*time.Minute), 10*time.Second).ShouldNot(HaveOccurred())
    63  			})
    64  
    65  			By("Configuring iPXE boot script for network installation", func() {
    66  				numberOfFile, err := network.ConfigureiPXE(httpSrv)
    67  				Expect(err).To(Not(HaveOccurred()))
    68  				Expect(numberOfFile).To(BeNumerically(">=", 1))
    69  			})
    70  		}
    71  
    72  		// Loop on node provisioning
    73  		// NOTE: if numberOfVMs == vmIndex then only one node will be provisioned
    74  		bootstrappedNodes = 0
    75  		for index := vmIndex; index <= numberOfVMs; index++ {
    76  			// Set node hostname
    77  			hostName := elemental.SetHostname(vmNameRoot, index)
    78  			Expect(hostName).To(Not(BeEmpty()))
    79  
    80  			// Add the node to the network configuration
    81  			err := rancher.AddNode(netDefaultFileName, hostName, index)
    82  			Expect(err).To(Not(HaveOccurred()))
    83  
    84  			// Get generated MAC address
    85  			_, macAdrs := GetNodeInfo(hostName)
    86  			Expect(macAdrs).To(Not(BeEmpty()))
    87  
    88  			wg.Add(1)
    89  			go func(s, h, m string, i int) {
    90  				defer wg.Done()
    91  				defer GinkgoRecover()
    92  
    93  				By("Installing node "+h, func() {
    94  					// Wait a little bit to avoid starting all VMs at the same time
    95  					misc.RandomSleep(sequential, i)
    96  
    97  					// Execute node deployment in parallel
    98  					err := exec.Command(s, h, m).Run()
    99  					Expect(err).To(Not(HaveOccurred()))
   100  				})
   101  			}(installVMScript, hostName, macAdrs, index)
   102  
   103  			// Wait a bit before starting more nodes to reduce CPU and I/O load
   104  			bootstrappedNodes = misc.WaitNodesBoot(index, vmIndex, bootstrappedNodes, numberOfNodesMax)
   105  		}
   106  
   107  		// Wait for all parallel jobs
   108  		wg.Wait()
   109  
   110  		// Loop on nodes to check that SeedImage cloud-config is correctly applied
   111  		// Only for master pool
   112  		if poolType == "master" && isoBoot {
   113  			for index := vmIndex; index <= numberOfVMs; index++ {
   114  				hostName := elemental.SetHostname(vmNameRoot, index)
   115  				Expect(hostName).To(Not(BeEmpty()))
   116  
   117  				client, _ := GetNodeInfo(hostName)
   118  				Expect(client).To(Not(BeNil()))
   119  
   120  				wg.Add(1)
   121  				go func(h string, cl *tools.Client) {
   122  					defer wg.Done()
   123  					defer GinkgoRecover()
   124  
   125  					By("Checking SeedImage cloud-config on "+h, func() {
   126  						// Wait for SSH to be available
   127  						// NOTE: this also checks that the root password was correctly set by cloud-config
   128  						CheckSSH(cl)
   129  
   130  						// Verify that the cloud-config was applied by checking that the expected file is present
   131  						_ = RunSSHWithRetry(cl, "ls /etc/elemental-test")
   132  
   133  						// Check that the installation is completed before halting the VM
   134  						Eventually(func() error {
   135  							// A bit dirty, but this is temporary to keep compatibility with older Stable versions
   136  							_, err := cl.RunSSH("(journalctl --no-pager -u elemental-register.service ; journalctl --no-pager -u elemental-register-install.service) | grep -Eiq 'elemental install.* completed'")
   137  							return err
   138  						}, tools.SetTimeout(8*time.Minute), 10*time.Second).Should(Not(HaveOccurred()))
   139  
   140  						// Halt the VM
   141  						_ = RunSSHWithRetry(cl, "setsid -f init 0")
   142  					})
   143  				}(hostName, client)
   144  			}
   145  			wg.Wait()
   146  		}
   147  	})
   148  
   149  	It("Add the nodes in Rancher Manager", func() {
   150  		// Report to Qase
   151  		testCaseID = 67
   152  
   153  		for index := vmIndex; index <= numberOfVMs; index++ {
   154  			// Set node hostname
   155  			hostName := elemental.SetHostname(vmNameRoot, index)
   156  			Expect(hostName).To(Not(BeEmpty()))
   157  
   158  			// Check each node in Rancher in parallel
   159  			wg.Add(1)
   160  			go func(c, h string, i int) {
   161  				defer wg.Done()
   162  				defer GinkgoRecover()
   163  
   164  				By("Checking that node "+h+" is available in Rancher", func() {
   165  					Eventually(func() string {
   166  						id, _ := elemental.GetServerID(c, i)
   167  						return id
   168  					}, tools.SetTimeout(1*time.Minute), 5*time.Second).Should(Not(BeEmpty()))
   169  				})
   170  			}(clusterNS, hostName, index)
   171  		}
   172  
   173  		// Wait for all parallel jobs
   174  		wg.Wait()
   175  
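        		// Starting at an index above 1 means nodes are added to an existing cluster,
        		// so make sure that cluster is in a stable state before scaling it.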
   176  		if vmIndex > 1 {
   177  			By("Checking cluster state", func() {
   178  				WaitCluster(clusterNS, clusterName)
   179  			})
   180  		}
   181  
   182  		By("Incrementing number of nodes in "+poolType+" pool", func() {
   183  			// Increase 'quantity' field
   184  			poolName := "pool-" + poolType + "-" + clusterName
   185  			value, err := rancher.SetNodeQuantity(clusterNS, clusterName, poolName, usedNodes)
   186  			Expect(err).To(Not(HaveOccurred()))
   187  			Expect(value).To(BeNumerically(">=", 1))
   188  
   189  			// Check that the selector has been correctly created
   190  			Eventually(func() string {
   191  				out, _ := kubectl.RunWithoutErr("get", "MachineInventorySelector",
   192  					"--namespace", clusterNS,
   193  					"-o", "jsonpath={.items[*].metadata.name}")
   194  				return out
   195  			}, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring("selector-" + poolType + "-" + clusterName))
   196  		})
   197  
   198  		By("Waiting for known cluster state before adding the node(s)", func() {
   199  			msg := `(configuring .* node\(s\)|waiting for viable init node)`
   200  			Eventually(func() string {
   201  				clusterMsg, _ := elemental.GetClusterState(clusterNS, clusterName,
   202  					"{.status.conditions[?(@.type==\"Updated\")].message}")
   203  
   204  				// Sometimes we can have a different status/condition
   205  				if clusterMsg == "" {
   206  					out, _ := elemental.GetClusterState(clusterNS, clusterName,
   207  						"{.status.conditions[?(@.type==\"Provisioned\")].message}")
   208  
   209  					return out
   210  				}
   211  
   212  				return clusterMsg
   213  			}, tools.SetTimeout(5*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(MatchRegexp(msg))
   214  		})
   215  
   216  		bootstrappedNodes = 0
   217  		for index := vmIndex; index <= numberOfVMs; index++ {
   218  			// Set node hostname
   219  			hostName := elemental.SetHostname(vmNameRoot, index)
   220  			Expect(hostName).To(Not(BeEmpty()))
   221  
   222  			// Get node information
   223  			client, _ := GetNodeInfo(hostName)
   224  			Expect(client).To(Not(BeNil()))
   225  
   226  			// Execute in parallel
   227  			wg.Add(1)
   228  			go func(c, h string, i int, t bool, cl *tools.Client) {
   229  				defer wg.Done()
   230  				defer GinkgoRecover()
   231  
   232  				// Restart the node
   233  				By("Restarting "+h+" to add it in the cluster", func() {
   234  					// Wait a little bit to avoid starting all VMs at the same time
   235  					misc.RandomSleep(sequential, i)
   236  
   237  					err := exec.Command("sudo", "virsh", "start", h).Run()
   238  					Expect(err).To(Not(HaveOccurred()))
   239  				})
   240  
   241  				By("Checking "+h+" SSH connection", func() {
   242  					CheckSSH(cl)
   243  				})
   244  
   245  				By("Checking that TPM is correctly configured on "+h, func() {
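        					// The test expects /dev/tpm0 to be a character device, except when the
        					// TPM is emulated, in which case the device must not be present at all.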
   246  					testValue := "-c"
   247  					if t {
   248  						testValue = "! -e"
   249  					}
   250  					_ = RunSSHWithRetry(cl, "[[ "+testValue+" /dev/tpm0 ]]")
   251  				})
   252  
   253  				By("Checking OS version on "+h, func() {
   254  					out := RunSSHWithRetry(cl, "cat /etc/os-release")
   255  					GinkgoWriter.Printf("OS Version on %s:\n%s\n", h, out)
   256  				})
   257  			}(clusterNS, hostName, index, emulateTPM, client)
   258  
   259  			// Wait a bit before starting more nodes to reduce CPU and I/O load
   260  			bootstrappedNodes = misc.WaitNodesBoot(index, vmIndex, bootstrappedNodes, numberOfNodesMax)
   261  		}
   262  
   263  		// Wait for all parallel jobs
   264  		wg.Wait()
   265  
   266  		if poolType != "worker" {
   267  			for index := vmIndex; index <= numberOfVMs; index++ {
   268  				// Set node hostname
   269  				hostName := elemental.SetHostname(vmNameRoot, index)
   270  				Expect(hostName).To(Not(BeEmpty()))
   271  
   272  				// Get node information
   273  				client, _ := GetNodeInfo(hostName)
   274  				Expect(client).To(Not(BeNil()))
   275  
   276  				// Execute in parallel
   277  				wg.Add(1)
   278  				go func(h string, cl *tools.Client) {
   279  					defer wg.Done()
   280  					defer GinkgoRecover()
   281  
   282  					if strings.Contains(k8sDownstreamVersion, "rke2") {
   283  						By("Configuring kubectl command on node "+h, func() {
   284  							dir := "/var/lib/rancher/rke2/bin"
   285  							kubeCfg := "export KUBECONFIG=/etc/rancher/rke2/rke2.yaml"
   286  
   287  							// Wait a little to be sure that RKE2 installation has started
   288  							// Otherwise the directory is not available!
   289  							_ = RunSSHWithRetry(cl, "[[ -d "+dir+" ]]")
   290  
   291  							// Configure kubectl
   292  							_ = RunSSHWithRetry(cl, "I="+dir+"/kubectl; if [[ -x ${I} ]]; then ln -s ${I} bin/; echo "+kubeCfg+" >> .bashrc; fi")
   293  						})
   294  					}
   295  
   296  					By("Checking kubectl command on "+h, func() {
   297  						// Check if kubectl works
   298  						Eventually(func() string {
   299  							out, _ := cl.RunSSH("kubectl version 2>/dev/null | grep 'Server Version:'")
   300  							return out
   301  						}, tools.SetTimeout(5*time.Minute), 5*time.Second).Should(ContainSubstring(k8sDownstreamVersion))
   302  					})
   303  
   304  					By("Checking cluster agent on "+h, func() {
   305  						checkClusterAgent(cl)
   306  					})
   307  				}(hostName, client)
   308  			}
   309  
   310  			// Wait for all parallel jobs
   311  			wg.Wait()
   312  		}
   313  
   314  		By("Checking cluster state", func() {
   315  			WaitCluster(clusterNS, clusterName)
   316  		})
   317  
   318  		if poolType != "worker" {
   319  			for index := vmIndex; index <= numberOfVMs; index++ {
   320  				// Set node hostname
   321  				hostName := elemental.SetHostname(vmNameRoot, index)
   322  				Expect(hostName).To(Not(BeEmpty()))
   323  
   324  				// Get node information
   325  				client, _ := GetNodeInfo(hostName)
   326  				Expect(client).To(Not(BeNil()))
   327  
   328  				// Execute in parallel
   329  				wg.Add(1)
   330  				go func(h string, cl *tools.Client) {
   331  					defer wg.Done()
   332  					defer GinkgoRecover()
   333  
   334  					By("Checking cluster version on "+h, func() {
   335  						Eventually(func() error {
   336  							k8sVer, err := cl.RunSSH("kubectl version 2>/dev/null")
   337  							if strings.Contains(k8sVer, "Server Version:") {
   338  								// Show the cluster version; this can be useful for debugging
   339  								GinkgoWriter.Printf("K8s version on %s:\n%s\n", h, k8sVer)
   340  							}
   341  							return err
   342  						}, tools.SetTimeout(1*time.Minute), 5*time.Second).Should(Not(HaveOccurred()))
   343  					})
   344  				}(hostName, client)
   345  			}
   346  
   347  			// Wait for all parallel jobs
   348  			wg.Wait()
   349  		}
   350  
   351  		bootstrappedNodes = 0
   352  		for index := vmIndex; index <= numberOfVMs; index++ {
   353  			// Set node hostname
   354  			hostName := elemental.SetHostname(vmNameRoot, index)
   355  			Expect(hostName).To(Not(BeEmpty()))
   356  
   357  			// Get node information
   358  			client, _ := GetNodeInfo(hostName)
   359  			Expect(client).To(Not(BeNil()))
   360  
   361  			// Execute in parallel
   362  			wg.Add(1)
   363  			go func(h, p string, i int, cl *tools.Client) {
   364  				defer wg.Done()
   365  				defer GinkgoRecover()
   366  
   367  				By("Rebooting "+h, func() {
   368  					// Wait a little bit to avoid starting all VMs at the same time
   369  					misc.RandomSleep(sequential, i)
   370  
   371  					// Execute 'reboot' in the background to avoid blocking the SSH session
   372  					Eventually(func() error {
   373  						_, err := cl.RunSSH("setsid -f reboot")
   374  						return err
   375  					}, tools.SetTimeout(2*time.Minute), 10*time.Second).Should(Not(HaveOccurred()))
   376  				})
   377  
   378  				if p != "worker" {
   379  					By("Checking cluster agent on "+h, func() {
   380  						checkClusterAgent(cl)
   381  					})
   382  				}
   383  			}(hostName, poolType, index, client)
   384  
   385  			// Wait a bit before starting more nodes to reduce CPU and I/O load
   386  			bootstrappedNodes = misc.WaitNodesBoot(index, vmIndex, bootstrappedNodes, numberOfNodesMax)
   387  		}
   388  
   389  		// Wait for all parallel jobs
   390  		wg.Wait()
   391  
   392  		By("Checking cluster state after reboot", func() {
   393  			WaitCluster(clusterNS, clusterName)
   394  		})
   395  	})
   396  })