github.com/rancher/elemental/tests@v0.0.0-20240517125144-ae048c615b3f/e2e/suite_test.go (about) 1 /* 2 Copyright © 2022 - 2024 SUSE LLC 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 http://www.apache.org/licenses/LICENSE-2.0 8 Unless required by applicable law or agreed to in writing, software 9 distributed under the License is distributed on an "AS IS" BASIS, 10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 See the License for the specific language governing permissions and 12 limitations under the License. 13 */ 14 15 package e2e_test 16 17 import ( 18 "os" 19 "strconv" 20 "strings" 21 "testing" 22 "time" 23 24 . "github.com/onsi/ginkgo/v2" 25 . "github.com/onsi/gomega" 26 "github.com/rancher-sandbox/ele-testhelpers/kubectl" 27 "github.com/rancher-sandbox/ele-testhelpers/rancher" 28 "github.com/rancher-sandbox/ele-testhelpers/tools" 29 . "github.com/rancher-sandbox/qase-ginkgo" 30 "github.com/rancher/elemental/tests/e2e/helpers/elemental" 31 ) 32 33 const ( 34 airgapBuildScript = "../scripts/build-airgap" 35 appYaml = "../assets/hello-world_app.yaml" 36 backupYaml = "../assets/backup.yaml" 37 ciTokenYaml = "../assets/local-kubeconfig-token-skel.yaml" 38 configPrivateCAScript = "../scripts/config-private-ca" 39 configRKE2Yaml = "../assets/config_rke2.yaml" 40 dumbRegistrationYaml = "../assets/dumb_machineRegistration.yaml" 41 emulateTPMYaml = "../assets/emulateTPM.yaml" 42 getOSScript = "../scripts/get-name-from-managedosversion" 43 httpSrv = "http://192.168.122.1:8000" 44 installConfigYaml = "../../install-config.yaml" 45 installHardenedScript = "../scripts/config-hardened" 46 installVMScript = "../scripts/install-vm" 47 localKubeconfigYaml = "../assets/local-kubeconfig-skel.yaml" 48 numberOfNodesMax = 30 49 resetMachineInv = "../assets/reset_machine_inventory.yaml" 50 restoreYaml = "../assets/restore.yaml" 51 upgradeSkelYaml = "../assets/upgrade_skel.yaml" 52 userName = "root" 53 userPassword = "r0s@pwd1" 54 vmNameRoot = "node" 55 ) 56 57 var ( 58 backupRestoreVersion string 59 caType string 60 certManagerVersion string 61 clusterName string 62 clusterNS string 63 clusterType string 64 clusterYaml string 65 elementalSupport string 66 emulateTPM bool 67 forceDowngrade bool 68 isoBoot bool 69 k8sUpstreamVersion string 70 k8sDownstreamVersion string 71 netDefaultFileName string 72 numberOfClusters int 73 numberOfVMs int 74 operatorUpgrade string 75 operatorRepo string 76 os2Test string 77 poolType string 78 proxy string 79 rancherChannel string 80 rancherHeadVersion string 81 rancherHostname string 82 rancherLogCollector string 83 rancherVersion string 84 rancherUpgrade string 85 rancherUpgradeChannel string 86 rancherUpgradeHeadVersion string 87 rancherUpgradeVersion string 88 rawBoot bool 89 registrationYaml string 90 seedImageYaml string 91 selectorYaml string 92 sequential bool 93 snapType string 94 testCaseID int64 95 testType string 96 upgradeImage string 97 upgradeOSChannel string 98 upgradeType string 99 usedNodes int 100 vmIndex int 101 vmName string 102 ) 103 104 /* 105 Wait for cluster to be in a stable state 106 - @param ns Namespace where the cluster is deployed 107 - @param cn Cluster resource name 108 - @returns Nothing, the function will fail through Ginkgo in case of issue 109 */ 110 func WaitCluster(ns, cn string) { 111 type state struct { 112 conditionStatus string 113 conditionType string 114 } 115 116 // List of conditions to check 117 states := []state{ 118 { 119 conditionStatus: "True", 120 conditionType: "AgentDeployed", 121 }, 122 { 123 conditionStatus: "True", 124 conditionType: "NoDiskPressure", 125 }, 126 { 127 conditionStatus: "True", 128 conditionType: "NoMemoryPressure", 129 }, 130 { 131 conditionStatus: "True", 132 conditionType: "Provisioned", 133 }, 134 { 135 conditionStatus: "True", 136 conditionType: "Ready", 137 }, 138 { 139 conditionStatus: "False", 140 conditionType: "Reconciling", 141 }, 142 { 143 conditionStatus: "False", 144 conditionType: "Stalled", 145 }, 146 { 147 conditionStatus: "True", 148 conditionType: "Updated", 149 }, 150 { 151 conditionStatus: "True", 152 conditionType: "Waiting", 153 }, 154 } 155 156 // Check that the cluster is in Ready state (this means that it has been created) 157 Eventually(func() string { 158 status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", 159 "--namespace", ns, cn, 160 "-o", "jsonpath={.status.ready}") 161 return status 162 }, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal("true")) 163 164 // Check that all needed conditions are in the good state 165 for _, s := range states { 166 counter := 0 167 168 Eventually(func() string { 169 status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", 170 "--namespace", ns, cn, 171 "-o", "jsonpath={.status.conditions[?(@.type==\""+s.conditionType+"\")].status}") 172 173 if status != s.conditionStatus { 174 // Show the status in case of issue, easier to debug (but log after 10 different issues) 175 // NOTE: it's not perfect but it's mainly a way to inform that the cluster took time to came up 176 counter++ 177 if counter > 10 { 178 GinkgoWriter.Printf("!! Cluster status issue !! %s is %s instead of %s\n", 179 s.conditionType, status, s.conditionStatus) 180 181 // Reset counter 182 counter = 0 183 } 184 185 // Check if rancher-system-agent.service has some issue 186 if s.conditionType == "Provisioned" || s.conditionType == "Ready" || s.conditionType == "Updated" { 187 msg := "error applying plan -- check rancher-system-agent.service logs on node for more information" 188 189 // Extract the list of failed nodes 190 listIP, _ := kubectl.RunWithoutErr("get", "machine", 191 "--namespace", ns, 192 "-o", "jsonpath={.items[?(@.status.conditions[*].message==\""+msg+"\")].status.addresses[?(@.type==\"InternalIP\")].address}") 193 194 // We can try to restart the rancher-system-agent service on the failing node 195 // because sometimes it can fail just because of a sporadic/timeout issue and a restart can fix it! 196 for _, ip := range strings.Fields(listIP) { 197 if tools.IsIPv4(ip) { 198 // Set 'client' to be able to access the node through SSH 199 cl := &tools.Client{ 200 Host: ip + ":22", 201 Username: userName, 202 Password: userPassword, 203 } 204 205 // Log the workaround, could be useful 206 GinkgoWriter.Printf("!! rancher-system-agent issue !! Service has been restarted on node with IP %s\n", ip) 207 208 // Restart rancher-system-agent service on the node 209 // NOTE: wait a little to be sure that all is restarted before continuing 210 RunSSHWithRetry(cl, "systemctl restart rancher-system-agent.service") 211 time.Sleep(tools.SetTimeout(15 * time.Second)) 212 } 213 } 214 } 215 } 216 217 return status 218 }, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal(s.conditionStatus)) 219 } 220 } 221 222 /* 223 Check that Cluster resource has been correctly created 224 - @param ns Namespace where the cluster is deployed 225 - @param cn Cluster resource name 226 - @returns Nothing, the function will fail through Ginkgo in case of issue 227 */ 228 func CheckCreatedCluster(ns, cn string) { 229 // Check that the cluster is correctly created 230 Eventually(func() string { 231 out, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", 232 "--namespace", ns, 233 cn, "-o", "jsonpath={.metadata.name}") 234 return out 235 }, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(Equal(cn)) 236 } 237 238 /* 239 Check that Cluster resource has been correctly created 240 - @param ns Namespace where the cluster is deployed 241 - @param rn MachineRegistration resource name 242 - @returns Nothing, the function will fail through Ginkgo in case of issue 243 */ 244 func CheckCreatedRegistration(ns, rn string) { 245 Eventually(func() string { 246 out, _ := kubectl.RunWithoutErr("get", "MachineRegistration", 247 "--namespace", clusterNS, 248 "-o", "jsonpath={.items[*].metadata.name}") 249 return out 250 }, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring(rn)) 251 } 252 253 /* 254 Check that a SelectorTemplate resource has been correctly created 255 - @param ns Namespace where the cluster is deployed 256 - @param sn Selector name 257 - @returns Nothing, the function will fail through Ginkgo in case of issue 258 */ 259 func CheckCreatedSelectorTemplate(ns, sn string) { 260 Eventually(func() string { 261 out, _ := kubectl.RunWithoutErr("get", "MachineInventorySelectorTemplate", 262 "--namespace", ns, 263 "-o", "jsonpath={.items[*].metadata.name}") 264 return out 265 }, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring(sn)) 266 } 267 268 /* 269 Wait for OSVersion to be populated 270 - @param ns Namespace where the cluster is deployed 271 - @returns Nothing, the function will fail through Ginkgo in case of issue 272 */ 273 func WaitForOSVersion(ns string) { 274 Eventually(func() string { 275 out, _ := kubectl.RunWithoutErr("get", "ManagedOSVersion", 276 "--namespace", ns, 277 "-o", "jsonpath={.items[*].metadata.name}") 278 return out 279 }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(Not(BeEmpty())) 280 } 281 282 /* 283 Check SSH connection 284 - @param cl Client (node) informations 285 - @returns Nothing, the function will fail through Ginkgo in case of issue 286 */ 287 func CheckSSH(cl *tools.Client) { 288 Eventually(func() string { 289 out, _ := cl.RunSSH("echo SSH_OK") 290 return strings.Trim(out, "\n") 291 }, tools.SetTimeout(10*time.Minute), 5*time.Second).Should(Equal("SSH_OK")) 292 } 293 294 /* 295 Download ISO built with SeedImage 296 - @param ns Namespace where the cluster is deployed 297 - @param seedName Name of the used SeedImage resource 298 - @param filename Path and name of the file where to store the ISO 299 - @returns Nothing, the function will fail through Ginkgo in case of issue 300 */ 301 func DownloadBuiltISO(ns, seedName, filename string) { 302 // Set minimal ISO file to 500MB 303 const minimalISOSize = 500 * 1024 * 1024 304 305 // Check that the seed image is correctly created 306 Eventually(func() string { 307 out, _ := kubectl.RunWithoutErr("get", "SeedImage", 308 "--namespace", ns, 309 seedName, 310 "-o", "jsonpath={.status}") 311 return out 312 }, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring("downloadURL")) 313 314 // Get URL 315 seedImageURL, err := kubectl.RunWithoutErr("get", "SeedImage", 316 "--namespace", ns, 317 seedName, 318 "-o", "jsonpath={.status.downloadURL}") 319 Expect(err).To(Not(HaveOccurred())) 320 321 // ISO file size should be greater than 500MB 322 Eventually(func() int64 { 323 // No need to check download status, file size at the end is enough 324 _ = tools.GetFileFromURL(seedImageURL, filename, false) 325 file, _ := os.Stat(filename) 326 return file.Size() 327 }, tools.SetTimeout(2*time.Minute), 10*time.Second).Should(BeNumerically(">", minimalISOSize)) 328 } 329 330 /* 331 Get Elemental node information 332 - @param hn Node hostname 333 - @returns Client structure and MAC address 334 */ 335 func GetNodeInfo(hn string) (*tools.Client, string) { 336 // Get network data 337 data, err := rancher.GetHostNetConfig(".*name=\""+hn+"\".*", netDefaultFileName) 338 Expect(err).To(Not(HaveOccurred())) 339 340 // Set 'client' to be able to access the node through SSH 341 c := &tools.Client{ 342 Host: string(data.IP) + ":22", 343 Username: userName, 344 Password: userPassword, 345 } 346 347 return c, data.Mac 348 } 349 350 /* 351 Get Elemental node IP address 352 - @param hn Node hostname 353 - @returns IP address 354 */ 355 func GetNodeIP(hn string) string { 356 // Get network data 357 data, err := rancher.GetHostNetConfig(".*name=\""+hn+"\".*", netDefaultFileName) 358 Expect(err).To(Not(HaveOccurred())) 359 360 return data.IP 361 } 362 363 /* 364 Execute RunHelmBinaryWithCustomErr within a loop with timeout 365 - @param s options to pass to RunHelmBinaryWithCustomErr command 366 - @returns Nothing, the function will fail through Ginkgo in case of issue 367 */ 368 func RunHelmCmdWithRetry(s ...string) { 369 Eventually(func() error { 370 return kubectl.RunHelmBinaryWithCustomErr(s...) 371 }, tools.SetTimeout(2*time.Minute), 20*time.Second).Should(Not(HaveOccurred())) 372 } 373 374 /* 375 Execute SSH command with retry 376 - @param cl Client (node) informations 377 - @param cmd Command to execute 378 - @returns result of the executed command 379 */ 380 func RunSSHWithRetry(cl *tools.Client, cmd string) string { 381 var err error 382 var out string 383 384 Eventually(func() error { 385 out, err = cl.RunSSH(cmd) 386 return err 387 }, tools.SetTimeout(2*time.Minute), 20*time.Second).Should(Not(HaveOccurred())) 388 389 return out 390 } 391 392 func FailWithReport(message string, callerSkip ...int) { 393 // Ensures the correct line numbers are reported 394 Fail(message, callerSkip[0]+1) 395 } 396 397 func TestE2E(t *testing.T) { 398 RegisterFailHandler(FailWithReport) 399 RunSpecs(t, "Elemental End-To-End Test Suite") 400 } 401 402 // Use to modify yaml templates 403 type YamlPattern struct { 404 key string 405 value string 406 } 407 408 var _ = BeforeSuite(func() { 409 backupRestoreVersion = os.Getenv("BACKUP_RESTORE_VERSION") 410 bootTypeString := os.Getenv("BOOT_TYPE") 411 caType = os.Getenv("CA_TYPE") 412 certManagerVersion = os.Getenv("CERT_MANAGER_VERSION") 413 clusterName = os.Getenv("CLUSTER_NAME") 414 clusterNS = os.Getenv("CLUSTER_NS") 415 clusterType = os.Getenv("CLUSTER_TYPE") 416 elementalSupport = os.Getenv("ELEMENTAL_SUPPORT") 417 eTPM := os.Getenv("EMULATE_TPM") 418 forceDowngradeString := os.Getenv("FORCE_DOWNGRADE") 419 index := os.Getenv("VM_INDEX") 420 k8sDownstreamVersion = os.Getenv("K8S_DOWNSTREAM_VERSION") 421 k8sUpstreamVersion = os.Getenv("K8S_UPSTREAM_VERSION") 422 number := os.Getenv("VM_NUMBERS") 423 clusterNumber := os.Getenv("CLUSTER_NUMBER") 424 operatorUpgrade = os.Getenv("OPERATOR_UPGRADE") 425 operatorRepo = os.Getenv("OPERATOR_REPO") 426 os2Test = os.Getenv("OS_TO_TEST") 427 poolType = os.Getenv("POOL") 428 proxy = os.Getenv("PROXY") 429 rancherHostname = os.Getenv("PUBLIC_FQDN") 430 rancherLogCollector = os.Getenv("RANCHER_LOG_COLLECTOR") 431 rancherVersion = os.Getenv("RANCHER_VERSION") 432 rancherUpgrade = os.Getenv("RANCHER_UPGRADE") 433 seqString := os.Getenv("SEQUENTIAL") 434 snapType = os.Getenv("SNAP_TYPE") 435 testType = os.Getenv("TEST_TYPE") 436 upgradeImage = os.Getenv("UPGRADE_IMAGE") 437 upgradeOSChannel = os.Getenv("UPGRADE_OS_CHANNEL") 438 upgradeType = os.Getenv("UPGRADE_TYPE") 439 440 // Only if VM_INDEX is set 441 if index != "" { 442 var err error 443 vmIndex, err = strconv.Atoi(index) 444 Expect(err).To(Not(HaveOccurred())) 445 446 // Set default hostname 447 vmName = elemental.SetHostname(vmNameRoot, vmIndex) 448 } else { 449 // Default value for vmIndex 450 vmIndex = 0 451 } 452 453 // Only if VM_NUMBER is set 454 if number != "" { 455 var err error 456 numberOfVMs, err = strconv.Atoi(number) 457 Expect(err).To(Not(HaveOccurred())) 458 } else { 459 // By default set to vmIndex 460 numberOfVMs = vmIndex 461 } 462 463 // Set number of "used" nodes 464 // NOTE: could be the number added nodes or the number of nodes to use/upgrade 465 usedNodes = (numberOfVMs - vmIndex) + 1 466 467 // Force correct value for emulateTPM 468 switch eTPM { 469 case "true": 470 emulateTPM = true 471 default: 472 emulateTPM = false 473 } 474 475 // Force correct value for sequential 476 switch seqString { 477 case "true": 478 sequential = true 479 default: 480 sequential = false 481 } 482 483 // Define boot type 484 switch bootTypeString { 485 case "iso": 486 isoBoot = true 487 case "raw": 488 rawBoot = true 489 } 490 491 // Force correct value for forceDowngrade 492 switch forceDowngradeString { 493 case "true": 494 forceDowngrade = true 495 default: 496 forceDowngrade = false 497 } 498 499 // Extract Rancher Manager channel/version to install 500 if rancherVersion != "" { 501 // Split rancherVersion and reset it 502 s := strings.Split(rancherVersion, "/") 503 rancherVersion = "" 504 505 // Get needed informations 506 rancherChannel = s[0] 507 if len(s) > 1 { 508 rancherVersion = s[1] 509 } 510 if len(s) > 2 { 511 rancherHeadVersion = s[2] 512 } 513 } 514 515 // Extract Rancher Manager channel/version to upgrade 516 if rancherUpgrade != "" { 517 // Split rancherUpgrade and reset it 518 s := strings.Split(rancherUpgrade, "/") 519 520 // Get needed informations 521 rancherUpgradeChannel = s[0] 522 if len(s) > 1 { 523 rancherUpgradeVersion = s[1] 524 } 525 if len(s) > 2 { 526 rancherUpgradeHeadVersion = s[2] 527 } 528 } 529 530 switch testType { 531 case "airgap": 532 // Enable airgap support 533 clusterYaml = "../assets/cluster-airgap.yaml" 534 netDefaultFileName = "../assets/net-default-airgap.xml" 535 registrationYaml = "../assets/machineRegistration.yaml" 536 seedImageYaml = "../assets/seedImage.yaml" 537 selectorYaml = "../assets/selector.yaml" 538 case "multi": 539 // Enable multi-cluster support 540 if clusterNumber != "" { 541 var err error 542 numberOfClusters, err = strconv.Atoi(clusterNumber) 543 Expect(err).To(Not(HaveOccurred())) 544 } 545 546 clusterYaml = "../assets/cluster-multi.yaml" 547 netDefaultFileName = "../assets/net-default.xml" 548 registrationYaml = "../assets/machineRegistration-multi.yaml" 549 seedImageYaml = "../assets/seedImage-multi.yaml" 550 selectorYaml = "../assets/selector-multi.yaml" 551 default: 552 // Default cluster support 553 clusterYaml = "../assets/cluster.yaml" 554 netDefaultFileName = "../assets/net-default.xml" 555 registrationYaml = "../assets/machineRegistration.yaml" 556 seedImageYaml = "../assets/seedImage.yaml" 557 selectorYaml = "../assets/selector.yaml" 558 } 559 560 // Start HTTP server 561 tools.HTTPShare("../..", ":8000") 562 }) 563 564 var _ = ReportBeforeEach(func(report SpecReport) { 565 // Reset case ID 566 testCaseID = -1 567 }) 568 569 var _ = ReportAfterEach(func(report SpecReport) { 570 // Add result in Qase if asked 571 Qase(testCaseID, report) 572 })