github.com/verrazzano/verrazzano@v1.7.1/tests/e2e/backup/opensearch/opensearch_backup_test.go (about) 1 // Copyright (c) 2022, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package opensearch 5 6 import ( 7 "bytes" 8 "context" 9 "fmt" 10 "github.com/verrazzano/verrazzano/platform-operator/constants" 11 common "github.com/verrazzano/verrazzano/tests/e2e/backup/helpers" 12 "github.com/verrazzano/verrazzano/tests/e2e/pkg/test/framework/metrics" 13 "go.uber.org/zap" 14 k8serror "k8s.io/apimachinery/pkg/api/errors" 15 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 "net/http" 17 "text/template" 18 "time" 19 20 . "github.com/onsi/ginkgo/v2" 21 . "github.com/onsi/gomega" 22 "github.com/verrazzano/verrazzano/pkg/k8sutil" 23 "github.com/verrazzano/verrazzano/tests/e2e/pkg" 24 "github.com/verrazzano/verrazzano/tests/e2e/pkg/test/framework" 25 ) 26 27 const ( 28 shortWaitTimeout = 10 * time.Minute 29 shortPollingInterval = 10 * time.Second 30 waitTimeout = 20 * time.Minute 31 pollingInterval = 30 * time.Second 32 opsterDeploymentName = "opensearch-operator-controller-manager" 33 osMasterStsName = "opensearch-es-master" 34 osDataStsName = "opensearch-es-data" 35 osIngestStsName = "opensearch-es-ingest" 36 idSearchExactURL = "verrazzano-system/_search?from=0&size=1" 37 idSearchAllURL = "verrazzano-system/_search?" 38 ) 39 40 var esPods = []string{"opensearch-es-master", "opensearch-es-ingest", "opensearch-es-data"} 41 var esPodsUp = []string{"opensearch-es-master", "opensearch-es-ingest", "opensearch-es-data", opsterDeploymentName, "opensearch-dashboards"} 42 43 var beforeSuite = t.BeforeSuiteFunc(func() { 44 start := time.Now() 45 common.GatherInfo() 46 backupPrerequisites() 47 metrics.Emit(t.Metrics.With("deployment_elapsed_time", time.Since(start).Milliseconds())) 48 }) 49 50 var _ = BeforeSuite(beforeSuite) 51 52 var afterSuite = t.AfterSuiteFunc(func() { 53 start := time.Now() 54 cleanUpVelero() 55 metrics.Emit(t.Metrics.With("undeployment_elapsed_time", time.Since(start).Milliseconds())) 56 }) 57 58 var _ = AfterSuite(afterSuite) 59 60 var t = framework.NewTestFramework("opensearch-backup") 61 62 // CreateVeleroBackupObject creates velero backup object that starts the backup 63 func CreateVeleroBackupObject() error { 64 var b bytes.Buffer 65 template, _ := template.New("velero-backup").Parse(common.VeleroBackup) 66 data := common.VeleroBackupObject{ 67 VeleroNamespaceName: common.VeleroNameSpace, 68 VeleroBackupName: common.BackupOpensearchName, 69 VeleroBackupStorageName: common.BackupOpensearchStorageName, 70 VeleroOpensearchHookResourceName: common.BackupResourceName, 71 } 72 template.Execute(&b, data) 73 err := common.DynamicSSA(context.TODO(), b.String(), t.Logs) 74 if err != nil { 75 t.Logs.Errorf("Error creating velero backup object", zap.Error(err)) 76 return err 77 } 78 return nil 79 } 80 81 // CreateVeleroRestoreObject creates velero restore object that starts restore 82 func CreateVeleroRestoreObject() error { 83 var b bytes.Buffer 84 template, _ := template.New("velero-restore").Parse(common.VeleroRestore) 85 data := common.VeleroRestoreObject{ 86 VeleroRestore: common.RestoreOpensearchName, 87 VeleroNamespaceName: common.VeleroNameSpace, 88 VeleroBackupName: common.BackupOpensearchName, 89 VeleroOpensearchHookResourceName: common.BackupResourceName, 90 } 91 92 template.Execute(&b, data) 93 err := common.DynamicSSA(context.TODO(), b.String(), t.Logs) 94 if err != nil { 95 t.Logs.Errorf("Error creating velero restore object ", zap.Error(err)) 96 return err 97 } 98 return nil 99 } 100 101 // GetBackupID fetches an opensearch id before starting the backup 102 // This will be used to compare the restore process 103 func GetBackupID() error { 104 esURL, err := common.GetEsURL(t.Logs) 105 if err != nil { 106 t.Logs.Infof("Error getting es url ", zap.Error(err)) 107 return err 108 } 109 vzPasswd, err := common.GetVZPasswd(t.Logs) 110 if err != nil { 111 t.Logs.Errorf("Error getting vz passwd ", zap.Error(err)) 112 return err 113 } 114 115 httpClient := pkg.EventuallyVerrazzanoRetryableHTTPClient() 116 searchURL := fmt.Sprintf("%s/%s", esURL, idSearchExactURL) 117 creds := fmt.Sprintf("verrazzano:%s", vzPasswd) 118 parsedJSON, err := common.HTTPHelper(httpClient, "GET", searchURL, creds, "Basic", http.StatusOK, nil, t.Logs) 119 if err != nil { 120 t.Logs.Errorf("Error while retrieving http data %v", zap.Error(err)) 121 return err 122 } 123 common.BackupID = fmt.Sprintf("%s", parsedJSON.Path("hits.hits.0._id").Data()) 124 125 t.Logs.Infof("BackupId ===> = '%s'", common.BackupID) 126 return nil 127 } 128 129 // IsRestoreSuccessful fetches the same backup id and returns the result 130 func IsRestoreSuccessful() string { 131 esURL, err := common.GetEsURL(t.Logs) 132 if err != nil { 133 t.Logs.Infof("Error getting es url ", zap.Error(err)) 134 return "" 135 } 136 137 vzPasswd, err := common.GetVZPasswd(t.Logs) 138 if err != nil { 139 t.Logs.Infof("Error getting vz passwd ", zap.Error(err)) 140 return "" 141 } 142 143 var b bytes.Buffer 144 template, _ := template.New("id-query").Parse(common.EsQueryBody) 145 data := common.EsQueryObject{ 146 BackupIDBeforeBackup: common.BackupID, 147 } 148 template.Execute(&b, data) 149 150 httpClient := pkg.EventuallyVerrazzanoRetryableHTTPClient() 151 fetchURL := fmt.Sprintf("%s/%s", esURL, idSearchAllURL) 152 creds := fmt.Sprintf("verrazzano:%s", vzPasswd) 153 parsedJSON, err := common.HTTPHelper(httpClient, "GET", fetchURL, creds, "Basic", http.StatusOK, b.Bytes(), t.Logs) 154 if err != nil { 155 t.Logs.Errorf("Error while retrieving http data %v", zap.Error(err)) 156 return "" 157 } 158 159 backupID := fmt.Sprintf("%s", parsedJSON.Search("hits", "hits", "0", "_id").Data()) 160 t.Logs.Infof("Opensearch id before backup = '%v'", common.BackupID) 161 t.Logs.Infof("Opensearch id fetched after restore = '%v'", backupID) 162 return backupID 163 } 164 165 // NukeOpensearch is used to destroy the opensearch cluster including data 166 // This is only done after a successful backup was taken 167 func NukeOpensearch() error { 168 clientset, err := k8sutil.GetKubernetesClientset() 169 if err != nil { 170 t.Logs.Errorf("Failed to get clientset with error: %v", err) 171 return err 172 } 173 174 t.Logs.Infof("Scaling down Opster Operator") 175 getScale, err := clientset.AppsV1().Deployments(constants.VerrazzanoLoggingNamespace).GetScale(context.TODO(), opsterDeploymentName, metav1.GetOptions{}) 176 if err != nil { 177 return err 178 } 179 newScale := *getScale 180 newScale.Spec.Replicas = 0 181 182 _, err = clientset.AppsV1().Deployments(constants.VerrazzanoLoggingNamespace).UpdateScale(context.TODO(), opsterDeploymentName, &newScale, metav1.UpdateOptions{}) 183 if err != nil { 184 t.Logs.Infof("Error = %v", zap.Error(err)) 185 return err 186 } 187 188 t.Logs.Infof("Deleting opensearch master sts") 189 err = clientset.AppsV1().StatefulSets(constants.VerrazzanoLoggingNamespace).Delete(context.TODO(), osMasterStsName, metav1.DeleteOptions{}) 190 if err != nil { 191 if !k8serror.IsNotFound(err) { 192 t.Logs.Errorf("Unable to delete opensearch master sts due to '%v'", zap.Error(err)) 193 return err 194 } 195 } 196 197 t.Logs.Infof("Deleting opensearch data sts") 198 err = clientset.AppsV1().StatefulSets(constants.VerrazzanoLoggingNamespace).Delete(context.TODO(), osDataStsName, metav1.DeleteOptions{}) 199 if err != nil { 200 if !k8serror.IsNotFound(err) { 201 t.Logs.Errorf("Unable to delete opensearch data sts due to '%v'", zap.Error(err)) 202 return err 203 } 204 } 205 206 t.Logs.Infof("Deleting opensearch ingest sts") 207 err = clientset.AppsV1().StatefulSets(constants.VerrazzanoLoggingNamespace).Delete(context.TODO(), osIngestStsName, metav1.DeleteOptions{}) 208 if err != nil { 209 if !k8serror.IsNotFound(err) { 210 t.Logs.Errorf("Unable to delete opensearch ingest sts due to '%v'", zap.Error(err)) 211 return err 212 } 213 } 214 215 t.Logs.Infof("Deleting opensearch master pvc if still present") 216 for i := 0; i < 3; i++ { 217 err = clientset.CoreV1().PersistentVolumeClaims(constants.VerrazzanoLoggingNamespace).Delete(context.TODO(), fmt.Sprintf("data-%s-%v", osMasterStsName, i), metav1.DeleteOptions{}) 218 if err != nil { 219 if !k8serror.IsNotFound(err) { 220 t.Logs.Errorf("Unable to delete opensearch master pvc due to '%v'", zap.Error(err)) 221 return err 222 } 223 } 224 } 225 226 t.Logs.Infof("Deleting opensearch data pvc") 227 for i := 0; i < 3; i++ { 228 err = clientset.CoreV1().PersistentVolumeClaims(constants.VerrazzanoLoggingNamespace).Delete(context.TODO(), fmt.Sprintf("data-%s-%v", osDataStsName, i), metav1.DeleteOptions{}) 229 if err != nil { 230 if !k8serror.IsNotFound(err) { 231 t.Logs.Errorf("Unable to delete opensearch data pvc due to '%v'", zap.Error(err)) 232 return err 233 } 234 } 235 } 236 237 return nil 238 } 239 240 // 'It' Wrapper to only run spec if the Velero is supported on the current Verrazzano version 241 func WhenVeleroInstalledIt(description string, f func()) { 242 kubeconfigPath, err := k8sutil.GetKubeConfigLocation() 243 if err != nil { 244 t.It(description, func() { 245 Fail(fmt.Sprintf("Failed to get default kubeconfig path: %s", err.Error())) 246 }) 247 } 248 supported, err := pkg.IsVerrazzanoMinVersion("1.4.0", kubeconfigPath) 249 if err != nil { 250 t.It(description, func() { 251 Fail(fmt.Sprintf("Failed to check Verrazzano version 1.4.0: %s", err.Error())) 252 }) 253 } 254 if supported { 255 t.It(description, f) 256 } else { 257 t.Logs.Infof("Skipping check '%v', the Velero is not supported", description) 258 } 259 } 260 261 // checkPodsRunning checks whether the pods are ready in a given namespace 262 func checkPodsRunning(namespace string, expectedPods []string) bool { 263 result, err := pkg.PodsRunning(namespace, expectedPods) 264 if err != nil { 265 AbortSuite(fmt.Sprintf("One or more pods are not running in the namespace: %v, error: %v", namespace, err)) 266 } 267 return result 268 } 269 270 // checkPodsNotRunning checks whether the pods are not ready in a given namespace 271 func checkPodsNotRunning(namespace string, expectedPods []string) bool { 272 result, err := pkg.PodsNotRunning(namespace, expectedPods) 273 if err != nil { 274 AbortSuite(fmt.Sprintf("One or more pods are running in the namespace: %v, error: %v", namespace, err)) 275 } 276 return result 277 } 278 279 // Run as part of BeforeSuite 280 func backupPrerequisites() { 281 t.Logs.Info("Setup backup pre-requisites") 282 t.Logs.Info("Create backup secret for velero backup objects") 283 Eventually(func() error { 284 return common.CreateCredentialsSecretFromFile(common.VeleroNameSpace, common.VeleroOpenSearchSecretName, t.Logs) 285 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 286 287 t.Logs.Info("Create backup storage location for velero backup objects") 288 Eventually(func() error { 289 return common.CreateVeleroBackupLocationObject(common.BackupOpensearchStorageName, common.VeleroOpenSearchSecretName, t.Logs) 290 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 291 292 t.Logs.Info("Get backup id before starting the backup process") 293 Eventually(func() error { 294 return GetBackupID() 295 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 296 297 } 298 299 // Run as part of AfterSuite 300 func cleanUpVelero() { 301 t.Logs.Info("Cleanup backup and restore objects") 302 303 t.Logs.Info("Cleanup restore object") 304 Eventually(func() error { 305 return common.CrdPruner("velero.io", "v1", "restores", common.RestoreOpensearchName, common.VeleroNameSpace, t.Logs) 306 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 307 308 t.Logs.Info("Cleanup backup object") 309 Eventually(func() error { 310 return common.CrdPruner("velero.io", "v1", "backups", common.BackupOpensearchName, common.VeleroNameSpace, t.Logs) 311 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 312 313 t.Logs.Info("Cleanup backup storage object") 314 Eventually(func() error { 315 return common.CrdPruner("velero.io", "v1", "backupstoragelocations", common.BackupOpensearchStorageName, common.VeleroNameSpace, t.Logs) 316 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 317 318 t.Logs.Info("Cleanup velero secrets") 319 Eventually(func() error { 320 return common.DeleteSecret(common.VeleroNameSpace, common.VeleroOpenSearchSecretName, t.Logs) 321 }, shortWaitTimeout, shortPollingInterval).Should(BeNil()) 322 323 } 324 325 var _ = t.Describe("OpenSearch Backup and Restore,", Label("f:platform-verrazzano.opensearch-backup"), Serial, func() { 326 327 t.Context("OpenSearch backup", func() { 328 WhenVeleroInstalledIt("Start opensearch backup after velero backup storage location created", func() { 329 Eventually(func() error { 330 return CreateVeleroBackupObject() 331 }, waitTimeout, pollingInterval).Should(BeNil()) 332 }) 333 334 WhenVeleroInstalledIt("Check backup progress after velero backup object was created", func() { 335 Eventually(func() error { 336 return common.TrackOperationProgress("velero", common.BackupResource, common.BackupOpensearchName, common.VeleroNameSpace, t.Logs) 337 }, waitTimeout, pollingInterval).Should(BeNil()) 338 }) 339 340 WhenVeleroInstalledIt("Fetch logs after backup is complete", func() { 341 Eventually(func() error { 342 return common.DisplayHookLogs(t.Logs) 343 }, waitTimeout, pollingInterval).Should(BeNil()) 344 }) 345 346 }) 347 348 t.Context("Disaster simulation", func() { 349 WhenVeleroInstalledIt("Cleanup opensearch once backup is done", func() { 350 Eventually(func() error { 351 return NukeOpensearch() 352 }, waitTimeout, pollingInterval).Should(BeNil()) 353 }) 354 355 WhenVeleroInstalledIt("Ensure the pods are not running before starting a restore", func() { 356 Eventually(func() bool { 357 return checkPodsNotRunning(constants.VerrazzanoLoggingNamespace, esPods) 358 }, waitTimeout, pollingInterval).Should(BeTrue(), "Check if pods are down") 359 }) 360 361 WhenVeleroInstalledIt("After pods are down check if pvcs are deleted before starting a restore", func() { 362 Eventually(func() error { 363 return common.CheckPvcsTerminated("opster.io/opensearch-cluster=opensearch", constants.VerrazzanoLoggingNamespace, t.Logs) 364 }, waitTimeout, pollingInterval).Should(BeNil(), "Check if pvcs are removed") 365 }) 366 367 }) 368 369 t.Context("OpenSearch restore", func() { 370 WhenVeleroInstalledIt("Start restore after velero backup is completed", func() { 371 Eventually(func() error { 372 return CreateVeleroRestoreObject() 373 }, waitTimeout, pollingInterval).Should(BeNil()) 374 }) 375 WhenVeleroInstalledIt("Check velero restore progress", func() { 376 Eventually(func() error { 377 return common.TrackOperationProgress("velero", common.RestoreResource, common.RestoreOpensearchName, common.VeleroNameSpace, t.Logs) 378 }, waitTimeout, pollingInterval).Should(BeNil()) 379 }) 380 WhenVeleroInstalledIt("Fetch logs after restore is complete", func() { 381 Eventually(func() error { 382 return common.DisplayHookLogs(t.Logs) 383 }, waitTimeout, pollingInterval).Should(BeNil()) 384 }) 385 }) 386 387 t.Context("OpenSearch Data and Infra verification", func() { 388 WhenVeleroInstalledIt("Wait for all pods to come up in verrazzano-logging", func() { 389 Eventually(func() bool { 390 return checkPodsRunning(constants.VerrazzanoLoggingNamespace, esPodsUp) 391 }, waitTimeout, pollingInterval).Should(BeTrue(), "Check if pods are up") 392 }) 393 WhenVeleroInstalledIt("Is Restore good? Verify restore", func() { 394 Eventually(func() string { 395 return IsRestoreSuccessful() 396 }, waitTimeout, pollingInterval).Should(Equal(common.BackupID)) 397 }) 398 399 }) 400 401 })