github.com/verrazzano/verrazzano@v1.7.1/tests/e2e/metrics/syscomponents/metrics_test.go (about) 1 // Copyright (c) 2021, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package syscomponents 5 6 import ( 7 "fmt" 8 "os" 9 "strings" 10 "time" 11 12 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 13 "github.com/verrazzano/verrazzano/pkg/nginxutil" 14 "github.com/verrazzano/verrazzano/tools/vz/pkg/constants" 15 apierrors "k8s.io/apimachinery/pkg/api/errors" 16 17 . "github.com/onsi/ginkgo/v2" 18 . "github.com/onsi/gomega" 19 "github.com/verrazzano/verrazzano/pkg/k8sutil" 20 "github.com/verrazzano/verrazzano/tests/e2e/pkg" 21 "github.com/verrazzano/verrazzano/tests/e2e/pkg/test/framework" 22 ) 23 24 const ( 25 metricsVersion = "1.4.0" 26 27 longPollingInterval = 10 * time.Second 28 longWaitTimeout = 15 * time.Minute 29 30 // Constants for sample metrics of system components validated by the test 31 ingressControllerSuccess = "nginx_ingress_controller_success" 32 containerStartTimeSeconds = "container_start_time_seconds" 33 cpuSecondsTotal = "node_cpu_seconds_total" 34 istioRequestsTotal = "istio_requests_total" 35 sidecarInjectionRequests = "sidecar_injection_requests_total" 36 prometheusTargetIntervalLength = "prometheus_target_interval_length_seconds" 37 envoyStatsRecentLookups = "envoy_server_stats_recent_lookups" 38 vmoFunctionMetric = "vz_monitoring_operator_reconcile_total" 39 vmoCounterMetric = "vz_monitoring_operator_deployment_update_total" 40 vmoGaugeMetric = "vz_monitoring_operator_work_queue_size" 41 vmoTimestampMetric = "vz_monitoring_operator_configmap_last_successful_timestamp" 42 vaoSuccessCountMetric = "vz_application_operator_appconfig_successful_reconcile_total" 43 vaoFailCountMetric = "vz_application_operator_appconfig_error_reconcile_total" 44 vaoDurationCountMetric = "vz_application_operator_appconfig_reconcile_duration_count" 45 esClusterStatusMetric = "opensearch_cluster_status" 46 47 // Namespaces used for validating envoy stats 48 verrazzanoSystemNamespace = "verrazzano-system" 49 istioSystemNamespace = "istio-system" 50 keycloakNamespace = "keycloak" 51 52 // Constants for various metric labels, used in the validation 53 nodeExporter = "node-exporter" 54 istiod = "istiod" 55 pilot = "pilot" 56 prometheus = "prometheus-operator-kube-p-prometheus" 57 oldPrometheus = "prometheus" 58 controllerNamespace = "controller_namespace" 59 ingressController = "ingress-controller" 60 appK8SIOInstance = "app_kubernetes_io_instance" 61 job = "job" 62 app = "app" 63 namespace = "namespace" 64 container = "container" 65 esMaster = "es-master" 66 67 failedVerifyVersionMsg = "Failed to verify the Verrazzano version was min 1.4.0: %v" 68 ) 69 70 var clusterName = os.Getenv("CLUSTER_NAME") 71 var kubeConfig = os.Getenv("KUBECONFIG") 72 73 // will be initialized in BeforeSuiteFunc so that any log messages during init are available 74 var clusterNameMetricsLabel = "" 75 var isMinVersion110 bool 76 77 var adminKubeConfig string 78 var isManagedClusterProfile bool 79 80 var ingressNGINXNamespace string 81 82 // List of namespaces considered for validating the envoy-stats 83 var envoyStatsNamespaces = []string{ 84 ingressNGINXNamespace, 85 istioSystemNamespace, 86 verrazzanoSystemNamespace, 87 } 88 89 // List of pods to be excluded from verrazzano-system namespace for envoy-stats as they do not have envoy 90 var excludePodsVS = []string{ 91 "coherence-operator", 92 "oam-kubernetes-runtime", 93 "verrazzano-application-operator", 94 "verrazzano-monitoring-operator", 95 "verrazzano-cluster-operator", 96 "verrazzano-operator", 97 "weblogic-operator-webhook", 98 } 99 100 // List of pods to be excluded from istio-system namespace for envoy-stats as they do not have envoy 101 var excludePodsIstio = []string{ 102 "istiocoredns", 103 "istiod", 104 } 105 var metricsTest pkg.MetricsTest 106 107 var t = framework.NewTestFramework("syscomponents") 108 109 var beforeSuite = t.BeforeSuiteFunc(func() { 110 present := false 111 var err error 112 adminKubeConfig, present = os.LookupEnv("ADMIN_KUBECONFIG") 113 isManagedClusterProfile = pkg.IsManagedClusterProfile() 114 if isManagedClusterProfile { 115 if !present { 116 Fail(fmt.Sprintln("Environment variable ADMIN_KUBECONFIG is required to run the test")) 117 } 118 } else { 119 // Include the namespace keycloak for the validation for admin cluster and single cluster installation 120 envoyStatsNamespaces = append(envoyStatsNamespaces, keycloakNamespace) 121 adminKubeConfig, err = k8sutil.GetKubeConfigLocation() 122 if err != nil { 123 Fail(err.Error()) 124 } 125 } 126 127 isMinVersion110, err = pkg.IsVerrazzanoMinVersion("1.1.0", adminKubeConfig) 128 if err != nil { 129 Fail(err.Error()) 130 } 131 132 defaultLabels := map[string]string{} 133 if clusterLabelVal := getClusterNameForPromQuery(); clusterLabelVal != "" { 134 defaultLabels[getClusterNameMetricLabel()] = clusterLabelVal 135 } 136 metricsTest, err = pkg.NewMetricsTest(adminKubeConfig, defaultLabels) 137 if err != nil { 138 Fail(err.Error()) 139 } 140 141 ingressNGINXNamespace, err = nginxutil.DetermineNamespaceForIngressNGINX(vzlog.DefaultLogger()) 142 if err != nil { 143 Fail(err.Error()) 144 } 145 }) 146 147 var _ = BeforeSuite(beforeSuite) 148 149 var afterSuite = t.AfterSuiteFunc(func() {}) 150 151 var _ = AfterSuite(afterSuite) 152 153 var _ = t.AfterEach(func() {}) 154 155 // 'It' Wrapper to only run spec if the Verrazzano Monitoring Operator is installed in the cluster 156 func WhenVMOInstalledAndMinVersionIt(description string, version string, kubeConfigPath string, f func()) { 157 _, err := pkg.GetDeployment(verrazzanoSystemNamespace, constants.VerrazzanoMonitoringOperator) 158 if err != nil { 159 if apierrors.IsNotFound(err) { 160 t.It(description, func() { 161 Skip("VMO is not installed, skipping") 162 }) 163 } else { 164 Fail(err.Error()) 165 } 166 } else { 167 t.ItMinimumVersion(description, version, kubeConfigPath, f) 168 } 169 } 170 171 var _ = t.Describe("Thanos Metrics", Label("f:observability.monitoring.prom"), func() { 172 // Query Prometheus for the sample metrics from the default scraping jobs 173 var _ = t.Describe("for the system components", func() { 174 t.It("Verify sample NGINX metrics can be queried from Thanos", func() { 175 eventuallyMetricsContainLabels(ingressControllerSuccess, map[string]string{ 176 controllerNamespace: ingressNGINXNamespace, 177 appK8SIOInstance: ingressController, 178 }) 179 }) 180 181 if !pkg.IsManagedClusterProfile() { 182 t.ItMinimumVersion("Verify sample OpenSearch metrics can be queried from Thanos", "1.5.0", kubeConfig, func() { 183 eventuallyMetricsContainLabels(esClusterStatusMetric, map[string]string{}) 184 }) 185 } 186 187 t.It("Verify sample Container Advisor metrics can be queried from Thanos", func() { 188 eventuallyMetricsContainLabels(containerStartTimeSeconds, map[string]string{}) 189 }) 190 t.ItMinimumVersion("Verify VPO summary counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 191 eventuallyMetricsContainLabels("vz_platform_operator_reconcile_duration_count", map[string]string{}) 192 }) 193 t.ItMinimumVersion("Verify VPO summary sum times can be queried from Thanos", metricsVersion, kubeConfig, func() { 194 eventuallyMetricsContainLabels("vz_platform_operator_reconcile_duration_sum", map[string]string{}) 195 }) 196 t.ItMinimumVersion("Verify VPO counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 197 eventuallyMetricsContainLabels("vz_platform_operator_reconcile_total", map[string]string{}) 198 }) 199 t.ItMinimumVersion("Verify VPO error counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 200 eventuallyMetricsContainLabels("vz_platform_operator_error_reconcile_total", map[string]string{}) 201 }) 202 t.ItMinimumVersion("Verify VPO install metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 203 eventuallyMetricsContainLabels("vz_platform_operator_component_install_duration_seconds", map[string]string{}) 204 }) 205 t.ItMinimumVersion("Verify VPO upgrade counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 206 eventuallyMetricsContainLabels("vz_platform_operator_component_upgrade_duration_seconds", map[string]string{}) 207 }) 208 209 WhenVMOInstalledAndMinVersionIt("Verify VMO function metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 210 eventuallyMetricsContainLabels(vmoFunctionMetric, map[string]string{}) 211 }) 212 213 WhenVMOInstalledAndMinVersionIt("Verify VMO counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 214 eventuallyMetricsContainLabels(vmoCounterMetric, map[string]string{}) 215 }) 216 217 WhenVMOInstalledAndMinVersionIt("Verify VMO gauge metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 218 eventuallyMetricsContainLabels(vmoGaugeMetric, map[string]string{}) 219 }) 220 221 WhenVMOInstalledAndMinVersionIt("Verify VMO timestamp metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 222 eventuallyMetricsContainLabels(vmoTimestampMetric, map[string]string{}) 223 }) 224 225 t.ItMinimumVersion("Verify VAO successful counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 226 eventuallyMetricsContainLabels(vaoSuccessCountMetric, map[string]string{}) 227 }) 228 t.ItMinimumVersion("Verify VAO failed counter metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 229 eventuallyMetricsContainLabels(vaoFailCountMetric, map[string]string{}) 230 }) 231 t.ItMinimumVersion("Verify VAO Duration summary metrics can be queried from Thanos", metricsVersion, kubeConfig, func() { 232 eventuallyMetricsContainLabels(vaoDurationCountMetric, map[string]string{}) 233 }) 234 235 t.It("Verify sample Node Exporter metrics can be queried from Thanos", func() { 236 Eventually(func() bool { 237 kv := map[string]string{ 238 job: nodeExporter, 239 } 240 return metricsTest.MetricsExist(cpuSecondsTotal, kv) 241 }, longWaitTimeout, longPollingInterval).Should(BeTrue()) 242 }) 243 244 if istioInjection == "enabled" { 245 t.It("Verify sample mesh metrics can be queried from Thanos", func() { 246 Eventually(func() bool { 247 kv := map[string]string{ 248 namespace: verrazzanoSystemNamespace, 249 } 250 return metricsTest.MetricsExist(istioRequestsTotal, kv) 251 }, longWaitTimeout, longPollingInterval).Should(BeTrue()) 252 }) 253 254 t.It("Verify sample istiod metrics can be queried from Thanos", func() { 255 Eventually(func() bool { 256 kv := map[string]string{ 257 app: istiod, 258 job: pilot, 259 } 260 261 minVer14, err := pkg.IsVerrazzanoMinVersion("1.4.0", adminKubeConfig) 262 if err != nil { 263 pkg.Log(pkg.Error, fmt.Sprintf(failedVerifyVersionMsg, err)) 264 return false 265 } 266 if minVer14 { 267 kv = map[string]string{ 268 app: istiod, 269 job: istiod, 270 } 271 } 272 return metricsTest.MetricsExist(sidecarInjectionRequests, kv) 273 }, longWaitTimeout, longPollingInterval).Should(BeTrue()) 274 }) 275 } 276 277 t.It("Verify sample metrics can be queried from Thanos", func() { 278 Eventually(func() bool { 279 kv := map[string]string{ 280 job: oldPrometheus, 281 } 282 283 minVer14, err := pkg.IsVerrazzanoMinVersion("1.4.0", adminKubeConfig) 284 if err != nil { 285 pkg.Log(pkg.Error, fmt.Sprintf(failedVerifyVersionMsg, err)) 286 return false 287 } 288 if minVer14 { 289 kv = map[string]string{ 290 job: prometheus, 291 } 292 } 293 return metricsTest.MetricsExist(prometheusTargetIntervalLength, kv) 294 }, longWaitTimeout, longPollingInterval).Should(BeTrue()) 295 }) 296 if istioInjection == "enabled" { 297 t.It("Verify envoy stats", func() { 298 Eventually(func() bool { 299 return verifyEnvoyStats(envoyStatsRecentLookups) 300 }, longWaitTimeout, longPollingInterval).Should(BeTrue()) 301 }) 302 } 303 }) 304 }) 305 306 // Validate the Istio envoy stats for the pods in the namespaces defined in envoyStatsNamespaces 307 func verifyEnvoyStats(metricName string) bool { 308 clientset, err := pkg.GetKubernetesClientsetForCluster(kubeConfig) 309 if err != nil { 310 t.Logs.Errorf("Error getting clienset for %s, error: %v", kubeConfig, err) 311 return false 312 } 313 for _, ns := range envoyStatsNamespaces { 314 pods, err := pkg.ListPodsInCluster(ns, clientset) 315 if err != nil { 316 t.Logs.Errorf("Error listing pods in cluster for namespace: %s, error: %v", namespace, err) 317 return false 318 } 319 labels := map[string]string{} 320 for _, pod := range pods.Items { 321 if ns == istioSystemNamespace && excludePods(pod.Name, excludePodsIstio) || 322 ns == verrazzanoSystemNamespace && excludePods(pod.Name, excludePodsVS) { 323 continue 324 } 325 labels[ns] = pod.Name 326 } 327 metricsTest.MetricsExist(metricName, labels) 328 } 329 return true 330 } 331 332 func getClusterNameMetricLabel() string { 333 if clusterNameMetricsLabel == "" { 334 // ignore error getting the metric label - we'll just use the default value returned 335 lbl, err := pkg.GetClusterNameMetricLabel(adminKubeConfig) 336 if err != nil { 337 t.Logs.Errorf("Error getting cluster name metric label: %s", err.Error()) 338 } 339 clusterNameMetricsLabel = lbl 340 } 341 return clusterNameMetricsLabel 342 } 343 344 // Exclude the pods where envoy stats are not available 345 func excludePods(pod string, excludeList []string) bool { 346 for _, excludes := range excludeList { 347 if strings.HasPrefix(pod, excludes) { 348 return true 349 } 350 } 351 return false 352 } 353 354 // Return the cluster name used for the Prometheus query 355 func getClusterNameForPromQuery() string { 356 if isManagedClusterProfile { 357 return clusterName 358 } 359 if isMinVersion110 { 360 return "local" 361 } 362 return "" 363 } 364 365 // Queries Thanos for a given metric name and a map of labels for the metric 366 func eventuallyMetricsContainLabels(metricName string, kv map[string]string) { 367 Eventually(func() bool { 368 return metricsTest.MetricsExist(metricName, kv) 369 }, longWaitTimeout, longPollingInterval).Should(BeTrue()) 370 }