github.com/verrazzano/verrazzano@v1.7.1/tests/e2e/pkg/metrics.go (about) 1 // Copyright (c) 2021, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package pkg 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "net/http" 11 "os/exec" 12 "strings" 13 14 promoperapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 15 "github.com/verrazzano/verrazzano/pkg/k8sutil" 16 "github.com/verrazzano/verrazzano/pkg/vzcr" 17 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 "k8s.io/apimachinery/pkg/runtime" 19 "k8s.io/apimachinery/pkg/types" 20 "sigs.k8s.io/controller-runtime/pkg/client" 21 ) 22 23 type MetricsTest struct { 24 Source MetricSource 25 DefaultLabels map[string]string 26 } 27 28 // NewMetricsTest returns a metric test object with which to query metrics 29 // Parameters: 30 // kubeconfigs a list of kubeconfigs from all clusters 31 // kubeconfigPath this is the kubeconfigPath for the cluster we want to search metrics from 32 // defaultLabels the default labels will be added to the test metric when the query begins 33 func NewMetricsTest(kubeconfigPath string, defaultLabels map[string]string, extraKubeconfigs ...string) (MetricsTest, error) { 34 mt := MetricsTest{ 35 DefaultLabels: defaultLabels, 36 } 37 38 for _, kc := range append(extraKubeconfigs, kubeconfigPath) { 39 vz, err := GetVerrazzanoInstallResourceInClusterV1beta1(kc) 40 if err != nil { 41 return MetricsTest{}, err 42 } 43 if !vzcr.IsThanosEnabled(vz) { 44 source, err := NewPrometheusSource(kubeconfigPath) 45 if err != nil { 46 return MetricsTest{}, err 47 } 48 mt.Source = source 49 return mt, nil 50 } 51 } 52 53 source, err := NewThanosSource(kubeconfigPath) 54 if err != nil { 55 return MetricsTest{}, err 56 } 57 mt.Source = source 58 return mt, nil 59 } 60 61 func (m MetricsTest) QueryMetric(metricName string, labels map[string]string) (string, error) { 62 metricsURL := fmt.Sprintf("https://%s/api/v1/query?query=%s", m.Source.GetHost(), metricName) 63 metricsURL = m.appendLabels(metricsURL, labels) 64 password, err := GetVerrazzanoPasswordInCluster(m.Source.getKubeConfigPath()) 65 if err != nil { 66 return "", err 67 } 68 resp, err := GetWebPageWithBasicAuth(metricsURL, "", "verrazzano", password, m.Source.getKubeConfigPath()) 69 if err != nil { 70 return "", err 71 } 72 if resp.StatusCode != http.StatusOK { 73 return "", fmt.Errorf("error retrieving metric %s, status %d", metricName, resp.StatusCode) 74 } 75 Log(Info, fmt.Sprintf("metric: %s", resp.Body)) 76 return string(resp.Body), nil 77 } 78 79 func (m MetricsTest) MetricsExist(metricName string, labels map[string]string) bool { 80 result, err := m.QueryMetric(metricName, labels) 81 if err != nil { 82 return false 83 } 84 85 metricList, ok := JTq(result, "data", "result").([]interface{}) 86 if !ok { 87 Log(Error, "error extracting metric result, format is not a list type") 88 } 89 return ok && len(metricList) > 0 90 } 91 92 func (m MetricsTest) appendLabels(query string, labels map[string]string) string { 93 if len(labels) == 0 && len(m.DefaultLabels) == 0 { 94 return query 95 } 96 97 var labelStrings []string 98 for k, v := range m.DefaultLabels { 99 labelStrings = append(labelStrings, fmt.Sprintf(`%s="%s"`, k, v)) 100 } 101 for k, v := range labels { 102 labelStrings = append(labelStrings, fmt.Sprintf(`%s="%s"`, k, v)) 103 } 104 return fmt.Sprintf("%s{%s}", query, strings.Join(labelStrings, ",")) 105 } 106 107 // GetPrometheusIngressHost gets the host used for ingress to the system Prometheus in the given cluster 108 func GetPrometheusIngressHost(kubeconfigPath string) string { 109 source, err := NewPrometheusSource(kubeconfigPath) 110 if err != nil { 111 return "" 112 } 113 return source.GetHost() 114 } 115 116 // GetThanosQueryIngressHost gets the host used for ingress to Thanos Query in the given cluster 117 func GetThanosQueryIngressHost(kubeconfigPath string) string { 118 source, err := NewThanosSource(kubeconfigPath) 119 if err != nil { 120 return "" 121 } 122 return source.GetHost() 123 } 124 125 // GetQueryStoreIngressHost gets the host used for ingress to Thanos Query Store in the given cluster 126 func GetQueryStoreIngressHost(kubeconfigPath string) string { 127 clientset, err := GetKubernetesClientsetForCluster(kubeconfigPath) 128 if err != nil { 129 Log(Error, fmt.Sprintf("Failed to get clientset for cluster %v", err)) 130 return "" 131 } 132 ingress, err := clientset.NetworkingV1().Ingresses("verrazzano-system").Get(context.TODO(), "thanos-query-store", metav1.GetOptions{}) 133 if err != nil { 134 Log(Error, fmt.Sprintf("Failed to get Ingress thanos-query-store from the cluster: %v", err)) 135 } 136 return ingress.Spec.Rules[0].Host 137 } 138 139 // GetClusterNameMetricLabel returns the label name used for labeling metrics with the Verrazzano cluster 140 // This is different in pre-1.1 VZ releases versus later releases 141 func GetClusterNameMetricLabel(kubeconfigPath string) (string, error) { 142 isVz11OrGreater, err := IsVerrazzanoMinVersion("1.1.0", kubeconfigPath) 143 if err != nil { 144 Log(Error, fmt.Sprintf("Error checking Verrazzano min version == 1.1: %t", err)) 145 return "verrazzano_cluster", err // callers can choose to ignore the error 146 } else if !isVz11OrGreater { 147 Log(Info, "GetClusterNameMetricsLabel: version is less than 1.1.0") 148 // versions < 1.1 use the managed_cluster label not the verrazzano_cluster label 149 return "managed_cluster", nil 150 } 151 Log(Info, "GetClusterNameMetricsLabel: version is greater than or equal to 1.1.0") 152 return "verrazzano_cluster", nil 153 } 154 155 // JTq queries JSON text with a JSON path 156 func JTq(jtext string, path ...string) interface{} { 157 var j map[string]interface{} 158 json.Unmarshal([]byte(jtext), &j) 159 return Jq(j, path...) 160 } 161 162 // ScrapeTargets queries Prometheus API /api/v1/targets to list scrape targets 163 func ScrapeTargets() ([]interface{}, error) { 164 kubeconfigPath, err := k8sutil.GetKubeConfigLocation() 165 if err != nil { 166 Log(Error, fmt.Sprintf("Error getting kubeconfig, error: %v", err)) 167 return nil, err 168 } 169 170 metricsURL := fmt.Sprintf("https://%s/api/v1/targets", GetPrometheusIngressHost(kubeconfigPath)) 171 password, err := GetVerrazzanoPasswordInCluster(kubeconfigPath) 172 if err != nil { 173 return nil, err 174 } 175 resp, err := GetWebPageWithBasicAuth(metricsURL, "", "verrazzano", password, kubeconfigPath) 176 if err != nil { 177 return nil, err 178 } 179 if resp.StatusCode != http.StatusOK { 180 return nil, fmt.Errorf("error retrieving targets %d", resp.StatusCode) 181 } 182 var result map[string]interface{} 183 if err = json.Unmarshal(resp.Body, &result); err != nil { 184 return nil, err 185 } 186 activeTargets := Jq(result, "data", "activeTargets").([]interface{}) 187 return activeTargets, nil 188 } 189 190 func ScrapeTargetsFromExec() ([]interface{}, error) { 191 metricsURL := "http://localhost:9090/api/v1/targets" 192 cmd := exec.Command("kubectl", "exec", "prometheus-prometheus-operator-kube-p-prometheus-0", "-n", "verrazzano-monitoring", "--", "curl", metricsURL) 193 out, err := cmd.Output() 194 if err != nil { 195 return nil, err 196 } 197 if len(string(out)) == 0 { 198 return nil, fmt.Errorf("prometheus scrape targets request returned no data") 199 } 200 var data map[string]interface{} 201 if err = json.Unmarshal(out, &data); err != nil { 202 return nil, err 203 } 204 activeTargets := Jq(data, "data", "activeTargets").([]interface{}) 205 return activeTargets, nil 206 } 207 208 // Jq queries JSON nodes with a JSON path 209 func Jq(node interface{}, path ...string) interface{} { 210 for _, p := range path { 211 if node == nil { 212 return nil 213 } 214 var nodeMap, ok = node.(map[string]interface{}) 215 if ok { 216 node = nodeMap[p] 217 } else { 218 return nil 219 } 220 } 221 return node 222 } 223 224 // getPromOperatorClient returns a client for fetching ServiceMonitor resources 225 func getPromOperatorClient() (client.Client, error) { 226 config, err := k8sutil.GetKubeConfig() 227 if err != nil { 228 return nil, err 229 } 230 231 scheme := runtime.NewScheme() 232 _ = promoperapi.AddToScheme(scheme) 233 234 cli, err := client.New(config, client.Options{Scheme: scheme}) 235 if err != nil { 236 return nil, err 237 } 238 return cli, nil 239 } 240 241 // GetAppServiceMonitorName returns the service monitor name used in VZ 1.4+ for the given 242 // namespace and app name 243 func GetAppServiceMonitorName(namespace string, appName string, component string) string { 244 // For VZ versions starting from 1.4.0, the service monitor name for prometheus is of the format 245 // <app_name>_<app_namespace> 246 var smName string 247 if component == "" { 248 smName = fmt.Sprintf("%s-%s", appName, namespace) 249 } else { 250 smName = fmt.Sprintf("%s-%s-%s", appName, namespace, component) 251 if len(smName) > 63 { 252 smName = fmt.Sprintf("%s-%s", appName, namespace) 253 } 254 } 255 if len(smName) > 63 { 256 smName = smName[:63] 257 } 258 return smName 259 } 260 261 // GetServiceMonitor returns the ServiceMonitor identified by namespace and name 262 func GetServiceMonitor(namespace, name string) (*promoperapi.ServiceMonitor, error) { 263 cli, err := getPromOperatorClient() 264 if err != nil { 265 return nil, err 266 } 267 268 serviceMonitor := &promoperapi.ServiceMonitor{} 269 err = cli.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: name}, serviceMonitor) 270 if err != nil { 271 return nil, err 272 } 273 return serviceMonitor, nil 274 } 275 276 // ScrapeTargetsHealthy validates the health of the scrape targets for the given scrapePools 277 func ScrapeTargetsHealthy(scrapePools []string) (bool, error) { 278 targets, err := ScrapeTargets() 279 if err != nil { 280 Log(Error, fmt.Sprintf("Error getting scrape targets: %v", err)) 281 return false, err 282 } 283 return verifyScrapePoolsHealthy(targets, scrapePools) 284 } 285 286 // ScrapeTargetsHealthyFromExec validates the health of the scrape targets for the given scrapePools by execing into the prometheus pod 287 func ScrapeTargetsHealthyFromExec(scrapePools []string) (bool, error) { 288 targets, err := ScrapeTargetsFromExec() 289 if err != nil { 290 Log(Error, fmt.Sprintf("Error getting scrape targets: %v", err)) 291 return false, err 292 } 293 return verifyScrapePoolsHealthy(targets, scrapePools) 294 } 295 296 // verifyScrapePoolsHealthy iterates through the scrape pools and makes sure that it is present in the scrape targets 297 func verifyScrapePoolsHealthy(scrapeTargets []interface{}, scrapePools []string) (bool, error) { 298 for _, scrapePool := range scrapePools { 299 found := false 300 for _, target := range scrapeTargets { 301 targetScrapePool := Jq(target, "scrapePool").(string) 302 if strings.Contains(targetScrapePool, scrapePool) { 303 found = true 304 // If any of the target health is not "up" return false 305 health := Jq(target, "health") 306 if health != "up" { 307 scrapeURL := Jq(target, "scrapeUrl").(string) 308 Log(Error, fmt.Sprintf("target with scrapePool %s and scrapeURL %s is not ready with health %s", targetScrapePool, scrapeURL, health)) 309 return false, fmt.Errorf("target with scrapePool %s and scrapeURL %s is not healthy", targetScrapePool, scrapeURL) 310 } 311 } 312 } 313 // If target with scrapePool not found, then return false and error 314 if !found { 315 Log(Error, fmt.Sprintf("target with scrapePool %s is not found", scrapePool)) 316 return false, fmt.Errorf("target with scrapePool %s not found", scrapePool) 317 } 318 } 319 return true, nil 320 } 321 func GetScrapePools(namespace, appName string, componentNames []string, isMinVersion140 bool) []string { 322 var scrapePools []string 323 var scrapePool string 324 for _, comp := range componentNames { 325 if isMinVersion140 { 326 scrapePool = "serviceMonitor/" + namespace + "/" + GetAppServiceMonitorName(namespace, appName, comp) 327 } else { 328 // For previous versions than 1.4x, scrapePools were named in different way as there was no concept of service monitors 329 scrapePool = appName + "_default_" + namespace + "_" + comp 330 } 331 scrapePools = append(scrapePools, scrapePool) 332 } 333 return scrapePools 334 335 }