github.com/verrazzano/verrazzano@v1.7.1/tests/e2e/pkg/metrics.go (about)

     1  // Copyright (c) 2021, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package pkg
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"net/http"
    11  	"os/exec"
    12  	"strings"
    13  
    14  	promoperapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
    15  	"github.com/verrazzano/verrazzano/pkg/k8sutil"
    16  	"github.com/verrazzano/verrazzano/pkg/vzcr"
    17  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    18  	"k8s.io/apimachinery/pkg/runtime"
    19  	"k8s.io/apimachinery/pkg/types"
    20  	"sigs.k8s.io/controller-runtime/pkg/client"
    21  )
    22  
// MetricsTest bundles a metrics source with a set of default labels and is the
// receiver for the metric query helpers in this file.
type MetricsTest struct {
	// Source is the metrics backend that is queried; NewMetricsTest sets it to
	// either a Prometheus or a Thanos source.
	Source        MetricSource
	// DefaultLabels are label matchers added to every query, in addition to
	// the labels passed to the individual query call.
	DefaultLabels map[string]string
}
    27  
    28  // NewMetricsTest returns a metric test object with which to query metrics
    29  // Parameters:
    30  // kubeconfigs 		a list of kubeconfigs from all clusters
    31  // kubeconfigPath 	this is the kubeconfigPath for the cluster we want to search metrics from
    32  // defaultLabels    the default labels will be added to the test metric when the query begins
    33  func NewMetricsTest(kubeconfigPath string, defaultLabels map[string]string, extraKubeconfigs ...string) (MetricsTest, error) {
    34  	mt := MetricsTest{
    35  		DefaultLabels: defaultLabels,
    36  	}
    37  
    38  	for _, kc := range append(extraKubeconfigs, kubeconfigPath) {
    39  		vz, err := GetVerrazzanoInstallResourceInClusterV1beta1(kc)
    40  		if err != nil {
    41  			return MetricsTest{}, err
    42  		}
    43  		if !vzcr.IsThanosEnabled(vz) {
    44  			source, err := NewPrometheusSource(kubeconfigPath)
    45  			if err != nil {
    46  				return MetricsTest{}, err
    47  			}
    48  			mt.Source = source
    49  			return mt, nil
    50  		}
    51  	}
    52  
    53  	source, err := NewThanosSource(kubeconfigPath)
    54  	if err != nil {
    55  		return MetricsTest{}, err
    56  	}
    57  	mt.Source = source
    58  	return mt, nil
    59  }
    60  
    61  func (m MetricsTest) QueryMetric(metricName string, labels map[string]string) (string, error) {
    62  	metricsURL := fmt.Sprintf("https://%s/api/v1/query?query=%s", m.Source.GetHost(), metricName)
    63  	metricsURL = m.appendLabels(metricsURL, labels)
    64  	password, err := GetVerrazzanoPasswordInCluster(m.Source.getKubeConfigPath())
    65  	if err != nil {
    66  		return "", err
    67  	}
    68  	resp, err := GetWebPageWithBasicAuth(metricsURL, "", "verrazzano", password, m.Source.getKubeConfigPath())
    69  	if err != nil {
    70  		return "", err
    71  	}
    72  	if resp.StatusCode != http.StatusOK {
    73  		return "", fmt.Errorf("error retrieving metric %s, status %d", metricName, resp.StatusCode)
    74  	}
    75  	Log(Info, fmt.Sprintf("metric: %s", resp.Body))
    76  	return string(resp.Body), nil
    77  }
    78  
    79  func (m MetricsTest) MetricsExist(metricName string, labels map[string]string) bool {
    80  	result, err := m.QueryMetric(metricName, labels)
    81  	if err != nil {
    82  		return false
    83  	}
    84  
    85  	metricList, ok := JTq(result, "data", "result").([]interface{})
    86  	if !ok {
    87  		Log(Error, "error extracting metric result, format is not a list type")
    88  	}
    89  	return ok && len(metricList) > 0
    90  }
    91  
    92  func (m MetricsTest) appendLabels(query string, labels map[string]string) string {
    93  	if len(labels) == 0 && len(m.DefaultLabels) == 0 {
    94  		return query
    95  	}
    96  
    97  	var labelStrings []string
    98  	for k, v := range m.DefaultLabels {
    99  		labelStrings = append(labelStrings, fmt.Sprintf(`%s="%s"`, k, v))
   100  	}
   101  	for k, v := range labels {
   102  		labelStrings = append(labelStrings, fmt.Sprintf(`%s="%s"`, k, v))
   103  	}
   104  	return fmt.Sprintf("%s{%s}", query, strings.Join(labelStrings, ","))
   105  }
   106  
   107  // GetPrometheusIngressHost gets the host used for ingress to the system Prometheus in the given cluster
   108  func GetPrometheusIngressHost(kubeconfigPath string) string {
   109  	source, err := NewPrometheusSource(kubeconfigPath)
   110  	if err != nil {
   111  		return ""
   112  	}
   113  	return source.GetHost()
   114  }
   115  
   116  // GetThanosQueryIngressHost gets the host used for ingress to Thanos Query in the given cluster
   117  func GetThanosQueryIngressHost(kubeconfigPath string) string {
   118  	source, err := NewThanosSource(kubeconfigPath)
   119  	if err != nil {
   120  		return ""
   121  	}
   122  	return source.GetHost()
   123  }
   124  
   125  // GetQueryStoreIngressHost gets the host used for ingress to Thanos Query Store in the given cluster
   126  func GetQueryStoreIngressHost(kubeconfigPath string) string {
   127  	clientset, err := GetKubernetesClientsetForCluster(kubeconfigPath)
   128  	if err != nil {
   129  		Log(Error, fmt.Sprintf("Failed to get clientset for cluster %v", err))
   130  		return ""
   131  	}
   132  	ingress, err := clientset.NetworkingV1().Ingresses("verrazzano-system").Get(context.TODO(), "thanos-query-store", metav1.GetOptions{})
   133  	if err != nil {
   134  		Log(Error, fmt.Sprintf("Failed to get Ingress thanos-query-store from the cluster: %v", err))
   135  	}
   136  	return ingress.Spec.Rules[0].Host
   137  }
   138  
   139  // GetClusterNameMetricLabel returns the label name used for labeling metrics with the Verrazzano cluster
   140  // This is different in pre-1.1 VZ releases versus later releases
   141  func GetClusterNameMetricLabel(kubeconfigPath string) (string, error) {
   142  	isVz11OrGreater, err := IsVerrazzanoMinVersion("1.1.0", kubeconfigPath)
   143  	if err != nil {
   144  		Log(Error, fmt.Sprintf("Error checking Verrazzano min version == 1.1: %t", err))
   145  		return "verrazzano_cluster", err // callers can choose to ignore the error
   146  	} else if !isVz11OrGreater {
   147  		Log(Info, "GetClusterNameMetricsLabel: version is less than 1.1.0")
   148  		// versions < 1.1 use the managed_cluster label not the verrazzano_cluster label
   149  		return "managed_cluster", nil
   150  	}
   151  	Log(Info, "GetClusterNameMetricsLabel: version is greater than or equal to 1.1.0")
   152  	return "verrazzano_cluster", nil
   153  }
   154  
   155  // JTq queries JSON text with a JSON path
   156  func JTq(jtext string, path ...string) interface{} {
   157  	var j map[string]interface{}
   158  	json.Unmarshal([]byte(jtext), &j)
   159  	return Jq(j, path...)
   160  }
   161  
   162  // ScrapeTargets queries Prometheus API /api/v1/targets to list scrape targets
   163  func ScrapeTargets() ([]interface{}, error) {
   164  	kubeconfigPath, err := k8sutil.GetKubeConfigLocation()
   165  	if err != nil {
   166  		Log(Error, fmt.Sprintf("Error getting kubeconfig, error: %v", err))
   167  		return nil, err
   168  	}
   169  
   170  	metricsURL := fmt.Sprintf("https://%s/api/v1/targets", GetPrometheusIngressHost(kubeconfigPath))
   171  	password, err := GetVerrazzanoPasswordInCluster(kubeconfigPath)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  	resp, err := GetWebPageWithBasicAuth(metricsURL, "", "verrazzano", password, kubeconfigPath)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  	if resp.StatusCode != http.StatusOK {
   180  		return nil, fmt.Errorf("error retrieving targets %d", resp.StatusCode)
   181  	}
   182  	var result map[string]interface{}
   183  	if err = json.Unmarshal(resp.Body, &result); err != nil {
   184  		return nil, err
   185  	}
   186  	activeTargets := Jq(result, "data", "activeTargets").([]interface{})
   187  	return activeTargets, nil
   188  }
   189  
   190  func ScrapeTargetsFromExec() ([]interface{}, error) {
   191  	metricsURL := "http://localhost:9090/api/v1/targets"
   192  	cmd := exec.Command("kubectl", "exec", "prometheus-prometheus-operator-kube-p-prometheus-0", "-n", "verrazzano-monitoring", "--", "curl", metricsURL)
   193  	out, err := cmd.Output()
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  	if len(string(out)) == 0 {
   198  		return nil, fmt.Errorf("prometheus scrape targets request returned no data")
   199  	}
   200  	var data map[string]interface{}
   201  	if err = json.Unmarshal(out, &data); err != nil {
   202  		return nil, err
   203  	}
   204  	activeTargets := Jq(data, "data", "activeTargets").([]interface{})
   205  	return activeTargets, nil
   206  }
   207  
   208  // Jq queries JSON nodes with a JSON path
   209  func Jq(node interface{}, path ...string) interface{} {
   210  	for _, p := range path {
   211  		if node == nil {
   212  			return nil
   213  		}
   214  		var nodeMap, ok = node.(map[string]interface{})
   215  		if ok {
   216  			node = nodeMap[p]
   217  		} else {
   218  			return nil
   219  		}
   220  	}
   221  	return node
   222  }
   223  
   224  // getPromOperatorClient returns a client for fetching ServiceMonitor resources
   225  func getPromOperatorClient() (client.Client, error) {
   226  	config, err := k8sutil.GetKubeConfig()
   227  	if err != nil {
   228  		return nil, err
   229  	}
   230  
   231  	scheme := runtime.NewScheme()
   232  	_ = promoperapi.AddToScheme(scheme)
   233  
   234  	cli, err := client.New(config, client.Options{Scheme: scheme})
   235  	if err != nil {
   236  		return nil, err
   237  	}
   238  	return cli, nil
   239  }
   240  
   241  // GetAppServiceMonitorName returns the service monitor name used in VZ 1.4+ for the given
   242  // namespace and app name
   243  func GetAppServiceMonitorName(namespace string, appName string, component string) string {
   244  	// For VZ versions starting from 1.4.0, the service monitor name for prometheus is of the format
   245  	// <app_name>_<app_namespace>
   246  	var smName string
   247  	if component == "" {
   248  		smName = fmt.Sprintf("%s-%s", appName, namespace)
   249  	} else {
   250  		smName = fmt.Sprintf("%s-%s-%s", appName, namespace, component)
   251  		if len(smName) > 63 {
   252  			smName = fmt.Sprintf("%s-%s", appName, namespace)
   253  		}
   254  	}
   255  	if len(smName) > 63 {
   256  		smName = smName[:63]
   257  	}
   258  	return smName
   259  }
   260  
   261  // GetServiceMonitor returns the ServiceMonitor identified by namespace and name
   262  func GetServiceMonitor(namespace, name string) (*promoperapi.ServiceMonitor, error) {
   263  	cli, err := getPromOperatorClient()
   264  	if err != nil {
   265  		return nil, err
   266  	}
   267  
   268  	serviceMonitor := &promoperapi.ServiceMonitor{}
   269  	err = cli.Get(context.TODO(), types.NamespacedName{Namespace: namespace, Name: name}, serviceMonitor)
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  	return serviceMonitor, nil
   274  }
   275  
   276  // ScrapeTargetsHealthy validates the health of the scrape targets for the given scrapePools
   277  func ScrapeTargetsHealthy(scrapePools []string) (bool, error) {
   278  	targets, err := ScrapeTargets()
   279  	if err != nil {
   280  		Log(Error, fmt.Sprintf("Error getting scrape targets: %v", err))
   281  		return false, err
   282  	}
   283  	return verifyScrapePoolsHealthy(targets, scrapePools)
   284  }
   285  
   286  // ScrapeTargetsHealthyFromExec validates the health of the scrape targets for the given scrapePools by execing into the prometheus pod
   287  func ScrapeTargetsHealthyFromExec(scrapePools []string) (bool, error) {
   288  	targets, err := ScrapeTargetsFromExec()
   289  	if err != nil {
   290  		Log(Error, fmt.Sprintf("Error getting scrape targets: %v", err))
   291  		return false, err
   292  	}
   293  	return verifyScrapePoolsHealthy(targets, scrapePools)
   294  }
   295  
   296  // verifyScrapePoolsHealthy iterates through the scrape pools and makes sure that it is present in the scrape targets
   297  func verifyScrapePoolsHealthy(scrapeTargets []interface{}, scrapePools []string) (bool, error) {
   298  	for _, scrapePool := range scrapePools {
   299  		found := false
   300  		for _, target := range scrapeTargets {
   301  			targetScrapePool := Jq(target, "scrapePool").(string)
   302  			if strings.Contains(targetScrapePool, scrapePool) {
   303  				found = true
   304  				// If any of the target health is not "up" return false
   305  				health := Jq(target, "health")
   306  				if health != "up" {
   307  					scrapeURL := Jq(target, "scrapeUrl").(string)
   308  					Log(Error, fmt.Sprintf("target with scrapePool %s and scrapeURL %s is not ready with health %s", targetScrapePool, scrapeURL, health))
   309  					return false, fmt.Errorf("target with scrapePool %s and scrapeURL %s is not healthy", targetScrapePool, scrapeURL)
   310  				}
   311  			}
   312  		}
   313  		// If target with scrapePool not found, then return false and error
   314  		if !found {
   315  			Log(Error, fmt.Sprintf("target with scrapePool %s is not found", scrapePool))
   316  			return false, fmt.Errorf("target with scrapePool %s not found", scrapePool)
   317  		}
   318  	}
   319  	return true, nil
   320  }
   321  func GetScrapePools(namespace, appName string, componentNames []string, isMinVersion140 bool) []string {
   322  	var scrapePools []string
   323  	var scrapePool string
   324  	for _, comp := range componentNames {
   325  		if isMinVersion140 {
   326  			scrapePool = "serviceMonitor/" + namespace + "/" + GetAppServiceMonitorName(namespace, appName, comp)
   327  		} else {
   328  			// For previous versions than 1.4x, scrapePools were named in different way as there was no concept of service monitors
   329  			scrapePool = appName + "_default_" + namespace + "_" + comp
   330  		}
   331  		scrapePools = append(scrapePools, scrapePool)
   332  	}
   333  	return scrapePools
   334  
   335  }