istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tests/integration/telemetry/util.go (about)

     1  //go:build integ
     2  // +build integ
     3  
     4  // Copyright Istio Authors
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  package telemetry
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"sort"
    24  	"time"
    25  
    26  	"github.com/prometheus/common/model"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  
    29  	"istio.io/istio/pkg/config/mesh"
    30  	"istio.io/istio/pkg/test"
    31  	"istio.io/istio/pkg/test/framework"
    32  	"istio.io/istio/pkg/test/framework/components/cluster"
    33  	"istio.io/istio/pkg/test/framework/components/prometheus"
    34  	"istio.io/istio/pkg/test/util/retry"
    35  )
    36  
    37  // PromDiff compares a query with labels to a query of the same metric without labels, and notes the closest matching
    38  // metric.
    39  func PromDiff(t test.Failer, prom prometheus.Instance, cluster cluster.Cluster, query prometheus.Query) {
    40  	t.Helper()
    41  	unlabelled := prometheus.Query{Metric: query.Metric}
    42  	v, _ := prom.Query(cluster, unlabelled)
    43  	if v == nil {
    44  		t.Logf("no metrics found for %v", unlabelled)
    45  		return
    46  	}
    47  	switch v.Type() {
    48  	case model.ValVector:
    49  		value := v.(model.Vector)
    50  		var allMismatches []map[string]string
    51  		full := []model.Metric{}
    52  		for _, s := range value {
    53  			misMatched := map[string]string{}
    54  			for k, want := range query.Labels {
    55  				got := string(s.Metric[model.LabelName(k)])
    56  				if want != got {
    57  					misMatched[k] = got
    58  				}
    59  			}
    60  			if len(misMatched) == 0 {
    61  				continue
    62  			}
    63  			allMismatches = append(allMismatches, misMatched)
    64  			full = append(full, s.Metric)
    65  		}
    66  		if len(allMismatches) == 0 {
    67  			t.Logf("no diff found")
    68  			return
    69  		}
    70  		sort.Slice(allMismatches, func(i, j int) bool {
    71  			return len(allMismatches[i]) < len(allMismatches[j])
    72  		})
    73  		t.Logf("query %q returned %v series, but none matched our query exactly.", query.Metric, len(value))
    74  		t.Logf("Original query: %v", query.String())
    75  		for i, m := range allMismatches {
    76  			t.Logf("Series %d (source: %v/%v)", i, full[i]["namespace"], full[i]["pod"])
    77  			missing := []string{}
    78  			for k, v := range m {
    79  				if v == "" {
    80  					missing = append(missing, k)
    81  				} else {
    82  					t.Logf("  for label %q, wanted %q but got %q", k, query.Labels[k], v)
    83  				}
    84  			}
    85  			if len(missing) > 0 {
    86  				t.Logf("  missing labels: %v", missing)
    87  			}
    88  		}
    89  
    90  	default:
    91  		t.Fatalf("PromDiff expects Vector, got %v", v.Type())
    92  
    93  	}
    94  }
    95  
    96  // PromDump gets all the recorded values for a metric by name and generates a report of the values.
    97  // used for debugging of failures to provide a comprehensive view of traffic experienced.
    98  func PromDump(cluster cluster.Cluster, prometheus prometheus.Instance, query prometheus.Query) string {
    99  	if value, err := prometheus.Query(cluster, query); err == nil {
   100  		return value.String()
   101  	}
   102  
   103  	return ""
   104  }
   105  
   106  // GetTrustDomain return trust domain of the cluster.
   107  func GetTrustDomain(cluster cluster.Cluster, istioNamespace string) string {
   108  	meshConfigMap, err := cluster.Kube().CoreV1().ConfigMaps(istioNamespace).Get(context.Background(), "istio", metav1.GetOptions{})
   109  	defaultTrustDomain := mesh.DefaultMeshConfig().TrustDomain
   110  	if err != nil {
   111  		return defaultTrustDomain
   112  	}
   113  
   114  	configYaml, ok := meshConfigMap.Data["mesh"]
   115  	if !ok {
   116  		return defaultTrustDomain
   117  	}
   118  
   119  	cfg, err := mesh.ApplyMeshConfigDefaults(configYaml)
   120  	if err != nil {
   121  		return defaultTrustDomain
   122  	}
   123  
   124  	return cfg.TrustDomain
   125  }
   126  
   127  // QueryPrometheus queries prometheus and returns the result once the query stabilizes
   128  func QueryPrometheus(t framework.TestContext, cluster cluster.Cluster, query prometheus.Query, promInst prometheus.Instance) (string, error) {
   129  	t.Helper()
   130  	t.Logf("query prometheus with: %v", query)
   131  
   132  	val, err := promInst.Query(cluster, query)
   133  	if err != nil {
   134  		return "", err
   135  	}
   136  	got, err := prometheus.Sum(val)
   137  	if err != nil {
   138  		t.Logf("value: %s", val.String())
   139  		return "", fmt.Errorf("could not find metric value: %v", err)
   140  	}
   141  	t.Logf("get value %v", got)
   142  	return val.String(), nil
   143  }
   144  
   145  func ValidateMetric(t framework.TestContext, cluster cluster.Cluster, prometheus prometheus.Instance, query prometheus.Query, want float64) {
   146  	t.Helper()
   147  	err := retry.UntilSuccess(func() error {
   148  		got, err := prometheus.QuerySum(cluster, query)
   149  		t.Logf("%s: %f", query.Metric, got)
   150  		if err != nil {
   151  			return err
   152  		}
   153  		if got < want {
   154  			return fmt.Errorf("bad metric value: got %f, want at least %f", got, want)
   155  		}
   156  		return nil
   157  	}, retry.Delay(time.Second), retry.Timeout(time.Second*20))
   158  	if err != nil {
   159  		PromDiff(t, prometheus, cluster, query)
   160  		t.Fatal(err)
   161  	}
   162  }