istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/monitoring/monitortest/test.go

istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/monitoring/monitortest/test.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package monitortest
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  	"time"
    21  
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/prometheus/client_golang/prometheus/testutil/promlint"
    24  	dto "github.com/prometheus/client_model/go"
    25  	"go.opentelemetry.io/otel/attribute"
    26  
    27  	"istio.io/istio/pkg/lazy"
    28  	"istio.io/istio/pkg/maps"
    29  	"istio.io/istio/pkg/monitoring"
    30  	"istio.io/istio/pkg/test"
    31  	"istio.io/istio/pkg/test/util/retry"
    32  )
    33  
// MetricsTest asserts on metrics gathered from a Prometheus registry during a
// test.
//
// t is used for logging and for failing the test, reg is the gatherer metrics
// are read from, and deltas holds the counter values captured when the
// MetricsTest was created; Assert subtracts them so counters are compared
// relative to that starting point.
type MetricsTest struct {
	t      test.Failer
	reg    prometheus.Gatherer
	deltas map[metricKey]float64
}
    39  
// metricKey identifies a single metric row: the metric family name plus the
// set of label name/value pairs on the row. It is used as the key of the
// counter baseline map.
type metricKey struct {
	name  string
	attrs attribute.Set
}
    44  
    45  var reg = lazy.New(func() (prometheus.Gatherer, error) {
    46  	// TODO: do not use a global and/or add a way to reset (https://github.com/open-telemetry/opentelemetry-go/issues/4291)
    47  	reg := prometheus.NewRegistry()
    48  	_, err := monitoring.RegisterPrometheusExporter(reg, reg)
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  	return reg, nil
    53  })
    54  
    55  func TestRegistry(t test.Failer) prometheus.Gatherer {
    56  	r, err := reg.Get()
    57  	if err != nil {
    58  		t.Fatal(err)
    59  	}
    60  	return r
    61  }
    62  
    63  func New(t test.Failer) *MetricsTest {
    64  	r := TestRegistry(t)
    65  	mt := &MetricsTest{t: t, reg: r, deltas: computeDeltas(t, r)}
    66  	return mt
    67  }
    68  
    69  func computeDeltas(t test.Failer, reg prometheus.Gatherer) map[metricKey]float64 {
    70  	res := map[metricKey]float64{}
    71  	metrics, err := reg.Gather()
    72  	if err != nil {
    73  		t.Fatal(err)
    74  	}
    75  	for _, metric := range metrics {
    76  		for _, row := range metric.Metric {
    77  			if row.Counter == nil {
    78  				continue
    79  			}
    80  			key := toMetricKey(row, metric)
    81  			res[key] = *row.Counter.Value
    82  		}
    83  	}
    84  	return res
    85  }
    86  
    87  func toMetricKey(row *dto.Metric, metric *dto.MetricFamily) metricKey {
    88  	kvs := []attribute.KeyValue{}
    89  	for _, lv := range row.Label {
    90  		kvs = append(kvs, attribute.String(*lv.Name, *lv.Value))
    91  	}
    92  	key := metricKey{
    93  		name:  *metric.Name,
    94  		attrs: attribute.NewSet(kvs...),
    95  	}
    96  	return key
    97  }
    98  
// Compare checks a metric value handed to it by Assert and returns a non-nil
// error when the value is not acceptable. Assert passes a float64 for counters
// and gauges and a *dto.Histogram for histograms.
type Compare func(any) error
   100  
// DoesNotExist is a marker Compare: Assert recognizes this function by its
// pointer and, instead of calling it, checks that no metric with the given
// name is present. The function body itself is never meaningful.
func DoesNotExist(any) error {
	// special case logic in the Assert
	return nil
}
   105  
   106  func Exactly(v float64) func(any) error {
   107  	return func(f any) error {
   108  		if v != toFloat(f) {
   109  			return fmt.Errorf("want %v, got %v", v, toFloat(f))
   110  		}
   111  		return nil
   112  	}
   113  }
   114  
   115  func Distribution(count uint64, sum float64) func(any) error {
   116  	return func(f any) error {
   117  		d := f.(*dto.Histogram)
   118  		if *d.SampleCount != count {
   119  			return fmt.Errorf("want %v samples, got %v", count, *d.SampleCount)
   120  		}
   121  		if *d.SampleSum != sum {
   122  			return fmt.Errorf("want %v sum, got %v", count, *d.SampleSum)
   123  		}
   124  		return nil
   125  	}
   126  }
   127  
   128  // Buckets asserts a distribution has the number of buckets
   129  func Buckets(count int) func(any) error {
   130  	return func(f any) error {
   131  		d := f.(*dto.Histogram)
   132  		if len(d.Bucket) != count {
   133  			return fmt.Errorf("want %v buckets, got %v", count, len(d.Bucket))
   134  		}
   135  		return nil
   136  	}
   137  }
   138  
   139  func AtLeast(want float64) func(any) error {
   140  	return func(got any) error {
   141  		if want > toFloat(got) {
   142  			return fmt.Errorf("want %v <= %v (got %v)", want, toFloat(got), want)
   143  		}
   144  		return nil
   145  	}
   146  }
   147  
   148  func (m *MetricsTest) Assert(name string, tags map[string]string, compare Compare, opts ...retry.Option) {
   149  	m.t.Helper()
   150  	opt := []retry.Option{retry.Timeout(time.Second * 5), retry.Message("metric not found")}
   151  	opt = append(opt, opts...)
   152  	err := retry.UntilSuccess(func() error {
   153  		res, err := m.reg.Gather()
   154  		if err != nil {
   155  			return err
   156  		}
   157  		if fmt.Sprintf("%p", compare) == fmt.Sprintf("%p", DoesNotExist) {
   158  			for _, metric := range res {
   159  				if *metric.Name == name {
   160  					return fmt.Errorf("metric was found when it should not have been")
   161  				}
   162  			}
   163  			return nil
   164  		}
   165  		for _, metric := range res {
   166  			if *metric.Name != name {
   167  				continue
   168  			}
   169  			for _, row := range metric.Metric {
   170  				want := maps.Clone(tags)
   171  				for _, lv := range row.Label {
   172  					k, v := *lv.Name, *lv.Value
   173  					if want[k] == v {
   174  						delete(want, k)
   175  					} else {
   176  						m.t.Logf("skip metric: want %v=%v, got %v=%v", k, want[k], k, v)
   177  					}
   178  				}
   179  				if len(want) > 0 {
   180  					// Not a match
   181  					m.t.Logf("skip metric: missing labels: %+v", want)
   182  					continue
   183  				}
   184  				var v any
   185  				if row.Counter != nil {
   186  					cv := *row.Counter.Value
   187  					key := toMetricKey(row, metric)
   188  					if delta, f := m.deltas[key]; f {
   189  						cv -= delta
   190  					}
   191  					v = cv
   192  				} else if row.Gauge != nil {
   193  					v = *row.Gauge.Value
   194  				} else if row.Histogram != nil {
   195  					v = row.Histogram
   196  				}
   197  				err := compare(v)
   198  				if err != nil {
   199  					return fmt.Errorf("got unexpected val %v: %v", v, err)
   200  				}
   201  				return nil
   202  			}
   203  		}
   204  		return fmt.Errorf("no matching rows found")
   205  	}, opt...)
   206  	if err != nil {
   207  		m.t.Logf("Metric %v/%v not matched (%v); Dumping known metrics:", name, tags, err)
   208  		m.Dump()
   209  		m.t.Fatal(err)
   210  	}
   211  
   212  	// Run through linter. For now this is warning, maybe allow opt-in to strict
   213  	res, err := m.reg.Gather()
   214  	if err != nil {
   215  		m.t.Fatal(err)
   216  	}
   217  	problems, err := promlint.NewWithMetricFamilies(res).Lint()
   218  	if err != nil {
   219  		m.t.Fatal(err)
   220  	}
   221  	if len(problems) > 0 {
   222  		m.t.Logf("WARNING: Prometheus linter issue: %v", problems)
   223  	}
   224  }
   225  
   226  func toFloat(r interface{}) float64 {
   227  	switch v := r.(type) {
   228  	default:
   229  		panic(fmt.Sprintf("unknown type %T", r))
   230  	case int64:
   231  		return float64(v)
   232  	case float64:
   233  		return v
   234  	}
   235  }
   236  
   237  // Metrics returns the full list of known metrics. Usually Assert should be used
   238  func (m *MetricsTest) Metrics() []Metric {
   239  	m.t.Helper()
   240  	res, err := m.reg.Gather()
   241  	if err != nil {
   242  		m.t.Fatal(err)
   243  	}
   244  	metrics := []Metric{}
   245  	for _, metric := range res {
   246  		if len(metric.Metric) == 0 {
   247  			m.t.Logf("%v: no rows", *metric.Name)
   248  		}
   249  		for _, row := range metric.Metric {
   250  			m := Metric{Name: *metric.Name, Labels: map[string]string{}, Value: display(row)}
   251  			for _, kv := range row.Label {
   252  				k, v := *kv.Name, *kv.Value
   253  				m.Labels[k] = v
   254  			}
   255  			metrics = append(metrics, m)
   256  		}
   257  	}
   258  	return metrics
   259  }
   260  
// Metric is a flattened view of one metric row as returned by Metrics: the
// metric family name, the row's labels, and the row's value rendered as a
// string.
type Metric struct {
	Name   string
	Labels map[string]string
	Value  string
}
   266  
   267  func (m *MetricsTest) Dump() {
   268  	m.t.Helper()
   269  	res, err := m.reg.Gather()
   270  	if err != nil {
   271  		m.t.Fatal(err)
   272  	}
   273  	for _, metric := range res {
   274  		if len(metric.Metric) == 0 {
   275  			m.t.Logf("%v: no rows", *metric.Name)
   276  		}
   277  		for _, row := range metric.Metric {
   278  			kvs := []string{}
   279  			for _, kv := range row.Label {
   280  				k, v := *kv.Name, *kv.Value
   281  				kvs = append(kvs, k+"="+v)
   282  			}
   283  			tags := strings.Join(kvs, ",")
   284  			m.t.Logf(" %v{%v} %v", *metric.Name, tags, display(row))
   285  		}
   286  	}
   287  }
   288  
   289  func display(row *dto.Metric) string {
   290  	if row.Counter != nil {
   291  		return fmt.Sprint(*row.Counter.Value)
   292  	} else if row.Gauge != nil {
   293  		return fmt.Sprint(*row.Gauge.Value)
   294  	} else if row.Histogram != nil {
   295  		return fmt.Sprintf("histogram{count=%v,sum=%v}", *row.Histogram.SampleCount, *row.Histogram.SampleSum)
   296  	} else if row.Summary != nil {
   297  		return fmt.Sprintf("summary{count=%v,sum=%v}", *row.Summary.SampleCount, *row.Summary.SampleSum)
   298  	}
   299  	return "?"
   300  }