gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/metric/metric_test.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metric
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"fmt"
    21  	"hash/adler32"
    22  	"math"
    23  	"os"
    24  	"slices"
    25  	"strconv"
    26  	"strings"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/prometheus/common/expfmt"
    31  	"google.golang.org/protobuf/proto"
    32  	pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto"
    33  	"gvisor.dev/gvisor/pkg/prometheus"
    34  	"gvisor.dev/gvisor/pkg/sync"
    35  )
    36  
    37  const (
    38  	fooDescription     = "Foo!"
    39  	barDescription     = "Bar Baz"
    40  	counterDescription = "Counter"
    41  	distribDescription = "A distribution metric for testing"
    42  )
    43  
    44  var (
    45  	fieldValFoo  = FieldValue{"foo"}
    46  	fieldValBar  = FieldValue{"bar"}
    47  	fieldValBaz  = FieldValue{"baz"}
    48  	fieldValQuux = FieldValue{"quux"}
    49  )
    50  
    51  // Helper method that exercises Prometheus metric exporting.
    52  // Ensures that current metric data, if it were to be exported and formatted as Prometheus format,
    53  // would be successfully parsable by the reference Prometheus implementation.
    54  // However, it does not verify that the data that was parsed actually matches the metric data.
    55  func verifyPrometheusParsing(t *testing.T) {
    56  	t.Helper()
    57  	snapshot, err := GetSnapshot(SnapshotOptions{})
    58  	if err != nil {
    59  		t.Errorf("failed to get Prometheus snapshot: %v", err)
    60  		return
    61  	}
    62  	var buf bytes.Buffer
    63  	if _, err := prometheus.Write(&buf, prometheus.ExportOptions{}, map[*prometheus.Snapshot]prometheus.SnapshotExportOptions{
    64  		snapshot: prometheus.SnapshotExportOptions{},
    65  	}); err != nil {
    66  		t.Errorf("failed to get Prometheus snapshot: %v", err)
    67  		return
    68  	}
    69  	if _, err := (&expfmt.TextParser{}).TextToMetricFamilies(&buf); err != nil {
    70  		t.Errorf("failed to parse Prometheus output: %v", err)
    71  	}
    72  }
    73  
    74  func TestVerifyName(t *testing.T) {
    75  	for name, wantErr := range map[string]bool{
    76  		"":              true,
    77  		"/":             true,
    78  		"/foo":          false,
    79  		"/foo/bar":      false,
    80  		"/foo/bar/baz":  false,
    81  		"/foo/bar/bar":  false,
    82  		"/foo//bar/baz": true,
    83  		"//foo/bar":     true,
    84  		"//":            true,
    85  		"foo":           true,
    86  		"foo/bar":       true,
    87  		"/foo-bar":      true,
    88  		"/foo bar":      true,
    89  		"/foo_bar":      false,
    90  	} {
    91  		t.Run(name, func(t *testing.T) {
    92  			err := verifyName(name)
    93  			if gotErr := (err != nil); gotErr != wantErr {
    94  				t.Errorf("verifyName(%q) got err=%v wantErr=%v", name, err, wantErr)
    95  			}
    96  		})
    97  	}
    98  }
    99  
   100  func TestInitialize(t *testing.T) {
   101  	defer resetTest()
   102  
   103  	_, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription)
   104  	if err != nil {
   105  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   106  	}
   107  
   108  	_, err = NewUint64Metric("/bar", true, pb.MetricMetadata_UNITS_NANOSECONDS, barDescription)
   109  	if err != nil {
   110  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   111  	}
   112  
   113  	bucketer := NewExponentialBucketer(3, 2, 0, 1)
   114  	field1 := NewField("field1", &fieldValFoo, &fieldValBar)
   115  	field2 := NewField("field2", &fieldValBaz, &fieldValQuux)
   116  	_, err = NewDistributionMetric("/distrib", true, bucketer, pb.MetricMetadata_UNITS_NANOSECONDS, distribDescription, field1, field2)
   117  	if err != nil {
   118  		t.Fatalf("NewDistributionMetric got err %v want nil", err)
   119  	}
   120  
   121  	if err := Initialize(); err != nil {
   122  		t.Fatalf("Initialize(): %s", err)
   123  	}
   124  	verifyPrometheusParsing(t)
   125  
   126  	if len(emitter) != 1 {
   127  		t.Fatalf("Initialize emitted %d events want 1", len(emitter))
   128  	}
   129  
   130  	mr, ok := emitter[0].(*pb.MetricRegistration)
   131  	if !ok {
   132  		t.Fatalf("emitter %v got %T want pb.MetricRegistration", emitter[0], emitter[0])
   133  	}
   134  
   135  	if len(mr.Metrics) != 3 {
   136  		t.Errorf("MetricRegistration got %d metrics want %d", len(mr.Metrics), 3)
   137  	}
   138  
   139  	foundFoo := false
   140  	foundBar := false
   141  	foundDistrib := false
   142  	for _, m := range mr.Metrics {
   143  		switch m.Name {
   144  		case "/foo":
   145  			foundFoo = true
   146  			if m.Type != pb.MetricMetadata_TYPE_UINT64 {
   147  				t.Errorf("Metadata %+v Type got %v want pb.MetricMetadata_TYPE_UINT64", m, m.Type)
   148  			}
   149  			if !m.Cumulative {
   150  				t.Errorf("Metadata %+v Cumulative got false want true", m)
   151  			}
   152  			if m.Description != fooDescription {
   153  				t.Errorf("/foo %+v Description got %q want %q", m, m.Description, fooDescription)
   154  			}
   155  			if m.Sync {
   156  				t.Errorf("/foo %+v Sync got true want false", m)
   157  			}
   158  			if m.Units != pb.MetricMetadata_UNITS_NONE {
   159  				t.Errorf("/foo %+v Units got %v want %v", m, m.Units, pb.MetricMetadata_UNITS_NONE)
   160  			}
   161  		case "/bar":
   162  			foundBar = true
   163  			if m.Type != pb.MetricMetadata_TYPE_UINT64 {
   164  				t.Errorf("Metadata %+v Type got %v want pb.MetricMetadata_TYPE_UINT64", m, m.Type)
   165  			}
   166  			if !m.Cumulative {
   167  				t.Errorf("Metadata %+v Cumulative got false want true", m)
   168  			}
   169  			if m.Description != barDescription {
   170  				t.Errorf("/bar %+v Description got %q want %q", m, m.Description, barDescription)
   171  			}
   172  			if !m.Sync {
   173  				t.Errorf("/bar %+v Sync got true want false", m)
   174  			}
   175  			if m.Units != pb.MetricMetadata_UNITS_NANOSECONDS {
   176  				t.Errorf("/bar %+v Units got %v want %v", m, m.Units, pb.MetricMetadata_UNITS_NANOSECONDS)
   177  			}
   178  		case "/distrib":
   179  			foundDistrib = true
   180  			want := &pb.MetricMetadata{
   181  				Name:           "/distrib",
   182  				PrometheusName: "distrib",
   183  				Type:           pb.MetricMetadata_TYPE_DISTRIBUTION,
   184  				Units:          pb.MetricMetadata_UNITS_NANOSECONDS,
   185  				Description:    distribDescription,
   186  				Sync:           true,
   187  				Fields: []*pb.MetricMetadata_Field{
   188  					{FieldName: "field1", AllowedValues: []string{"foo", "bar"}},
   189  					{FieldName: "field2", AllowedValues: []string{"baz", "quux"}},
   190  				},
   191  				DistributionBucketLowerBounds: []int64{0, 2, 4, 6},
   192  			}
   193  			if !proto.Equal(m, want) {
   194  				t.Fatalf("got /distrib metadata:\n%v\nwant:\n%v", m, want)
   195  			}
   196  		}
   197  	}
   198  
   199  	if !foundFoo {
   200  		t.Errorf("/foo not found: %+v", emitter)
   201  	}
   202  	if !foundBar {
   203  		t.Errorf("/bar not found: %+v", emitter)
   204  	}
   205  	if !foundDistrib {
   206  		t.Errorf("/distrib not found: %+v", emitter)
   207  	}
   208  	verifyPrometheusParsing(t)
   209  }
   210  
   211  func TestDisable(t *testing.T) {
   212  	defer resetTest()
   213  
   214  	_, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription)
   215  	if err != nil {
   216  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   217  	}
   218  
   219  	_, err = NewUint64Metric("/bar", true, pb.MetricMetadata_UNITS_NONE, barDescription)
   220  	if err != nil {
   221  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   222  	}
   223  
   224  	_, err = NewDistributionMetric("/distrib", false, NewExponentialBucketer(2, 2, 0, 1), pb.MetricMetadata_UNITS_NONE, distribDescription)
   225  	if err != nil {
   226  		t.Fatalf("NewDistributionMetric got err %v want nil", err)
   227  	}
   228  
   229  	if err := Disable(); err != nil {
   230  		t.Fatalf("Disable(): %s", err)
   231  	}
   232  
   233  	if len(emitter) != 1 {
   234  		t.Fatalf("Initialize emitted %d events want 1", len(emitter))
   235  	}
   236  
   237  	mr, ok := emitter[0].(*pb.MetricRegistration)
   238  	if !ok {
   239  		t.Fatalf("emitter %v got %T want pb.MetricRegistration", emitter[0], emitter[0])
   240  	}
   241  
   242  	if len(mr.Metrics) != 0 {
   243  		t.Errorf("MetricRegistration got %d metrics want 0", len(mr.Metrics))
   244  	}
   245  }
   246  
   247  func TestEmitMetricUpdate(t *testing.T) {
   248  	defer resetTest()
   249  
   250  	foo, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription)
   251  	if err != nil {
   252  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   253  	}
   254  
   255  	_, err = NewUint64Metric("/bar", true, pb.MetricMetadata_UNITS_NONE, barDescription)
   256  	if err != nil {
   257  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   258  	}
   259  
   260  	bucketer := NewExponentialBucketer(2, 2, 0, 1)
   261  	field1 := NewField("field1", &fieldValFoo, &fieldValBar)
   262  	field2 := NewField("field2", &fieldValBaz, &fieldValQuux)
   263  	distrib, err := NewDistributionMetric("/distrib", false, bucketer, pb.MetricMetadata_UNITS_NONE, distribDescription, field1, field2)
   264  	if err != nil {
   265  		t.Fatalf("NewDistributionMetric: %v", err)
   266  	}
   267  
   268  	if err := Initialize(); err != nil {
   269  		t.Fatalf("Initialize(): %s", err)
   270  	}
   271  
   272  	// Don't care about the registration metrics.
   273  	emitter.Reset()
   274  	EmitMetricUpdate()
   275  
   276  	if len(emitter) != 1 {
   277  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
   278  	}
   279  
   280  	update, ok := emitter[0].(*pb.MetricUpdate)
   281  	if !ok {
   282  		t.Fatalf("emitter %v got %T want pb.MetricUpdate", emitter[0], emitter[0])
   283  	}
   284  
   285  	if len(update.Metrics) != 2 {
   286  		t.Errorf("MetricUpdate got %d metrics want %d", len(update.Metrics), 2)
   287  	}
   288  
   289  	// Both are included for their initial values.
   290  	foundFoo := false
   291  	foundBar := false
   292  	foundDistrib := false
   293  	for _, m := range update.Metrics {
   294  		switch m.Name {
   295  		case "/foo":
   296  			foundFoo = true
   297  		case "/bar":
   298  			foundBar = true
   299  		case "/distrib":
   300  			foundDistrib = true
   301  		}
   302  		if m.Name != "/distrib" {
   303  			uv, ok := m.Value.(*pb.MetricValue_Uint64Value)
   304  			if !ok {
   305  				t.Errorf("%+v: value %v got %T want pb.MetricValue_Uint64Value", m, m.Value, m.Value)
   306  				continue
   307  			}
   308  			if uv.Uint64Value != 0 {
   309  				t.Errorf("%v: Value got %v want %d", m, uv.Uint64Value, 0)
   310  			}
   311  		}
   312  	}
   313  
   314  	if !foundFoo {
   315  		t.Errorf("/foo not found: %+v", emitter)
   316  	}
   317  	if !foundBar {
   318  		t.Errorf("/bar not found: %+v", emitter)
   319  	}
   320  	if foundDistrib {
   321  		t.Errorf("/distrib unexpectedly found: %+v", emitter)
   322  	}
   323  	if t.Failed() {
   324  		t.Fatal("Aborting test so far due to earlier errors.")
   325  	}
   326  
   327  	verifyPrometheusParsing(t)
   328  
   329  	// Increment foo. Only it is included in the next update.
   330  	foo.Increment()
   331  	foo.Increment()
   332  	foo.Increment()
   333  	emitter.Reset()
   334  	EmitMetricUpdate()
   335  	if len(emitter) != 1 {
   336  		t.Fatalf("EmitMetricUpdate emitted %d events want 1", len(emitter))
   337  	}
   338  	update, ok = emitter[0].(*pb.MetricUpdate)
   339  	if !ok {
   340  		t.Fatalf("emitter %v got %T want pb.MetricUpdate", emitter[0], emitter[0])
   341  	}
   342  	if len(update.Metrics) != 1 {
   343  		t.Fatalf("MetricUpdate got %d metrics want %d", len(update.Metrics), 1)
   344  	}
   345  	m := update.Metrics[0]
   346  	if m.Name != "/foo" {
   347  		t.Fatalf("Metric %+v name got %q want '/foo'", m, m.Name)
   348  	}
   349  	uv, ok := m.Value.(*pb.MetricValue_Uint64Value)
   350  	if !ok {
   351  		t.Fatalf("%+v: value %v got %T want pb.MetricValue_Uint64Value", m, m.Value, m.Value)
   352  	}
   353  	if uv.Uint64Value != 3 {
   354  		t.Errorf("%v: Value got %v want %d", m, uv.Uint64Value, 3)
   355  	}
   356  	verifyPrometheusParsing(t)
   357  
   358  	// Add a few samples to the distribution metric.
   359  	distrib.AddSample(1, &fieldValFoo, &fieldValBaz)
   360  	distrib.AddSample(1, &fieldValFoo, &fieldValBaz)
   361  	distrib.AddSample(3, &fieldValFoo, &fieldValBaz)
   362  	distrib.AddSample(-1, &fieldValFoo, &fieldValQuux)
   363  	distrib.AddSample(1, &fieldValFoo, &fieldValQuux)
   364  	distrib.AddSample(100, &fieldValFoo, &fieldValQuux)
   365  	emitter.Reset()
   366  	EmitMetricUpdate()
   367  	if len(emitter) != 1 {
   368  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
   369  	}
   370  	update, ok = emitter[0].(*pb.MetricUpdate)
   371  	if !ok {
   372  		t.Fatalf("emitter %v got %T want pb.MetricUpdate", emitter[0], emitter[0])
   373  	}
   374  	if len(update.Metrics) != 2 {
   375  		t.Fatalf("MetricUpdate got %d metrics want %d", len(update.Metrics), 1)
   376  	}
   377  	for _, m := range update.Metrics {
   378  		if m.Name != "/distrib" {
   379  			t.Fatalf("Metric %+v name got %q want '/distrib'", m, m.Name)
   380  		}
   381  		if len(m.FieldValues) != 2 {
   382  			t.Fatalf("Metric %+v fields: got %v want %d fields", m, m.FieldValues, 2)
   383  		}
   384  		if m.FieldValues[0] != "foo" {
   385  			t.Fatalf("Metric %+v field 0: got %v want %v", m, m.FieldValues[0], "foo")
   386  		}
   387  		dv, ok := m.Value.(*pb.MetricValue_DistributionValue)
   388  		if !ok {
   389  			t.Fatalf("%+v: value %v got %T want pb.MetricValue_DistributionValue", m, m.Value, m.Value)
   390  		}
   391  		samples := dv.DistributionValue.GetNewSamples()
   392  		if len(samples) != 4 {
   393  			t.Fatalf("%+v: got %d buckets, want %d", dv.DistributionValue, len(samples), 4)
   394  		}
   395  		var wantSamples []uint64
   396  		switch m.FieldValues[1] {
   397  		case "baz":
   398  			wantSamples = []uint64{0, 2, 1, 0}
   399  		case "quux":
   400  			wantSamples = []uint64{1, 1, 0, 1}
   401  		default:
   402  			t.Fatalf("%+v: got unexpected field[1]: %q", m, m.FieldValues[1])
   403  		}
   404  		for i, s := range samples {
   405  			if s != wantSamples[i] {
   406  				t.Errorf("%+v [fields %v]: sample %d: got %d want %d", dv.DistributionValue, m.FieldValues, i, s, wantSamples[i])
   407  			}
   408  		}
   409  	}
   410  	verifyPrometheusParsing(t)
   411  
   412  	// Add more samples to the distribution metric, check that we get the delta.
   413  	distrib.AddSample(3, &fieldValFoo, &fieldValBaz)
   414  	distrib.AddSample(2, &fieldValFoo, &fieldValBaz)
   415  	distrib.AddSample(1, &fieldValFoo, &fieldValBaz)
   416  	distrib.AddSample(3, &fieldValFoo, &fieldValBaz)
   417  	emitter.Reset()
   418  	EmitMetricUpdate()
   419  	if len(emitter) != 1 {
   420  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
   421  	}
   422  	dv, ok := emitter[0].(*pb.MetricUpdate).Metrics[0].Value.(*pb.MetricValue_DistributionValue)
   423  	if !ok {
   424  		t.Fatalf("%+v: want pb.MetricValue_DistributionValue", emitter)
   425  	}
   426  	samples := dv.DistributionValue.GetNewSamples()
   427  	if len(samples) != 4 {
   428  		t.Fatalf("%+v: got %d buckets, want %d", dv.DistributionValue, len(samples), 4)
   429  	}
   430  	wantSamples := []uint64{0, 1, 3, 0}
   431  	for i, s := range samples {
   432  		if s != wantSamples[i] {
   433  			t.Errorf("%+v: sample %d: got %d want %d", dv.DistributionValue, i, s, wantSamples[i])
   434  		}
   435  	}
   436  	verifyPrometheusParsing(t)
   437  
   438  	// Change nothing but still call EmitMetricUpdate. Verify that nothing gets sent.
   439  	emitter.Reset()
   440  	EmitMetricUpdate()
   441  	if len(emitter) != 0 {
   442  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 0)
   443  	}
   444  	verifyPrometheusParsing(t)
   445  }
   446  
   447  func TestEmitMetricUpdateWithFields(t *testing.T) {
   448  	defer resetTest()
   449  
   450  	var (
   451  		weird1 = FieldValue{"weird1"}
   452  		weird2 = FieldValue{"weird2"}
   453  	)
   454  	field := NewField("weirdness_type", &weird1, &weird2)
   455  
   456  	counter, err := NewUint64Metric("/weirdness", false, pb.MetricMetadata_UNITS_NONE, counterDescription, field)
   457  	if err != nil {
   458  		t.Fatalf("NewUint64Metric got err %v want nil", err)
   459  	}
   460  
   461  	if err := Initialize(); err != nil {
   462  		t.Fatalf("Initialize(): %s", err)
   463  	}
   464  	verifyPrometheusParsing(t)
   465  
   466  	// Don't care about the registration metrics.
   467  	emitter.Reset()
   468  	EmitMetricUpdate()
   469  
   470  	// For metrics with fields, we do not emit data unless the value is
   471  	// incremented.
   472  	if len(emitter) != 0 {
   473  		t.Fatalf("EmitMetricUpdate emitted %d events want 0", len(emitter))
   474  	}
   475  	verifyPrometheusParsing(t)
   476  
   477  	counter.IncrementBy(4, &weird1)
   478  	counter.Increment(&weird2)
   479  
   480  	emitter.Reset()
   481  	EmitMetricUpdate()
   482  
   483  	if len(emitter) != 1 {
   484  		t.Fatalf("EmitMetricUpdate emitted %d events want 1", len(emitter))
   485  	}
   486  
   487  	update, ok := emitter[0].(*pb.MetricUpdate)
   488  	if !ok {
   489  		t.Fatalf("emitter %v got %T want pb.MetricUpdate", emitter[0], emitter[0])
   490  	}
   491  
   492  	if len(update.Metrics) != 2 {
   493  		t.Errorf("MetricUpdate got %d metrics want 2", len(update.Metrics))
   494  	}
   495  
   496  	foundWeird1 := false
   497  	foundWeird2 := false
   498  	for i := 0; i < len(update.Metrics); i++ {
   499  		m := update.Metrics[i]
   500  
   501  		if m.Name != "/weirdness" {
   502  			t.Errorf("Metric %+v name got %q want '/weirdness'", m, m.Name)
   503  		}
   504  		if len(m.FieldValues) != 1 {
   505  			t.Errorf("MetricUpdate got %d fields want 1", len(m.FieldValues))
   506  		}
   507  
   508  		switch m.FieldValues[0] {
   509  		case weird1.Value:
   510  			uv, ok := m.Value.(*pb.MetricValue_Uint64Value)
   511  			if !ok {
   512  				t.Errorf("%+v: value %v got %T want pb.MetricValue_Uint64Value", m, m.Value, m.Value)
   513  			}
   514  			if uv.Uint64Value != 4 {
   515  				t.Errorf("%v: Value got %v want 4", m, uv.Uint64Value)
   516  			}
   517  			foundWeird1 = true
   518  		case weird2.Value:
   519  			uv, ok := m.Value.(*pb.MetricValue_Uint64Value)
   520  			if !ok {
   521  				t.Errorf("%+v: value %v got %T want pb.MetricValue_Uint64Value", m, m.Value, m.Value)
   522  			}
   523  			if uv.Uint64Value != 1 {
   524  				t.Errorf("%v: Value got %v want 1", m, uv.Uint64Value)
   525  			}
   526  			foundWeird2 = true
   527  		}
   528  	}
   529  
   530  	if !foundWeird1 {
   531  		t.Errorf("Field value weird1 not found: %+v", emitter)
   532  	}
   533  	if !foundWeird2 {
   534  		t.Errorf("Field value weird2 not found: %+v", emitter)
   535  	}
   536  
   537  	verifyPrometheusParsing(t)
   538  }
   539  
   540  func TestMetricUpdateStageTiming(t *testing.T) {
   541  	defer resetTest()
   542  
   543  	expectedTimings := map[InitStage]struct{ min, max time.Duration }{}
   544  	measureStage := func(stage InitStage, body func()) {
   545  		stageStarted := time.Now()
   546  		endStage := StartStage(stage)
   547  		bodyStarted := time.Now()
   548  		body()
   549  		bodyEnded := time.Now()
   550  		endStage()
   551  		stageEnded := time.Now()
   552  
   553  		expectedTimings[stage] = struct{ min, max time.Duration }{
   554  			min: bodyEnded.Sub(bodyStarted),
   555  			max: stageEnded.Sub(stageStarted),
   556  		}
   557  	}
   558  	checkStage := func(got *pb.StageTiming, want InitStage) {
   559  		if InitStage(got.GetStage()) != want {
   560  			t.Errorf("%v: got stage %q expected %q", got, got.GetStage(), want)
   561  		}
   562  		timingBounds, found := expectedTimings[want]
   563  		if !found {
   564  			t.Fatalf("invalid init stage name %q", want)
   565  		}
   566  		started := got.Started.AsTime()
   567  		ended := got.Ended.AsTime()
   568  		duration := ended.Sub(started)
   569  		if duration < timingBounds.min {
   570  			t.Errorf("stage %v: lasted %v, expected at least %v", want, duration, timingBounds.min)
   571  		} else if duration > timingBounds.max {
   572  			t.Errorf("stage %v: lasted %v, expected no more than %v", want, duration, timingBounds.max)
   573  		}
   574  	}
   575  
   576  	// Test that it's legit to go through stages before metric registration.
   577  	measureStage("before_first_update_1", func() {
   578  		time.Sleep(100 * time.Millisecond)
   579  	})
   580  	measureStage("before_first_update_2", func() {
   581  		time.Sleep(100 * time.Millisecond)
   582  	})
   583  
   584  	fooMetric, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription)
   585  	if err != nil {
   586  		t.Fatalf("Cannot register /foo: %v", err)
   587  	}
   588  	emitter.Reset()
   589  	Initialize()
   590  	EmitMetricUpdate()
   591  
   592  	// We should have gotten the metric registration and the first MetricUpdate.
   593  	if len(emitter) != 2 {
   594  		t.Fatalf("emitter has %d messages (%v), expected %d", len(emitter), emitter, 2)
   595  	}
   596  
   597  	if registration, ok := emitter[0].(*pb.MetricRegistration); !ok {
   598  		t.Errorf("first message is not MetricRegistration: %T / %v", emitter[0], emitter[0])
   599  	} else if len(registration.Stages) != len(allStages) {
   600  		t.Errorf("MetricRegistration has %d stages (%v), expected %d (%v)", len(registration.Stages), registration.Stages, len(allStages), allStages)
   601  	} else {
   602  		for i := 0; i < len(allStages); i++ {
   603  			if InitStage(registration.Stages[i]) != allStages[i] {
   604  				t.Errorf("MetricRegistration.Stages[%d]: got %q want %q", i, registration.Stages[i], allStages[i])
   605  			}
   606  		}
   607  	}
   608  
   609  	if firstUpdate, ok := emitter[1].(*pb.MetricUpdate); !ok {
   610  		t.Errorf("second message is not MetricUpdate: %T / %v", emitter[1], emitter[1])
   611  	} else if len(firstUpdate.StageTiming) != 2 {
   612  		t.Errorf("MetricUpdate has %d stage timings (%v), expected %d", len(firstUpdate.StageTiming), firstUpdate.StageTiming, 2)
   613  	} else {
   614  		checkStage(firstUpdate.StageTiming[0], "before_first_update_1")
   615  		checkStage(firstUpdate.StageTiming[1], "before_first_update_2")
   616  	}
   617  	verifyPrometheusParsing(t)
   618  
   619  	// Ensure re-emitting doesn't cause another event to be sent.
   620  	emitter.Reset()
   621  	EmitMetricUpdate()
   622  	if len(emitter) != 0 {
   623  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 0)
   624  	}
   625  	verifyPrometheusParsing(t)
   626  
   627  	// Generate monitoring data, we should get an event with no stages.
   628  	fooMetric.Increment()
   629  	emitter.Reset()
   630  	EmitMetricUpdate()
   631  	if len(emitter) != 1 {
   632  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
   633  	} else if update, ok := emitter[0].(*pb.MetricUpdate); !ok {
   634  		t.Errorf("message is not MetricUpdate: %T / %v", emitter[1], emitter[1])
   635  	} else if len(update.StageTiming) != 0 {
   636  		t.Errorf("unexpected stage timing information: %v", update.StageTiming)
   637  	}
   638  	verifyPrometheusParsing(t)
   639  
   640  	// Now generate new stages.
   641  	measureStage("foo_stage_1", func() {
   642  		time.Sleep(100 * time.Millisecond)
   643  	})
   644  	measureStage("foo_stage_2", func() {
   645  		time.Sleep(100 * time.Millisecond)
   646  	})
   647  	emitter.Reset()
   648  	EmitMetricUpdate()
   649  	if len(emitter) != 1 {
   650  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
   651  	} else if update, ok := emitter[0].(*pb.MetricUpdate); !ok {
   652  		t.Errorf("message is not MetricUpdate: %T / %v", emitter[1], emitter[1])
   653  	} else if len(update.Metrics) != 0 {
   654  		t.Errorf("MetricUpdate has %d metric value changes (%v), expected %d", len(update.Metrics), update.Metrics, 0)
   655  	} else if len(update.StageTiming) != 2 {
   656  		t.Errorf("MetricUpdate has %d stages (%v), expected %d", len(update.StageTiming), update.StageTiming, 2)
   657  	} else {
   658  		checkStage(update.StageTiming[0], "foo_stage_1")
   659  		checkStage(update.StageTiming[1], "foo_stage_2")
   660  	}
   661  	verifyPrometheusParsing(t)
   662  
   663  	// Now try generating data for both metrics and stages.
   664  	fooMetric.Increment()
   665  	measureStage("last_stage_1", func() {
   666  		time.Sleep(100 * time.Millisecond)
   667  	})
   668  	measureStage("last_stage_2", func() {
   669  		time.Sleep(100 * time.Millisecond)
   670  	})
   671  	fooMetric.Increment()
   672  	emitter.Reset()
   673  	EmitMetricUpdate()
   674  	if len(emitter) != 1 {
   675  		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
   676  	} else if update, ok := emitter[0].(*pb.MetricUpdate); !ok {
   677  		t.Errorf("message is not MetricUpdate: %T / %v", emitter[1], emitter[1])
   678  	} else if len(update.Metrics) != 1 {
   679  		t.Errorf("MetricUpdate has %d metric value changes (%v), expected %d", len(update.Metrics), update.Metrics, 1)
   680  	} else if len(update.StageTiming) != 2 {
   681  		t.Errorf("MetricUpdate has %d stages (%v), expected %d", len(update.StageTiming), update.StageTiming, 2)
   682  	} else {
   683  		checkStage(update.StageTiming[0], "last_stage_1")
   684  		checkStage(update.StageTiming[1], "last_stage_2")
   685  	}
   686  	verifyPrometheusParsing(t)
   687  }
   688  
// TestTimerMetric verifies that a timer metric records operation durations
// into the correct distribution buckets, and that field values may be
// supplied partially at Start and completed at Finish.
func TestTimerMetric(t *testing.T) {
	defer resetTest()
	// This bucketer just has 2 finite buckets: [0, 500ms) and [500ms, 1s).
	bucketer := NewExponentialBucketer(2, uint64((500 * time.Millisecond).Nanoseconds()), 0, 1)
	field1 := NewField("field1", &fieldValFoo, &fieldValBar)
	field2 := NewField("field2", &fieldValBaz, &fieldValQuux)
	timer, err := NewTimerMetric("/timer", bucketer, "a timer metric", field1, field2)
	if err != nil {
		t.Fatalf("NewTimerMetric: %v", err)
	}
	if err := Initialize(); err != nil {
		t.Fatalf("Initialize(): %s", err)
	}
	// Don't care about the registration metrics.
	emitter.Reset()

	// Create timer data.
	// All 8 operations end up with field values ("foo", "quux"): the first
	// batch passes "foo" at Start and "quux" at Finish, the second batch
	// passes both at Finish.
	var wg sync.WaitGroup
	// 5 operations of ~250ms, expected to land in the [0, 500ms) bucket.
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			op := timer.Start(&fieldValFoo)
			defer op.Finish(&fieldValQuux)
			time.Sleep(250 * time.Millisecond)
		}()
	}
	// 3 operations of ~750ms, expected to land in the [500ms, 1s) bucket.
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			op := timer.Start()
			defer op.Finish(&fieldValFoo, &fieldValQuux)
			time.Sleep(750 * time.Millisecond)
		}()
	}
	wg.Wait()
	EmitMetricUpdate()
	if len(emitter) != 1 {
		t.Fatalf("EmitMetricUpdate emitted %d events want %d", len(emitter), 1)
	}
	m := emitter[0].(*pb.MetricUpdate).Metrics[0]
	wantFields := []string{"foo", "quux"}
	if !slices.Equal(m.GetFieldValues(), wantFields) {
		t.Errorf("%+v: got fields %v want %v", m, m.GetFieldValues(), wantFields)
	}
	dv, ok := m.Value.(*pb.MetricValue_DistributionValue)
	if !ok {
		t.Fatalf("%+v: want pb.MetricValue_DistributionValue", m)
	}
	// 4 buckets: underflow, [0, 500ms), [500ms, 1s), overflow.
	samples := dv.DistributionValue.GetNewSamples()
	if len(samples) != 4 {
		t.Fatalf("%+v: got %d buckets, want %d", dv.DistributionValue, len(samples), 4)
	}
	// NOTE(review): this assumes the sleeps above stay within their buckets;
	// heavy scheduler delay could push a 250ms op past 500ms and flake.
	wantSamples := []uint64{0, 5, 3, 0}
	for i, s := range samples {
		if s != wantSamples[i] {
			t.Errorf("%+v: sample %d: got %d want %d", dv.DistributionValue, i, s, wantSamples[i])
		}
	}
	verifyPrometheusParsing(t)
}
   751  
   752  func TestBucketer(t *testing.T) {
   753  	for _, test := range []struct {
   754  		name                    string
   755  		bucketer                Bucketer
   756  		minSample               int64
   757  		maxSample               int64
   758  		step                    int64
   759  		firstFewLowerBounds     []int64
   760  		successiveBucketSamples []int64
   761  	}{
   762  		{
   763  			name:                "static-sized buckets",
   764  			bucketer:            NewExponentialBucketer(10, 10, 0, 1),
   765  			minSample:           -5,
   766  			maxSample:           105,
   767  			firstFewLowerBounds: []int64{0, 10, 20, 30, 40, 50},
   768  		},
   769  		{
   770  			name:      "exponential buckets",
   771  			bucketer:  NewExponentialBucketer(10, 10, 2, 1.5),
   772  			minSample: -5,
   773  			maxSample: int64(20 * math.Pow(1.5, 12)),
   774  			firstFewLowerBounds: []int64{
   775  				0,
   776  				10 + 2,
   777  				20 + int64(2*1.5),
   778  				30 + int64(math.Floor(2*1.5*1.5)),
   779  				40 + int64(math.Floor(2*1.5*1.5*1.5)),
   780  				50 + int64(math.Floor(2*1.5*1.5*1.5*1.5)),
   781  			},
   782  		},
   783  		{
   784  			name:      "timer buckets",
   785  			bucketer:  NewDurationBucketer(8, time.Second, time.Minute),
   786  			minSample: 0,
   787  			maxSample: (5 * time.Minute).Nanoseconds(),
   788  			step:      (500 * time.Millisecond).Nanoseconds(),
   789  			successiveBucketSamples: []int64{
   790  				// Roughly exponential successive durations:
   791  				(500 * time.Millisecond).Nanoseconds(),
   792  				(1200 * time.Millisecond).Nanoseconds(),
   793  				(2500 * time.Millisecond).Nanoseconds(),
   794  				(5 * time.Second).Nanoseconds(),
   795  				(15 * time.Second).Nanoseconds(),
   796  				(35 * time.Second).Nanoseconds(),
   797  				(75 * time.Second).Nanoseconds(),
   798  				(3 * time.Minute).Nanoseconds(),
   799  				(7 * time.Minute).Nanoseconds(),
   800  			},
   801  		},
   802  	} {
   803  		t.Run(test.name, func(t *testing.T) {
   804  			numFiniteBuckets := test.bucketer.NumFiniteBuckets()
   805  			t.Logf("Underflow bucket has bounds (-inf, %d)", test.bucketer.LowerBound(0))
   806  			for b := 0; b < numFiniteBuckets; b++ {
   807  				t.Logf("Bucket %d has bounds [%d, %d)", b, test.bucketer.LowerBound(b), test.bucketer.LowerBound(b+1))
   808  			}
   809  			t.Logf("Overflow bucket has bounds [%d, +inf)", test.bucketer.LowerBound(numFiniteBuckets))
   810  			testAround := func(bound int64, bucketIndex int) {
   811  				for sample := bound - 2; sample <= bound+2; sample++ {
   812  					gotIndex := test.bucketer.BucketIndex(sample)
   813  					if sample < bound && gotIndex != bucketIndex-1 || sample >= bound && gotIndex != bucketIndex {
   814  						t.Errorf("LowerBound(%d) = %d, yet BucketIndex(%d) = %d", bucketIndex, bound, sample, gotIndex)
   815  					}
   816  				}
   817  			}
   818  			step := test.step
   819  			if step == 0 {
   820  				step = 1
   821  			}
   822  			for sample := test.minSample; sample <= test.maxSample; sample += step {
   823  				bucket := test.bucketer.BucketIndex(sample)
   824  				if bucket == -1 {
   825  					lowestBound := test.bucketer.LowerBound(0)
   826  					if sample >= lowestBound {
   827  						t.Errorf("sample %d: got bucket %d but lowest bound %d", sample, bucket, lowestBound)
   828  					}
   829  					testAround(lowestBound, 0)
   830  				} else if bucket > numFiniteBuckets {
   831  					t.Errorf("sample %d: got bucket with 0-based index %d but bucketer supposedly only has %d buckets", sample, bucket, numFiniteBuckets)
   832  				} else if bucket == numFiniteBuckets {
   833  					lastBucketBound := test.bucketer.LowerBound(bucket)
   834  					if sample < lastBucketBound {
   835  						t.Errorf("sample %d: got bucket %d but it has lower bound %d", sample, bucket, lastBucketBound)
   836  					}
   837  					testAround(lastBucketBound, bucket)
   838  				} else {
   839  					lowerBound := test.bucketer.LowerBound(bucket)
   840  					upperBound := test.bucketer.LowerBound(bucket + 1)
   841  					if upperBound <= lowerBound {
   842  						t.Errorf("sample %d: got bucket %d, upperbound %d <= lowerbound %d", sample, bucket, upperBound, lowerBound)
   843  					}
   844  					if sample < lowerBound || sample >= upperBound {
   845  						t.Errorf("sample %d: got bucket %d which has range [%d, %d)", sample, bucket, lowerBound, upperBound)
   846  					}
   847  					testAround(lowerBound, bucket)
   848  					testAround(upperBound, bucket+1)
   849  				}
   850  			}
   851  			for bi, want := range test.firstFewLowerBounds {
   852  				if got := test.bucketer.LowerBound(bi); got != want {
   853  					t.Errorf("bucket %d has lower bound %d, want %d", bi, got, want)
   854  				}
   855  			}
   856  			previousBucket := -1
   857  			for i, sample := range test.successiveBucketSamples {
   858  				gotBucket := test.bucketer.BucketIndex(sample)
   859  				if gotBucket != previousBucket+1 {
   860  					t.Errorf("successive-bucket sample #%d (%d) fell in bucket %d whereas previous sample fell in bucket %d", i, sample, gotBucket, previousBucket)
   861  				}
   862  				previousBucket = gotBucket
   863  			}
   864  		})
   865  	}
   866  }
   867  
   868  func TestBucketerPanics(t *testing.T) {
   869  	for name, fn := range map[string]func(){
   870  		"NewExponentialBucketer @ 0": func() {
   871  			NewExponentialBucketer(0, 2, 0, 1)
   872  		},
   873  		"NewExponentialBucketer @ 120": func() {
   874  			NewExponentialBucketer(120, 2, 0, 1)
   875  		},
   876  		"NewDurationBucketer @ 2": func() {
   877  			NewDurationBucketer(2, time.Second, time.Minute)
   878  		},
   879  		"NewDurationBucketer @ 80": func() {
   880  			NewDurationBucketer(80, time.Microsecond, 50*time.Microsecond)
   881  		},
   882  	} {
   883  		t.Run(name, func(t *testing.T) {
   884  			var recovered any
   885  			func() {
   886  				defer func() {
   887  					recovered = recover()
   888  				}()
   889  				fn()
   890  			}()
   891  			if recovered == nil {
   892  				t.Error("did not panic")
   893  			}
   894  		})
   895  	}
   896  }
   897  
   898  func TestFieldMapperWithFields(t *testing.T) {
   899  	generateFields := func(fieldSizes []int) []Field {
   900  		fields := make([]Field, len(fieldSizes))
   901  		for i, fieldSize := range fieldSizes {
   902  			fieldName := fmt.Sprintf("%c", 'A'+i)
   903  			allowedValues := make([]*FieldValue, fieldSize)
   904  			for val := range allowedValues {
   905  				allowedValues[val] = &FieldValue{fmt.Sprintf("%s%d", fieldName, val)}
   906  			}
   907  			fields[i] = NewField(fieldName, allowedValues...)
   908  		}
   909  		return fields
   910  	}
   911  
   912  	for _, test := range []struct {
   913  		name          string
   914  		fields        []Field
   915  		errOnCreation error
   916  	}{
   917  		{
   918  			name:          "FieldMapper8x10",
   919  			fields:        generateFields([]int{8, 10}),
   920  			errOnCreation: nil,
   921  		},
   922  		{
   923  			name:          "FieldMapper3x4x5",
   924  			fields:        generateFields([]int{3, 4, 5}),
   925  			errOnCreation: nil,
   926  		},
   927  		{
   928  			name:          "FieldMapper4x5x6x7",
   929  			fields:        generateFields([]int{4, 5, 6, 7}),
   930  			errOnCreation: nil,
   931  		},
   932  		{
   933  			name:          "FieldMapperErrNoAllowedValues",
   934  			fields:        []Field{NewField("TheNoValuesField")},
   935  			errOnCreation: ErrFieldHasNoAllowedValues,
   936  		},
   937  	} {
   938  		t.Run(test.name, func(t *testing.T) {
   939  			m, err := newFieldMapper(test.fields...)
   940  			if err != test.errOnCreation {
   941  				t.Fatalf("newFieldMapper err: got %v wanted %v", err, test.errOnCreation)
   942  			}
   943  
   944  			// Test that every field value combination corresponds to just one entry.
   945  			mapping := make([]int, m.numKeys())
   946  			var visitCombinations func(curFields []*FieldValue, remFields []Field)
   947  			visitCombinations = func(curFields []*FieldValue, remFields []Field) {
   948  				depth := len(remFields)
   949  				if depth == 0 {
   950  					return
   951  				}
   952  				if depth == 1 {
   953  					for _, val := range remFields[0].values {
   954  						fields := append(curFields, val)
   955  						key := m.lookup(fields...)
   956  						mapping[key]++
   957  
   958  						// Assert that the reverse operation is also correct.
   959  						fields2 := m.keyToMultiField(key)
   960  						for i, f1val := range fields {
   961  							if f1val.Value != fields2[i] {
   962  								t.Errorf("Field values put into the map are not the same as ones returned: got %v wanted %v", fields2, f1val)
   963  							}
   964  						}
   965  					}
   966  				} else {
   967  					for _, val := range remFields[0].values {
   968  						visitCombinations(append(curFields, val), remFields[1:])
   969  					}
   970  				}
   971  			}
   972  			visitCombinations(nil, test.fields)
   973  
   974  			for i, numVisits := range mapping {
   975  				if numVisits != 1 {
   976  					t.Errorf("Index key %d incorrect number of mappings: got %d wanted 1", i, numVisits)
   977  				}
   978  			}
   979  		})
   980  	}
   981  }
   982  
   983  func TestFieldMapperNoFields(t *testing.T) {
   984  	m, err := newFieldMapper()
   985  	if err != nil {
   986  		t.Fatalf("newFieldMapper err: got %v wanted nil", err)
   987  	}
   988  
   989  	if n := m.numKeys(); n > 1 {
   990  		t.Fatalf("m.numKeys() err: got %d wanted 1", n)
   991  	}
   992  
   993  	key := m.lookup()
   994  	if len(m.keyToMultiField(key)) != 0 {
   995  		t.Errorf("keyToMultiField using key %v (corresponding to no field values): expected no values, got some", key)
   996  	}
   997  }
   998  
   999  func TestFieldValueUniqueness(t *testing.T) {
  1000  	panicked := false
  1001  	func() {
  1002  		defer func() {
  1003  			recover()
  1004  			panicked = true
  1005  		}()
  1006  		NewField("field1", &FieldValue{"foo"}, &FieldValue{"foo"})
  1007  	}()
  1008  	if !panicked {
  1009  		t.Error("did not panic")
  1010  	}
  1011  }
  1012  
  1013  func TestFieldMapperMustUseSameValuePointer(t *testing.T) {
  1014  	const fooString = "foo"
  1015  	var constFoo = FieldValue{fooString}
  1016  	var heapBar = &FieldValue{fmt.Sprintf("%sr", "ba")}
  1017  	n, err := newFieldMapper(NewField("field1", &constFoo, heapBar))
  1018  	if err != nil {
  1019  		t.Fatalf("newFieldMapper err: got %v wanted nil", err)
  1020  	}
  1021  	n.lookup(&constFoo)
  1022  	n.lookup(heapBar)
  1023  	newFoo := &FieldValue{fmt.Sprintf("%so", "fo")}
  1024  	panicked := false
  1025  	func() {
  1026  		defer func() {
  1027  			recover()
  1028  			panicked = true
  1029  		}()
  1030  		n.lookup(newFoo)
  1031  	}()
  1032  	if !panicked {
  1033  		t.Error("did not panic")
  1034  	}
  1035  }
  1036  
  1037  func TestMetricProfiling(t *testing.T) {
  1038  	for _, test := range []struct {
  1039  		name                 string
  1040  		profilingMetricsFlag string
  1041  		metricNames          []string
  1042  		lossy                bool
  1043  		incrementMetricBy    []uint64
  1044  		numIterations        uint64
  1045  		errOnStartProfiling  bool
  1046  	}{
  1047  		{
  1048  			name:                 "simple single metric",
  1049  			profilingMetricsFlag: "/foo",
  1050  			metricNames:          []string{"/foo"},
  1051  			incrementMetricBy:    []uint64{50},
  1052  			numIterations:        100,
  1053  			errOnStartProfiling:  false,
  1054  		},
  1055  		{
  1056  			name:                 "simple single metric, lossy writer",
  1057  			profilingMetricsFlag: "/foo",
  1058  			metricNames:          []string{"/foo"},
  1059  			lossy:                true,
  1060  			incrementMetricBy:    []uint64{50},
  1061  			numIterations:        100,
  1062  			errOnStartProfiling:  false,
  1063  		},
  1064  		{
  1065  			name:                 "single metric exceeds snapshot buffer",
  1066  			profilingMetricsFlag: "/foo",
  1067  			metricNames:          []string{"/foo"},
  1068  			incrementMetricBy:    []uint64{1},
  1069  			numIterations:        3500,
  1070  			errOnStartProfiling:  false,
  1071  		},
  1072  		{
  1073  			name:                 "multiple metrics",
  1074  			profilingMetricsFlag: "/foo,/bar,/big/test/baz,/metric",
  1075  			metricNames:          []string{"/foo", "/bar", "/big/test/baz", "/metric"},
  1076  			incrementMetricBy:    []uint64{1, 29, 73, 991},
  1077  			numIterations:        100,
  1078  			errOnStartProfiling:  false,
  1079  		},
  1080  		{
  1081  			name:                 "mismatched names",
  1082  			profilingMetricsFlag: "/foo,/fighter,/big/test/baz,/metric",
  1083  			metricNames:          []string{"/foo", "/bar", "/big/test/baz", "/metric"},
  1084  			incrementMetricBy:    []uint64{},
  1085  			numIterations:        0,
  1086  			errOnStartProfiling:  true,
  1087  		},
  1088  	} {
  1089  		t.Run(test.name, func(t *testing.T) {
  1090  			defer resetTest()
  1091  			const profilingRate = 1000 * time.Microsecond
  1092  			numMetrics := len(test.metricNames)
  1093  
  1094  			metrics := make([]*Uint64Metric, numMetrics)
  1095  			for i, m := range test.metricNames {
  1096  				newMetric, err := NewUint64Metric(m, true, pb.MetricMetadata_UNITS_NANOSECONDS, fooDescription)
  1097  				metrics[i] = newMetric
  1098  				if err != nil {
  1099  					t.Fatalf("NewUint64Metric got err '%v' want nil", err)
  1100  				}
  1101  			}
  1102  
  1103  			f, err := os.CreateTemp(t.TempDir(), "profiling-metrics")
  1104  			if err != nil {
  1105  				t.Fatalf("failed to create file: '%v'", err)
  1106  			}
  1107  			fName := f.Name()
  1108  
  1109  			if err := Initialize(); err != nil {
  1110  				t.Fatalf("Initialize error: got '%v' want nil", err)
  1111  			}
  1112  			if err := StartProfilingMetrics(ProfilingMetricsOptions[*os.File]{
  1113  				Sink:    f,
  1114  				Lossy:   test.lossy,
  1115  				Metrics: test.profilingMetricsFlag,
  1116  				Rate:    profilingRate,
  1117  			}); err != nil {
  1118  				if test.errOnStartProfiling {
  1119  					return
  1120  				}
  1121  				t.Fatalf("StartProfilingMetrics error: got '%v' want '%v'", err, test.errOnStartProfiling)
  1122  			}
  1123  			if test.errOnStartProfiling {
  1124  				t.Fatal("did not error out on StartProflingMetrics as expected")
  1125  			}
  1126  
  1127  			// Generate some test data
  1128  			for i := 0; i < int(test.numIterations); i++ {
  1129  				for metricIdx, m := range metrics {
  1130  					m.IncrementBy(test.incrementMetricBy[metricIdx])
  1131  				}
  1132  				// Give time to the collector to record it.
  1133  				time.Sleep(profilingRate)
  1134  			}
  1135  
  1136  			StopProfilingMetrics()
  1137  
  1138  			f, err = os.Open(fName)
  1139  			if err != nil {
  1140  				t.Fatalf("failed to open file a second time: %v", err)
  1141  			}
  1142  
  1143  			// Check that the log looks sane:
  1144  			// - Header should match what we expect.
  1145  			// - We should have one metadata line per metric.
  1146  			// - We should have one start time line.
  1147  			// - If in lossy mode, we should have a hash line.
  1148  			// - If we have a hash, it should match the one computed by this test.
  1149  			// - Each timestamp should always be bigger.
  1150  			// - Each metric value should be at least as big as the previous.
  1151  			h := adler32.New()
  1152  			lines := bufio.NewScanner(f)
  1153  			expectedHeader := TimeColumn + "\t" + strings.Join(test.metricNames, "\t")
  1154  			if test.lossy {
  1155  				expectedHeader += "\tChecksum"
  1156  			}
  1157  			prevTS := uint64(0)
  1158  			prevValues := make([]uint64, numMetrics)
  1159  			numDatapoints := 0
  1160  			var hashLine string
  1161  			gotMetadataFor := make(map[string]struct{}, numMetrics)
  1162  			gotHeader := false
  1163  			gotStartTime := false
  1164  			for lines.Scan() {
  1165  				line := lines.Text()
  1166  				if line == "" {
  1167  					continue
  1168  				}
  1169  				if test.lossy {
  1170  					if !strings.HasPrefix(line, MetricsPrefix) {
  1171  						t.Fatalf("lossy writer output does not have expected prefix: got %q want %q", line, MetricsPrefix)
  1172  					}
  1173  					line = strings.TrimPrefix(line, MetricsPrefix)
  1174  				}
  1175  				if strings.HasPrefix(line, MetricsHashIndicator) {
  1176  					if hashLine != "" {
  1177  						t.Fatalf("got multiple hash lines")
  1178  					}
  1179  					hashLine = line
  1180  					continue
  1181  				}
  1182  				h.Write([]byte(line + "\n"))
  1183  				// Check line checksum
  1184  				if test.lossy {
  1185  					tabSplit := strings.Split(line, "\t")
  1186  					gotLineChecksum := tabSplit[len(tabSplit)-1]
  1187  					wantLineChecksum := fmt.Sprintf("0x%x", adler32.Checksum([]byte(strings.Join(tabSplit[:len(tabSplit)-1], "\t"))))
  1188  					if gotLineChecksum != wantLineChecksum {
  1189  						t.Errorf("got line checksum %q, want %q", gotLineChecksum, wantLineChecksum)
  1190  						continue
  1191  					}
  1192  					line = strings.TrimSuffix(line, "\t"+gotLineChecksum)
  1193  				}
  1194  				if strings.HasPrefix(line, MetricsMetaIndicator) {
  1195  					line = strings.TrimPrefix(line, MetricsMetaIndicator)
  1196  					components := strings.Split(line, "\t")
  1197  					if len(components) != 2 {
  1198  						t.Fatalf("got %d components in metadata line %q, want 2", len(components), line)
  1199  					}
  1200  					// We only verify that the metadata is present, not its contents.
  1201  					if components[0] == "" || components[1] == "" {
  1202  						t.Fatalf("got empty metadata line: %q", line)
  1203  					}
  1204  					gotMetadataFor[components[0]] = struct{}{}
  1205  					continue
  1206  				}
  1207  				if strings.HasPrefix(line, MetricsStartTimeIndicator) {
  1208  					if gotStartTime {
  1209  						t.Fatalf("got multiple start time lines")
  1210  					}
  1211  					gotStartTime = true
  1212  					continue
  1213  				}
  1214  				if !gotHeader {
  1215  					// This line must be the header.
  1216  					if line != expectedHeader {
  1217  						t.Fatalf("got header %q, want %q", line, expectedHeader)
  1218  					}
  1219  					gotHeader = true
  1220  					continue
  1221  				}
  1222  				numDatapoints++
  1223  				items := strings.Split(line, "\t")
  1224  				if len(items) != (numMetrics + 1) {
  1225  					t.Fatalf("incorrect number of items on line '%s': got %d, want %d", line, len(items), numMetrics+1)
  1226  				}
  1227  				// Check timestamp
  1228  				ts, err := strconv.ParseUint(items[0], 10, 64)
  1229  				if err != nil {
  1230  					t.Errorf("ts ParseUint error on line '%s': got '%v' want nil", line, err)
  1231  				}
  1232  				if ts <= prevTS && numDatapoints > 1 {
  1233  					t.Errorf("expecting timestamp to always increase on line '%s': got %d, previous was %d", line, ts, prevTS)
  1234  				}
  1235  				prevTS = ts
  1236  
  1237  				// Check metric values
  1238  				for i := 1; i <= numMetrics; i++ {
  1239  					m, err := strconv.ParseUint(items[i], 10, 64)
  1240  					if err != nil {
  1241  						t.Errorf("m ParseUint error on line '%s': got '%v' want nil", line, err)
  1242  					}
  1243  					if m < prevValues[i-1] {
  1244  						t.Errorf("expecting metric value to always increase on line '%s': got %d, previous was %d", line, m, prevValues[i-1])
  1245  					}
  1246  					prevValues[i-1] = m
  1247  				}
  1248  			}
  1249  			expectedMinNumDatapoints := int(0.7 * float32(test.numIterations))
  1250  			if numDatapoints < expectedMinNumDatapoints {
  1251  				t.Errorf("numDatapoints: got %d, want at least %d", numDatapoints, expectedMinNumDatapoints)
  1252  			}
  1253  			// Check that the final total for each metric is correct
  1254  			for i := 0; i < numMetrics; i++ {
  1255  				expected := test.numIterations * test.incrementMetricBy[i]
  1256  				if prevValues[i] != expected {
  1257  					t.Errorf("incorrect final metric value: got %d, want %d", prevValues[i], expected)
  1258  				}
  1259  			}
  1260  			if len(gotMetadataFor) != numMetrics {
  1261  				t.Errorf("got metadata for %d metrics, want %d", len(gotMetadataFor), numMetrics)
  1262  			}
  1263  			for _, metricName := range test.metricNames {
  1264  				if _, ok := gotMetadataFor[metricName]; !ok {
  1265  					t.Errorf("did not get metadata for metric %q", metricName)
  1266  				}
  1267  			}
  1268  			if !gotStartTime {
  1269  				t.Error("did not get start time metadata")
  1270  			}
  1271  			if !gotHeader {
  1272  				t.Error("did not get header")
  1273  			}
  1274  			if test.lossy {
  1275  				if hashLine == "" {
  1276  					t.Fatal("lossy writer output does not have expected hash line")
  1277  				}
  1278  				wantHash := fmt.Sprintf("0x%x", h.Sum32())
  1279  				gotHash := strings.TrimPrefix(hashLine, MetricsHashIndicator)
  1280  				if gotHash != wantHash {
  1281  					t.Fatalf("lossy writer output does not have expected hash line: got %q want %q", gotHash, wantHash)
  1282  				}
  1283  			} else {
  1284  				if hashLine != "" {
  1285  					t.Fatalf("unexpectedly found hash line in non-lossy output: %q", hashLine)
  1286  				}
  1287  			}
  1288  			f.Close()
  1289  		})
  1290  	}
  1291  }