github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/metrics_test.go

github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/metrics_test.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"internal/goexperiment"
    11  	"internal/profile"
    12  	"internal/testenv"
    13  	"os"
    14  	"reflect"
    15  	"runtime"
    16  	"runtime/debug"
    17  	"runtime/metrics"
    18  	"runtime/pprof"
    19  	"runtime/trace"
    20  	"slices"
    21  	"sort"
    22  	"strings"
    23  	"sync"
    24  	"sync/atomic"
    25  	"testing"
    26  	"time"
    27  	"unsafe"
    28  )
    29  
    30  func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
    31  	all := metrics.All()
    32  	samples := make([]metrics.Sample, len(all))
    33  	descs := make(map[string]metrics.Description)
    34  	for i := range all {
    35  		samples[i].Name = all[i].Name
    36  		descs[all[i].Name] = all[i]
    37  	}
    38  	return descs, samples
    39  }
    40  
    41  func TestReadMetrics(t *testing.T) {
    42  	// Run a GC cycle to get some of the stats to be non-zero.
    43  	runtime.GC()
    44  
    45  	// Set an arbitrary memory limit to check the metric for it
    46  	limit := int64(512 * 1024 * 1024)
    47  	oldLimit := debug.SetMemoryLimit(limit)
    48  	defer debug.SetMemoryLimit(oldLimit)
    49  
    50  	// Set a GC percent to check the metric for it
    51  	gcPercent := 99
    52  	oldGCPercent := debug.SetGCPercent(gcPercent)
    53  	defer debug.SetGCPercent(oldGCPercent)
    54  
    55  	// Tests whether readMetrics produces values aligning
    56  	// with ReadMemStats while the world is stopped.
    57  	var mstats runtime.MemStats
    58  	_, samples := prepareAllMetricsSamples()
    59  	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))
    60  
    61  	checkUint64 := func(t *testing.T, m string, got, want uint64) {
    62  		t.Helper()
    63  		if got != want {
    64  			t.Errorf("metric %q: got %d, want %d", m, got, want)
    65  		}
    66  	}
    67  
    68  	// Check to make sure the values we read line up with other values we read.
    69  	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
    70  	var tinyAllocs uint64
    71  	var mallocs, frees uint64
    72  	for i := range samples {
    73  		switch name := samples[i].Name; name {
    74  		case "/cgo/go-to-c-calls:calls":
    75  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
    76  		case "/memory/classes/heap/free:bytes":
    77  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
    78  		case "/memory/classes/heap/released:bytes":
    79  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
    80  		case "/memory/classes/heap/objects:bytes":
    81  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
    82  		case "/memory/classes/heap/unused:bytes":
    83  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
    84  		case "/memory/classes/heap/stacks:bytes":
    85  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
    86  		case "/memory/classes/metadata/mcache/free:bytes":
    87  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
    88  		case "/memory/classes/metadata/mcache/inuse:bytes":
    89  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
    90  		case "/memory/classes/metadata/mspan/free:bytes":
    91  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
    92  		case "/memory/classes/metadata/mspan/inuse:bytes":
    93  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
    94  		case "/memory/classes/metadata/other:bytes":
    95  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
    96  		case "/memory/classes/os-stacks:bytes":
    97  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
    98  		case "/memory/classes/other:bytes":
    99  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
   100  		case "/memory/classes/profiling/buckets:bytes":
   101  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
   102  		case "/memory/classes/total:bytes":
   103  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
   104  		case "/gc/heap/allocs-by-size:bytes":
   105  			hist := samples[i].Value.Float64Histogram()
   106  			// Skip size class 0 in BySize, because it's always empty and not represented
   107  			// in the histogram.
   108  			for i, sc := range mstats.BySize[1:] {
   109  				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
   110  					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
   111  					// The rest of the checks aren't expected to work anyway.
   112  					continue
   113  				}
   114  				if c, m := hist.Counts[i], sc.Mallocs; c != m {
   115  					t.Errorf("histogram counts do not much BySize for class %d: got %d, want %d", i, c, m)
   116  				}
   117  			}
   118  			allocsBySize = hist
   119  		case "/gc/heap/allocs:bytes":
   120  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
   121  		case "/gc/heap/frees-by-size:bytes":
   122  			hist := samples[i].Value.Float64Histogram()
   123  			// Skip size class 0 in BySize, because it's always empty and not represented
   124  			// in the histogram.
   125  			for i, sc := range mstats.BySize[1:] {
   126  				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
   127  					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
   128  					// The rest of the checks aren't expected to work anyway.
   129  					continue
   130  				}
   131  				if c, f := hist.Counts[i], sc.Frees; c != f {
   132  					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
   133  				}
   134  			}
   135  		case "/gc/heap/frees:bytes":
   136  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
   137  		case "/gc/heap/tiny/allocs:objects":
   138  			// Currently, MemStats adds tiny alloc count to both Mallocs AND Frees.
   139  			// The reason for this is because MemStats couldn't be extended at the time
   140  			// but there was a desire to have Mallocs at least be a little more representative,
   141  			// while having Mallocs - Frees still represent a live object count.
   142  			// Unfortunately, MemStats doesn't actually export a large allocation count,
   143  			// so it's impossible to pull this number out directly.
   144  			//
   145  			// Check tiny allocation count outside of this loop, by using the allocs-by-size
   146  			// histogram in order to figure out how many large objects there are.
   147  			tinyAllocs = samples[i].Value.Uint64()
   148  			// Because the next two metrics tests are checking against Mallocs and Frees,
   149  			// we can't check them directly for the same reason: we need to account for tiny
   150  			// allocations included in Mallocs and Frees.
   151  		case "/gc/heap/allocs:objects":
   152  			mallocs = samples[i].Value.Uint64()
   153  		case "/gc/heap/frees:objects":
   154  			frees = samples[i].Value.Uint64()
   155  		case "/gc/heap/live:bytes":
   156  			// Check for "obviously wrong" values. We can't check a stronger invariant,
   157  			// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
   158  			// under racy conditions, and some objects may be double-counted (this is
   159  			// intentional and necessary for GC performance).
   160  			//
   161  			// Instead, check against a much more reasonable upper-bound: the amount of
   162  			// mapped heap memory. We can't possibly overcount to the point of exceeding
   163  			// total mapped heap memory, except if there's an accounting bug.
   164  			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
   165  				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
   166  			} else if live == 0 {
   167  				// Might happen if we don't call runtime.GC() above.
   168  				t.Error("live bytes is 0")
   169  			}
   170  		case "/gc/gomemlimit:bytes":
   171  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
   172  		case "/gc/heap/objects:objects":
   173  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
   174  		case "/gc/heap/goal:bytes":
   175  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
   176  		case "/gc/gogc:percent":
   177  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
   178  		case "/gc/cycles/automatic:gc-cycles":
   179  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
   180  		case "/gc/cycles/forced:gc-cycles":
   181  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
   182  		case "/gc/cycles/total:gc-cycles":
   183  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
   184  		case "/gc/pauses:seconds":
   185  			gcPauses = samples[i].Value.Float64Histogram()
   186  		case "/sched/pauses/total/gc:seconds":
   187  			schedPausesTotalGC = samples[i].Value.Float64Histogram()
   188  		}
   189  	}
   190  
   191  	// Check tinyAllocs.
   192  	nonTinyAllocs := uint64(0)
   193  	for _, c := range allocsBySize.Counts {
   194  		nonTinyAllocs += c
   195  	}
   196  	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)
   197  
   198  	// Check allocation and free counts.
   199  	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
   200  	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)
   201  
   202  	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds
   203  	if !reflect.DeepEqual(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
   204  		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Counts)
   205  	}
   206  	if !reflect.DeepEqual(gcPauses.Counts, schedPausesTotalGC.Counts) {
   207  		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
   208  	}
   209  }
   210  
   211  func TestReadMetricsConsistency(t *testing.T) {
   212  	// Tests whether readMetrics produces consistent, sensible values.
   213  	// The values are read concurrently with the runtime doing other
   214  	// things (e.g. allocating) so what we read can't reasonably compared
   215  	// to other runtime values (e.g. MemStats).
   216  
   217  	// Run a few GC cycles to get some of the stats to be non-zero.
   218  	runtime.GC()
   219  	runtime.GC()
   220  	runtime.GC()
   221  
   222  	// Set GOMAXPROCS high then sleep briefly to ensure we generate
   223  	// some idle time.
   224  	oldmaxprocs := runtime.GOMAXPROCS(10)
   225  	time.Sleep(time.Millisecond)
   226  	runtime.GOMAXPROCS(oldmaxprocs)
   227  
   228  	// Read all the supported metrics through the metrics package.
   229  	descs, samples := prepareAllMetricsSamples()
   230  	metrics.Read(samples)
   231  
   232  	// Check to make sure the values we read make sense.
   233  	var totalVirtual struct {
   234  		got, want uint64
   235  	}
   236  	var objects struct {
   237  		alloc, free             *metrics.Float64Histogram
   238  		allocs, frees           uint64
   239  		allocdBytes, freedBytes uint64
   240  		total, totalBytes       uint64
   241  	}
   242  	var gc struct {
   243  		numGC  uint64
   244  		pauses uint64
   245  	}
   246  	var totalScan struct {
   247  		got, want uint64
   248  	}
   249  	var cpu struct {
   250  		gcAssist    float64
   251  		gcDedicated float64
   252  		gcIdle      float64
   253  		gcPause     float64
   254  		gcTotal     float64
   255  
   256  		idle float64
   257  		user float64
   258  
   259  		scavengeAssist float64
   260  		scavengeBg     float64
   261  		scavengeTotal  float64
   262  
   263  		total float64
   264  	}
   265  	for i := range samples {
   266  		kind := samples[i].Value.Kind()
   267  		if want := descs[samples[i].Name].Kind; kind != want {
   268  			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
   269  			continue
   270  		}
   271  		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
   272  			v := samples[i].Value.Uint64()
   273  			totalVirtual.want += v
   274  
   275  			// None of these stats should ever get this big.
   276  			// If they do, there's probably overflow involved,
   277  			// usually due to bad accounting.
   278  			if int64(v) < 0 {
   279  				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
   280  			}
   281  		}
   282  		switch samples[i].Name {
   283  		case "/cpu/classes/gc/mark/assist:cpu-seconds":
   284  			cpu.gcAssist = samples[i].Value.Float64()
   285  		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
   286  			cpu.gcDedicated = samples[i].Value.Float64()
   287  		case "/cpu/classes/gc/mark/idle:cpu-seconds":
   288  			cpu.gcIdle = samples[i].Value.Float64()
   289  		case "/cpu/classes/gc/pause:cpu-seconds":
   290  			cpu.gcPause = samples[i].Value.Float64()
   291  		case "/cpu/classes/gc/total:cpu-seconds":
   292  			cpu.gcTotal = samples[i].Value.Float64()
   293  		case "/cpu/classes/idle:cpu-seconds":
   294  			cpu.idle = samples[i].Value.Float64()
   295  		case "/cpu/classes/scavenge/assist:cpu-seconds":
   296  			cpu.scavengeAssist = samples[i].Value.Float64()
   297  		case "/cpu/classes/scavenge/background:cpu-seconds":
   298  			cpu.scavengeBg = samples[i].Value.Float64()
   299  		case "/cpu/classes/scavenge/total:cpu-seconds":
   300  			cpu.scavengeTotal = samples[i].Value.Float64()
   301  		case "/cpu/classes/total:cpu-seconds":
   302  			cpu.total = samples[i].Value.Float64()
   303  		case "/cpu/classes/user:cpu-seconds":
   304  			cpu.user = samples[i].Value.Float64()
   305  		case "/memory/classes/total:bytes":
   306  			totalVirtual.got = samples[i].Value.Uint64()
   307  		case "/memory/classes/heap/objects:bytes":
   308  			objects.totalBytes = samples[i].Value.Uint64()
   309  		case "/gc/heap/objects:objects":
   310  			objects.total = samples[i].Value.Uint64()
   311  		case "/gc/heap/allocs:bytes":
   312  			objects.allocdBytes = samples[i].Value.Uint64()
   313  		case "/gc/heap/allocs:objects":
   314  			objects.allocs = samples[i].Value.Uint64()
   315  		case "/gc/heap/allocs-by-size:bytes":
   316  			objects.alloc = samples[i].Value.Float64Histogram()
   317  		case "/gc/heap/frees:bytes":
   318  			objects.freedBytes = samples[i].Value.Uint64()
   319  		case "/gc/heap/frees:objects":
   320  			objects.frees = samples[i].Value.Uint64()
   321  		case "/gc/heap/frees-by-size:bytes":
   322  			objects.free = samples[i].Value.Float64Histogram()
   323  		case "/gc/cycles:gc-cycles":
   324  			gc.numGC = samples[i].Value.Uint64()
   325  		case "/gc/pauses:seconds":
   326  			h := samples[i].Value.Float64Histogram()
   327  			gc.pauses = 0
   328  			for i := range h.Counts {
   329  				gc.pauses += h.Counts[i]
   330  			}
   331  		case "/gc/scan/heap:bytes":
   332  			totalScan.want += samples[i].Value.Uint64()
   333  		case "/gc/scan/globals:bytes":
   334  			totalScan.want += samples[i].Value.Uint64()
   335  		case "/gc/scan/stack:bytes":
   336  			totalScan.want += samples[i].Value.Uint64()
   337  		case "/gc/scan/total:bytes":
   338  			totalScan.got = samples[i].Value.Uint64()
   339  		case "/sched/gomaxprocs:threads":
   340  			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
   341  				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
   342  			}
   343  		case "/sched/goroutines:goroutines":
   344  			if samples[i].Value.Uint64() < 1 {
   345  				t.Error("number of goroutines is less than one")
   346  			}
   347  		}
   348  	}
   349  	// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
   350  	if runtime.GOOS == "linux" {
   351  		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
   352  			t.Errorf("found no time spent on GC work: %#v", cpu)
   353  		}
   354  		if cpu.gcPause <= 0 {
   355  			t.Errorf("found no GC pauses: %f", cpu.gcPause)
   356  		}
   357  		if cpu.idle <= 0 {
   358  			t.Errorf("found no idle time: %f", cpu.idle)
   359  		}
   360  		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.01) {
   361  			t.Errorf("calculated total GC CPU not within 1%% of sampled total: %f vs. %f", total, cpu.gcTotal)
   362  		}
   363  		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.01) {
   364  			t.Errorf("calculated total scavenge CPU not within 1%% of sampled total: %f vs. %f", total, cpu.scavengeTotal)
   365  		}
   366  		if cpu.total <= 0 {
   367  			t.Errorf("found no total CPU time passed")
   368  		}
   369  		if cpu.user <= 0 {
   370  			t.Errorf("found no user time passed")
   371  		}
   372  		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.02) {
   373  			t.Errorf("calculated total CPU not within 2%% of sampled total: %f vs. %f", total, cpu.total)
   374  		}
   375  	}
   376  	if totalVirtual.got != totalVirtual.want {
   377  		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
   378  	}
   379  	if got, want := objects.allocs-objects.frees, objects.total; got != want {
   380  		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
   381  	}
   382  	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
   383  		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
   384  	}
   385  	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
   386  		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
   387  	}
   388  	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
   389  		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
   390  	}
   391  	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
   392  		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
   393  	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
   394  		t.Error("allocs-by-size and frees-by-size counts don't match in length")
   395  	} else {
   396  		for i := range objects.alloc.Buckets {
   397  			ba := objects.alloc.Buckets[i]
   398  			bf := objects.free.Buckets[i]
   399  			if ba != bf {
   400  				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
   401  			}
   402  		}
   403  		if !t.Failed() {
   404  			var gotAlloc, gotFree uint64
   405  			want := objects.total
   406  			for i := range objects.alloc.Counts {
   407  				if objects.alloc.Counts[i] < objects.free.Counts[i] {
   408  					t.Errorf("found more allocs than frees in object dist bucket %d", i)
   409  					continue
   410  				}
   411  				gotAlloc += objects.alloc.Counts[i]
   412  				gotFree += objects.free.Counts[i]
   413  			}
   414  			if got := gotAlloc - gotFree; got != want {
   415  				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
   416  			}
   417  			if gotAlloc != objects.allocs {
   418  				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
   419  			}
   420  			if gotFree != objects.frees {
   421  				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotFree, objects.frees)
   422  			}
   423  		}
   424  	}
   425  	// The current GC has at least 2 pauses per GC.
   426  	// Check to see if that value makes sense.
   427  	if gc.pauses < gc.numGC*2 {
   428  		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
   429  	}
   430  	if totalScan.got <= 0 {
   431  		t.Errorf("scannable GC space is empty: %d", totalScan.got)
   432  	}
   433  	if totalScan.got != totalScan.want {
   434  		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
   435  	}
   436  }
   437  
   438  func BenchmarkReadMetricsLatency(b *testing.B) {
   439  	stop := applyGCLoad(b)
   440  
   441  	// Spend this much time measuring latencies.
   442  	latencies := make([]time.Duration, 0, 1024)
   443  	_, samples := prepareAllMetricsSamples()
   444  
   445  	// Hit metrics.Read continuously and measure.
   446  	b.ResetTimer()
   447  	for i := 0; i < b.N; i++ {
   448  		start := time.Now()
   449  		metrics.Read(samples)
   450  		latencies = append(latencies, time.Since(start))
   451  	}
   452  	// Make sure to stop the timer before we wait! The load created above
   453  	// is very heavy-weight and not easy to stop, so we could end up
   454  	// confusing the benchmarking framework for small b.N.
   455  	b.StopTimer()
   456  	stop()
   457  
   458  	// Disable the default */op metrics.
   459  	// ns/op doesn't mean anything because it's an average, but we
   460  	// have a sleep in our b.N loop above which skews this significantly.
   461  	b.ReportMetric(0, "ns/op")
   462  	b.ReportMetric(0, "B/op")
   463  	b.ReportMetric(0, "allocs/op")
   464  
   465  	// Sort latencies then report percentiles.
   466  	sort.Slice(latencies, func(i, j int) bool {
   467  		return latencies[i] < latencies[j]
   468  	})
   469  	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
   470  	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
   471  	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
   472  }
   473  
   474  var readMetricsSink [1024]interface{}
   475  
   476  func TestReadMetricsCumulative(t *testing.T) {
   477  	// Set up the set of metrics marked cumulative.
   478  	descs := metrics.All()
   479  	var samples [2][]metrics.Sample
   480  	samples[0] = make([]metrics.Sample, len(descs))
   481  	samples[1] = make([]metrics.Sample, len(descs))
   482  	total := 0
   483  	for i := range samples[0] {
   484  		if !descs[i].Cumulative {
   485  			continue
   486  		}
   487  		samples[0][total].Name = descs[i].Name
   488  		total++
   489  	}
   490  	samples[0] = samples[0][:total]
   491  	samples[1] = samples[1][:total]
   492  	copy(samples[1], samples[0])
   493  
   494  	// Start some noise in the background.
   495  	var wg sync.WaitGroup
   496  	wg.Add(1)
   497  	done := make(chan struct{})
   498  	go func() {
   499  		defer wg.Done()
   500  		for {
   501  			// Add more things here that could influence metrics.
   502  			for i := 0; i < len(readMetricsSink); i++ {
   503  				readMetricsSink[i] = make([]byte, 1024)
   504  				select {
   505  				case <-done:
   506  					return
   507  				default:
   508  				}
   509  			}
   510  			runtime.GC()
   511  		}
   512  	}()
   513  
   514  	sum := func(us []uint64) uint64 {
   515  		total := uint64(0)
   516  		for _, u := range us {
   517  			total += u
   518  		}
   519  		return total
   520  	}
   521  
   522  	// Populate the first generation.
   523  	metrics.Read(samples[0])
   524  
   525  	// Check to make sure that these metrics only grow monotonically.
   526  	for gen := 1; gen < 10; gen++ {
   527  		metrics.Read(samples[gen%2])
   528  		for i := range samples[gen%2] {
   529  			name := samples[gen%2][i].Name
   530  			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value
   531  
   532  			switch vNew.Kind() {
   533  			case metrics.KindUint64:
   534  				new := vNew.Uint64()
   535  				old := vOld.Uint64()
   536  				if new < old {
   537  					t.Errorf("%s decreased: %d < %d", name, new, old)
   538  				}
   539  			case metrics.KindFloat64:
   540  				new := vNew.Float64()
   541  				old := vOld.Float64()
   542  				if new < old {
   543  					t.Errorf("%s decreased: %f < %f", name, new, old)
   544  				}
   545  			case metrics.KindFloat64Histogram:
   546  				new := sum(vNew.Float64Histogram().Counts)
   547  				old := sum(vOld.Float64Histogram().Counts)
   548  				if new < old {
   549  					t.Errorf("%s counts decreased: %d < %d", name, new, old)
   550  				}
   551  			}
   552  		}
   553  	}
   554  	close(done)
   555  
   556  	wg.Wait()
   557  }
   558  
   559  func withinEpsilon(v1, v2, e float64) bool {
   560  	return v2-v2*e <= v1 && v1 <= v2+v2*e
   561  }
   562  
   563  func TestMutexWaitTimeMetric(t *testing.T) {
   564  	var sample [1]metrics.Sample
   565  	sample[0].Name = "/sync/mutex/wait/total:seconds"
   566  
   567  	locks := []locker2{
   568  		new(mutex),
   569  		new(rwmutexWrite),
   570  		new(rwmutexReadWrite),
   571  		new(rwmutexWriteRead),
   572  	}
   573  	for _, lock := range locks {
   574  		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
   575  			metrics.Read(sample[:])
   576  			before := time.Duration(sample[0].Value.Float64() * 1e9)
   577  
   578  			minMutexWaitTime := generateMutexWaitTime(lock)
   579  
   580  			metrics.Read(sample[:])
   581  			after := time.Duration(sample[0].Value.Float64() * 1e9)
   582  
   583  			if wt := after - before; wt < minMutexWaitTime {
   584  				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
   585  			}
   586  		})
   587  	}
   588  }
   589  
   590  // locker2 represents an API surface of two concurrent goroutines
   591  // locking the same resource, but through different APIs. It's intended
   592  // to abstract over the relationship of two Lock calls or an RLock
   593  // and a Lock call.
   594  type locker2 interface {
   595  	Lock1()
   596  	Unlock1()
   597  	Lock2()
   598  	Unlock2()
   599  }
   600  
   601  type mutex struct {
   602  	mu sync.Mutex
   603  }
   604  
   605  func (m *mutex) Lock1()   { m.mu.Lock() }
   606  func (m *mutex) Unlock1() { m.mu.Unlock() }
   607  func (m *mutex) Lock2()   { m.mu.Lock() }
   608  func (m *mutex) Unlock2() { m.mu.Unlock() }
   609  
   610  type rwmutexWrite struct {
   611  	mu sync.RWMutex
   612  }
   613  
   614  func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
   615  func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
   616  func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
   617  func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }
   618  
   619  type rwmutexReadWrite struct {
   620  	mu sync.RWMutex
   621  }
   622  
   623  func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
   624  func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
   625  func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
   626  func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }
   627  
   628  type rwmutexWriteRead struct {
   629  	mu sync.RWMutex
   630  }
   631  
   632  func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
   633  func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
   634  func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
   635  func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }
   636  
   637  // generateMutexWaitTime causes a couple of goroutines
   638  // to block a whole bunch of times on a sync.Mutex, returning
   639  // the minimum amount of time that should be visible in the
   640  // /sync/mutex-wait:seconds metric.
   641  func generateMutexWaitTime(mu locker2) time.Duration {
   642  	// Set up the runtime to always track casgstatus transitions for metrics.
   643  	*runtime.CasGStatusAlwaysTrack = true
   644  
   645  	mu.Lock1()
   646  
   647  	// Start up a goroutine to wait on the lock.
   648  	gc := make(chan *runtime.G)
   649  	done := make(chan bool)
   650  	go func() {
   651  		gc <- runtime.Getg()
   652  
   653  		for {
   654  			mu.Lock2()
   655  			mu.Unlock2()
   656  			if <-done {
   657  				return
   658  			}
   659  		}
   660  	}()
   661  	gp := <-gc
   662  
   663  	// Set the block time high enough so that it will always show up, even
   664  	// on systems with coarse timer granularity.
   665  	const blockTime = 100 * time.Millisecond
   666  
   667  	// Make sure the goroutine spawned above actually blocks on the lock.
   668  	for {
   669  		if runtime.GIsWaitingOnMutex(gp) {
   670  			break
   671  		}
   672  		runtime.Gosched()
   673  	}
   674  
   675  	// Let some amount of time pass.
   676  	time.Sleep(blockTime)
   677  
   678  	// Let the other goroutine acquire the lock.
   679  	mu.Unlock1()
   680  	done <- true
   681  
   682  	// Reset flag.
   683  	*runtime.CasGStatusAlwaysTrack = false
   684  	return blockTime
   685  }
   686  
   687  // See issue #60276.
   688  func TestCPUMetricsSleep(t *testing.T) {
   689  	if runtime.GOOS == "wasip1" {
   690  		// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
   691  		// time. This is accurately reflected in the metrics, but it means this
   692  		// test is basically meaningless on this platform.
   693  		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
   694  	}
   695  
   696  	names := []string{
   697  		"/cpu/classes/idle:cpu-seconds",
   698  
   699  		"/cpu/classes/gc/mark/assist:cpu-seconds",
   700  		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
   701  		"/cpu/classes/gc/mark/idle:cpu-seconds",
   702  		"/cpu/classes/gc/pause:cpu-seconds",
   703  		"/cpu/classes/gc/total:cpu-seconds",
   704  		"/cpu/classes/scavenge/assist:cpu-seconds",
   705  		"/cpu/classes/scavenge/background:cpu-seconds",
   706  		"/cpu/classes/scavenge/total:cpu-seconds",
   707  		"/cpu/classes/total:cpu-seconds",
   708  		"/cpu/classes/user:cpu-seconds",
   709  	}
   710  	prep := func() []metrics.Sample {
   711  		mm := make([]metrics.Sample, len(names))
   712  		for i := range names {
   713  			mm[i].Name = names[i]
   714  		}
   715  		return mm
   716  	}
   717  	m1, m2 := prep(), prep()
   718  
   719  	const (
   720  		// Expected time spent idle.
   721  		dur = 100 * time.Millisecond
   722  
   723  		// maxFailures is the number of consecutive failures requires to cause the test to fail.
   724  		maxFailures = 10
   725  	)
   726  
   727  	failureIdleTimes := make([]float64, 0, maxFailures)
   728  
   729  	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
   730  	// as user time rather than idle time. In an ideal world we'd expect the whole application
   731  	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
   732  	// duration. However, the Go runtime can easily eat into idle time while this goroutine is
   733  	// blocked in a sleep. For example, slow platforms might spend more time expected in the
   734  	// scheduler. Another example is that a Go runtime background goroutine could run while
   735  	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
   736  	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
   737  	// observe itself as idle with nanotime.
   738  	//
   739  	// To deal with all this, we give a half-proc's worth of leniency.
   740  	//
   741  	// We also retry multiple times to deal with the fact that the OS might deschedule us before
   742  	// we yield and go idle. That has a rare enough chance that retries should resolve it.
   743  	// If the issue we expect is happening, it should be persistent.
   744  	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)
   745  
   746  	// Let's make sure there's no background scavenge work to do.
   747  	//
   748  	// The runtime.GC calls below ensure the background sweeper
   749  	// will not run during the idle period.
   750  	debug.FreeOSMemory()
   751  
   752  	for retries := 0; retries < maxFailures; retries++ {
   753  		// Read 1.
   754  		runtime.GC() // Update /cpu/classes metrics.
   755  		metrics.Read(m1)
   756  
   757  		// Sleep.
   758  		time.Sleep(dur)
   759  
   760  		// Read 2.
   761  		runtime.GC() // Update /cpu/classes metrics.
   762  		metrics.Read(m2)
   763  
   764  		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
   765  		if dt >= minIdleCPUSeconds {
   766  			// All is well. Test passed.
   767  			return
   768  		}
   769  		failureIdleTimes = append(failureIdleTimes, dt)
   770  		// Try again.
   771  	}
   772  
   773  	// We couldn't observe the expected idle time even once.
   774  	for i, dt := range failureIdleTimes {
   775  		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
   776  	}
   777  	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
   778  	for i := range names {
   779  		if m1[i].Value.Kind() == metrics.KindBad {
   780  			continue
   781  		}
   782  		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
   783  	}
   784  	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
   785  }
   786  
   787  // Call f() and verify that the correct STW metrics increment. If isGC is true,
   788  // fn triggers a GC STW. Otherwise, fn triggers an other STW.
   789  func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
   790  	m := []metrics.Sample{
   791  		{Name: "/sched/pauses/stopping/gc:seconds"},
   792  		{Name: "/sched/pauses/stopping/other:seconds"},
   793  		{Name: "/sched/pauses/total/gc:seconds"},
   794  		{Name: "/sched/pauses/total/other:seconds"},
   795  	}
   796  
   797  	stoppingGC := &m[0]
   798  	stoppingOther := &m[1]
   799  	totalGC := &m[2]
   800  	totalOther := &m[3]
   801  
   802  	sampleCount := func(s *metrics.Sample) uint64 {
   803  		h := s.Value.Float64Histogram()
   804  
   805  		var n uint64
   806  		for _, c := range h.Counts {
   807  			n += c
   808  		}
   809  		return n
   810  	}
   811  
   812  	// Read baseline.
   813  	metrics.Read(m)
   814  
   815  	baselineStartGC := sampleCount(stoppingGC)
   816  	baselineStartOther := sampleCount(stoppingOther)
   817  	baselineTotalGC := sampleCount(totalGC)
   818  	baselineTotalOther := sampleCount(totalOther)
   819  
   820  	fn(t)
   821  
   822  	metrics.Read(m)
   823  
   824  	if isGC {
   825  		if got := sampleCount(stoppingGC); got <= baselineStartGC {
   826  			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
   827  		}
   828  		if got := sampleCount(totalGC); got <= baselineTotalGC {
   829  			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
   830  		}
   831  
   832  		if got := sampleCount(stoppingOther); got != baselineStartOther {
   833  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
   834  		}
   835  		if got := sampleCount(totalOther); got != baselineTotalOther {
   836  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
   837  		}
   838  	} else {
   839  		if got := sampleCount(stoppingGC); got != baselineStartGC {
   840  			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
   841  		}
   842  		if got := sampleCount(totalGC); got != baselineTotalGC {
   843  			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
   844  		}
   845  
   846  		if got := sampleCount(stoppingOther); got <= baselineStartOther {
   847  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
   848  		}
   849  		if got := sampleCount(totalOther); got <= baselineTotalOther {
   850  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
   851  		}
   852  	}
   853  }
   854  
   855  func TestSchedPauseMetrics(t *testing.T) {
   856  	tests := []struct {
   857  		name string
   858  		isGC bool
   859  		fn   func(t *testing.T)
   860  	}{
   861  		{
   862  			name: "runtime.GC",
   863  			isGC: true,
   864  			fn: func(t *testing.T) {
   865  				runtime.GC()
   866  			},
   867  		},
   868  		{
   869  			name: "runtime.GOMAXPROCS",
   870  			fn: func(t *testing.T) {
   871  				if runtime.GOARCH == "wasm" {
   872  					t.Skip("GOMAXPROCS >1 not supported on wasm")
   873  				}
   874  
   875  				n := runtime.GOMAXPROCS(0)
   876  				defer runtime.GOMAXPROCS(n)
   877  
   878  				runtime.GOMAXPROCS(n + 1)
   879  			},
   880  		},
   881  		{
   882  			name: "runtime.GoroutineProfile",
   883  			fn: func(t *testing.T) {
   884  				var s [1]runtime.StackRecord
   885  				runtime.GoroutineProfile(s[:])
   886  			},
   887  		},
   888  		{
   889  			name: "runtime.ReadMemStats",
   890  			fn: func(t *testing.T) {
   891  				var mstats runtime.MemStats
   892  				runtime.ReadMemStats(&mstats)
   893  			},
   894  		},
   895  		{
   896  			name: "runtime.Stack",
   897  			fn: func(t *testing.T) {
   898  				var b [64]byte
   899  				runtime.Stack(b[:], true)
   900  			},
   901  		},
   902  		{
   903  			name: "runtime/debug.WriteHeapDump",
   904  			fn: func(t *testing.T) {
   905  				if runtime.GOOS == "js" {
   906  					t.Skip("WriteHeapDump not supported on js")
   907  				}
   908  
   909  				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
   910  				if err != nil {
   911  					t.Fatalf("os.CreateTemp failed: %v", err)
   912  				}
   913  				defer os.Remove(f.Name())
   914  				defer f.Close()
   915  				debug.WriteHeapDump(f.Fd())
   916  			},
   917  		},
   918  		{
   919  			name: "runtime/trace.Start",
   920  			fn: func(t *testing.T) {
   921  				if trace.IsEnabled() {
   922  					t.Skip("tracing already enabled")
   923  				}
   924  
   925  				var buf bytes.Buffer
   926  				if err := trace.Start(&buf); err != nil {
   927  					t.Errorf("trace.Start err got %v want nil", err)
   928  				}
   929  				trace.Stop()
   930  			},
   931  		},
   932  	}
   933  
   934  	// These tests count STW pauses, classified based on whether they're related
   935  	// to the GC or not. Disable automatic GC cycles during the test so we don't
   936  	// have an incidental GC pause when we're trying to observe only
   937  	// non-GC-related pauses. This is especially important for the
   938  	// runtime/trace.Start test, since (as of this writing) that will block
   939  	// until any active GC mark phase completes.
   940  	defer debug.SetGCPercent(debug.SetGCPercent(-1))
   941  	runtime.GC()
   942  
   943  	for _, tc := range tests {
   944  		t.Run(tc.name, func(t *testing.T) {
   945  			testSchedPauseMetrics(t, tc.fn, tc.isGC)
   946  		})
   947  	}
   948  }
   949  
   950  func TestRuntimeLockMetricsAndProfile(t *testing.T) {
   951  	testenv.SkipFlaky(t, 64253)
   952  
   953  	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
   954  	defer runtime.SetMutexProfileFraction(old)
   955  	if old != 0 {
   956  		t.Fatalf("need MutexProfileRate 0, got %d", old)
   957  	}
   958  
   959  	{
   960  		before := os.Getenv("GODEBUG")
   961  		for _, s := range strings.Split(before, ",") {
   962  			if strings.HasPrefix(s, "runtimecontentionstacks=") {
   963  				t.Logf("GODEBUG includes explicit setting %q", s)
   964  			}
   965  		}
   966  		defer func() { os.Setenv("GODEBUG", before) }()
   967  		os.Setenv("GODEBUG", fmt.Sprintf("%s,runtimecontentionstacks=1", before))
   968  	}
   969  
   970  	t.Logf("NumCPU %d", runtime.NumCPU())
   971  	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
   972  	if minCPU := 2; runtime.NumCPU() < minCPU {
   973  		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
   974  	}
   975  
   976  	loadProfile := func(t *testing.T) *profile.Profile {
   977  		var w bytes.Buffer
   978  		pprof.Lookup("mutex").WriteTo(&w, 0)
   979  		p, err := profile.Parse(&w)
   980  		if err != nil {
   981  			t.Fatalf("failed to parse profile: %v", err)
   982  		}
   983  		if err := p.CheckValid(); err != nil {
   984  			t.Fatalf("invalid profile: %v", err)
   985  		}
   986  		return p
   987  	}
   988  
   989  	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
   990  		beforeProfile := loadProfile(t)
   991  		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
   992  		metrics.Read(beforeMetrics)
   993  
   994  		fn()
   995  
   996  		afterProfile := loadProfile(t)
   997  		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
   998  		metrics.Read(afterMetrics)
   999  
  1000  		sumSamples := func(p *profile.Profile, i int) int64 {
  1001  			var sum int64
  1002  			for _, s := range p.Sample {
  1003  				sum += s.Value[i]
  1004  			}
  1005  			return sum
  1006  		}
  1007  
  1008  		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
  1009  		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()
  1010  
  1011  		// The internal/profile package does not support compaction; this delta
  1012  		// profile will include separate positive and negative entries.
  1013  		p = afterProfile.Copy()
  1014  		if len(beforeProfile.Sample) > 0 {
  1015  			err := p.Merge(beforeProfile, -1)
  1016  			if err != nil {
  1017  				t.Fatalf("Merge profiles: %v", err)
  1018  			}
  1019  		}
  1020  
  1021  		return metricGrowth, profileGrowth, p
  1022  	}
  1023  
  1024  	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
  1025  		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
  1026  			metricGrowth, profileGrowth, p := measureDelta(t, func() {
  1027  				var started, stopped sync.WaitGroup
  1028  				started.Add(workers)
  1029  				stopped.Add(workers)
  1030  				for i := 0; i < workers; i++ {
  1031  					w := &contentionWorker{
  1032  						before: func() {
  1033  							started.Done()
  1034  							started.Wait()
  1035  						},
  1036  						after: func() {
  1037  							stopped.Done()
  1038  						},
  1039  						fn: fn,
  1040  					}
  1041  					go w.run()
  1042  				}
  1043  				stopped.Wait()
  1044  			})
  1045  
  1046  			if profileGrowth == 0 {
  1047  				t.Errorf("no increase in mutex profile")
  1048  			}
  1049  			if metricGrowth == 0 && strictTiming {
  1050  				// If the critical section is very short, systems with low timer
  1051  				// resolution may be unable to measure it via nanotime.
  1052  				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
  1053  			}
  1054  			// This comparison is possible because the time measurements in support of
  1055  			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
  1056  			// together. It doesn't work as well for user-space contention, where the
  1057  			// involved goroutines are not _Grunnable the whole time and so need to pass
  1058  			// through the scheduler.
  1059  			t.Logf("lock contention growth in runtime/pprof's view  (%fs)", profileGrowth)
  1060  			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
  1061  
  1062  			acceptStacks = append([][]string(nil), acceptStacks...)
  1063  			for i, stk := range acceptStacks {
  1064  				if goexperiment.StaticLockRanking {
  1065  					if !slices.ContainsFunc(stk, func(s string) bool {
  1066  						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
  1067  					}) {
  1068  						// stk is a call stack that is still on the user stack when
  1069  						// it calls runtime.unlock. Add the extra function that
  1070  						// we'll see, when the static lock ranking implementation of
  1071  						// runtime.unlockWithRank switches to the system stack.
  1072  						stk = append([]string{"runtime.unlockWithRank"}, stk...)
  1073  					}
  1074  				}
  1075  				acceptStacks[i] = stk
  1076  			}
  1077  
  1078  			var stks [][]string
  1079  			values := make([][2]int64, len(acceptStacks))
  1080  			for _, s := range p.Sample {
  1081  				var have []string
  1082  				for _, loc := range s.Location {
  1083  					for _, line := range loc.Line {
  1084  						have = append(have, line.Function.Name)
  1085  					}
  1086  				}
  1087  				stks = append(stks, have)
  1088  				for i, stk := range acceptStacks {
  1089  					if slices.Equal(have, stk) {
  1090  						values[i][0] += s.Value[0]
  1091  						values[i][1] += s.Value[1]
  1092  					}
  1093  				}
  1094  			}
  1095  			for i, stk := range acceptStacks {
  1096  				n += values[i][0]
  1097  				value += values[i][1]
  1098  				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
  1099  			}
  1100  			if n == 0 && value == 0 {
  1101  				t.Logf("profile:\n%s", p)
  1102  				for _, have := range stks {
  1103  					t.Logf("have stack %v", have)
  1104  				}
  1105  				for _, stk := range acceptStacks {
  1106  					t.Errorf("want stack %v", stk)
  1107  				}
  1108  			}
  1109  
  1110  			return metricGrowth, profileGrowth, n, value
  1111  		}
  1112  	}
  1113  
  1114  	name := t.Name()
  1115  
  1116  	t.Run("runtime.lock", func(t *testing.T) {
  1117  		mus := make([]runtime.Mutex, 100)
  1118  		var needContention atomic.Int64
  1119  		delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
  1120  		delayMicros := delay.Microseconds()
  1121  
  1122  		// The goroutine that acquires the lock will only proceed when it
  1123  		// detects that its partner is contended for the lock. That will lead to
  1124  		// live-lock if anything (such as a STW) prevents the partner goroutine
  1125  		// from running. Allowing the contention workers to pause and restart
  1126  		// (to allow a STW to proceed) makes it harder to confirm that we're
  1127  		// counting the correct number of contention events, since some locks
  1128  		// will end up contended twice. Instead, disable the GC.
  1129  		defer debug.SetGCPercent(debug.SetGCPercent(-1))
  1130  
  1131  		const workers = 2
  1132  		if runtime.GOMAXPROCS(0) < workers {
  1133  			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
  1134  		}
  1135  
  1136  		fn := func() bool {
  1137  			n := int(needContention.Load())
  1138  			if n < 0 {
  1139  				return false
  1140  			}
  1141  			mu := &mus[n]
  1142  
  1143  			runtime.Lock(mu)
  1144  			for int(needContention.Load()) == n {
  1145  				if runtime.MutexContended(mu) {
  1146  					// make them wait a little while
  1147  					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
  1148  						runtime.Usleep(uint32(delayMicros))
  1149  					}
  1150  					break
  1151  				}
  1152  			}
  1153  			runtime.Unlock(mu)
  1154  			needContention.Store(int64(n - 1))
  1155  
  1156  			return true
  1157  		}
  1158  
  1159  		stks := [][]string{{
  1160  			"runtime.unlock",
  1161  			"runtime_test." + name + ".func5.1",
  1162  			"runtime_test.(*contentionWorker).run",
  1163  		}}
  1164  
  1165  		t.Run("sample-1", func(t *testing.T) {
  1166  			old := runtime.SetMutexProfileFraction(1)
  1167  			defer runtime.SetMutexProfileFraction(old)
  1168  
  1169  			needContention.Store(int64(len(mus) - 1))
  1170  			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)
  1171  
  1172  			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
  1173  				// The test imposes a delay with usleep, verified with calls to
  1174  				// nanotime. Compare against the runtime/metrics package's view
  1175  				// (based on nanotime) rather than runtime/pprof's view (based
  1176  				// on cputicks).
  1177  				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
  1178  			}
  1179  			if have, want := n, int64(len(mus)); have != want {
  1180  				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
  1181  			}
  1182  
  1183  			const slop = 1.5 // account for nanotime vs cputicks
  1184  			if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
  1185  				t.Errorf("views differ by more than %fx", slop)
  1186  			}
  1187  		})
  1188  
  1189  		t.Run("sample-2", func(t *testing.T) {
  1190  			old := runtime.SetMutexProfileFraction(2)
  1191  			defer runtime.SetMutexProfileFraction(old)
  1192  
  1193  			needContention.Store(int64(len(mus) - 1))
  1194  			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)
  1195  
  1196  			// With 100 trials and profile fraction of 2, we expect to capture
  1197  			// 50 samples. Allow the test to pass if we get at least 20 samples;
  1198  			// the CDF of the binomial distribution says there's less than a
  1199  			// 1e-9 chance of that, which is an acceptably low flakiness rate.
  1200  			const samplingSlop = 2.5
  1201  
  1202  			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
  1203  				// The test imposes a delay with usleep, verified with calls to
  1204  				// nanotime. Compare against the runtime/metrics package's view
  1205  				// (based on nanotime) rather than runtime/pprof's view (based
  1206  				// on cputicks).
  1207  				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
  1208  			}
  1209  			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
  1210  				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
  1211  			}
  1212  
  1213  			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
  1214  			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
  1215  				t.Errorf("views differ by more than %fx", timerSlop)
  1216  			}
  1217  		})
  1218  	})
  1219  
  1220  	t.Run("runtime.semrelease", func(t *testing.T) {
  1221  		old := runtime.SetMutexProfileFraction(1)
  1222  		defer runtime.SetMutexProfileFraction(old)
  1223  
  1224  		const workers = 3
  1225  		if runtime.GOMAXPROCS(0) < workers {
  1226  			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
  1227  		}
  1228  
  1229  		var sem uint32 = 1
  1230  		var tries atomic.Int32
  1231  		tries.Store(10_000_000) // prefer controlled failure to timeout
  1232  		var sawContention atomic.Int32
  1233  		var need int32 = 1
  1234  		fn := func() bool {
  1235  			if sawContention.Load() >= need {
  1236  				return false
  1237  			}
  1238  			if tries.Add(-1) < 0 {
  1239  				return false
  1240  			}
  1241  
  1242  			runtime.Semacquire(&sem)
  1243  			runtime.Semrelease1(&sem, false, 0)
  1244  			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
  1245  				sawContention.Add(1)
  1246  			}
  1247  			return true
  1248  		}
  1249  
  1250  		stks := [][]string{
  1251  			{
  1252  				"runtime.unlock",
  1253  				"runtime.semrelease1",
  1254  				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
  1255  				"runtime_test.(*contentionWorker).run",
  1256  			},
  1257  			{
  1258  				"runtime.unlock",
  1259  				"runtime.semacquire1",
  1260  				"runtime.semacquire",
  1261  				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
  1262  				"runtime_test.(*contentionWorker).run",
  1263  			},
  1264  		}
  1265  
  1266  		// Verify that we get call stack we expect, with anything more than zero
  1267  		// cycles / zero samples. The duration of each contention event is too
  1268  		// small relative to the expected overhead for us to verify its value
  1269  		// more directly. Leave that to the explicit lock/unlock test.
  1270  
  1271  		testcase(false, stks, workers, fn)(t)
  1272  
  1273  		if remaining := tries.Load(); remaining >= 0 {
  1274  			t.Logf("finished test early (%d tries remaining)", remaining)
  1275  		}
  1276  	})
  1277  }
  1278  
  1279  // contentionWorker provides cleaner call stacks for lock contention profile tests
  1280  type contentionWorker struct {
  1281  	before func()
  1282  	fn     func() bool
  1283  	after  func()
  1284  }
  1285  
  1286  func (w *contentionWorker) run() {
  1287  	defer w.after()
  1288  	w.before()
  1289  
  1290  	for w.fn() {
  1291  	}
  1292  }