github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/metrics_test.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime_test 6 7 import ( 8 "bytes" 9 "fmt" 10 "internal/goexperiment" 11 "internal/profile" 12 "internal/testenv" 13 "os" 14 "reflect" 15 "runtime" 16 "runtime/debug" 17 "runtime/metrics" 18 "runtime/pprof" 19 "runtime/trace" 20 "slices" 21 "sort" 22 "strings" 23 "sync" 24 "sync/atomic" 25 "testing" 26 "time" 27 "unsafe" 28 ) 29 30 func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) { 31 all := metrics.All() 32 samples := make([]metrics.Sample, len(all)) 33 descs := make(map[string]metrics.Description) 34 for i := range all { 35 samples[i].Name = all[i].Name 36 descs[all[i].Name] = all[i] 37 } 38 return descs, samples 39 } 40 41 func TestReadMetrics(t *testing.T) { 42 // Run a GC cycle to get some of the stats to be non-zero. 43 runtime.GC() 44 45 // Set an arbitrary memory limit to check the metric for it 46 limit := int64(512 * 1024 * 1024) 47 oldLimit := debug.SetMemoryLimit(limit) 48 defer debug.SetMemoryLimit(oldLimit) 49 50 // Set a GC percent to check the metric for it 51 gcPercent := 99 52 oldGCPercent := debug.SetGCPercent(gcPercent) 53 defer debug.SetGCPercent(oldGCPercent) 54 55 // Tests whether readMetrics produces values aligning 56 // with ReadMemStats while the world is stopped. 57 var mstats runtime.MemStats 58 _, samples := prepareAllMetricsSamples() 59 runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples)) 60 61 checkUint64 := func(t *testing.T, m string, got, want uint64) { 62 t.Helper() 63 if got != want { 64 t.Errorf("metric %q: got %d, want %d", m, got, want) 65 } 66 } 67 68 // Check to make sure the values we read line up with other values we read. 69 var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram 70 var tinyAllocs uint64 71 var mallocs, frees uint64 72 for i := range samples { 73 switch name := samples[i].Name; name { 74 case "/cgo/go-to-c-calls:calls": 75 checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall())) 76 case "/memory/classes/heap/free:bytes": 77 checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased) 78 case "/memory/classes/heap/released:bytes": 79 checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased) 80 case "/memory/classes/heap/objects:bytes": 81 checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc) 82 case "/memory/classes/heap/unused:bytes": 83 checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc) 84 case "/memory/classes/heap/stacks:bytes": 85 checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse) 86 case "/memory/classes/metadata/mcache/free:bytes": 87 checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse) 88 case "/memory/classes/metadata/mcache/inuse:bytes": 89 checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse) 90 case "/memory/classes/metadata/mspan/free:bytes": 91 checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse) 92 case "/memory/classes/metadata/mspan/inuse:bytes": 93 checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse) 94 case "/memory/classes/metadata/other:bytes": 95 checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys) 96 case "/memory/classes/os-stacks:bytes": 97 checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse) 98 case "/memory/classes/other:bytes": 99 checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys) 100 case "/memory/classes/profiling/buckets:bytes": 101 checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys) 102 case "/memory/classes/total:bytes": 103 checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) 104 case "/gc/heap/allocs-by-size:bytes": 105 hist := samples[i].Value.Float64Histogram() 106 // Skip size class 0 in BySize, because it's always empty and not represented 107 // in the histogram. 108 for i, sc := range mstats.BySize[1:] { 109 if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s { 110 t.Errorf("bucket does not match size class: got %f, want %f", b, s) 111 // The rest of the checks aren't expected to work anyway. 112 continue 113 } 114 if c, m := hist.Counts[i], sc.Mallocs; c != m { 115 t.Errorf("histogram counts do not much BySize for class %d: got %d, want %d", i, c, m) 116 } 117 } 118 allocsBySize = hist 119 case "/gc/heap/allocs:bytes": 120 checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc) 121 case "/gc/heap/frees-by-size:bytes": 122 hist := samples[i].Value.Float64Histogram() 123 // Skip size class 0 in BySize, because it's always empty and not represented 124 // in the histogram. 125 for i, sc := range mstats.BySize[1:] { 126 if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s { 127 t.Errorf("bucket does not match size class: got %f, want %f", b, s) 128 // The rest of the checks aren't expected to work anyway. 129 continue 130 } 131 if c, f := hist.Counts[i], sc.Frees; c != f { 132 t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f) 133 } 134 } 135 case "/gc/heap/frees:bytes": 136 checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc) 137 case "/gc/heap/tiny/allocs:objects": 138 // Currently, MemStats adds tiny alloc count to both Mallocs AND Frees. 139 // The reason for this is because MemStats couldn't be extended at the time 140 // but there was a desire to have Mallocs at least be a little more representative, 141 // while having Mallocs - Frees still represent a live object count. 142 // Unfortunately, MemStats doesn't actually export a large allocation count, 143 // so it's impossible to pull this number out directly. 144 // 145 // Check tiny allocation count outside of this loop, by using the allocs-by-size 146 // histogram in order to figure out how many large objects there are. 147 tinyAllocs = samples[i].Value.Uint64() 148 // Because the next two metrics tests are checking against Mallocs and Frees, 149 // we can't check them directly for the same reason: we need to account for tiny 150 // allocations included in Mallocs and Frees. 151 case "/gc/heap/allocs:objects": 152 mallocs = samples[i].Value.Uint64() 153 case "/gc/heap/frees:objects": 154 frees = samples[i].Value.Uint64() 155 case "/gc/heap/live:bytes": 156 // Check for "obviously wrong" values. We can't check a stronger invariant, 157 // such as live <= HeapAlloc, because live is not 100% accurate. It's computed 158 // under racy conditions, and some objects may be double-counted (this is 159 // intentional and necessary for GC performance). 160 // 161 // Instead, check against a much more reasonable upper-bound: the amount of 162 // mapped heap memory. We can't possibly overcount to the point of exceeding 163 // total mapped heap memory, except if there's an accounting bug. 164 if live := samples[i].Value.Uint64(); live > mstats.HeapSys { 165 t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys) 166 } else if live == 0 { 167 // Might happen if we don't call runtime.GC() above. 168 t.Error("live bytes is 0") 169 } 170 case "/gc/gomemlimit:bytes": 171 checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit)) 172 case "/gc/heap/objects:objects": 173 checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects) 174 case "/gc/heap/goal:bytes": 175 checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC) 176 case "/gc/gogc:percent": 177 checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent)) 178 case "/gc/cycles/automatic:gc-cycles": 179 checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC)) 180 case "/gc/cycles/forced:gc-cycles": 181 checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC)) 182 case "/gc/cycles/total:gc-cycles": 183 checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC)) 184 case "/gc/pauses:seconds": 185 gcPauses = samples[i].Value.Float64Histogram() 186 case "/sched/pauses/total/gc:seconds": 187 schedPausesTotalGC = samples[i].Value.Float64Histogram() 188 } 189 } 190 191 // Check tinyAllocs. 192 nonTinyAllocs := uint64(0) 193 for _, c := range allocsBySize.Counts { 194 nonTinyAllocs += c 195 } 196 checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs) 197 198 // Check allocation and free counts. 199 checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs) 200 checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs) 201 202 // Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds 203 if !reflect.DeepEqual(gcPauses.Buckets, schedPausesTotalGC.Buckets) { 204 t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Counts) 205 } 206 if !reflect.DeepEqual(gcPauses.Counts, schedPausesTotalGC.Counts) { 207 t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts) 208 } 209 } 210 211 func TestReadMetricsConsistency(t *testing.T) { 212 // Tests whether readMetrics produces consistent, sensible values. 213 // The values are read concurrently with the runtime doing other 214 // things (e.g. allocating) so what we read can't reasonably compared 215 // to other runtime values (e.g. MemStats). 216 217 // Run a few GC cycles to get some of the stats to be non-zero. 218 runtime.GC() 219 runtime.GC() 220 runtime.GC() 221 222 // Set GOMAXPROCS high then sleep briefly to ensure we generate 223 // some idle time. 224 oldmaxprocs := runtime.GOMAXPROCS(10) 225 time.Sleep(time.Millisecond) 226 runtime.GOMAXPROCS(oldmaxprocs) 227 228 // Read all the supported metrics through the metrics package. 229 descs, samples := prepareAllMetricsSamples() 230 metrics.Read(samples) 231 232 // Check to make sure the values we read make sense. 233 var totalVirtual struct { 234 got, want uint64 235 } 236 var objects struct { 237 alloc, free *metrics.Float64Histogram 238 allocs, frees uint64 239 allocdBytes, freedBytes uint64 240 total, totalBytes uint64 241 } 242 var gc struct { 243 numGC uint64 244 pauses uint64 245 } 246 var totalScan struct { 247 got, want uint64 248 } 249 var cpu struct { 250 gcAssist float64 251 gcDedicated float64 252 gcIdle float64 253 gcPause float64 254 gcTotal float64 255 256 idle float64 257 user float64 258 259 scavengeAssist float64 260 scavengeBg float64 261 scavengeTotal float64 262 263 total float64 264 } 265 for i := range samples { 266 kind := samples[i].Value.Kind() 267 if want := descs[samples[i].Name].Kind; kind != want { 268 t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want) 269 continue 270 } 271 if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") { 272 v := samples[i].Value.Uint64() 273 totalVirtual.want += v 274 275 // None of these stats should ever get this big. 276 // If they do, there's probably overflow involved, 277 // usually due to bad accounting. 278 if int64(v) < 0 { 279 t.Errorf("%q has high/negative value: %d", samples[i].Name, v) 280 } 281 } 282 switch samples[i].Name { 283 case "/cpu/classes/gc/mark/assist:cpu-seconds": 284 cpu.gcAssist = samples[i].Value.Float64() 285 case "/cpu/classes/gc/mark/dedicated:cpu-seconds": 286 cpu.gcDedicated = samples[i].Value.Float64() 287 case "/cpu/classes/gc/mark/idle:cpu-seconds": 288 cpu.gcIdle = samples[i].Value.Float64() 289 case "/cpu/classes/gc/pause:cpu-seconds": 290 cpu.gcPause = samples[i].Value.Float64() 291 case "/cpu/classes/gc/total:cpu-seconds": 292 cpu.gcTotal = samples[i].Value.Float64() 293 case "/cpu/classes/idle:cpu-seconds": 294 cpu.idle = samples[i].Value.Float64() 295 case "/cpu/classes/scavenge/assist:cpu-seconds": 296 cpu.scavengeAssist = samples[i].Value.Float64() 297 case "/cpu/classes/scavenge/background:cpu-seconds": 298 cpu.scavengeBg = samples[i].Value.Float64() 299 case "/cpu/classes/scavenge/total:cpu-seconds": 300 cpu.scavengeTotal = samples[i].Value.Float64() 301 case "/cpu/classes/total:cpu-seconds": 302 cpu.total = samples[i].Value.Float64() 303 case "/cpu/classes/user:cpu-seconds": 304 cpu.user = samples[i].Value.Float64() 305 case "/memory/classes/total:bytes": 306 totalVirtual.got = samples[i].Value.Uint64() 307 case "/memory/classes/heap/objects:bytes": 308 objects.totalBytes = samples[i].Value.Uint64() 309 case "/gc/heap/objects:objects": 310 objects.total = samples[i].Value.Uint64() 311 case "/gc/heap/allocs:bytes": 312 objects.allocdBytes = samples[i].Value.Uint64() 313 case "/gc/heap/allocs:objects": 314 objects.allocs = samples[i].Value.Uint64() 315 case "/gc/heap/allocs-by-size:bytes": 316 objects.alloc = samples[i].Value.Float64Histogram() 317 case "/gc/heap/frees:bytes": 318 objects.freedBytes = samples[i].Value.Uint64() 319 case "/gc/heap/frees:objects": 320 objects.frees = samples[i].Value.Uint64() 321 case "/gc/heap/frees-by-size:bytes": 322 objects.free = samples[i].Value.Float64Histogram() 323 case "/gc/cycles:gc-cycles": 324 gc.numGC = samples[i].Value.Uint64() 325 case "/gc/pauses:seconds": 326 h := samples[i].Value.Float64Histogram() 327 gc.pauses = 0 328 for i := range h.Counts { 329 gc.pauses += h.Counts[i] 330 } 331 case "/gc/scan/heap:bytes": 332 totalScan.want += samples[i].Value.Uint64() 333 case "/gc/scan/globals:bytes": 334 totalScan.want += samples[i].Value.Uint64() 335 case "/gc/scan/stack:bytes": 336 totalScan.want += samples[i].Value.Uint64() 337 case "/gc/scan/total:bytes": 338 totalScan.got = samples[i].Value.Uint64() 339 case "/sched/gomaxprocs:threads": 340 if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want { 341 t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want) 342 } 343 case "/sched/goroutines:goroutines": 344 if samples[i].Value.Uint64() < 1 { 345 t.Error("number of goroutines is less than one") 346 } 347 } 348 } 349 // Only check this on Linux where we can be reasonably sure we have a high-resolution timer. 350 if runtime.GOOS == "linux" { 351 if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 { 352 t.Errorf("found no time spent on GC work: %#v", cpu) 353 } 354 if cpu.gcPause <= 0 { 355 t.Errorf("found no GC pauses: %f", cpu.gcPause) 356 } 357 if cpu.idle <= 0 { 358 t.Errorf("found no idle time: %f", cpu.idle) 359 } 360 if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.01) { 361 t.Errorf("calculated total GC CPU not within 1%% of sampled total: %f vs. %f", total, cpu.gcTotal) 362 } 363 if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.01) { 364 t.Errorf("calculated total scavenge CPU not within 1%% of sampled total: %f vs. %f", total, cpu.scavengeTotal) 365 } 366 if cpu.total <= 0 { 367 t.Errorf("found no total CPU time passed") 368 } 369 if cpu.user <= 0 { 370 t.Errorf("found no user time passed") 371 } 372 if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.02) { 373 t.Errorf("calculated total CPU not within 2%% of sampled total: %f vs. %f", total, cpu.total) 374 } 375 } 376 if totalVirtual.got != totalVirtual.want { 377 t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) 378 } 379 if got, want := objects.allocs-objects.frees, objects.total; got != want { 380 t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want) 381 } 382 if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want { 383 t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want) 384 } 385 if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 { 386 t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c) 387 } 388 if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 { 389 t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c) 390 } 391 if len(objects.alloc.Buckets) != len(objects.free.Buckets) { 392 t.Error("allocs-by-size and frees-by-size buckets don't match in length") 393 } else if len(objects.alloc.Counts) != len(objects.free.Counts) { 394 t.Error("allocs-by-size and frees-by-size counts don't match in length") 395 } else { 396 for i := range objects.alloc.Buckets { 397 ba := objects.alloc.Buckets[i] 398 bf := objects.free.Buckets[i] 399 if ba != bf { 400 t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf) 401 } 402 } 403 if !t.Failed() { 404 var gotAlloc, gotFree uint64 405 want := objects.total 406 for i := range objects.alloc.Counts { 407 if objects.alloc.Counts[i] < objects.free.Counts[i] { 408 t.Errorf("found more allocs than frees in object dist bucket %d", i) 409 continue 410 } 411 gotAlloc += objects.alloc.Counts[i] 412 gotFree += objects.free.Counts[i] 413 } 414 if got := gotAlloc - gotFree; got != want { 415 t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want) 416 } 417 if gotAlloc != objects.allocs { 418 t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs) 419 } 420 if gotFree != objects.frees { 421 t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotFree, objects.frees) 422 } 423 } 424 } 425 // The current GC has at least 2 pauses per GC. 426 // Check to see if that value makes sense. 427 if gc.pauses < gc.numGC*2 { 428 t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2) 429 } 430 if totalScan.got <= 0 { 431 t.Errorf("scannable GC space is empty: %d", totalScan.got) 432 } 433 if totalScan.got != totalScan.want { 434 t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want) 435 } 436 } 437 438 func BenchmarkReadMetricsLatency(b *testing.B) { 439 stop := applyGCLoad(b) 440 441 // Spend this much time measuring latencies. 442 latencies := make([]time.Duration, 0, 1024) 443 _, samples := prepareAllMetricsSamples() 444 445 // Hit metrics.Read continuously and measure. 446 b.ResetTimer() 447 for i := 0; i < b.N; i++ { 448 start := time.Now() 449 metrics.Read(samples) 450 latencies = append(latencies, time.Since(start)) 451 } 452 // Make sure to stop the timer before we wait! The load created above 453 // is very heavy-weight and not easy to stop, so we could end up 454 // confusing the benchmarking framework for small b.N. 455 b.StopTimer() 456 stop() 457 458 // Disable the default */op metrics. 459 // ns/op doesn't mean anything because it's an average, but we 460 // have a sleep in our b.N loop above which skews this significantly. 461 b.ReportMetric(0, "ns/op") 462 b.ReportMetric(0, "B/op") 463 b.ReportMetric(0, "allocs/op") 464 465 // Sort latencies then report percentiles. 466 sort.Slice(latencies, func(i, j int) bool { 467 return latencies[i] < latencies[j] 468 }) 469 b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns") 470 b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns") 471 b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns") 472 } 473 474 var readMetricsSink [1024]interface{} 475 476 func TestReadMetricsCumulative(t *testing.T) { 477 // Set up the set of metrics marked cumulative. 478 descs := metrics.All() 479 var samples [2][]metrics.Sample 480 samples[0] = make([]metrics.Sample, len(descs)) 481 samples[1] = make([]metrics.Sample, len(descs)) 482 total := 0 483 for i := range samples[0] { 484 if !descs[i].Cumulative { 485 continue 486 } 487 samples[0][total].Name = descs[i].Name 488 total++ 489 } 490 samples[0] = samples[0][:total] 491 samples[1] = samples[1][:total] 492 copy(samples[1], samples[0]) 493 494 // Start some noise in the background. 495 var wg sync.WaitGroup 496 wg.Add(1) 497 done := make(chan struct{}) 498 go func() { 499 defer wg.Done() 500 for { 501 // Add more things here that could influence metrics. 502 for i := 0; i < len(readMetricsSink); i++ { 503 readMetricsSink[i] = make([]byte, 1024) 504 select { 505 case <-done: 506 return 507 default: 508 } 509 } 510 runtime.GC() 511 } 512 }() 513 514 sum := func(us []uint64) uint64 { 515 total := uint64(0) 516 for _, u := range us { 517 total += u 518 } 519 return total 520 } 521 522 // Populate the first generation. 523 metrics.Read(samples[0]) 524 525 // Check to make sure that these metrics only grow monotonically. 526 for gen := 1; gen < 10; gen++ { 527 metrics.Read(samples[gen%2]) 528 for i := range samples[gen%2] { 529 name := samples[gen%2][i].Name 530 vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value 531 532 switch vNew.Kind() { 533 case metrics.KindUint64: 534 new := vNew.Uint64() 535 old := vOld.Uint64() 536 if new < old { 537 t.Errorf("%s decreased: %d < %d", name, new, old) 538 } 539 case metrics.KindFloat64: 540 new := vNew.Float64() 541 old := vOld.Float64() 542 if new < old { 543 t.Errorf("%s decreased: %f < %f", name, new, old) 544 } 545 case metrics.KindFloat64Histogram: 546 new := sum(vNew.Float64Histogram().Counts) 547 old := sum(vOld.Float64Histogram().Counts) 548 if new < old { 549 t.Errorf("%s counts decreased: %d < %d", name, new, old) 550 } 551 } 552 } 553 } 554 close(done) 555 556 wg.Wait() 557 } 558 559 func withinEpsilon(v1, v2, e float64) bool { 560 return v2-v2*e <= v1 && v1 <= v2+v2*e 561 } 562 563 func TestMutexWaitTimeMetric(t *testing.T) { 564 var sample [1]metrics.Sample 565 sample[0].Name = "/sync/mutex/wait/total:seconds" 566 567 locks := []locker2{ 568 new(mutex), 569 new(rwmutexWrite), 570 new(rwmutexReadWrite), 571 new(rwmutexWriteRead), 572 } 573 for _, lock := range locks { 574 t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) { 575 metrics.Read(sample[:]) 576 before := time.Duration(sample[0].Value.Float64() * 1e9) 577 578 minMutexWaitTime := generateMutexWaitTime(lock) 579 580 metrics.Read(sample[:]) 581 after := time.Duration(sample[0].Value.Float64() * 1e9) 582 583 if wt := after - before; wt < minMutexWaitTime { 584 t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime) 585 } 586 }) 587 } 588 } 589 590 // locker2 represents an API surface of two concurrent goroutines 591 // locking the same resource, but through different APIs. It's intended 592 // to abstract over the relationship of two Lock calls or an RLock 593 // and a Lock call. 594 type locker2 interface { 595 Lock1() 596 Unlock1() 597 Lock2() 598 Unlock2() 599 } 600 601 type mutex struct { 602 mu sync.Mutex 603 } 604 605 func (m *mutex) Lock1() { m.mu.Lock() } 606 func (m *mutex) Unlock1() { m.mu.Unlock() } 607 func (m *mutex) Lock2() { m.mu.Lock() } 608 func (m *mutex) Unlock2() { m.mu.Unlock() } 609 610 type rwmutexWrite struct { 611 mu sync.RWMutex 612 } 613 614 func (m *rwmutexWrite) Lock1() { m.mu.Lock() } 615 func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() } 616 func (m *rwmutexWrite) Lock2() { m.mu.Lock() } 617 func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() } 618 619 type rwmutexReadWrite struct { 620 mu sync.RWMutex 621 } 622 623 func (m *rwmutexReadWrite) Lock1() { m.mu.RLock() } 624 func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() } 625 func (m *rwmutexReadWrite) Lock2() { m.mu.Lock() } 626 func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() } 627 628 type rwmutexWriteRead struct { 629 mu sync.RWMutex 630 } 631 632 func (m *rwmutexWriteRead) Lock1() { m.mu.Lock() } 633 func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() } 634 func (m *rwmutexWriteRead) Lock2() { m.mu.RLock() } 635 func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() } 636 637 // generateMutexWaitTime causes a couple of goroutines 638 // to block a whole bunch of times on a sync.Mutex, returning 639 // the minimum amount of time that should be visible in the 640 // /sync/mutex-wait:seconds metric. 641 func generateMutexWaitTime(mu locker2) time.Duration { 642 // Set up the runtime to always track casgstatus transitions for metrics. 643 *runtime.CasGStatusAlwaysTrack = true 644 645 mu.Lock1() 646 647 // Start up a goroutine to wait on the lock. 648 gc := make(chan *runtime.G) 649 done := make(chan bool) 650 go func() { 651 gc <- runtime.Getg() 652 653 for { 654 mu.Lock2() 655 mu.Unlock2() 656 if <-done { 657 return 658 } 659 } 660 }() 661 gp := <-gc 662 663 // Set the block time high enough so that it will always show up, even 664 // on systems with coarse timer granularity. 665 const blockTime = 100 * time.Millisecond 666 667 // Make sure the goroutine spawned above actually blocks on the lock. 668 for { 669 if runtime.GIsWaitingOnMutex(gp) { 670 break 671 } 672 runtime.Gosched() 673 } 674 675 // Let some amount of time pass. 676 time.Sleep(blockTime) 677 678 // Let the other goroutine acquire the lock. 679 mu.Unlock1() 680 done <- true 681 682 // Reset flag. 683 *runtime.CasGStatusAlwaysTrack = false 684 return blockTime 685 } 686 687 // See issue #60276. 688 func TestCPUMetricsSleep(t *testing.T) { 689 if runtime.GOOS == "wasip1" { 690 // Since wasip1 busy-waits in the scheduler, there's no meaningful idle 691 // time. This is accurately reflected in the metrics, but it means this 692 // test is basically meaningless on this platform. 693 t.Skip("wasip1 currently busy-waits in idle time; test not applicable") 694 } 695 696 names := []string{ 697 "/cpu/classes/idle:cpu-seconds", 698 699 "/cpu/classes/gc/mark/assist:cpu-seconds", 700 "/cpu/classes/gc/mark/dedicated:cpu-seconds", 701 "/cpu/classes/gc/mark/idle:cpu-seconds", 702 "/cpu/classes/gc/pause:cpu-seconds", 703 "/cpu/classes/gc/total:cpu-seconds", 704 "/cpu/classes/scavenge/assist:cpu-seconds", 705 "/cpu/classes/scavenge/background:cpu-seconds", 706 "/cpu/classes/scavenge/total:cpu-seconds", 707 "/cpu/classes/total:cpu-seconds", 708 "/cpu/classes/user:cpu-seconds", 709 } 710 prep := func() []metrics.Sample { 711 mm := make([]metrics.Sample, len(names)) 712 for i := range names { 713 mm[i].Name = names[i] 714 } 715 return mm 716 } 717 m1, m2 := prep(), prep() 718 719 const ( 720 // Expected time spent idle. 721 dur = 100 * time.Millisecond 722 723 // maxFailures is the number of consecutive failures requires to cause the test to fail. 724 maxFailures = 10 725 ) 726 727 failureIdleTimes := make([]float64, 0, maxFailures) 728 729 // If the bug we expect is happening, then the Sleep CPU time will be accounted for 730 // as user time rather than idle time. In an ideal world we'd expect the whole application 731 // to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that 732 // duration. However, the Go runtime can easily eat into idle time while this goroutine is 733 // blocked in a sleep. For example, slow platforms might spend more time expected in the 734 // scheduler. Another example is that a Go runtime background goroutine could run while 735 // everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough 736 // time may pass such that the goroutine is ready to wake, even though the runtime couldn't 737 // observe itself as idle with nanotime. 738 // 739 // To deal with all this, we give a half-proc's worth of leniency. 740 // 741 // We also retry multiple times to deal with the fact that the OS might deschedule us before 742 // we yield and go idle. That has a rare enough chance that retries should resolve it. 743 // If the issue we expect is happening, it should be persistent. 744 minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5) 745 746 // Let's make sure there's no background scavenge work to do. 747 // 748 // The runtime.GC calls below ensure the background sweeper 749 // will not run during the idle period. 750 debug.FreeOSMemory() 751 752 for retries := 0; retries < maxFailures; retries++ { 753 // Read 1. 754 runtime.GC() // Update /cpu/classes metrics. 755 metrics.Read(m1) 756 757 // Sleep. 758 time.Sleep(dur) 759 760 // Read 2. 761 runtime.GC() // Update /cpu/classes metrics. 762 metrics.Read(m2) 763 764 dt := m2[0].Value.Float64() - m1[0].Value.Float64() 765 if dt >= minIdleCPUSeconds { 766 // All is well. Test passed. 767 return 768 } 769 failureIdleTimes = append(failureIdleTimes, dt) 770 // Try again. 771 } 772 773 // We couldn't observe the expected idle time even once. 774 for i, dt := range failureIdleTimes { 775 t.Logf("try %2d: idle time = %.5fs\n", i+1, dt) 776 } 777 t.Logf("try %d breakdown:\n", len(failureIdleTimes)) 778 for i := range names { 779 if m1[i].Value.Kind() == metrics.KindBad { 780 continue 781 } 782 t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64()) 783 } 784 t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds) 785 } 786 787 // Call f() and verify that the correct STW metrics increment. If isGC is true, 788 // fn triggers a GC STW. Otherwise, fn triggers an other STW. 789 func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) { 790 m := []metrics.Sample{ 791 {Name: "/sched/pauses/stopping/gc:seconds"}, 792 {Name: "/sched/pauses/stopping/other:seconds"}, 793 {Name: "/sched/pauses/total/gc:seconds"}, 794 {Name: "/sched/pauses/total/other:seconds"}, 795 } 796 797 stoppingGC := &m[0] 798 stoppingOther := &m[1] 799 totalGC := &m[2] 800 totalOther := &m[3] 801 802 sampleCount := func(s *metrics.Sample) uint64 { 803 h := s.Value.Float64Histogram() 804 805 var n uint64 806 for _, c := range h.Counts { 807 n += c 808 } 809 return n 810 } 811 812 // Read baseline. 813 metrics.Read(m) 814 815 baselineStartGC := sampleCount(stoppingGC) 816 baselineStartOther := sampleCount(stoppingOther) 817 baselineTotalGC := sampleCount(totalGC) 818 baselineTotalOther := sampleCount(totalOther) 819 820 fn(t) 821 822 metrics.Read(m) 823 824 if isGC { 825 if got := sampleCount(stoppingGC); got <= baselineStartGC { 826 t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC) 827 } 828 if got := sampleCount(totalGC); got <= baselineTotalGC { 829 t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC) 830 } 831 832 if got := sampleCount(stoppingOther); got != baselineStartOther { 833 t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther) 834 } 835 if got := sampleCount(totalOther); got != baselineTotalOther { 836 t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther) 837 } 838 } else { 839 if got := sampleCount(stoppingGC); got != baselineStartGC { 840 t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC) 841 } 842 if got := sampleCount(totalGC); got != baselineTotalGC { 843 t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC) 844 } 845 846 if got := sampleCount(stoppingOther); got <= baselineStartOther { 847 t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther) 848 } 849 if got := sampleCount(totalOther); got <= baselineTotalOther { 850 t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther) 851 } 852 } 853 } 854 855 func TestSchedPauseMetrics(t *testing.T) { 856 tests := []struct { 857 name string 858 isGC bool 859 fn func(t *testing.T) 860 }{ 861 { 862 name: "runtime.GC", 863 isGC: true, 864 fn: func(t *testing.T) { 865 runtime.GC() 866 }, 867 }, 868 { 869 name: "runtime.GOMAXPROCS", 870 fn: func(t *testing.T) { 871 if runtime.GOARCH == "wasm" { 872 t.Skip("GOMAXPROCS >1 not supported on wasm") 873 } 874 875 n := runtime.GOMAXPROCS(0) 876 defer runtime.GOMAXPROCS(n) 877 878 runtime.GOMAXPROCS(n + 1) 879 }, 880 }, 881 { 882 name: "runtime.GoroutineProfile", 883 fn: func(t *testing.T) { 884 var s [1]runtime.StackRecord 885 runtime.GoroutineProfile(s[:]) 886 }, 887 }, 888 { 889 name: "runtime.ReadMemStats", 890 fn: func(t *testing.T) { 891 var mstats runtime.MemStats 892 runtime.ReadMemStats(&mstats) 893 }, 894 }, 895 { 896 name: "runtime.Stack", 897 fn: func(t *testing.T) { 898 var b [64]byte 899 runtime.Stack(b[:], true) 900 }, 901 }, 902 { 903 name: "runtime/debug.WriteHeapDump", 904 fn: func(t *testing.T) { 905 if runtime.GOOS == "js" { 906 t.Skip("WriteHeapDump not supported on js") 907 } 908 909 f, err := os.CreateTemp(t.TempDir(), "heapdumptest") 910 if err != nil { 911 t.Fatalf("os.CreateTemp failed: %v", err) 912 } 913 defer os.Remove(f.Name()) 914 defer f.Close() 915 debug.WriteHeapDump(f.Fd()) 916 }, 917 }, 918 { 919 name: "runtime/trace.Start", 920 fn: func(t *testing.T) { 921 if trace.IsEnabled() { 922 t.Skip("tracing already enabled") 923 } 924 925 var buf bytes.Buffer 926 if err := trace.Start(&buf); err != nil { 927 t.Errorf("trace.Start err got %v want nil", err) 928 } 929 trace.Stop() 930 }, 931 }, 932 } 933 934 // These tests count STW pauses, classified based on whether they're related 935 // to the GC or not. Disable automatic GC cycles during the test so we don't 936 // have an incidental GC pause when we're trying to observe only 937 // non-GC-related pauses. This is especially important for the 938 // runtime/trace.Start test, since (as of this writing) that will block 939 // until any active GC mark phase completes. 940 defer debug.SetGCPercent(debug.SetGCPercent(-1)) 941 runtime.GC() 942 943 for _, tc := range tests { 944 t.Run(tc.name, func(t *testing.T) { 945 testSchedPauseMetrics(t, tc.fn, tc.isGC) 946 }) 947 } 948 } 949 950 func TestRuntimeLockMetricsAndProfile(t *testing.T) { 951 testenv.SkipFlaky(t, 64253) 952 953 old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests 954 defer runtime.SetMutexProfileFraction(old) 955 if old != 0 { 956 t.Fatalf("need MutexProfileRate 0, got %d", old) 957 } 958 959 { 960 before := os.Getenv("GODEBUG") 961 for _, s := range strings.Split(before, ",") { 962 if strings.HasPrefix(s, "runtimecontentionstacks=") { 963 t.Logf("GODEBUG includes explicit setting %q", s) 964 } 965 } 966 defer func() { os.Setenv("GODEBUG", before) }() 967 os.Setenv("GODEBUG", fmt.Sprintf("%s,runtimecontentionstacks=1", before)) 968 } 969 970 t.Logf("NumCPU %d", runtime.NumCPU()) 971 t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0)) 972 if minCPU := 2; runtime.NumCPU() < minCPU { 973 t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU) 974 } 975 976 loadProfile := func(t *testing.T) *profile.Profile { 977 var w bytes.Buffer 978 pprof.Lookup("mutex").WriteTo(&w, 0) 979 p, err := profile.Parse(&w) 980 if err != nil { 981 t.Fatalf("failed to parse profile: %v", err) 982 } 983 if err := p.CheckValid(); err != nil { 984 t.Fatalf("invalid profile: %v", err) 985 } 986 return p 987 } 988 989 measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) { 990 beforeProfile := loadProfile(t) 991 beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}} 992 metrics.Read(beforeMetrics) 993 994 fn() 995 996 afterProfile := loadProfile(t) 997 afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}} 998 metrics.Read(afterMetrics) 999 1000 sumSamples := func(p *profile.Profile, i int) int64 { 1001 var sum int64 1002 for _, s := range p.Sample { 1003 sum += s.Value[i] 1004 } 1005 return sum 1006 } 1007 1008 metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64() 1009 profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds() 1010 1011 // The internal/profile package does not support compaction; this delta 1012 // profile will include separate positive and negative entries. 1013 p = afterProfile.Copy() 1014 if len(beforeProfile.Sample) > 0 { 1015 err := p.Merge(beforeProfile, -1) 1016 if err != nil { 1017 t.Fatalf("Merge profiles: %v", err) 1018 } 1019 } 1020 1021 return metricGrowth, profileGrowth, p 1022 } 1023 1024 testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) { 1025 return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) { 1026 metricGrowth, profileGrowth, p := measureDelta(t, func() { 1027 var started, stopped sync.WaitGroup 1028 started.Add(workers) 1029 stopped.Add(workers) 1030 for i := 0; i < workers; i++ { 1031 w := &contentionWorker{ 1032 before: func() { 1033 started.Done() 1034 started.Wait() 1035 }, 1036 after: func() { 1037 stopped.Done() 1038 }, 1039 fn: fn, 1040 } 1041 go w.run() 1042 } 1043 stopped.Wait() 1044 }) 1045 1046 if profileGrowth == 0 { 1047 t.Errorf("no increase in mutex profile") 1048 } 1049 if metricGrowth == 0 && strictTiming { 1050 // If the critical section is very short, systems with low timer 1051 // resolution may be unable to measure it via nanotime. 1052 t.Errorf("no increase in /sync/mutex/wait/total:seconds metric") 1053 } 1054 // This comparison is possible because the time measurements in support of 1055 // runtime/pprof and runtime/metrics for runtime-internal locks are so close 1056 // together. It doesn't work as well for user-space contention, where the 1057 // involved goroutines are not _Grunnable the whole time and so need to pass 1058 // through the scheduler. 1059 t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth) 1060 t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth) 1061 1062 acceptStacks = append([][]string(nil), acceptStacks...) 1063 for i, stk := range acceptStacks { 1064 if goexperiment.StaticLockRanking { 1065 if !slices.ContainsFunc(stk, func(s string) bool { 1066 return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart" 1067 }) { 1068 // stk is a call stack that is still on the user stack when 1069 // it calls runtime.unlock. Add the extra function that 1070 // we'll see, when the static lock ranking implementation of 1071 // runtime.unlockWithRank switches to the system stack. 1072 stk = append([]string{"runtime.unlockWithRank"}, stk...) 1073 } 1074 } 1075 acceptStacks[i] = stk 1076 } 1077 1078 var stks [][]string 1079 values := make([][2]int64, len(acceptStacks)) 1080 for _, s := range p.Sample { 1081 var have []string 1082 for _, loc := range s.Location { 1083 for _, line := range loc.Line { 1084 have = append(have, line.Function.Name) 1085 } 1086 } 1087 stks = append(stks, have) 1088 for i, stk := range acceptStacks { 1089 if slices.Equal(have, stk) { 1090 values[i][0] += s.Value[0] 1091 values[i][1] += s.Value[1] 1092 } 1093 } 1094 } 1095 for i, stk := range acceptStacks { 1096 n += values[i][0] 1097 value += values[i][1] 1098 t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1]) 1099 } 1100 if n == 0 && value == 0 { 1101 t.Logf("profile:\n%s", p) 1102 for _, have := range stks { 1103 t.Logf("have stack %v", have) 1104 } 1105 for _, stk := range acceptStacks { 1106 t.Errorf("want stack %v", stk) 1107 } 1108 } 1109 1110 return metricGrowth, profileGrowth, n, value 1111 } 1112 } 1113 1114 name := t.Name() 1115 1116 t.Run("runtime.lock", func(t *testing.T) { 1117 mus := make([]runtime.Mutex, 100) 1118 var needContention atomic.Int64 1119 delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks 1120 delayMicros := delay.Microseconds() 1121 1122 // The goroutine that acquires the lock will only proceed when it 1123 // detects that its partner is contended for the lock. That will lead to 1124 // live-lock if anything (such as a STW) prevents the partner goroutine 1125 // from running. Allowing the contention workers to pause and restart 1126 // (to allow a STW to proceed) makes it harder to confirm that we're 1127 // counting the correct number of contention events, since some locks 1128 // will end up contended twice. Instead, disable the GC. 1129 defer debug.SetGCPercent(debug.SetGCPercent(-1)) 1130 1131 const workers = 2 1132 if runtime.GOMAXPROCS(0) < workers { 1133 t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers) 1134 } 1135 1136 fn := func() bool { 1137 n := int(needContention.Load()) 1138 if n < 0 { 1139 return false 1140 } 1141 mu := &mus[n] 1142 1143 runtime.Lock(mu) 1144 for int(needContention.Load()) == n { 1145 if runtime.MutexContended(mu) { 1146 // make them wait a little while 1147 for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; { 1148 runtime.Usleep(uint32(delayMicros)) 1149 } 1150 break 1151 } 1152 } 1153 runtime.Unlock(mu) 1154 needContention.Store(int64(n - 1)) 1155 1156 return true 1157 } 1158 1159 stks := [][]string{{ 1160 "runtime.unlock", 1161 "runtime_test." + name + ".func5.1", 1162 "runtime_test.(*contentionWorker).run", 1163 }} 1164 1165 t.Run("sample-1", func(t *testing.T) { 1166 old := runtime.SetMutexProfileFraction(1) 1167 defer runtime.SetMutexProfileFraction(old) 1168 1169 needContention.Store(int64(len(mus) - 1)) 1170 metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t) 1171 1172 if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want { 1173 // The test imposes a delay with usleep, verified with calls to 1174 // nanotime. Compare against the runtime/metrics package's view 1175 // (based on nanotime) rather than runtime/pprof's view (based 1176 // on cputicks). 1177 t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want) 1178 } 1179 if have, want := n, int64(len(mus)); have != want { 1180 t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want) 1181 } 1182 1183 const slop = 1.5 // account for nanotime vs cputicks 1184 if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth { 1185 t.Errorf("views differ by more than %fx", slop) 1186 } 1187 }) 1188 1189 t.Run("sample-2", func(t *testing.T) { 1190 old := runtime.SetMutexProfileFraction(2) 1191 defer runtime.SetMutexProfileFraction(old) 1192 1193 needContention.Store(int64(len(mus) - 1)) 1194 metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t) 1195 1196 // With 100 trials and profile fraction of 2, we expect to capture 1197 // 50 samples. Allow the test to pass if we get at least 20 samples; 1198 // the CDF of the binomial distribution says there's less than a 1199 // 1e-9 chance of that, which is an acceptably low flakiness rate. 1200 const samplingSlop = 2.5 1201 1202 if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want { 1203 // The test imposes a delay with usleep, verified with calls to 1204 // nanotime. Compare against the runtime/metrics package's view 1205 // (based on nanotime) rather than runtime/pprof's view (based 1206 // on cputicks). 1207 t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want) 1208 } 1209 if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop { 1210 t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want) 1211 } 1212 1213 const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling 1214 if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth { 1215 t.Errorf("views differ by more than %fx", timerSlop) 1216 } 1217 }) 1218 }) 1219 1220 t.Run("runtime.semrelease", func(t *testing.T) { 1221 old := runtime.SetMutexProfileFraction(1) 1222 defer runtime.SetMutexProfileFraction(old) 1223 1224 const workers = 3 1225 if runtime.GOMAXPROCS(0) < workers { 1226 t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers) 1227 } 1228 1229 var sem uint32 = 1 1230 var tries atomic.Int32 1231 tries.Store(10_000_000) // prefer controlled failure to timeout 1232 var sawContention atomic.Int32 1233 var need int32 = 1 1234 fn := func() bool { 1235 if sawContention.Load() >= need { 1236 return false 1237 } 1238 if tries.Add(-1) < 0 { 1239 return false 1240 } 1241 1242 runtime.Semacquire(&sem) 1243 runtime.Semrelease1(&sem, false, 0) 1244 if runtime.MutexContended(runtime.SemRootLock(&sem)) { 1245 sawContention.Add(1) 1246 } 1247 return true 1248 } 1249 1250 stks := [][]string{ 1251 { 1252 "runtime.unlock", 1253 "runtime.semrelease1", 1254 "runtime_test.TestRuntimeLockMetricsAndProfile.func6.1", 1255 "runtime_test.(*contentionWorker).run", 1256 }, 1257 { 1258 "runtime.unlock", 1259 "runtime.semacquire1", 1260 "runtime.semacquire", 1261 "runtime_test.TestRuntimeLockMetricsAndProfile.func6.1", 1262 "runtime_test.(*contentionWorker).run", 1263 }, 1264 } 1265 1266 // Verify that we get call stack we expect, with anything more than zero 1267 // cycles / zero samples. The duration of each contention event is too 1268 // small relative to the expected overhead for us to verify its value 1269 // more directly. Leave that to the explicit lock/unlock test. 1270 1271 testcase(false, stks, workers, fn)(t) 1272 1273 if remaining := tries.Load(); remaining >= 0 { 1274 t.Logf("finished test early (%d tries remaining)", remaining) 1275 } 1276 }) 1277 } 1278 1279 // contentionWorker provides cleaner call stacks for lock contention profile tests 1280 type contentionWorker struct { 1281 before func() 1282 fn func() bool 1283 after func() 1284 } 1285 1286 func (w *contentionWorker) run() { 1287 defer w.after() 1288 w.before() 1289 1290 for w.fn() { 1291 } 1292 }