github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/mstats.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

type mstats struct {
	// Statistics about malloc heap.
	heapStats consistentHeapStats

	// Statistics about stacks.
	stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	mspan_sys    sysMemStat
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcMiscSys sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC

	last_gc_nanotime uint64 // last gc (monotonic time)
	lastHeapInUse    uint64 // heapInUse at mark termination of the previous GC

	enablegc bool

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64
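
	// An illustrative sketch (not part of the API; names here are arbitrary)
	// of how the heap fields above are commonly combined in user code, per
	// the HeapIdle and HeapInuse documentation:
	//
	//	var ms runtime.MemStats
	//	runtime.ReadMemStats(&ms)
	//	retained := ms.HeapIdle - ms.HeapReleased // kept for future heap growth
	//	fragment := ms.HeapInuse - ms.HeapAlloc   // upper bound on span fragmentation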

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32
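
	// For illustration only, following the PauseNs/PauseEnd indexing rule
	// above (user-side code, assuming at least one completed cycle):
	//
	//	var ms runtime.MemStats
	//	runtime.ReadMemStats(&ms)
	//	i := (ms.NumGC + 255) % 256
	//	lastPauseNs := ms.PauseNs[i]   // duration of the most recent cycle's pauses
	//	lastPauseEnd := ms.PauseEnd[i] // end of its last pause, ns since 1970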

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}

func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
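
// A minimal caller-side sketch (not part of this file): programs typically
// snapshot the statistics like this. Note that ReadMemStats stops the world
// for the duration of the call.
//
//	var ms runtime.MemStats
//	runtime.ReadMemStats(&ms)
//	println("heap alloc:", ms.HeapAlloc, "sys:", ms.Sys, "gc cycles:", ms.NumGC)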

// readmemstats_m populates stats for internal runtime values.
//
// The world must be stopped.
func readmemstats_m(stats *MemStats) {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.

	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := consStats.largeAlloc
	nMalloc := consStats.largeAllocCount
	totalFree := consStats.largeFree
	nFree := consStats.largeFreeCount

	// Collect per-sizeclass stats.
	var bySize [_NumSizeClasses]struct {
		Size    uint32
		Mallocs uint64
		Frees   uint64
	}
	for i := range bySize {
		bySize[i].Size = uint32(class_to_size[i])

		// Malloc stats.
		a := consStats.smallAllocCount[i]
		totalAlloc += a * uint64(class_to_size[i])
		nMalloc += a
		bySize[i].Mallocs = a

		// Free stats.
		f := consStats.smallFreeCount[i]
		totalFree += f * uint64(class_to_size[i])
		nFree += f
		bySize[i].Frees = f
	}

	// Account for tiny allocations.
	// For historical reasons, MemStats includes tiny allocations
	// in both the total free and total alloc count. This double-counts
	// memory in some sense because their tiny allocation block is also
	// counted. Tracking the lifetime of individual tiny allocations is
	// currently not done because it would be too expensive.
	nFree += consStats.tinyAllocCount
	nMalloc += consStats.tinyAllocCount

	// Calculate derived stats.

	stackInUse := uint64(consStats.inStacks)
	gcWorkBufInUse := uint64(consStats.inWorkBufs)
	gcProgPtrScalarBitsInUse := uint64(consStats.inPtrScalarBits)

	totalMapped := gcController.heapInUse.load() + gcController.heapFree.load() + gcController.heapReleased.load() +
		memstats.stacks_sys.load() + memstats.mspan_sys.load() + memstats.mcache_sys.load() +
		memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + memstats.other_sys.load() +
		stackInUse + gcWorkBufInUse + gcProgPtrScalarBitsInUse

	heapGoal := gcController.heapGoal()

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * memstats.heapInUse == inHeap
	// * memstats.heapReleased == released
	// * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
	// * memstats.totalAlloc == totalAlloc
	// * memstats.totalFree == totalFree
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if gcController.heapInUse.load() != uint64(consStats.inHeap) {
		print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heapInUse and consistent stats are not equal")
	}
	if gcController.heapReleased.load() != uint64(consStats.released) {
		print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heapReleased and consistent stats are not equal")
	}
	heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if heapRetained != consRetained {
		print("runtime: global value=", heapRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
	if gcController.totalAlloc.Load() != totalAlloc {
		print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
		print("runtime: consistent value=", totalAlloc, "\n")
		throw("totalAlloc and consistent stats are not equal")
	}
	if gcController.totalFree.Load() != totalFree {
		print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
		print("runtime: consistent value=", totalFree, "\n")
		throw("totalFree and consistent stats are not equal")
	}
	// Also check that mappedReady lines up with totalMapped - released.
	// This isn't really the same type of "make sure consistent stats line up" situation,
	// but this is an opportune time to check.
	if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
		print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
		print("runtime: totalMapped=", totalMapped, "\n")
		print("runtime: released=", uint64(consStats.released), "\n")
		print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
		throw("mappedReady and other memstats are not equal")
	}

	// We've calculated all the values we need. Now, populate stats.

	stats.Alloc = totalAlloc - totalFree
	stats.TotalAlloc = totalAlloc
	stats.Sys = totalMapped
	stats.Mallocs = nMalloc
	stats.Frees = nFree
	stats.HeapAlloc = totalAlloc - totalFree
	stats.HeapSys = gcController.heapInUse.load() + gcController.heapFree.load() + gcController.heapReleased.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition.
	// Put another way:
	//
	//	HeapSys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	//	HeapIdle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	//	HeapSys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	//	HeapIdle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heapInUse
	//
	//	=> HeapIdle = HeapSys - heapInUse = heapFree + heapReleased
	stats.HeapIdle = gcController.heapFree.load() + gcController.heapReleased.load()
	stats.HeapInuse = gcController.heapInUse.load()
	stats.HeapReleased = gcController.heapReleased.load()
	stats.HeapObjects = nMalloc - nFree
	stats.StackInuse = stackInUse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = stackInUse + memstats.stacks_sys.load()
	stats.MSpanInuse = uint64(mheap_.spanalloc.inuse)
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = uint64(mheap_.cachealloc.inuse)
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + gcWorkBufInUse + gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// stats.BySize and bySize might not match in length.
	// That's OK, stats.BySize cannot change due to backwards
	// compatibility issues. copy will copy the minimum amount
	// of values between the two of them.
	copy(stats.BySize[:], bySize[:])
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}
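
// For reference, a sketch of the layout readGCStats_m (below) produces in
// *pauses, which the caller in runtime/debug unpacks (with n the number of
// recorded cycles, at most 256):
//
//	p[0:n]    recent pause durations, most recent first
//	p[n:2n]   matching pause end times (ns since 1970)
//	p[2n]     end time of the last GC (ns since 1970)
//	p[2n+1]   number of completed GC cycles
//	p[2n+2]   total pause time in ns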

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//
//go:nosplit
func (s *sysMemStat) add(n int64) {
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
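
// An illustrative sketch (call sites and sizes are hypothetical) of the
// intended sysMemStat usage: add the mapped size when memory is obtained from
// the OS, add the negated size when it is returned, and snapshot with load.
//
//	memstats.mspan_sys.add(int64(n))  // n bytes mapped for mspan structures
//	...
//	memstats.mspan_sys.add(-int64(n)) // n bytes returned to the OS
//	cur := memstats.mspan_sys.load()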

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	//
	// These are all uint64 because they're cumulative, and could quickly wrap
	// around otherwise.
	tinyAllocCount  uint64                  // number of tiny allocations
	largeAlloc      uint64                  // bytes allocated for large objects
	largeAllocCount uint64                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)

	// NOTE: This struct must be a multiple of 8 bytes in size because it
	// is stored in an array. If it's not, atomic accesses to the above
	// fields may be unaligned and fail on 32-bit platforms.
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	gen atomic.Uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}
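
// A sketch of the writer protocol described above, roughly as allocation
// paths use it (the field and delta below are chosen for illustration; real
// call sites vary):
//
//	stats := memstats.heapStats.acquire()
//	atomic.Xadd64(&stats.tinyAllocCount, 1) // deltas must be updated atomically
//	memstats.heapStats.release()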

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := pp.statsSeq.Add(1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := m.gen.Load() % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := pp.statsSeq.Add(1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := m.gen.Load()
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	m.gen.Swap((currGen + 1) % 3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for p.statsSeq.Load()%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}

type cpuStats struct {
	// All fields are CPU time in nanoseconds computed by comparing
	// calls of nanotime. This means they're all overestimates, because
	// they don't accurately compute on-CPU time (so some of the time
	// could be spent scheduled away by the OS).

	gcAssistTime    int64 // GC assists
	gcDedicatedTime int64 // GC dedicated mark workers + pauses
	gcIdleTime      int64 // GC idle mark workers
	gcPauseTime     int64 // GC pauses (all GOMAXPROCS, even if just 1 is running)
	gcTotalTime     int64

	scavengeAssistTime int64 // scavenge assists
	scavengeBgTime     int64 // background scavenger
	scavengeTotalTime  int64

	idleTime int64 // Time Ps spent in _Pidle.
	userTime int64 // Time Ps spent in _Prunning or _Psyscall that's not any of the above.

	totalTime int64 // GOMAXPROCS * (monotonic wall clock time elapsed)
}
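
// A worked example (numbers are hypothetical) tying cpuStats to GCCPUFraction
// above: with GOMAXPROCS=2 over 10s of execution, totalTime is 2*10s = 20e9 ns
// of available CPU; if gcTotalTime were 1e9 ns, the GC would have consumed
// 1e9/20e9 = 5% of the program's available CPU time.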