github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/runtime/mstats.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the os

	// heap_objects is not used by the runtime directly and instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// next_gc is the goal heap_live for when next GC ends.
	// Set to ^uint64(0) if disabled.
	//
	// Read and written atomically, unless the world is stopped.
	next_gc uint64

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add an uint32 for even number of size classes to align below fields
	// to 64 bits for atomic operations on 32 bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	tinyallocs       uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
	last_next_gc     uint64 // next_gc for the previous GC
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// triggerRatio is the heap growth ratio that triggers marking.
	//
	// E.g., if this is 0.6, then GC should start when the live
	// heap has reached 1.6 times the heap size marked by the
	// previous cycle. This should be ≤ GOGC/100 so the trigger
	// heap size is less than the goal heap size. This is set
	// during mark termination for the next cycle's trigger.
	triggerRatio float64

	// gc_trigger is the heap size that triggers marking.
	//
	// When heap_live ≥ gc_trigger, the mark phase will start.
	// This is also the heap size by which proportional sweeping
	// must be complete.
	//
	// This is computed from triggerRatio during mark termination
	// for the next cycle's trigger.
	gc_trigger uint64

	// heap_live is the number of bytes considered live by the GC.
	// That is: retained by the most recent GC plus allocated
	// since then. heap_live <= alloc, since alloc includes unmarked
	// objects that have not yet been swept (and hence goes up as we
	// allocate and down as we sweep) while heap_live excludes these
	// objects (and hence only goes up between GCs).
	//
	// This is updated atomically without locking. To reduce
	// contention, this is updated only when obtaining a span from
	// an mcentral and at this point it counts all of the
	// unallocated slots in that span (which will be allocated
	// before that mcache obtains another span from that
	// mcentral). Hence, it slightly overestimates the "true" live
	// heap size. It's better to overestimate than to
	// underestimate because 1) this triggers the GC earlier than
	// necessary rather than potentially too late and 2) this
	// leads to a conservative GC rate rather than a GC rate that
	// is potentially too low.
	//
	// Reads should likewise be atomic (or during STW).
	//
	// Whenever this is updated, call traceHeapAlloc() and
	// gcController.revise().
	heap_live uint64

	// heap_scan is the number of bytes of "scannable" heap. This
	// is the live heap (as counted by heap_live), but omitting
	// no-scan objects and no-scan tails of objects.
	//
	// Whenever this is updated, call gcController.revise().
	//
	// Read and written atomically or with the world stopped.
	heap_scan uint64

	// heap_marked is the number of bytes marked by the previous
	// GC. After mark termination, heap_live == heap_marked, but
	// unlike heap_live, heap_marked does not change until the
	// next mark termination.
	heap_marked uint64

	// heapStats is a set of statistics
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}

func init() {
	if offset := unsafe.Offsetof(memstats.heap_live); offset%8 != 0 {
		println(offset)
		throw("memstats.heap_live not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}

func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = memstats.next_gc
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}

// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := uint64(consStats.largeAlloc)
	memstats.nmalloc += uint64(consStats.largeAllocCount)
	totalFree := uint64(consStats.largeFree)
	memstats.nfree += uint64(consStats.largeFreeCount)

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := uint64(consStats.smallAllocCount[i])
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := uint64(consStats.smallFreeCount[i])
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += memstats.tinyallocs
	memstats.nmalloc += memstats.tinyallocs

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds the sysMemStat by n.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	largeAlloc      uintptr                  // bytes allocated for large objects
	largeAllocCount uintptr                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)

	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
	// Only necessary on 32-bit platforms.
	// _ [(sys.PtrSize / 4) % 2]uint32
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
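
// Illustrative sketch (not part of the upstream runtime source): how user
// code typically consumes the exported MemStats/ReadMemStats API documented
// above. The fmt package and the variable names are assumptions made for
// this example only; it belongs in an ordinary program, not in package
// runtime.
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m) // briefly stops the world and snapshots memstats
//	fmt.Printf("HeapAlloc=%d HeapSys=%d NextGC=%d NumGC=%d\n",
//		m.HeapAlloc, m.HeapSys, m.NextGC, m.NumGC)
//	// PauseNs is a circular buffer; per its documentation, the most recent
//	// stop-the-world pause is at index (NumGC+255)%256.
//	fmt.Println("last GC pause (ns):", m.PauseNs[(m.NumGC+255)%256])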