github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/mprof.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime

import (
	"internal/abi"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// NOTE(rsc): Everything here could use cas if contention became an issue.
var (
	// profInsertLock protects changes to the start of all *bucket linked lists
	profInsertLock mutex
	// profBlockLock protects the contents of every blockRecord struct
	profBlockLock mutex
	// profMemActiveLock protects the active field of every memRecord struct
	profMemActiveLock mutex
	// profMemFutureLock is a set of locks that protect the respective elements
	// of the future array of every memRecord struct
	profMemFutureLock [len(memRecord{}.future)]mutex
)

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

const (
	// profile types
	memProfile bucketType = 1 + iota
	blockProfile
	mutexProfile

	// size of bucket hash table
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

type bucketType int

// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
//
// None of the fields in this bucket header are modified after
// creation, including its next and allnext links.
//
// No heap pointers.
type bucket struct {
	_       sys.NotInHeap
	next    *bucket
	allnext *bucket
	typ     bucketType // memBucket or blockBucket (includes mutexProfile)
	hash    uintptr
	size    uintptr
	nstk    uintptr
}

// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we would
	// naively count them, we would get a skew toward mallocs.
	//
	// Hence, we delay information to get consistent snapshots as
	// of mark termination. Allocations count toward the next mark
	// termination's snapshot, while sweep frees count toward the
	// previous mark termination's snapshot:
	//
	//              MT          MT          MT          MT
	//             .·|         .·|         .·|         .·|
	//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
	//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
	//    .·˙        |.·˙        |.·˙        |.·˙        |
	//
	//       alloc → ▲ ← free
	//               ┠┅┅┅┅┅┅┅┅┅┅┅P
	//       C+2     →    C+1    →  C
	//
	//                   alloc → ▲ ← free
	//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
	//                   C+2     →    C+1    →  C
	//
	// Since we can't publish a consistent snapshot until all of
	// the sweep frees are accounted for, we wait until the next
	// mark termination ("MT" above) to publish the previous mark
	// termination's snapshot ("P" above). To do this, allocation
	// and free events are accounted to *future* heap profile
	// cycles ("C+n" above) and we only publish a cycle once all
	// of the events from that cycle have completed. Specifically:
	//
	//   Mallocs are accounted to cycle C+2.
	//   Explicit frees are accounted to cycle C+2.
	//   GC frees (done during sweeping) are accounted to cycle C+1.
	//
	// After mark termination, we increment the global heap
	// profile cycle counter and accumulate the stats from cycle C
	// into the active profile.

	// active is the currently published profile. A profiling
	// cycle can be accumulated into active once it is complete.
	active memRecordCycle

	// future records the profile events we're counting for cycles
	// that have not yet been published. This is a ring buffer
	// indexed by the global heap profile cycle C and stores
	// cycles C, C+1, and C+2. Unlike active, these counts are
	// only for a single cycle; they are not cumulative across
	// cycles.
	//
	// We store cycle C here because there's a window between when
	// C becomes the active cycle and when we've flushed it to
	// active.
	future [3]memRecordCycle
}

// A memRecordCycle holds the allocation and free counts, and the
// corresponding byte totals, for a single heap profile cycle.
type memRecordCycle struct {
	allocs, frees           uintptr
	alloc_bytes, free_bytes uintptr
}

// add accumulates b into a. It does not zero b.
func (a *memRecordCycle) add(b *memRecordCycle) {
	a.allocs += b.allocs
	a.frees += b.frees
	a.alloc_bytes += b.alloc_bytes
	a.free_bytes += b.free_bytes
}

// A blockRecord is the bucket data for a bucket of type blockProfile,
// which is used in blocking and mutex profiles.
type blockRecord struct {
	count  float64
	cycles int64
}

var (
	mbuckets atomic.UnsafePointer // *bucket, memory profile buckets
	bbuckets atomic.UnsafePointer // *bucket, blocking profile buckets
	xbuckets atomic.UnsafePointer // *bucket, mutex profile buckets
	buckhash atomic.UnsafePointer // *buckhashArray

	mProfCycle mProfCycleHolder
)

type buckhashArray [buckHashSize]atomic.UnsafePointer // *bucket

const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)

// mProfCycleHolder holds the global heap profile cycle number (wrapped at
// mProfCycleWrap, stored starting at bit 1), and a flag (stored at bit 0) to
// indicate whether future[cycle] in all buckets has been queued to flush into
// the active profile.
type mProfCycleHolder struct {
	value atomic.Uint32
}

// read returns the current cycle count.
func (c *mProfCycleHolder) read() (cycle uint32) {
	v := c.value.Load()
	cycle = v >> 1
	return cycle
}

// setFlushed sets the flushed flag. It returns the current cycle count and the
// previous value of the flushed flag.
func (c *mProfCycleHolder) setFlushed() (cycle uint32, alreadyFlushed bool) {
	for {
		prev := c.value.Load()
		cycle = prev >> 1
		alreadyFlushed = (prev & 0x1) != 0
		next := prev | 0x1
		if c.value.CompareAndSwap(prev, next) {
			return cycle, alreadyFlushed
		}
	}
}

// increment increases the cycle count by one, wrapping the value at
// mProfCycleWrap. It clears the flushed flag.
func (c *mProfCycleHolder) increment() {
	// We explicitly wrap mProfCycle rather than depending on
	// uint wraparound because the memRecord.future ring does not
	// itself wrap at a power of two.
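	// Retry with CAS until the update lands. Building the new value from the
	// incremented cycle alone (cycle << 1) leaves bit 0 zero, which is what
	// clears the flushed flag.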
	for {
		prev := c.value.Load()
		cycle := prev >> 1
		cycle = (cycle + 1) % mProfCycleWrap
		next := cycle << 1
		if c.value.CompareAndSwap(prev, next) {
			break
		}
	}
}

// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
	switch typ {
	default:
		throw("invalid profile bucket type")
	case memProfile:
		size += unsafe.Sizeof(memRecord{})
	case blockProfile, mutexProfile:
		size += unsafe.Sizeof(blockRecord{})
	}

	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
	b.typ = typ
	b.nstk = uintptr(nstk)
	return b
}

// stk returns the slice in b holding the stack.
func (b *bucket) stk() []uintptr {
	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
	return stk[:b.nstk:b.nstk]
}

// mp returns the memRecord associated with the memProfile bucket b.
func (b *bucket) mp() *memRecord {
	if b.typ != memProfile {
		throw("bad use of bucket.mp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
	return (*memRecord)(data)
}

// bp returns the blockRecord associated with the blockProfile bucket b.
func (b *bucket) bp() *blockRecord {
	if b.typ != blockProfile && b.typ != mutexProfile {
		throw("bad use of bucket.bp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
	return (*blockRecord)(data)
}

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
	bh := (*buckhashArray)(buckhash.Load())
	if bh == nil {
		lock(&profInsertLock)
		// check again under the lock
		bh = (*buckhashArray)(buckhash.Load())
		if bh == nil {
			bh = (*buckhashArray)(sysAlloc(unsafe.Sizeof(buckhashArray{}), &memstats.buckhash_sys))
			if bh == nil {
				throw("runtime: cannot allocate memory")
			}
			buckhash.StoreNoWB(unsafe.Pointer(bh))
		}
		unlock(&profInsertLock)
	}

	// Hash stack.
	var h uintptr
	for _, pc := range stk {
		h += pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11

	i := int(h % buckHashSize)
	// first check optimistically, without the lock
	for b := (*bucket)(bh[i].Load()); b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			return b
		}
	}

	if !alloc {
		return nil
	}

	lock(&profInsertLock)
	// check again under the insertion lock
	for b := (*bucket)(bh[i].Load()); b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			unlock(&profInsertLock)
			return b
		}
	}

	// Create new bucket.
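	// The new bucket is allocated off-heap by persistentalloc and holds no
	// heap pointers, so the list insertions below can use StoreNoWB.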
	b := newBucket(typ, len(stk))
	copy(b.stk(), stk)
	b.hash = h
	b.size = size

	var allnext *atomic.UnsafePointer
	if typ == memProfile {
		allnext = &mbuckets
	} else if typ == mutexProfile {
		allnext = &xbuckets
	} else {
		allnext = &bbuckets
	}

	b.next = (*bucket)(bh[i].Load())
	b.allnext = (*bucket)(allnext.Load())

	bh[i].StoreNoWB(unsafe.Pointer(b))
	allnext.StoreNoWB(unsafe.Pointer(b))

	unlock(&profInsertLock)
	return b
}

func eqslice(x, y []uintptr) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}

// mProf_NextCycle publishes the next heap profile cycle and creates a
// fresh heap profile cycle. This operation is fast and can be done
// during STW. The caller must call mProf_Flush before calling
// mProf_NextCycle again.
//
// This is called by mark termination during STW so allocations and
// frees after the world is started again count towards a new heap
// profiling cycle.
func mProf_NextCycle() {
	mProfCycle.increment()
}

// mProf_Flush flushes the events from the current heap profiling
// cycle into the active profile. After this it is safe to start a new
// heap profiling cycle with mProf_NextCycle.
//
// This is called by GC after mark termination starts the world. In
// contrast with mProf_NextCycle, this is somewhat expensive, but safe
// to do concurrently.
func mProf_Flush() {
	cycle, alreadyFlushed := mProfCycle.setFlushed()
	if alreadyFlushed {
		return
	}

	index := cycle % uint32(len(memRecord{}.future))
	lock(&profMemActiveLock)
	lock(&profMemFutureLock[index])
	mProf_FlushLocked(index)
	unlock(&profMemFutureLock[index])
	unlock(&profMemActiveLock)
}

// mProf_FlushLocked flushes the events from the heap profiling cycle at index
// into the active profile. The caller must hold the lock for the active profile
// (profMemActiveLock) and for the profiling cycle at index
// (profMemFutureLock[index]).
func mProf_FlushLocked(index uint32) {
	assertLockHeld(&profMemActiveLock)
	assertLockHeld(&profMemFutureLock[index])
	head := (*bucket)(mbuckets.Load())
	for b := head; b != nil; b = b.allnext {
		mp := b.mp()

		// Flush cycle C into the published profile and clear
		// it for reuse.
		mpc := &mp.future[index]
		mp.active.add(mpc)
		*mpc = memRecordCycle{}
	}
}

// mProf_PostSweep records that all sweep frees for this GC cycle have
// completed. This has the effect of publishing the heap profile
// snapshot as of the last mark termination without advancing the heap
// profile cycle.
func mProf_PostSweep() {
	// Flush cycle C+1 to the active profile so everything as of
	// the last mark termination becomes visible. *Don't* advance
	// the cycle, since we're still accumulating allocs in cycle
	// C+2, which have to become C+1 in the next mark termination
	// and so on.
	cycle := mProfCycle.read() + 1

	index := cycle % uint32(len(memRecord{}.future))
	lock(&profMemActiveLock)
	lock(&profMemFutureLock[index])
	mProf_FlushLocked(index)
	unlock(&profMemFutureLock[index])
	unlock(&profMemActiveLock)
}

// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var stk [maxStack]uintptr
	nstk := callers(4, stk[:])

	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))

	b := stkbucket(memProfile, size, stk[:nstk], true)
	mp := b.mp()
	mpc := &mp.future[index]

	lock(&profMemFutureLock[index])
	mpc.allocs++
	mpc.alloc_bytes += size
	unlock(&profMemFutureLock[index])

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of
	// the profiler locks. This reduces potential contention and chances of
	// deadlocks. Since the object must be alive during the call to
	// mProf_Malloc, it's fine to do this non-atomically.
	systemstack(func() {
		setprofilebucket(p, b)
	})
}

// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
	index := (mProfCycle.read() + 1) % uint32(len(memRecord{}.future))

	mp := b.mp()
	mpc := &mp.future[index]

	lock(&profMemFutureLock[index])
	mpc.frees++
	mpc.free_bytes += size
	unlock(&profMemFutureLock[index])
}

var blockprofilerate uint64 // in CPU ticks

// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int) {
	var r int64
	if rate <= 0 {
		r = 0 // disable profiling
	} else if rate == 1 {
		r = 1 // profile everything
	} else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
		if r == 0 {
			r = 1
		}
	}

	atomic.Store64(&blockprofilerate, uint64(r))
}

// blockevent records a goroutine blocking event of the given duration
// (in CPU ticks) if it passes the sampling check for blockprofilerate.
func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}

	rate := int64(atomic.Load64(&blockprofilerate))
	if blocksampled(cycles, rate) {
		saveblockevent(cycles, rate, skip+1, blockProfile)
	}
}

// blocksampled returns true for all events where cycles >= rate. Shorter
// events have a cycles/rate random chance of returning true.
func blocksampled(cycles, rate int64) bool {
	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
		return false
	}
	return true
}

// saveblockevent records a block or mutex profile event in the bucket for
// the caller's stack, scaling the sample to compensate for the sampling rate.
func saveblockevent(cycles, rate int64, skip int, which bucketType) {
	gp := getg()
	var nstk int
	var stk [maxStack]uintptr
	if gp.m.curg == nil || gp.m.curg == gp {
		nstk = callers(skip, stk[:])
	} else {
		nstk = gcallers(gp.m.curg, skip, stk[:])
	}
	b := stkbucket(which, 0, stk[:nstk], true)
	bp := b.bp()

	lock(&profBlockLock)
	// We want to up-scale the count and cycles according to the
	// probability that the event was sampled. For block profile events,
	// the sample probability is 1 if cycles >= rate, and cycles / rate
	// otherwise. For mutex profile events, the sample probability is 1 / rate.
	// We scale the events by 1 / (probability the event was sampled).
	if which == blockProfile && cycles < rate {
		// Remove sampling bias, see discussion on http://golang.org/cl/299991.
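		// cycles < rate here, so this event was sampled with probability
		// cycles/rate and stands in for roughly rate/cycles events.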
		bp.count += float64(rate) / float64(cycles)
		bp.cycles += rate
	} else if which == mutexProfile {
		bp.count += float64(rate)
		bp.cycles += rate * cycles
	} else {
		bp.count++
		bp.cycles += cycles
	}
	unlock(&profBlockLock)
}

var mutexprofilerate uint64 // fraction sampled

// SetMutexProfileFraction controls the fraction of mutex contention events
// that are reported in the mutex profile. On average 1/rate events are
// reported. The previous rate is returned.
//
// To turn off profiling entirely, pass rate 0.
// To just read the current rate, pass rate < 0.
// (For n>1 the details of sampling may change.)
func SetMutexProfileFraction(rate int) int {
	if rate < 0 {
		return int(mutexprofilerate)
	}
	old := mutexprofilerate
	atomic.Store64(&mutexprofilerate, uint64(rate))
	return int(old)
}

//go:linkname mutexevent sync.event
func mutexevent(cycles int64, skip int) {
	if cycles < 0 {
		cycles = 0
	}
	rate := int64(atomic.Load64(&mutexprofilerate))
	// TODO(pjw): measure impact of always calling fastrand vs using something
	// like malloc.go:nextSample()
	if rate > 0 && int64(fastrand())%rate == 0 {
		saveblockevent(cycles, rate, skip+1, mutexProfile)
	}
}

// Go interface to profile data.

// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *StackRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024

// disableMemoryProfiling is set by the linker if runtime.MemProfile
// is not used and the link type guarantees nobody else could use it
// elsewhere.
var disableMemoryProfiling bool

// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}

// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }

// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 {
	return r.AllocObjects - r.FreeObjects
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *MemProfileRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfile returns a profile of memory allocated and freed per allocation
// site.
//
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The returned profile may be up to two garbage collection cycles old.
// This is to avoid skewing the profile toward allocations; because
// allocations happen in real time but frees are delayed until the garbage
// collector performs sweeping, the profile only accounts for allocations
// that have had a chance to be freed by the garbage collector.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	cycle := mProfCycle.read()
	// If we're between mProf_NextCycle and mProf_Flush, take care
	// of flushing to the active profile so we only have to look
	// at the active profile below.
	index := cycle % uint32(len(memRecord{}.future))
	lock(&profMemActiveLock)
	lock(&profMemFutureLock[index])
	mProf_FlushLocked(index)
	unlock(&profMemFutureLock[index])
	clear := true
	head := (*bucket)(mbuckets.Load())
	for b := head; b != nil; b = b.allnext {
		mp := b.mp()
		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
			n++
		}
		if mp.active.allocs != 0 || mp.active.frees != 0 {
			clear = false
		}
	}
	if clear {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate all of the cycles, and recount buckets.
		n = 0
		for b := head; b != nil; b = b.allnext {
			mp := b.mp()
			for c := range mp.future {
				lock(&profMemFutureLock[c])
				mp.active.add(&mp.future[c])
				mp.future[c] = memRecordCycle{}
				unlock(&profMemFutureLock[c])
			}
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				n++
			}
		}
	}
	if n <= len(p) {
		ok = true
		idx := 0
		for b := head; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				record(&p[idx], b)
				idx++
			}
		}
	}
	unlock(&profMemActiveLock)
	return
}

// Write b's data to r.
func record(r *MemProfileRecord, b *bucket) {
	mp := b.mp()
	r.AllocBytes = int64(mp.active.alloc_bytes)
	r.FreeBytes = int64(mp.active.free_bytes)
	r.AllocObjects = int64(mp.active.allocs)
	r.FreeObjects = int64(mp.active.frees)
	if raceenabled {
		racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile))
	}
	if msanenabled {
		msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
	}
	if asanenabled {
		asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
	}
	copy(r.Stack0[:], b.stk())
	for i := int(b.nstk); i < len(r.Stack0); i++ {
		r.Stack0[i] = 0
	}
}

// iterate_memprof calls fn for each memory profile bucket, passing the
// bucket's stack and its active allocation and free counts, while holding
// profMemActiveLock.
func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
	lock(&profMemActiveLock)
	head := (*bucket)(mbuckets.Load())
	for b := head; b != nil; b = b.allnext {
		mp := b.mp()
		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
	}
	unlock(&profMemActiveLock)
}

// BlockProfileRecord describes blocking events originating
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64
	Cycles int64
	StackRecord
}

// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&profBlockLock)
	head := (*bucket)(bbuckets.Load())
	for b := head; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := head; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			// Prevent callers from having to worry about division by zero errors.
			// See discussion on http://golang.org/cl/299991.
			if r.Count == 0 {
				r.Count = 1
			}
			r.Cycles = bp.cycles
			if raceenabled {
				racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
			}
			if msanenabled {
				msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
			}
			if asanenabled {
				asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
			}
			i := copy(r.Stack0[:], b.stk())
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&profBlockLock)
	return
}

// MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false.
//
// Most clients should use the runtime/pprof package
// instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&profBlockLock)
	head := (*bucket)(xbuckets.Load())
	for b := head; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := head; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			r.Cycles = bp.cycles
			i := copy(r.Stack0[:], b.stk())
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&profBlockLock)
	return
}

// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
	for mp := first; mp != nil; mp = mp.alllink {
		n++
	}
	if n <= len(p) {
		ok = true
		i := 0
		for mp := first; mp != nil; mp = mp.alllink {
			p[i].Stack0 = mp.createstack
			i++
		}
	}
	return
}

//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	return goroutineProfileWithLabels(p, labels)
}

// labels may be nil. If labels is non-nil, it must have the same length as p.
func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	if labels != nil && len(labels) != len(p) {
		labels = nil
	}

	return goroutineProfileWithLabelsConcurrent(p, labels)
}

var goroutineProfile = struct {
	sema    uint32
	active  bool
	offset  atomic.Int64
	records []StackRecord
	labels  []unsafe.Pointer
}{
	sema: 1,
}

// goroutineProfileState indicates the status of a goroutine's stack for the
// current in-progress goroutine profile. Goroutines' stacks are initially
// "Absent" from the profile, and end up "Satisfied" by the time the profile is
// complete. While a goroutine's stack is being captured, its
// goroutineProfileState will be "InProgress" and it will not be able to run
// until the capture completes and the state moves to "Satisfied".
//
// Some goroutines (the finalizer goroutine, which at various times can be
// either a "system" or a "user" goroutine, the goroutine that is coordinating
// the profile, and any goroutines created during the profile) move directly
// to the "Satisfied" state.
type goroutineProfileState uint32

const (
	goroutineProfileAbsent goroutineProfileState = iota
	goroutineProfileInProgress
	goroutineProfileSatisfied
)

type goroutineProfileStateHolder atomic.Uint32

func (p *goroutineProfileStateHolder) Load() goroutineProfileState {
	return goroutineProfileState((*atomic.Uint32)(p).Load())
}

func (p *goroutineProfileStateHolder) Store(value goroutineProfileState) {
	(*atomic.Uint32)(p).Store(uint32(value))
}

func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileState) bool {
	return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
}

func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	semacquire(&goroutineProfile.sema)

	ourg := getg()

	stopTheWorld("profile")
	// Using gcount while the world is stopped should give us a consistent view
	// of the number of live goroutines, minus the number of goroutines that are
	// alive and permanently marked as "system". But to make this count agree
	// with what we'd get from isSystemGoroutine, we need special handling for
	// goroutines that can vary between user and system to ensure that the count
	// doesn't change during the collection. So, check the finalizer goroutine
	// in particular.
	n = int(gcount())
	if fingStatus.Load()&fingRunningFinalizer != 0 {
		n++
	}

	if n > len(p) {
		// There's not enough space in p to store the whole profile, so (per the
		// contract of runtime.GoroutineProfile) we're not allowed to write to p
		// at all and must return n, false.
		startTheWorld()
		semrelease(&goroutineProfile.sema)
		return n, false
	}

	// Save current goroutine.
	sp := getcallersp()
	pc := getcallerpc()
	systemstack(func() {
		saveg(pc, sp, ourg, &p[0])
	})
	ourg.goroutineProfiled.Store(goroutineProfileSatisfied)
	goroutineProfile.offset.Store(1)

	// Prepare for all other goroutines to enter the profile. Aside from ourg,
	// every goroutine struct in the allgs list has its goroutineProfiled field
	// cleared. Any goroutine created from this point on (while
	// goroutineProfile.active is set) will start with its goroutineProfiled
	// field set to goroutineProfileSatisfied.
	goroutineProfile.active = true
	goroutineProfile.records = p
	goroutineProfile.labels = labels
	// The finalizer goroutine needs special handling because it can vary over
	// time between being a user goroutine (eligible for this profile) and a
	// system goroutine (to be excluded). Pick one before restarting the world.
	if fing != nil {
		fing.goroutineProfiled.Store(goroutineProfileSatisfied)
		if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) {
			doRecordGoroutineProfile(fing)
		}
	}
	startTheWorld()

	// Visit each goroutine that existed as of the startTheWorld call above.
	//
	// New goroutines may not be in this list, but we didn't want to know about
	// them anyway. If they do appear in this list (via reusing a dead goroutine
	// struct, or racing to launch between the world restarting and us getting
	// the list), they will already have their goroutineProfiled field set to
	// goroutineProfileSatisfied before their state transitions out of _Gdead.
	//
	// Any goroutine that the scheduler tries to execute concurrently with this
	// call will start by adding itself to the profile (before the act of
	// executing can cause any changes in its stack).
	forEachGRace(func(gp1 *g) {
		tryRecordGoroutineProfile(gp1, Gosched)
	})

	stopTheWorld("profile cleanup")
	endOffset := goroutineProfile.offset.Swap(0)
	goroutineProfile.active = false
	goroutineProfile.records = nil
	goroutineProfile.labels = nil
	startTheWorld()

	// Restore the invariant that every goroutine struct in allgs has its
	// goroutineProfiled field cleared.
	forEachGRace(func(gp1 *g) {
		gp1.goroutineProfiled.Store(goroutineProfileAbsent)
	})

	if raceenabled {
		raceacquire(unsafe.Pointer(&labelSync))
	}

	if n != int(endOffset) {
		// It's a big surprise that the number of goroutines changed while we
		// were collecting the profile. But probably better to return a
		// truncated profile than to crash the whole process.
		//
		// For instance, needm moves a goroutine out of the _Gdead state and so
		// might be able to change the goroutine count without interacting with
		// the scheduler. For code like that, the race windows are small and the
		// combination of features is uncommon, so it's hard to be (and remain)
		// sure we've caught them all.
	}

	semrelease(&goroutineProfile.sema)
	return n, true
}

// tryRecordGoroutineProfileWB asserts that write barriers are allowed and calls
// tryRecordGoroutineProfile.
//
//go:yeswritebarrierrec
func tryRecordGoroutineProfileWB(gp1 *g) {
	if getg().m.p.ptr() == nil {
		throw("no P available, write barriers are forbidden")
	}
	tryRecordGoroutineProfile(gp1, osyield)
}

// tryRecordGoroutineProfile ensures that gp1 has the appropriate representation
// in the current goroutine profile: either that it should not be profiled, or
// that a snapshot of its call stack and labels are now in the profile.
func tryRecordGoroutineProfile(gp1 *g, yield func()) {
	if readgstatus(gp1) == _Gdead {
		// Dead goroutines should not appear in the profile. Goroutines that
		// start while profile collection is active will get goroutineProfiled
		// set to goroutineProfileSatisfied before transitioning out of _Gdead,
		// so here we check _Gdead first.
		return
	}
	if isSystemGoroutine(gp1, true) {
		// System goroutines should not appear in the profile. (The finalizer
		// goroutine is marked as "already profiled".)
		return
	}

	for {
		prev := gp1.goroutineProfiled.Load()
		if prev == goroutineProfileSatisfied {
			// This goroutine is already in the profile (or is new since the
			// start of collection, so shouldn't appear in the profile).
			break
		}
		if prev == goroutineProfileInProgress {
			// Something else is adding gp1 to the goroutine profile right now.
			// Give that a moment to finish.
			yield()
			continue
		}

		// While we have gp1.goroutineProfiled set to
		// goroutineProfileInProgress, gp1 may appear _Grunnable but will not
		// actually be able to run. Disable preemption for ourselves, to make
		// sure we finish profiling gp1 right away instead of leaving it stuck
		// in this limbo.
		mp := acquirem()
		if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) {
			doRecordGoroutineProfile(gp1)
			gp1.goroutineProfiled.Store(goroutineProfileSatisfied)
		}
		releasem(mp)
	}
}

// doRecordGoroutineProfile writes gp1's call stack and labels to an in-progress
// goroutine profile. Preemption is disabled.
//
// This may be called via tryRecordGoroutineProfile in two ways: by the
// goroutine that is coordinating the goroutine profile (running on its own
// stack), or from the scheduler in preparation to execute gp1 (running on the
// system stack).
func doRecordGoroutineProfile(gp1 *g) {
	if readgstatus(gp1) == _Grunning {
		print("doRecordGoroutineProfile gp1=", gp1.goid, "\n")
		throw("cannot read stack of running goroutine")
	}

	offset := int(goroutineProfile.offset.Add(1)) - 1

	if offset >= len(goroutineProfile.records) {
		// Should be impossible, but better to return a truncated profile than
		// to crash the entire process at this point. Instead, deal with it in
		// goroutineProfileWithLabelsConcurrent where we have more context.
		return
	}

	// saveg calls gentraceback, which may call cgo traceback functions. When
	// called from the scheduler, this is on the system stack already so
	// traceback.go:cgoContextPCs will avoid calling back into the scheduler.
	//
	// When called from the goroutine coordinating the profile, we still have
	// set gp1.goroutineProfiled to goroutineProfileInProgress and so are still
	// preventing it from being truly _Grunnable. So we'll use the system stack
	// to avoid schedule delays.
	systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset]) })

	if goroutineProfile.labels != nil {
		goroutineProfile.labels[offset] = gp1.labels
	}
}

func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	gp := getg()

	isOK := func(gp1 *g) bool {
		// Checking isSystemGoroutine here makes GoroutineProfile
		// consistent with both NumGoroutine and Stack.
		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
	}

	stopTheWorld("profile")

	// World is stopped, no locking required.
	n = 1
	forEachGRace(func(gp1 *g) {
		if isOK(gp1) {
			n++
		}
	})

	if n <= len(p) {
		ok = true
		r, lbl := p, labels

		// Save current goroutine.
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			saveg(pc, sp, gp, &r[0])
		})
		r = r[1:]

		// If we have a place to put our goroutine labelmap, insert it there.
		if labels != nil {
			lbl[0] = gp.labels
			lbl = lbl[1:]
		}

		// Save other goroutines.
		forEachGRace(func(gp1 *g) {
			if !isOK(gp1) {
				return
			}

			if len(r) == 0 {
				// Should be impossible, but better to return a
				// truncated profile than to crash the entire process.
				return
			}
			// saveg calls gentraceback, which may call cgo traceback functions.
			// The world is stopped, so it cannot use cgocall (which will be
			// blocked at exitsyscall). Do it on the system stack so it won't
			// call into the scheduler (see traceback.go:cgoContextPCs).
			systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) })
			if labels != nil {
				lbl[0] = gp1.labels
				lbl = lbl[1:]
			}
			r = r[1:]
		})
	}

	if raceenabled {
		raceacquire(unsafe.Pointer(&labelSync))
	}

	startTheWorld()
	return n, ok
}

// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {

	return goroutineProfileWithLabels(p, nil)
}

func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
	var u unwinder
	u.initAt(pc, sp, 0, gp, unwindSilentErrors)
	n := tracebackPCs(&u, 0, r.Stack0[:])
	if n < len(r.Stack0) {
		r.Stack0[n] = 0
	}
}

// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			g0 := getg()
			// Force traceback=1 to override GOTRACEBACK setting,
			// so that Stack's results are consistent.
			// GOTRACEBACK is only about crash dumps.
			g0.m.traceback = 1
			g0.writebuf = buf[0:0:len(buf)]
			goroutineheader(gp)
			traceback(pc, sp, 0, gp)
			if all {
				tracebackothers(gp)
			}
			g0.m.traceback = 0
			n = len(g0.writebuf)
			g0.writebuf = nil
		})
	}

	if all {
		startTheWorld()
	}
	return n
}

// Tracing of alloc/free/gc.

var tracelock mutex

func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	if typ == nil {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ", ", toRType(typ).string(), ")\n")
	}
	if gp.m.curg == nil || gp == gp.m.curg {
		goroutineheader(gp)
		pc := getcallerpc()
		sp := getcallersp()
		systemstack(func() {
			traceback(pc, sp, 0, gp)
		})
	} else {
		goroutineheader(gp.m.curg)
		traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg)
	}
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracefree(p unsafe.Pointer, size uintptr) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracefree(", p, ", ", hex(size), ")\n")
	goroutineheader(gp)
	pc := getcallerpc()
	sp := getcallersp()
	systemstack(func() {
		traceback(pc, sp, 0, gp)
	})
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracegc() {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracegc()\n")
	// running on m->g0 stack; show all non-g0 goroutines
	tracebackothers(gp)
	print("end tracegc\n")
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}