github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/mprof.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Malloc profiling. 6 // Patterned after tcmalloc's algorithms; shorter code. 7 8 package runtime 9 10 import ( 11 "internal/abi" 12 "runtime/internal/atomic" 13 "runtime/internal/sys" 14 "unsafe" 15 ) 16 17 // NOTE(rsc): Everything here could use cas if contention became an issue. 18 var ( 19 // profInsertLock protects changes to the start of all *bucket linked lists 20 profInsertLock mutex 21 // profBlockLock protects the contents of every blockRecord struct 22 profBlockLock mutex 23 // profMemActiveLock protects the active field of every memRecord struct 24 profMemActiveLock mutex 25 // profMemFutureLock is a set of locks that protect the respective elements 26 // of the future array of every memRecord struct 27 profMemFutureLock [len(memRecord{}.future)]mutex 28 ) 29 30 // All memory allocations are local and do not escape outside of the profiler. 31 // The profiler is forbidden from referring to garbage-collected memory. 32 33 const ( 34 // profile types 35 memProfile bucketType = 1 + iota 36 blockProfile 37 mutexProfile 38 39 // size of bucket hash table 40 buckHashSize = 179999 41 42 // maxStack is the max depth of stack to record in bucket. 43 // Note that it's only used internally as a guard against 44 // wildly out-of-bounds slicing of the PCs that come after 45 // a bucket struct, and it could increase in the future. 46 maxStack = 32 47 ) 48 49 type bucketType int 50 51 // A bucket holds per-call-stack profiling information. 52 // The representation is a bit sleazy, inherited from C. 53 // This struct defines the bucket header. It is followed in 54 // memory by the stack words and then the actual record 55 // data, either a memRecord or a blockRecord. 56 // 57 // Per-call-stack profiling information. 58 // Lookup by hashing call stack into a linked-list hash table. 59 // 60 // None of the fields in this bucket header are modified after 61 // creation, including its next and allnext links. 62 // 63 // No heap pointers. 64 type bucket struct { 65 _ sys.NotInHeap 66 next *bucket 67 allnext *bucket 68 typ bucketType // memBucket or blockBucket (includes mutexProfile) 69 hash uintptr 70 size uintptr 71 nstk uintptr 72 } 73 74 // A memRecord is the bucket data for a bucket of type memProfile, 75 // part of the memory profile. 76 type memRecord struct { 77 // The following complex 3-stage scheme of stats accumulation 78 // is required to obtain a consistent picture of mallocs and frees 79 // for some point in time. 80 // The problem is that mallocs come in real time, while frees 81 // come only after a GC during concurrent sweeping. So if we would 82 // naively count them, we would get a skew toward mallocs. 83 // 84 // Hence, we delay information to get consistent snapshots as 85 // of mark termination. 
Allocations count toward the next mark 86 // termination's snapshot, while sweep frees count toward the 87 // previous mark termination's snapshot: 88 // 89 // MT MT MT MT 90 // .·| .·| .·| .·| 91 // .·˙ | .·˙ | .·˙ | .·˙ | 92 // .·˙ | .·˙ | .·˙ | .·˙ | 93 // .·˙ |.·˙ |.·˙ |.·˙ | 94 // 95 // alloc → ▲ ← free 96 // ┠┅┅┅┅┅┅┅┅┅┅┅P 97 // C+2 → C+1 → C 98 // 99 // alloc → ▲ ← free 100 // ┠┅┅┅┅┅┅┅┅┅┅┅P 101 // C+2 → C+1 → C 102 // 103 // Since we can't publish a consistent snapshot until all of 104 // the sweep frees are accounted for, we wait until the next 105 // mark termination ("MT" above) to publish the previous mark 106 // termination's snapshot ("P" above). To do this, allocation 107 // and free events are accounted to *future* heap profile 108 // cycles ("C+n" above) and we only publish a cycle once all 109 // of the events from that cycle are guaranteed to be done. Specifically: 110 // 111 // Mallocs are accounted to cycle C+2. 112 // Explicit frees are accounted to cycle C+2. 113 // GC frees (done during sweeping) are accounted to cycle C+1. 114 // 115 // After mark termination, we increment the global heap 116 // profile cycle counter and accumulate the stats from cycle C 117 // into the active profile. 118 119 // active is the currently published profile. A profiling 120 // cycle can be accumulated into active once it's complete. 121 active memRecordCycle 122 123 // future records the profile events we're counting for cycles 124 // that have not yet been published. This is a ring buffer 125 // indexed by the global heap profile cycle C and stores 126 // cycles C, C+1, and C+2. Unlike active, these counts are 127 // only for a single cycle; they are not cumulative across 128 // cycles. 129 // 130 // We store cycle C here because there's a window between when 131 // C becomes the active cycle and when we've flushed it to 132 // active. 133 future [3]memRecordCycle 134 } 135 136 // memRecordCycle holds the allocation and free counts for a single heap profile cycle. 137 type memRecordCycle struct { 138 allocs, frees uintptr 139 alloc_bytes, free_bytes uintptr 140 } 141 142 // add accumulates b into a. It does not zero b. 143 func (a *memRecordCycle) add(b *memRecordCycle) { 144 a.allocs += b.allocs 145 a.frees += b.frees 146 a.alloc_bytes += b.alloc_bytes 147 a.free_bytes += b.free_bytes 148 } 149 150 // A blockRecord is the bucket data for a bucket of type blockProfile, 151 // which is used in blocking and mutex profiles. 152 type blockRecord struct { 153 count float64 154 cycles int64 155 } 156 157 var ( 158 mbuckets atomic.UnsafePointer // *bucket, memory profile buckets 159 bbuckets atomic.UnsafePointer // *bucket, blocking profile buckets 160 xbuckets atomic.UnsafePointer // *bucket, mutex profile buckets 161 buckhash atomic.UnsafePointer // *buckhashArray 162 163 mProfCycle mProfCycleHolder 164 ) 165 166 type buckhashArray [buckHashSize]atomic.UnsafePointer // *bucket 167 168 const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24) 169 170 // mProfCycleHolder holds the global heap profile cycle number (wrapped at 171 // mProfCycleWrap, stored starting at bit 1), and a flag (stored at bit 0) to 172 // indicate whether future[cycle] in all buckets has been queued to flush into 173 // the active profile. 174 type mProfCycleHolder struct { 175 value atomic.Uint32 176 } 177 178 // read returns the current cycle count. 179 func (c *mProfCycleHolder) read() (cycle uint32) { 180 v := c.value.Load() 181 cycle = v >> 1 182 return cycle 183 } 184 185 // setFlushed sets the flushed flag.
It returns the current cycle count and the 186 // previous value of the flushed flag. 187 func (c *mProfCycleHolder) setFlushed() (cycle uint32, alreadyFlushed bool) { 188 for { 189 prev := c.value.Load() 190 cycle = prev >> 1 191 alreadyFlushed = (prev & 0x1) != 0 192 next := prev | 0x1 193 if c.value.CompareAndSwap(prev, next) { 194 return cycle, alreadyFlushed 195 } 196 } 197 } 198 199 // increment increases the cycle count by one, wrapping the value at 200 // mProfCycleWrap. It clears the flushed flag. 201 func (c *mProfCycleHolder) increment() { 202 // We explicitly wrap mProfCycle rather than depending on 203 // uint wraparound because the memRecord.future ring does not 204 // itself wrap at a power of two. 205 for { 206 prev := c.value.Load() 207 cycle := prev >> 1 208 cycle = (cycle + 1) % mProfCycleWrap 209 next := cycle << 1 210 if c.value.CompareAndSwap(prev, next) { 211 break 212 } 213 } 214 } 215 216 // newBucket allocates a bucket with the given type and number of stack entries. 217 func newBucket(typ bucketType, nstk int) *bucket { 218 size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0)) 219 switch typ { 220 default: 221 throw("invalid profile bucket type") 222 case memProfile: 223 size += unsafe.Sizeof(memRecord{}) 224 case blockProfile, mutexProfile: 225 size += unsafe.Sizeof(blockRecord{}) 226 } 227 228 b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys)) 229 b.typ = typ 230 b.nstk = uintptr(nstk) 231 return b 232 } 233 234 // stk returns the slice in b holding the stack. 235 func (b *bucket) stk() []uintptr { 236 stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b))) 237 if b.nstk > maxStack { 238 // prove that slicing works; otherwise a failure requires a P 239 throw("bad profile stack count") 240 } 241 return stk[:b.nstk:b.nstk] 242 } 243 244 // mp returns the memRecord associated with the memProfile bucket b. 245 func (b *bucket) mp() *memRecord { 246 if b.typ != memProfile { 247 throw("bad use of bucket.mp") 248 } 249 data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0))) 250 return (*memRecord)(data) 251 } 252 253 // bp returns the blockRecord associated with the blockProfile bucket b. 254 func (b *bucket) bp() *blockRecord { 255 if b.typ != blockProfile && b.typ != mutexProfile { 256 throw("bad use of bucket.bp") 257 } 258 data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0))) 259 return (*blockRecord)(data) 260 } 261 262 // Return the bucket for stk[0:nstk], allocating new bucket if needed. 263 func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket { 264 bh := (*buckhashArray)(buckhash.Load()) 265 if bh == nil { 266 lock(&profInsertLock) 267 // check again under the lock 268 bh = (*buckhashArray)(buckhash.Load()) 269 if bh == nil { 270 bh = (*buckhashArray)(sysAlloc(unsafe.Sizeof(buckhashArray{}), &memstats.buckhash_sys)) 271 if bh == nil { 272 throw("runtime: cannot allocate memory") 273 } 274 buckhash.StoreNoWB(unsafe.Pointer(bh)) 275 } 276 unlock(&profInsertLock) 277 } 278 279 // Hash stack. 
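// (The mixing below is essentially Bob Jenkins' one-at-a-time hash: each PC,
// and then the bucket size, is added into h and stirred with shift/xor steps
// before the final avalanche.)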
280 var h uintptr 281 for _, pc := range stk { 282 h += pc 283 h += h << 10 284 h ^= h >> 6 285 } 286 // hash in size 287 h += size 288 h += h << 10 289 h ^= h >> 6 290 // finalize 291 h += h << 3 292 h ^= h >> 11 293 294 i := int(h % buckHashSize) 295 // first check optimistically, without the lock 296 for b := (*bucket)(bh[i].Load()); b != nil; b = b.next { 297 if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) { 298 return b 299 } 300 } 301 302 if !alloc { 303 return nil 304 } 305 306 lock(&profInsertLock) 307 // check again under the insertion lock 308 for b := (*bucket)(bh[i].Load()); b != nil; b = b.next { 309 if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) { 310 unlock(&profInsertLock) 311 return b 312 } 313 } 314 315 // Create new bucket. 316 b := newBucket(typ, len(stk)) 317 copy(b.stk(), stk) 318 b.hash = h 319 b.size = size 320 321 var allnext *atomic.UnsafePointer 322 if typ == memProfile { 323 allnext = &mbuckets 324 } else if typ == mutexProfile { 325 allnext = &xbuckets 326 } else { 327 allnext = &bbuckets 328 } 329 330 b.next = (*bucket)(bh[i].Load()) 331 b.allnext = (*bucket)(allnext.Load()) 332 333 bh[i].StoreNoWB(unsafe.Pointer(b)) 334 allnext.StoreNoWB(unsafe.Pointer(b)) 335 336 unlock(&profInsertLock) 337 return b 338 } 339 340 func eqslice(x, y []uintptr) bool { 341 if len(x) != len(y) { 342 return false 343 } 344 for i, xi := range x { 345 if xi != y[i] { 346 return false 347 } 348 } 349 return true 350 } 351 352 // mProf_NextCycle publishes the next heap profile cycle and creates a 353 // fresh heap profile cycle. This operation is fast and can be done 354 // during STW. The caller must call mProf_Flush before calling 355 // mProf_NextCycle again. 356 // 357 // This is called by mark termination during STW so allocations and 358 // frees after the world is started again count towards a new heap 359 // profiling cycle. 360 func mProf_NextCycle() { 361 mProfCycle.increment() 362 } 363 364 // mProf_Flush flushes the events from the current heap profiling 365 // cycle into the active profile. After this it is safe to start a new 366 // heap profiling cycle with mProf_NextCycle. 367 // 368 // This is called by GC after mark termination starts the world. In 369 // contrast with mProf_NextCycle, this is somewhat expensive, but safe 370 // to do concurrently. 371 func mProf_Flush() { 372 cycle, alreadyFlushed := mProfCycle.setFlushed() 373 if alreadyFlushed { 374 return 375 } 376 377 index := cycle % uint32(len(memRecord{}.future)) 378 lock(&profMemActiveLock) 379 lock(&profMemFutureLock[index]) 380 mProf_FlushLocked(index) 381 unlock(&profMemFutureLock[index]) 382 unlock(&profMemActiveLock) 383 } 384 385 // mProf_FlushLocked flushes the events from the heap profiling cycle at index 386 // into the active profile. The caller must hold the lock for the active profile 387 // (profMemActiveLock) and for the profiling cycle at index 388 // (profMemFutureLock[index]). 389 func mProf_FlushLocked(index uint32) { 390 assertLockHeld(&profMemActiveLock) 391 assertLockHeld(&profMemFutureLock[index]) 392 head := (*bucket)(mbuckets.Load()) 393 for b := head; b != nil; b = b.allnext { 394 mp := b.mp() 395 396 // Flush cycle C into the published profile and clear 397 // it for reuse. 398 mpc := &mp.future[index] 399 mp.active.add(mpc) 400 *mpc = memRecordCycle{} 401 } 402 } 403 404 // mProf_PostSweep records that all sweep frees for this GC cycle have 405 // completed. 
This has the effect of publishing the heap profile 406 // snapshot as of the last mark termination without advancing the heap 407 // profile cycle. 408 func mProf_PostSweep() { 409 // Flush cycle C+1 to the active profile so everything as of 410 // the last mark termination becomes visible. *Don't* advance 411 // the cycle, since we're still accumulating allocs in cycle 412 // C+2, which have to become C+1 in the next mark termination 413 // and so on. 414 cycle := mProfCycle.read() + 1 415 416 index := cycle % uint32(len(memRecord{}.future)) 417 lock(&profMemActiveLock) 418 lock(&profMemFutureLock[index]) 419 mProf_FlushLocked(index) 420 unlock(&profMemFutureLock[index]) 421 unlock(&profMemActiveLock) 422 } 423 424 // Called by malloc to record a profiled block. 425 func mProf_Malloc(p unsafe.Pointer, size uintptr) { 426 var stk [maxStack]uintptr 427 nstk := callers(4, stk[:]) 428 429 index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future)) 430 431 b := stkbucket(memProfile, size, stk[:nstk], true) 432 mp := b.mp() 433 mpc := &mp.future[index] 434 435 lock(&profMemFutureLock[index]) 436 mpc.allocs++ 437 mpc.alloc_bytes += size 438 unlock(&profMemFutureLock[index]) 439 440 // Setprofilebucket locks a bunch of other mutexes, so we call it outside of 441 // the profiler locks. This reduces potential contention and chances of 442 // deadlocks. Since the object must be alive during the call to 443 // mProf_Malloc, it's fine to do this non-atomically. 444 systemstack(func() { 445 setprofilebucket(p, b) 446 }) 447 } 448 449 // Called when freeing a profiled block. 450 func mProf_Free(b *bucket, size uintptr) { 451 index := (mProfCycle.read() + 1) % uint32(len(memRecord{}.future)) 452 453 mp := b.mp() 454 mpc := &mp.future[index] 455 456 lock(&profMemFutureLock[index]) 457 mpc.frees++ 458 mpc.free_bytes += size 459 unlock(&profMemFutureLock[index]) 460 } 461 462 var blockprofilerate uint64 // in CPU ticks 463 464 // SetBlockProfileRate controls the fraction of goroutine blocking events 465 // that are reported in the blocking profile. The profiler aims to sample 466 // an average of one blocking event per rate nanoseconds spent blocked. 467 // 468 // To include every blocking event in the profile, pass rate = 1. 469 // To turn off profiling entirely, pass rate <= 0. 470 func SetBlockProfileRate(rate int) { 471 var r int64 472 if rate <= 0 { 473 r = 0 // disable profiling 474 } else if rate == 1 { 475 r = 1 // profile everything 476 } else { 477 // convert ns to cycles, use float64 to prevent overflow during multiplication 478 r = int64(float64(rate) * float64(ticksPerSecond()) / (1000 * 1000 * 1000)) 479 if r == 0 { 480 r = 1 481 } 482 } 483 484 atomic.Store64(&blockprofilerate, uint64(r)) 485 } 486 487 func blockevent(cycles int64, skip int) { 488 if cycles <= 0 { 489 cycles = 1 490 } 491 492 rate := int64(atomic.Load64(&blockprofilerate)) 493 if blocksampled(cycles, rate) { 494 saveblockevent(cycles, rate, skip+1, blockProfile) 495 } 496 } 497 498 // blocksampled returns true for all events where cycles >= rate. Shorter 499 // events have a cycles/rate random chance of returning true. 
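// For example, with rate=1e6 and cycles=250000, an event is kept with
// probability roughly cycles/rate = 0.25; saveBlockEventStack then scales a
// kept event by rate/cycles so the profile stays unbiased in expectation.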
500 func blocksampled(cycles, rate int64) bool { 501 if rate <= 0 || (rate > cycles && cheaprand64()%rate > cycles) { 502 return false 503 } 504 return true 505 } 506 507 func saveblockevent(cycles, rate int64, skip int, which bucketType) { 508 gp := getg() 509 var nstk int 510 var stk [maxStack]uintptr 511 if gp.m.curg == nil || gp.m.curg == gp { 512 nstk = callers(skip, stk[:]) 513 } else { 514 nstk = gcallers(gp.m.curg, skip, stk[:]) 515 } 516 517 saveBlockEventStack(cycles, rate, stk[:nstk], which) 518 } 519 520 // lockTimer assists with profiling contention on runtime-internal locks. 521 // 522 // There are several steps between the time that an M experiences contention and 523 // when that contention may be added to the profile. This comes from our 524 // constraints: We need to keep the critical section of each lock small, 525 // especially when those locks are contended. The reporting code cannot acquire 526 // new locks until the M has released all other locks, which means no memory 527 // allocations and encourages use of (temporary) M-local storage. 528 // 529 // The M will have space for storing one call stack that caused contention, and 530 // for the magnitude of that contention. It will also have space to store the 531 // magnitude of additional contention the M caused, since it only has space to 532 // remember one call stack and might encounter several contention events before 533 // it releases all of its locks and is thus able to transfer the local buffer 534 // into the profile. 535 // 536 // The M will collect the call stack when it unlocks the contended lock. That 537 // minimizes the impact on the critical section of the contended lock, and 538 // matches the mutex profile's behavior for contention in sync.Mutex: measured 539 // at the Unlock method. 540 // 541 // The profile for contention on sync.Mutex blames the caller of Unlock for the 542 // amount of contention experienced by the callers of Lock which had to wait. 543 // When there are several critical sections, this allows identifying which of 544 // them is responsible. 545 // 546 // Matching that behavior for runtime-internal locks will require identifying 547 // which Ms are blocked on the mutex. The semaphore-based implementation is 548 // ready to allow that, but the futex-based implementation will require a bit 549 // more work. Until then, we report contention on runtime-internal locks with a 550 // call stack taken from the unlock call (like the rest of the user-space 551 // "mutex" profile), but assign it a duration value based on how long the 552 // previous lock call took (like the user-space "block" profile). 553 // 554 // Thus, reporting the call stacks of runtime-internal lock contention is 555 // guarded by GODEBUG for now. Set GODEBUG=runtimecontentionstacks=1 to enable. 556 // 557 // TODO(rhysh): plumb through the delay duration, remove GODEBUG, update comment 558 // 559 // The M will track this by storing a pointer to the lock; lock/unlock pairs for 560 // runtime-internal locks are always on the same M. 561 // 562 // Together, that demands several steps for recording contention. First, when 563 // finally acquiring a contended lock, the M decides whether it should plan to 564 // profile that event by storing a pointer to the lock in its "to be profiled 565 // upon unlock" field. If that field is already set, it uses the relative 566 // magnitudes to weight a random choice between itself and the other lock, with 567 // the loser's time being added to the "additional contention" field. 
Otherwise 568 // if the M's call stack buffer is occupied, it does the comparison against that 569 // sample's magnitude. 570 // 571 // Second, having unlocked a mutex the M checks to see if it should capture the 572 // call stack into its local buffer. Finally, when the M unlocks its last mutex, 573 // it transfers the local buffer into the profile. As part of that step, it also 574 // transfers any "additional contention" time to the profile. Any lock 575 // contention that it experiences while adding samples to the profile will be 576 // recorded later as "additional contention" and not include a call stack, to 577 // avoid an echo. 578 type lockTimer struct { 579 lock *mutex 580 timeRate int64 581 timeStart int64 582 tickStart int64 583 } 584 585 func (lt *lockTimer) begin() { 586 rate := int64(atomic.Load64(&mutexprofilerate)) 587 588 lt.timeRate = gTrackingPeriod 589 if rate != 0 && rate < lt.timeRate { 590 lt.timeRate = rate 591 } 592 if int64(cheaprand())%lt.timeRate == 0 { 593 lt.timeStart = nanotime() 594 } 595 596 if rate > 0 && int64(cheaprand())%rate == 0 { 597 lt.tickStart = cputicks() 598 } 599 } 600 601 func (lt *lockTimer) end() { 602 gp := getg() 603 604 if lt.timeStart != 0 { 605 nowTime := nanotime() 606 gp.m.mLockProfile.waitTime.Add((nowTime - lt.timeStart) * lt.timeRate) 607 } 608 609 if lt.tickStart != 0 { 610 nowTick := cputicks() 611 gp.m.mLockProfile.recordLock(nowTick-lt.tickStart, lt.lock) 612 } 613 } 614 615 type mLockProfile struct { 616 waitTime atomic.Int64 // total nanoseconds spent waiting in runtime.lockWithRank 617 stack [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank 618 pending uintptr // *mutex that experienced contention (to be traceback-ed) 619 cycles int64 // cycles attributable to "pending" (if set), otherwise to "stack" 620 cyclesLost int64 // contention for which we weren't able to record a call stack 621 disabled bool // attribute all time to "lost" 622 } 623 624 func (prof *mLockProfile) recordLock(cycles int64, l *mutex) { 625 if cycles <= 0 { 626 return 627 } 628 629 if prof.disabled { 630 // We're experiencing contention while attempting to report contention. 631 // Make a note of its magnitude, but don't allow it to be the sole cause 632 // of another contention report. 633 prof.cyclesLost += cycles 634 return 635 } 636 637 if uintptr(unsafe.Pointer(l)) == prof.pending { 638 // Optimization: we'd already planned to profile this same lock (though 639 // possibly from a different unlock site). 640 prof.cycles += cycles 641 return 642 } 643 644 if prev := prof.cycles; prev > 0 { 645 // We can only store one call stack for runtime-internal lock contention 646 // on this M, and we've already got one. Decide which should stay, and 647 // add the other to the report for runtime._LostContendedRuntimeLock. 648 prevScore := uint64(cheaprand64()) % uint64(prev) 649 thisScore := uint64(cheaprand64()) % uint64(cycles) 650 if prevScore > thisScore { 651 prof.cyclesLost += cycles 652 return 653 } else { 654 prof.cyclesLost += prev 655 } 656 } 657 // Saving the *mutex as a uintptr is safe because: 658 // - lockrank_on.go does this too, which gives it regular exercise 659 // - the lock would only move if it's stack allocated, which means it 660 // cannot experience multi-M contention 661 prof.pending = uintptr(unsafe.Pointer(l)) 662 prof.cycles = cycles 663 } 664 665 // From unlock2, we might not be holding a p in this code. 
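// Without a P, write barriers are also forbidden, which is why the function
// below carries the go:nowritebarrierrec annotation.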
666 // 667 //go:nowritebarrierrec 668 func (prof *mLockProfile) recordUnlock(l *mutex) { 669 if uintptr(unsafe.Pointer(l)) == prof.pending { 670 prof.captureStack() 671 } 672 if gp := getg(); gp.m.locks == 1 && gp.m.mLockProfile.cycles != 0 { 673 prof.store() 674 } 675 } 676 677 func (prof *mLockProfile) captureStack() { 678 skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank 679 if staticLockRanking { 680 // When static lock ranking is enabled, we'll always be on the system 681 // stack at this point. There will be a runtime.unlockWithRank.func1 682 // frame, and if the call to runtime.unlock took place on a user stack 683 // then there'll also be a runtime.systemstack frame. To keep stack 684 // traces somewhat consistent whether or not static lock ranking is 685 // enabled, we'd like to skip those. But it's hard to tell how long 686 // we've been on the system stack so accept an extra frame in that case, 687 // with a leaf of "runtime.unlockWithRank runtime.unlock" instead of 688 // "runtime.unlock". 689 skip += 1 // runtime.unlockWithRank.func1 690 } 691 prof.pending = 0 692 693 if debug.runtimeContentionStacks.Load() == 0 { 694 prof.stack[0] = abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum 695 prof.stack[1] = 0 696 return 697 } 698 699 var nstk int 700 gp := getg() 701 sp := getcallersp() 702 pc := getcallerpc() 703 systemstack(func() { 704 var u unwinder 705 u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack) 706 nstk = tracebackPCs(&u, skip, prof.stack[:]) 707 }) 708 if nstk < len(prof.stack) { 709 prof.stack[nstk] = 0 710 } 711 } 712 713 func (prof *mLockProfile) store() { 714 // Report any contention we experience within this function as "lost"; it's 715 // important that the act of reporting a contention event not lead to a 716 // reportable contention event. This also means we can use prof.stack 717 // without copying, since it won't change during this function. 718 mp := acquirem() 719 prof.disabled = true 720 721 nstk := maxStack 722 for i := 0; i < nstk; i++ { 723 if pc := prof.stack[i]; pc == 0 { 724 nstk = i 725 break 726 } 727 } 728 729 cycles, lost := prof.cycles, prof.cyclesLost 730 prof.cycles, prof.cyclesLost = 0, 0 731 732 rate := int64(atomic.Load64(&mutexprofilerate)) 733 saveBlockEventStack(cycles, rate, prof.stack[:nstk], mutexProfile) 734 if lost > 0 { 735 lostStk := [...]uintptr{ 736 abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum, 737 } 738 saveBlockEventStack(lost, rate, lostStk[:], mutexProfile) 739 } 740 741 prof.disabled = false 742 releasem(mp) 743 } 744 745 func saveBlockEventStack(cycles, rate int64, stk []uintptr, which bucketType) { 746 b := stkbucket(which, 0, stk, true) 747 bp := b.bp() 748 749 lock(&profBlockLock) 750 // We want to up-scale the count and cycles according to the 751 // probability that the event was sampled. For block profile events, 752 // the sample probability is 1 if cycles >= rate, and cycles / rate 753 // otherwise. For mutex profile events, the sample probability is 1 / rate. 754 // We scale the events by 1 / (probability the event was sampled). 755 if which == blockProfile && cycles < rate { 756 // Remove sampling bias, see discussion on http://golang.org/cl/299991. 
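// For example, a block event kept with cycles=250000 and rate=1e6 had
// roughly a 1/4 chance of being sampled, so it is recorded as
// rate/cycles = 4 events totalling rate = 1e6 cycles.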
757 bp.count += float64(rate) / float64(cycles) 758 bp.cycles += rate 759 } else if which == mutexProfile { 760 bp.count += float64(rate) 761 bp.cycles += rate * cycles 762 } else { 763 bp.count++ 764 bp.cycles += cycles 765 } 766 unlock(&profBlockLock) 767 } 768 769 var mutexprofilerate uint64 // fraction sampled 770 771 // SetMutexProfileFraction controls the fraction of mutex contention events 772 // that are reported in the mutex profile. On average 1/rate events are 773 // reported. The previous rate is returned. 774 // 775 // To turn off profiling entirely, pass rate 0. 776 // To just read the current rate, pass rate < 0. 777 // (For n>1 the details of sampling may change.) 778 func SetMutexProfileFraction(rate int) int { 779 if rate < 0 { 780 return int(mutexprofilerate) 781 } 782 old := mutexprofilerate 783 atomic.Store64(&mutexprofilerate, uint64(rate)) 784 return int(old) 785 } 786 787 //go:linkname mutexevent sync.event 788 func mutexevent(cycles int64, skip int) { 789 if cycles < 0 { 790 cycles = 0 791 } 792 rate := int64(atomic.Load64(&mutexprofilerate)) 793 if rate > 0 && cheaprand64()%rate == 0 { 794 saveblockevent(cycles, rate, skip+1, mutexProfile) 795 } 796 } 797 798 // Go interface to profile data. 799 800 // A StackRecord describes a single execution stack. 801 type StackRecord struct { 802 Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry 803 } 804 805 // Stack returns the stack trace associated with the record, 806 // a prefix of r.Stack0. 807 func (r *StackRecord) Stack() []uintptr { 808 for i, v := range r.Stack0 { 809 if v == 0 { 810 return r.Stack0[0:i] 811 } 812 } 813 return r.Stack0[0:] 814 } 815 816 // MemProfileRate controls the fraction of memory allocations 817 // that are recorded and reported in the memory profile. 818 // The profiler aims to sample an average of 819 // one allocation per MemProfileRate bytes allocated. 820 // 821 // To include every allocated block in the profile, set MemProfileRate to 1. 822 // To turn off profiling entirely, set MemProfileRate to 0. 823 // 824 // The tools that process the memory profiles assume that the 825 // profile rate is constant across the lifetime of the program 826 // and equal to the current value. Programs that change the 827 // memory profiling rate should do so just once, as early as 828 // possible in the execution of the program (for example, 829 // at the beginning of main). 830 var MemProfileRate int = 512 * 1024 831 832 // disableMemoryProfiling is set by the linker if runtime.MemProfile 833 // is not used and the link type guarantees nobody else could use it 834 // elsewhere. 835 var disableMemoryProfiling bool 836 837 // A MemProfileRecord describes the live objects allocated 838 // by a particular call sequence (stack trace). 839 type MemProfileRecord struct { 840 AllocBytes, FreeBytes int64 // number of bytes allocated, freed 841 AllocObjects, FreeObjects int64 // number of objects allocated, freed 842 Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry 843 } 844 845 // InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes). 846 func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes } 847 848 // InUseObjects returns the number of objects in use (AllocObjects - FreeObjects). 849 func (r *MemProfileRecord) InUseObjects() int64 { 850 return r.AllocObjects - r.FreeObjects 851 } 852 853 // Stack returns the stack trace associated with the record, 854 // a prefix of r.Stack0. 
855 func (r *MemProfileRecord) Stack() []uintptr { 856 for i, v := range r.Stack0 { 857 if v == 0 { 858 return r.Stack0[0:i] 859 } 860 } 861 return r.Stack0[0:] 862 } 863 864 // MemProfile returns a profile of memory allocated and freed per allocation 865 // site. 866 // 867 // MemProfile returns n, the number of records in the current memory profile. 868 // If len(p) >= n, MemProfile copies the profile into p and returns n, true. 869 // If len(p) < n, MemProfile does not change p and returns n, false. 870 // 871 // If inuseZero is true, the profile includes allocation records 872 // where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes. 873 // These are sites where memory was allocated, but it has all 874 // been released back to the runtime. 875 // 876 // The returned profile may be up to two garbage collection cycles old. 877 // This is to avoid skewing the profile toward allocations; because 878 // allocations happen in real time but frees are delayed until the garbage 879 // collector performs sweeping, the profile only accounts for allocations 880 // that have had a chance to be freed by the garbage collector. 881 // 882 // Most clients should use the runtime/pprof package or 883 // the testing package's -test.memprofile flag instead 884 // of calling MemProfile directly. 885 func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) { 886 cycle := mProfCycle.read() 887 // If we're between mProf_NextCycle and mProf_Flush, take care 888 // of flushing to the active profile so we only have to look 889 // at the active profile below. 890 index := cycle % uint32(len(memRecord{}.future)) 891 lock(&profMemActiveLock) 892 lock(&profMemFutureLock[index]) 893 mProf_FlushLocked(index) 894 unlock(&profMemFutureLock[index]) 895 clear := true 896 head := (*bucket)(mbuckets.Load()) 897 for b := head; b != nil; b = b.allnext { 898 mp := b.mp() 899 if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes { 900 n++ 901 } 902 if mp.active.allocs != 0 || mp.active.frees != 0 { 903 clear = false 904 } 905 } 906 if clear { 907 // Absolutely no data, suggesting that a garbage collection 908 // has not yet happened. In order to allow profiling when 909 // garbage collection is disabled from the beginning of execution, 910 // accumulate all of the cycles, and recount buckets. 911 n = 0 912 for b := head; b != nil; b = b.allnext { 913 mp := b.mp() 914 for c := range mp.future { 915 lock(&profMemFutureLock[c]) 916 mp.active.add(&mp.future[c]) 917 mp.future[c] = memRecordCycle{} 918 unlock(&profMemFutureLock[c]) 919 } 920 if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes { 921 n++ 922 } 923 } 924 } 925 if n <= len(p) { 926 ok = true 927 idx := 0 928 for b := head; b != nil; b = b.allnext { 929 mp := b.mp() 930 if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes { 931 record(&p[idx], b) 932 idx++ 933 } 934 } 935 } 936 unlock(&profMemActiveLock) 937 return 938 } 939 940 // Write b's data to r. 
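// The racewriterangepc, msanwrite, and asanwrite calls below inform the race
// detector, MSan, and ASan (when enabled) that r.Stack0 is about to be
// written, before the bucket's stack is copied into it.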
941 func record(r *MemProfileRecord, b *bucket) { 942 mp := b.mp() 943 r.AllocBytes = int64(mp.active.alloc_bytes) 944 r.FreeBytes = int64(mp.active.free_bytes) 945 r.AllocObjects = int64(mp.active.allocs) 946 r.FreeObjects = int64(mp.active.frees) 947 if raceenabled { 948 racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile)) 949 } 950 if msanenabled { 951 msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) 952 } 953 if asanenabled { 954 asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) 955 } 956 copy(r.Stack0[:], b.stk()) 957 clear(r.Stack0[b.nstk:]) 958 } 959 960 func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) { 961 lock(&profMemActiveLock) 962 head := (*bucket)(mbuckets.Load()) 963 for b := head; b != nil; b = b.allnext { 964 mp := b.mp() 965 fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees) 966 } 967 unlock(&profMemActiveLock) 968 } 969 970 // BlockProfileRecord describes blocking events originated 971 // at a particular call sequence (stack trace). 972 type BlockProfileRecord struct { 973 Count int64 974 Cycles int64 975 StackRecord 976 } 977 978 // BlockProfile returns n, the number of records in the current blocking profile. 979 // If len(p) >= n, BlockProfile copies the profile into p and returns n, true. 980 // If len(p) < n, BlockProfile does not change p and returns n, false. 981 // 982 // Most clients should use the [runtime/pprof] package or 983 // the [testing] package's -test.blockprofile flag instead 984 // of calling BlockProfile directly. 985 func BlockProfile(p []BlockProfileRecord) (n int, ok bool) { 986 lock(&profBlockLock) 987 head := (*bucket)(bbuckets.Load()) 988 for b := head; b != nil; b = b.allnext { 989 n++ 990 } 991 if n <= len(p) { 992 ok = true 993 for b := head; b != nil; b = b.allnext { 994 bp := b.bp() 995 r := &p[0] 996 r.Count = int64(bp.count) 997 // Prevent callers from having to worry about division by zero errors. 998 // See discussion on http://golang.org/cl/299991. 999 if r.Count == 0 { 1000 r.Count = 1 1001 } 1002 r.Cycles = bp.cycles 1003 if raceenabled { 1004 racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile)) 1005 } 1006 if msanenabled { 1007 msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) 1008 } 1009 if asanenabled { 1010 asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) 1011 } 1012 i := copy(r.Stack0[:], b.stk()) 1013 clear(r.Stack0[i:]) 1014 p = p[1:] 1015 } 1016 } 1017 unlock(&profBlockLock) 1018 return 1019 } 1020 1021 // MutexProfile returns n, the number of records in the current mutex profile. 1022 // If len(p) >= n, MutexProfile copies the profile into p and returns n, true. 1023 // Otherwise, MutexProfile does not change p, and returns n, false. 1024 // 1025 // Most clients should use the [runtime/pprof] package 1026 // instead of calling MutexProfile directly. 
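// In each returned record, Count is the (sampling-rescaled) number of
// contention events and Cycles is the rescaled total delay in CPU ticks, as
// accumulated by saveBlockEventStack above.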
1027 func MutexProfile(p []BlockProfileRecord) (n int, ok bool) { 1028 lock(&profBlockLock) 1029 head := (*bucket)(xbuckets.Load()) 1030 for b := head; b != nil; b = b.allnext { 1031 n++ 1032 } 1033 if n <= len(p) { 1034 ok = true 1035 for b := head; b != nil; b = b.allnext { 1036 bp := b.bp() 1037 r := &p[0] 1038 r.Count = int64(bp.count) 1039 r.Cycles = bp.cycles 1040 i := copy(r.Stack0[:], b.stk()) 1041 clear(r.Stack0[i:]) 1042 p = p[1:] 1043 } 1044 } 1045 unlock(&profBlockLock) 1046 return 1047 } 1048 1049 // ThreadCreateProfile returns n, the number of records in the thread creation profile. 1050 // If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true. 1051 // If len(p) < n, ThreadCreateProfile does not change p and returns n, false. 1052 // 1053 // Most clients should use the runtime/pprof package instead 1054 // of calling ThreadCreateProfile directly. 1055 func ThreadCreateProfile(p []StackRecord) (n int, ok bool) { 1056 first := (*m)(atomic.Loadp(unsafe.Pointer(&allm))) 1057 for mp := first; mp != nil; mp = mp.alllink { 1058 n++ 1059 } 1060 if n <= len(p) { 1061 ok = true 1062 i := 0 1063 for mp := first; mp != nil; mp = mp.alllink { 1064 p[i].Stack0 = mp.createstack 1065 i++ 1066 } 1067 } 1068 return 1069 } 1070 1071 //go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels 1072 func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { 1073 return goroutineProfileWithLabels(p, labels) 1074 } 1075 1076 // labels may be nil. If labels is non-nil, it must have the same length as p. 1077 func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { 1078 if labels != nil && len(labels) != len(p) { 1079 labels = nil 1080 } 1081 1082 return goroutineProfileWithLabelsConcurrent(p, labels) 1083 } 1084 1085 var goroutineProfile = struct { 1086 sema uint32 1087 active bool 1088 offset atomic.Int64 1089 records []StackRecord 1090 labels []unsafe.Pointer 1091 }{ 1092 sema: 1, 1093 } 1094 1095 // goroutineProfileState indicates the status of a goroutine's stack for the 1096 // current in-progress goroutine profile. Goroutines' stacks are initially 1097 // "Absent" from the profile, and end up "Satisfied" by the time the profile is 1098 // complete. While a goroutine's stack is being captured, its 1099 // goroutineProfileState will be "InProgress" and it will not be able to run 1100 // until the capture completes and the state moves to "Satisfied". 1101 // 1102 // Some goroutines (the finalizer goroutine, which at various times can be 1103 // either a "system" or a "user" goroutine, and the goroutine that is 1104 // coordinating the profile, any goroutines created during the profile) move 1105 // directly to the "Satisfied" state. 
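// Roughly, the per-goroutine transitions during a profile are:
//
//	Absent -> InProgress -> Satisfied
//
// where the Absent->InProgress step is a CompareAndSwap by whichever M
// records the stack, and goroutines created during collection (or otherwise
// exempt) start directly at Satisfied.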
1106 type goroutineProfileState uint32 1107 1108 const ( 1109 goroutineProfileAbsent goroutineProfileState = iota 1110 goroutineProfileInProgress 1111 goroutineProfileSatisfied 1112 ) 1113 1114 type goroutineProfileStateHolder atomic.Uint32 1115 1116 func (p *goroutineProfileStateHolder) Load() goroutineProfileState { 1117 return goroutineProfileState((*atomic.Uint32)(p).Load()) 1118 } 1119 1120 func (p *goroutineProfileStateHolder) Store(value goroutineProfileState) { 1121 (*atomic.Uint32)(p).Store(uint32(value)) 1122 } 1123 1124 func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileState) bool { 1125 return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) 1126 } 1127 1128 func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { 1129 if len(p) == 0 { 1130 // An empty slice is obviously too small. Return a rough 1131 // allocation estimate without bothering to STW. As long as 1132 // this is close, then we'll only need to STW once (on the next 1133 // call). 1134 return int(gcount()), false 1135 } 1136 1137 semacquire(&goroutineProfile.sema) 1138 1139 ourg := getg() 1140 1141 stw := stopTheWorld(stwGoroutineProfile) 1142 // Using gcount while the world is stopped should give us a consistent view 1143 // of the number of live goroutines, minus the number of goroutines that are 1144 // alive and permanently marked as "system". But to make this count agree 1145 // with what we'd get from isSystemGoroutine, we need special handling for 1146 // goroutines that can vary between user and system to ensure that the count 1147 // doesn't change during the collection. So, check the finalizer goroutine 1148 // in particular. 1149 n = int(gcount()) 1150 if fingStatus.Load()&fingRunningFinalizer != 0 { 1151 n++ 1152 } 1153 1154 if n > len(p) { 1155 // There's not enough space in p to store the whole profile, so (per the 1156 // contract of runtime.GoroutineProfile) we're not allowed to write to p 1157 // at all and must return n, false. 1158 startTheWorld(stw) 1159 semrelease(&goroutineProfile.sema) 1160 return n, false 1161 } 1162 1163 // Save current goroutine. 1164 sp := getcallersp() 1165 pc := getcallerpc() 1166 systemstack(func() { 1167 saveg(pc, sp, ourg, &p[0]) 1168 }) 1169 if labels != nil { 1170 labels[0] = ourg.labels 1171 } 1172 ourg.goroutineProfiled.Store(goroutineProfileSatisfied) 1173 goroutineProfile.offset.Store(1) 1174 1175 // Prepare for all other goroutines to enter the profile. Aside from ourg, 1176 // every goroutine struct in the allgs list has its goroutineProfiled field 1177 // cleared. Any goroutine created from this point on (while 1178 // goroutineProfile.active is set) will start with its goroutineProfiled 1179 // field set to goroutineProfileSatisfied. 1180 goroutineProfile.active = true 1181 goroutineProfile.records = p 1182 goroutineProfile.labels = labels 1183 // The finalizer goroutine needs special handling because it can vary over 1184 // time between being a user goroutine (eligible for this profile) and a 1185 // system goroutine (to be excluded). Pick one before restarting the world. 1186 if fing != nil { 1187 fing.goroutineProfiled.Store(goroutineProfileSatisfied) 1188 if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) { 1189 doRecordGoroutineProfile(fing) 1190 } 1191 } 1192 startTheWorld(stw) 1193 1194 // Visit each goroutine that existed as of the startTheWorld call above. 
1195 // 1196 // New goroutines may not be in this list, but we didn't want to know about 1197 // them anyway. If they do appear in this list (via reusing a dead goroutine 1198 // struct, or racing to launch between the world restarting and us getting 1199 // the list), they will already have their goroutineProfiled field set to 1200 // goroutineProfileSatisfied before their state transitions out of _Gdead. 1201 // 1202 // Any goroutine that the scheduler tries to execute concurrently with this 1203 // call will start by adding itself to the profile (before the act of 1204 // executing can cause any changes in its stack). 1205 forEachGRace(func(gp1 *g) { 1206 tryRecordGoroutineProfile(gp1, Gosched) 1207 }) 1208 1209 stw = stopTheWorld(stwGoroutineProfileCleanup) 1210 endOffset := goroutineProfile.offset.Swap(0) 1211 goroutineProfile.active = false 1212 goroutineProfile.records = nil 1213 goroutineProfile.labels = nil 1214 startTheWorld(stw) 1215 1216 // Restore the invariant that every goroutine struct in allgs has its 1217 // goroutineProfiled field cleared. 1218 forEachGRace(func(gp1 *g) { 1219 gp1.goroutineProfiled.Store(goroutineProfileAbsent) 1220 }) 1221 1222 if raceenabled { 1223 raceacquire(unsafe.Pointer(&labelSync)) 1224 } 1225 1226 if n != int(endOffset) { 1227 // It's a big surprise that the number of goroutines changed while we 1228 // were collecting the profile. But probably better to return a 1229 // truncated profile than to crash the whole process. 1230 // 1231 // For instance, needm moves a goroutine out of the _Gdead state and so 1232 // might be able to change the goroutine count without interacting with 1233 // the scheduler. For code like that, the race windows are small and the 1234 // combination of features is uncommon, so it's hard to be (and remain) 1235 // sure we've caught them all. 1236 } 1237 1238 semrelease(&goroutineProfile.sema) 1239 return n, true 1240 } 1241 1242 // tryRecordGoroutineProfileWB asserts that write barriers are allowed and calls 1243 // tryRecordGoroutineProfile. 1244 // 1245 //go:yeswritebarrierrec 1246 func tryRecordGoroutineProfileWB(gp1 *g) { 1247 if getg().m.p.ptr() == nil { 1248 throw("no P available, write barriers are forbidden") 1249 } 1250 tryRecordGoroutineProfile(gp1, osyield) 1251 } 1252 1253 // tryRecordGoroutineProfile ensures that gp1 has the appropriate representation 1254 // in the current goroutine profile: either that it should not be profiled, or 1255 // that a snapshot of its call stack and labels are now in the profile. 1256 func tryRecordGoroutineProfile(gp1 *g, yield func()) { 1257 if readgstatus(gp1) == _Gdead { 1258 // Dead goroutines should not appear in the profile. Goroutines that 1259 // start while profile collection is active will get goroutineProfiled 1260 // set to goroutineProfileSatisfied before transitioning out of _Gdead, 1261 // so here we check _Gdead first. 1262 return 1263 } 1264 if isSystemGoroutine(gp1, true) { 1265 // System goroutines should not appear in the profile. (The finalizer 1266 // goroutine is marked as "already profiled".) 1267 return 1268 } 1269 1270 for { 1271 prev := gp1.goroutineProfiled.Load() 1272 if prev == goroutineProfileSatisfied { 1273 // This goroutine is already in the profile (or is new since the 1274 // start of collection, so shouldn't appear in the profile). 1275 break 1276 } 1277 if prev == goroutineProfileInProgress { 1278 // Something else is adding gp1 to the goroutine profile right now. 1279 // Give that a moment to finish. 
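// (yield is Gosched when called from the goroutine coordinating the
// profile, and osyield when called from the scheduler via
// tryRecordGoroutineProfileWB.)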
1280 yield() 1281 continue 1282 } 1283 1284 // While we have gp1.goroutineProfiled set to 1285 // goroutineProfileInProgress, gp1 may appear _Grunnable but will not 1286 // actually be able to run. Disable preemption for ourselves, to make 1287 // sure we finish profiling gp1 right away instead of leaving it stuck 1288 // in this limbo. 1289 mp := acquirem() 1290 if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) { 1291 doRecordGoroutineProfile(gp1) 1292 gp1.goroutineProfiled.Store(goroutineProfileSatisfied) 1293 } 1294 releasem(mp) 1295 } 1296 } 1297 1298 // doRecordGoroutineProfile writes gp1's call stack and labels to an in-progress 1299 // goroutine profile. Preemption is disabled. 1300 // 1301 // This may be called via tryRecordGoroutineProfile in two ways: by the 1302 // goroutine that is coordinating the goroutine profile (running on its own 1303 // stack), or from the scheduler in preparation to execute gp1 (running on the 1304 // system stack). 1305 func doRecordGoroutineProfile(gp1 *g) { 1306 if readgstatus(gp1) == _Grunning { 1307 print("doRecordGoroutineProfile gp1=", gp1.goid, "\n") 1308 throw("cannot read stack of running goroutine") 1309 } 1310 1311 offset := int(goroutineProfile.offset.Add(1)) - 1 1312 1313 if offset >= len(goroutineProfile.records) { 1314 // Should be impossible, but better to return a truncated profile than 1315 // to crash the entire process at this point. Instead, deal with it in 1316 // goroutineProfileWithLabelsConcurrent where we have more context. 1317 return 1318 } 1319 1320 // saveg calls gentraceback, which may call cgo traceback functions. When 1321 // called from the scheduler, this is on the system stack already so 1322 // traceback.go:cgoContextPCs will avoid calling back into the scheduler. 1323 // 1324 // When called from the goroutine coordinating the profile, we still have 1325 // set gp1.goroutineProfiled to goroutineProfileInProgress and so are still 1326 // preventing it from being truly _Grunnable. So we'll use the system stack 1327 // to avoid schedule delays. 1328 systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset]) }) 1329 1330 if goroutineProfile.labels != nil { 1331 goroutineProfile.labels[offset] = gp1.labels 1332 } 1333 } 1334 1335 func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { 1336 gp := getg() 1337 1338 isOK := func(gp1 *g) bool { 1339 // Checking isSystemGoroutine here makes GoroutineProfile 1340 // consistent with both NumGoroutine and Stack. 1341 return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false) 1342 } 1343 1344 stw := stopTheWorld(stwGoroutineProfile) 1345 1346 // World is stopped, no locking required. 1347 n = 1 1348 forEachGRace(func(gp1 *g) { 1349 if isOK(gp1) { 1350 n++ 1351 } 1352 }) 1353 1354 if n <= len(p) { 1355 ok = true 1356 r, lbl := p, labels 1357 1358 // Save current goroutine. 1359 sp := getcallersp() 1360 pc := getcallerpc() 1361 systemstack(func() { 1362 saveg(pc, sp, gp, &r[0]) 1363 }) 1364 r = r[1:] 1365 1366 // If we have a place to put our goroutine labelmap, insert it there. 1367 if labels != nil { 1368 lbl[0] = gp.labels 1369 lbl = lbl[1:] 1370 } 1371 1372 // Save other goroutines. 1373 forEachGRace(func(gp1 *g) { 1374 if !isOK(gp1) { 1375 return 1376 } 1377 1378 if len(r) == 0 { 1379 // Should be impossible, but better to return a 1380 // truncated profile than to crash the entire process. 
1381 return 1382 } 1383 // saveg calls gentraceback, which may call cgo traceback functions. 1384 // The world is stopped, so it cannot use cgocall (which will be 1385 // blocked at exitsyscall). Do it on the system stack so it won't 1386 // call into the scheduler (see traceback.go:cgoContextPCs). 1387 systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) }) 1388 if labels != nil { 1389 lbl[0] = gp1.labels 1390 lbl = lbl[1:] 1391 } 1392 r = r[1:] 1393 }) 1394 } 1395 1396 if raceenabled { 1397 raceacquire(unsafe.Pointer(&labelSync)) 1398 } 1399 1400 startTheWorld(stw) 1401 return n, ok 1402 } 1403 1404 // GoroutineProfile returns n, the number of records in the active goroutine stack profile. 1405 // If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true. 1406 // If len(p) < n, GoroutineProfile does not change p and returns n, false. 1407 // 1408 // Most clients should use the [runtime/pprof] package instead 1409 // of calling GoroutineProfile directly. 1410 func GoroutineProfile(p []StackRecord) (n int, ok bool) { 1411 1412 return goroutineProfileWithLabels(p, nil) 1413 } 1414 1415 func saveg(pc, sp uintptr, gp *g, r *StackRecord) { 1416 var u unwinder 1417 u.initAt(pc, sp, 0, gp, unwindSilentErrors) 1418 n := tracebackPCs(&u, 0, r.Stack0[:]) 1419 if n < len(r.Stack0) { 1420 r.Stack0[n] = 0 1421 } 1422 } 1423 1424 // Stack formats a stack trace of the calling goroutine into buf 1425 // and returns the number of bytes written to buf. 1426 // If all is true, Stack formats stack traces of all other goroutines 1427 // into buf after the trace for the current goroutine. 1428 func Stack(buf []byte, all bool) int { 1429 var stw worldStop 1430 if all { 1431 stw = stopTheWorld(stwAllGoroutinesStack) 1432 } 1433 1434 n := 0 1435 if len(buf) > 0 { 1436 gp := getg() 1437 sp := getcallersp() 1438 pc := getcallerpc() 1439 systemstack(func() { 1440 g0 := getg() 1441 // Force traceback=1 to override GOTRACEBACK setting, 1442 // so that Stack's results are consistent. 1443 // GOTRACEBACK is only about crash dumps. 1444 g0.m.traceback = 1 1445 g0.writebuf = buf[0:0:len(buf)] 1446 goroutineheader(gp) 1447 traceback(pc, sp, 0, gp) 1448 if all { 1449 tracebackothers(gp) 1450 } 1451 g0.m.traceback = 0 1452 n = len(g0.writebuf) 1453 g0.writebuf = nil 1454 }) 1455 } 1456 1457 if all { 1458 startTheWorld(stw) 1459 } 1460 return n 1461 } 1462 1463 // Tracing of alloc/free/gc.
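// (tracealloc, tracefree, and tracegc below print each event plus a stack
// trace while holding tracelock; they serve the runtime's allocation/free
// tracing debug mode.)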
1464 1465 var tracelock mutex 1466 1467 func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) { 1468 lock(&tracelock) 1469 gp := getg() 1470 gp.m.traceback = 2 1471 if typ == nil { 1472 print("tracealloc(", p, ", ", hex(size), ")\n") 1473 } else { 1474 print("tracealloc(", p, ", ", hex(size), ", ", toRType(typ).string(), ")\n") 1475 } 1476 if gp.m.curg == nil || gp == gp.m.curg { 1477 goroutineheader(gp) 1478 pc := getcallerpc() 1479 sp := getcallersp() 1480 systemstack(func() { 1481 traceback(pc, sp, 0, gp) 1482 }) 1483 } else { 1484 goroutineheader(gp.m.curg) 1485 traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg) 1486 } 1487 print("\n") 1488 gp.m.traceback = 0 1489 unlock(&tracelock) 1490 } 1491 1492 func tracefree(p unsafe.Pointer, size uintptr) { 1493 lock(&tracelock) 1494 gp := getg() 1495 gp.m.traceback = 2 1496 print("tracefree(", p, ", ", hex(size), ")\n") 1497 goroutineheader(gp) 1498 pc := getcallerpc() 1499 sp := getcallersp() 1500 systemstack(func() { 1501 traceback(pc, sp, 0, gp) 1502 }) 1503 print("\n") 1504 gp.m.traceback = 0 1505 unlock(&tracelock) 1506 } 1507 1508 func tracegc() { 1509 lock(&tracelock) 1510 gp := getg() 1511 gp.m.traceback = 2 1512 print("tracegc()\n") 1513 // running on m->g0 stack; show all non-g0 goroutines 1514 tracebackothers(gp) 1515 print("end tracegc\n") 1516 print("\n") 1517 gp.m.traceback = 0 1518 unlock(&tracelock) 1519 }
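// The sketch below is a hypothetical user program (not runtime code) showing
// how the exported entry points in this file are commonly driven directly;
// as the doc comments above note, most programs should prefer the
// runtime/pprof package.
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		runtime.SetBlockProfileRate(1)     // sample every blocking event
//		runtime.SetMutexProfileFraction(5) // sample ~1/5 of mutex contention events
//
//		// The two-call pattern shared by MemProfile, BlockProfile, and
//		// MutexProfile: learn the record count, then copy with enough room.
//		n, _ := runtime.MemProfile(nil, true)
//		records := make([]runtime.MemProfileRecord, n+50)
//		if n, ok := runtime.MemProfile(records, true); ok {
//			for _, r := range records[:n] {
//				fmt.Println(r.InUseBytes(), r.InUseObjects(), len(r.Stack()))
//			}
//		}
//	}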