// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Go execution tracer.
// The tracer captures a wide range of execution events like goroutine
// creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
// changes of heap size, processor start/stop, etc., and writes them to a buffer
// in a compact form. A nanosecond-precision timestamp and a stack
// trace are captured for most events.
// See https://golang.org/s/go15trace for more info.

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Event types in the trace, args are given in square brackets.
const (
	traceEvNone              = 0  // unused
	traceEvBatch             = 1  // start of per-P batch of events [pid, timestamp]
	traceEvFrequency         = 2  // contains tracer timer frequency [frequency (ticks per second)]
	traceEvStack             = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
	traceEvGomaxprocs        = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
	traceEvProcStart         = 5  // start of P [timestamp, thread id]
	traceEvProcStop          = 6  // stop of P [timestamp]
	traceEvGCStart           = 7  // GC start [timestamp, seq, stack id]
	traceEvGCDone            = 8  // GC done [timestamp]
	traceEvGCSTWStart        = 9  // GC STW start [timestamp, kind]
	traceEvGCSTWDone         = 10 // GC STW done [timestamp]
	traceEvGCSweepStart      = 11 // GC sweep start [timestamp, stack id]
	traceEvGCSweepDone       = 12 // GC sweep done [timestamp, swept, reclaimed]
	traceEvGoCreate          = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
	traceEvGoStart           = 14 // goroutine starts running [timestamp, goroutine id, seq]
	traceEvGoEnd             = 15 // goroutine ends [timestamp]
	traceEvGoStop            = 16 // goroutine stops (like in select{}) [timestamp, stack]
	traceEvGoSched           = 17 // goroutine calls Gosched [timestamp, stack]
	traceEvGoPreempt         = 18 // goroutine is preempted [timestamp, stack]
	traceEvGoSleep           = 19 // goroutine calls Sleep [timestamp, stack]
	traceEvGoBlock           = 20 // goroutine blocks [timestamp, stack]
	traceEvGoUnblock         = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
	traceEvGoBlockSend       = 22 // goroutine blocks on chan send [timestamp, stack]
	traceEvGoBlockRecv       = 23 // goroutine blocks on chan recv [timestamp, stack]
	traceEvGoBlockSelect     = 24 // goroutine blocks on select [timestamp, stack]
	traceEvGoBlockSync       = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
	traceEvGoBlockCond       = 26 // goroutine blocks on Cond [timestamp, stack]
	traceEvGoBlockNet        = 27 // goroutine blocks on network [timestamp, stack]
	traceEvGoSysCall         = 28 // syscall enter [timestamp, stack]
	traceEvGoSysExit         = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
	traceEvGoSysBlock        = 30 // syscall blocks [timestamp]
	traceEvGoWaiting         = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
	traceEvGoInSyscall       = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
	traceEvHeapAlloc         = 33 // gcController.heapLive change [timestamp, heap_alloc]
	traceEvHeapGoal          = 34 // gcController.heapGoal() (formerly next_gc) change [timestamp, heap goal in bytes]
	traceEvTimerGoroutine    = 35 // not currently used; previously denoted timer goroutine [timer goroutine id]
	traceEvFutileWakeup      = 36 // not currently used; denotes that the previous wakeup of this goroutine was futile [timestamp]
	traceEvString            = 37 // string dictionary entry [ID, length, string]
	traceEvGoStartLocal      = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
	traceEvGoUnblockLocal    = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
	traceEvGoSysExitLocal    = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
	traceEvGoStartLabel      = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
	traceEvGoBlockGC         = 42 // goroutine blocks on GC assist [timestamp, stack]
	traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack]
	traceEvGCMarkAssistDone  = 44 // GC mark assist done [timestamp]
	traceEvUserTaskCreate    = 45 // trace.NewTask [timestamp, internal task id, internal parent task id, name string, stack]
	traceEvUserTaskEnd       = 46 // end of a task [timestamp, internal task id, stack]
	traceEvUserRegion        = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string, stack]
	traceEvUserLog           = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string]
	traceEvCPUSample         = 49 // CPU profiling sample [timestamp, real timestamp, real P id (-1 when absent), goroutine id, stack]
	traceEvCount             = 50
	// A byte is used, but only 6 bits are available for the event type.
	// The remaining 2 bits are used to specify the number of arguments.
	// That means the max event type value is 63.
)
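// The packing of the first event byte described above can be illustrated with
// a small sketch. traceEvHeaderSketch is not used by the tracer itself; it
// only demonstrates the layout: the low 6 bits hold the event type and the
// top 2 bits hold the inline argument count, capped at 3 (3 means the event
// carries an explicit length, see traceEventLocked).
func traceEvHeaderSketch(ev byte, narg int) (header byte) {
	n := byte(narg)
	if n > 3 {
		n = 3 // 3 means "length-prefixed"
	}
	return ev | n<<traceArgCountShift
}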
const (
	// Timestamps in trace are cputicks/traceTickDiv.
	// This makes absolute values of timestamp diffs smaller,
	// and so they are encoded in fewer bytes.
	// 64 on x86 is somewhat arbitrary (one tick is ~20ns on a 3GHz machine).
	// The suggested increment frequency for PowerPC's time base register is
	// 512 MHz according to Power ISA v2.07 section 6.2, so we use 16 on ppc64
	// and ppc64le.
	// Tracing won't work reliably for architectures where cputicks is emulated
	// by nanotime, so the value doesn't matter for those architectures.
	traceTickDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64)
	// Maximum number of PCs in a single stack trace.
	// Since events contain only stack id rather than whole stack trace,
	// we can allow quite large values here.
	traceStackSize = 128
	// Identifier of a fake P that is used when we trace without a real P.
	traceGlobProc = -1
	// Maximum number of bytes to encode uint64 in base-128.
	traceBytesPerNumber = 10
	// Shift of the number of arguments in the first event byte.
	traceArgCountShift = 6
)

// trace is global tracing context.
var trace struct {
	// trace.lock must only be acquired on the system stack where
	// stack splits cannot happen while it is held.
	lock          mutex  // protects the following members
	lockOwner     *g     // to avoid deadlocks during recursive lock acquisition
	enabled       bool   // when set runtime traces events
	shutdown      bool   // set when we are waiting for trace reader to finish after setting enabled to false
	headerWritten bool   // whether ReadTrace has emitted trace header
	footerWritten bool   // whether ReadTrace has emitted trace footer
	shutdownSema  uint32 // used to wait for ReadTrace completion
	seqStart      uint64 // sequence number when tracing was started
	ticksStart    int64  // cputicks when tracing was started
	ticksEnd      int64  // cputicks when tracing was stopped
	timeStart     int64  // nanotime when tracing was started
	timeEnd       int64  // nanotime when tracing was stopped
	seqGC         uint64 // GC start/done sequencer
	reading       traceBufPtr // buffer currently handed off to user
	empty         traceBufPtr // stack of empty buffers
	fullHead      traceBufPtr // queue of full buffers
	fullTail      traceBufPtr
	stackTab      traceStackTable // maps stack traces to unique ids
	// cpuLogRead accepts CPU profile samples from the signal handler where
	// they're generated. It uses a two-word header to hold the IDs of the P and
	// G (respectively) that were active at the time of the sample. Because
	// profBuf uses a record with all zeros in its header to indicate overflow,
	// we make sure to make the P field always non-zero: The ID of a real P will
	// start at bit 1, and bit 0 will be set. Samples that arrive while no P is
	// running (such as near syscalls) will set the first header field to 0b10.
	// This careful handling of the first header field allows us to store ID of
	// the active G directly in the second field, even though that will be 0
	// when sampling g0.
	cpuLogRead *profBuf
	// cpuLogBuf is a trace buffer to hold events corresponding to CPU profile
	// samples, which arrive out of band and not directly connected to a
	// specific P.
	cpuLogBuf traceBufPtr

	reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil

	signalLock  atomic.Uint32 // protects use of the following member, only usable in signal handlers
	cpuLogWrite *profBuf      // copy of cpuLogRead for use in signal handlers, set without signalLock

	// Dictionary for traceEvString.
	//
	// TODO: central lock to access the map is not ideal.
	// option: pre-assign ids to all user annotation region names and tags
	// option: per-P cache
	// option: sync.Map like data structure
	stringsLock mutex
	strings     map[string]uint64
	stringSeq   uint64

	// markWorkerLabels maps gcMarkWorkerMode to string ID.
	markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64

	bufLock mutex       // protects buf
	buf     traceBufPtr // global trace buffer, used when running without a p
}

// traceLockInit initializes global trace locks.
func traceLockInit() {
	lockInit(&trace.bufLock, lockRankTraceBuf)
	lockInit(&trace.stringsLock, lockRankTraceStrings)
	lockInit(&trace.lock, lockRankTrace)
	lockInit(&trace.stackTab.lock, lockRankTraceStackTab)
}

// traceBufHeader is a per-P tracing buffer header.
type traceBufHeader struct {
	link      traceBufPtr             // in trace.empty/full
	lastTicks uint64                  // when we wrote the last event
	pos       int                     // next write offset in arr
	stk       [traceStackSize]uintptr // scratch buffer for traceback
}

// traceBuf is a per-P tracing buffer.
type traceBuf struct {
	_ sys.NotInHeap
	traceBufHeader
	arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for traceBufHeader.buf
}

// traceBufPtr is a *traceBuf that is not traced by the garbage
// collector and doesn't have write barriers. traceBufs are not
// allocated from the GC'd heap, so this is safe, and are often
// manipulated in contexts where write barriers are not allowed, so
// this is necessary.
//
// TODO: Since traceBuf now embeds runtime/internal/sys.NotInHeap, this isn't necessary.
type traceBufPtr uintptr

func (tp traceBufPtr) ptr() *traceBuf   { return (*traceBuf)(unsafe.Pointer(tp)) }
func (tp *traceBufPtr) set(b *traceBuf) { *tp = traceBufPtr(unsafe.Pointer(b)) }
func traceBufPtrOf(b *traceBuf) traceBufPtr {
	return traceBufPtr(unsafe.Pointer(b))
}

// traceEnabled returns true if the trace is currently enabled.
func traceEnabled() bool {
	return trace.enabled
}

// traceShuttingDown returns true if the trace is currently shutting down.
func traceShuttingDown() bool {
	return trace.shutdown
}

// StartTrace enables tracing for the current process.
// While tracing, the data will be buffered and available via ReadTrace.
// StartTrace returns an error if tracing is already enabled.
// Most clients should use the runtime/trace package or the testing package's
// -test.trace flag instead of calling StartTrace directly.
func StartTrace() error {
	// Stop the world so that we can take a consistent snapshot
	// of all goroutines at the beginning of the trace.
	// Do not stop the world during GC so we ensure we always see
	// a consistent view of GC-related events (e.g. a start is always
	// paired with an end).
	stopTheWorldGC("start tracing")

	// Prevent sysmon from running any code that could generate events.
	lock(&sched.sysmonlock)

	// We are in stop-the-world, but syscalls can finish and write to trace concurrently.
	// Exitsyscall could check trace.enabled long before and then suddenly wake up
	// and decide to write to trace at a random point in time.
	// However, such a syscall will use the global trace.buf buffer, because we've
	// acquired all p's by doing stop-the-world. So this protects us from such races.
	lock(&trace.bufLock)

	if trace.enabled || trace.shutdown {
		unlock(&trace.bufLock)
		unlock(&sched.sysmonlock)
		startTheWorldGC()
		return errorString("tracing is already enabled")
	}

	// Can't set trace.enabled yet. While the world is stopped, exitsyscall could
	// already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
	// That would lead to an inconsistent trace:
	// - either GoSysExit appears before EvGoInSyscall,
	// - or GoSysExit appears for a goroutine for which we don't emit EvGoInSyscall below.
	// To instruct traceEvent that it must not ignore events below, we set startingtrace.
	// trace.enabled is set afterwards once we have emitted all preliminary events.
	mp := getg().m
	mp.startingtrace = true

	// Obtain current stack ID to use in all traceEvGoCreate events below.
	stkBuf := make([]uintptr, traceStackSize)
	stackID := traceStackID(mp, stkBuf, 2)

	profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid]
	trace.cpuLogRead = profBuf

	// We must not acquire trace.signalLock outside of a signal handler: a
	// profiling signal may arrive at any time and try to acquire it, leading to
	// deadlock. Because we can't use that lock to protect updates to
	// trace.cpuLogWrite (only use of the structure it references), reads and
	// writes of the pointer must be atomic. (And although this field is never
	// the sole pointer to the profBuf value, it's best to allow a write barrier
	// here.)
	atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), unsafe.Pointer(profBuf))

	// World is stopped, no need to lock.
	forEachGRace(func(gp *g) {
		status := readgstatus(gp)
		if status != _Gdead {
			gp.traceseq = 0
			gp.tracelastp = getg().m.p
			// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
			id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(gp.startpc) + sys.PCQuantum})
			traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
		}
		if status == _Gwaiting {
			// traceEvGoWaiting is implied to have seq=1.
			gp.traceseq++
			traceEvent(traceEvGoWaiting, -1, gp.goid)
		}
		if status == _Gsyscall {
			gp.traceseq++
			traceEvent(traceEvGoInSyscall, -1, gp.goid)
		} else if status == _Gdead && gp.m != nil && gp.m.isextra {
			// Trigger two trace events for the dead g in the extra m,
			// since the next event of the g will be traceEvGoSysExit in exitsyscall,
			// when calling from a C thread into Go.
			gp.traceseq = 0
			gp.tracelastp = getg().m.p
			// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
			id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(0) + sys.PCQuantum}) // no start pc
			traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
			gp.traceseq++
			traceEvent(traceEvGoInSyscall, -1, gp.goid)
		} else {
			gp.sysblocktraced = false
		}
	})
	traceProcStart()
	traceGoStart()
	// Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
	// If we do it the other way around, it is possible that exitsyscall will
	// query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp.
	// It will lead to a false conclusion that cputicks is broken.
	trace.ticksStart = cputicks()
	trace.timeStart = nanotime()
	trace.headerWritten = false
	trace.footerWritten = false

	// string to id mapping
	//  0 : reserved for an empty string
	//  remaining: other strings registered by traceString
	trace.stringSeq = 0
	trace.strings = make(map[string]uint64)

	trace.seqGC = 0
	mp.startingtrace = false
	trace.enabled = true

	// Register runtime goroutine labels.
	_, pid, bufp := traceAcquireBuffer()
	for i, label := range gcMarkWorkerModeStrings[:] {
		trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label)
	}
	traceReleaseBuffer(mp, pid)

	unlock(&trace.bufLock)

	unlock(&sched.sysmonlock)

	// Record the current state of HeapGoal to avoid information loss in trace.
	traceHeapGoal()

	startTheWorldGC()
	return nil
}

// StopTrace stops tracing, if it was previously enabled.
// StopTrace only returns after all the reads for the trace have completed.
func StopTrace() {
	// Stop the world so that we can collect the trace buffers from all p's below,
	// and also to avoid races with traceEvent.
	stopTheWorldGC("stop tracing")

	// See the comment in StartTrace.
	lock(&sched.sysmonlock)

	// See the comment in StartTrace.
	lock(&trace.bufLock)

	if !trace.enabled {
		unlock(&trace.bufLock)
		unlock(&sched.sysmonlock)
		startTheWorldGC()
		return
	}

	traceGoSched()

	atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), nil)
	trace.cpuLogRead.close()
	traceReadCPU()

	// Loop over all allocated Ps because dead Ps may still have
	// trace buffers.
	for _, p := range allp[:cap(allp)] {
		buf := p.tracebuf
		if buf != 0 {
			traceFullQueue(buf)
			p.tracebuf = 0
		}
	}
	if trace.buf != 0 {
		buf := trace.buf
		trace.buf = 0
		if buf.ptr().pos != 0 {
			traceFullQueue(buf)
		}
	}
	if trace.cpuLogBuf != 0 {
		buf := trace.cpuLogBuf
		trace.cpuLogBuf = 0
		if buf.ptr().pos != 0 {
			traceFullQueue(buf)
		}
	}

	for {
		trace.ticksEnd = cputicks()
		trace.timeEnd = nanotime()
		// Windows time can tick only every 15ms, wait for at least one tick.
		if trace.timeEnd != trace.timeStart {
			break
		}
		osyield()
	}

	trace.enabled = false
	trace.shutdown = true
	unlock(&trace.bufLock)

	unlock(&sched.sysmonlock)

	startTheWorldGC()

	// The world is started but we've set trace.shutdown, so new tracing can't start.
	// Wait for the trace reader to flush pending buffers and stop.
	semacquire(&trace.shutdownSema)
	if raceenabled {
		raceacquire(unsafe.Pointer(&trace.shutdownSema))
	}

	systemstack(func() {
		// The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
		lock(&trace.lock)
		for _, p := range allp[:cap(allp)] {
			if p.tracebuf != 0 {
				throw("trace: non-empty trace buffer in proc")
			}
		}
		if trace.buf != 0 {
			throw("trace: non-empty global trace buffer")
		}
		if trace.fullHead != 0 || trace.fullTail != 0 {
			throw("trace: non-empty full trace buffer")
		}
		if trace.reading != 0 || trace.reader.Load() != nil {
			throw("trace: reading after shutdown")
		}
		for trace.empty != 0 {
			buf := trace.empty
			trace.empty = buf.ptr().link
			sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
		}
		trace.strings = nil
		trace.shutdown = false
		trace.cpuLogRead = nil
		unlock(&trace.lock)
	})
}

// ReadTrace returns the next chunk of binary tracing data, blocking until data
// is available. If tracing is turned off and all the data accumulated while it
// was on has been returned, ReadTrace returns nil. The caller must copy the
// returned data before calling ReadTrace again.
// ReadTrace must be called from one goroutine at a time.
func ReadTrace() []byte {
top:
	var buf []byte
	var park bool
	systemstack(func() {
		buf, park = readTrace0()
	})
	if park {
		gopark(func(gp *g, _ unsafe.Pointer) bool {
			if !trace.reader.CompareAndSwapNoWB(nil, gp) {
				// We're racing with another reader.
				// Wake up and handle this case.
				return false
			}

			if g2 := traceReader(); gp == g2 {
				// New data arrived between unlocking
				// and the CAS and we won the wake-up
				// race, so wake up directly.
				return false
			} else if g2 != nil {
				printlock()
				println("runtime: got trace reader", g2, g2.goid)
				throw("unexpected trace reader")
			}

			return true
		}, nil, waitReasonTraceReaderBlocked, traceEvGoBlock, 2)
		goto top
	}

	return buf
}
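// For reference, a consumer such as runtime/trace.Start typically drains
// ReadTrace in a dedicated goroutine, copying or writing out each chunk
// before the next call, roughly (w being the io.Writer handed to trace.Start):
//
//	go func() {
//		for {
//			data := runtime.ReadTrace()
//			if data == nil {
//				break // tracing stopped and all data consumed
//			}
//			w.Write(data)
//		}
//	}()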
// readTrace0 is ReadTrace's continuation on g0. This must run on the
// system stack because it acquires trace.lock.
//
//go:systemstack
func readTrace0() (buf []byte, park bool) {
	if raceenabled {
		// g0 doesn't have a race context. Borrow the user G's.
		if getg().racectx != 0 {
			throw("expected racectx == 0")
		}
		getg().racectx = getg().m.curg.racectx
		// (This defer should get open-coded, which is safe on
		// the system stack.)
		defer func() { getg().racectx = 0 }()
	}

	// This function may need to lock trace.lock recursively
	// (goparkunlock -> traceGoPark -> traceEvent -> traceFlush).
	// To allow this we use trace.lockOwner.
	// Also this function must not allocate while holding trace.lock:
	// allocation can call the heap allocator, which will try to emit a trace
	// event while holding the heap lock.
	lock(&trace.lock)
	trace.lockOwner = getg().m.curg

	if trace.reader.Load() != nil {
		// More than one goroutine reads trace. This is bad.
		// But we would rather not crash the program because of tracing,
		// because tracing can be enabled at runtime on production servers.
		trace.lockOwner = nil
		unlock(&trace.lock)
		println("runtime: ReadTrace called from multiple goroutines simultaneously")
		return nil, false
	}
	// Recycle the old buffer.
	if buf := trace.reading; buf != 0 {
		buf.ptr().link = trace.empty
		trace.empty = buf
		trace.reading = 0
	}
	// Write trace header.
	if !trace.headerWritten {
		trace.headerWritten = true
		trace.lockOwner = nil
		unlock(&trace.lock)
		return []byte("go 1.19 trace\x00\x00\x00"), false
	}
	// Optimistically look for CPU profile samples. This may write new stack
	// records, and may write new tracing buffers.
	if !trace.footerWritten && !trace.shutdown {
		traceReadCPU()
	}
	// Wait for new data.
	if trace.fullHead == 0 && !trace.shutdown {
		// We don't simply use a note because the scheduler
		// executes this goroutine directly when it wakes up
		// (also a note would consume an M).
		trace.lockOwner = nil
		unlock(&trace.lock)
		return nil, true
	}
newFull:
	assertLockHeld(&trace.lock)
	// Write a buffer.
	if trace.fullHead != 0 {
		buf := traceFullDequeue()
		trace.reading = buf
		trace.lockOwner = nil
		unlock(&trace.lock)
		return buf.ptr().arr[:buf.ptr().pos], false
	}

	// Write footer with timer frequency.
	if !trace.footerWritten {
		trace.footerWritten = true
		// Use float64 because (trace.ticksEnd - trace.ticksStart) * 1e9 can overflow int64.
		freq := float64(trace.ticksEnd-trace.ticksStart) * 1e9 / float64(trace.timeEnd-trace.timeStart) / traceTickDiv
		if freq <= 0 {
			throw("trace: ReadTrace got invalid frequency")
		}
		trace.lockOwner = nil
		unlock(&trace.lock)

		// Write frequency event.
		bufp := traceFlush(0, 0)
		buf := bufp.ptr()
		buf.byte(traceEvFrequency | 0<<traceArgCountShift)
		buf.varint(uint64(freq))

		// Dump stack table.
		// This will emit a bunch of full buffers, we will pick them up
		// on the next iteration.
		bufp = trace.stackTab.dump(bufp)

		// Flush final buffer.
		lock(&trace.lock)
		traceFullQueue(bufp)
		goto newFull // trace.lock should be held at newFull
	}
	// Done.
	if trace.shutdown {
		trace.lockOwner = nil
		unlock(&trace.lock)
		if raceenabled {
			// Model synchronization on trace.shutdownSema, which race
			// detector does not see. This is required to avoid false
			// race reports on writer passed to trace.Start.
			racerelease(unsafe.Pointer(&trace.shutdownSema))
		}
		// trace.enabled is already reset, so can call traceable functions.
		semrelease(&trace.shutdownSema)
		return nil, false
	}
	// Also bad, but see the comment above.
	trace.lockOwner = nil
	unlock(&trace.lock)
	println("runtime: spurious wakeup of trace reader")
	return nil, false
}

// traceReader returns the trace reader that should be woken up, if any.
// Callers should first check that trace.enabled or trace.shutdown is set.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceReader() *g {
	// Optimistic check first
	if traceReaderAvailable() == nil {
		return nil
	}
	lock(&trace.lock)
	gp := traceReaderAvailable()
	if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) {
		unlock(&trace.lock)
		return nil
	}
	unlock(&trace.lock)
	return gp
}

// traceReaderAvailable returns the trace reader if it is not currently
// scheduled and should be. Callers should first check that trace.enabled
// or trace.shutdown is set.
func traceReaderAvailable() *g {
	if trace.fullHead != 0 || trace.shutdown {
		return trace.reader.Load()
	}
	return nil
}

// traceProcFree frees trace buffer associated with pp.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceProcFree(pp *p) {
	buf := pp.tracebuf
	pp.tracebuf = 0
	if buf == 0 {
		return
	}
	lock(&trace.lock)
	traceFullQueue(buf)
	unlock(&trace.lock)
}

// traceFullQueue queues buf into queue of full buffers.
func traceFullQueue(buf traceBufPtr) {
	buf.ptr().link = 0
	if trace.fullHead == 0 {
		trace.fullHead = buf
	} else {
		trace.fullTail.ptr().link = buf
	}
	trace.fullTail = buf
}

// traceFullDequeue dequeues from queue of full buffers.
func traceFullDequeue() traceBufPtr {
	buf := trace.fullHead
	if buf == 0 {
		return 0
	}
	trace.fullHead = buf.ptr().link
	if trace.fullHead == 0 {
		trace.fullTail = 0
	}
	buf.ptr().link = 0
	return buf
}

// traceEvent writes a single event to trace buffer, flushing the buffer if necessary.
// ev is event type.
// If skip > 0, write current stack id as the last argument (skipping skip top frames).
// If skip = 0, this event type should contain a stack, but we don't want
// to collect and remember it for this particular call.
func traceEvent(ev byte, skip int, args ...uint64) {
	mp, pid, bufp := traceAcquireBuffer()
	// Double-check trace.enabled now that we've done m.locks++ and acquired bufLock.
	// This protects from races between traceEvent and StartTrace/StopTrace.

	// The caller checked that trace.enabled == true, but trace.enabled might have been
	// turned off between the check and now. Check again. traceLockBuffer did mp.locks++,
	// StopTrace does stopTheWorld, and stopTheWorld waits for mp.locks to go back to zero,
	// so if we see trace.enabled == true now, we know it's true for the rest of the function.
	// Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace
	// during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer.
	//
	// Note trace_userTaskCreate runs the same check.
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(mp, pid)
		return
	}

	if skip > 0 {
		if getg() == mp.curg {
			skip++ // +1 because stack is captured in traceEventLocked.
		}
	}
	traceEventLocked(0, mp, pid, bufp, ev, 0, skip, args...)
	traceReleaseBuffer(mp, pid)
}

// traceEventLocked writes a single event of type ev to the trace buffer bufp,
// flushing the buffer if necessary. pid is the id of the current P, or
// traceGlobProc if we're tracing without a real P.
//
// Preemption is disabled, and if running without a real P the global tracing
// buffer is locked.
//
// Event types that do not include a stack set skip to -1. Event types that
// include a stack may explicitly reference a stackID from the trace.stackTab
// (obtained by an earlier call to traceStackID). Without an explicit stackID,
// this function will automatically capture the stack of the goroutine currently
// running on mp, skipping skip top frames or, if skip is 0, writing out an
// empty stack record.
//
// It records the event's args to the traceBuf, and also makes an effort to
// reserve extraBytes bytes of additional space immediately following the event,
// in the same traceBuf.
func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, stackID uint32, skip int, args ...uint64) {
	buf := bufp.ptr()
	// TODO: test on non-zero extraBytes param.
	maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params
	if buf == nil || len(buf.arr)-buf.pos < maxSize {
		systemstack(func() {
			buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
		})
		bufp.set(buf)
	}

	// NOTE: ticks might be the same after tick division, even though the real
	// cputicks value is monotonically increasing.
	ticks := uint64(cputicks()) / traceTickDiv
	tickDiff := ticks - buf.lastTicks
	if tickDiff == 0 {
		ticks = buf.lastTicks + 1
		tickDiff = 1
	}

	buf.lastTicks = ticks
	narg := byte(len(args))
	if stackID != 0 || skip >= 0 {
		narg++
	}
	// We have only 2 bits for number of arguments.
	// If number is >= 3, then the event type is followed by event length in bytes.
	if narg > 3 {
		narg = 3
	}
	startPos := buf.pos
	buf.byte(ev | narg<<traceArgCountShift)
	var lenp *byte
	if narg == 3 {
		// Reserve the byte for length assuming that length < 128.
		buf.varint(0)
		lenp = &buf.arr[buf.pos-1]
	}
	buf.varint(tickDiff)
	for _, a := range args {
		buf.varint(a)
	}
	if stackID != 0 {
		buf.varint(uint64(stackID))
	} else if skip == 0 {
		buf.varint(0)
	} else if skip > 0 {
		buf.varint(traceStackID(mp, buf.stk[:], skip))
	}
	evSize := buf.pos - startPos
	if evSize > maxSize {
		throw("invalid length of trace event")
	}
	if lenp != nil {
		// Fill in actual length.
		*lenp = byte(evSize - 2)
	}
}
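// The tickDiff written above is a delta against the previous event in the same
// buffer; the enclosing traceEvBatch carries the first absolute (divided) tick
// value. A reader reconstructs wall-clock time roughly as follows, where freq
// is the ticks-per-second value from the traceEvFrequency event (the names here
// are illustrative, not part of the runtime):
//
//	ticks := batchTicks // from the EvBatch header
//	for each event in the batch {
//		ticks += tickDiff
//		nanoseconds := ticks * 1e9 / freq
//	}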
// traceCPUSample writes a CPU profile sample stack to the execution tracer's
// profiling buffer. It is called from a signal handler, so is limited in what
// it can do.
func traceCPUSample(gp *g, pp *p, stk []uintptr) {
	if !trace.enabled {
		// Tracing is usually turned off; don't spend time acquiring the signal
		// lock unless it's active.
		return
	}

	// Match the clock used in traceEventLocked
	now := cputicks()
	// The "header" here is the ID of the P that was running the profiled code,
	// followed by the ID of the goroutine. (For normal CPU profiling, it's
	// usually the number of samples with the given stack.) Near syscalls, pp
	// may be nil. Reporting goid of 0 is fine for either g0 or a nil gp.
	var hdr [2]uint64
	if pp != nil {
		// Overflow records in profBuf have all header values set to zero. Make
		// sure that real headers have at least one bit set.
		hdr[0] = uint64(pp.id)<<1 | 0b1
	} else {
		hdr[0] = 0b10
	}
	if gp != nil {
		hdr[1] = gp.goid
	}

	// Allow only one writer at a time
	for !trace.signalLock.CompareAndSwap(0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	if log := (*profBuf)(atomic.Loadp(unsafe.Pointer(&trace.cpuLogWrite))); log != nil {
		// Note: we don't pass a tag pointer here (how should profiling tags
		// interact with the execution tracer?), but if we did we'd need to be
		// careful about write barriers. See the long comment in profBuf.write.
		log.write(nil, now, hdr[:], stk)
	}

	trace.signalLock.Store(0)
}

func traceReadCPU() {
	bufp := &trace.cpuLogBuf

	for {
		data, tags, _ := trace.cpuLogRead.read(profBufNonBlocking)
		if len(data) == 0 {
			break
		}
		for len(data) > 0 {
			if len(data) < 4 || data[0] > uint64(len(data)) {
				break // truncated profile
			}
			if data[0] < 4 || tags != nil && len(tags) < 1 {
				break // malformed profile
			}
			if len(tags) < 1 {
				break // mismatched profile records and tags
			}
			timestamp := data[1]
			ppid := data[2] >> 1
			if hasP := (data[2] & 0b1) != 0; !hasP {
				ppid = ^uint64(0)
			}
			goid := data[3]
			stk := data[4:data[0]]
			empty := len(stk) == 1 && data[2] == 0 && data[3] == 0
			data = data[data[0]:]
			// No support here for reporting goroutine tags at the moment; if
			// that information is to be part of the execution trace, we'd
			// probably want to see when the tags are applied and when they
			// change, instead of only seeing them when we get a CPU sample.
			tags = tags[1:]

			if empty {
				// Looks like an overflow record from the profBuf. Not much to
				// do here, we only want to report full records.
				//
				// TODO: should we start a goroutine to drain the profBuf,
				// rather than relying on a high-enough volume of tracing events
				// to keep ReadTrace busy? https://go.dev/issue/52674
				continue
			}

			buf := bufp.ptr()
			if buf == nil {
				systemstack(func() {
					*bufp = traceFlush(*bufp, 0)
				})
				buf = bufp.ptr()
			}
			nstk := 1
			buf.stk[0] = logicalStackSentinel
			for ; nstk < len(buf.stk) && nstk-1 < len(stk); nstk++ {
				buf.stk[nstk] = uintptr(stk[nstk-1])
			}
			stackID := trace.stackTab.put(buf.stk[:nstk])

			traceEventLocked(0, nil, 0, bufp, traceEvCPUSample, stackID, 1, timestamp/traceTickDiv, ppid, goid)
		}
	}
}

// logicalStackSentinel is a sentinel value at pcBuf[0] signifying that
// pcBuf[1:] holds a logical stack requiring no further processing. Any other
// value at pcBuf[0] represents a skip value to apply to the physical stack in
// pcBuf[1:] after inline expansion.
const logicalStackSentinel = ^uintptr(0)

// traceStackID captures a stack trace into pcBuf, registers it in the trace
// stack table, and returns its unique ID. pcBuf should have a length equal to
// traceStackSize. skip controls the number of leaf frames to omit in order to
// hide tracer internals from stack traces, see CL 5523.
func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 {
	gp := getg()
	curgp := mp.curg
	nstk := 1
	if tracefpunwindoff() || mp.hasCgoOnStack() {
		// Slow path: Unwind using default unwinder. Used when frame pointer
		// unwinding is unavailable or disabled (tracefpunwindoff), or might
		// produce incomplete results or crashes (hasCgoOnStack). Note that no
		// cgo callback related crashes have been observed yet. The main
		// motivation is to take advantage of a potentially registered cgo
		// symbolizer.
		pcBuf[0] = logicalStackSentinel
		if curgp == gp {
			nstk += callers(skip+1, pcBuf[1:])
		} else if curgp != nil {
			nstk += gcallers(curgp, skip, pcBuf[1:])
		}
	} else {
		// Fast path: Unwind using frame pointers.
		pcBuf[0] = uintptr(skip)
		if curgp == gp {
			nstk += fpTracebackPCs(unsafe.Pointer(getcallerfp()), pcBuf[1:])
		} else if curgp != nil {
			// We're called on the g0 stack through mcall(fn) or systemstack(fn). To
			// behave like gcallers above, we start unwinding from sched.bp, which
			// points to the caller frame of the leaf frame on g's stack. The return
			// address of the leaf frame is stored in sched.pc, which we manually
			// capture here.
			pcBuf[1] = curgp.sched.pc
			nstk += 1 + fpTracebackPCs(unsafe.Pointer(curgp.sched.bp), pcBuf[2:])
		}
	}
	if nstk > 0 {
		nstk-- // skip runtime.goexit
	}
	if nstk > 0 && curgp.goid == 1 {
		nstk-- // skip runtime.main
	}
	id := trace.stackTab.put(pcBuf[:nstk])
	return uint64(id)
}

// tracefpunwindoff returns true if frame pointer unwinding for the tracer is
// disabled via GODEBUG or not supported by the architecture.
func tracefpunwindoff() bool {
	return debug.tracefpunwindoff != 0 || (goarch.ArchFamily != goarch.AMD64 && goarch.ArchFamily != goarch.ARM64)
}

// fpTracebackPCs populates pcBuf with the return addresses for each frame and
// returns the number of PCs written to pcBuf. The returned PCs correspond to
// "physical frames" rather than "logical frames"; that is, if A is inlined into
// B, this will return a PC for only B.
func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) {
	for i = 0; i < len(pcBuf) && fp != nil; i++ {
		// return addr sits one word above the frame pointer
		pcBuf[i] = *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
		// follow the frame pointer to the next one
		fp = unsafe.Pointer(*(*uintptr)(fp))
	}
	return i
}

// traceAcquireBuffer returns trace buffer to use and, if necessary, locks it.
func traceAcquireBuffer() (mp *m, pid int32, bufp *traceBufPtr) {
	// Any time we acquire a buffer, we may end up flushing it,
	// but flushes are rare. Record the lock edge even if it
	// doesn't happen this time.
	lockRankMayTraceFlush()

	mp = acquirem()
	if p := mp.p.ptr(); p != nil {
		return mp, p.id, &p.tracebuf
	}
	lock(&trace.bufLock)
	return mp, traceGlobProc, &trace.buf
}

// traceReleaseBuffer releases a buffer previously acquired with traceAcquireBuffer.
func traceReleaseBuffer(mp *m, pid int32) {
	if pid == traceGlobProc {
		unlock(&trace.bufLock)
	}
	releasem(mp)
}

// lockRankMayTraceFlush records the lock ranking effects of a
// potential call to traceFlush.
func lockRankMayTraceFlush() {
	owner := trace.lockOwner
	dolock := owner == nil || owner != getg().m.curg
	if dolock {
		lockWithRankMayAcquire(&trace.lock, getLockRank(&trace.lock))
	}
}

// traceFlush puts buf onto stack of full buffers and returns an empty buffer.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
	owner := trace.lockOwner
	dolock := owner == nil || owner != getg().m.curg
	if dolock {
		lock(&trace.lock)
	}
	if buf != 0 {
		traceFullQueue(buf)
	}
	if trace.empty != 0 {
		buf = trace.empty
		trace.empty = buf.ptr().link
	} else {
		buf = traceBufPtr(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys))
		if buf == 0 {
			throw("trace: out of memory")
		}
	}
	bufp := buf.ptr()
	bufp.link.set(nil)
	bufp.pos = 0

	// initialize the buffer for a new batch
	ticks := uint64(cputicks()) / traceTickDiv
	if ticks == bufp.lastTicks {
		ticks = bufp.lastTicks + 1
	}
	bufp.lastTicks = ticks
	bufp.byte(traceEvBatch | 1<<traceArgCountShift)
	bufp.varint(uint64(pid))
	bufp.varint(ticks)

	if dolock {
		unlock(&trace.lock)
	}
	return buf
}

// traceString adds a string to the trace.strings and returns the id.
func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) {
	if s == "" {
		return 0, bufp
	}

	lock(&trace.stringsLock)
	if raceenabled {
		// raceacquire is necessary because the map access
		// below is race annotated.
		raceacquire(unsafe.Pointer(&trace.stringsLock))
	}

	if id, ok := trace.strings[s]; ok {
		if raceenabled {
			racerelease(unsafe.Pointer(&trace.stringsLock))
		}
		unlock(&trace.stringsLock)

		return id, bufp
	}

	trace.stringSeq++
	id := trace.stringSeq
	trace.strings[s] = id

	if raceenabled {
		racerelease(unsafe.Pointer(&trace.stringsLock))
	}
	unlock(&trace.stringsLock)

	// Memory allocation above may trigger tracing and
	// cause *bufp to change. The following code now works with *bufp,
	// so there must be no memory allocation or other activity
	// that causes tracing after this point.

	buf := bufp.ptr()
	size := 1 + 2*traceBytesPerNumber + len(s)
	if buf == nil || len(buf.arr)-buf.pos < size {
		systemstack(func() {
			buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
			bufp.set(buf)
		})
	}
	buf.byte(traceEvString)
	buf.varint(id)

	// Double-check that the string and its length can fit.
	// Otherwise, truncate the string.
	slen := len(s)
	if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber {
		slen = room
	}

	buf.varint(uint64(slen))
	buf.pos += copy(buf.arr[buf.pos:], s[:slen])

	bufp.set(buf)
	return id, bufp
}

// varint appends v to buf in little-endian-base-128 encoding.
func (buf *traceBuf) varint(v uint64) {
	pos := buf.pos
	for ; v >= 0x80; v >>= 7 {
		buf.arr[pos] = 0x80 | byte(v)
		pos++
	}
	buf.arr[pos] = byte(v)
	pos++
	buf.pos = pos
}

// varintAt writes varint v at byte position pos in buf. This always
// consumes traceBytesPerNumber bytes. This is intended for when the
// caller needs to reserve space for a varint but can't populate it
// until later.
func (buf *traceBuf) varintAt(pos int, v uint64) {
	for i := 0; i < traceBytesPerNumber; i++ {
		if i < traceBytesPerNumber-1 {
			buf.arr[pos] = 0x80 | byte(v)
		} else {
			buf.arr[pos] = byte(v)
		}
		v >>= 7
		pos++
	}
}

// byte appends v to buf.
func (buf *traceBuf) byte(v byte) {
	buf.arr[buf.pos] = v
	buf.pos++
}
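// traceVarintDecodeSketch is an illustrative counterpart to varint above and is
// not used by the tracer: it decodes one little-endian base-128 number, reading
// seven value bits per byte until it sees a byte without the 0x80 continuation
// bit, and reports how many bytes were consumed.
func traceVarintDecodeSketch(arr []byte) (v uint64, n int) {
	for shift := uint(0); n < len(arr); shift += 7 {
		b := arr[n]
		n++
		v |= uint64(b&0x7f) << shift
		if b < 0x80 {
			return v, n
		}
	}
	return 0, 0 // truncated input
}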
// traceStackTable maps stack traces (arrays of PCs) to unique uint32 ids.
// It is lock-free for reading.
type traceStackTable struct {
	lock mutex // Must be acquired on the system stack
	seq  uint32
	mem  traceAlloc
	tab  [1 << 13]traceStackPtr
}

// traceStack is a single stack in traceStackTable.
type traceStack struct {
	link traceStackPtr
	hash uintptr
	id   uint32
	n    int
	stk  [0]uintptr // real type [n]uintptr
}

type traceStackPtr uintptr

func (tp traceStackPtr) ptr() *traceStack { return (*traceStack)(unsafe.Pointer(tp)) }

// stack returns slice of PCs.
func (ts *traceStack) stack() []uintptr {
	return (*[traceStackSize]uintptr)(unsafe.Pointer(&ts.stk))[:ts.n]
}

// put returns a unique id for the stack trace pcs and caches it in the table,
// if it sees the trace for the first time.
func (tab *traceStackTable) put(pcs []uintptr) uint32 {
	if len(pcs) == 0 {
		return 0
	}
	hash := memhash(unsafe.Pointer(&pcs[0]), 0, uintptr(len(pcs))*unsafe.Sizeof(pcs[0]))
	// First, search the hashtable w/o the mutex.
	if id := tab.find(pcs, hash); id != 0 {
		return id
	}
	// Now, double check under the mutex.
	// Switch to the system stack so we can acquire tab.lock
	var id uint32
	systemstack(func() {
		lock(&tab.lock)
		if id = tab.find(pcs, hash); id != 0 {
			unlock(&tab.lock)
			return
		}
		// Create new record.
		tab.seq++
		stk := tab.newStack(len(pcs))
		stk.hash = hash
		stk.id = tab.seq
		id = stk.id
		stk.n = len(pcs)
		stkpc := stk.stack()
		copy(stkpc, pcs)
		part := int(hash % uintptr(len(tab.tab)))
		stk.link = tab.tab[part]
		atomicstorep(unsafe.Pointer(&tab.tab[part]), unsafe.Pointer(stk))
		unlock(&tab.lock)
	})
	return id
}

// find checks if the stack trace pcs is already present in the table.
func (tab *traceStackTable) find(pcs []uintptr, hash uintptr) uint32 {
	part := int(hash % uintptr(len(tab.tab)))
Search:
	for stk := tab.tab[part].ptr(); stk != nil; stk = stk.link.ptr() {
		if stk.hash == hash && stk.n == len(pcs) {
			for i, stkpc := range stk.stack() {
				if stkpc != pcs[i] {
					continue Search
				}
			}
			return stk.id
		}
	}
	return 0
}

// newStack allocates a new stack of size n.
func (tab *traceStackTable) newStack(n int) *traceStack {
	return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize))
}

// traceFrames returns the frames corresponding to pcs. It may
// allocate and may emit trace events.
func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
	frames := make([]traceFrame, 0, len(pcs))
	ci := CallersFrames(pcs)
	for {
		var frame traceFrame
		f, more := ci.Next()
		frame, bufp = traceFrameForPC(bufp, 0, f)
		frames = append(frames, frame)
		if !more {
			return frames, bufp
		}
	}
}

// dump writes all previously cached stacks to trace buffers,
// releases all memory and resets state.
//
// This must run on the system stack because it calls traceFlush.
//
//go:systemstack
func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr {
	for i := range tab.tab {
		stk := tab.tab[i].ptr()
		for ; stk != nil; stk = stk.link.ptr() {
			var frames []traceFrame
			frames, bufp = traceFrames(bufp, fpunwindExpand(stk.stack()))

			// Estimate the size of this record. This
			// bound is pretty loose, but avoids counting
			// lots of varint sizes.
			maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber
			// Make sure we have enough buffer space.
			if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize {
				bufp = traceFlush(bufp, 0)
			}

			// Emit header, with space reserved for length.
			buf := bufp.ptr()
			buf.byte(traceEvStack | 3<<traceArgCountShift)
			lenPos := buf.pos
			buf.pos += traceBytesPerNumber

			// Emit body.
			recPos := buf.pos
			buf.varint(uint64(stk.id))
			buf.varint(uint64(len(frames)))
			for _, frame := range frames {
				buf.varint(uint64(frame.PC))
				buf.varint(frame.funcID)
				buf.varint(frame.fileID)
				buf.varint(frame.line)
			}

			// Fill in size header.
			buf.varintAt(lenPos, uint64(buf.pos-recPos))
		}
	}

	tab.mem.drop()
	*tab = traceStackTable{}
	lockInit(&((*tab).lock), lockRankTraceStackTab)

	return bufp
}

// fpunwindExpand checks if pcBuf contains logical frames (which include inlined
// frames) or physical frames (produced by frame pointer unwinding) using a
// sentinel value in pcBuf[0]. Logical frames are simply returned without the
// sentinel. Physical frames are turned into logical frames via inline unwinding
// and by applying the skip value that's stored in pcBuf[0].
func fpunwindExpand(pcBuf []uintptr) []uintptr {
	if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
		// pcBuf contains logical rather than inlined frames, skip has already been
		// applied, just return it without the sentinel value in pcBuf[0].
		return pcBuf[1:]
	}

	var (
		cache      pcvalueCache
		lastFuncID = abi.FuncIDNormal
		newPCBuf   = make([]uintptr, 0, traceStackSize)
		skip       = pcBuf[0]
		// skipOrAdd skips or appends retPC to newPCBuf and returns true if more
		// pcs can be added.
		skipOrAdd = func(retPC uintptr) bool {
			if skip > 0 {
				skip--
			} else {
				newPCBuf = append(newPCBuf, retPC)
			}
			return len(newPCBuf) < cap(newPCBuf)
		}
	)

outer:
	for _, retPC := range pcBuf[1:] {
		callPC := retPC - 1
		fi := findfunc(callPC)
		if !fi.valid() {
			// There is no funcInfo if callPC belongs to a C function. In this case
			// we still keep the pc, but don't attempt to expand inlined frames.
			if more := skipOrAdd(retPC); !more {
				break outer
			}
			continue
		}

		u, uf := newInlineUnwinder(fi, callPC, &cache)
		for ; uf.valid(); uf = u.next(uf) {
			sf := u.srcFunc(uf)
			if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
				// ignore wrappers
			} else if more := skipOrAdd(uf.pc + 1); !more {
				break outer
			}
			lastFuncID = sf.funcID
		}
	}
	return newPCBuf
}

type traceFrame struct {
	PC     uintptr
	funcID uint64
	fileID uint64
	line   uint64
}

// traceFrameForPC records the frame information.
// It may allocate memory.
func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
	bufp := &buf
	var frame traceFrame
	frame.PC = f.PC

	fn := f.Function
	const maxLen = 1 << 10
	if len(fn) > maxLen {
		fn = fn[len(fn)-maxLen:]
	}
	frame.funcID, bufp = traceString(bufp, pid, fn)
	frame.line = uint64(f.Line)
	file := f.File
	if len(file) > maxLen {
		file = file[len(file)-maxLen:]
	}
	frame.fileID, bufp = traceString(bufp, pid, file)
	return frame, (*bufp)
}

// traceAlloc is a non-thread-safe region allocator.
// It holds a linked list of traceAllocBlock.
type traceAlloc struct {
	head traceAllocBlockPtr
	off  uintptr
}

// traceAllocBlock is a block in traceAlloc.
//
// traceAllocBlock is allocated from non-GC'd memory, so it must not
// contain heap pointers. Writes to pointers to traceAllocBlocks do
// not need write barriers.
type traceAllocBlock struct {
	_    sys.NotInHeap
	next traceAllocBlockPtr
	data [64<<10 - goarch.PtrSize]byte
}

// TODO: Since traceAllocBlock now embeds runtime/internal/sys.NotInHeap, this isn't necessary.
type traceAllocBlockPtr uintptr

func (p traceAllocBlockPtr) ptr() *traceAllocBlock   { return (*traceAllocBlock)(unsafe.Pointer(p)) }
func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(unsafe.Pointer(x)) }

// alloc allocates an n-byte block.
func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer {
	n = alignUp(n, goarch.PtrSize)
	if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) {
		if n > uintptr(len(a.head.ptr().data)) {
			throw("trace: alloc too large")
		}
		block := (*traceAllocBlock)(sysAlloc(unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys))
		if block == nil {
			throw("trace: out of memory")
		}
		block.next.set(a.head.ptr())
		a.head.set(block)
		a.off = 0
	}
	p := &a.head.ptr().data[a.off]
	a.off += n
	return unsafe.Pointer(p)
}

// drop frees all previously allocated memory and resets the allocator.
func (a *traceAlloc) drop() {
	for a.head != 0 {
		block := a.head.ptr()
		a.head.set(block.next.ptr())
		sysFree(unsafe.Pointer(block), unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys)
	}
}

// The following functions write specific events to trace.

func traceGomaxprocs(procs int32) {
	traceEvent(traceEvGomaxprocs, 1, uint64(procs))
}

func traceProcStart() {
	traceEvent(traceEvProcStart, -1, uint64(getg().m.id))
}

func traceProcStop(pp *p) {
	// Sysmon and stopTheWorld can stop Ps blocked in syscalls,
	// to handle this we temporarily employ the P.
	mp := acquirem()
	oldp := mp.p
	mp.p.set(pp)
	traceEvent(traceEvProcStop, -1)
	mp.p = oldp
	releasem(mp)
}

func traceGCStart() {
	traceEvent(traceEvGCStart, 3, trace.seqGC)
	trace.seqGC++
}

func traceGCDone() {
	traceEvent(traceEvGCDone, -1)
}

func traceGCSTWStart(kind int) {
	traceEvent(traceEvGCSTWStart, -1, uint64(kind))
}

func traceGCSTWDone() {
	traceEvent(traceEvGCSTWDone, -1)
}

// traceGCSweepStart prepares to trace a sweep loop. This does not
// emit any events until traceGCSweepSpan is called.
//
// traceGCSweepStart must be paired with traceGCSweepDone and there
// must be no preemption points between these two calls.
func traceGCSweepStart() {
	// Delay the actual GCSweepStart event until the first span
	// sweep. If we don't sweep anything, don't emit any events.
	pp := getg().m.p.ptr()
	if pp.traceSweep {
		throw("double traceGCSweepStart")
	}
	pp.traceSweep, pp.traceSwept, pp.traceReclaimed = true, 0, 0
}

// traceGCSweepSpan traces the sweep of a single page.
//
// This may be called outside a traceGCSweepStart/traceGCSweepDone
// pair; however, it will not emit any trace events in this case.
func traceGCSweepSpan(bytesSwept uintptr) {
	pp := getg().m.p.ptr()
	if pp.traceSweep {
		if pp.traceSwept == 0 {
			traceEvent(traceEvGCSweepStart, 1)
		}
		pp.traceSwept += bytesSwept
	}
}

func traceGCSweepDone() {
	pp := getg().m.p.ptr()
	if !pp.traceSweep {
		throw("missing traceGCSweepStart")
	}
	if pp.traceSwept != 0 {
		traceEvent(traceEvGCSweepDone, -1, uint64(pp.traceSwept), uint64(pp.traceReclaimed))
	}
	pp.traceSweep = false
}

func traceGCMarkAssistStart() {
	traceEvent(traceEvGCMarkAssistStart, 1)
}

func traceGCMarkAssistDone() {
	traceEvent(traceEvGCMarkAssistDone, -1)
}

func traceGoCreate(newg *g, pc uintptr) {
	newg.traceseq = 0
	newg.tracelastp = getg().m.p
	// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
	id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(pc) + sys.PCQuantum})
	traceEvent(traceEvGoCreate, 2, newg.goid, uint64(id))
}

func traceGoStart() {
	gp := getg().m.curg
	pp := gp.m.p
	gp.traceseq++
	if pp.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker {
		traceEvent(traceEvGoStartLabel, -1, gp.goid, gp.traceseq, trace.markWorkerLabels[pp.ptr().gcMarkWorkerMode])
	} else if gp.tracelastp == pp {
		traceEvent(traceEvGoStartLocal, -1, gp.goid)
	} else {
		gp.tracelastp = pp
		traceEvent(traceEvGoStart, -1, gp.goid, gp.traceseq)
	}
}

func traceGoEnd() {
	traceEvent(traceEvGoEnd, -1)
}

func traceGoSched() {
	gp := getg()
	gp.tracelastp = gp.m.p
	traceEvent(traceEvGoSched, 1)
}

func traceGoPreempt() {
	gp := getg()
	gp.tracelastp = gp.m.p
	traceEvent(traceEvGoPreempt, 1)
}

func traceGoPark(traceEv byte, skip int) {
	traceEvent(traceEv, skip)
}

func traceGoUnpark(gp *g, skip int) {
	pp := getg().m.p
	gp.traceseq++
	if gp.tracelastp == pp {
		traceEvent(traceEvGoUnblockLocal, skip, gp.goid)
	} else {
		gp.tracelastp = pp
		traceEvent(traceEvGoUnblock, skip, gp.goid, gp.traceseq)
	}
}

func traceGoSysCall() {
	var skip int
	switch {
	case tracefpunwindoff():
		// Unwind by skipping 1 frame relative to gp.syscallsp which is captured 3
		// frames above this frame. For frame pointer unwinding we produce the same
		// results by hard coding the number of frames in between our caller and the
		// actual syscall, see cases below.
		// TODO(felixge): Implement gp.syscallbp to avoid this workaround?
		skip = 1
	case GOOS == "solaris" || GOOS == "illumos":
		// These platforms don't use a libc_read_trampoline.
		skip = 3
	default:
		// Skip the extra trampoline frame used on most systems.
		skip = 4
	}
	traceEvent(traceEvGoSysCall, skip)
}

func traceGoSysExit(ts int64) {
	if ts != 0 && ts < trace.ticksStart {
		// There is a race between the code that initializes sysexitticks
		// (in exitsyscall, which runs without a P, and therefore is not
		// stopped with the rest of the world) and the code that initializes
		// a new trace. The recorded sysexitticks must therefore be treated
		// as "best effort". If they are valid for this trace, then great,
		// use them for greater accuracy. But if they're not valid for this
		// trace, assume that the trace was started after the actual syscall
		// exit (but before we actually managed to start the goroutine,
		// aka right now), and assign a fresh time stamp to keep the log consistent.
		ts = 0
	}
	gp := getg().m.curg
	gp.traceseq++
	gp.tracelastp = gp.m.p
	traceEvent(traceEvGoSysExit, -1, gp.goid, gp.traceseq, uint64(ts)/traceTickDiv)
}

func traceGoSysBlock(pp *p) {
	// Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked,
	// to handle this we temporarily employ the P.
	mp := acquirem()
	oldp := mp.p
	mp.p.set(pp)
	traceEvent(traceEvGoSysBlock, -1)
	mp.p = oldp
	releasem(mp)
}

func traceHeapAlloc(live uint64) {
	traceEvent(traceEvHeapAlloc, -1, live)
}

func traceHeapGoal() {
	heapGoal := gcController.heapGoal()
	if heapGoal == ^uint64(0) {
		// Heap-based triggering is disabled.
		traceEvent(traceEvHeapGoal, -1, 0)
	} else {
		traceEvent(traceEvHeapGoal, -1, heapGoal)
	}
}

// To access runtime functions from runtime/trace.
// See runtime/trace/annotation.go

//go:linkname trace_userTaskCreate runtime/trace.userTaskCreate
func trace_userTaskCreate(id, parentID uint64, taskType string) {
	if !trace.enabled {
		return
	}

	// Same as in traceEvent.
	mp, pid, bufp := traceAcquireBuffer()
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(mp, pid)
		return
	}

	typeStringID, bufp := traceString(bufp, pid, taskType)
	traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 0, 3, id, parentID, typeStringID)
	traceReleaseBuffer(mp, pid)
}

//go:linkname trace_userTaskEnd runtime/trace.userTaskEnd
func trace_userTaskEnd(id uint64) {
	traceEvent(traceEvUserTaskEnd, 2, id)
}

//go:linkname trace_userRegion runtime/trace.userRegion
func trace_userRegion(id, mode uint64, name string) {
	if !trace.enabled {
		return
	}

	mp, pid, bufp := traceAcquireBuffer()
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(mp, pid)
		return
	}

	nameStringID, bufp := traceString(bufp, pid, name)
	traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 0, 3, id, mode, nameStringID)
	traceReleaseBuffer(mp, pid)
}

//go:linkname trace_userLog runtime/trace.userLog
func trace_userLog(id uint64, category, message string) {
	if !trace.enabled {
		return
	}

	mp, pid, bufp := traceAcquireBuffer()
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(mp, pid)
		return
	}

	categoryID, bufp := traceString(bufp, pid, category)

	// The log message is recorded after all of the normal trace event
	// arguments, including the task, category, and stack IDs. We must ask
	// traceEventLocked to reserve extra space for the length of the message
	// and the message itself.
	extraSpace := traceBytesPerNumber + len(message)
	traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 0, 3, id, categoryID)
	buf := bufp.ptr()

	// Double-check that the message and its length can fit.
	// Otherwise, truncate the message.
	slen := len(message)
	if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber {
		slen = room
	}
	buf.varint(uint64(slen))
	buf.pos += copy(buf.arr[buf.pos:], message[:slen])

	traceReleaseBuffer(mp, pid)
}

// startPCforTrace returns the start PC of a goroutine for tracing purposes.
// If pc is a wrapper, it returns the PC of the wrapped function.
// Otherwise it returns pc.
func startPCforTrace(pc uintptr) uintptr {
	f := findfunc(pc)
	if !f.valid() {
		return pc // may happen for locked g in extra M since its pc is 0.
	}
	w := funcdata(f, abi.FUNCDATA_WrapInfo)
	if w == nil {
		return pc // not a wrapper
	}
	return f.datap.textAddr(*(*uint32)(w))
}
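// For reference, most programs do not call StartTrace, ReadTrace, and StopTrace
// directly; they go through the runtime/trace package instead, roughly:
//
//	f, err := os.Create("trace.out")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer f.Close()
//	if err := trace.Start(f); err != nil {
//		log.Fatal(err)
//	}
//	defer trace.Stop()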