// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.pagetrace

// Page tracer.
//
// This file contains an implementation of page trace instrumentation for tracking
// the way the Go runtime manages pages of memory. The trace may be enabled at program
// startup with the GODEBUG option pagetrace.
//
// Each page trace event is either 8 or 16 bytes wide. The first
// 8 bytes follow this format for non-sync events:
//
//	[16 timestamp delta][35 base address][10 npages][1 isLarge][2 pageTraceEventType]
//
// If the "large" bit is set then the event is 16 bytes wide with the second 8 byte word
// containing the full npages value (the npages bitfield is 0).
//
// The base address's bottom pageShift bits are always zero, which is why we can pack other
// data in there. We ignore the top 16 bits, assuming a 48 bit address space for the
// heap.
//
// The timestamp delta is computed from the difference between the current nanotime
// timestamp and the last sync event's timestamp. The bottom pageTraceTimeLostBits of
// this delta are removed and only the next pageTraceTimeDeltaBits are kept.
//
// A sync event is emitted at the beginning of each trace buffer and whenever the
// timestamp delta would not fit in an event.
//
// Sync events have the following structure:
//
//	[61 timestamp or P ID][1 isPID][2 pageTraceSyncEvent]
//
// In essence, the "large" bit is repurposed to indicate whether it's a timestamp or a P ID
// (these are typically uint32). Note that we only have 61 bits for the 64-bit timestamp,
// but like for the delta we drop the bottom pageTraceTimeLostBits here as well.
39 40 package runtime 41 42 import ( 43 "runtime/internal/sys" 44 "unsafe" 45 ) 46 47 // pageTraceAlloc records a page trace allocation event. 48 // pp may be nil. Call only if debug.pagetracefd != 0. 49 // 50 // Must run on the system stack as a crude way to prevent preemption. 51 // 52 //go:systemstack 53 func pageTraceAlloc(pp *p, now int64, base, npages uintptr) { 54 if pageTrace.enabled { 55 if now == 0 { 56 now = nanotime() 57 } 58 pageTraceEmit(pp, now, base, npages, pageTraceAllocEvent) 59 } 60 } 61 62 // pageTraceFree records a page trace free event. 63 // pp may be nil. Call only if debug.pagetracefd != 0. 64 // 65 // Must run on the system stack as a crude way to prevent preemption. 66 // 67 //go:systemstack 68 func pageTraceFree(pp *p, now int64, base, npages uintptr) { 69 if pageTrace.enabled { 70 if now == 0 { 71 now = nanotime() 72 } 73 pageTraceEmit(pp, now, base, npages, pageTraceFreeEvent) 74 } 75 } 76 77 // pageTraceScav records a page trace scavenge event. 78 // pp may be nil. Call only if debug.pagetracefd != 0. 79 // 80 // Must run on the system stack as a crude way to prevent preemption. 81 // 82 //go:systemstack 83 func pageTraceScav(pp *p, now int64, base, npages uintptr) { 84 if pageTrace.enabled { 85 if now == 0 { 86 now = nanotime() 87 } 88 pageTraceEmit(pp, now, base, npages, pageTraceScavEvent) 89 } 90 } 91 92 // pageTraceEventType is a page trace event type. 93 type pageTraceEventType uint8 94 95 const ( 96 pageTraceSyncEvent pageTraceEventType = iota // Timestamp emission. 97 pageTraceAllocEvent // Allocation of pages. 98 pageTraceFreeEvent // Freeing pages. 99 pageTraceScavEvent // Scavenging pages. 100 ) 101 102 // pageTraceEmit emits a page trace event. 103 // 104 // Must run on the system stack as a crude way to prevent preemption. 105 // 106 //go:systemstack 107 func pageTraceEmit(pp *p, now int64, base, npages uintptr, typ pageTraceEventType) { 108 // Get a buffer. 
109 var tbp *pageTraceBuf 110 pid := int32(-1) 111 if pp == nil { 112 // We have no P, so take the global buffer. 113 lock(&pageTrace.lock) 114 tbp = &pageTrace.buf 115 } else { 116 tbp = &pp.pageTraceBuf 117 pid = pp.id 118 } 119 120 // Initialize the buffer if necessary. 121 tb := *tbp 122 if tb.buf == nil { 123 tb.buf = (*pageTraceEvents)(sysAlloc(pageTraceBufSize, &memstats.other_sys)) 124 tb = tb.writePid(pid) 125 } 126 127 // Handle timestamp and emit a sync event if necessary. 128 if now < tb.timeBase { 129 now = tb.timeBase 130 } 131 if now-tb.timeBase >= pageTraceTimeMaxDelta { 132 tb.timeBase = now 133 tb = tb.writeSync(pid) 134 } 135 136 // Emit the event. 137 tb = tb.writeEvent(pid, now, base, npages, typ) 138 139 // Write back the buffer. 140 *tbp = tb 141 if pp == nil { 142 unlock(&pageTrace.lock) 143 } 144 } 145 146 const ( 147 pageTraceBufSize = 32 << 10 148 149 // These constants describe the per-event timestamp delta encoding. 150 pageTraceTimeLostBits = 7 // How many bits of precision we lose in the delta. 151 pageTraceTimeDeltaBits = 16 // Size of the delta in bits. 152 pageTraceTimeMaxDelta = 1 << (pageTraceTimeLostBits + pageTraceTimeDeltaBits) 153 ) 154 155 // pageTraceEvents is the low-level buffer containing the trace data. 156 type pageTraceEvents struct { 157 _ sys.NotInHeap 158 events [pageTraceBufSize / 8]uint64 159 } 160 161 // pageTraceBuf is a wrapper around pageTraceEvents that knows how to write events 162 // to the buffer. It tracks state necessary to do so. 163 type pageTraceBuf struct { 164 buf *pageTraceEvents 165 len int // How many events have been written so far. 166 timeBase int64 // The current timestamp base from which deltas are produced. 167 finished bool // Whether this trace buf should no longer flush anything out. 168 } 169 170 // writePid writes a P ID event indicating which P we're running on. 171 // 172 // Assumes there's always space in the buffer since this is only called at the 173 // beginning of a new buffer. 
174 // 175 // Must run on the system stack as a crude way to prevent preemption. 176 // 177 //go:systemstack 178 func (tb pageTraceBuf) writePid(pid int32) pageTraceBuf { 179 e := uint64(int64(pid))<<3 | 0b100 | uint64(pageTraceSyncEvent) 180 tb.buf.events[tb.len] = e 181 tb.len++ 182 return tb 183 } 184 185 // writeSync writes a sync event, which is just a timestamp. Handles flushing. 186 // 187 // Must run on the system stack as a crude way to prevent preemption. 188 // 189 //go:systemstack 190 func (tb pageTraceBuf) writeSync(pid int32) pageTraceBuf { 191 if tb.len+1 > len(tb.buf.events) { 192 // N.B. flush will writeSync again. 193 return tb.flush(pid, tb.timeBase) 194 } 195 e := ((uint64(tb.timeBase) >> pageTraceTimeLostBits) << 3) | uint64(pageTraceSyncEvent) 196 tb.buf.events[tb.len] = e 197 tb.len++ 198 return tb 199 } 200 201 // writeEvent handles writing all non-sync and non-pid events. Handles flushing if necessary. 202 // 203 // pid indicates the P we're currently running on. Necessary in case we need to flush. 204 // now is the current nanotime timestamp. 205 // base is the base address of whatever group of pages this event is happening to. 206 // npages is the length of the group of pages this event is happening to. 207 // typ is the event that's happening to these pages. 208 // 209 // Must run on the system stack as a crude way to prevent preemption. 210 // 211 //go:systemstack 212 func (tb pageTraceBuf) writeEvent(pid int32, now int64, base, npages uintptr, typ pageTraceEventType) pageTraceBuf { 213 large := 0 214 np := npages 215 if npages >= 1024 { 216 large = 1 217 np = 0 218 } 219 if tb.len+1+large > len(tb.buf.events) { 220 tb = tb.flush(pid, now) 221 } 222 if base%pageSize != 0 { 223 throw("base address not page aligned") 224 } 225 e := uint64(base) 226 // The pageShift low-order bits are zero. 
227 e |= uint64(typ) // 2 bits 228 e |= uint64(large) << 2 // 1 bit 229 e |= uint64(np) << 3 // 10 bits 230 // Write the timestamp delta in the upper pageTraceTimeDeltaBits. 231 e |= uint64((now-tb.timeBase)>>pageTraceTimeLostBits) << (64 - pageTraceTimeDeltaBits) 232 tb.buf.events[tb.len] = e 233 if large != 0 { 234 // npages doesn't fit in 10 bits, so write an additional word with that data. 235 tb.buf.events[tb.len+1] = uint64(npages) 236 } 237 tb.len += 1 + large 238 return tb 239 } 240 241 // flush writes out the contents of the buffer to pageTrace.fd and resets the buffer. 242 // It then writes out a P ID event and the first sync event for the new buffer. 243 // 244 // Must run on the system stack as a crude way to prevent preemption. 245 // 246 //go:systemstack 247 func (tb pageTraceBuf) flush(pid int32, now int64) pageTraceBuf { 248 if !tb.finished { 249 lock(&pageTrace.fdLock) 250 writeFull(uintptr(pageTrace.fd), (*byte)(unsafe.Pointer(&tb.buf.events[0])), tb.len*8) 251 unlock(&pageTrace.fdLock) 252 } 253 tb.len = 0 254 tb.timeBase = now 255 return tb.writePid(pid).writeSync(pid) 256 } 257 258 var pageTrace struct { 259 // enabled indicates whether tracing is enabled. If true, fd >= 0. 260 // 261 // Safe to read without synchronization because it's only set once 262 // at program initialization. 263 enabled bool 264 265 // buf is the page trace buffer used if there is no P. 266 // 267 // lock protects buf. 268 lock mutex 269 buf pageTraceBuf 270 271 // fdLock protects writing to fd. 272 // 273 // fd is the file to write the page trace to. 274 fdLock mutex 275 fd int32 276 } 277 278 // initPageTrace initializes the page tracing infrastructure from GODEBUG. 279 // 280 // env must be the value of the GODEBUG environment variable. 
281 func initPageTrace(env string) { 282 var value string 283 for env != "" { 284 elt, rest := env, "" 285 for i := 0; i < len(env); i++ { 286 if env[i] == ',' { 287 elt, rest = env[:i], env[i+1:] 288 break 289 } 290 } 291 env = rest 292 if hasPrefix(elt, "pagetrace=") { 293 value = elt[len("pagetrace="):] 294 break 295 } 296 } 297 pageTrace.fd = -1 298 if canCreateFile && value != "" { 299 var tmp [4096]byte 300 if len(value) != 0 && len(value) < 4096 { 301 copy(tmp[:], value) 302 pageTrace.fd = create(&tmp[0], 0o664) 303 } 304 } 305 pageTrace.enabled = pageTrace.fd >= 0 306 } 307 308 // finishPageTrace flushes all P's trace buffers and disables page tracing. 309 func finishPageTrace() { 310 if !pageTrace.enabled { 311 return 312 } 313 // Grab worldsema as we're about to execute a ragged barrier. 314 semacquire(&worldsema) 315 systemstack(func() { 316 // Disable tracing. This isn't strictly necessary and it's best-effort. 317 pageTrace.enabled = false 318 319 // Execute a ragged barrier, flushing each trace buffer. 320 forEachP(func(pp *p) { 321 if pp.pageTraceBuf.buf != nil { 322 pp.pageTraceBuf = pp.pageTraceBuf.flush(pp.id, nanotime()) 323 } 324 pp.pageTraceBuf.finished = true 325 }) 326 327 // Write the global have-no-P buffer. 328 lock(&pageTrace.lock) 329 if pageTrace.buf.buf != nil { 330 pageTrace.buf = pageTrace.buf.flush(-1, nanotime()) 331 } 332 pageTrace.buf.finished = true 333 unlock(&pageTrace.lock) 334 335 // Safely close the file as nothing else should be allowed to write to the fd. 336 lock(&pageTrace.fdLock) 337 closefd(pageTrace.fd) 338 pageTrace.fd = -1 339 unlock(&pageTrace.fdLock) 340 }) 341 semrelease(&worldsema) 342 } 343 344 // writeFull ensures that a complete write of bn bytes from b is made to fd. 
345 func writeFull(fd uintptr, b *byte, bn int) { 346 for bn > 0 { 347 n := write(fd, unsafe.Pointer(b), int32(bn)) 348 if n == -_EINTR || n == -_EAGAIN { 349 continue 350 } 351 if n < 0 { 352 print("errno=", -n, "\n") 353 throw("writeBytes: bad write") 354 } 355 bn -= int(n) 356 b = addb(b, uintptr(n)) 357 } 358 }