github.com/aloncn/graphics-go@v0.0.1/src/runtime/cpuprof.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table and copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the cpuProfile.handoff field. If handoff == 0, then the
// goroutine is not using either log half and is waiting (or will soon be
// waiting) for a new piece by sleeping on p.wait (notetsleepg). If the
// signal handler changes handoff from 0 to non-zero, it must call
// notewakeup(&p.wait) to wake the goroutine. The value indicates the number
// of entries in the log half being handed off. The goroutine leaves the
// non-zero value in place until it has finished processing the log half and
// then flips the number back to zero. Setting the high bit in handoff means
// that the profiling is over, and the goroutine is now in charge of flushing
// the data left in the hash table to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations. The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
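//
// The data handed to the writing goroutine is a stream of uintptr words in
// the pprof binary format: a five-word header written by SetCPUProfileRate
// (count 0, depth 3, then a version word, the sampling period in
// microseconds, and a zero word), followed by one record per evicted entry
// holding the sample count, the stack depth d, and d program counters, and
// finally the end-of-data trailer eod = (0, 1, 0).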

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

const (
	numBuckets      = 1 << 10
	logSize         = 1 << 17
	assoc           = 4
	maxCPUProfStack = 64
)

type cpuprofEntry struct {
	count uintptr
	depth int
	stack [maxCPUProfStack]uintptr
}

type cpuProfile struct {
	on     bool    // profiling is on
	wait   note    // goroutine waits here
	count  uintptr // tick count
	evicts uintptr // eviction count
	lost   uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	hash [numBuckets]struct {
		entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log     [2][logSize / 2]uintptr
	nlog    int
	toggle  int32
	handoff uint32

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle  uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent  bool // special end-of-data record sent; => flushing
}

var (
	cpuprofLock mutex
	cpuprof     *cpuProfile

	eod = [3]uintptr{0, 1, 0}
)

func setcpuprofilerate(hz int32) {
	systemstack(func() {
		setcpuprofilerate_m(hz)
	})
}

// lostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
func lostProfileData() {}

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
		if cpuprof == nil {
			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
			if cpuprof == nil {
				print("runtime: cpu profiling cannot allocate memory\n")
				unlock(&cpuprofLock)
				return
			}
		}
		if cpuprof.on || cpuprof.handoff != 0 {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprofLock)
			return
		}

		cpuprof.on = true
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		p := &cpuprof.log[0]
		p[0] = 0                 // count for header
		p[1] = 3                 // depth for header
		p[2] = 0                 // version number
		p[3] = uintptr(1e6 / hz) // period (microseconds)
		p[4] = 0
		cpuprof.nlog = 5
		cpuprof.toggle = 0
		cpuprof.wholding = false
		cpuprof.wtoggle = 0
		cpuprof.flushing = false
		cpuprof.eodSent = false
		noteclear(&cpuprof.wait)

		setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
		setcpuprofilerate(0)
		cpuprof.on = false

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in cpuprof.handoff to tell getprofile.
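		// The loop below is needed because getprofile may concurrently CAS a
		// non-zero handoff back to zero while releasing a log half; re-read
		// handoff and retry until the CAS that sets the high bit succeeds.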
		for {
			n := cpuprof.handoff
			if n&0x80000000 != 0 {
				print("runtime: setcpuprofile(off) twice\n")
			}
			if atomic.Cas(&cpuprof.handoff, n, n|0x80000000) {
				if n == 0 {
					// we did the transition from 0 -> nonzero so we wake getprofile
					notewakeup(&cpuprof.wait)
				}
				break
			}
		}
	}
	unlock(&cpuprofLock)
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
func (p *cpuProfile) add(pc []uintptr) {
	if len(pc) > maxCPUProfStack {
		pc = pc[:maxCPUProfStack]
	}

	// Compute hash.
	h := uintptr(0)
	for _, x := range pc {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += x * 41
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
Assoc:
	for i := range b.entry {
		e := &b.entry[i]
		if e.depth != len(pc) {
			continue
		}
		for j := range pc {
			if e.stack[j] != pc[j] {
				continue Assoc
			}
		}
		e.count++
		return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
		if e == nil || b.entry[i].count < e.count {
			e = &b.entry[i]
		}
	}
	if e.count > 0 {
		if !p.evict(e) {
			// Could not evict entry. Record lost stack.
			p.lost++
			return
		}
		p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = len(pc)
	e.count = 1
	copy(e.stack[:], pc)
}

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block. It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
func (p *cpuProfile) evict(e *cpuprofEntry) bool {
	d := e.depth
	nslot := d + 2
	log := &p.log[p.toggle]
	if p.nlog+nslot > len(log) {
		if !p.flushlog() {
			return false
		}
		log = &p.log[p.toggle]
	}

	q := p.nlog
	log[q] = e.count
	q++
	log[q] = uintptr(d)
	q++
	copy(log[q:], e.stack[:d])
	q += d
	p.nlog = q
	e.count = 0
	return true
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block. It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
func (p *cpuProfile) flushlog() bool {
	if !atomic.Cas(&p.handoff, 0, uint32(p.nlog)) {
		return false
	}
	notewakeup(&p.wait)

	p.toggle = 1 - p.toggle
	log := &p.log[p.toggle]
	q := 0
	if p.lost > 0 {
		lostPC := funcPC(lostProfileData)
		log[0] = p.lost
		log[1] = 1
		log[2] = lostPC
		q = 3
		p.lost = 0
	}
	p.nlog = q
	return true
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
func (p *cpuProfile) getprofile() []byte {
	if p == nil {
		return nil
	}

	if p.wholding {
		// Release previous log to signal handling side.
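		// Releasing means using Cas to set handoff back to zero so the signal
		// handler can hand off the other half later. If SetCPUProfileRate(0)
		// has set the high bit in the meantime, keep the value and switch to
		// flush mode instead.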
		// Loop because we are racing against SetCPUProfileRate(0).
		for {
			n := p.handoff
			if n == 0 {
				print("runtime: phase error during cpu profile handoff\n")
				return nil
			}
			if n&0x80000000 != 0 {
				p.wtoggle = 1 - p.wtoggle
				p.wholding = false
				p.flushing = true
				goto Flush
			}
			if atomic.Cas(&p.handoff, n, 0) {
				break
			}
		}
		p.wtoggle = 1 - p.wtoggle
		p.wholding = false
	}

	if p.flushing {
		goto Flush
	}

	if !p.on && p.handoff == 0 {
		return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
		print("runtime: phase error during cpu profile wait\n")
		return nil
	case n == 0x80000000:
		p.flushing = true
		goto Flush
	default:
		n &^= 0x80000000

		// Return new log to caller.
		p.wholding = true

		return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
Flush:
	for i := range p.hash {
		b := &p.hash[i]
		for j := range b.entry {
			e := &b.entry[j]
			if e.count > 0 && !p.evict(e) {
				// Filled the log. Stop the loop and return what we've got.
				break Flush
			}
		}
	}

	// Return pending log data.
	if p.nlog > 0 {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		n := p.nlog
		p.nlog = 0
		return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p.eodSent = true
		return uintptrBytes(eod[:])
	}

	// Finally done. Clean up and return nil.
	p.flushing = false
	if !atomic.Cas(&p.handoff, p.handoff, 0) {
		print("runtime: profile flush racing with something\n")
	}
	return nil
}

func uintptrBytes(p []uintptr) (ret []byte) {
	pp := (*slice)(unsafe.Pointer(&p))
	rp := (*slice)(unsafe.Pointer(&ret))

	rp.array = pp.array
	rp.len = pp.len * int(unsafe.Sizeof(p[0]))
	rp.cap = rp.len

	return
}

// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
// blocking until data is available. If profiling is turned off and all the profile
// data accumulated while it was on has been returned, CPUProfile returns nil.
// The caller must save the returned data before calling CPUProfile again.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// CPUProfile directly.
func CPUProfile() []byte {
	return cpuprof.getprofile()
}

//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
}
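// A minimal sketch of a writer loop draining CPUProfile, assuming some
// io.Writer w and that another goroutine eventually calls
// SetCPUProfileRate(0) (the runtime/pprof package wraps this pattern in
// StartCPUProfile and StopCPUProfile):
//
//	for {
//		data := CPUProfile()
//		if data == nil {
//			break // profiling is off and all buffered data has been returned
//		}
//		w.Write(data)
//	}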