github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/runtime/cpuprof.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table and copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself from within the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the cpuProfile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notetsleepg(&p.wait, -1). If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero. Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations. The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
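//
// As a rough summary (an illustrative sketch of the protocol above,
// not an exhaustive state machine), handoff moves through these states:
//
//	handoff == 0                signal handler owns the active half; writer waits
//	0 -> n      (flushlog)      handler hands off n log entries and wakes the writer
//	n -> 0      (getprofile)    writer is done with its half and hands it back
//	n -> n|0x80000000           profiling is over; writer flushes the hash table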

package runtime

import "unsafe"

const (
	numBuckets      = 1 << 10
	logSize         = 1 << 17
	assoc           = 4
	maxCPUProfStack = 64
)

type cpuprofEntry struct {
	count uintptr
	depth uintptr
	stack [maxCPUProfStack]uintptr
}

type cpuProfile struct {
	on     bool    // profiling is on
	wait   note    // goroutine waits here
	count  uintptr // tick count
	evicts uintptr // eviction count
	lost   uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	hash [numBuckets]struct {
		entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log     [2][logSize / 2]uintptr
	nlog    uintptr
	toggle  int32
	handoff uint32

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle  uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent  bool // special end-of-data record sent; => flushing
}

var (
	cpuprofLock mutex
	cpuprof     *cpuProfile

	// eod is the end-of-data marker: a record with count 0,
	// depth 1, and a single zero PC.
	eod = [3]uintptr{0, 1, 0}
)

func setcpuprofilerate_m() // proc.c

// setcpuprofilerate passes hz to setcpuprofilerate_m via scalararg,
// switching to the m stack to run it.
func setcpuprofilerate(hz int32) {
	g := getg()
	g.m.scalararg[0] = uintptr(hz)
	onM(setcpuprofilerate_m)
}

// lostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
func lostProfileData() {}

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
		if cpuprof == nil {
			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
			if cpuprof == nil {
				print("runtime: cpu profiling cannot allocate memory\n")
				unlock(&cpuprofLock)
				return
			}
		}
		if cpuprof.on || cpuprof.handoff != 0 {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprofLock)
			return
		}

		cpuprof.on = true
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		p := &cpuprof.log[0]
		p[0] = 0                 // count for header
		p[1] = 3                 // depth for header
		p[2] = 0                 // version number
		p[3] = uintptr(1e6 / hz) // period (microseconds)
		p[4] = 0
		cpuprof.nlog = 5
		cpuprof.toggle = 0
		cpuprof.wholding = false
		cpuprof.wtoggle = 0
		cpuprof.flushing = false
		cpuprof.eodSent = false
		noteclear(&cpuprof.wait)

		setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
		setcpuprofilerate(0)
		cpuprof.on = false

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in cpuprof.handoff to tell getprofile.
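		// The CAS loop below can race with getprofile, which may
		// concurrently swap a non-zero handoff back to zero after
		// consuming a log half, so retry until the high bit is set.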
		for {
			n := cpuprof.handoff
			if n&0x80000000 != 0 {
				print("runtime: setcpuprofile(off) twice\n")
			}
			if cas(&cpuprof.handoff, n, n|0x80000000) {
				if n == 0 {
					// We did the transition from 0 -> non-zero,
					// so we wake getprofile.
					notewakeup(&cpuprof.wait)
				}
				break
			}
		}
	}
	unlock(&cpuprofLock)
}

// cpuproftick is called by the profiling signal handler with the
// current stack trace. It truncates the trace to maxCPUProfStack
// frames and records it in the profile.
func cpuproftick(pc *uintptr, n int32) {
	if n > maxCPUProfStack {
		n = maxCPUProfStack
	}
	s := (*[maxCPUProfStack]uintptr)(unsafe.Pointer(pc))[:n]
	cpuprof.add(s)
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
func (p *cpuProfile) add(pc []uintptr) {
	// Compute hash: rotate h left by 8 bits and mix in each PC.
	h := uintptr(0)
	for _, x := range pc {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += x*31 + x*7 + x*3
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
Assoc:
	for i := range b.entry {
		e := &b.entry[i]
		if e.depth != uintptr(len(pc)) {
			continue
		}
		for j := range pc {
			if e.stack[j] != pc[j] {
				continue Assoc
			}
		}
		e.count++
		return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
		if e == nil || b.entry[i].count < e.count {
			e = &b.entry[i]
		}
	}
	if e.count > 0 {
		if !p.evict(e) {
			// Could not evict entry. Record lost stack.
			p.lost++
			return
		}
		p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = uintptr(len(pc))
	e.count = 1
	copy(e.stack[:], pc)
}

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block. It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
func (p *cpuProfile) evict(e *cpuprofEntry) bool {
	d := e.depth
	nslot := d + 2
	log := &p.log[p.toggle]
	if p.nlog+nslot > uintptr(len(p.log[0])) {
		if !p.flushlog() {
			return false
		}
		log = &p.log[p.toggle]
	}

	q := p.nlog
	log[q] = e.count
	q++
	log[q] = d
	q++
	copy(log[q:], e.stack[:d])
	q += d
	p.nlog = q
	e.count = 0
	return true
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block. It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
func (p *cpuProfile) flushlog() bool {
	if !cas(&p.handoff, 0, uint32(p.nlog)) {
		return false
	}
	notewakeup(&p.wait)

	p.toggle = 1 - p.toggle
	log := &p.log[p.toggle]
	q := uintptr(0)
	if p.lost > 0 {
		lostPC := funcPC(lostProfileData)
		log[0] = p.lost
		log[1] = 1
		log[2] = lostPC
		q = 3
		p.lost = 0
	}
	p.nlog = q
	return true
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
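// The returned slice aliases runtime-owned memory (a log half or the
// eod marker), so the caller must save the data before the next call,
// which implicitly hands the log half back to the signal handler.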
func (p *cpuProfile) getprofile() []byte {
	if p == nil {
		return nil
	}

	if p.wholding {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for {
			n := p.handoff
			if n == 0 {
				print("runtime: phase error during cpu profile handoff\n")
				return nil
			}
			if n&0x80000000 != 0 {
				p.wtoggle = 1 - p.wtoggle
				p.wholding = false
				p.flushing = true
				goto Flush
			}
			if cas(&p.handoff, n, 0) {
				break
			}
		}
		p.wtoggle = 1 - p.wtoggle
		p.wholding = false
	}

	if p.flushing {
		goto Flush
	}

	if !p.on && p.handoff == 0 {
		return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
		print("runtime: phase error during cpu profile wait\n")
		return nil
	case n == 0x80000000:
		p.flushing = true
		goto Flush
	default:
		n &^= 0x80000000

		// Return new log to caller.
		p.wholding = true

		return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
Flush:
	for i := range p.hash {
		b := &p.hash[i]
		for j := range b.entry {
			e := &b.entry[j]
			if e.count > 0 && !p.evict(e) {
				// Filled the log. Stop the loop and return what we've got.
				break Flush
			}
		}
	}

	// Return pending log data.
	if p.nlog > 0 {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		n := p.nlog
		p.nlog = 0
		return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p.eodSent = true
		return uintptrBytes(eod[:])
	}

	// Finally done. Clean up and return nil.
	p.flushing = false
	if !cas(&p.handoff, p.handoff, 0) {
		print("runtime: profile flush racing with something\n")
	}
	return nil
}

// uintptrBytes reinterprets a []uintptr as a []byte without copying;
// the result aliases the original backing array.
func uintptrBytes(p []uintptr) (ret []byte) {
	pp := (*sliceStruct)(unsafe.Pointer(&p))
	rp := (*sliceStruct)(unsafe.Pointer(&ret))

	rp.array = pp.array
	rp.len = pp.len * int(unsafe.Sizeof(p[0]))
	rp.cap = rp.len

	return
}

// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
// blocking until data is available. If profiling is turned off and all the profile
// data accumulated while it was on has been returned, CPUProfile returns nil.
// The caller must save the returned data before calling CPUProfile again.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// CPUProfile directly.
func CPUProfile() []byte {
	return cpuprof.getprofile()
}
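// A minimal sketch of the intended consumer, assuming a caller along
// the lines of runtime/pprof (profileWriter and w are illustrative
// names, not part of this file):
//
//	func profileWriter(w io.Writer) {
//		for {
//			data := runtime.CPUProfile() // blocks until a chunk is ready
//			if data == nil {
//				break // profiling is off and all data has been returned
//			}
//			w.Write(data) // error handling elided
//		}
//	}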