// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p->wait). If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero. Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations. The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
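//
// The consuming side of this handoff lives in Go: package runtime/pprof
// starts a writer goroutine that repeatedly calls runtime.CPUProfile
// (runtime·CPUProfile below) and copies each returned block to the user's
// io.Writer, stopping once it gets back nil. Roughly, as a sketch only and
// assuming an io.Writer named w:
//
//	for {
//		data := runtime.CPUProfile()
//		if data == nil {
//			break // profiling has stopped and all buffered data was returned
//		}
//		w.Write(data)
//	}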

#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"

enum
{
	HashSize = 1<<10,
	LogSize = 1<<17,
	Assoc = 4,
	MaxStack = 64,
};

typedef struct Profile Profile;
typedef struct Bucket Bucket;
typedef struct Entry Entry;

struct Entry {
	uintptr count;
	uintptr depth;
	uintptr stack[MaxStack];
};

struct Bucket {
	Entry entry[Assoc];
};

struct Profile {
	bool on;		// profiling is on
	Note wait;		// goroutine waits here
	uintptr count;		// tick count
	uintptr evicts;		// eviction count
	uintptr lost;		// lost ticks that need to be logged
	uintptr totallost;	// total lost ticks

	// Active recent stack traces.
	Bucket hash[HashSize];

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	uintptr log[2][LogSize/2];
	uintptr nlog;
	int32 toggle;
	uint32 handoff;

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	uint32 wtoggle;
	bool wholding;	// holding & need to release a log half
	bool flushing;	// flushing hash table - profile is over
	bool eod_sent;	// special end-of-data record sent; => flushing
};

static Lock lk;
static Profile *prof;

static void tick(uintptr*, int32);
static void add(Profile*, uintptr*, int32);
static bool evict(Profile*, Entry*);
static bool flushlog(Profile*);

static uintptr eod[3] = {0, 1, 0};

// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
static void
LostProfileData(void)
{
}

// SetCPUProfileRate sets the CPU profiling rate.
// The user documentation is in debug.go.
void
runtime·SetCPUProfileRate(intgo hz)
{
	uintptr *p;
	uintptr n;

	// Call findfunc now so that it won't have to
	// build tables during the signal handler.
	runtime·findfunc(0);

	// Clamp hz to something reasonable.
	if(hz < 0)
		hz = 0;
	if(hz > 1000000)
		hz = 1000000;

	runtime·lock(&lk);
	if(hz > 0) {
		if(prof == nil) {
			prof = runtime·SysAlloc(sizeof *prof);
			if(prof == nil) {
				runtime·printf("runtime: cpu profiling cannot allocate memory\n");
				runtime·unlock(&lk);
				return;
			}
		}
		if(prof->on || prof->handoff != 0) {
			runtime·printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
			runtime·unlock(&lk);
			return;
		}

		prof->on = true;
		p = prof->log[0];
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		*p++ = 0;  // count for header
		*p++ = 3;  // depth for header
		*p++ = 0;  // version number
		*p++ = 1000000 / hz;  // period (microseconds)
		*p++ = 0;
		prof->nlog = p - prof->log[0];
		prof->toggle = 0;
		prof->wholding = false;
		prof->wtoggle = 0;
		prof->flushing = false;
		prof->eod_sent = false;
		runtime·noteclear(&prof->wait);

		runtime·setcpuprofilerate(tick, hz);
	} else if(prof->on) {
		runtime·setcpuprofilerate(nil, 0);
		prof->on = false;

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in prof->handoff to tell getprofile.
		for(;;) {
			n = prof->handoff;
			if(n&0x80000000)
				runtime·printf("runtime: setcpuprofile(off) twice");
			if(runtime·cas(&prof->handoff, n, n|0x80000000))
				break;
		}
		if(n == 0) {
			// we did the transition from 0 -> nonzero so we wake getprofile
			runtime·notewakeup(&prof->wait);
		}
	}
	runtime·unlock(&lk);
}

static void
tick(uintptr *pc, int32 n)
{
	add(prof, pc, n);
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
static void
add(Profile *p, uintptr *pc, int32 n)
{
	int32 i, j;
	uintptr h, x;
	Bucket *b;
	Entry *e;

	if(n > MaxStack)
		n = MaxStack;

	// Compute hash.
	h = 0;
	for(i=0; i<n; i++) {
		h = h<<8 | (h>>(8*(sizeof(h)-1)));
		x = pc[i];
		h += x*31 + x*7 + x*3;
	}
	p->count++;

	// Add to entry count if already present in table.
	b = &p->hash[h%HashSize];
	for(i=0; i<Assoc; i++) {
		e = &b->entry[i];
		if(e->depth != n)
			continue;
		for(j=0; j<n; j++)
			if(e->stack[j] != pc[j])
				goto ContinueAssoc;
		e->count++;
		return;
	ContinueAssoc:;
	}

	// Evict entry with smallest count.
	e = &b->entry[0];
	for(i=1; i<Assoc; i++)
		if(b->entry[i].count < e->count)
			e = &b->entry[i];
	if(e->count > 0) {
		if(!evict(p, e)) {
			// Could not evict entry. Record lost stack.
			p->lost++;
			p->totallost++;
			return;
		}
		p->evicts++;
	}

	// Reuse the newly evicted entry.
	e->depth = n;
	e->count = 1;
	for(i=0; i<n; i++)
		e->stack[i] = pc[i];
}

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block. It is safe to call flushLog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
static bool
evict(Profile *p, Entry *e)
{
	int32 i, d, nslot;
	uintptr *log, *q;

	d = e->depth;
	nslot = d+2;
	log = p->log[p->toggle];
	if(p->nlog+nslot > nelem(p->log[0])) {
		if(!flushlog(p))
			return false;
		log = p->log[p->toggle];
	}

	q = log+p->nlog;
	*q++ = e->count;
	*q++ = d;
	for(i=0; i<d; i++)
		*q++ = e->stack[i];
	p->nlog = q - log;
	e->count = 0;
	return true;
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block. It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
static bool
flushlog(Profile *p)
{
	uintptr *log, *q;

	if(!runtime·cas(&p->handoff, 0, p->nlog))
		return false;
	runtime·notewakeup(&p->wait);

	p->toggle = 1 - p->toggle;
	log = p->log[p->toggle];
	q = log;
	if(p->lost > 0) {
		*q++ = p->lost;
		*q++ = 1;
		*q++ = (uintptr)LostProfileData;
	}
	p->nlog = q - log;
	return true;
}
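
// For reference, the words handed to the writer form the pprof CPU profile
// stream: SetCPUProfileRate writes the header [0, 3, 0, period, 0], evict
// appends one record [count, depth, pc1, ..., pcdepth] per evicted entry
// (including the synthetic LostProfileData records emitted by flushlog),
// and getprofile finishes the stream with the eod trailer [0, 1, 0].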

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
Slice
getprofile(Profile *p)
{
	uint32 i, j, n;
	Slice ret;
	Bucket *b;
	Entry *e;

	ret.array = nil;
	ret.len = 0;
	ret.cap = 0;

	if(p == nil)
		return ret;

	if(p->wholding) {
		// Release previous log to signal handling side.
		// Loop because we are racing against setprofile(off).
		for(;;) {
			n = p->handoff;
			if(n == 0) {
				runtime·printf("runtime: phase error during cpu profile handoff\n");
				return ret;
			}
			if(n & 0x80000000) {
				p->wtoggle = 1 - p->wtoggle;
				p->wholding = false;
				p->flushing = true;
				goto flush;
			}
			if(runtime·cas(&p->handoff, n, 0))
				break;
		}
		p->wtoggle = 1 - p->wtoggle;
		p->wholding = false;
	}

	if(p->flushing)
		goto flush;

	if(!p->on && p->handoff == 0)
		return ret;

	// Wait for new log.
	runtime·entersyscallblock();
	runtime·notesleep(&p->wait);
	runtime·exitsyscall();
	runtime·noteclear(&p->wait);

	n = p->handoff;
	if(n == 0) {
		runtime·printf("runtime: phase error during cpu profile wait\n");
		return ret;
	}
	if(n == 0x80000000) {
		p->flushing = true;
		goto flush;
	}
	n &= ~0x80000000;

	// Return new log to caller.
	p->wholding = true;

	ret.array = (byte*)p->log[p->wtoggle];
	ret.len = n*sizeof(uintptr);
	ret.cap = ret.len;
	return ret;

flush:
	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p->handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	for(i=0; i<HashSize; i++) {
		b = &p->hash[i];
		for(j=0; j<Assoc; j++) {
			e = &b->entry[j];
			if(e->count > 0 && !evict(p, e)) {
				// Filled the log. Stop the loop and return what we've got.
				goto breakflush;
			}
		}
	}
breakflush:

	// Return pending log data.
	if(p->nlog > 0) {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		ret.array = (byte*)p->log[p->toggle];
		ret.len = p->nlog*sizeof(uintptr);
		ret.cap = ret.len;
		p->nlog = 0;
		return ret;
	}

	// Made it through the table without finding anything to log.
	if(!p->eod_sent) {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p->eod_sent = true;
		ret.array = (byte*)eod;
		ret.len = sizeof eod;
		ret.cap = ret.len;
		return ret;
	}

	// Finally done. Clean up and return nil.
	p->flushing = false;
	if(!runtime·cas(&p->handoff, p->handoff, 0))
		runtime·printf("runtime: profile flush racing with something\n");
	return ret;  // set to nil at top of function
}

// CPUProfile returns the next cpu profile block as a []byte.
// The user documentation is in debug.go.
void
runtime·CPUProfile(Slice ret)
{
	ret = getprofile(prof);
	FLUSH(&ret);
}