github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/cpuprof.c

// Copyright 2011 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces.  Most clock ticks
// hit in the cache.  In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data.  The google-perftools code flushed the
// log itself during the signal handler.  This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler.  Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half.  When the signal handler fills its half, it
// offers to swap with the goroutine.  If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field.  If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p->wait).  If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
// to wake the goroutine.  The value indicates the number of entries in the
// log half being handed off.  The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero.  Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations.  The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
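//
// In summary, the handoff word encodes the whole protocol (a condensed
// restatement of the rules above, not new behavior):
//
//	handoff == 0            signal handler owns the word; it may fill its
//	                        log half and CAS in the number of entries.
//	handoff == n, 0 < n     goroutine owns the word; the handed-off log
//	                        half holds n entries, which it writes out
//	                        before setting handoff back to zero.
//	handoff & 0x80000000    profiling is over; the goroutine flushes the
//	                        hash table and emits the end-of-data marker.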

#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"

enum
{
	HashSize = 1<<10,
	LogSize = 1<<17,
	Assoc = 4,
	MaxStack = 64,
};
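
// Together these sizes mean the trace cache below holds HashSize*Assoc =
// 4096 distinct traces of at most MaxStack = 64 frames each, and each
// half of the eviction log holds LogSize/2 = 65536 uintptr words.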

typedef struct Profile Profile;
typedef struct Bucket Bucket;
typedef struct Entry Entry;

struct Entry {
	uintptr count;
	uintptr depth;
	uintptr stack[MaxStack];
};

struct Bucket {
	Entry entry[Assoc];
};

struct Profile {
	bool on;		// profiling is on
	Note wait;		// goroutine waits here
	uintptr count;		// tick count
	uintptr evicts;		// eviction count
	uintptr lost;		// lost ticks that need to be logged
	uintptr totallost;	// total lost ticks

	// Active recent stack traces.
	Bucket hash[HashSize];

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	uintptr log[2][LogSize/2];
	uintptr nlog;
	int32 toggle;
	uint32 handoff;

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	uint32 wtoggle;
	bool wholding;	// holding & need to release a log half
	bool flushing;	// flushing hash table - profile is over
	bool eod_sent;  // special end-of-data record sent; => flushing
};
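
// A rough footprint estimate (not from the original source): Entry is
// 66 uintptrs, so hash is 1024*4*66 = 270336 words and log is 131072
// words, about 3 MB total on 64-bit systems and half that on 32-bit.
// The Profile is allocated once by SysAlloc and reused thereafter.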

static Lock lk;
static Profile *prof;

static void tick(uintptr*, int32);
static void add(Profile*, uintptr*, int32);
static bool evict(Profile*, Entry*);
static bool flushlog(Profile*);

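// eod is the end-of-data marker required by the pprof binary format:
// one final record with count 0, depth 1, and a single zero "PC".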
static uintptr eod[3] = {0, 1, 0};

// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
static void
LostProfileData(void)
{
}

// SetCPUProfileRate sets the CPU profiling rate.
// The user documentation is in debug.go.
void
runtime·SetCPUProfileRate(intgo hz)
{
	uintptr *p;
	uintptr n;

	// Call findfunc now so that it won't have to
	// build tables during the signal handler.
	runtime·findfunc(0);

	// Clamp hz to something reasonable.
	if(hz < 0)
		hz = 0;
	if(hz > 1000000)
		hz = 1000000;

	runtime·lock(&lk);
	if(hz > 0) {
		if(prof == nil) {
			prof = runtime·SysAlloc(sizeof *prof);
			if(prof == nil) {
				runtime·printf("runtime: cpu profiling cannot allocate memory\n");
				runtime·unlock(&lk);
				return;
			}
		}
		if(prof->on || prof->handoff != 0) {
			runtime·printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
			runtime·unlock(&lk);
			return;
		}

		prof->on = true;
		p = prof->log[0];
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		*p++ = 0;  // count for header
		*p++ = 3;  // depth for header
		*p++ = 0;  // version number
		*p++ = 1000000 / hz;  // period (microseconds)
		*p++ = 0;  // padding
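		// For hz = 100, for example, those five header words are 0, 3, 0, 10000, 0.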
		prof->nlog = p - prof->log[0];
		prof->toggle = 0;
		prof->wholding = false;
		prof->wtoggle = 0;
		prof->flushing = false;
		prof->eod_sent = false;
		runtime·noteclear(&prof->wait);

		runtime·setcpuprofilerate(tick, hz);
	} else if(prof != nil && prof->on) {
		runtime·setcpuprofilerate(nil, 0);
		prof->on = false;

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in prof->handoff to tell getprofile.
		for(;;) {
			n = prof->handoff;
			if(n&0x80000000)
				runtime·printf("runtime: setcpuprofile(off) twice\n");
			if(runtime·cas(&prof->handoff, n, n|0x80000000))
				break;
		}
		if(n == 0) {
			// we did the transition from 0 -> nonzero so we wake getprofile
			runtime·notewakeup(&prof->wait);
		}
	}
	runtime·unlock(&lk);
}

static void
tick(uintptr *pc, int32 n)
{
	add(prof, pc, n);
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.  It is allowed to call evict.
static void
add(Profile *p, uintptr *pc, int32 n)
{
	int32 i, j;
	uintptr h, x;
	Bucket *b;
	Entry *e;

	if(n > MaxStack)
		n = MaxStack;

	// Compute hash of the stack trace: rotate h left by one byte,
	// then mix in each PC (note x*31 + x*7 + x*3 == x*41).
	h = 0;
	for(i=0; i<n; i++) {
		h = h<<8 | (h>>(8*(sizeof(h)-1)));
		x = pc[i];
		h += x*31 + x*7 + x*3;
	}
	p->count++;

	// Add to entry count if already present in table.
	b = &p->hash[h%HashSize];
	for(i=0; i<Assoc; i++) {
		e = &b->entry[i];
		if(e->depth != n)
			continue;
		for(j=0; j<n; j++)
			if(e->stack[j] != pc[j])
				goto ContinueAssoc;
		e->count++;
		return;
	ContinueAssoc:;
	}

	// Evict entry with smallest count.
	e = &b->entry[0];
	for(i=1; i<Assoc; i++)
		if(b->entry[i].count < e->count)
			e = &b->entry[i];
	if(e->count > 0) {
		if(!evict(p, e)) {
			// Could not evict entry.  Record lost stack.
			p->lost++;
			p->totallost++;
			return;
		}
		p->evicts++;
	}

	// Reuse the newly evicted entry.
	e->depth = n;
	e->count = 1;
	for(i=0; i<n; i++)
		e->stack[i] = pc[i];
}

// evict copies the given entry's data into the log, so that
// the entry can be reused.  evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block.  It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
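// For example (illustrative values), an entry with count 5 and depth 3
// is appended as the five words 5, 3, stack[0], stack[1], stack[2],
// which is why nslot below is depth+2.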
static bool
evict(Profile *p, Entry *e)
{
	int32 i, d, nslot;
	uintptr *log, *q;

	d = e->depth;
	nslot = d+2;
	log = p->log[p->toggle];
	if(p->nlog+nslot > nelem(p->log[0])) {
		if(!flushlog(p))
			return false;
		log = p->log[p->toggle];
	}

	q = log+p->nlog;
	*q++ = e->count;
	*q++ = d;
	for(i=0; i<d; i++)
		*q++ = e->stack[i];
	p->nlog = q - log;
	e->count = 0;
	return true;
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block.  It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
static bool
flushlog(Profile *p)
{
	uintptr *log, *q;

	if(!runtime·cas(&p->handoff, 0, p->nlog))
		return false;
	runtime·notewakeup(&p->wait);

	p->toggle = 1 - p->toggle;
	log = p->log[p->toggle];
	q = log;
	if(p->lost > 0) {
		// Log the ticks lost since the last flush as one sample
		// attributed to LostProfileData, and reset the pending
		// count so the same loss is not logged again.
		*q++ = p->lost;
		*q++ = 1;
		*q++ = (uintptr)LostProfileData;
		p->lost = 0;
	}
	p->nlog = q - log;
	return true;
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte.  It is called from the writing goroutine.
Slice
getprofile(Profile *p)
{
	uint32 i, j, n;
	Slice ret;
	Bucket *b;
	Entry *e;

	ret.array = nil;
	ret.len = 0;
	ret.cap = 0;

	if(p == nil)
		return ret;

	if(p->wholding) {
		// Release previous log to signal handling side.
		// Loop because we are racing against setprofile(off).
		for(;;) {
			n = p->handoff;
			if(n == 0) {
				runtime·printf("runtime: phase error during cpu profile handoff\n");
				return ret;
			}
			if(n & 0x80000000) {
				p->wtoggle = 1 - p->wtoggle;
				p->wholding = false;
				p->flushing = true;
				goto flush;
			}
			if(runtime·cas(&p->handoff, n, 0))
				break;
		}
		p->wtoggle = 1 - p->wtoggle;
		p->wholding = false;
	}

	if(p->flushing)
		goto flush;

	if(!p->on && p->handoff == 0)
		return ret;

	// Wait for new log.
	runtime·entersyscallblock();
	runtime·notesleep(&p->wait);
	runtime·exitsyscall();
	runtime·noteclear(&p->wait);

	n = p->handoff;
	if(n == 0) {
		runtime·printf("runtime: phase error during cpu profile wait\n");
		return ret;
	}
	if(n == 0x80000000) {
		// Profiling is over and the log is empty: go straight to the flush phase.
		p->flushing = true;
		goto flush;
	}
	n &= ~0x80000000;  // strip the EOF bit; what remains is the entry count

	// Return new log to caller.
	p->wholding = true;

	ret.array = (byte*)p->log[p->wtoggle];
	ret.len = n*sizeof(uintptr);
	ret.cap = ret.len;
	return ret;

flush:
	// In flush mode.
	// Add is no longer being called.  We own the log.
	// Also, p->handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	for(i=0; i<HashSize; i++) {
		b = &p->hash[i];
		for(j=0; j<Assoc; j++) {
			e = &b->entry[j];
			if(e->count > 0 && !evict(p, e)) {
				// Filled the log.  Stop the loop and return what we've got.
				goto breakflush;
			}
		}
	}
breakflush:

	// Return pending log data.
	if(p->nlog > 0) {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		ret.array = (byte*)p->log[p->toggle];
		ret.len = p->nlog*sizeof(uintptr);
		ret.cap = ret.len;
		p->nlog = 0;
		return ret;
	}

	// Made it through the table without finding anything to log.
	if(!p->eod_sent) {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p->eod_sent = true;
		ret.array = (byte*)eod;
		ret.len = sizeof eod;
		ret.cap = ret.len;
		return ret;
	}

	// Finally done.  Clean up and return nil.
	p->flushing = false;
	if(!runtime·cas(&p->handoff, p->handoff, 0))
		runtime·printf("runtime: profile flush racing with something\n");
	return ret;  // set to nil at top of function
}

// CPUProfile returns the next cpu profile block as a []byte.
// The user documentation is in debug.go.
void
runtime·CPUProfile(Slice ret)
{
	ret = getprofile(prof);
	FLUSH(&ret);
}
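
// A typical consumer is runtime/pprof's writer goroutine, which, roughly
// (a sketch of the Go 1.1-era loop, not code from this file), hands each
// returned block to the user's io.Writer until CPUProfile returns nil:
//
//	for {
//		data := runtime.CPUProfile()
//		if data == nil {
//			break
//		}
//		w.Write(data)
//	}
//
// The last non-nil blocks that loop sees are the flushed hash table
// entries and then the eod marker above.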