github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/cpuprof.c

// Copyright 2011 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces.  Most clock ticks
// hit in the cache.  In the event of a cache miss, an entry must be
// evicted from the hash table and copied to a log that will eventually be
// written as profile data.  The google-perftools code flushed the
// log itself during the signal handler.  This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler.  Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half.  When the signal handler fills its half, it
// offers to swap with the goroutine.  If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field.  If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p->wait).  If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
// to wake the goroutine.  The value indicates the number of entries in the
// log half being handed off.  The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero.  Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations.  The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.

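// In short, handoff encodes three states:
//
//	handoff == 0                the writer holds neither log half and is
//	                            (about to be) asleep on p->wait.
//	0 < handoff < 0x80000000    the writer holds one half, containing
//	                            handoff entries; the handler fills the other.
//	handoff & 0x80000000        profiling is over; the writer flushes the
//	                            hash table and emits the end-of-data record.
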
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"

enum
{
	HashSize = 1<<10,
	LogSize = 1<<17,
	Assoc = 4,
	MaxStack = 64,
};

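// With these sizes, the hash table tracks up to HashSize*Assoc = 4096
// distinct recent traces, and each log half holds LogSize/2 = 64K words.
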
typedef struct Profile Profile;
typedef struct Bucket Bucket;
typedef struct Entry Entry;

struct Entry {
	uintptr count;
	uintptr depth;
	uintptr stack[MaxStack];
};

struct Bucket {
	Entry entry[Assoc];
};

struct Profile {
	bool on;		// profiling is on
	Note wait;		// goroutine waits here
	uintptr count;		// tick count
	uintptr evicts;		// eviction count
	uintptr lost;		// lost ticks that need to be logged
	uintptr totallost;	// total lost ticks

	// Active recent stack traces.
	Bucket hash[HashSize];

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	uintptr log[2][LogSize/2];
	uintptr nlog;
	int32 toggle;
	uint32 handoff;

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	uint32 wtoggle;
	bool wholding;	// holding & need to release a log half
	bool flushing;	// flushing hash table - profile is over
	bool eod_sent;  // special end-of-data record sent; => flushing
};
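
// While profiling is running, handoff is the only field written by both
// sides: the signal handler fills hash, log[toggle], and nlog, while the
// writer goroutine owns wtoggle, wholding, flushing, and eod_sent.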

static Lock lk;
static Profile *prof;

static void tick(uintptr*, int32);
static void add(Profile*, uintptr*, int32);
static bool evict(Profile*, Entry*);
static bool flushlog(Profile*);

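// eod is the end-of-data marker, in the same format as a log record:
// count 0, depth 1, and a single zero "PC".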
static uintptr eod[3] = {0, 1, 0};

// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
static void
LostProfileData(void)
{
}

// SetCPUProfileRate sets the CPU profiling rate.
// The user documentation is in debug.go.
void
runtime·SetCPUProfileRate(intgo hz)
{
	uintptr *p;
	uintptr n;

	// Clamp hz to something reasonable.
	if(hz < 0)
		hz = 0;
	if(hz > 1000000)
		hz = 1000000;

	runtime·lock(&lk);
	if(hz > 0) {
		if(prof == nil) {
			prof = runtime·SysAlloc(sizeof *prof);
			if(prof == nil) {
				runtime·printf("runtime: cpu profiling cannot allocate memory\n");
				runtime·unlock(&lk);
				return;
			}
		}
		if(prof->on || prof->handoff != 0) {
			runtime·printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
			runtime·unlock(&lk);
			return;
		}

		prof->on = true;
		p = prof->log[0];
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		*p++ = 0;  // count for header
		*p++ = 3;  // depth for header
		*p++ = 0;  // version number
		*p++ = 1000000 / hz;  // period (microseconds)
		*p++ = 0;
		prof->nlog = p - prof->log[0];
		prof->toggle = 0;
		prof->wholding = false;
		prof->wtoggle = 0;
		prof->flushing = false;
		prof->eod_sent = false;
		runtime·noteclear(&prof->wait);

		runtime·setcpuprofilerate(tick, hz);
	} else if(prof->on) {
		runtime·setcpuprofilerate(nil, 0);
		prof->on = false;

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in prof->handoff to tell getprofile.
		for(;;) {
			n = prof->handoff;
			if(n&0x80000000)
				runtime·printf("runtime: setcpuprofile(off) twice\n");
			if(runtime·cas(&prof->handoff, n, n|0x80000000))
				break;
		}
		if(n == 0) {
			// we did the transition from 0 -> nonzero so we wake getprofile
			runtime·notewakeup(&prof->wait);
		}
	}
	runtime·unlock(&lk);
}
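
// For reference: the writing goroutine that drives this file lives in
// runtime/pprof.  A simplified Go sketch of that loop (assuming the
// exported runtime.CPUProfile API of this era; not the verbatim pprof
// code) looks like:
//
//	func profileWriter(w io.Writer) {
//		for {
//			data := runtime.CPUProfile()
//			if data == nil {
//				break  // eod record already delivered; profile is over
//			}
//			w.Write(data)
//		}
//	}
//
// StopCPUProfile ends the loop by calling SetCPUProfileRate(0) from
// another goroutine; getprofile then hands back the remaining data,
// the eod record, and finally a nil slice.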
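// tick is installed by runtime·setcpuprofilerate as the callback for the
// profiling clock tick.  It runs in signal-handler context, so it simply
// forwards the stack trace to add.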
static void
tick(uintptr *pc, int32 n)
{
	add(prof, pc, n);
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.  It is allowed to call evict.
static void
add(Profile *p, uintptr *pc, int32 n)
{
	int32 i, j;
	uintptr h, x;
	Bucket *b;
	Entry *e;

	if(n > MaxStack)
		n = MaxStack;

	// Compute hash.
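	// Each frame rotates h left by 8 bits and mixes in pc[i]*41
	// (written as x*31 + x*7 + x*3).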
	h = 0;
	for(i=0; i<n; i++) {
		h = h<<8 | (h>>(8*(sizeof(h)-1)));
		x = pc[i];
		h += x*31 + x*7 + x*3;
	}
	p->count++;

	// Add to entry count if already present in table.
	b = &p->hash[h%HashSize];
	for(i=0; i<Assoc; i++) {
		e = &b->entry[i];
		if(e->depth != n)
			continue;
		for(j=0; j<n; j++)
			if(e->stack[j] != pc[j])
				goto ContinueAssoc;
		e->count++;
		return;
	ContinueAssoc:;
	}

	// Evict entry with smallest count.
	e = &b->entry[0];
	for(i=1; i<Assoc; i++)
		if(b->entry[i].count < e->count)
			e = &b->entry[i];
	if(e->count > 0) {
		if(!evict(p, e)) {
			// Could not evict entry.  Record lost stack.
			p->lost++;
			p->totallost++;
			return;
		}
		p->evicts++;
	}

	// Reuse the newly evicted entry.
	e->depth = n;
	e->count = 1;
	for(i=0; i<n; i++)
		e->stack[i] = pc[i];
}

// evict copies the given entry's data into the log, so that
// the entry can be reused.  evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block.  It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
static bool
evict(Profile *p, Entry *e)
{
	int32 i, d, nslot;
	uintptr *log, *q;

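	// A log record is the count, the depth, and then depth PCs,
	// so this entry needs d+2 slots.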
	d = e->depth;
	nslot = d+2;
	log = p->log[p->toggle];
	if(p->nlog+nslot > nelem(p->log[0])) {
		if(!flushlog(p))
			return false;
		log = p->log[p->toggle];
	}

	q = log+p->nlog;
	*q++ = e->count;
	*q++ = d;
	for(i=0; i<d; i++)
		*q++ = e->stack[i];
	p->nlog = q - log;
	e->count = 0;
	return true;
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block.  It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
static bool
flushlog(Profile *p)
{
	uintptr *log, *q;

	if(!runtime·cas(&p->handoff, 0, p->nlog))
		return false;
	runtime·notewakeup(&p->wait);

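	// The old half now belongs to the writer.  Start filling the other
	// half, seeding it with a LostProfileData record if any ticks were
	// dropped since the last flush.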
	p->toggle = 1 - p->toggle;
	log = p->log[p->toggle];
	q = log;
	if(p->lost > 0) {
		*q++ = p->lost;
		*q++ = 1;
		*q++ = (uintptr)LostProfileData;
		p->lost = 0;	// logged; only totallost keeps the running total
	}
	p->nlog = q - log;
	return true;
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte.  It is called from the writing goroutine.
Slice
getprofile(Profile *p)
{
	uint32 i, j, n;
	Slice ret;
	Bucket *b;
	Entry *e;

	ret.array = nil;
	ret.len = 0;
	ret.cap = 0;

	if(p == nil)
		return ret;

	if(p->wholding) {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for(;;) {
			n = p->handoff;
			if(n == 0) {
				runtime·printf("runtime: phase error during cpu profile handoff\n");
				return ret;
			}
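			// Profiling has been turned off while we held a half;
			// leave the EOF bit set and go flush the hash table.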
			if(n & 0x80000000) {
				p->wtoggle = 1 - p->wtoggle;
				p->wholding = false;
				p->flushing = true;
				goto flush;
			}
			if(runtime·cas(&p->handoff, n, 0))
				break;
		}
		p->wtoggle = 1 - p->wtoggle;
		p->wholding = false;
	}

	if(p->flushing)
		goto flush;

	if(!p->on && p->handoff == 0)
		return ret;

	// Wait for new log.
	runtime·notetsleepg(&p->wait, -1);
	runtime·noteclear(&p->wait);

	n = p->handoff;
	if(n == 0) {
		runtime·printf("runtime: phase error during cpu profile wait\n");
		return ret;
	}
	if(n == 0x80000000) {
		p->flushing = true;
		goto flush;
	}
	n &= ~0x80000000;
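	// If SetCPUProfileRate(0) raced with this handoff, the low bits still
	// count valid entries; the next call will see the high bit via the
	// wholding loop above and switch to flushing.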

	// Return new log to caller.
	p->wholding = true;

	ret.array = (byte*)p->log[p->wtoggle];
	ret.len = n*sizeof(uintptr);
	ret.cap = ret.len;
	return ret;

flush:
	// In flush mode.
	// Add is no longer being called.  We own the log.
	// Also, p->handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	for(i=0; i<HashSize; i++) {
		b = &p->hash[i];
		for(j=0; j<Assoc; j++) {
			e = &b->entry[j];
			if(e->count > 0 && !evict(p, e)) {
				// Filled the log.  Stop the loop and return what we've got.
				goto breakflush;
			}
		}
	}
breakflush:

	// Return pending log data.
	if(p->nlog > 0) {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		ret.array = (byte*)p->log[p->toggle];
		ret.len = p->nlog*sizeof(uintptr);
		ret.cap = ret.len;
		p->nlog = 0;
		return ret;
	}

	// Made it through the table without finding anything to log.
	if(!p->eod_sent) {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p->eod_sent = true;
		ret.array = (byte*)eod;
		ret.len = sizeof eod;
		ret.cap = ret.len;
		return ret;
	}

	// Finally done.  Clean up and return nil.
	p->flushing = false;
	if(!runtime·cas(&p->handoff, p->handoff, 0))
		runtime·printf("runtime: profile flush racing with something\n");
	return ret;  // set to nil at top of function
}

// CPUProfile returns the next cpu profile block as a []byte.
// The user documentation is in debug.go.
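// The Slice result lives in the Go caller's frame; FLUSH ensures the
// store to ret is not optimized away (the old C-to-Go calling convention).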
void
runtime·CPUProfile(Slice ret)
{
	ret = getprofile(prof);
	FLUSH(&ret);
}