github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libgo/runtime/cpuprof.goc

// Copyright 2011 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces.  Most clock ticks
// hit in the cache.  In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data.  The google-perftools code flushed the
// log itself during the signal handler.  This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler.  Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half.  When the signal handler fills its half, it
// offers to swap with the goroutine.  If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field.  If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p->wait).  If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
// to wake the goroutine.  The value indicates the number of entries in the
// log half being handed off.  The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero.  Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations.  The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
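//
// As a rough sketch (an illustration of the protocol above, not an
// exhaustive state machine), the handoff transitions are:
//
//	handoff == 0            signal handler owns the field; flushlog
//	                        CASes it from 0 to nlog and wakes the
//	                        goroutine with notewakeup.
//	handoff == n (n < 2^31) goroutine owns log[wtoggle][:n]; when it
//	                        is finished, it CASes n back to 0.
//	handoff & 0x80000000    profiling is over; the goroutine drains
//	                        the hash table and finally resets
//	                        handoff to 0.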

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"

#include "array.h"
typedef struct __go_open_array Slice;
#define array __values
#define len __count
#define cap __capacity

enum
{
	HashSize = 1<<10,
	LogSize = 1<<17,
	Assoc = 4,
	MaxStack = 64,
};
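
// With these sizes, a Profile is dominated by its hash table and log:
// roughly HashSize*Assoc*(MaxStack+2) = 270336 words of hash entries plus
// LogSize = 131072 words of log, a bit over 3 MB total on 64-bit targets.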

typedef struct Profile Profile;
typedef struct Bucket Bucket;
typedef struct Entry Entry;

struct Entry {
	uintptr count;
	uintptr depth;
	uintptr stack[MaxStack];
};

struct Bucket {
	Entry entry[Assoc];
};

struct Profile {
	bool on;		// profiling is on
	Note wait;		// goroutine waits here
	uintptr count;		// tick count
	uintptr evicts;		// eviction count
	uintptr lost;		// lost ticks that need to be logged

	// Active recent stack traces.
	Bucket hash[HashSize];

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	uintptr log[2][LogSize/2];
	uintptr nlog;
	int32 toggle;
	uint32 handoff;

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	uint32 wtoggle;
	bool wholding;	// holding & need to release a log half
	bool flushing;	// flushing hash table - profile is over
	bool eod_sent;  // special end-of-data record sent; => flushing
};

static Lock lk;
static Profile *prof;

static void tick(uintptr*, int32);
static void add(Profile*, uintptr*, int32);
static bool evict(Profile*, Entry*);
static bool flushlog(Profile*);

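// eod is the end-of-data marker: a record with count 0, depth 1, and a
// single zero "PC", the trailer expected by the pprof format.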
static uintptr eod[3] = {0, 1, 0};

// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
static void
LostProfileData(void)
{
}

extern void runtime_SetCPUProfileRate(intgo)
     __asm__ (GOSYM_PREFIX "runtime.SetCPUProfileRate");

// SetCPUProfileRate sets the CPU profiling rate.
// The user documentation is in debug.go.
void
runtime_SetCPUProfileRate(intgo hz)
{
	uintptr *p;
	uintptr n;

	// Clamp hz to something reasonable.
	if(hz < 0)
		hz = 0;
	if(hz > 1000000)
		hz = 1000000;

	runtime_lock(&lk);
	if(hz > 0) {
		if(prof == nil) {
			prof = runtime_SysAlloc(sizeof *prof, &mstats.other_sys);
			if(prof == nil) {
				runtime_printf("runtime: cpu profiling cannot allocate memory\n");
				runtime_unlock(&lk);
				return;
			}
		}
		if(prof->on || prof->handoff != 0) {
			runtime_printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
			runtime_unlock(&lk);
			return;
		}

		prof->on = true;
		p = prof->log[0];
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
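		// As a worked example: with hz = 100, the five header words
		// below come out as 0, 3, 0, 10000, 0, since 1000000/100 =
		// 10000 microseconds between samples.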
		*p++ = 0;  // count for header
		*p++ = 3;  // depth for header
		*p++ = 0;  // version number
		*p++ = 1000000 / hz;  // period (microseconds)
		*p++ = 0;
		prof->nlog = p - prof->log[0];
		prof->toggle = 0;
		prof->wholding = false;
		prof->wtoggle = 0;
		prof->flushing = false;
		prof->eod_sent = false;
		runtime_noteclear(&prof->wait);

		runtime_setcpuprofilerate(tick, hz);
	} else if(prof != nil && prof->on) {
		runtime_setcpuprofilerate(nil, 0);
		prof->on = false;

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in prof->handoff to tell getprofile.
		for(;;) {
			n = prof->handoff;
			if(n&0x80000000)
				runtime_printf("runtime: setcpuprofile(off) twice\n");
			if(runtime_cas(&prof->handoff, n, n|0x80000000))
				break;
		}
		if(n == 0) {
			// we did the transition from 0 -> nonzero so we wake getprofile
			runtime_notewakeup(&prof->wait);
		}
	}
	runtime_unlock(&lk);
}
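// From Go code this is reachable as runtime.SetCPUProfileRate; a typical
// (simplified) use, as wrapped by runtime/pprof, looks like:
//
//	runtime.SetCPUProfileRate(100) // start sampling at 100 Hz
//	// ... run the workload ...
//	runtime.SetCPUProfileRate(0)   // stop; a reader then drains CPUProfile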

static void
tick(uintptr *pc, int32 n)
{
	add(prof, pc, n);
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.  It is allowed to call evict.
static void
add(Profile *p, uintptr *pc, int32 n)
{
	int32 i, j;
	uintptr h, x;
	Bucket *b;
	Entry *e;

	if(n > MaxStack)
		n = MaxStack;

	// Compute hash.
	h = 0;
	for(i=0; i<n; i++) {
		h = h<<8 | (h>>(8*(sizeof(h)-1)));
		x = pc[i];
		h += x*31 + x*7 + x*3;
	}
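	// (Each loop iteration rotates h left by 8 bits and mixes in
	// x*41, since 31 + 7 + 3 == 41.)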
	p->count++;

	// Add to entry count if already present in table.
	b = &p->hash[h%HashSize];
	for(i=0; i<Assoc; i++) {
		e = &b->entry[i];
		if(e->depth != (uintptr)n)
			continue;
		for(j=0; j<n; j++)
			if(e->stack[j] != pc[j])
				goto ContinueAssoc;
		e->count++;
		return;
	ContinueAssoc:;
	}

	// Evict entry with smallest count.
	e = &b->entry[0];
	for(i=1; i<Assoc; i++)
		if(b->entry[i].count < e->count)
			e = &b->entry[i];
	if(e->count > 0) {
		if(!evict(p, e)) {
			// Could not evict entry.  Record lost stack.
			p->lost++;
			return;
		}
		p->evicts++;
	}

	// Reuse the newly evicted entry.
	e->depth = n;
	e->count = 1;
	for(i=0; i<n; i++)
		e->stack[i] = pc[i];
}

// evict copies the given entry's data into the log, so that
// the entry can be reused.  evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block.  It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
static bool
evict(Profile *p, Entry *e)
{
	int32 i, d, nslot;
	uintptr *log, *q;

	d = e->depth;
	nslot = d+2;
	log = p->log[p->toggle];
	if(p->nlog+nslot > nelem(p->log[0])) {
		if(!flushlog(p))
			return false;
		log = p->log[p->toggle];
	}

	q = log+p->nlog;
	*q++ = e->count;
	*q++ = d;
	for(i=0; i<d; i++)
		*q++ = e->stack[i];
	p->nlog = q - log;
	e->count = 0;
	return true;
}
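
// Each evicted entry thus occupies d+2 consecutive words of the log,
// in the record layout the pprof format expects:
//
//	count | depth d | pc[0] | ... | pc[d-1]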

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block.  It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
static bool
flushlog(Profile *p)
{
	uintptr *log, *q;

	if(!runtime_cas(&p->handoff, 0, p->nlog))
		return false;
	runtime_notewakeup(&p->wait);

	p->toggle = 1 - p->toggle;
	log = p->log[p->toggle];
	q = log;
	if(p->lost > 0) {
		*q++ = p->lost;
		*q++ = 1;
		*q++ = (uintptr)LostProfileData;
		p->lost = 0;
	}
	p->nlog = q - log;
	return true;
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte.  It is called from the writing goroutine.
Slice
getprofile(Profile *p)
{
	uint32 i, j, n;
	Slice ret;
	Bucket *b;
	Entry *e;

	ret.array = nil;
	ret.len = 0;
	ret.cap = 0;

	if(p == nil)
		return ret;

	if(p->wholding) {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for(;;) {
			n = p->handoff;
			if(n == 0) {
				runtime_printf("runtime: phase error during cpu profile handoff\n");
				return ret;
			}
			if(n & 0x80000000) {
				p->wtoggle = 1 - p->wtoggle;
				p->wholding = false;
				p->flushing = true;
				goto flush;
			}
			if(runtime_cas(&p->handoff, n, 0))
				break;
		}
		p->wtoggle = 1 - p->wtoggle;
		p->wholding = false;
	}

	if(p->flushing)
		goto flush;

	if(!p->on && p->handoff == 0)
		return ret;

	// Wait for new log.
	runtime_notetsleepg(&p->wait, -1);
	runtime_noteclear(&p->wait);

	n = p->handoff;
	if(n == 0) {
		runtime_printf("runtime: phase error during cpu profile wait\n");
		return ret;
	}
	if(n == 0x80000000) {
		p->flushing = true;
		goto flush;
	}
	n &= ~0x80000000;

	// Return new log to caller.
	p->wholding = true;

	ret.array = (byte*)p->log[p->wtoggle];
	ret.len = n*sizeof(uintptr);
	ret.cap = ret.len;
	return ret;

flush:
	// In flush mode.
	// Add is no longer being called.  We own the log.
	// Also, p->handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	for(i=0; i<HashSize; i++) {
		b = &p->hash[i];
		for(j=0; j<Assoc; j++) {
			e = &b->entry[j];
			if(e->count > 0 && !evict(p, e)) {
				// Filled the log.  Stop the loop and return what we've got.
				goto breakflush;
			}
		}
	}
breakflush:

	// Return pending log data.
	if(p->nlog > 0) {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		ret.array = (byte*)p->log[p->toggle];
		ret.len = p->nlog*sizeof(uintptr);
		ret.cap = ret.len;
		p->nlog = 0;
		return ret;
	}

	// Made it through the table without finding anything to log.
	if(!p->eod_sent) {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p->eod_sent = true;
		ret.array = (byte*)eod;
		ret.len = sizeof eod;
		ret.cap = ret.len;
		return ret;
	}

	// Finally done.  Clean up and return nil.
	p->flushing = false;
	if(!runtime_cas(&p->handoff, p->handoff, 0))
		runtime_printf("runtime: profile flush racing with something\n");
	return ret;  // set to nil at top of function
}

// CPUProfile returns the next cpu profile block as a []byte.
// The user documentation is in debug.go.
func CPUProfile() (ret Slice) {
	ret = getprofile(prof);
}
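
// On the Go side, runtime/pprof drains these blocks with a loop along
// these lines (a sketch of the consumer, not the verbatim library code):
//
//	for {
//		data := runtime.CPUProfile()
//		if data == nil {
//			break // profiling stopped; end-of-data record delivered
//		}
//		w.Write(data)
//	}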