github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Unions can in general break precise GC;
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime·persistentalloc(sizeof *b + nstk*sizeof stk[0], 0);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}
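
// The loop above mixes each PC into the hash with a shift-add-xor step,
// in the spirit of Jenkins' one-at-a-time hash but applied to whole words.
// A minimal standalone sketch of the same mixing in Go (illustrative only,
// not part of this file):
//
//	func stkhash(stk []uintptr) uintptr {
//		var h uintptr
//		for _, pc := range stk {
//			h += pc
//			h += h << 10
//			h ^= h >> 6
//		}
//		h += h << 3
//		h ^= h >> 11
//		return h
//	}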

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
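
// Illustrative sketch of how an address splits across the three levels,
// using the constants above (Go; addrsplit is a hypothetical helper, not
// part of this file). The same expressions appear in setaddrbucket and
// getaddrbucket below:
//
//	func addrsplit(addr uintptr) (top uint32, dense uintptr, low uint32) {
//		// Top level: multiplicative hash of addr>>AddrHashShift.
//		top = uint32((addr>>20)*2654435769) >> (32 - 12)
//		// Middle level: top AddrDenseBits bits of the low AddrHashShift bits.
//		dense = (addr >> (20 - 8)) & ((1 << 8) - 1)
//		// Bottom level: remaining low bits, stored complemented in AddrEntry.addr.
//		low = uint32(^(addr & ((1 << 20) - 1)))
//		return top, dense, low
//	}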

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime·persistentalloc(sizeof *ah, 0);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime·persistentalloc(64*sizeof *e, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
	m->nomemprof--;
}
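
// MProf_Malloc only sees allocations that the allocator has already chosen
// to sample; from Go the sampling interval is controlled through
// runtime.MemProfileRate. A hedged example of asking for every allocation
// to be recorded (illustrative, not part of this file):
//
//	func init() {
//		// Must be set before the allocations of interest happen.
//		runtime.MemProfileRate = 1
//	}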

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
	m->nomemprof--;
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime·tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, r);
}
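
// Typical Go-side usage (illustrative, not part of this file): a rate of 1
// records every blocking event, and a rate <= 0 turns the profile back off:
//
//	runtime.SetBlockProfileRate(1)
//	defer runtime.SetBlockProfileRate(0)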

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}
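
// The test above keeps every event of at least blockprofilerate cycles and
// keeps a shorter event of d cycles with probability roughly d/rate, so the
// chance of recording an event scales with how long it blocked. The same
// decision as a standalone Go sketch (fastrand stands in for
// runtime·fastrand1; illustrative only, not part of this file):
//
//	func sampled(cycles, rate int64, fastrand func() uint32) bool {
//		if rate <= 0 {
//			return false
//		}
//		return rate <= cycles || int64(fastrand())%rate <= cycles
//	}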

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}
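
// Go callers are expected to size the slice first and retry, since new
// buckets can appear between the probe and the fill. A hedged usage sketch
// (illustrative, not part of this file):
//
//	n, _ := runtime.MemProfile(nil, false)
//	var rec []runtime.MemProfileRecord
//	for {
//		rec = make([]runtime.MemProfileRecord, n+50)
//		var ok bool
//		if n, ok = runtime.MemProfile(rec, false); ok {
//			rec = rec[:n]
//			break
//		}
//	}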

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}
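
// Go-side usage mirrors MemProfile (illustrative, not part of this file):
//
//	n, _ := runtime.BlockProfile(nil)
//	rec := make([]runtime.BlockProfileRecord, n+10)
//	if n, ok := runtime.BlockProfile(rec); ok {
//		rec = rec[:n]
//		// rec[i].Count, rec[i].Cycles and rec[i].Stack() now hold the data.
//	}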

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}
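
// The zero-padding written by record, BlockProfile, and saveg is what lets
// the Go side recover the true stack length: StackRecord.Stack returns the
// prefix before the first 0 entry. A sketch of that trimming in Go
// (illustrative, not part of this file):
//
//	func stackOf(r *runtime.StackRecord) []uintptr {
//		for i, pc := range r.Stack0 {
//			if pc == 0 {
//				return r.Stack0[0:i]
//			}
//		}
//		return r.Stack0[0:]
//	}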

func Stack(b Slice, all bool) (n int) {
	uintptr pc, sp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}
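
// Illustrative Go usage (not part of this file): dump all goroutine stacks,
// doubling the buffer until the traceback fits:
//
//	buf := make([]byte, 4096)
//	for {
//		n := runtime.Stack(buf, true)
//		if n < len(buf) {
//			buf = buf[:n]
//			break
//		}
//		buf = make([]byte, 2*len(buf))
//	}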

static void
saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback((uintptr)pc, (uintptr)sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil, false);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr pc, sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp->sched.pc, gp->sched.sp, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}
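
// GoroutineProfile re-counts under stoptheworld precisely because the
// goroutine count can change between the probe and the fill; Go callers
// typically over-allocate a little for the same reason (illustrative,
// not part of this file):
//
//	rec := make([]runtime.StackRecord, runtime.NumGoroutine()+10)
//	if n, ok := runtime.GoroutineProfile(rec); ok {
//		rec = rec[:n]
//	}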

void
runtime·mprofinit(void)
{
	addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0);
}