github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally, unions can break precise GC;
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
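	// 179999 is prime; a prime-sized table presumably helps h%BuckHashSize
	// spread the clustered stack-address hashes across all slots.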
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
	}

	// Hash stack.
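	// (A variant of Bob Jenkins' one-at-a-time hash, without its final
	// h += h<<15 mixing step.)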
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime·persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};
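
// Worked example (hypothetical address): for addr = 0xABCDE123,
//	addr>>AddrHashShift = 0xABC    picks the AddrHash via the top-level hash table;
//	(addr>>12) & 0xFF   = 0xDE     indexes its dense array;
//	~(addr & 0xFFFFF)   = ~0xDE123 is what the matching AddrEntry.addr holds.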

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
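
// 2654435769 = floor(2^32 * (sqrt(5)-1)/2): the golden ratio's fractional
// part scaled to 32 bits (Fibonacci hashing), so nearby keys land in
// well-separated slots.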

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime·persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime·persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles; use float64 to prevent overflow during multiplication
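		// For example (made-up numbers): rate = 10000ns on a machine where
		// tickspersecond() returns 2.5e9 gives r = 10000*2.5e9/1e9 = 25000 cycles.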
		r = (float64)rate*runtime·tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, r);
}

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
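	// Sampling: blocking events lasting at least rate cycles are always
	// recorded; shorter ones are kept with probability roughly cycles/rate.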
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}
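
// Caller-side sketch (Go, against the MemProfile declaration in debug.go):
// grow the slice until the profile fits, since n may change between calls.
//
//	var p []runtime.MemProfileRecord
//	n, ok := runtime.MemProfile(nil, true)
//	for !ok {
//		p = make([]runtime.MemProfileRecord, n+50)
//		n, ok = runtime.MemProfile(p, true)
//	}
//	p = p[:n]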

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	uintptr pc, sp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil, false);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr pc, sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
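	// This first count is only a fast-path check; goroutines can still be
	// created until the world is stopped, so n is re-read below.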
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(~(uintptr)0, ~(uintptr)0, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);
}