github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/mprof.goc

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock, alloclock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

static byte *pool;        // memory allocation pool
static uintptr poolfree;  // number of bytes left in the pool
enum {
	Chunk = 32*PageSize,  // initial size of the pool
};

// Memory allocation local to this file.
// There is no way to return the allocated memory back to the OS.
static void*
allocate(uintptr size)
{
	void *v;

	if(size == 0)
		return nil;

	if(size >= Chunk/2)
		return runtime·SysAlloc(size);

	runtime·lock(&alloclock);
	if(size > poolfree) {
		pool = runtime·SysAlloc(Chunk);
		if(pool == nil)
			runtime·throw("runtime: cannot allocate memory");
		poolfree = Chunk;
	}
	v = pool;
	pool += size;
	poolfree -= size;
	runtime·unlock(&alloclock);
	return v;
}
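
// Illustration: requests of Chunk/2 bytes or more bypass the pool and go
// straight to SysAlloc; smaller requests bump-allocate out of the current
// chunk, and a request that does not fit abandons the chunk's remaining
// poolfree bytes.  A minimal Go sketch of the same bump-pointer scheme
// (chunk is a stand-in constant, make a stand-in for SysAlloc):
//
//	var pool []byte
//
//	func alloc(size int) []byte {
//		if size > len(pool) {
//			pool = make([]byte, chunk) // old tail is abandoned
//		}
//		v := pool[:size]
//		pool = pool[size:]
//		return v
//	}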

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Unions can generally break precise GC;
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = allocate(sizeof *b + nstk*sizeof stk[0]);
	if(b == nil)
		runtime·throw("runtime: cannot allocate memory");
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}
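
// The stack hash above is a variant of Jenkins's one-at-a-time hash, fed
// whole PC words instead of bytes and with the final h += h<<15 step
// dropped.  For illustration, the same mixing as standalone Go:
//
//	func hashstk(stk []uintptr) uintptr {
//		var h uintptr
//		for _, pc := range stk {
//			h += pc
//			h += h << 10
//			h ^= h >> 6
//		}
//		h += h << 3
//		h ^= h >> 11
//		return h
//	}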

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
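//
// For example, with AddrHashShift = 20 and AddrDenseBits = 8, a
// (hypothetical) address 0x7f3a5c1d8 splits as:
//	addr>>20        = 0x7f3a  -> hashed into the top-level chains
//	(addr>>12)&0xff = 0x5c    -> index into ah->dense
//	addr&0xfff      = 0x1d8   -> distinguishes entries within one list
// (entries store ~(addr&0xfffff), the complement of all low 20 bits).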

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier, as suggested in CLR and Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
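
// For example, HashMultiplier = 0x9e3779b9 is the golden-ratio constant
// floor(((sqrt(5)-1)/2) * 2^32), so the table index of an address is,
// in Go terms:
//
//	h := uint32((addr >> 20) * 2654435769) >> (32 - 12) // 12 = AddrHashBits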

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = allocate(sizeof *ah);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = allocate(64*sizeof *e);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}
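
// Note that e->addr stores the bitwise complement of the low AddrHashShift
// bits rather than the bits themselves; getaddrbucket below compares
// against the same complement, so the encoding is self-consistent (and,
// plausibly, keeps a zero-filled entry from matching a live address whose
// low 20 bits are zero, though the code does not say).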

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
	m->nomemprof--;
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000));
}
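
// The rate argument is in nanoseconds but the stored value is in CPU
// ticks: for example, with a (hypothetical) 2.5 GHz tick source,
// SetBlockProfileRate(1000000) stores 1000000 * 2500000000 / 1000000000
// = 2500000 ticks, i.e. one millisecond.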

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}
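
// The test above keeps an event outright when cycles >= rate and otherwise
// with probability roughly cycles/rate, so long blocking events are always
// recorded and short ones are subsampled.  The same decision as a
// standalone Go sketch:
//
//	func sampled(cycles, rate, rnd int64) bool { // rnd uniform in [0, rate)
//		return rate > 0 && (cycles >= rate || rnd <= cycles)
//	}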

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}
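
// When ok is false the slice was too small; n reports the required count,
// so a caller can grow and retry.  One workable Go-level pattern
// (runtime.MemProfileRecord mirrors Record above):
//
//	var p []runtime.MemProfileRecord
//	n, ok := runtime.MemProfile(nil, true)
//	for !ok {
//		p = make([]runtime.MemProfileRecord, n+50)
//		n, ok = runtime.MemProfile(p, true)
//	}
//	// p[:n] now holds the profile.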

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}
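
// The Go-level wrapper is runtime.Stack, which fills a caller-supplied
// buffer and truncates the trace if the buffer is too small; a typical call:
//
//	buf := make([]byte, 4096)
//	n := runtime.Stack(buf, true) // true: include all goroutines
//	print(string(buf[:n]))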

static void
saveg(byte *pc, byte *sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	byte *pc, *sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp->sched.pc, (byte*)gp->sched.sp, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
}