github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/cmd/prof/main.c (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !plan9
     6  
     7  #include <u.h>
     8  #include <time.h>
     9  #include <libc.h>
    10  #include <bio.h>
    11  #include <ctype.h>
    12  
    13  #define Ureg Ureg_amd64
    14  	#include <ureg_amd64.h>
    15  #undef Ureg
    16  #define Ureg Ureg_x86
    17  	#include <ureg_x86.h>
    18  #undef Ureg
    19  #include <mach.h>
    20  
char* file = "6.out";	// executable being profiled; main() replaces it with argv[0] or proctextfile(pid)
static Fhdr fhdr;	// object-file header filled in by crackhdr() in main()
int have_syms;	// result of syminit(); zero means no symbols were loaded
int fd;	// descriptor of the executable opened in main()
struct Ureg_amd64 ureg_amd64;	// registers most recently fetched for an amd64 target
struct Ureg_x86 ureg_x86;	// registers most recently fetched for a 386 target
int total_sec = 0;	// -t: seconds to keep sampling; <= 0 means no time limit
int delta_msec = 100;	// -d: milliseconds between sampling rounds
int nsample;	// number of sampling rounds started by samples()
int nsamplethread;	// sum of nthread over all sampling rounds

// pprof data, stored as sequences of N followed by N PC values.
// See http://code.google.com/p/google-perftools .
uvlong	*ppdata;	// traces
Biobuf*	pproffd;	// buffered output file that receives the pprof data
long	ppstart;	// index in ppdata of the count word of the current trace
long	nppdata;	// number of words of ppdata in use
long	ppalloc;	// number of words allocated for ppdata
char	ppmapdata[10*1024];	// the map information for the output file

// output formats
int pprof;	// write pprof output to the file named by -P
int functions;	// print functions
int histograms;	// print histograms
int linenums;	// print file and line numbers rather than function names
int registers;	// print registers
int stacks;		// print stack traces; incremented once per -s flag

int pid;		// main process pid

int nthread;	// number of threads
int thread[32];	// thread pids
Map *map[32];	// thread maps; map[i] belongs to thread[i]
    54  
    55  void
    56  Usage(void)
    57  {
    58  	fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec]\n");
    59  	fprint(2, "       prof [-t total_secs] [-d delta_msec] 6.out args ...\n");
    60  	fprint(2, "\tformats (default -h):\n");
    61  	fprint(2, "\t\t-P file.prof: write [c]pprof output to file.prof\n");
    62  	fprint(2, "\t\t-h: histograms\n");
    63  	fprint(2, "\t\t-f: dynamic functions\n");
    64  	fprint(2, "\t\t-l: dynamic file and line numbers\n");
    65  	fprint(2, "\t\t-r: dynamic registers\n");
    66  	fprint(2, "\t\t-s: dynamic function stack traces\n");
    67  	fprint(2, "\t\t-hs: include stack info in histograms\n");
    68  	exit(2);
    69  }
    70  
// PC is one histogram entry: a (pc, callerpc) pair together with
// the number of times addtohistogram has seen that pair.  Entries
// that hash to the same bucket of counters[] are chained through next.
typedef struct PC PC;
struct PC {
	uvlong pc;
	uvlong callerpc;	// 0 when the sample was recorded without a caller
	unsigned int count;
	PC* next;
};
    78  
enum {
	Ncounters = 256	// number of hash buckets in counters[]
};

// Histogram hash table; addtohistogram picks the bucket
// (pc + callerpc*101) % Ncounters.
PC *counters[Ncounters];
    84  
// Set up by setarch() to make most of the code architecture-independent.
// archtab[] holds one Arch per supported architecture.
typedef struct Arch Arch;
struct Arch {
	char*	name;	// compared against mach->name by setarch()
	void	(*regprint)(void);	// print the saved registers on standard error
	int	(*getregs)(Map*);	// fetch the whole register set; negative on failure
	int	(*getPC)(Map*);	// fetch only the program counter; negative on failure
	int	(*getSP)(Map*);	// fetch only the stack pointer; negative on failure
	uvlong	(*uregPC)(void);	// program counter from the saved registers
	uvlong	(*uregSP)(void);	// stack pointer from the saved registers
	void	(*ppword)(uvlong w);	// write one profile word to pproffd
};
    97  
    98  void
    99  amd64_regprint(void)
   100  {
   101  	fprint(2, "ax\t0x%llux\n", ureg_amd64.ax);
   102  	fprint(2, "bx\t0x%llux\n", ureg_amd64.bx);
   103  	fprint(2, "cx\t0x%llux\n", ureg_amd64.cx);
   104  	fprint(2, "dx\t0x%llux\n", ureg_amd64.dx);
   105  	fprint(2, "si\t0x%llux\n", ureg_amd64.si);
   106  	fprint(2, "di\t0x%llux\n", ureg_amd64.di);
   107  	fprint(2, "bp\t0x%llux\n", ureg_amd64.bp);
   108  	fprint(2, "r8\t0x%llux\n", ureg_amd64.r8);
   109  	fprint(2, "r9\t0x%llux\n", ureg_amd64.r9);
   110  	fprint(2, "r10\t0x%llux\n", ureg_amd64.r10);
   111  	fprint(2, "r11\t0x%llux\n", ureg_amd64.r11);
   112  	fprint(2, "r12\t0x%llux\n", ureg_amd64.r12);
   113  	fprint(2, "r13\t0x%llux\n", ureg_amd64.r13);
   114  	fprint(2, "r14\t0x%llux\n", ureg_amd64.r14);
   115  	fprint(2, "r15\t0x%llux\n", ureg_amd64.r15);
   116  	fprint(2, "ds\t0x%llux\n", ureg_amd64.ds);
   117  	fprint(2, "es\t0x%llux\n", ureg_amd64.es);
   118  	fprint(2, "fs\t0x%llux\n", ureg_amd64.fs);
   119  	fprint(2, "gs\t0x%llux\n", ureg_amd64.gs);
   120  	fprint(2, "type\t0x%llux\n", ureg_amd64.type);
   121  	fprint(2, "error\t0x%llux\n", ureg_amd64.error);
   122  	fprint(2, "pc\t0x%llux\n", ureg_amd64.ip);
   123  	fprint(2, "cs\t0x%llux\n", ureg_amd64.cs);
   124  	fprint(2, "flags\t0x%llux\n", ureg_amd64.flags);
   125  	fprint(2, "sp\t0x%llux\n", ureg_amd64.sp);
   126  	fprint(2, "ss\t0x%llux\n", ureg_amd64.ss);
   127  }
   128  
   129  int
   130  amd64_getregs(Map *map)
   131  {
   132  	int i;
   133  	union {
   134  		uvlong regs[1];
   135  		struct Ureg_amd64 ureg;
   136  	} u;
   137  
   138  	for(i = 0; i < sizeof ureg_amd64; i+=8) {
   139  		if(get8(map, (uvlong)i, &u.regs[i/8]) < 0)
   140  			return -1;
   141  	}
   142  	ureg_amd64 = u.ureg;
   143  	return 0;
   144  }
   145  
   146  int
   147  amd64_getPC(Map *map)
   148  {
   149  	uvlong x;
   150  	int r;
   151  
   152  	r = get8(map, offsetof(struct Ureg_amd64, ip), &x);
   153  	ureg_amd64.ip = x;
   154  	return r;
   155  }
   156  
   157  int
   158  amd64_getSP(Map *map)
   159  {
   160  	uvlong x;
   161  	int r;
   162  
   163  	r = get8(map, offsetof(struct Ureg_amd64, sp), &x);
   164  	ureg_amd64.sp = x;
   165  	return r;
   166  }
   167  
   168  uvlong
   169  amd64_uregPC(void)
   170  {
   171  	return ureg_amd64.ip;
   172  }
   173  
   174  uvlong
   175  amd64_uregSP(void)
   176  {
   177  	return ureg_amd64.sp;
   178  }
   179  
   180  void
   181  amd64_ppword(uvlong w)
   182  {
   183  	uchar buf[8];
   184  
   185  	buf[0] = w;
   186  	buf[1] = w >> 8;
   187  	buf[2] = w >> 16;
   188  	buf[3] = w >> 24;
   189  	buf[4] = w >> 32;
   190  	buf[5] = w >> 40;
   191  	buf[6] = w >> 48;
   192  	buf[7] = w >> 56;
   193  	Bwrite(pproffd, buf, 8);
   194  }
   195  
   196  void
   197  x86_regprint(void)
   198  {
   199  	fprint(2, "ax\t0x%ux\n", ureg_x86.ax);
   200  	fprint(2, "bx\t0x%ux\n", ureg_x86.bx);
   201  	fprint(2, "cx\t0x%ux\n", ureg_x86.cx);
   202  	fprint(2, "dx\t0x%ux\n", ureg_x86.dx);
   203  	fprint(2, "si\t0x%ux\n", ureg_x86.si);
   204  	fprint(2, "di\t0x%ux\n", ureg_x86.di);
   205  	fprint(2, "bp\t0x%ux\n", ureg_x86.bp);
   206  	fprint(2, "ds\t0x%ux\n", ureg_x86.ds);
   207  	fprint(2, "es\t0x%ux\n", ureg_x86.es);
   208  	fprint(2, "fs\t0x%ux\n", ureg_x86.fs);
   209  	fprint(2, "gs\t0x%ux\n", ureg_x86.gs);
   210  	fprint(2, "cs\t0x%ux\n", ureg_x86.cs);
   211  	fprint(2, "flags\t0x%ux\n", ureg_x86.flags);
   212  	fprint(2, "pc\t0x%ux\n", ureg_x86.pc);
   213  	fprint(2, "sp\t0x%ux\n", ureg_x86.sp);
   214  	fprint(2, "ss\t0x%ux\n", ureg_x86.ss);
   215  }
   216  
   217  int
   218  x86_getregs(Map *map)
   219  {
   220  	int i;
   221  
   222  	for(i = 0; i < sizeof ureg_x86; i+=4) {
   223  		if(get4(map, (uvlong)i, &((uint32*)&ureg_x86)[i/4]) < 0)
   224  			return -1;
   225  	}
   226  	return 0;
   227  }
   228  
   229  int
   230  x86_getPC(Map* map)
   231  {
   232  	return get4(map, offsetof(struct Ureg_x86, pc), &ureg_x86.pc);
   233  }
   234  
   235  int
   236  x86_getSP(Map* map)
   237  {
   238  	return get4(map, offsetof(struct Ureg_x86, sp), &ureg_x86.sp);
   239  }
   240  
   241  uvlong
   242  x86_uregPC(void)
   243  {
   244  	return (uvlong)ureg_x86.pc;
   245  }
   246  
   247  uvlong
   248  x86_uregSP(void)
   249  {
   250  	return (uvlong)ureg_x86.sp;
   251  }
   252  
   253  void
   254  x86_ppword(uvlong w)
   255  {
   256  	uchar buf[4];
   257  
   258  	buf[0] = w;
   259  	buf[1] = w >> 8;
   260  	buf[2] = w >> 16;
   261  	buf[3] = w >> 24;
   262  	Bwrite(pproffd, buf, 4);
   263  }
   264  
   265  Arch archtab[] = {
   266  	{
   267  		"amd64",
   268  		amd64_regprint,
   269  		amd64_getregs,
   270  		amd64_getPC,
   271  		amd64_getSP,
   272  		amd64_uregPC,
   273  		amd64_uregSP,
   274  		amd64_ppword,
   275  	},
   276  	{
   277  		"386",
   278  		x86_regprint,
   279  		x86_getregs,
   280  		x86_getPC,
   281  		x86_getSP,
   282  		x86_uregPC,
   283  		x86_uregSP,
   284  		x86_ppword,
   285  	},
   286  	{
   287  		nil
   288  	}
   289  };
   290  
   291  Arch *arch;
   292  
   293  int
   294  setarch(void)
   295  {
   296  	int i;
   297  
   298  	if(mach != nil) {
   299  		for(i = 0; archtab[i].name != nil; i++) {
   300  			if (strcmp(mach->name, archtab[i].name) == 0) {
   301  				arch = &archtab[i];
   302  				return 0;
   303  			}
   304  		}
   305  	}
   306  	return -1;
   307  }
   308  
   309  int
   310  getthreads(void)
   311  {
   312  	int i, j, curn, found;
   313  	Map *curmap[nelem(map)];
   314  	int curthread[nelem(map)];
   315  	static int complained = 0;
   316  
   317  	curn = procthreadpids(pid, curthread, nelem(curthread));
   318  	if(curn <= 0)
   319  		return curn;
   320  
   321  	if(curn > nelem(map)) {
   322  		if(complained == 0) {
   323  			fprint(2, "prof: too many threads; limiting to %d\n", nthread, nelem(map));
   324  			complained = 1;
   325  		}
   326  		curn = nelem(map);
   327  	}
   328  	if(curn == nthread && memcmp(thread, curthread, curn*sizeof(*thread)) == 0)
   329  		return curn;	// no changes
   330  
   331  	// Number of threads has changed (might be the init case).
   332  	// A bit expensive but rare enough not to bother being clever.
   333  	for(i = 0; i < curn; i++) {
   334  		found = 0;
   335  		for(j = 0; j < nthread; j++) {
   336  			if(curthread[i] == thread[j]) {
   337  				found = 1;
   338  				curmap[i] = map[j];
   339  				map[j] = nil;
   340  				break;
   341  			}
   342  		}
   343  		if(found)
   344  			continue;
   345  
   346  		// map new thread
   347  		curmap[i] = attachproc(curthread[i], &fhdr);
   348  		if(curmap[i] == nil) {
   349  			fprint(2, "prof: can't attach to %d: %r\n", curthread[i]);
   350  			return -1;
   351  		}
   352  	}
   353  
   354  	for(j = 0; j < nthread; j++)
   355  		if(map[j] != nil)
   356  			detachproc(map[j]);
   357  
   358  	nthread = curn;
   359  	memmove(thread, curthread, nthread*sizeof thread[0]);
   360  	memmove(map, curmap, sizeof map);
   361  	return nthread;
   362  }
   363  
   364  int
   365  sample(Map *map)
   366  {
   367  	static int n;
   368  
   369  	n++;
   370  	if(registers) {
   371  		if(arch->getregs(map) < 0)
   372  			goto bad;
   373  	} else {
   374  		// we need only two registers
   375  		if(arch->getPC(map) < 0)
   376  			goto bad;
   377  		if(arch->getSP(map) < 0)
   378  			goto bad;
   379  	}
   380  	return 1;
   381  bad:
   382  	if(n == 1)
   383  		fprint(2, "prof: can't read registers: %r\n");
   384  	return 0;
   385  }
   386  
   387  void
   388  addtohistogram(uvlong pc, uvlong callerpc, uvlong sp)
   389  {
   390  	int h;
   391  	PC *x;
   392  	
   393  	USED(sp);
   394  
   395  	h = (pc + callerpc*101) % Ncounters;
   396  	for(x = counters[h]; x != NULL; x = x->next) {
   397  		if(x->pc == pc && x->callerpc == callerpc) {
   398  			x->count++;
   399  			return;
   400  		}
   401  	}
   402  	x = malloc(sizeof(PC));
   403  	if(x == nil)
   404  		sysfatal("out of memory");
   405  	x->pc = pc;
   406  	x->callerpc = callerpc;
   407  	x->count = 1;
   408  	x->next = counters[h];
   409  	counters[h] = x;
   410  }
   411  
   412  void
   413  addppword(uvlong pc)
   414  {
   415  	if(pc == 0) {
   416  		return;
   417  	}
   418  	if(nppdata == ppalloc) {
   419  		ppalloc = (1000+nppdata)*2;
   420  		ppdata = realloc(ppdata, ppalloc * sizeof ppdata[0]);
   421  		if(ppdata == nil) {
   422  			fprint(2, "prof: realloc failed: %r\n");
   423  			exit(2);
   424  		}
   425  	}
   426  	ppdata[nppdata++] = pc;
   427  }
   428  
   429  void
   430  startpptrace(void)
   431  {
   432  	ppstart = nppdata;
   433  	addppword(~0);
   434  }
   435  
   436  void
   437  endpptrace(void)
   438  {
   439  	ppdata[ppstart] = nppdata-ppstart-1;
   440  }
   441  
   442  uvlong nextpc;
   443  
   444  void
   445  xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
   446  {
   447  	USED(map);
   448  
   449  	char buf[1024];
   450  	if(sym == nil){
   451  		fprint(2, "syms\n");
   452  		return;
   453  	}
   454  	if(histograms)
   455  		addtohistogram(nextpc, pc, sp);
   456  	if(!histograms || stacks > 1 || pprof) {
   457  		if(nextpc == 0)
   458  			nextpc = sym->value;
   459  		if(stacks){
   460  			fprint(2, "%s(", sym->name);
   461  			fprint(2, ")");
   462  			if(nextpc != sym->value)
   463  				fprint(2, "+%#llux ", nextpc - sym->value);
   464  			if(have_syms && linenums && fileline(buf, sizeof buf, pc)) {
   465  				fprint(2, " %s", buf);
   466  			}
   467  			fprint(2, "\n");
   468  		}
   469  		if (pprof) {
   470  			addppword(nextpc);
   471  		}
   472  	}
   473  	nextpc = pc;
   474  }
   475  
   476  void
   477  stacktracepcsp(Map *map, uvlong pc, uvlong sp)
   478  {
   479  	nextpc = pc;
   480  	if(pprof){
   481  		startpptrace();
   482  	}
   483  	if(machdata->ctrace==nil)
   484  		fprint(2, "no machdata->ctrace\n");
   485  	else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0)
   486  		fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp);
   487  	else {
   488  		addtohistogram(nextpc, 0, sp);
   489  		if(stacks)
   490  			fprint(2, "\n");
   491  	}
   492  	if(pprof){
   493  		endpptrace();
   494  	}
   495  }
   496  
   497  void
   498  printpc(Map *map, uvlong pc, uvlong sp)
   499  {
   500  	char buf[1024];
   501  	if(registers)
   502  		arch->regprint();
   503  	if(have_syms > 0 && linenums &&  fileline(buf, sizeof buf, pc))
   504  		fprint(2, "%s\n", buf);
   505  	if(have_syms > 0 && functions) {
   506  		symoff(buf, sizeof(buf), pc, CANY);
   507  		fprint(2, "%s\n", buf);
   508  	}
   509  	if(stacks || pprof){
   510  		stacktracepcsp(map, pc, sp);
   511  	}
   512  	else if(histograms){
   513  		addtohistogram(pc, 0, sp);
   514  	}
   515  }
   516  
   517  void
   518  ppmaps(void)
   519  {
   520  	int fd, n;
   521  	char tmp[100];
   522  	Seg *seg;
   523  
   524  	// If it's Linux, the info is in /proc/$pid/maps
   525  	snprint(tmp, sizeof tmp, "/proc/%d/maps", pid);
   526  	fd = open(tmp, 0);
   527  	if(fd >= 0) {
   528  		n = read(fd, ppmapdata, sizeof ppmapdata - 1);
   529  		close(fd);
   530  		if(n < 0) {
   531  			fprint(2, "prof: can't read %s: %r\n", tmp);
   532  			exit(2);
   533  		}
   534  		ppmapdata[n] = 0;
   535  		return;
   536  	}
   537  
   538  	// It's probably a mac. Synthesize an entry for the text file.
   539  	// The register segment may come first but it has a zero offset, so grab the first non-zero offset segment.
   540  	for(n = 0; n < 3; n++){
   541  		seg = &map[0]->seg[n];
   542  		if(seg->b == 0) {
   543  			continue;
   544  		}
   545  		snprint(ppmapdata, sizeof ppmapdata,
   546  			"%.16x-%.16x r-xp %d 00:00 34968549                           %s\n",
   547  			seg->b, seg->e, seg->f, "/home/r/6.out"
   548  		);
   549  		return;
   550  	}
   551  	fprint(2, "prof: no text segment in maps for %s\n", file);
   552  	exit(2);
   553  }
   554  
   555  void
   556  samples(void)
   557  {
   558  	int i, pid, msec;
   559  	struct timespec req;
   560  	int getmaps;
   561  
   562  	req.tv_sec = delta_msec/1000;
   563  	req.tv_nsec = 1000000*(delta_msec % 1000);
   564  	getmaps = 0;
   565  	if(pprof)
   566  		getmaps= 1;
   567  	for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) {
   568  		nsample++;
   569  		nsamplethread += nthread;
   570  		for(i = 0; i < nthread; i++) {
   571  			pid = thread[i];
   572  			if(ctlproc(pid, "stop") < 0)
   573  				return;
   574  			if(!sample(map[i])) {
   575  				ctlproc(pid, "start");
   576  				return;
   577  			}
   578  			printpc(map[i], arch->uregPC(), arch->uregSP());
   579  			ctlproc(pid, "start");
   580  		}
   581  		nanosleep(&req, NULL);
   582  		getthreads();
   583  		if(nthread == 0)
   584  			break;
   585  		if(getmaps) {
   586  			getmaps = 0;
   587  			ppmaps();
   588  		}
   589  	}
   590  }
   591  
// Func accumulates histogram counts for one function.  Entries live
// in the func[] hash table and are chained through next.
typedef struct Func Func;
struct Func
{
	Func *next;
	Symbol s;	// symbol that findsym returned for this function
	uint onstack;	// total count of histogram entries whose pc is in this function
	uint leaf;	// onstack minus the count of entries whose callerpc is in this function
};

Func *func[257];	// hash table indexed by s.value % nelem(func)
int nfunc;	// number of Func entries created by findfunc
   603  
   604  Func*
   605  findfunc(uvlong pc)
   606  {
   607  	Func *f;
   608  	uint h;
   609  	Symbol s;
   610  
   611  	if(pc == 0)
   612  		return nil;
   613  
   614  	if(!findsym(pc, CTEXT, &s))
   615  		return nil;
   616  
   617  	h = s.value % nelem(func);
   618  	for(f = func[h]; f != NULL; f = f->next)
   619  		if(f->s.value == s.value)
   620  			return f;
   621  
   622  	f = malloc(sizeof *f);
   623  	if(f == nil)
   624  		sysfatal("out of memory");
   625  	memset(f, 0, sizeof *f);
   626  	f->s = s;
   627  	f->next = func[h];
   628  	func[h] = f;
   629  	nfunc++;
   630  	return f;
   631  }
   632  
   633  int
   634  compareleaf(const void *va, const void *vb)
   635  {
   636  	Func *a, *b;
   637  
   638  	a = *(Func**)va;
   639  	b = *(Func**)vb;
   640  	if(a->leaf != b->leaf)
   641  		return b->leaf - a->leaf;
   642  	if(a->onstack != b->onstack)
   643  		return b->onstack - a->onstack;
   644  	return strcmp(a->s.name, b->s.name);
   645  }
   646  
   647  void
   648  dumphistogram(void)
   649  {
   650  	int i, h, n;
   651  	PC *x;
   652  	Func *f, **ff;
   653  
   654  	if(!histograms)
   655  		return;
   656  
   657  	// assign counts to functions.
   658  	for(h = 0; h < Ncounters; h++) {
   659  		for(x = counters[h]; x != NULL; x = x->next) {
   660  			f = findfunc(x->pc);
   661  			if(f) {
   662  				f->onstack += x->count;
   663  				f->leaf += x->count;
   664  			}
   665  			f = findfunc(x->callerpc);
   666  			if(f)
   667  				f->leaf -= x->count;
   668  		}
   669  	}
   670  
   671  	// build array
   672  	ff = malloc(nfunc*sizeof ff[0]);
   673  	if(ff == nil)
   674  		sysfatal("out of memory");
   675  	n = 0;
   676  	for(h = 0; h < nelem(func); h++)
   677  		for(f = func[h]; f != NULL; f = f->next)
   678  			ff[n++] = f;
   679  
   680  	// sort by leaf counts
   681  	qsort(ff, nfunc, sizeof ff[0], compareleaf);
   682  
   683  	// print.
   684  	fprint(2, "%d samples (avg %.1g threads)\n", nsample, (double)nsamplethread/nsample);
   685  	for(i = 0; i < nfunc; i++) {
   686  		f = ff[i];
   687  		fprint(2, "%6.2f%%\t", 100.0*(double)f->leaf/nsample);
   688  		if(stacks)
   689  			fprint(2, "%6.2f%%\t", 100.0*(double)f->onstack/nsample);
   690  		fprint(2, "%s\n", f->s.name);
   691  	}
   692  }
   693  
// Trace describes one stack trace stored in ppdata while dumppprof
// collapses duplicates.
typedef struct Trace Trace;
struct Trace {
	int	count;	// number of identical traces folded into this one
	int	npc;	// number of PC values in the trace
	uvlong	*pc;	// first PC value of the trace, inside ppdata
	Trace	*next;	// next remaining trace, or nil at the end of the list
};
   701  
   702  void
   703  dumppprof(void)
   704  {
   705  	uvlong i, n, *p, *e;
   706  	int ntrace;
   707  	Trace *trace, *tp, *up, *prev;
   708  
   709  	if(!pprof)
   710  		return;
   711  	e = ppdata + nppdata;
   712  	// Create list of traces.  First, count the traces
   713  	ntrace = 0;
   714  	for(p = ppdata; p < e;) {
   715  		n = *p++;
   716  		p += n;
   717  		if(n == 0)
   718  			continue;
   719  		ntrace++;
   720  	}
   721  	if(ntrace <= 0)
   722  		return;
   723  	// Allocate and link the traces together.
   724  	trace = malloc(ntrace * sizeof(Trace));
   725  	if(trace == nil)
   726  		sysfatal("out of memory");
   727  	tp = trace;
   728  	for(p = ppdata; p < e;) {
   729  		n = *p++;
   730  		if(n == 0)
   731  			continue;
   732  		tp->count = 1;
   733  		tp->npc = n;
   734  		tp->pc = p;
   735  		tp->next = tp+1;
   736  		tp++;
   737  		p += n;
   738  	}
   739  	trace[ntrace-1].next = nil;
   740  	// Eliminate duplicates.  Lousy algorithm, although not as bad as it looks because
   741  	// the list collapses fast.
   742  	for(tp = trace; tp != nil; tp = tp->next) {
   743  		prev = tp;
   744  		for(up = tp->next; up != nil; up = up->next) {
   745  			if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) {
   746  				tp->count++;
   747  				prev->next = up->next;
   748  			} else {
   749  				prev = up;
   750  			}
   751  		}
   752  	}
   753  	// Write file.
   754  	// See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html
   755  	// 1) Header
   756  	arch->ppword(0);	// must be zero
   757  	arch->ppword(3);	// 3 words follow in header
   758  	arch->ppword(0);	// must be zero
   759  	arch->ppword(delta_msec * 1000);	// sampling period in microseconds
   760  	arch->ppword(0);	// must be zero (padding)
   761  	// 2) One record for each trace.
   762  	for(tp = trace; tp != nil; tp = tp->next) {
   763  		arch->ppword(tp->count);
   764  		arch->ppword(tp->npc);
   765  		for(i = 0; i < tp->npc; i++) {
   766  			arch->ppword(tp->pc[i]);
   767  		}
   768  	}
   769  	// 3) Binary trailer
   770  	arch->ppword(0);	// must be zero
   771  	arch->ppword(1);	// must be one
   772  	arch->ppword(0);	// must be zero
   773  	// 4) Mapped objects.
   774  	Bwrite(pproffd, ppmapdata, strlen(ppmapdata));
   775  	// 5) That's it.
   776  	Bterm(pproffd);
   777  }
   778  
   779  int
   780  startprocess(char **argv)
   781  {
   782  	int pid;
   783  
   784  	if((pid = fork()) == 0) {
   785  		pid = getpid();
   786  		if(ctlproc(pid, "hang") < 0){
   787  			fprint(2, "prof: child process could not hang\n");
   788  			exits(0);
   789  		}
   790  		execv(argv[0], argv);
   791  		fprint(2, "prof: could not exec %s: %r\n", argv[0]);
   792  		exits(0);
   793  	}
   794  
   795  	if(pid == -1) {
   796  		fprint(2, "prof: could not fork\n");
   797  		exit(1);
   798  	}
   799  	if(ctlproc(pid, "attached") < 0 || ctlproc(pid, "waitstop") < 0) {
   800  		fprint(2, "prof: could not attach to child process: %r\n");
   801  		exit(1);
   802  	}
   803  	return pid;
   804  }
   805  
   806  void
   807  detach(void)
   808  {
   809  	int i;
   810  
   811  	for(i = 0; i < nthread; i++)
   812  		detachproc(map[i]);
   813  }
   814  
   815  int
   816  main(int argc, char *argv[])
   817  {
   818  	int i;
   819  	char *ppfile;
   820  
   821  	ARGBEGIN{
   822  	case 'P':
   823  		pprof =1;
   824  		ppfile = EARGF(Usage());
   825  		pproffd = Bopen(ppfile, OWRITE);
   826  		if(pproffd == nil) {
   827  			fprint(2, "prof: cannot open %s: %r\n", ppfile);
   828  			exit(2);
   829  		}
   830  		break;
   831  	case 'd':
   832  		delta_msec = atoi(EARGF(Usage()));
   833  		break;
   834  	case 't':
   835  		total_sec = atoi(EARGF(Usage()));
   836  		break;
   837  	case 'p':
   838  		pid = atoi(EARGF(Usage()));
   839  		break;
   840  	case 'f':
   841  		functions = 1;
   842  		break;
   843  	case 'h':
   844  		histograms = 1;
   845  		break;
   846  	case 'l':
   847  		linenums = 1;
   848  		break;
   849  	case 'r':
   850  		registers = 1;
   851  		break;
   852  	case 's':
   853  		stacks++;
   854  		break;
   855  	default:
   856  		Usage();
   857  	}ARGEND
   858  	if(pid <= 0 && argc == 0)
   859  		Usage();
   860  	if(functions+linenums+registers+stacks+pprof == 0)
   861  		histograms = 1;
   862  	if(!machbyname("amd64")) {
   863  		fprint(2, "prof: no amd64 support\n", pid);
   864  		exit(1);
   865  	}
   866  	if(argc > 0)
   867  		file = argv[0];
   868  	else if(pid) {
   869  		file = proctextfile(pid);
   870  		if (file == NULL) {
   871  			fprint(2, "prof: can't find file for pid %d: %r\n", pid);
   872  			fprint(2, "prof: on Darwin, need to provide file name explicitly\n");
   873  			exit(1);
   874  		}
   875  	}
   876  	fd = open(file, 0);
   877  	if(fd < 0) {
   878  		fprint(2, "prof: can't open %s: %r\n", file);
   879  		exit(1);
   880  	}
   881  	if(crackhdr(fd, &fhdr)) {
   882  		have_syms = syminit(fd, &fhdr);
   883  		if(!have_syms) {
   884  			fprint(2, "prof: no symbols for %s: %r\n", file);
   885  		}
   886  	} else {
   887  		fprint(2, "prof: crack header for %s: %r\n", file);
   888  		exit(1);
   889  	}
   890  	if(pid <= 0)
   891  		pid = startprocess(argv);
   892  	attachproc(pid, &fhdr);	// initializes thread list
   893  	if(setarch() < 0) {
   894  		detach();
   895  		fprint(2, "prof: can't identify binary architecture for pid %d\n", pid);
   896  		exit(1);
   897  	}
   898  	if(getthreads() <= 0) {
   899  		detach();
   900  		fprint(2, "prof: can't find threads for pid %d\n", pid);
   901  		exit(1);
   902  	}
   903  	for(i = 0; i < nthread; i++)
   904  		ctlproc(thread[i], "start");
   905  	samples();
   906  	detach();
   907  	dumphistogram();
   908  	dumppprof();
   909  	exit(0);
   910  }