github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/mgc0.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Garbage collector.
     6  
     7  #include "runtime.h"
     8  #include "arch_GOARCH.h"
     9  #include "malloc.h"
    10  #include "stack.h"
    11  #include "mgc0.h"
    12  #include "race.h"
    13  #include "type.h"
    14  #include "typekind.h"
    15  #include "funcdata.h"
    16  #include "../../cmd/ld/textflag.h"
    17  
    18  enum {
    19  	Debug = 0,
    20  	DebugMark = 0,  // run second pass to check mark
    21  	CollectStats = 0,
    22  	ScanStackByFrames = 1,
    23  	IgnorePreciseGC = 0,
    24  
    25  	// Four bits per word (see #defines below).
    26  	wordsPerBitmapWord = sizeof(void*)*8/4,
    27  	bitShift = sizeof(void*)*8/4,
    28  
    29  	handoffThreshold = 4,
    30  	IntermediateBufferCapacity = 64,
    31  
    32  	// Bits in type information
    33  	PRECISE = 1,
    34  	LOOP = 2,
    35  	PC_BITS = PRECISE | LOOP,
    36  
    37  	// Pointer map
    38  	BitsPerPointer = 2,
    39  	BitsNoPointer = 0,
    40  	BitsPointer = 1,
    41  	BitsIface = 2,
    42  	BitsEface = 3,
    43  };
    44  
    45  // Bits in per-word bitmap.
    46  // #defines because enum might not be able to hold the values.
    47  //
    48  // Each word in the bitmap describes wordsPerBitmapWord words
    49  // of heap memory.  There are 4 bitmap bits dedicated to each heap word,
    50  // so on a 64-bit system there is one bitmap word per 16 heap words.
    51  // The bits in the word are packed together by type first, then by
    52  // heap location, so each 64-bit bitmap word consists of, from top to bottom,
    53  // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
    54  // then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
    55  // This layout makes it easier to iterate over the bits of a given type.
    56  //
    57  // The bitmap starts at mheap.arena_start and extends *backward* from
    58  // there.  On a 64-bit system the off'th word in the arena is tracked by
    59  // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
    60  // the only difference is that the divisor is 8.)
    61  //
    62  // To pull out the bits corresponding to a given pointer p, we use:
    63  //
    64  //	off = p - (uintptr*)mheap.arena_start;  // word offset
    65  //	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
    66  //	shift = off % wordsPerBitmapWord
    67  //	bits = *b >> shift;
    68  //	/* then test bits & bitAllocated, bits & bitMarked, etc. */
    69  //
    70  #define bitAllocated		((uintptr)1<<(bitShift*0))
    71  #define bitNoScan		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
    72  #define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
    73  #define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
    74  #define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */
    75  
    76  #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
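
        // For example, on a 64-bit system (wordsPerBitmapWord = bitShift = 16),
        // the heap word at offset off = 20 from arena_start is described by
        //
        //	bitp  = (uintptr*)arena_start - 20/16 - 1;  // two words below arena_start
        //	shift = 20 % 16;                            // == 4
        //
        // and its bitAllocated, bitNoScan/bitBlockBoundary, bitMarked and bitSpecial
        // bits are bits 4, 20, 36 and 52 of *bitp, so (*bitp >> shift) & bitMarked
        // tests the mark bit for that word.  (Illustrative only; the code below
        // always recomputes these values.)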
    77  
    78  // Holding worldsema grants an M the right to try to stop the world.
    79  // The procedure is:
    80  //
    81  //	runtime·semacquire(&runtime·worldsema);
    82  //	m->gcing = 1;
    83  //	runtime·stoptheworld();
    84  //
    85  //	... do stuff ...
    86  //
    87  //	m->gcing = 0;
    88  //	runtime·semrelease(&runtime·worldsema);
    89  //	runtime·starttheworld();
    90  //
    91  uint32 runtime·worldsema = 1;
    92  
    93  typedef struct Obj Obj;
    94  struct Obj
    95  {
    96  	byte	*p;	// data pointer
    97  	uintptr	n;	// size of data in bytes
    98  	uintptr	ti;	// type info
    99  };
   100  
   101  // The size of Workbuf is N*PageSize.
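        // Concretely, SIZE below is 2*PageSize minus the node and nobj fields, and
        // the obj array plus the _padding array together occupy exactly SIZE bytes,
        // so (absent compiler-inserted alignment padding) sizeof(Workbuf) works out
        // to exactly 2*PageSize; scanblock checks this with sizeof(Workbuf) % PageSize.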
   102  typedef struct Workbuf Workbuf;
   103  struct Workbuf
   104  {
   105  #define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
   106  	LFNode  node; // must be first
   107  	uintptr nobj;
   108  	Obj     obj[SIZE/sizeof(Obj) - 1];
   109  	uint8   _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
   110  #undef SIZE
   111  };
   112  
   113  typedef struct Finalizer Finalizer;
   114  struct Finalizer
   115  {
   116  	FuncVal *fn;
   117  	void *arg;
   118  	uintptr nret;
   119  	Type *fint;
   120  	PtrType *ot;
   121  };
   122  
   123  typedef struct FinBlock FinBlock;
   124  struct FinBlock
   125  {
   126  	FinBlock *alllink;
   127  	FinBlock *next;
   128  	int32 cnt;
   129  	int32 cap;
   130  	Finalizer fin[1];
   131  };
   132  
   133  extern byte data[];
   134  extern byte edata[];
   135  extern byte bss[];
   136  extern byte ebss[];
   137  
   138  extern byte gcdata[];
   139  extern byte gcbss[];
   140  
   141  static G *fing;
   142  static FinBlock *finq; // list of finalizers that are to be executed
   143  static FinBlock *finc; // cache of free blocks
   144  static FinBlock *allfin; // list of all blocks
   145  static Lock finlock;
   146  static int32 fingwait;
   147  
   148  static void runfinq(void);
   149  static Workbuf* getempty(Workbuf*);
   150  static Workbuf* getfull(Workbuf*);
   151  static void	putempty(Workbuf*);
   152  static Workbuf* handoff(Workbuf*);
   153  static void	gchelperstart(void);
   154  
   155  static struct {
   156  	uint64	full;  // lock-free list of full blocks
   157  	uint64	empty; // lock-free list of empty blocks
   158  	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
   159  	uint32	nproc;
   160  	volatile uint32	nwait;
   161  	volatile uint32	ndone;
   162  	volatile uint32 debugmarkdone;
   163  	Note	alldone;
   164  	ParFor	*markfor;
   165  	ParFor	*sweepfor;
   166  
   167  	Lock;
   168  	byte	*chunk;
   169  	uintptr	nchunk;
   170  
   171  	Obj	*roots;
   172  	uint32	nroot;
   173  	uint32	rootcap;
   174  } work;
   175  
   176  enum {
   177  	GC_DEFAULT_PTR = GC_NUM_INSTR,
   178  	GC_CHAN,
   179  
   180  	GC_NUM_INSTR2
   181  };
   182  
   183  static struct {
   184  	struct {
   185  		uint64 sum;
   186  		uint64 cnt;
   187  	} ptr;
   188  	uint64 nbytes;
   189  	struct {
   190  		uint64 sum;
   191  		uint64 cnt;
   192  		uint64 notype;
   193  		uint64 typelookup;
   194  	} obj;
   195  	uint64 rescan;
   196  	uint64 rescanbytes;
   197  	uint64 instr[GC_NUM_INSTR2];
   198  	uint64 putempty;
   199  	uint64 getfull;
   200  	struct {
   201  		uint64 foundbit;
   202  		uint64 foundword;
   203  		uint64 foundspan;
   204  	} flushptrbuf;
   205  	struct {
   206  		uint64 foundbit;
   207  		uint64 foundword;
   208  		uint64 foundspan;
   209  	} markonly;
   210  } gcstats;
   211  
   212  // markonly marks an object. It returns true if the object
   213  // has been marked by this function, false otherwise.
   214  // This function doesn't append the object to any buffer.
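        // markonly tries three ways to locate the start of the block containing obj:
        // the bitmap bits for obj itself (foundbit), a short backward scan within the
        // same bitmap word (foundword), and finally the span table (foundspan).  The
        // gcstats.markonly counters record which path succeeded when CollectStats is on.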
   215  static bool
   216  markonly(void *obj)
   217  {
   218  	byte *p;
   219  	uintptr *bitp, bits, shift, x, xbits, off, j;
   220  	MSpan *s;
   221  	PageID k;
   222  
   223  	// Words outside the arena cannot be pointers.
   224  	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
   225  		return false;
   226  
   227  	// obj may be a pointer to a live object.
   228  	// Try to find the beginning of the object.
   229  
   230  	// Round down to word boundary.
   231  	obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
   232  
   233  	// Find bits for this word.
   234  	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
   235  	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
   236  	shift = off % wordsPerBitmapWord;
   237  	xbits = *bitp;
   238  	bits = xbits >> shift;
   239  
   240  	// Pointing at the beginning of a block?
   241  	if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
   242  		if(CollectStats)
   243  			runtime·xadd64(&gcstats.markonly.foundbit, 1);
   244  		goto found;
   245  	}
   246  
   247  	// Pointing just past the beginning?
   248  	// Scan backward a little to find a block boundary.
   249  	for(j=shift; j-->0; ) {
   250  		if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
   251  			shift = j;
   252  			bits = xbits>>shift;
   253  			if(CollectStats)
   254  				runtime·xadd64(&gcstats.markonly.foundword, 1);
   255  			goto found;
   256  		}
   257  	}
   258  
   259  	// Otherwise consult span table to find beginning.
   260  	// (Manually inlined copy of MHeap_LookupMaybe.)
   261  	k = (uintptr)obj>>PageShift;
   262  	x = k;
   263  	if(sizeof(void*) == 8)
   264  		x -= (uintptr)runtime·mheap.arena_start>>PageShift;
   265  	s = runtime·mheap.spans[x];
   266  	if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
   267  		return false;
   268  	p = (byte*)((uintptr)s->start<<PageShift);
   269  	if(s->sizeclass == 0) {
   270  		obj = p;
   271  	} else {
   272  		uintptr size = s->elemsize;
   273  		int32 i = ((byte*)obj - p)/size;
   274  		obj = p+i*size;
   275  	}
   276  
   277  	// Now that we know the object header, reload bits.
   278  	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
   279  	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
   280  	shift = off % wordsPerBitmapWord;
   281  	xbits = *bitp;
   282  	bits = xbits >> shift;
   283  	if(CollectStats)
   284  		runtime·xadd64(&gcstats.markonly.foundspan, 1);
   285  
   286  found:
   287  	// Now we have bits, bitp, and shift correct for
   288  	// obj pointing at the base of the object.
   289  	// Only care about allocated and not marked.
   290  	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
   291  		return false;
   292  	if(work.nproc == 1)
   293  		*bitp |= bitMarked<<shift;
   294  	else {
   295  		for(;;) {
   296  			x = *bitp;
   297  			if(x & (bitMarked<<shift))
   298  				return false;
   299  			if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
   300  				break;
   301  		}
   302  	}
   303  
   304  	// The object is now marked
   305  	return true;
   306  }
   307  
   308  // PtrTarget is a structure used by intermediate buffers.
   309  // The intermediate buffers hold GC data before it
   310  // is moved/flushed to the work buffer (Workbuf).
   311  // The size of an intermediate buffer is very small,
   312  // such as 32 or 64 elements.
   313  typedef struct PtrTarget PtrTarget;
   314  struct PtrTarget
   315  {
   316  	void *p;
   317  	uintptr ti;
   318  };
   319  
   320  typedef struct BufferList BufferList;
   321  struct BufferList
   322  {
   323  	PtrTarget ptrtarget[IntermediateBufferCapacity];
   324  	Obj obj[IntermediateBufferCapacity];
   325  	uint32 busy;
   326  	byte pad[CacheLineSize];
   327  };
   328  #pragma dataflag NOPTR
   329  static BufferList bufferList[MaxGcproc];
   330  
   331  static Type *itabtype;
   332  
   333  static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
   334  
   335  // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
   336  // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
   337  // while the work buffer contains blocks which have been marked
   338  // and are prepared to be scanned by the garbage collector.
   339  //
   340  // _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
   341  //
   342  // A simplified drawing explaining how the todo-list moves from one structure to another:
   343  //
   344  //     scanblock
   345  //  (find pointers)
   346  //    Obj ------> PtrTarget (pointer targets)
   347  //     ↑          |
   348  //     |          |
   349  //     `----------'
   350  //     flushptrbuf
   351  //  (find block start, mark and enqueue)
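        //
        // flushptrbuf is called from scanblock whenever the PtrTarget buffer fills up
        // (ptrbufpos reaches ptrbuf_end) and again when the work list runs dry, so at
        // most IntermediateBufferCapacity entries are processed per call.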
   352  static void
   353  flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
   354  {
   355  	byte *p, *arena_start, *obj;
   356  	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
   357  	MSpan *s;
   358  	PageID k;
   359  	Obj *wp;
   360  	Workbuf *wbuf;
   361  	PtrTarget *ptrbuf_end;
   362  
   363  	arena_start = runtime·mheap.arena_start;
   364  
   365  	wp = *_wp;
   366  	wbuf = *_wbuf;
   367  	nobj = *_nobj;
   368  
   369  	ptrbuf_end = *ptrbufpos;
   370  	n = ptrbuf_end - ptrbuf;
   371  	*ptrbufpos = ptrbuf;
   372  
   373  	if(CollectStats) {
   374  		runtime·xadd64(&gcstats.ptr.sum, n);
   375  		runtime·xadd64(&gcstats.ptr.cnt, 1);
   376  	}
   377  
   378  	// If buffer is nearly full, get a new one.
   379  	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
   380  		if(wbuf != nil)
   381  			wbuf->nobj = nobj;
   382  		wbuf = getempty(wbuf);
   383  		wp = wbuf->obj;
   384  		nobj = 0;
   385  
   386  		if(n >= nelem(wbuf->obj))
   387  			runtime·throw("ptrbuf has to be smaller than WorkBuf");
   388  	}
   389  
   390  	// TODO(atom): This block is a branch of an if-then-else statement.
   391  	//             The single-threaded branch may be added in a future CL.
   392  	{
   393  		// Multi-threaded version.
   394  
   395  		while(ptrbuf < ptrbuf_end) {
   396  			obj = ptrbuf->p;
   397  			ti = ptrbuf->ti;
   398  			ptrbuf++;
   399  
   400  			// obj belongs to interval [mheap.arena_start, mheap.arena_used).
   401  			if(Debug > 1) {
   402  				if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
   403  					runtime·throw("object is outside of mheap");
   404  			}
   405  
   406  			// obj may be a pointer to a live object.
   407  			// Try to find the beginning of the object.
   408  
   409  			// Round down to word boundary.
   410  			if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
   411  				obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
   412  				ti = 0;
   413  			}
   414  
   415  			// Find bits for this word.
   416  			off = (uintptr*)obj - (uintptr*)arena_start;
   417  			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
   418  			shift = off % wordsPerBitmapWord;
   419  			xbits = *bitp;
   420  			bits = xbits >> shift;
   421  
   422  			// Pointing at the beginning of a block?
   423  			if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
   424  				if(CollectStats)
   425  					runtime·xadd64(&gcstats.flushptrbuf.foundbit, 1);
   426  				goto found;
   427  			}
   428  
   429  			ti = 0;
   430  
   431  			// Pointing just past the beginning?
   432  			// Scan backward a little to find a block boundary.
   433  			for(j=shift; j-->0; ) {
   434  				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
   435  					obj = (byte*)obj - (shift-j)*PtrSize;
   436  					shift = j;
   437  					bits = xbits>>shift;
   438  					if(CollectStats)
   439  						runtime·xadd64(&gcstats.flushptrbuf.foundword, 1);
   440  					goto found;
   441  				}
   442  			}
   443  
   444  			// Otherwise consult span table to find beginning.
   445  			// (Manually inlined copy of MHeap_LookupMaybe.)
   446  			k = (uintptr)obj>>PageShift;
   447  			x = k;
   448  			if(sizeof(void*) == 8)
   449  				x -= (uintptr)arena_start>>PageShift;
   450  			s = runtime·mheap.spans[x];
   451  			if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
   452  				continue;
   453  			p = (byte*)((uintptr)s->start<<PageShift);
   454  			if(s->sizeclass == 0) {
   455  				obj = p;
   456  			} else {
   457  				size = s->elemsize;
   458  				int32 i = ((byte*)obj - p)/size;
   459  				obj = p+i*size;
   460  			}
   461  
   462  			// Now that we know the object header, reload bits.
   463  			off = (uintptr*)obj - (uintptr*)arena_start;
   464  			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
   465  			shift = off % wordsPerBitmapWord;
   466  			xbits = *bitp;
   467  			bits = xbits >> shift;
   468  			if(CollectStats)
   469  				runtime·xadd64(&gcstats.flushptrbuf.foundspan, 1);
   470  
   471  		found:
   472  			// Now we have bits, bitp, and shift correct for
   473  			// obj pointing at the base of the object.
   474  			// Only care about allocated and not marked.
   475  			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
   476  				continue;
   477  			if(work.nproc == 1)
   478  				*bitp |= bitMarked<<shift;
   479  			else {
   480  				for(;;) {
   481  					x = *bitp;
   482  					if(x & (bitMarked<<shift))
   483  						goto continue_obj;
   484  					if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
   485  						break;
   486  				}
   487  			}
   488  
   489  			// If object has no pointers, don't need to scan further.
   490  			if((bits & bitNoScan) != 0)
   491  				continue;
   492  
   493  			// Ask span about size class.
   494  			// (Manually inlined copy of MHeap_Lookup.)
   495  			x = (uintptr)obj >> PageShift;
   496  			if(sizeof(void*) == 8)
   497  				x -= (uintptr)arena_start>>PageShift;
   498  			s = runtime·mheap.spans[x];
   499  
   500  			PREFETCH(obj);
   501  
   502  			*wp = (Obj){obj, s->elemsize, ti};
   503  			wp++;
   504  			nobj++;
   505  		continue_obj:;
   506  		}
   507  
   508  		// If another proc wants a pointer, give it some.
   509  		if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
   510  			wbuf->nobj = nobj;
   511  			wbuf = handoff(wbuf);
   512  			nobj = wbuf->nobj;
   513  			wp = wbuf->obj + nobj;
   514  		}
   515  	}
   516  
   517  	*_wp = wp;
   518  	*_wbuf = wbuf;
   519  	*_nobj = nobj;
   520  }
   521  
   522  static void
   523  flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
   524  {
   525  	uintptr nobj, off;
   526  	Obj *wp, obj;
   527  	Workbuf *wbuf;
   528  	Obj *objbuf_end;
   529  
   530  	wp = *_wp;
   531  	wbuf = *_wbuf;
   532  	nobj = *_nobj;
   533  
   534  	objbuf_end = *objbufpos;
   535  	*objbufpos = objbuf;
   536  
   537  	while(objbuf < objbuf_end) {
   538  		obj = *objbuf++;
   539  
   540  		// Align obj.b to a word boundary.
   541  		off = (uintptr)obj.p & (PtrSize-1);
   542  		if(off != 0) {
   543  			obj.p += PtrSize - off;
   544  			obj.n -= PtrSize - off;
   545  			obj.ti = 0;
   546  		}
   547  
   548  		if(obj.p == nil || obj.n == 0)
   549  			continue;
   550  
   551  		// If buffer is full, get a new one.
   552  		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
   553  			if(wbuf != nil)
   554  				wbuf->nobj = nobj;
   555  			wbuf = getempty(wbuf);
   556  			wp = wbuf->obj;
   557  			nobj = 0;
   558  		}
   559  
   560  		*wp = obj;
   561  		wp++;
   562  		nobj++;
   563  	}
   564  
   565  	// If another proc wants a pointer, give it some.
   566  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
   567  		wbuf->nobj = nobj;
   568  		wbuf = handoff(wbuf);
   569  		nobj = wbuf->nobj;
   570  		wp = wbuf->obj + nobj;
   571  	}
   572  
   573  	*_wp = wp;
   574  	*_wbuf = wbuf;
   575  	*_nobj = nobj;
   576  }
   577  
   578  // Program that scans the whole block and treats every block element as a potential pointer
   579  static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
   580  
   581  // Hchan program
   582  static uintptr chanProg[2] = {0, GC_CHAN};
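
        // Both programs follow the layout scanblock expects: the first word is the
        // element size slot read as pc[0], and the remaining words are GC_* opcodes
        // (with their operands) interpreted by the switch in scanblock.  chanProg's
        // leading 0 is only a placeholder; GC_CHAN takes the buffer size from the
        // channel itself.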
   583  
   584  // Local variables of a program fragment or loop
   585  typedef struct Frame Frame;
   586  struct Frame {
   587  	uintptr count, elemsize, b;
   588  	uintptr *loop_or_ret;
   589  };
   590  
   591  // Sanity check for the derived type info objti.
   592  static void
   593  checkptr(void *obj, uintptr objti)
   594  {
   595  	uintptr *pc1, *pc2, type, tisize, i, j, x;
   596  	byte *objstart;
   597  	Type *t;
   598  	MSpan *s;
   599  
   600  	if(!Debug)
   601  		runtime·throw("checkptr is debug only");
   602  
   603  	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
   604  		return;
   605  	type = runtime·gettype(obj);
   606  	t = (Type*)(type & ~(uintptr)(PtrSize-1));
   607  	if(t == nil)
   608  		return;
   609  	x = (uintptr)obj >> PageShift;
   610  	if(sizeof(void*) == 8)
   611  		x -= (uintptr)(runtime·mheap.arena_start)>>PageShift;
   612  	s = runtime·mheap.spans[x];
   613  	objstart = (byte*)((uintptr)s->start<<PageShift);
   614  	if(s->sizeclass != 0) {
   615  		i = ((byte*)obj - objstart)/s->elemsize;
   616  		objstart += i*s->elemsize;
   617  	}
   618  	tisize = *(uintptr*)objti;
   619  	// Sanity check for object size: it should fit into the memory block.
   620  	if((byte*)obj + tisize > objstart + s->elemsize) {
   621  		runtime·printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
   622  			       *t->string, obj, tisize, objstart, s->elemsize);
   623  		runtime·throw("invalid gc type info");
   624  	}
   625  	if(obj != objstart)
   626  		return;
   627  	// If obj points to the beginning of the memory block,
   628  	// check type info as well.
   629  	if(t->string == nil ||
   630  		// Gob allocates unsafe pointers for indirection.
   631  		(runtime·strcmp(t->string->str, (byte*)"unsafe.Pointer") &&
   632  		// Runtime and gc think differently about closures.
   633  		runtime·strstr(t->string->str, (byte*)"struct { F uintptr") != t->string->str)) {
   634  		pc1 = (uintptr*)objti;
   635  		pc2 = (uintptr*)t->gc;
   636  		// A simple best-effort check until first GC_END.
   637  		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
   638  			if(pc1[j] != pc2[j]) {
   639  				runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
   640  					       t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]);
   641  				runtime·throw("invalid gc type info");
   642  			}
   643  		}
   644  	}
   645  }
   646  
   647  // scanblock scans a block of n bytes starting at pointer b for references
   648  // to other objects, scanning any it finds recursively until there are no
   649  // unscanned objects left.  Instead of using an explicit recursion, it keeps
   650  // a work list in the Workbuf* structures and loops in the main function
   651  // body.  Keeping an explicit work list is easier on the stack allocator and
   652  // more efficient.
   653  //
   654  // wbuf: current work buffer
   655  // wp:   storage for next queued pointer (write pointer)
   656  // nobj: number of queued objects
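        //
        // For each block, scanblock interprets the block's GC program: pc walks a
        // sequence of GC_* opcodes (see the switch below), a small Frame stack
        // (stack_top/stack_ptr) handles nested arrays and GC_CALL/GC_END, and any
        // pointers found are staged in the per-proc PtrTarget/Obj buffers before
        // flushptrbuf/flushobjbuf move them to the work buffer.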
   657  static void
   658  scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
   659  {
   660  	byte *b, *arena_start, *arena_used;
   661  	uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
   662  	uintptr *pc, precise_type, nominal_size;
   663  	uintptr *chan_ret, chancap;
   664  	void *obj;
   665  	Type *t;
   666  	Slice *sliceptr;
   667  	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
   668  	BufferList *scanbuffers;
   669  	PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
   670  	Obj *objbuf, *objbuf_end, *objbufpos;
   671  	Eface *eface;
   672  	Iface *iface;
   673  	Hchan *chan;
   674  	ChanType *chantype;
   675  
   676  	if(sizeof(Workbuf) % PageSize != 0)
   677  		runtime·throw("scanblock: size of Workbuf is suboptimal");
   678  
   679  	// Memory arena parameters.
   680  	arena_start = runtime·mheap.arena_start;
   681  	arena_used = runtime·mheap.arena_used;
   682  
   683  	stack_ptr = stack+nelem(stack)-1;
   684  	
   685  	precise_type = false;
   686  	nominal_size = 0;
   687  
   688  	// Allocate ptrbuf
   689  	{
   690  		scanbuffers = &bufferList[m->helpgc];
   691  		ptrbuf = &scanbuffers->ptrtarget[0];
   692  		ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
   693  		objbuf = &scanbuffers->obj[0];
   694  		objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
   695  	}
   696  
   697  	ptrbufpos = ptrbuf;
   698  	objbufpos = objbuf;
   699  
   700  	// (Silence the compiler)
   701  	chan = nil;
   702  	chantype = nil;
   703  	chan_ret = nil;
   704  
   705  	goto next_block;
   706  
   707  	for(;;) {
   708  		// Each iteration scans the block b of length n, queueing pointers in
   709  		// the work buffer.
   710  		if(Debug > 1) {
   711  			runtime·printf("scanblock %p %D\n", b, (int64)n);
   712  		}
   713  
   714  		if(CollectStats) {
   715  			runtime·xadd64(&gcstats.nbytes, n);
   716  			runtime·xadd64(&gcstats.obj.sum, nobj);
   717  			runtime·xadd64(&gcstats.obj.cnt, 1);
   718  		}
   719  
   720  		if(ti != 0) {
   721  			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
   722  			precise_type = (ti & PRECISE);
   723  			stack_top.elemsize = pc[0];
   724  			if(!precise_type)
   725  				nominal_size = pc[0];
   726  			if(ti & LOOP) {
   727  				stack_top.count = 0;	// 0 means an infinite number of iterations
   728  				stack_top.loop_or_ret = pc+1;
   729  			} else {
   730  				stack_top.count = 1;
   731  			}
   732  			if(Debug) {
   733  				// Simple sanity check for provided type info ti:
   734  				// The declared size of the object must not be larger than the actual size
   735  				// (it can be smaller due to interior pointers).
   736  				// It's difficult to make a comprehensive check due to interior pointers,
   737  				// reflection, gob, etc.
   738  				if(pc[0] > n) {
   739  					runtime·printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
   740  					runtime·throw("invalid gc type info");
   741  				}
   742  			}
   743  		} else if(UseSpanType) {
   744  			if(CollectStats)
   745  				runtime·xadd64(&gcstats.obj.notype, 1);
   746  
   747  			type = runtime·gettype(b);
   748  			if(type != 0) {
   749  				if(CollectStats)
   750  					runtime·xadd64(&gcstats.obj.typelookup, 1);
   751  
   752  				t = (Type*)(type & ~(uintptr)(PtrSize-1));
   753  				switch(type & (PtrSize-1)) {
   754  				case TypeInfo_SingleObject:
   755  					pc = (uintptr*)t->gc;
   756  					precise_type = true;  // type information about 'b' is precise
   757  					stack_top.count = 1;
   758  					stack_top.elemsize = pc[0];
   759  					break;
   760  				case TypeInfo_Array:
   761  					pc = (uintptr*)t->gc;
   762  					if(pc[0] == 0)
   763  						goto next_block;
   764  					precise_type = true;  // type information about 'b' is precise
   765  					stack_top.count = 0;  // 0 means an infinite number of iterations
   766  					stack_top.elemsize = pc[0];
   767  					stack_top.loop_or_ret = pc+1;
   768  					break;
   769  				case TypeInfo_Chan:
   770  					chan = (Hchan*)b;
   771  					chantype = (ChanType*)t;
   772  					chan_ret = nil;
   773  					pc = chanProg;
   774  					break;
   775  				default:
   776  					runtime·throw("scanblock: invalid type");
   777  					return;
   778  				}
   779  			} else {
   780  				pc = defaultProg;
   781  			}
   782  		} else {
   783  			pc = defaultProg;
   784  		}
   785  
   786  		if(IgnorePreciseGC)
   787  			pc = defaultProg;
   788  
   789  		pc++;
   790  		stack_top.b = (uintptr)b;
   791  
   792  		end_b = (uintptr)b + n - PtrSize;
   793  
   794  	for(;;) {
   795  		if(CollectStats)
   796  			runtime·xadd64(&gcstats.instr[pc[0]], 1);
   797  
   798  		obj = nil;
   799  		objti = 0;
   800  		switch(pc[0]) {
   801  		case GC_PTR:
   802  			obj = *(void**)(stack_top.b + pc[1]);
   803  			objti = pc[2];
   804  			pc += 3;
   805  			if(Debug)
   806  				checkptr(obj, objti);
   807  			break;
   808  
   809  		case GC_SLICE:
   810  			sliceptr = (Slice*)(stack_top.b + pc[1]);
   811  			if(sliceptr->cap != 0) {
   812  				obj = sliceptr->array;
   813  				// Can't use slice element type for scanning,
   814  				// because if it points to an array embedded
   815  				// in the beginning of a struct,
   816  				// we will scan the whole struct as the slice.
   817  				// So just obtain type info from heap.
   818  			}
   819  			pc += 3;
   820  			break;
   821  
   822  		case GC_APTR:
   823  			obj = *(void**)(stack_top.b + pc[1]);
   824  			pc += 2;
   825  			break;
   826  
   827  		case GC_STRING:
   828  			obj = *(void**)(stack_top.b + pc[1]);
   829  			markonly(obj);
   830  			pc += 2;
   831  			continue;
   832  
   833  		case GC_EFACE:
   834  			eface = (Eface*)(stack_top.b + pc[1]);
   835  			pc += 2;
   836  			if(eface->type == nil)
   837  				continue;
   838  
   839  			// eface->type
   840  			t = eface->type;
   841  			if((void*)t >= arena_start && (void*)t < arena_used) {
   842  				*ptrbufpos++ = (PtrTarget){t, 0};
   843  				if(ptrbufpos == ptrbuf_end)
   844  					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
   845  			}
   846  
   847  			// eface->data
   848  			if(eface->data >= arena_start && eface->data < arena_used) {
   849  				if(t->size <= sizeof(void*)) {
   850  					if((t->kind & KindNoPointers))
   851  						continue;
   852  
   853  					obj = eface->data;
   854  					if((t->kind & ~KindNoPointers) == KindPtr)
   855  						objti = (uintptr)((PtrType*)t)->elem->gc;
   856  				} else {
   857  					obj = eface->data;
   858  					objti = (uintptr)t->gc;
   859  				}
   860  			}
   861  			break;
   862  
   863  		case GC_IFACE:
   864  			iface = (Iface*)(stack_top.b + pc[1]);
   865  			pc += 2;
   866  			if(iface->tab == nil)
   867  				continue;
   868  			
   869  			// iface->tab
   870  			if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
   871  				*ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
   872  				if(ptrbufpos == ptrbuf_end)
   873  					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
   874  			}
   875  
   876  			// iface->data
   877  			if(iface->data >= arena_start && iface->data < arena_used) {
   878  				t = iface->tab->type;
   879  				if(t->size <= sizeof(void*)) {
   880  					if((t->kind & KindNoPointers))
   881  						continue;
   882  
   883  					obj = iface->data;
   884  					if((t->kind & ~KindNoPointers) == KindPtr)
   885  						objti = (uintptr)((PtrType*)t)->elem->gc;
   886  				} else {
   887  					obj = iface->data;
   888  					objti = (uintptr)t->gc;
   889  				}
   890  			}
   891  			break;
   892  
   893  		case GC_DEFAULT_PTR:
   894  			while(stack_top.b <= end_b) {
   895  				obj = *(byte**)stack_top.b;
   896  				stack_top.b += PtrSize;
   897  				if(obj >= arena_start && obj < arena_used) {
   898  					*ptrbufpos++ = (PtrTarget){obj, 0};
   899  					if(ptrbufpos == ptrbuf_end)
   900  						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
   901  				}
   902  			}
   903  			goto next_block;
   904  
   905  		case GC_END:
   906  			if(--stack_top.count != 0) {
   907  				// Next iteration of a loop if possible.
   908  				stack_top.b += stack_top.elemsize;
   909  				if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
   910  					pc = stack_top.loop_or_ret;
   911  					continue;
   912  				}
   913  				i = stack_top.b;
   914  			} else {
   915  				// Stack pop if possible.
   916  				if(stack_ptr+1 < stack+nelem(stack)) {
   917  					pc = stack_top.loop_or_ret;
   918  					stack_top = *(++stack_ptr);
   919  					continue;
   920  				}
   921  				i = (uintptr)b + nominal_size;
   922  			}
   923  			if(!precise_type) {
   924  				// Quickly scan [b+i,b+n) for possible pointers.
   925  				for(; i<=end_b; i+=PtrSize) {
   926  					if(*(byte**)i != nil) {
   927  						// Found a value that may be a pointer.
   928  						// Do a rescan of the entire block.
   929  						enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
   930  						if(CollectStats) {
   931  							runtime·xadd64(&gcstats.rescan, 1);
   932  							runtime·xadd64(&gcstats.rescanbytes, n);
   933  						}
   934  						break;
   935  					}
   936  				}
   937  			}
   938  			goto next_block;
   939  
   940  		case GC_ARRAY_START:
   941  			i = stack_top.b + pc[1];
   942  			count = pc[2];
   943  			elemsize = pc[3];
   944  			pc += 4;
   945  
   946  			// Stack push.
   947  			*stack_ptr-- = stack_top;
   948  			stack_top = (Frame){count, elemsize, i, pc};
   949  			continue;
   950  
   951  		case GC_ARRAY_NEXT:
   952  			if(--stack_top.count != 0) {
   953  				stack_top.b += stack_top.elemsize;
   954  				pc = stack_top.loop_or_ret;
   955  			} else {
   956  				// Stack pop.
   957  				stack_top = *(++stack_ptr);
   958  				pc += 1;
   959  			}
   960  			continue;
   961  
   962  		case GC_CALL:
   963  			// Stack push.
   964  			*stack_ptr-- = stack_top;
   965  			stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
   966  			pc = (uintptr*)((byte*)pc + *(int32*)(pc+2));  // target of the CALL instruction
   967  			continue;
   968  
   969  		case GC_REGION:
   970  			obj = (void*)(stack_top.b + pc[1]);
   971  			size = pc[2];
   972  			objti = pc[3];
   973  			pc += 4;
   974  
   975  			*objbufpos++ = (Obj){obj, size, objti};
   976  			if(objbufpos == objbuf_end)
   977  				flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
   978  			continue;
   979  
   980  		case GC_CHAN_PTR:
   981  			chan = *(Hchan**)(stack_top.b + pc[1]);
   982  			if(chan == nil) {
   983  				pc += 3;
   984  				continue;
   985  			}
   986  			if(markonly(chan)) {
   987  				chantype = (ChanType*)pc[2];
   988  				if(!(chantype->elem->kind & KindNoPointers)) {
   989  					// Start chanProg.
   990  					chan_ret = pc+3;
   991  					pc = chanProg+1;
   992  					continue;
   993  				}
   994  			}
   995  			pc += 3;
   996  			continue;
   997  
   998  		case GC_CHAN:
   999  			// There are no heap pointers in struct Hchan,
  1000  			// so we can ignore the leading sizeof(Hchan) bytes.
  1001  			if(!(chantype->elem->kind & KindNoPointers)) {
  1002  				// Channel's buffer follows Hchan immediately in memory.
  1003  				// Size of buffer (cap(c)) is second int in the chan struct.
  1004  				chancap = ((uintgo*)chan)[1];
  1005  				if(chancap > 0) {
  1006  					// TODO(atom): split into two chunks so that only the
  1007  					// in-use part of the circular buffer is scanned.
  1008  					// (Channel routines zero the unused part, so the current
  1009  					// code does not lead to leaks, it's just a little inefficient.)
  1010  					*objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, chancap*chantype->elem->size,
  1011  						(uintptr)chantype->elem->gc | PRECISE | LOOP};
  1012  					if(objbufpos == objbuf_end)
  1013  						flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
  1014  				}
  1015  			}
  1016  			if(chan_ret == nil)
  1017  				goto next_block;
  1018  			pc = chan_ret;
  1019  			continue;
  1020  
  1021  		default:
  1022  			runtime·throw("scanblock: invalid GC instruction");
  1023  			return;
  1024  		}
  1025  
  1026  		if(obj >= arena_start && obj < arena_used) {
  1027  			*ptrbufpos++ = (PtrTarget){obj, objti};
  1028  			if(ptrbufpos == ptrbuf_end)
  1029  				flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
  1030  		}
  1031  	}
  1032  
  1033  	next_block:
  1034  		// Done scanning [b, b+n).  Prepare for the next iteration of
  1035  		// the loop by setting b, n, ti to the parameters for the next block.
  1036  
  1037  		if(nobj == 0) {
  1038  			flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
  1039  			flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
  1040  
  1041  			if(nobj == 0) {
  1042  				if(!keepworking) {
  1043  					if(wbuf)
  1044  						putempty(wbuf);
  1045  					goto endscan;
  1046  				}
  1047  				// Emptied our buffer: refill.
  1048  				wbuf = getfull(wbuf);
  1049  				if(wbuf == nil)
  1050  					goto endscan;
  1051  				nobj = wbuf->nobj;
  1052  				wp = wbuf->obj + wbuf->nobj;
  1053  			}
  1054  		}
  1055  
  1056  		// Fetch b from the work buffer.
  1057  		--wp;
  1058  		b = wp->p;
  1059  		n = wp->n;
  1060  		ti = wp->ti;
  1061  		nobj--;
  1062  	}
  1063  
  1064  endscan:;
  1065  }
  1066  
  1067  // debug_scanblock is the debug copy of scanblock.
  1068  // It is simpler, slower, single-threaded, recursive,
  1069  // and uses bitSpecial as the mark bit.
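        // Because it runs after the parallel mark phase, any reachable block it finds
        // without bitMarked set indicates a missed mark; such blocks are reported by
        // the "found unmarked block" print below.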
  1070  static void
  1071  debug_scanblock(byte *b, uintptr n)
  1072  {
  1073  	byte *obj, *p;
  1074  	void **vp;
  1075  	uintptr size, *bitp, bits, shift, i, xbits, off;
  1076  	MSpan *s;
  1077  
  1078  	if(!DebugMark)
  1079  		runtime·throw("debug_scanblock without DebugMark");
  1080  
  1081  	if((intptr)n < 0) {
  1082  		runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
  1083  		runtime·throw("debug_scanblock");
  1084  	}
  1085  
  1086  	// Align b to a word boundary.
  1087  	off = (uintptr)b & (PtrSize-1);
  1088  	if(off != 0) {
  1089  		b += PtrSize - off;
  1090  		n -= PtrSize - off;
  1091  	}
  1092  
  1093  	vp = (void**)b;
  1094  	n /= PtrSize;
  1095  	for(i=0; i<n; i++) {
  1096  		obj = (byte*)vp[i];
  1097  
  1098  		// Words outside the arena cannot be pointers.
  1099  		if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
  1100  			continue;
  1101  
  1102  		// Round down to word boundary.
  1103  		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
  1104  
  1105  		// Consult span table to find beginning.
  1106  		s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
  1107  		if(s == nil)
  1108  			continue;
  1109  
  1110  		p =  (byte*)((uintptr)s->start<<PageShift);
  1111  		size = s->elemsize;
  1112  		if(s->sizeclass == 0) {
  1113  			obj = p;
  1114  		} else {
  1115  			int32 i = ((byte*)obj - p)/size;
  1116  			obj = p+i*size;
  1117  		}
  1118  
  1119  		// Now that we know the object header, reload bits.
  1120  		off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
  1121  		bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  1122  		shift = off % wordsPerBitmapWord;
  1123  		xbits = *bitp;
  1124  		bits = xbits >> shift;
  1125  
  1126  		// Now we have bits, bitp, and shift correct for
  1127  		// obj pointing at the base of the object.
  1128  		// If not allocated or already marked, done.
  1129  		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
  1130  			continue;
  1131  		*bitp |= bitSpecial<<shift;
  1132  		if(!(bits & bitMarked))
  1133  			runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
  1134  
  1135  		// If object has no pointers, don't need to scan further.
  1136  		if((bits & bitNoScan) != 0)
  1137  			continue;
  1138  
  1139  		debug_scanblock(obj, size);
  1140  	}
  1141  }
  1142  
  1143  // Append obj to the work buffer.
  1144  // _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
  1145  static void
  1146  enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
  1147  {
  1148  	uintptr nobj, off;
  1149  	Obj *wp;
  1150  	Workbuf *wbuf;
  1151  
  1152  	if(Debug > 1)
  1153  		runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
  1154  
  1155  	// Align obj.b to a word boundary.
  1156  	off = (uintptr)obj.p & (PtrSize-1);
  1157  	if(off != 0) {
  1158  		obj.p += PtrSize - off;
  1159  		obj.n -= PtrSize - off;
  1160  		obj.ti = 0;
  1161  	}
  1162  
  1163  	if(obj.p == nil || obj.n == 0)
  1164  		return;
  1165  
  1166  	// Load work buffer state
  1167  	wp = *_wp;
  1168  	wbuf = *_wbuf;
  1169  	nobj = *_nobj;
  1170  
  1171  	// If another proc wants a pointer, give it some.
  1172  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
  1173  		wbuf->nobj = nobj;
  1174  		wbuf = handoff(wbuf);
  1175  		nobj = wbuf->nobj;
  1176  		wp = wbuf->obj + nobj;
  1177  	}
  1178  
  1179  	// If buffer is full, get a new one.
  1180  	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
  1181  		if(wbuf != nil)
  1182  			wbuf->nobj = nobj;
  1183  		wbuf = getempty(wbuf);
  1184  		wp = wbuf->obj;
  1185  		nobj = 0;
  1186  	}
  1187  
  1188  	*wp = obj;
  1189  	wp++;
  1190  	nobj++;
  1191  
  1192  	// Save work buffer state
  1193  	*_wp = wp;
  1194  	*_wbuf = wbuf;
  1195  	*_nobj = nobj;
  1196  }
  1197  
  1198  static void
  1199  markroot(ParFor *desc, uint32 i)
  1200  {
  1201  	Obj *wp;
  1202  	Workbuf *wbuf;
  1203  	uintptr nobj;
  1204  
  1205  	USED(&desc);
  1206  	wp = nil;
  1207  	wbuf = nil;
  1208  	nobj = 0;
  1209  	enqueue(work.roots[i], &wbuf, &wp, &nobj);
  1210  	scanblock(wbuf, wp, nobj, false);
  1211  }
  1212  
  1213  // Get an empty work buffer off the work.empty list,
  1214  // allocating new buffers as needed.
  1215  static Workbuf*
  1216  getempty(Workbuf *b)
  1217  {
  1218  	if(b != nil)
  1219  		runtime·lfstackpush(&work.full, &b->node);
  1220  	b = (Workbuf*)runtime·lfstackpop(&work.empty);
  1221  	if(b == nil) {
  1222  		// Need to allocate.
  1223  		runtime·lock(&work);
  1224  		if(work.nchunk < sizeof *b) {
  1225  			work.nchunk = 1<<20;
  1226  			work.chunk = runtime·SysAlloc(work.nchunk);
  1227  			if(work.chunk == nil)
  1228  				runtime·throw("runtime: cannot allocate memory");
  1229  		}
  1230  		b = (Workbuf*)work.chunk;
  1231  		work.chunk += sizeof *b;
  1232  		work.nchunk -= sizeof *b;
  1233  		runtime·unlock(&work);
  1234  	}
  1235  	b->nobj = 0;
  1236  	return b;
  1237  }
  1238  
  1239  static void
  1240  putempty(Workbuf *b)
  1241  {
  1242  	if(CollectStats)
  1243  		runtime·xadd64(&gcstats.putempty, 1);
  1244  
  1245  	runtime·lfstackpush(&work.empty, &b->node);
  1246  }
  1247  
  1248  // Get a full work buffer off the work.full list, or return nil.
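        // If the full list is empty but other procs are still working, getfull spins
        // with an escalating backoff (procyield, then osyield, then usleep) until a
        // buffer shows up or all procs are waiting, which means marking is done.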
  1249  static Workbuf*
  1250  getfull(Workbuf *b)
  1251  {
  1252  	int32 i;
  1253  
  1254  	if(CollectStats)
  1255  		runtime·xadd64(&gcstats.getfull, 1);
  1256  
  1257  	if(b != nil)
  1258  		runtime·lfstackpush(&work.empty, &b->node);
  1259  	b = (Workbuf*)runtime·lfstackpop(&work.full);
  1260  	if(b != nil || work.nproc == 1)
  1261  		return b;
  1262  
  1263  	runtime·xadd(&work.nwait, +1);
  1264  	for(i=0;; i++) {
  1265  		if(work.full != 0) {
  1266  			runtime·xadd(&work.nwait, -1);
  1267  			b = (Workbuf*)runtime·lfstackpop(&work.full);
  1268  			if(b != nil)
  1269  				return b;
  1270  			runtime·xadd(&work.nwait, +1);
  1271  		}
  1272  		if(work.nwait == work.nproc)
  1273  			return nil;
  1274  		if(i < 10) {
  1275  			m->gcstats.nprocyield++;
  1276  			runtime·procyield(20);
  1277  		} else if(i < 20) {
  1278  			m->gcstats.nosyield++;
  1279  			runtime·osyield();
  1280  		} else {
  1281  			m->gcstats.nsleep++;
  1282  			runtime·usleep(100);
  1283  		}
  1284  	}
  1285  }
  1286  
  1287  static Workbuf*
  1288  handoff(Workbuf *b)
  1289  {
  1290  	int32 n;
  1291  	Workbuf *b1;
  1292  
  1293  	// Make new buffer with half of b's pointers.
  1294  	b1 = getempty(nil);
  1295  	n = b->nobj/2;
  1296  	b->nobj -= n;
  1297  	b1->nobj = n;
  1298  	runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
  1299  	m->gcstats.nhandoff++;
  1300  	m->gcstats.nhandoffcnt += n;
  1301  
  1302  	// Put b on full list - let first half of b get stolen.
  1303  	runtime·lfstackpush(&work.full, &b->node);
  1304  	return b1;
  1305  }
  1306  
  1307  static void
  1308  addroot(Obj obj)
  1309  {
  1310  	uint32 cap;
  1311  	Obj *new;
  1312  
  1313  	if(work.nroot >= work.rootcap) {
  1314  		cap = PageSize/sizeof(Obj);
  1315  		if(cap < 2*work.rootcap)
  1316  			cap = 2*work.rootcap;
  1317  		new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj));
  1318  		if(new == nil)
  1319  			runtime·throw("runtime: cannot allocate memory");
  1320  		if(work.roots != nil) {
  1321  			runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
  1322  			runtime·SysFree(work.roots, work.rootcap*sizeof(Obj));
  1323  		}
  1324  		work.roots = new;
  1325  		work.rootcap = cap;
  1326  	}
  1327  	work.roots[work.nroot] = obj;
  1328  	work.nroot++;
  1329  }
  1330  
  1331  extern byte pclntab[]; // base for f->ptrsoff
  1332  
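        // BitVector is the in-memory form of the compiler-emitted pointer bitmaps
        // (FUNCDATA_GCArgs/FUNCDATA_GCLocals): n counts bits, packed 32 per data word.
        // addframeroots also interprets a negative n as "locals occupy -n bytes, no
        // per-word bitmap available".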
  1333  typedef struct BitVector BitVector;
  1334  struct BitVector
  1335  {
  1336  	int32 n;
  1337  	uint32 data[];
  1338  };
  1339  
  1340  // Scans an interface data value when the interface type indicates
  1341  // that it is a pointer.
  1342  static void
  1343  scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue)
  1344  {
  1345  	Itab *tab;
  1346  	Type *type;
  1347  
  1348  	if(afterprologue) {
  1349  		if(bits == BitsIface) {
  1350  			tab = *(Itab**)scanp;
  1351  			if(tab->type->size <= sizeof(void*) && (tab->type->kind & KindNoPointers))
  1352  				return;
  1353  		} else { // bits == BitsEface
  1354  			type = *(Type**)scanp;
  1355  			if(type->size <= sizeof(void*) && (type->kind & KindNoPointers))
  1356  				return;
  1357  		}
  1358  	}
  1359  	addroot((Obj){scanp+PtrSize, PtrSize, 0});
  1360  }
  1361  
  1362  // Starting from scanp, scans words corresponding to set bits.
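        // Each pointer-sized word of the frame is described by BitsPerPointer (2) bits:
        // BitsNoPointer, BitsPointer, BitsIface or BitsEface (see the enum at the top
        // of this file).  Pointer words become roots directly; interface words go
        // through scaninterfacedata.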
  1363  static void
  1364  scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
  1365  {
  1366  	uintptr word, bits;
  1367  	uint32 *wordp;
  1368  	int32 i, remptrs;
  1369  
  1370  	wordp = bv->data;
  1371  	for(remptrs = bv->n; remptrs > 0; remptrs -= 32) {
  1372  		word = *wordp++;
  1373  		if(remptrs < 32)
  1374  			i = remptrs;
  1375  		else
  1376  			i = 32;
  1377  		i /= BitsPerPointer;
  1378  		for(; i > 0; i--) {
  1379  			bits = word & 3;
  1380  			if(bits != BitsNoPointer && *(void**)scanp != nil)
  1381  				if(bits == BitsPointer)
  1382  					addroot((Obj){scanp, PtrSize, 0});
  1383  				else
  1384  					scaninterfacedata(bits, scanp, afterprologue);
  1385  			word >>= BitsPerPointer;
  1386  			scanp += PtrSize;
  1387  		}
  1388  	}
  1389  }
  1390  
  1391  // Scan a stack frame: local variables and function arguments/results.
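        // Locals are handled according to what the compiler recorded: with no
        // FUNCDATA_GCLocals the whole area below varp is scanned conservatively, a
        // negative bitmap length gives just the size of the locals to scan, and a
        // positive length gives a per-word pointer bitmap for scanbitvector.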
  1392  static void
  1393  addframeroots(Stkframe *frame, void*)
  1394  {
  1395  	Func *f;
  1396  	BitVector *args, *locals;
  1397  	uintptr size;
  1398  	bool afterprologue;
  1399  
  1400  	f = frame->fn;
  1401  
  1402  	// Scan local variables if stack frame has been allocated.
  1403  	// Use pointer information if known.
  1404  	afterprologue = (frame->varp > (byte*)frame->sp);
  1405  	if(afterprologue) {
  1406  		locals = runtime·funcdata(f, FUNCDATA_GCLocals);
  1407  		if(locals == nil) {
  1408  			// No locals information, scan everything.
  1409  			size = frame->varp - (byte*)frame->sp;
  1410  			addroot((Obj){frame->varp - size, size, 0});
  1411  		} else if(locals->n < 0) {
  1412  			// Locals size information, scan just the
  1413  			// locals.
  1414  			size = -locals->n;
  1415  			addroot((Obj){frame->varp - size, size, 0});
  1416  		} else if(locals->n > 0) {
  1417  			// Locals bitmap information, scan just the
  1418  			// pointers in locals.
  1419  			size = (locals->n*PtrSize) / BitsPerPointer;
  1420  			scanbitvector(frame->varp - size, locals, afterprologue);
  1421  		}
  1422  	}
  1423  
  1424  	// Scan arguments.
  1425  	// Use pointer information if known.
  1426  	args = runtime·funcdata(f, FUNCDATA_GCArgs);
  1427  	if(args != nil && args->n > 0)
  1428  		scanbitvector(frame->argp, args, false);
  1429  	else
  1430  		addroot((Obj){frame->argp, frame->arglen, 0});
  1431  }
  1432  
  1433  static void
  1434  addstackroots(G *gp)
  1435  {
  1436  	M *mp;
  1437  	int32 n;
  1438  	Stktop *stk;
  1439  	uintptr sp, guard, pc, lr;
  1440  	void *base;
  1441  	uintptr size;
  1442  
  1443  	stk = (Stktop*)gp->stackbase;
  1444  	guard = gp->stackguard;
  1445  
  1446  	if(gp == g)
  1447  		runtime·throw("can't scan our own stack");
  1448  	if((mp = gp->m) != nil && mp->helpgc)
  1449  		runtime·throw("can't scan gchelper stack");
  1450  	if(gp->syscallstack != (uintptr)nil) {
  1451  		// Scanning another goroutine that is about to enter or might
  1452  		// have just exited a system call. It may be executing code such
  1453  		// as schedlock and may have needed to start a new stack segment.
  1454  		// Use the stack segment and stack pointer at the time of
  1455  		// the system call instead, since that won't change underfoot.
  1456  		sp = gp->syscallsp;
  1457  		pc = gp->syscallpc;
  1458  		lr = 0;
  1459  		stk = (Stktop*)gp->syscallstack;
  1460  		guard = gp->syscallguard;
  1461  	} else {
  1462  		// Scanning another goroutine's stack.
  1463  		// The goroutine is usually asleep (the world is stopped).
  1464  		sp = gp->sched.sp;
  1465  		pc = gp->sched.pc;
  1466  		lr = gp->sched.lr;
  1467  
  1468  		// For function about to start, context argument is a root too.
  1469  		if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil))
  1470  			addroot((Obj){base, size, 0});
  1471  	}
  1472  	if(ScanStackByFrames) {
  1473  		USED(stk);
  1474  		USED(guard);
  1475  		runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, addframeroots, nil, false);
  1476  	} else {
  1477  		USED(pc);
  1478  		n = 0;
  1479  		while(stk) {
  1480  			if(sp < guard-StackGuard || (uintptr)stk < sp) {
  1481  				runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
  1482  				runtime·throw("scanstack");
  1483  			}
  1484  			addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
  1485  			sp = stk->gobuf.sp;
  1486  			guard = stk->stackguard;
  1487  			stk = (Stktop*)stk->stackbase;
  1488  			n++;
  1489  		}
  1490  	}
  1491  }
  1492  
  1493  static void
  1494  addfinroots(void *v)
  1495  {
  1496  	uintptr size;
  1497  	void *base;
  1498  
  1499  	size = 0;
  1500  	if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
  1501  		runtime·throw("mark - finalizer inconsistency");
  1502  
  1503  	// do not mark the finalizer block itself.  just mark the things it points at.
  1504  	addroot((Obj){base, size, 0});
  1505  }
  1506  
  1507  static void
  1508  addroots(void)
  1509  {
  1510  	G *gp;
  1511  	FinBlock *fb;
  1512  	MSpan *s, **allspans;
  1513  	uint32 spanidx;
  1514  
  1515  	work.nroot = 0;
  1516  
  1517  	// data & bss
  1518  	// TODO(atom): load balancing
  1519  	addroot((Obj){data, edata - data, (uintptr)gcdata});
  1520  	addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
  1521  
  1522  	// MSpan.types
  1523  	allspans = runtime·mheap.allspans;
  1524  	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
  1525  		s = allspans[spanidx];
  1526  		if(s->state == MSpanInUse) {
  1527  			// The garbage collector ignores type pointers stored in MSpan.types:
  1528  			//  - Compiler-generated types are stored outside of heap.
  1529  			//  - The reflect package has runtime-generated types cached in its data structures.
  1530  			//    The garbage collector relies on finding the references via that cache.
  1531  			switch(s->types.compression) {
  1532  			case MTypes_Empty:
  1533  			case MTypes_Single:
  1534  				break;
  1535  			case MTypes_Words:
  1536  			case MTypes_Bytes:
  1537  				markonly((byte*)s->types.data);
  1538  				break;
  1539  			}
  1540  		}
  1541  	}
  1542  
  1543  	// stacks
  1544  	for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
  1545  		switch(gp->status){
  1546  		default:
  1547  			runtime·printf("unexpected G.status %d\n", gp->status);
  1548  			runtime·throw("mark - bad status");
  1549  		case Gdead:
  1550  			break;
  1551  		case Grunning:
  1552  			runtime·throw("mark - world not stopped");
  1553  		case Grunnable:
  1554  		case Gsyscall:
  1555  		case Gwaiting:
  1556  			addstackroots(gp);
  1557  			break;
  1558  		}
  1559  	}
  1560  
  1561  	runtime·walkfintab(addfinroots);
  1562  
  1563  	for(fb=allfin; fb; fb=fb->alllink)
  1564  		addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
  1565  }
  1566  
  1567  static bool
  1568  handlespecial(byte *p, uintptr size)
  1569  {
  1570  	FuncVal *fn;
  1571  	uintptr nret;
  1572  	PtrType *ot;
  1573  	Type *fint;
  1574  	FinBlock *block;
  1575  	Finalizer *f;
  1576  
  1577  	if(!runtime·getfinalizer(p, true, &fn, &nret, &fint, &ot)) {
  1578  		runtime·setblockspecial(p, false);
  1579  		runtime·MProf_Free(p, size);
  1580  		return false;
  1581  	}
  1582  
  1583  	runtime·lock(&finlock);
  1584  	if(finq == nil || finq->cnt == finq->cap) {
  1585  		if(finc == nil) {
  1586  			finc = runtime·persistentalloc(PageSize, 0);
  1587  			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
  1588  			finc->alllink = allfin;
  1589  			allfin = finc;
  1590  		}
  1591  		block = finc;
  1592  		finc = block->next;
  1593  		block->next = finq;
  1594  		finq = block;
  1595  	}
  1596  	f = &finq->fin[finq->cnt];
  1597  	finq->cnt++;
  1598  	f->fn = fn;
  1599  	f->nret = nret;
  1600  	f->fint = fint;
  1601  	f->ot = ot;
  1602  	f->arg = p;
  1603  	runtime·unlock(&finlock);
  1604  	return true;
  1605  }
  1606  
  1607  // Sweep frees or collects finalizers for blocks not marked in the mark phase.
  1608  // It clears the mark bits in preparation for the next GC round.
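        // For each object in the span: unallocated slots are skipped, marked objects
        // just have bitMarked cleared, unmarked objects with bitSpecial set get a
        // chance to queue a finalizer via handlespecial, and everything else is freed;
        // whole large spans go back to the heap, small objects are batched onto a free
        // list and returned with MCentral_FreeSpan.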
  1609  static void
  1610  sweepspan(ParFor *desc, uint32 idx)
  1611  {
  1612  	int32 cl, n, npages;
  1613  	uintptr size;
  1614  	byte *p;
  1615  	MCache *c;
  1616  	byte *arena_start;
  1617  	MLink head, *end;
  1618  	int32 nfree;
  1619  	byte *type_data;
  1620  	byte compression;
  1621  	uintptr type_data_inc;
  1622  	MSpan *s;
  1623  
  1624  	USED(&desc);
  1625  	s = runtime·mheap.allspans[idx];
  1626  	if(s->state != MSpanInUse)
  1627  		return;
  1628  	arena_start = runtime·mheap.arena_start;
  1629  	p = (byte*)(s->start << PageShift);
  1630  	cl = s->sizeclass;
  1631  	size = s->elemsize;
  1632  	if(cl == 0) {
  1633  		n = 1;
  1634  	} else {
  1635  		// Chunk full of small blocks.
  1636  		npages = runtime·class_to_allocnpages[cl];
  1637  		n = (npages << PageShift) / size;
  1638  	}
  1639  	nfree = 0;
  1640  	end = &head;
  1641  	c = m->mcache;
  1642  	
  1643  	type_data = (byte*)s->types.data;
  1644  	type_data_inc = sizeof(uintptr);
  1645  	compression = s->types.compression;
  1646  	switch(compression) {
  1647  	case MTypes_Bytes:
  1648  		type_data += 8*sizeof(uintptr);
  1649  		type_data_inc = 1;
  1650  		break;
  1651  	}
  1652  
  1653  	// Sweep through n objects of given size starting at p.
  1654  	// This thread owns the span now, so it can manipulate
  1655  	// the block bitmap without atomic operations.
  1656  	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
  1657  		uintptr off, *bitp, shift, bits;
  1658  
  1659  		off = (uintptr*)p - (uintptr*)arena_start;
  1660  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1661  		shift = off % wordsPerBitmapWord;
  1662  		bits = *bitp>>shift;
  1663  
  1664  		if((bits & bitAllocated) == 0)
  1665  			continue;
  1666  
  1667  		if((bits & bitMarked) != 0) {
  1668  			if(DebugMark) {
  1669  				if(!(bits & bitSpecial))
  1670  					runtime·printf("found spurious mark on %p\n", p);
  1671  				*bitp &= ~(bitSpecial<<shift);
  1672  			}
  1673  			*bitp &= ~(bitMarked<<shift);
  1674  			continue;
  1675  		}
  1676  
  1677  		// Special means it has a finalizer or is being profiled.
  1678  		// In DebugMark mode, the bit has been coopted so
  1679  		// we have to assume all blocks are special.
  1680  		if(DebugMark || (bits & bitSpecial) != 0) {
  1681  			if(handlespecial(p, size))
  1682  				continue;
  1683  		}
  1684  
  1685  		// Mark freed; restore block boundary bit.
  1686  		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  1687  
  1688  		if(cl == 0) {
  1689  			// Free large span.
  1690  			runtime·unmarkspan(p, 1<<PageShift);
  1691  			*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll;	// needs zeroing
  1692  			runtime·MHeap_Free(&runtime·mheap, s, 1);
  1693  			c->local_nlargefree++;
  1694  			c->local_largefree += size;
  1695  		} else {
  1696  			// Free small object.
  1697  			switch(compression) {
  1698  			case MTypes_Words:
  1699  				*(uintptr*)type_data = 0;
  1700  				break;
  1701  			case MTypes_Bytes:
  1702  				*(byte*)type_data = 0;
  1703  				break;
  1704  			}
  1705  			if(size > sizeof(uintptr))
  1706  				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
  1707  			
  1708  			end->next = (MLink*)p;
  1709  			end = (MLink*)p;
  1710  			nfree++;
  1711  		}
  1712  	}
  1713  
  1714  	if(nfree) {
  1715  		c->local_nsmallfree[cl] += nfree;
  1716  		c->local_cachealloc -= nfree * size;
  1717  		runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
  1718  	}
  1719  }
  1720  
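        // dumpspan prints the contents of span runtime·mheap.allspans[idx], one word
        // per cell: allocated blocks are bracketed with ( ), free blocks with [ ],
        // and blocks with the special bit set are prefixed with @.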
  1721  static void
  1722  dumpspan(uint32 idx)
  1723  {
  1724  	int32 sizeclass, n, npages, i, column;
  1725  	uintptr size;
  1726  	byte *p;
  1727  	byte *arena_start;
  1728  	MSpan *s;
  1729  	bool allocated, special;
  1730  
  1731  	s = runtime·mheap.allspans[idx];
  1732  	if(s->state != MSpanInUse)
  1733  		return;
  1734  	arena_start = runtime·mheap.arena_start;
  1735  	p = (byte*)(s->start << PageShift);
  1736  	sizeclass = s->sizeclass;
  1737  	size = s->elemsize;
  1738  	if(sizeclass == 0) {
  1739  		n = 1;
  1740  	} else {
  1741  		npages = runtime·class_to_allocnpages[sizeclass];
  1742  		n = (npages << PageShift) / size;
  1743  	}
  1744  	
  1745  	runtime·printf("%p .. %p:\n", p, p+n*size);
  1746  	column = 0;
  1747  	for(; n>0; n--, p+=size) {
  1748  		uintptr off, *bitp, shift, bits;
  1749  
  1750  		off = (uintptr*)p - (uintptr*)arena_start;
  1751  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1752  		shift = off % wordsPerBitmapWord;
  1753  		bits = *bitp>>shift;
  1754  
  1755  		allocated = ((bits & bitAllocated) != 0);
  1756  		special = ((bits & bitSpecial) != 0);
  1757  
  1758  		for(i=0; i<size; i+=sizeof(void*)) {
  1759  			if(column == 0) {
  1760  				runtime·printf("\t");
  1761  			}
  1762  			if(i == 0) {
  1763  				runtime·printf(allocated ? "(" : "[");
  1764  				runtime·printf(special ? "@" : "");
  1765  				runtime·printf("%p: ", p+i);
  1766  			} else {
  1767  				runtime·printf(" ");
  1768  			}
  1769  
  1770  			runtime·printf("%p", *(void**)(p+i));
  1771  
  1772  			if(i+sizeof(void*) >= size) {
  1773  				runtime·printf(allocated ? ") " : "] ");
  1774  			}
  1775  
  1776  			column++;
  1777  			if(column == 8) {
  1778  				runtime·printf("\n");
  1779  				column = 0;
  1780  			}
  1781  		}
  1782  	}
  1783  	runtime·printf("\n");
  1784  }
  1785  
  1786  // A debugging function to dump the contents of memory.
  1787  void
  1788  runtime·memorydump(void)
  1789  {
  1790  	uint32 spanidx;
  1791  
  1792  	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
  1793  		dumpspan(spanidx);
  1794  	}
  1795  }
  1796  
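        // runtime·gchelper is the body of a GC helper M: it joins the parallel mark
        // over the roots, helps drain the scan work buffers, joins the parallel
        // sweep, and wakes work.alldone when it is the last helper to finish.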
  1797  void
  1798  runtime·gchelper(void)
  1799  {
  1800  	gchelperstart();
  1801  
  1802  	// run the parallel mark over the gc roots
  1803  	runtime·parfordo(work.markfor);
  1804  
  1805  	// help other threads scan secondary blocks
  1806  	scanblock(nil, nil, 0, true);
  1807  
  1808  	if(DebugMark) {
  1809  		// wait while the main thread runs the debug mark pass (debug_scanblock)
  1810  		while(runtime·atomicload(&work.debugmarkdone) == 0)
  1811  			runtime·usleep(10);
  1812  	}
  1813  
  1814  	runtime·parfordo(work.sweepfor);
  1815  	bufferList[m->helpgc].busy = 0;
  1816  	if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
  1817  		runtime·notewakeup(&work.alldone);
  1818  }
  1819  
  1820  #define GcpercentUnknown (-2)
  1821  
  1822  // Initialized from $GOGC.  GOGC=off means no gc.
  1823  //
  1824  // Next gc is after we've allocated an extra amount of
  1825  // memory proportional to the amount already in use.
  1826  // If gcpercent=100 and we're using 4M, we'll gc again
  1827  // when we get to 8M.  This keeps the gc cost in linear
  1828  // proportion to the allocation cost.  Adjusting gcpercent
  1829  // just changes the linear constant (and also the amount of
  1830  // extra memory used).
  1831  static int32 gcpercent = GcpercentUnknown;
  1832  
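        // cachestats flushes the cached statistics of every P's MCache into the
        // global mstats.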
  1833  static void
  1834  cachestats(void)
  1835  {
  1836  	MCache *c;
  1837  	P *p, **pp;
  1838  
  1839  	for(pp=runtime·allp; p=*pp; pp++) {
  1840  		c = p->mcache;
  1841  		if(c==nil)
  1842  			continue;
  1843  		runtime·purgecachedstats(c);
  1844  	}
  1845  }
  1846  
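        // updatememstats recomputes the derived fields of mstats: it flushes the
        // per-M and per-P caches, rescans all spans to count live objects and bytes,
        // and, if stats is non-nil, accumulates each M's GCStats into *stats before
        // clearing them.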
  1847  static void
  1848  updatememstats(GCStats *stats)
  1849  {
  1850  	M *mp;
  1851  	MSpan *s;
  1852  	MCache *c;
  1853  	P *p, **pp;
  1854  	int32 i;
  1855  	uint64 stacks_inuse, smallfree;
  1856  	uint64 *src, *dst;
  1857  
  1858  	if(stats)
  1859  		runtime·memclr((byte*)stats, sizeof(*stats));
  1860  	stacks_inuse = 0;
  1861  	for(mp=runtime·allm; mp; mp=mp->alllink) {
  1862  		stacks_inuse += mp->stackinuse*FixedStack;
  1863  		if(stats) {
  1864  			src = (uint64*)&mp->gcstats;
  1865  			dst = (uint64*)stats;
  1866  			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
  1867  				dst[i] += src[i];
  1868  			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
  1869  		}
  1870  	}
  1871  	mstats.stacks_inuse = stacks_inuse;
  1872  
  1873  	// Calculate memory allocator stats.
  1874  	// During program execution we count only the number of frees and the amount of freed memory.
  1875  	// The current number of live objects in the heap and the amount of live heap memory
  1876  	// are calculated by scanning all spans.
  1877  	// The total number of mallocs is calculated as the number of frees plus the number of live objects.
  1878  	// Similarly, the total amount of allocated memory is calculated as the amount of freed memory
  1879  	// plus the amount of live heap memory.
  1880  	mstats.alloc = 0;
  1881  	mstats.total_alloc = 0;
  1882  	mstats.nmalloc = 0;
  1883  	mstats.nfree = 0;
  1884  	for(i = 0; i < nelem(mstats.by_size); i++) {
  1885  		mstats.by_size[i].nmalloc = 0;
  1886  		mstats.by_size[i].nfree = 0;
  1887  	}
  1888  
  1889  	// Flush MCaches to MCentral.
  1890  	for(pp=runtime·allp; p=*pp; pp++) {
  1891  		c = p->mcache;
  1892  		if(c==nil)
  1893  			continue;
  1894  		runtime·MCache_ReleaseAll(c);
  1895  	}
  1896  
  1897  	// Aggregate local stats.
  1898  	cachestats();
  1899  
  1900  	// Scan all spans and count number of alive objects.
  1901  	for(i = 0; i < runtime·mheap.nspan; i++) {
  1902  		s = runtime·mheap.allspans[i];
  1903  		if(s->state != MSpanInUse)
  1904  			continue;
  1905  		if(s->sizeclass == 0) {
  1906  			mstats.nmalloc++;
  1907  			mstats.alloc += s->elemsize;
  1908  		} else {
  1909  			mstats.nmalloc += s->ref;
  1910  			mstats.by_size[s->sizeclass].nmalloc += s->ref;
  1911  			mstats.alloc += s->ref*s->elemsize;
  1912  		}
  1913  	}
  1914  
  1915  	// Aggregate by size class.
  1916  	smallfree = 0;
  1917  	mstats.nfree = runtime·mheap.nlargefree;
  1918  	for(i = 0; i < nelem(mstats.by_size); i++) {
  1919  		mstats.nfree += runtime·mheap.nsmallfree[i];
  1920  		mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
  1921  		mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
  1922  		smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
  1923  	}
  1924  	mstats.nmalloc += mstats.nfree;
  1925  
  1926  	// Calculate derived stats.
  1927  	mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
  1928  	mstats.heap_alloc = mstats.alloc;
  1929  	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
  1930  }
  1931  
  1932  // Structure of arguments passed to function gc().
  1933  // This allows the arguments to be passed via runtime·mcall.
  1934  struct gc_args
  1935  {
  1936  	int64 start_time; // start time of GC in ns (just before stoptheworld)
  1937  };
  1938  
  1939  static void gc(struct gc_args *args);
  1940  static void mgc(G *gp);
  1941  
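        // readgogc parses $GOGC: unset or empty means the default of 100, "off"
        // disables collection (-1), and anything else is used as the percentage.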
  1942  static int32
  1943  readgogc(void)
  1944  {
  1945  	byte *p;
  1946  
  1947  	p = runtime·getenv("GOGC");
  1948  	if(p == nil || p[0] == '\0')
  1949  		return 100;
  1950  	if(runtime·strcmp(p, (byte*)"off") == 0)
  1951  		return -1;
  1952  	return runtime·atoi(p);
  1953  }
  1954  
  1955  static FuncVal runfinqv = {runfinq};
  1956  
  1957  void
  1958  runtime·gc(int32 force)
  1959  {
  1960  	struct gc_args a;
  1961  	int32 i;
  1962  
  1963  	// The atomic operations are not atomic if the uint64s
  1964  	// are not aligned on uint64 boundaries. This has been
  1965  	// a problem in the past.
  1966  	if((((uintptr)&work.empty) & 7) != 0)
  1967  		runtime·throw("runtime: gc work buffer is misaligned");
  1968  	if((((uintptr)&work.full) & 7) != 0)
  1969  		runtime·throw("runtime: gc work buffer is misaligned");
  1970  
  1971  	// The gc is turned off (via enablegc) until
  1972  	// the bootstrap has completed.
  1973  	// Also, malloc gets called in the guts
  1974  	// of a number of libraries that might be
  1975  	// holding locks.  To avoid priority inversion
  1976  	// problems, don't bother trying to run gc
  1977  	// while holding a lock.  The next mallocgc
  1978  	// without a lock will do the gc instead.
  1979  	if(!mstats.enablegc || g == m->g0 || m->locks > 0 || runtime·panicking)
  1980  		return;
  1981  
  1982  	if(gcpercent == GcpercentUnknown) {	// first time through
  1983  		runtime·lock(&runtime·mheap);
  1984  		if(gcpercent == GcpercentUnknown)
  1985  			gcpercent = readgogc();
  1986  		runtime·unlock(&runtime·mheap);
  1987  	}
  1988  	if(gcpercent < 0)
  1989  		return;
  1990  
  1991  	runtime·semacquire(&runtime·worldsema, false);
  1992  	if(!force && mstats.heap_alloc < mstats.next_gc) {
  1993  		// typically threads which lost the race to grab
  1994  		// worldsema exit here when gc is done.
  1995  		runtime·semrelease(&runtime·worldsema);
  1996  		return;
  1997  	}
  1998  
  1999  	// Ok, we're doing it!  Stop everybody else
  2000  	a.start_time = runtime·nanotime();
  2001  	m->gcing = 1;
  2002  	runtime·stoptheworld();
  2003  	
  2004  	// Run gc on the g0 stack.  We do this so that the g stack
  2005  	// we're currently running on will no longer change.  Cuts
  2006  	// the root set down a bit (g0 stacks are not scanned, and
  2007  	// we don't need to scan gc's internal state).  Also an
  2008  	// enabler for copyable stacks.
  2009  	for(i = 0; i < (runtime·debug.gctrace > 1 ? 2 : 1); i++) {
  2010  		// switch to g0, call gc(&a), then switch back
  2011  		g->param = &a;
  2012  		g->status = Gwaiting;
  2013  		g->waitreason = "garbage collection";
  2014  		runtime·mcall(mgc);
  2015  		// record a new start time in case we're going around again
  2016  		a.start_time = runtime·nanotime();
  2017  	}
  2018  
  2019  	// all done
  2020  	m->gcing = 0;
  2021  	m->locks++;
  2022  	runtime·semrelease(&runtime·worldsema);
  2023  	runtime·starttheworld();
  2024  	m->locks--;
  2025  
  2026  	// now that gc is done, kick off finalizer thread if needed
  2027  	if(finq != nil) {
  2028  		runtime·lock(&finlock);
  2029  		// kick off or wake up goroutine to run queued finalizers
  2030  		if(fing == nil)
  2031  			fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
  2032  		else if(fingwait) {
  2033  			fingwait = 0;
  2034  			runtime·ready(fing);
  2035  		}
  2036  		runtime·unlock(&finlock);
  2037  	}
  2038  	// give the queued finalizers, if any, a chance to run
  2039  	runtime·gosched();
  2040  }
  2041  
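        // mgc is the runtime·mcall target used by runtime·gc: it runs the collection
        // on the g0 stack and then resumes the calling goroutine.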
  2042  static void
  2043  mgc(G *gp)
  2044  {
  2045  	gc(gp->param);
  2046  	gp->param = nil;
  2047  	gp->status = Grunning;
  2048  	runtime·gogo(&gp->sched);
  2049  }
  2050  
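        // gc performs a single stop-the-world collection: it flushes pending type
        // information, sets up the parallel mark and sweep, runs them (with helper
        // Ms when available), updates the memory statistics, and prints a gctrace
        // line if requested.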
  2051  static void
  2052  gc(struct gc_args *args)
  2053  {
  2054  	int64 t0, t1, t2, t3, t4;
  2055  	uint64 heap0, heap1, obj0, obj1, ninstr;
  2056  	GCStats stats;
  2057  	M *mp;
  2058  	uint32 i;
  2059  	Eface eface;
  2060  
  2061  	t0 = args->start_time;
  2062  
  2063  	if(CollectStats)
  2064  		runtime·memclr((byte*)&gcstats, sizeof(gcstats));
  2065  
  2066  	for(mp=runtime·allm; mp; mp=mp->alllink)
  2067  		runtime·settype_flush(mp);
  2068  
  2069  	heap0 = 0;
  2070  	obj0 = 0;
  2071  	if(runtime·debug.gctrace) {
  2072  		updatememstats(nil);
  2073  		heap0 = mstats.heap_alloc;
  2074  		obj0 = mstats.nmalloc - mstats.nfree;
  2075  	}
  2076  
  2077  	m->locks++;	// disable gc during mallocs in parforalloc
  2078  	if(work.markfor == nil)
  2079  		work.markfor = runtime·parforalloc(MaxGcproc);
  2080  	if(work.sweepfor == nil)
  2081  		work.sweepfor = runtime·parforalloc(MaxGcproc);
  2082  	m->locks--;
  2083  
  2084  	if(itabtype == nil) {
  2085  		// get C pointer to the Go type "itab"
  2086  		runtime·gc_itab_ptr(&eface);
  2087  		itabtype = ((PtrType*)eface.type)->elem;
  2088  	}
  2089  
  2090  	work.nwait = 0;
  2091  	work.ndone = 0;
  2092  	work.debugmarkdone = 0;
  2093  	work.nproc = runtime·gcprocs();
  2094  	addroots();
  2095  	runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
  2096  	runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
  2097  	if(work.nproc > 1) {
  2098  		runtime·noteclear(&work.alldone);
  2099  		runtime·helpgc(work.nproc);
  2100  	}
  2101  
  2102  	t1 = runtime·nanotime();
  2103  
  2104  	gchelperstart();
  2105  	runtime·parfordo(work.markfor);
  2106  	scanblock(nil, nil, 0, true);
  2107  
  2108  	if(DebugMark) {
  2109  		for(i=0; i<work.nroot; i++)
  2110  			debug_scanblock(work.roots[i].p, work.roots[i].n);
  2111  		runtime·atomicstore(&work.debugmarkdone, 1);
  2112  	}
  2113  	t2 = runtime·nanotime();
  2114  
  2115  	runtime·parfordo(work.sweepfor);
  2116  	bufferList[m->helpgc].busy = 0;
  2117  	t3 = runtime·nanotime();
  2118  
  2119  	if(work.nproc > 1)
  2120  		runtime·notesleep(&work.alldone);
  2121  
  2122  	cachestats();
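        	// next_gc = heap_alloc * (1 + gcpercent/100); for example, 6 MB of live
        	// heap with the default GOGC=100 schedules the next collection once
        	// another 6 MB has been allocated, at 12 MB.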
  2123  	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
  2124  
  2125  	t4 = runtime·nanotime();
  2126  	mstats.last_gc = t4;
  2127  	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
  2128  	mstats.pause_total_ns += t4 - t0;
  2129  	mstats.numgc++;
  2130  	if(mstats.debuggc)
  2131  		runtime·printf("pause %D\n", t4-t0);
  2132  
  2133  	if(runtime·debug.gctrace) {
  2134  		updatememstats(&stats);
  2135  		heap1 = mstats.heap_alloc;
  2136  		obj1 = mstats.nmalloc - mstats.nfree;
  2137  
  2138  		stats.nprocyield += work.sweepfor->nprocyield;
  2139  		stats.nosyield += work.sweepfor->nosyield;
  2140  		stats.nsleep += work.sweepfor->nsleep;
  2141  
  2142  		runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
  2143  				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
  2144  			mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
  2145  			heap0>>20, heap1>>20, obj0, obj1,
  2146  			mstats.nmalloc, mstats.nfree,
  2147  			stats.nhandoff, stats.nhandoffcnt,
  2148  			work.sweepfor->nsteal, work.sweepfor->nstealcnt,
  2149  			stats.nprocyield, stats.nosyield, stats.nsleep);
  2150  		if(CollectStats) {
  2151  			runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
  2152  				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
  2153  			if(gcstats.ptr.cnt != 0)
  2154  				runtime·printf("avg ptrbufsize: %D (%D/%D)\n",
  2155  					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
  2156  			if(gcstats.obj.cnt != 0)
  2157  				runtime·printf("avg nobj: %D (%D/%D)\n",
  2158  					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
  2159  			runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
  2160  
  2161  			runtime·printf("instruction counts:\n");
  2162  			ninstr = 0;
  2163  			for(i=0; i<nelem(gcstats.instr); i++) {
  2164  				runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]);
  2165  				ninstr += gcstats.instr[i];
  2166  			}
  2167  			runtime·printf("\ttotal:\t%D\n", ninstr);
  2168  
  2169  			runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
  2170  
  2171  			runtime·printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
  2172  			runtime·printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
  2173  		}
  2174  	}
  2175  
  2176  	runtime·MProf_GC();
  2177  }
  2178  
  2179  void
  2180  runtime·ReadMemStats(MStats *stats)
  2181  {
  2182  	// Have to acquire worldsema to stop the world,
  2183  	// because stoptheworld can only be used by
  2184  	// one goroutine at a time, and there might be
  2185  	// a pending garbage collection already calling it.
  2186  	runtime·semacquire(&runtime·worldsema, false);
  2187  	m->gcing = 1;
  2188  	runtime·stoptheworld();
  2189  	updatememstats(nil);
  2190  	*stats = mstats;
  2191  	m->gcing = 0;
  2192  	m->locks++;
  2193  	runtime·semrelease(&runtime·worldsema);
  2194  	runtime·starttheworld();
  2195  	m->locks--;
  2196  }
  2197  
  2198  void
  2199  runtime∕debug·readGCStats(Slice *pauses)
  2200  {
  2201  	uint64 *p;
  2202  	uint32 i, n;
  2203  
  2204  	// Calling code in runtime/debug should make the slice large enough.
  2205  	if(pauses->cap < nelem(mstats.pause_ns)+3)
  2206  		runtime·throw("runtime: short slice passed to readGCStats");
  2207  
  2208  	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
  2209  	p = (uint64*)pauses->array;
  2210  	runtime·lock(&runtime·mheap);
  2211  	n = mstats.numgc;
  2212  	if(n > nelem(mstats.pause_ns))
  2213  		n = nelem(mstats.pause_ns);
  2214  	
  2215  	// The pause buffer is circular. The most recent pause is at
  2216  	// pause_ns[(numgc-1)%nelem(pause_ns)]; earlier pauses are found by
  2217  	// walking backward from there. We deliver the times
  2218  	// most recent first (in p[0]).
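        	// For illustration: if numgc were 5 and the buffer held only 4 entries,
        	// p[0..3] would be pause_ns[0], pause_ns[3], pause_ns[2], pause_ns[1]
        	// (gcs 5, 4, 3, 2).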
  2219  	for(i=0; i<n; i++)
  2220  		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
  2221  
  2222  	p[n] = mstats.last_gc;
  2223  	p[n+1] = mstats.numgc;
  2224  	p[n+2] = mstats.pause_total_ns;	
  2225  	runtime·unlock(&runtime·mheap);
  2226  	pauses->len = n+3;
  2227  }
  2228  
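        // runtime∕debug·setGCPercent reports the previous setting in out and
        // installs the new percentage; negative values disable collection.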
  2229  void
  2230  runtime∕debug·setGCPercent(intgo in, intgo out)
  2231  {
  2232  	runtime·lock(&runtime·mheap);
  2233  	if(gcpercent == GcpercentUnknown)
  2234  		gcpercent = readgogc();
  2235  	out = gcpercent;
  2236  	if(in < 0)
  2237  		in = -1;
  2238  	gcpercent = in;
  2239  	runtime·unlock(&runtime·mheap);
  2240  	FLUSH(&out);
  2241  }
  2242  
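        // gchelperstart checks that this M is a valid GC helper running on its g0
        // stack and claims its scan buffer in bufferList.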
  2243  static void
  2244  gchelperstart(void)
  2245  {
  2246  	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
  2247  		runtime·throw("gchelperstart: bad m->helpgc");
  2248  	if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
  2249  		runtime·throw("gchelperstart: already busy");
  2250  	if(g != m->g0)
  2251  		runtime·throw("gchelper not running on g0 stack");
  2252  }
  2253  
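        // runfinq is the body of the finalizer goroutine: it repeatedly takes the
        // queued finalizers from finq, builds an argument frame for each one,
        // invokes it via reflect·call, and parks when the queue is empty.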
  2254  static void
  2255  runfinq(void)
  2256  {
  2257  	Finalizer *f;
  2258  	FinBlock *fb, *next;
  2259  	byte *frame;
  2260  	uint32 framesz, framecap, i;
  2261  	Eface *ef, ef1;
  2262  
  2263  	frame = nil;
  2264  	framecap = 0;
  2265  	for(;;) {
  2266  		runtime·lock(&finlock);
  2267  		fb = finq;
  2268  		finq = nil;
  2269  		if(fb == nil) {
  2270  			fingwait = 1;
  2271  			runtime·park(runtime·unlock, &finlock, "finalizer wait");
  2272  			continue;
  2273  		}
  2274  		runtime·unlock(&finlock);
  2275  		if(raceenabled)
  2276  			runtime·racefingo();
  2277  		for(; fb; fb=next) {
  2278  			next = fb->next;
  2279  			for(i=0; i<fb->cnt; i++) {
  2280  				f = &fb->fin[i];
  2281  				framesz = sizeof(Eface) + f->nret;
  2282  				if(framecap < framesz) {
  2283  					runtime·free(frame);
  2284  					// The frame does not contain pointers that are interesting to the GC;
  2285  					// all not-yet-finalized objects are stored in finc.
  2286  					// If we did not mark the frame as FlagNoScan,
  2287  					// the last finalized object would not be collected.
  2288  					frame = runtime·mallocgc(framesz, 0, FlagNoScan|FlagNoInvokeGC);
  2289  					framecap = framesz;
  2290  				}
  2291  				if(f->fint == nil)
  2292  					runtime·throw("missing type in runfinq");
  2293  				if(f->fint->kind == KindPtr) {
  2294  					// direct use of pointer
  2295  					*(void**)frame = f->arg;
  2296  				} else if(((InterfaceType*)f->fint)->mhdr.len == 0) {
  2297  					// convert to empty interface
  2298  					ef = (Eface*)frame;
  2299  					ef->type = f->ot;
  2300  					ef->data = f->arg;
  2301  				} else {
  2302  					// convert to interface with methods, via empty interface.
  2303  					ef1.type = f->ot;
  2304  					ef1.data = f->arg;
  2305  					if(!runtime·ifaceE2I2((InterfaceType*)f->fint, ef1, (Iface*)frame))
  2306  						runtime·throw("invalid type conversion in runfinq");
  2307  				}
  2308  				reflect·call(f->fn, frame, framesz);
  2309  				f->fn = nil;
  2310  				f->arg = nil;
  2311  				f->ot = nil;
  2312  			}
  2313  			fb->cnt = 0;
  2314  			fb->next = finc;
  2315  			finc = fb;
  2316  		}
  2317  		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible
  2318  	}
  2319  }
  2320  
  2321  // mark the block at v of size n as allocated.
  2322  // If noscan is true, mark it as not needing scanning.
  2323  void
  2324  runtime·markallocated(void *v, uintptr n, bool noscan)
  2325  {
  2326  	uintptr *b, obits, bits, off, shift;
  2327  
  2328  	if(0)
  2329  		runtime·printf("markallocated %p+%p\n", v, n);
  2330  
  2331  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2332  		runtime·throw("markallocated: bad pointer");
  2333  
  2334  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
  2335  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2336  	shift = off % wordsPerBitmapWord;
  2337  
  2338  	for(;;) {
  2339  		obits = *b;
  2340  		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
  2341  		if(noscan)
  2342  			bits |= bitNoScan<<shift;
  2343  		if(runtime·gomaxprocs == 1) {
  2344  			*b = bits;
  2345  			break;
  2346  		} else {
  2347  			// more than one goroutine is potentially running: use atomic op
  2348  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2349  				break;
  2350  		}
  2351  	}
  2352  }
  2353  
  2354  // mark the block at v of size n as freed.
  2355  void
  2356  runtime·markfreed(void *v, uintptr n)
  2357  {
  2358  	uintptr *b, obits, bits, off, shift;
  2359  
  2360  	if(0)
  2361  		runtime·printf("markfreed %p+%p\n", v, n);
  2362  
  2363  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2364  		runtime·throw("markfreed: bad pointer");
  2365  
  2366  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
  2367  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2368  	shift = off % wordsPerBitmapWord;
  2369  
  2370  	for(;;) {
  2371  		obits = *b;
  2372  		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  2373  		if(runtime·gomaxprocs == 1) {
  2374  			*b = bits;
  2375  			break;
  2376  		} else {
  2377  			// more than one goroutine is potentially running: use atomic op
  2378  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2379  				break;
  2380  		}
  2381  	}
  2382  }
  2383  
  2384  // check that the block at v of size n is marked freed.
  2385  void
  2386  runtime·checkfreed(void *v, uintptr n)
  2387  {
  2388  	uintptr *b, bits, off, shift;
  2389  
  2390  	if(!runtime·checking)
  2391  		return;
  2392  
  2393  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2394  		return;	// not allocated, so okay
  2395  
  2396  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
  2397  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2398  	shift = off % wordsPerBitmapWord;
  2399  
  2400  	bits = *b>>shift;
  2401  	if((bits & bitAllocated) != 0) {
  2402  		runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
  2403  			v, n, off, bits & bitMask);
  2404  		runtime·throw("checkfreed: not freed");
  2405  	}
  2406  }
  2407  
  2408  // mark the span of memory at v as having n blocks of the given size.
  2409  // if leftover is true, there is left over space at the end of the span.
  2410  void
  2411  runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
  2412  {
  2413  	uintptr *b, off, shift;
  2414  	byte *p;
  2415  
  2416  	if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2417  		runtime·throw("markspan: bad pointer");
  2418  
  2419  	p = v;
  2420  	if(leftover)	// mark a boundary just past end of last block too
  2421  		n++;
  2422  	for(; n-- > 0; p += size) {
  2423  		// Okay to use non-atomic ops here, because we control
  2424  		// the entire span, and each bitmap word has bits for only
  2425  		// one span, so no other goroutines are changing these
  2426  		// bitmap words.
  2427  		off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;  // word offset
  2428  		b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2429  		shift = off % wordsPerBitmapWord;
  2430  		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  2431  	}
  2432  }
  2433  
  2434  // unmark the span of memory at v of length n bytes.
  2435  void
  2436  runtime·unmarkspan(void *v, uintptr n)
  2437  {
  2438  	uintptr *p, *b, off;
  2439  
  2440  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2441  		runtime·throw("unmarkspan: bad pointer");
  2442  
  2443  	p = v;
  2444  	off = p - (uintptr*)runtime·mheap.arena_start;  // word offset
  2445  	if(off % wordsPerBitmapWord != 0)
  2446  		runtime·throw("unmarkspan: unaligned pointer");
  2447  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2448  	n /= PtrSize;
  2449  	if(n%wordsPerBitmapWord != 0)
  2450  		runtime·throw("unmarkspan: unaligned length");
  2451  	// Okay to use non-atomic ops here, because we control
  2452  	// the entire span, and each bitmap word has bits for only
  2453  	// one span, so no other goroutines are changing these
  2454  	// bitmap words.
  2455  	n /= wordsPerBitmapWord;
  2456  	while(n-- > 0)
  2457  		*b-- = 0;
  2458  }
  2459  
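        // blockspecial reports whether the block at v has its special bit set
        // (it has a finalizer or is being profiled); under DebugMark the bit is
        // co-opted, so every block is treated as special.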
  2460  bool
  2461  runtime·blockspecial(void *v)
  2462  {
  2463  	uintptr *b, off, shift;
  2464  
  2465  	if(DebugMark)
  2466  		return true;
  2467  
  2468  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
  2469  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2470  	shift = off % wordsPerBitmapWord;
  2471  
  2472  	return (*b & (bitSpecial<<shift)) != 0;
  2473  }
  2474  
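        // setblockspecial sets (s true) or clears (s false) the special bit for the
        // block at v, using an atomic op when more than one proc may be running;
        // under DebugMark the bit is co-opted, so this is a no-op.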
  2475  void
  2476  runtime·setblockspecial(void *v, bool s)
  2477  {
  2478  	uintptr *b, off, shift, bits, obits;
  2479  
  2480  	if(DebugMark)
  2481  		return;
  2482  
  2483  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
  2484  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2485  	shift = off % wordsPerBitmapWord;
  2486  
  2487  	for(;;) {
  2488  		obits = *b;
  2489  		if(s)
  2490  			bits = obits | (bitSpecial<<shift);
  2491  		else
  2492  			bits = obits & ~(bitSpecial<<shift);
  2493  		if(runtime·gomaxprocs == 1) {
  2494  			*b = bits;
  2495  			break;
  2496  		} else {
  2497  			// more than one goroutine is potentially running: use atomic op
  2498  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2499  				break;
  2500  		}
  2501  	}
  2502  }
  2503  
  2504  void
  2505  runtime·MHeap_MapBits(MHeap *h)
  2506  {
  2507  	// Caller has added extra mappings to the arena.
  2508  	// Add extra mappings of bitmap words as needed.
  2509  	// We allocate extra bitmap pieces in chunks of bitmapChunk.
  2510  	enum {
  2511  		bitmapChunk = 8192
  2512  	};
  2513  	uintptr n;
  2514  
  2515  	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
  2516  	n = ROUND(n, bitmapChunk);
  2517  	if(h->bitmap_mapped >= n)
  2518  		return;
  2519  
  2520  	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped);
  2521  	h->bitmap_mapped = n;
  2522  }