github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/mgc0.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Garbage collector.
     6  
     7  #include "runtime.h"
     8  #include "arch_GOARCH.h"
     9  #include "malloc.h"
    10  #include "stack.h"
    11  #include "mgc0.h"
    12  #include "race.h"
    13  #include "type.h"
    14  #include "typekind.h"
    15  #include "funcdata.h"
    16  #include "../../cmd/ld/textflag.h"
    17  
    18  enum {
    19  	Debug = 0,
    20  	DebugMark = 0,  // run second pass to check mark
    21  	CollectStats = 0,
    22  	ScanStackByFrames = 0,
    23  	IgnorePreciseGC = 0,
    24  
    25  	// Four bits per word (see #defines below).
    26  	wordsPerBitmapWord = sizeof(void*)*8/4,
    27  	bitShift = sizeof(void*)*8/4,
    28  
    29  	handoffThreshold = 4,
    30  	IntermediateBufferCapacity = 64,
    31  
    32  	// Bits in type information
    33  	PRECISE = 1,
    34  	LOOP = 2,
    35  	PC_BITS = PRECISE | LOOP,
    36  
    37  	// Pointer map
    38  	BitsPerPointer = 2,
    39  	BitsNoPointer = 0,
    40  	BitsPointer = 1,
    41  	BitsIface = 2,
    42  	BitsEface = 3,
    43  };
    44  
    45  // Bits in per-word bitmap.
    46  // #defines because enum might not be able to hold the values.
    47  //
    48  // Each word in the bitmap describes wordsPerBitmapWord words
    49  // of heap memory.  There are 4 bitmap bits dedicated to each heap word,
    50  // so on a 64-bit system there is one bitmap word per 16 heap words.
    51  // The bits in the word are packed together by type first, then by
    52  // heap location, so each 64-bit bitmap word consists of, from top to bottom,
    53  // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
    54  // then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
    55  // This layout makes it easier to iterate over the bits of a given type.
    56  //
    57  // The bitmap starts at mheap.arena_start and extends *backward* from
    58  // there.  On a 64-bit system the off'th word in the arena is tracked by
    59  // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
    60  // the only difference is that the divisor is 8.)
    61  //
    62  // To pull out the bits corresponding to a given pointer p, we use:
    63  //
    64  //	off = p - (uintptr*)mheap.arena_start;  // word offset
    65  //	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
    66  //	shift = off % wordsPerBitmapWord
    67  //	bits = *b >> shift;
    68  //	/* then test bits & bitAllocated, bits & bitMarked, etc. */
    69  //
    70  #define bitAllocated		((uintptr)1<<(bitShift*0))
    71  #define bitNoScan		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
    72  #define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
    73  #define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
    74  #define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */
    75  
    76  #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
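
         // A worked example (hypothetical addresses, assuming a 64-bit system):
         // with arena_start = 0xc000000000 and p = 0xc0000000a8, off = 0xa8/8 = 21
         // words, b = (uintptr*)arena_start - 21/16 - 1 = arena_start - 2 words,
         // and shift = 21%16 = 5.  The four bits describing p then sit at bit
         // positions 5 (bitAllocated), 21 (bitNoScan/bitBlockBoundary),
         // 37 (bitMarked) and 53 (bitSpecial) of *b.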
    77  
    78  // Holding worldsema grants an M the right to try to stop the world.
    79  // The procedure is:
    80  //
    81  //	runtime·semacquire(&runtime·worldsema);
    82  //	m->gcing = 1;
    83  //	runtime·stoptheworld();
    84  //
    85  //	... do stuff ...
    86  //
    87  //	m->gcing = 0;
    88  //	runtime·semrelease(&runtime·worldsema);
    89  //	runtime·starttheworld();
    90  //
    91  uint32 runtime·worldsema = 1;
    92  
    93  typedef struct Obj Obj;
    94  struct Obj
    95  {
    96  	byte	*p;	// data pointer
    97  	uintptr	n;	// size of data in bytes
    98  	uintptr	ti;	// type info
    99  };
   100  
   101  // The size of Workbuf is N*PageSize.
   102  typedef struct Workbuf Workbuf;
   103  struct Workbuf
   104  {
   105  #define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
   106  	LFNode  node; // must be first
   107  	uintptr nobj;
   108  	Obj     obj[SIZE/sizeof(Obj) - 1];
   109  	uint8   _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
   110  #undef SIZE
   111  };
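
         // With SIZE = 2*PageSize - sizeof(LFNode) - sizeof(uintptr), the obj array
         // plus _padding together occupy exactly SIZE bytes, so sizeof(Workbuf)
         // should be exactly 2*PageSize (assuming no extra field padding);
         // scanblock below checks that sizeof(Workbuf) is a multiple of PageSize.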
   112  
   113  typedef struct Finalizer Finalizer;
   114  struct Finalizer
   115  {
   116  	FuncVal *fn;
   117  	void *arg;
   118  	uintptr nret;
   119  	Type *fint;
   120  	PtrType *ot;
   121  };
   122  
   123  typedef struct FinBlock FinBlock;
   124  struct FinBlock
   125  {
   126  	FinBlock *alllink;
   127  	FinBlock *next;
   128  	int32 cnt;
   129  	int32 cap;
   130  	Finalizer fin[1];
   131  };
   132  
   133  extern byte data[];
   134  extern byte edata[];
   135  extern byte bss[];
   136  extern byte ebss[];
   137  
   138  extern byte gcdata[];
   139  extern byte gcbss[];
   140  
   141  static G *fing;
   142  static FinBlock *finq; // list of finalizers that are to be executed
   143  static FinBlock *finc; // cache of free blocks
   144  static FinBlock *allfin; // list of all blocks
   145  static Lock finlock;
   146  static int32 fingwait;
   147  
   148  static void runfinq(void);
   149  static Workbuf* getempty(Workbuf*);
   150  static Workbuf* getfull(Workbuf*);
   151  static void	putempty(Workbuf*);
   152  static Workbuf* handoff(Workbuf*);
   153  static void	gchelperstart(void);
   154  
   155  static struct {
   156  	uint64	full;  // lock-free list of full blocks
   157  	uint64	empty; // lock-free list of empty blocks
   158  	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
   159  	uint32	nproc;
   160  	volatile uint32	nwait;
   161  	volatile uint32	ndone;
   162  	volatile uint32 debugmarkdone;
   163  	Note	alldone;
   164  	ParFor	*markfor;
   165  	ParFor	*sweepfor;
   166  
   167  	Lock;
   168  	byte	*chunk;
   169  	uintptr	nchunk;
   170  
   171  	Obj	*roots;
   172  	uint32	nroot;
   173  	uint32	rootcap;
   174  } work;
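
         // Workbufs travel between the lock-free work.full and work.empty lists
         // (see getempty, getfull and putempty below).  A worker that runs out of
         // work calls getfull, which spins, yields and eventually sleeps until a
         // full buffer appears or all workers are idle; handoff splits the current
         // buffer in half, keeps one half, and publishes the other half on
         // work.full for an idle worker to take.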
   175  
   176  enum {
   177  	GC_DEFAULT_PTR = GC_NUM_INSTR,
   178  	GC_CHAN,
   179  
   180  	GC_NUM_INSTR2
   181  };
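
         // GC_DEFAULT_PTR and GC_CHAN are defined here rather than in mgc0.h: they
         // extend the shared instruction set (which ends at GC_NUM_INSTR) and are
         // used only by the runtime's own scan programs (defaultProg and chanProg
         // below), not by compiler-generated type information.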
   182  
   183  static struct {
   184  	struct {
   185  		uint64 sum;
   186  		uint64 cnt;
   187  	} ptr;
   188  	uint64 nbytes;
   189  	struct {
   190  		uint64 sum;
   191  		uint64 cnt;
   192  		uint64 notype;
   193  		uint64 typelookup;
   194  	} obj;
   195  	uint64 rescan;
   196  	uint64 rescanbytes;
   197  	uint64 instr[GC_NUM_INSTR2];
   198  	uint64 putempty;
   199  	uint64 getfull;
   200  	struct {
   201  		uint64 foundbit;
   202  		uint64 foundword;
   203  		uint64 foundspan;
   204  	} flushptrbuf;
   205  	struct {
   206  		uint64 foundbit;
   207  		uint64 foundword;
   208  		uint64 foundspan;
   209  	} markonly;
   210  } gcstats;
   211  
   212  // markonly marks an object. It returns true if the object
   213  // has been marked by this function, false otherwise.
   214  // This function doesn't append the object to any buffer.
   215  static bool
   216  markonly(void *obj)
   217  {
   218  	byte *p;
   219  	uintptr *bitp, bits, shift, x, xbits, off, j;
   220  	MSpan *s;
   221  	PageID k;
   222  
   223  	// Words outside the arena cannot be pointers.
   224  	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
   225  		return false;
   226  
   227  	// obj may be a pointer to a live object.
   228  	// Try to find the beginning of the object.
   229  
   230  	// Round down to word boundary.
   231  	obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
   232  
   233  	// Find bits for this word.
   234  	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
   235  	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
   236  	shift = off % wordsPerBitmapWord;
   237  	xbits = *bitp;
   238  	bits = xbits >> shift;
   239  
   240  	// Pointing at the beginning of a block?
   241  	if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
   242  		if(CollectStats)
   243  			runtime·xadd64(&gcstats.markonly.foundbit, 1);
   244  		goto found;
   245  	}
   246  
   247  	// Pointing just past the beginning?
   248  	// Scan backward a little to find a block boundary.
   249  	for(j=shift; j-->0; ) {
   250  		if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
   251  			shift = j;
   252  			bits = xbits>>shift;
   253  			if(CollectStats)
   254  				runtime·xadd64(&gcstats.markonly.foundword, 1);
   255  			goto found;
   256  		}
   257  	}
   258  
   259  	// Otherwise consult span table to find beginning.
   260  	// (Manually inlined copy of MHeap_LookupMaybe.)
   261  	k = (uintptr)obj>>PageShift;
   262  	x = k;
   263  	if(sizeof(void*) == 8)
   264  		x -= (uintptr)runtime·mheap.arena_start>>PageShift;
   265  	s = runtime·mheap.spans[x];
   266  	if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
   267  		return false;
   268  	p = (byte*)((uintptr)s->start<<PageShift);
   269  	if(s->sizeclass == 0) {
   270  		obj = p;
   271  	} else {
   272  		uintptr size = s->elemsize;
   273  		int32 i = ((byte*)obj - p)/size;
   274  		obj = p+i*size;
   275  	}
   276  
   277  	// Now that we know the object header, reload bits.
   278  	off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
   279  	bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
   280  	shift = off % wordsPerBitmapWord;
   281  	xbits = *bitp;
   282  	bits = xbits >> shift;
   283  	if(CollectStats)
   284  		runtime·xadd64(&gcstats.markonly.foundspan, 1);
   285  
   286  found:
   287  	// Now we have bits, bitp, and shift correct for
   288  	// obj pointing at the base of the object.
   289  	// Only care about allocated and not marked.
   290  	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
   291  		return false;
   292  	if(work.nproc == 1)
   293  		*bitp |= bitMarked<<shift;
   294  	else {
   295  		for(;;) {
   296  			x = *bitp;
   297  			if(x & (bitMarked<<shift))
   298  				return false;
   299  			if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
   300  				break;
   301  		}
   302  	}
   303  
   304  	// The object is now marked
   305  	return true;
   306  }
   307  
   308  // PtrTarget is a structure used by intermediate buffers.
   309  // The intermediate buffers hold GC data before it
   310  // is moved/flushed to the work buffer (Workbuf).
   311  // The size of an intermediate buffer is very small,
   312  // such as 32 or 64 elements.
   313  typedef struct PtrTarget PtrTarget;
   314  struct PtrTarget
   315  {
   316  	void *p;
   317  	uintptr ti;
   318  };
   319  
   320  typedef struct BufferList BufferList;
   321  struct BufferList
   322  {
   323  	PtrTarget ptrtarget[IntermediateBufferCapacity];
   324  	Obj obj[IntermediateBufferCapacity];
   325  	uint32 busy;
   326  	byte pad[CacheLineSize];
   327  };
   328  #pragma dataflag NOPTR
   329  static BufferList bufferList[MaxGcproc];
   330  
   331  static Type *itabtype;
   332  
   333  static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
   334  
   335  // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
    336  // The PtrTarget buffer holds blocks regardless of whether they have been marked or scanned,
    337  // while the work buffer holds only blocks that have been marked
    338  // and are ready to be scanned by the garbage collector.
   339  //
    340  // _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
   341  //
    342  // A simplified drawing explaining how the todo list moves from one structure to another:
   343  //
   344  //     scanblock
   345  //  (find pointers)
   346  //    Obj ------> PtrTarget (pointer targets)
   347  //     ↑          |
   348  //     |          |
   349  //     `----------'
   350  //     flushptrbuf
   351  //  (find block start, mark and enqueue)
   352  static void
   353  flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
   354  {
   355  	byte *p, *arena_start, *obj;
   356  	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
   357  	MSpan *s;
   358  	PageID k;
   359  	Obj *wp;
   360  	Workbuf *wbuf;
   361  	PtrTarget *ptrbuf_end;
   362  
   363  	arena_start = runtime·mheap.arena_start;
   364  
   365  	wp = *_wp;
   366  	wbuf = *_wbuf;
   367  	nobj = *_nobj;
   368  
   369  	ptrbuf_end = *ptrbufpos;
   370  	n = ptrbuf_end - ptrbuf;
   371  	*ptrbufpos = ptrbuf;
   372  
   373  	if(CollectStats) {
   374  		runtime·xadd64(&gcstats.ptr.sum, n);
   375  		runtime·xadd64(&gcstats.ptr.cnt, 1);
   376  	}
   377  
   378  	// If buffer is nearly full, get a new one.
   379  	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
   380  		if(wbuf != nil)
   381  			wbuf->nobj = nobj;
   382  		wbuf = getempty(wbuf);
   383  		wp = wbuf->obj;
   384  		nobj = 0;
   385  
   386  		if(n >= nelem(wbuf->obj))
   387  			runtime·throw("ptrbuf has to be smaller than WorkBuf");
   388  	}
   389  
   390  	// TODO(atom): This block is a branch of an if-then-else statement.
    391  //             The single-threaded branch may be added in a later CL.
   392  	{
   393  		// Multi-threaded version.
   394  
   395  		while(ptrbuf < ptrbuf_end) {
   396  			obj = ptrbuf->p;
   397  			ti = ptrbuf->ti;
   398  			ptrbuf++;
   399  
   400  			// obj belongs to interval [mheap.arena_start, mheap.arena_used).
   401  			if(Debug > 1) {
   402  				if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
   403  					runtime·throw("object is outside of mheap");
   404  			}
   405  
   406  			// obj may be a pointer to a live object.
   407  			// Try to find the beginning of the object.
   408  
   409  			// Round down to word boundary.
   410  			if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
   411  				obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
   412  				ti = 0;
   413  			}
   414  
   415  			// Find bits for this word.
   416  			off = (uintptr*)obj - (uintptr*)arena_start;
   417  			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
   418  			shift = off % wordsPerBitmapWord;
   419  			xbits = *bitp;
   420  			bits = xbits >> shift;
   421  
   422  			// Pointing at the beginning of a block?
   423  			if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
   424  				if(CollectStats)
   425  					runtime·xadd64(&gcstats.flushptrbuf.foundbit, 1);
   426  				goto found;
   427  			}
   428  
   429  			ti = 0;
   430  
   431  			// Pointing just past the beginning?
   432  			// Scan backward a little to find a block boundary.
   433  			for(j=shift; j-->0; ) {
   434  				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
   435  					obj = (byte*)obj - (shift-j)*PtrSize;
   436  					shift = j;
   437  					bits = xbits>>shift;
   438  					if(CollectStats)
   439  						runtime·xadd64(&gcstats.flushptrbuf.foundword, 1);
   440  					goto found;
   441  				}
   442  			}
   443  
   444  			// Otherwise consult span table to find beginning.
   445  			// (Manually inlined copy of MHeap_LookupMaybe.)
   446  			k = (uintptr)obj>>PageShift;
   447  			x = k;
   448  			if(sizeof(void*) == 8)
   449  				x -= (uintptr)arena_start>>PageShift;
   450  			s = runtime·mheap.spans[x];
   451  			if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
   452  				continue;
   453  			p = (byte*)((uintptr)s->start<<PageShift);
   454  			if(s->sizeclass == 0) {
   455  				obj = p;
   456  			} else {
   457  				size = s->elemsize;
   458  				int32 i = ((byte*)obj - p)/size;
   459  				obj = p+i*size;
   460  			}
   461  
   462  			// Now that we know the object header, reload bits.
   463  			off = (uintptr*)obj - (uintptr*)arena_start;
   464  			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
   465  			shift = off % wordsPerBitmapWord;
   466  			xbits = *bitp;
   467  			bits = xbits >> shift;
   468  			if(CollectStats)
   469  				runtime·xadd64(&gcstats.flushptrbuf.foundspan, 1);
   470  
   471  		found:
   472  			// Now we have bits, bitp, and shift correct for
   473  			// obj pointing at the base of the object.
   474  			// Only care about allocated and not marked.
   475  			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
   476  				continue;
   477  			if(work.nproc == 1)
   478  				*bitp |= bitMarked<<shift;
   479  			else {
   480  				for(;;) {
   481  					x = *bitp;
   482  					if(x & (bitMarked<<shift))
   483  						goto continue_obj;
   484  					if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
   485  						break;
   486  				}
   487  			}
   488  
   489  			// If object has no pointers, don't need to scan further.
   490  			if((bits & bitNoScan) != 0)
   491  				continue;
   492  
   493  			// Ask span about size class.
   494  			// (Manually inlined copy of MHeap_Lookup.)
   495  			x = (uintptr)obj >> PageShift;
   496  			if(sizeof(void*) == 8)
   497  				x -= (uintptr)arena_start>>PageShift;
   498  			s = runtime·mheap.spans[x];
   499  
   500  			PREFETCH(obj);
   501  
   502  			*wp = (Obj){obj, s->elemsize, ti};
   503  			wp++;
   504  			nobj++;
   505  		continue_obj:;
   506  		}
   507  
   508  		// If another proc wants a pointer, give it some.
   509  		if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
   510  			wbuf->nobj = nobj;
   511  			wbuf = handoff(wbuf);
   512  			nobj = wbuf->nobj;
   513  			wp = wbuf->obj + nobj;
   514  		}
   515  	}
   516  
   517  	*_wp = wp;
   518  	*_wbuf = wbuf;
   519  	*_nobj = nobj;
   520  }
   521  
   522  static void
   523  flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
   524  {
   525  	uintptr nobj, off;
   526  	Obj *wp, obj;
   527  	Workbuf *wbuf;
   528  	Obj *objbuf_end;
   529  
   530  	wp = *_wp;
   531  	wbuf = *_wbuf;
   532  	nobj = *_nobj;
   533  
   534  	objbuf_end = *objbufpos;
   535  	*objbufpos = objbuf;
   536  
   537  	while(objbuf < objbuf_end) {
   538  		obj = *objbuf++;
   539  
   540  		// Align obj.b to a word boundary.
   541  		off = (uintptr)obj.p & (PtrSize-1);
   542  		if(off != 0) {
   543  			obj.p += PtrSize - off;
   544  			obj.n -= PtrSize - off;
   545  			obj.ti = 0;
   546  		}
   547  
   548  		if(obj.p == nil || obj.n == 0)
   549  			continue;
   550  
   551  		// If buffer is full, get a new one.
   552  		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
   553  			if(wbuf != nil)
   554  				wbuf->nobj = nobj;
   555  			wbuf = getempty(wbuf);
   556  			wp = wbuf->obj;
   557  			nobj = 0;
   558  		}
   559  
   560  		*wp = obj;
   561  		wp++;
   562  		nobj++;
   563  	}
   564  
   565  	// If another proc wants a pointer, give it some.
   566  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
   567  		wbuf->nobj = nobj;
   568  		wbuf = handoff(wbuf);
   569  		nobj = wbuf->nobj;
   570  		wp = wbuf->obj + nobj;
   571  	}
   572  
   573  	*_wp = wp;
   574  	*_wbuf = wbuf;
   575  	*_nobj = nobj;
   576  }
   577  
   578  // Program that scans the whole block and treats every block element as a potential pointer
   579  static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
   580  
   581  // Hchan program
   582  static uintptr chanProg[2] = {0, GC_CHAN};
   583  
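         // A scan program is an array of uintptr words: pc[0] holds the element
         // size and the following words are instructions interpreted by scanblock
         // below.  As a hypothetical illustration, a program for
         // struct{ p *T; s string } on a 64-bit system could be
         //	{24, GC_PTR, 0, elem_ti, GC_STRING, 8, GC_END}
         // (offsets in bytes; elem_ti is a made-up type-info word).  defaultProg
         // and chanProg above use the same layout.
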
   584  // Local variables of a program fragment or loop
   585  typedef struct Frame Frame;
   586  struct Frame {
   587  	uintptr count, elemsize, b;
   588  	uintptr *loop_or_ret;
   589  };
   590  
   591  // Sanity check for the derived type info objti.
   592  static void
   593  checkptr(void *obj, uintptr objti)
   594  {
   595  	uintptr *pc1, *pc2, type, tisize, i, j, x;
   596  	byte *objstart;
   597  	Type *t;
   598  	MSpan *s;
   599  
   600  	if(!Debug)
   601  		runtime·throw("checkptr is debug only");
   602  
   603  	if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
   604  		return;
   605  	type = runtime·gettype(obj);
   606  	t = (Type*)(type & ~(uintptr)(PtrSize-1));
   607  	if(t == nil)
   608  		return;
   609  	x = (uintptr)obj >> PageShift;
   610  	if(sizeof(void*) == 8)
   611  		x -= (uintptr)(runtime·mheap.arena_start)>>PageShift;
   612  	s = runtime·mheap.spans[x];
   613  	objstart = (byte*)((uintptr)s->start<<PageShift);
   614  	if(s->sizeclass != 0) {
   615  		i = ((byte*)obj - objstart)/s->elemsize;
   616  		objstart += i*s->elemsize;
   617  	}
   618  	tisize = *(uintptr*)objti;
   619  	// Sanity check for object size: it should fit into the memory block.
   620  	if((byte*)obj + tisize > objstart + s->elemsize) {
   621  		runtime·printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
   622  			       *t->string, obj, tisize, objstart, s->elemsize);
   623  		runtime·throw("invalid gc type info");
   624  	}
   625  	if(obj != objstart)
   626  		return;
   627  	// If obj points to the beginning of the memory block,
   628  	// check type info as well.
   629  	if(t->string == nil ||
   630  		// Gob allocates unsafe pointers for indirection.
   631  		(runtime·strcmp(t->string->str, (byte*)"unsafe.Pointer") &&
   632  		// Runtime and gc think differently about closures.
   633  		runtime·strstr(t->string->str, (byte*)"struct { F uintptr") != t->string->str)) {
   634  		pc1 = (uintptr*)objti;
   635  		pc2 = (uintptr*)t->gc;
   636  		// A simple best-effort check until first GC_END.
   637  		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
   638  			if(pc1[j] != pc2[j]) {
   639  				runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
   640  					       t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]);
   641  				runtime·throw("invalid gc type info");
   642  			}
   643  		}
   644  	}
    645  }
   646  
   647  // scanblock scans a block of n bytes starting at pointer b for references
   648  // to other objects, scanning any it finds recursively until there are no
   649  // unscanned objects left.  Instead of using an explicit recursion, it keeps
   650  // a work list in the Workbuf* structures and loops in the main function
   651  // body.  Keeping an explicit work list is easier on the stack allocator and
   652  // more efficient.
   653  //
   654  // wbuf: current work buffer
   655  // wp:   storage for next queued pointer (write pointer)
   656  // nobj: number of queued objects
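         //
         // Pointers found while scanning are first collected in the small per-proc
         // PtrTarget and Obj buffers (bufferList[m->helpgc]).  flushptrbuf resolves
         // each pointer to its object, marks it and queues it in the shared
         // Workbufs; flushobjbuf moves queued Obj regions into the Workbufs
         // directly.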
   657  static void
   658  scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
   659  {
   660  	byte *b, *arena_start, *arena_used;
   661  	uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
   662  	uintptr *pc, precise_type, nominal_size;
   663  	uintptr *chan_ret, chancap;
   664  	void *obj;
   665  	Type *t;
   666  	Slice *sliceptr;
   667  	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
   668  	BufferList *scanbuffers;
   669  	PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
   670  	Obj *objbuf, *objbuf_end, *objbufpos;
   671  	Eface *eface;
   672  	Iface *iface;
   673  	Hchan *chan;
   674  	ChanType *chantype;
   675  
   676  	if(sizeof(Workbuf) % PageSize != 0)
   677  		runtime·throw("scanblock: size of Workbuf is suboptimal");
   678  
   679  	// Memory arena parameters.
   680  	arena_start = runtime·mheap.arena_start;
   681  	arena_used = runtime·mheap.arena_used;
   682  
   683  	stack_ptr = stack+nelem(stack)-1;
   684  	
   685  	precise_type = false;
   686  	nominal_size = 0;
   687  
   688  	// Allocate ptrbuf
   689  	{
   690  		scanbuffers = &bufferList[m->helpgc];
   691  		ptrbuf = &scanbuffers->ptrtarget[0];
   692  		ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
   693  		objbuf = &scanbuffers->obj[0];
   694  		objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
   695  	}
   696  
   697  	ptrbufpos = ptrbuf;
   698  	objbufpos = objbuf;
   699  
   700  	// (Silence the compiler)
   701  	chan = nil;
   702  	chantype = nil;
   703  	chan_ret = nil;
   704  
   705  	goto next_block;
   706  
   707  	for(;;) {
   708  		// Each iteration scans the block b of length n, queueing pointers in
   709  		// the work buffer.
   710  		if(Debug > 1) {
   711  			runtime·printf("scanblock %p %D\n", b, (int64)n);
   712  		}
   713  
   714  		if(CollectStats) {
   715  			runtime·xadd64(&gcstats.nbytes, n);
   716  			runtime·xadd64(&gcstats.obj.sum, nobj);
   717  			runtime·xadd64(&gcstats.obj.cnt, 1);
   718  		}
   719  
   720  		if(ti != 0) {
   721  			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
   722  			precise_type = (ti & PRECISE);
   723  			stack_top.elemsize = pc[0];
   724  			if(!precise_type)
   725  				nominal_size = pc[0];
   726  			if(ti & LOOP) {
   727  				stack_top.count = 0;	// 0 means an infinite number of iterations
   728  				stack_top.loop_or_ret = pc+1;
   729  			} else {
   730  				stack_top.count = 1;
   731  			}
   732  			if(Debug) {
   733  				// Simple sanity check for provided type info ti:
    734  				// The declared size of the object must not be larger than the actual size
    735  				// (it can be smaller due to interior pointers).
    736  				// It's difficult to make a comprehensive check due to interior pointers,
   737  				// reflection, gob, etc.
   738  				if(pc[0] > n) {
   739  					runtime·printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
   740  					runtime·throw("invalid gc type info");
   741  				}
   742  			}
   743  		} else if(UseSpanType) {
   744  			if(CollectStats)
   745  				runtime·xadd64(&gcstats.obj.notype, 1);
   746  
   747  			type = runtime·gettype(b);
   748  			if(type != 0) {
   749  				if(CollectStats)
   750  					runtime·xadd64(&gcstats.obj.typelookup, 1);
   751  
   752  				t = (Type*)(type & ~(uintptr)(PtrSize-1));
   753  				switch(type & (PtrSize-1)) {
   754  				case TypeInfo_SingleObject:
   755  					pc = (uintptr*)t->gc;
   756  					precise_type = true;  // type information about 'b' is precise
   757  					stack_top.count = 1;
   758  					stack_top.elemsize = pc[0];
   759  					break;
   760  				case TypeInfo_Array:
   761  					pc = (uintptr*)t->gc;
   762  					if(pc[0] == 0)
   763  						goto next_block;
   764  					precise_type = true;  // type information about 'b' is precise
   765  					stack_top.count = 0;  // 0 means an infinite number of iterations
   766  					stack_top.elemsize = pc[0];
   767  					stack_top.loop_or_ret = pc+1;
   768  					break;
   769  				case TypeInfo_Chan:
   770  					chan = (Hchan*)b;
   771  					chantype = (ChanType*)t;
   772  					chan_ret = nil;
   773  					pc = chanProg;
   774  					break;
   775  				default:
   776  					runtime·throw("scanblock: invalid type");
   777  					return;
   778  				}
   779  			} else {
   780  				pc = defaultProg;
   781  			}
   782  		} else {
   783  			pc = defaultProg;
   784  		}
   785  
   786  		if(IgnorePreciseGC)
   787  			pc = defaultProg;
   788  
   789  		pc++;
   790  		stack_top.b = (uintptr)b;
   791  
   792  		end_b = (uintptr)b + n - PtrSize;
   793  
   794  	for(;;) {
   795  		if(CollectStats)
   796  			runtime·xadd64(&gcstats.instr[pc[0]], 1);
   797  
   798  		obj = nil;
   799  		objti = 0;
   800  		switch(pc[0]) {
   801  		case GC_PTR:
   802  			obj = *(void**)(stack_top.b + pc[1]);
   803  			objti = pc[2];
   804  			pc += 3;
   805  			if(Debug)
   806  				checkptr(obj, objti);
   807  			break;
   808  
   809  		case GC_SLICE:
   810  			sliceptr = (Slice*)(stack_top.b + pc[1]);
   811  			if(sliceptr->cap != 0) {
   812  				obj = sliceptr->array;
   813  				// Can't use slice element type for scanning,
   814  				// because if it points to an array embedded
   815  				// in the beginning of a struct,
   816  				// we will scan the whole struct as the slice.
   817  				// So just obtain type info from heap.
   818  			}
   819  			pc += 3;
   820  			break;
   821  
   822  		case GC_APTR:
   823  			obj = *(void**)(stack_top.b + pc[1]);
   824  			pc += 2;
   825  			break;
   826  
   827  		case GC_STRING:
   828  			obj = *(void**)(stack_top.b + pc[1]);
   829  			markonly(obj);
   830  			pc += 2;
   831  			continue;
   832  
   833  		case GC_EFACE:
   834  			eface = (Eface*)(stack_top.b + pc[1]);
   835  			pc += 2;
   836  			if(eface->type == nil)
   837  				continue;
   838  
   839  			// eface->type
   840  			t = eface->type;
   841  			if((void*)t >= arena_start && (void*)t < arena_used) {
   842  				*ptrbufpos++ = (PtrTarget){t, 0};
   843  				if(ptrbufpos == ptrbuf_end)
   844  					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
   845  			}
   846  
   847  			// eface->data
   848  			if(eface->data >= arena_start && eface->data < arena_used) {
   849  				if(t->size <= sizeof(void*)) {
   850  					if((t->kind & KindNoPointers))
   851  						continue;
   852  
   853  					obj = eface->data;
   854  					if((t->kind & ~KindNoPointers) == KindPtr)
   855  						objti = (uintptr)((PtrType*)t)->elem->gc;
   856  				} else {
   857  					obj = eface->data;
   858  					objti = (uintptr)t->gc;
   859  				}
   860  			}
   861  			break;
   862  
   863  		case GC_IFACE:
   864  			iface = (Iface*)(stack_top.b + pc[1]);
   865  			pc += 2;
   866  			if(iface->tab == nil)
   867  				continue;
   868  			
   869  			// iface->tab
   870  			if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
   871  				*ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
   872  				if(ptrbufpos == ptrbuf_end)
   873  					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
   874  			}
   875  
   876  			// iface->data
   877  			if(iface->data >= arena_start && iface->data < arena_used) {
   878  				t = iface->tab->type;
   879  				if(t->size <= sizeof(void*)) {
   880  					if((t->kind & KindNoPointers))
   881  						continue;
   882  
   883  					obj = iface->data;
   884  					if((t->kind & ~KindNoPointers) == KindPtr)
   885  						objti = (uintptr)((PtrType*)t)->elem->gc;
   886  				} else {
   887  					obj = iface->data;
   888  					objti = (uintptr)t->gc;
   889  				}
   890  			}
   891  			break;
   892  
   893  		case GC_DEFAULT_PTR:
   894  			while(stack_top.b <= end_b) {
   895  				obj = *(byte**)stack_top.b;
   896  				stack_top.b += PtrSize;
   897  				if(obj >= arena_start && obj < arena_used) {
   898  					*ptrbufpos++ = (PtrTarget){obj, 0};
   899  					if(ptrbufpos == ptrbuf_end)
   900  						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
   901  				}
   902  			}
   903  			goto next_block;
   904  
   905  		case GC_END:
   906  			if(--stack_top.count != 0) {
   907  				// Next iteration of a loop if possible.
   908  				stack_top.b += stack_top.elemsize;
   909  				if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
   910  					pc = stack_top.loop_or_ret;
   911  					continue;
   912  				}
   913  				i = stack_top.b;
   914  			} else {
   915  				// Stack pop if possible.
   916  				if(stack_ptr+1 < stack+nelem(stack)) {
   917  					pc = stack_top.loop_or_ret;
   918  					stack_top = *(++stack_ptr);
   919  					continue;
   920  				}
   921  				i = (uintptr)b + nominal_size;
   922  			}
   923  			if(!precise_type) {
   924  				// Quickly scan [b+i,b+n) for possible pointers.
   925  				for(; i<=end_b; i+=PtrSize) {
   926  					if(*(byte**)i != nil) {
   927  						// Found a value that may be a pointer.
   928  						// Do a rescan of the entire block.
   929  						enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
   930  						if(CollectStats) {
   931  							runtime·xadd64(&gcstats.rescan, 1);
   932  							runtime·xadd64(&gcstats.rescanbytes, n);
   933  						}
   934  						break;
   935  					}
   936  				}
   937  			}
   938  			goto next_block;
   939  
   940  		case GC_ARRAY_START:
   941  			i = stack_top.b + pc[1];
   942  			count = pc[2];
   943  			elemsize = pc[3];
   944  			pc += 4;
   945  
   946  			// Stack push.
   947  			*stack_ptr-- = stack_top;
   948  			stack_top = (Frame){count, elemsize, i, pc};
   949  			continue;
   950  
   951  		case GC_ARRAY_NEXT:
   952  			if(--stack_top.count != 0) {
   953  				stack_top.b += stack_top.elemsize;
   954  				pc = stack_top.loop_or_ret;
   955  			} else {
   956  				// Stack pop.
   957  				stack_top = *(++stack_ptr);
   958  				pc += 1;
   959  			}
   960  			continue;
   961  
   962  		case GC_CALL:
   963  			// Stack push.
   964  			*stack_ptr-- = stack_top;
   965  			stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
   966  			pc = (uintptr*)((byte*)pc + *(int32*)(pc+2));  // target of the CALL instruction
   967  			continue;
   968  
   969  		case GC_REGION:
   970  			obj = (void*)(stack_top.b + pc[1]);
   971  			size = pc[2];
   972  			objti = pc[3];
   973  			pc += 4;
   974  
   975  			*objbufpos++ = (Obj){obj, size, objti};
   976  			if(objbufpos == objbuf_end)
   977  				flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
   978  			continue;
   979  
   980  		case GC_CHAN_PTR:
   981  			chan = *(Hchan**)(stack_top.b + pc[1]);
   982  			if(chan == nil) {
   983  				pc += 3;
   984  				continue;
   985  			}
   986  			if(markonly(chan)) {
   987  				chantype = (ChanType*)pc[2];
   988  				if(!(chantype->elem->kind & KindNoPointers)) {
   989  					// Start chanProg.
   990  					chan_ret = pc+3;
   991  					pc = chanProg+1;
   992  					continue;
   993  				}
   994  			}
   995  			pc += 3;
   996  			continue;
   997  
   998  		case GC_CHAN:
   999  			// There are no heap pointers in struct Hchan,
  1000  			// so we can ignore the leading sizeof(Hchan) bytes.
  1001  			if(!(chantype->elem->kind & KindNoPointers)) {
  1002  				// Channel's buffer follows Hchan immediately in memory.
  1003  				// Size of buffer (cap(c)) is second int in the chan struct.
  1004  				chancap = ((uintgo*)chan)[1];
  1005  				if(chancap > 0) {
  1006  					// TODO(atom): split into two chunks so that only the
  1007  					// in-use part of the circular buffer is scanned.
  1008  					// (Channel routines zero the unused part, so the current
  1009  					// code does not lead to leaks, it's just a little inefficient.)
  1010  					*objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, chancap*chantype->elem->size,
  1011  						(uintptr)chantype->elem->gc | PRECISE | LOOP};
  1012  					if(objbufpos == objbuf_end)
  1013  						flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
  1014  				}
  1015  			}
  1016  			if(chan_ret == nil)
  1017  				goto next_block;
  1018  			pc = chan_ret;
  1019  			continue;
  1020  
  1021  		default:
  1022  			runtime·throw("scanblock: invalid GC instruction");
  1023  			return;
  1024  		}
  1025  
  1026  		if(obj >= arena_start && obj < arena_used) {
  1027  			*ptrbufpos++ = (PtrTarget){obj, objti};
  1028  			if(ptrbufpos == ptrbuf_end)
  1029  				flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
  1030  		}
  1031  	}
  1032  
  1033  	next_block:
  1034  		// Done scanning [b, b+n).  Prepare for the next iteration of
  1035  		// the loop by setting b, n, ti to the parameters for the next block.
  1036  
  1037  		if(nobj == 0) {
  1038  			flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
  1039  			flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
  1040  
  1041  			if(nobj == 0) {
  1042  				if(!keepworking) {
  1043  					if(wbuf)
  1044  						putempty(wbuf);
  1045  					goto endscan;
  1046  				}
  1047  				// Emptied our buffer: refill.
  1048  				wbuf = getfull(wbuf);
  1049  				if(wbuf == nil)
  1050  					goto endscan;
  1051  				nobj = wbuf->nobj;
  1052  				wp = wbuf->obj + wbuf->nobj;
  1053  			}
  1054  		}
  1055  
  1056  		// Fetch b from the work buffer.
  1057  		--wp;
  1058  		b = wp->p;
  1059  		n = wp->n;
  1060  		ti = wp->ti;
  1061  		nobj--;
  1062  	}
  1063  
  1064  endscan:;
  1065  }
  1066  
  1067  // debug_scanblock is the debug copy of scanblock.
   1068  // It is simpler, slower, single-threaded, recursive,
  1069  // and uses bitSpecial as the mark bit.
  1070  static void
  1071  debug_scanblock(byte *b, uintptr n)
  1072  {
  1073  	byte *obj, *p;
  1074  	void **vp;
  1075  	uintptr size, *bitp, bits, shift, i, xbits, off;
  1076  	MSpan *s;
  1077  
  1078  	if(!DebugMark)
  1079  		runtime·throw("debug_scanblock without DebugMark");
  1080  
  1081  	if((intptr)n < 0) {
  1082  		runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
  1083  		runtime·throw("debug_scanblock");
  1084  	}
  1085  
  1086  	// Align b to a word boundary.
  1087  	off = (uintptr)b & (PtrSize-1);
  1088  	if(off != 0) {
  1089  		b += PtrSize - off;
  1090  		n -= PtrSize - off;
  1091  	}
  1092  
  1093  	vp = (void**)b;
  1094  	n /= PtrSize;
  1095  	for(i=0; i<n; i++) {
  1096  		obj = (byte*)vp[i];
  1097  
  1098  		// Words outside the arena cannot be pointers.
  1099  		if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
  1100  			continue;
  1101  
  1102  		// Round down to word boundary.
  1103  		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
  1104  
  1105  		// Consult span table to find beginning.
  1106  		s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
  1107  		if(s == nil)
  1108  			continue;
  1109  
   1110  		p = (byte*)((uintptr)s->start<<PageShift);
  1111  		size = s->elemsize;
  1112  		if(s->sizeclass == 0) {
  1113  			obj = p;
  1114  		} else {
  1115  			int32 i = ((byte*)obj - p)/size;
  1116  			obj = p+i*size;
  1117  		}
  1118  
  1119  		// Now that we know the object header, reload bits.
  1120  		off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
  1121  		bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  1122  		shift = off % wordsPerBitmapWord;
  1123  		xbits = *bitp;
  1124  		bits = xbits >> shift;
  1125  
  1126  		// Now we have bits, bitp, and shift correct for
  1127  		// obj pointing at the base of the object.
  1128  		// If not allocated or already marked, done.
  1129  		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
  1130  			continue;
  1131  		*bitp |= bitSpecial<<shift;
  1132  		if(!(bits & bitMarked))
  1133  			runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
  1134  
  1135  		// If object has no pointers, don't need to scan further.
  1136  		if((bits & bitNoScan) != 0)
  1137  			continue;
  1138  
  1139  		debug_scanblock(obj, size);
  1140  	}
  1141  }
  1142  
  1143  // Append obj to the work buffer.
   1144  // _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
  1145  static void
  1146  enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
  1147  {
  1148  	uintptr nobj, off;
  1149  	Obj *wp;
  1150  	Workbuf *wbuf;
  1151  
  1152  	if(Debug > 1)
  1153  		runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
  1154  
  1155  	// Align obj.b to a word boundary.
  1156  	off = (uintptr)obj.p & (PtrSize-1);
  1157  	if(off != 0) {
  1158  		obj.p += PtrSize - off;
  1159  		obj.n -= PtrSize - off;
  1160  		obj.ti = 0;
  1161  	}
  1162  
  1163  	if(obj.p == nil || obj.n == 0)
  1164  		return;
  1165  
  1166  	// Load work buffer state
  1167  	wp = *_wp;
  1168  	wbuf = *_wbuf;
  1169  	nobj = *_nobj;
  1170  
  1171  	// If another proc wants a pointer, give it some.
  1172  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
  1173  		wbuf->nobj = nobj;
  1174  		wbuf = handoff(wbuf);
  1175  		nobj = wbuf->nobj;
  1176  		wp = wbuf->obj + nobj;
  1177  	}
  1178  
  1179  	// If buffer is full, get a new one.
  1180  	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
  1181  		if(wbuf != nil)
  1182  			wbuf->nobj = nobj;
  1183  		wbuf = getempty(wbuf);
  1184  		wp = wbuf->obj;
  1185  		nobj = 0;
  1186  	}
  1187  
  1188  	*wp = obj;
  1189  	wp++;
  1190  	nobj++;
  1191  
  1192  	// Save work buffer state
  1193  	*_wp = wp;
  1194  	*_wbuf = wbuf;
  1195  	*_nobj = nobj;
  1196  }
  1197  
  1198  static void
  1199  markroot(ParFor *desc, uint32 i)
  1200  {
  1201  	Obj *wp;
  1202  	Workbuf *wbuf;
  1203  	uintptr nobj;
  1204  
  1205  	USED(&desc);
  1206  	wp = nil;
  1207  	wbuf = nil;
  1208  	nobj = 0;
  1209  	enqueue(work.roots[i], &wbuf, &wp, &nobj);
  1210  	scanblock(wbuf, wp, nobj, false);
  1211  }
  1212  
  1213  // Get an empty work buffer off the work.empty list,
  1214  // allocating new buffers as needed.
  1215  static Workbuf*
  1216  getempty(Workbuf *b)
  1217  {
  1218  	if(b != nil)
  1219  		runtime·lfstackpush(&work.full, &b->node);
  1220  	b = (Workbuf*)runtime·lfstackpop(&work.empty);
  1221  	if(b == nil) {
  1222  		// Need to allocate.
  1223  		runtime·lock(&work);
  1224  		if(work.nchunk < sizeof *b) {
  1225  			work.nchunk = 1<<20;
  1226  			work.chunk = runtime·SysAlloc(work.nchunk, &mstats.gc_sys);
  1227  			if(work.chunk == nil)
  1228  				runtime·throw("runtime: cannot allocate memory");
  1229  		}
  1230  		b = (Workbuf*)work.chunk;
  1231  		work.chunk += sizeof *b;
  1232  		work.nchunk -= sizeof *b;
  1233  		runtime·unlock(&work);
  1234  	}
  1235  	b->nobj = 0;
  1236  	return b;
  1237  }
  1238  
  1239  static void
  1240  putempty(Workbuf *b)
  1241  {
  1242  	if(CollectStats)
  1243  		runtime·xadd64(&gcstats.putempty, 1);
  1244  
  1245  	runtime·lfstackpush(&work.empty, &b->node);
  1246  }
  1247  
  1248  // Get a full work buffer off the work.full list, or return nil.
  1249  static Workbuf*
  1250  getfull(Workbuf *b)
  1251  {
  1252  	int32 i;
  1253  
  1254  	if(CollectStats)
  1255  		runtime·xadd64(&gcstats.getfull, 1);
  1256  
  1257  	if(b != nil)
  1258  		runtime·lfstackpush(&work.empty, &b->node);
  1259  	b = (Workbuf*)runtime·lfstackpop(&work.full);
  1260  	if(b != nil || work.nproc == 1)
  1261  		return b;
  1262  
  1263  	runtime·xadd(&work.nwait, +1);
  1264  	for(i=0;; i++) {
  1265  		if(work.full != 0) {
  1266  			runtime·xadd(&work.nwait, -1);
  1267  			b = (Workbuf*)runtime·lfstackpop(&work.full);
  1268  			if(b != nil)
  1269  				return b;
  1270  			runtime·xadd(&work.nwait, +1);
  1271  		}
  1272  		if(work.nwait == work.nproc)
  1273  			return nil;
  1274  		if(i < 10) {
  1275  			m->gcstats.nprocyield++;
  1276  			runtime·procyield(20);
  1277  		} else if(i < 20) {
  1278  			m->gcstats.nosyield++;
  1279  			runtime·osyield();
  1280  		} else {
  1281  			m->gcstats.nsleep++;
  1282  			runtime·usleep(100);
  1283  		}
  1284  	}
  1285  }
  1286  
  1287  static Workbuf*
  1288  handoff(Workbuf *b)
  1289  {
  1290  	int32 n;
  1291  	Workbuf *b1;
  1292  
  1293  	// Make new buffer with half of b's pointers.
  1294  	b1 = getempty(nil);
  1295  	n = b->nobj/2;
  1296  	b->nobj -= n;
  1297  	b1->nobj = n;
  1298  	runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
  1299  	m->gcstats.nhandoff++;
  1300  	m->gcstats.nhandoffcnt += n;
  1301  
  1302  	// Put b on full list - let first half of b get stolen.
  1303  	runtime·lfstackpush(&work.full, &b->node);
  1304  	return b1;
  1305  }
  1306  
  1307  static void
  1308  addroot(Obj obj)
  1309  {
  1310  	uint32 cap;
  1311  	Obj *new;
  1312  
  1313  	if(work.nroot >= work.rootcap) {
  1314  		cap = PageSize/sizeof(Obj);
  1315  		if(cap < 2*work.rootcap)
  1316  			cap = 2*work.rootcap;
  1317  		new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj), &mstats.gc_sys);
  1318  		if(new == nil)
  1319  			runtime·throw("runtime: cannot allocate memory");
  1320  		if(work.roots != nil) {
  1321  			runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
  1322  			runtime·SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys);
  1323  		}
  1324  		work.roots = new;
  1325  		work.rootcap = cap;
  1326  	}
  1327  	work.roots[work.nroot] = obj;
  1328  	work.nroot++;
  1329  }
  1330  
  1331  extern byte pclntab[]; // base for f->ptrsoff
  1332  
  1333  typedef struct BitVector BitVector;
  1334  struct BitVector
  1335  {
  1336  	int32 n;
  1337  	uint32 data[];
  1338  };
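
         // Each potential pointer slot in a frame is described by BitsPerPointer
         // (2) bits of data[], lowest bits first: BitsNoPointer, BitsPointer,
         // BitsIface or BitsEface (see the enum at the top of this file).  For
         // example, data[0] = 0x9 (binary 10 01) describes two slots: the first
         // holds a plain pointer, the second a non-empty interface (Iface).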
  1339  
  1340  // Scans an interface data value when the interface type indicates
  1341  // that it is a pointer.
  1342  static void
  1343  scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue)
  1344  {
  1345  	Itab *tab;
  1346  	Type *type;
  1347  
  1348  	if(runtime·precisestack && afterprologue) {
  1349  		if(bits == BitsIface) {
  1350  			tab = *(Itab**)scanp;
  1351  			if(tab->type->size <= sizeof(void*) && (tab->type->kind & KindNoPointers))
  1352  				return;
  1353  		} else { // bits == BitsEface
  1354  			type = *(Type**)scanp;
  1355  			if(type->size <= sizeof(void*) && (type->kind & KindNoPointers))
  1356  				return;
  1357  		}
  1358  	}
  1359  	addroot((Obj){scanp+PtrSize, PtrSize, 0});
  1360  }
  1361  
  1362  // Starting from scanp, scans words corresponding to set bits.
  1363  static void
  1364  scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
  1365  {
  1366  	uintptr word, bits;
  1367  	uint32 *wordp;
  1368  	int32 i, remptrs;
  1369  
  1370  	wordp = bv->data;
  1371  	for(remptrs = bv->n; remptrs > 0; remptrs -= 32) {
  1372  		word = *wordp++;
  1373  		if(remptrs < 32)
  1374  			i = remptrs;
  1375  		else
  1376  			i = 32;
  1377  		i /= BitsPerPointer;
  1378  		for(; i > 0; i--) {
  1379  			bits = word & 3;
  1380  			if(bits != BitsNoPointer && *(void**)scanp != nil)
  1381  				if(bits == BitsPointer)
  1382  					addroot((Obj){scanp, PtrSize, 0});
  1383  				else
  1384  					scaninterfacedata(bits, scanp, afterprologue);
  1385  			word >>= BitsPerPointer;
  1386  			scanp += PtrSize;
  1387  		}
  1388  	}
  1389  }
  1390  
  1391  // Scan a stack frame: local variables and function arguments/results.
  1392  static void
  1393  addframeroots(Stkframe *frame, void*)
  1394  {
  1395  	Func *f;
  1396  	BitVector *args, *locals;
  1397  	uintptr size;
  1398  	bool afterprologue;
  1399  
  1400  	f = frame->fn;
  1401  
  1402  	// Scan local variables if stack frame has been allocated.
  1403  	// Use pointer information if known.
  1404  	afterprologue = (frame->varp > (byte*)frame->sp);
  1405  	if(afterprologue) {
  1406  		locals = runtime·funcdata(f, FUNCDATA_GCLocals);
  1407  		if(locals == nil) {
  1408  			// No locals information, scan everything.
  1409  			size = frame->varp - (byte*)frame->sp;
  1410  			addroot((Obj){frame->varp - size, size, 0});
  1411  		} else if(locals->n < 0) {
  1412  			// Locals size information, scan just the
  1413  			// locals.
  1414  			size = -locals->n;
  1415  			addroot((Obj){frame->varp - size, size, 0});
  1416  		} else if(locals->n > 0) {
  1417  			// Locals bitmap information, scan just the
  1418  			// pointers in locals.
  1419  			size = (locals->n*PtrSize) / BitsPerPointer;
  1420  			scanbitvector(frame->varp - size, locals, afterprologue);
  1421  		}
  1422  	}
  1423  
  1424  	// Scan arguments.
  1425  	// Use pointer information if known.
  1426  	args = runtime·funcdata(f, FUNCDATA_GCArgs);
  1427  	if(args != nil && args->n > 0)
  1428  		scanbitvector(frame->argp, args, false);
  1429  	else
  1430  		addroot((Obj){frame->argp, frame->arglen, 0});
  1431  }
  1432  
  1433  static void
  1434  addstackroots(G *gp)
  1435  {
  1436  	M *mp;
  1437  	int32 n;
  1438  	Stktop *stk;
  1439  	uintptr sp, guard, pc, lr;
  1440  	void *base;
  1441  	uintptr size;
  1442  
  1443  	stk = (Stktop*)gp->stackbase;
  1444  	guard = gp->stackguard;
  1445  
  1446  	if(gp == g)
  1447  		runtime·throw("can't scan our own stack");
  1448  	if((mp = gp->m) != nil && mp->helpgc)
  1449  		runtime·throw("can't scan gchelper stack");
  1450  	if(gp->syscallstack != (uintptr)nil) {
  1451  		// Scanning another goroutine that is about to enter or might
  1452  		// have just exited a system call. It may be executing code such
  1453  		// as schedlock and may have needed to start a new stack segment.
  1454  		// Use the stack segment and stack pointer at the time of
  1455  		// the system call instead, since that won't change underfoot.
  1456  		sp = gp->syscallsp;
  1457  		pc = gp->syscallpc;
  1458  		lr = 0;
  1459  		stk = (Stktop*)gp->syscallstack;
  1460  		guard = gp->syscallguard;
  1461  	} else {
  1462  		// Scanning another goroutine's stack.
  1463  		// The goroutine is usually asleep (the world is stopped).
  1464  		sp = gp->sched.sp;
  1465  		pc = gp->sched.pc;
  1466  		lr = gp->sched.lr;
  1467  
  1468  		// For function about to start, context argument is a root too.
  1469  		if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil))
  1470  			addroot((Obj){base, size, 0});
  1471  	}
  1472  	if(ScanStackByFrames) {
  1473  		USED(stk);
  1474  		USED(guard);
  1475  		runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, addframeroots, nil, false);
  1476  	} else {
  1477  		USED(lr);
  1478  		USED(pc);
  1479  		n = 0;
  1480  		while(stk) {
  1481  			if(sp < guard-StackGuard || (uintptr)stk < sp) {
  1482  				runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
  1483  				runtime·throw("scanstack");
  1484  			}
  1485  			addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
  1486  			sp = stk->gobuf.sp;
  1487  			guard = stk->stackguard;
  1488  			stk = (Stktop*)stk->stackbase;
  1489  			n++;
  1490  		}
  1491  	}
  1492  }
  1493  
  1494  static void
  1495  addfinroots(void *v)
  1496  {
  1497  	uintptr size;
  1498  	void *base;
  1499  
  1500  	size = 0;
  1501  	if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
  1502  		runtime·throw("mark - finalizer inconsistency");
  1503  
   1504  	// Do not mark the finalizer block itself.  Just mark the things it points at.
  1505  	addroot((Obj){base, size, 0});
  1506  }
  1507  
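         // addroots collects the GC roots: the data and bss segments (with their
         // gcdata/gcbss pointer maps), the stacks of all goroutines, objects with
         // registered finalizers (via runtime·walkfintab), and the pending
         // finalizer queue entries themselves.  Blocks referenced from MSpan.types
         // are marked directly with markonly rather than queued.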
  1508  static void
  1509  addroots(void)
  1510  {
  1511  	G *gp;
  1512  	FinBlock *fb;
  1513  	MSpan *s, **allspans;
  1514  	uint32 spanidx;
  1515  
  1516  	work.nroot = 0;
  1517  
  1518  	// data & bss
  1519  	// TODO(atom): load balancing
  1520  	addroot((Obj){data, edata - data, (uintptr)gcdata});
  1521  	addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
  1522  
  1523  	// MSpan.types
  1524  	allspans = runtime·mheap.allspans;
  1525  	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
  1526  		s = allspans[spanidx];
  1527  		if(s->state == MSpanInUse) {
  1528  			// The garbage collector ignores type pointers stored in MSpan.types:
  1529  			//  - Compiler-generated types are stored outside of heap.
  1530  			//  - The reflect package has runtime-generated types cached in its data structures.
  1531  			//    The garbage collector relies on finding the references via that cache.
  1532  			switch(s->types.compression) {
  1533  			case MTypes_Empty:
  1534  			case MTypes_Single:
  1535  				break;
  1536  			case MTypes_Words:
  1537  			case MTypes_Bytes:
  1538  				markonly((byte*)s->types.data);
  1539  				break;
  1540  			}
  1541  		}
  1542  	}
  1543  
  1544  	// stacks
  1545  	for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
  1546  		switch(gp->status){
  1547  		default:
  1548  			runtime·printf("unexpected G.status %d\n", gp->status);
  1549  			runtime·throw("mark - bad status");
  1550  		case Gdead:
  1551  			break;
  1552  		case Grunning:
  1553  			runtime·throw("mark - world not stopped");
  1554  		case Grunnable:
  1555  		case Gsyscall:
  1556  		case Gwaiting:
  1557  			addstackroots(gp);
  1558  			break;
  1559  		}
  1560  	}
  1561  
  1562  	runtime·walkfintab(addfinroots);
  1563  
  1564  	for(fb=allfin; fb; fb=fb->alllink)
  1565  		addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
  1566  }
  1567  
  1568  static bool
  1569  handlespecial(byte *p, uintptr size)
  1570  {
  1571  	FuncVal *fn;
  1572  	uintptr nret;
  1573  	PtrType *ot;
  1574  	Type *fint;
  1575  	FinBlock *block;
  1576  	Finalizer *f;
  1577  
  1578  	if(!runtime·getfinalizer(p, true, &fn, &nret, &fint, &ot)) {
  1579  		runtime·setblockspecial(p, false);
  1580  		runtime·MProf_Free(p, size);
  1581  		return false;
  1582  	}
  1583  
  1584  	runtime·lock(&finlock);
  1585  	if(finq == nil || finq->cnt == finq->cap) {
  1586  		if(finc == nil) {
  1587  			finc = runtime·persistentalloc(PageSize, 0, &mstats.gc_sys);
  1588  			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
  1589  			finc->alllink = allfin;
  1590  			allfin = finc;
  1591  		}
  1592  		block = finc;
  1593  		finc = block->next;
  1594  		block->next = finq;
  1595  		finq = block;
  1596  	}
  1597  	f = &finq->fin[finq->cnt];
  1598  	finq->cnt++;
  1599  	f->fn = fn;
  1600  	f->nret = nret;
  1601  	f->fint = fint;
  1602  	f->ot = ot;
  1603  	f->arg = p;
  1604  	runtime·unlock(&finlock);
  1605  	return true;
  1606  }
  1607  
  1608  // Sweep frees or collects finalizers for blocks not marked in the mark phase.
  1609  // It clears the mark bits in preparation for the next GC round.
  1610  static void
  1611  sweepspan(ParFor *desc, uint32 idx)
  1612  {
  1613  	int32 cl, n, npages;
  1614  	uintptr size;
  1615  	byte *p;
  1616  	MCache *c;
  1617  	byte *arena_start;
  1618  	MLink head, *end;
  1619  	int32 nfree;
  1620  	byte *type_data;
  1621  	byte compression;
  1622  	uintptr type_data_inc;
  1623  	MSpan *s;
  1624  
  1625  	USED(&desc);
  1626  	s = runtime·mheap.allspans[idx];
  1627  	if(s->state != MSpanInUse)
  1628  		return;
  1629  	arena_start = runtime·mheap.arena_start;
  1630  	p = (byte*)(s->start << PageShift);
  1631  	cl = s->sizeclass;
  1632  	size = s->elemsize;
  1633  	if(cl == 0) {
  1634  		n = 1;
  1635  	} else {
  1636  		// Chunk full of small blocks.
  1637  		npages = runtime·class_to_allocnpages[cl];
  1638  		n = (npages << PageShift) / size;
  1639  	}
  1640  	nfree = 0;
  1641  	end = &head;
  1642  	c = m->mcache;
  1643  	
  1644  	type_data = (byte*)s->types.data;
  1645  	type_data_inc = sizeof(uintptr);
  1646  	compression = s->types.compression;
  1647  	switch(compression) {
  1648  	case MTypes_Bytes:
  1649  		type_data += 8*sizeof(uintptr);
  1650  		type_data_inc = 1;
  1651  		break;
  1652  	}
  1653  
  1654  	// Sweep through n objects of given size starting at p.
  1655  	// This thread owns the span now, so it can manipulate
  1656  	// the block bitmap without atomic operations.
  1657  	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
  1658  		uintptr off, *bitp, shift, bits;
  1659  
  1660  		off = (uintptr*)p - (uintptr*)arena_start;
  1661  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1662  		shift = off % wordsPerBitmapWord;
  1663  		bits = *bitp>>shift;
  1664  
  1665  		if((bits & bitAllocated) == 0)
  1666  			continue;
  1667  
  1668  		if((bits & bitMarked) != 0) {
  1669  			if(DebugMark) {
  1670  				if(!(bits & bitSpecial))
  1671  					runtime·printf("found spurious mark on %p\n", p);
  1672  				*bitp &= ~(bitSpecial<<shift);
  1673  			}
  1674  			*bitp &= ~(bitMarked<<shift);
  1675  			continue;
  1676  		}
  1677  
  1678  		// Special means it has a finalizer or is being profiled.
  1679  		// In DebugMark mode, the bit has been coopted so
  1680  		// we have to assume all blocks are special.
  1681  		if(DebugMark || (bits & bitSpecial) != 0) {
  1682  			if(handlespecial(p, size))
  1683  				continue;
  1684  		}
  1685  
  1686  		// Mark freed; restore block boundary bit.
  1687  		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  1688  
  1689  		if(cl == 0) {
  1690  			// Free large span.
  1691  			runtime·unmarkspan(p, 1<<PageShift);
  1692  			*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll;	// needs zeroing
  1693  			runtime·MHeap_Free(&runtime·mheap, s, 1);
  1694  			c->local_nlargefree++;
  1695  			c->local_largefree += size;
  1696  		} else {
  1697  			// Free small object.
  1698  			switch(compression) {
  1699  			case MTypes_Words:
  1700  				*(uintptr*)type_data = 0;
  1701  				break;
  1702  			case MTypes_Bytes:
  1703  				*(byte*)type_data = 0;
  1704  				break;
  1705  			}
  1706  			if(size > sizeof(uintptr))
  1707  				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
  1708  			
  1709  			end->next = (MLink*)p;
  1710  			end = (MLink*)p;
  1711  			nfree++;
  1712  		}
  1713  	}
  1714  
  1715  	if(nfree) {
  1716  		c->local_nsmallfree[cl] += nfree;
  1717  		c->local_cachealloc -= nfree * size;
  1718  		runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
  1719  	}
  1720  }
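
// The freed small objects above are batched on an intrusive singly-linked
// list before being handed back to the span's MCentral.  A minimal sketch of
// the dummy-head idiom used by the loop (illustrative, mirrors the code):
//
//	MLink head, *end;         // head is a dummy node on the sweeper's stack
//	end = &head;
//	...
//	end->next = (MLink*)p;    // thread freed block p onto the chain
//	end = (MLink*)p;
//	nfree++;
//	...
//	// head.next is the first freed block and end the last; the chain and
//	// its length go to MCentral_FreeSpan in a single call.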
  1721  
  1722  static void
  1723  dumpspan(uint32 idx)
  1724  {
  1725  	int32 sizeclass, n, npages, i, column;
  1726  	uintptr size;
  1727  	byte *p;
  1728  	byte *arena_start;
  1729  	MSpan *s;
  1730  	bool allocated, special;
  1731  
  1732  	s = runtime·mheap.allspans[idx];
  1733  	if(s->state != MSpanInUse)
  1734  		return;
  1735  	arena_start = runtime·mheap.arena_start;
  1736  	p = (byte*)(s->start << PageShift);
  1737  	sizeclass = s->sizeclass;
  1738  	size = s->elemsize;
  1739  	if(sizeclass == 0) {
  1740  		n = 1;
  1741  	} else {
  1742  		npages = runtime·class_to_allocnpages[sizeclass];
  1743  		n = (npages << PageShift) / size;
  1744  	}
  1745  	
  1746  	runtime·printf("%p .. %p:\n", p, p+n*size);
  1747  	column = 0;
  1748  	for(; n>0; n--, p+=size) {
  1749  		uintptr off, *bitp, shift, bits;
  1750  
  1751  		off = (uintptr*)p - (uintptr*)arena_start;
  1752  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1753  		shift = off % wordsPerBitmapWord;
  1754  		bits = *bitp>>shift;
  1755  
  1756  		allocated = ((bits & bitAllocated) != 0);
  1757  		special = ((bits & bitSpecial) != 0);
  1758  
  1759  		for(i=0; i<size; i+=sizeof(void*)) {
  1760  			if(column == 0) {
  1761  				runtime·printf("\t");
  1762  			}
  1763  			if(i == 0) {
  1764  				runtime·printf(allocated ? "(" : "[");
  1765  				runtime·printf(special ? "@" : "");
  1766  				runtime·printf("%p: ", p+i);
  1767  			} else {
  1768  				runtime·printf(" ");
  1769  			}
  1770  
  1771  			runtime·printf("%p", *(void**)(p+i));
  1772  
  1773  			if(i+sizeof(void*) >= size) {
  1774  				runtime·printf(allocated ? ") " : "] ");
  1775  			}
  1776  
  1777  			column++;
  1778  			if(column == 8) {
  1779  				runtime·printf("\n");
  1780  				column = 0;
  1781  			}
  1782  		}
  1783  	}
  1784  	runtime·printf("\n");
  1785  }
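
// Roughly what the dump above looks like (addresses and values are
// illustrative, not from a real run): each block's words are printed eight
// per line, wrapped in "(...)" if allocated and "[...]" if free, with "@"
// prepended to the address when the special bit is set:
//
//	0x400000 .. 0x400040:
//		(@0x400000: 0xc208000000 0x0 0x0 0x0) [0x400020: 0x0 0x0 0x0 0x0]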
  1786  
  1787  // A debugging function to dump the contents of memory
  1788  void
  1789  runtime·memorydump(void)
  1790  {
  1791  	uint32 spanidx;
  1792  
  1793  	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
  1794  		dumpspan(spanidx);
  1795  	}
  1796  }
  1797  
  1798  void
  1799  runtime·gchelper(void)
  1800  {
  1801  	int32 nproc;
  1802  
  1803  	gchelperstart();
  1804  
  1805  	// parallel mark over gc roots
  1806  	runtime·parfordo(work.markfor);
  1807  
  1808  	// help other threads scan secondary blocks
  1809  	scanblock(nil, nil, 0, true);
  1810  
  1811  	if(DebugMark) {
  1812  		// wait while the main thread runs the debug mark pass (debug_scanblock)
  1813  		while(runtime·atomicload(&work.debugmarkdone) == 0)
  1814  			runtime·usleep(10);
  1815  	}
  1816  
  1817  	runtime·parfordo(work.sweepfor);
  1818  	bufferList[m->helpgc].busy = 0;
  1819  	nproc = work.nproc;  // work.nproc can change right after we increment work.ndone
  1820  	if(runtime·xadd(&work.ndone, +1) == nproc-1)
  1821  		runtime·notewakeup(&work.alldone);
  1822  }
  1823  
  1824  #define GcpercentUnknown (-2)
  1825  
  1826  // Initialized from $GOGC.  GOGC=off means no gc.
  1827  //
  1828  // Next gc is after we've allocated an extra amount of
  1829  // memory proportional to the amount already in use.
  1830  // If gcpercent=100 and we're using 4M, we'll gc again
  1831  // when we get to 8M.  This keeps the gc cost in linear
  1832  // proportion to the allocation cost.  Adjusting gcpercent
  1833  // just changes the linear constant (and also the amount of
  1834  // extra memory used).
  1835  static int32 gcpercent = GcpercentUnknown;
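
// The trigger for the next collection is recomputed at the end of every gc
// (see gc() below) as, in effect:
//
//	mstats.next_gc = mstats.heap_alloc + mstats.heap_alloc*gcpercent/100;
//
// so, for example, GOGC=50 with 4M live after a collection schedules the
// next one at 6M.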
  1836  
  1837  static void
  1838  cachestats(void)
  1839  {
  1840  	MCache *c;
  1841  	P *p, **pp;
  1842  
  1843  	for(pp=runtime·allp; p=*pp; pp++) {
  1844  		c = p->mcache;
  1845  		if(c==nil)
  1846  			continue;
  1847  		runtime·purgecachedstats(c);
  1848  	}
  1849  }
  1850  
  1851  static void
  1852  updatememstats(GCStats *stats)
  1853  {
  1854  	M *mp;
  1855  	MSpan *s;
  1856  	MCache *c;
  1857  	P *p, **pp;
  1858  	int32 i;
  1859  	uint64 stacks_inuse, smallfree;
  1860  	uint64 *src, *dst;
  1861  
  1862  	if(stats)
  1863  		runtime·memclr((byte*)stats, sizeof(*stats));
  1864  	stacks_inuse = 0;
  1865  	for(mp=runtime·allm; mp; mp=mp->alllink) {
  1866  		stacks_inuse += mp->stackinuse*FixedStack;
  1867  		if(stats) {
  1868  			src = (uint64*)&mp->gcstats;
  1869  			dst = (uint64*)stats;
  1870  			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
  1871  				dst[i] += src[i];
  1872  			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
  1873  		}
  1874  	}
  1875  	mstats.stacks_inuse = stacks_inuse;
  1876  	mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
  1877  	mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
  1878  	mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
  1879  		mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
  1880  	
  1881  	// Calculate memory allocator stats.
  1882  	// During program execution we only count number of frees and amount of freed memory.
  1883  	// Current number of live objects in the heap and amount of live heap memory
  1884  	// are calculated by scanning all spans.
  1885  	// Total number of mallocs is calculated as number of frees plus number of alive objects.
  1886  	// Similarly, total amount of allocated memory is calculated as amount of freed memory
  1887  	// plus amount of live heap memory.
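	// A worked example of this accounting (numbers are illustrative): if
	// scanning the spans finds 5 live objects totaling 1MB while the heap
	// has recorded 10 frees totaling 3MB, then nmalloc = 10+5 = 15 and
	// total_alloc = 1MB+3MB = 4MB.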
  1888  	mstats.alloc = 0;
  1889  	mstats.total_alloc = 0;
  1890  	mstats.nmalloc = 0;
  1891  	mstats.nfree = 0;
  1892  	for(i = 0; i < nelem(mstats.by_size); i++) {
  1893  		mstats.by_size[i].nmalloc = 0;
  1894  		mstats.by_size[i].nfree = 0;
  1895  	}
  1896  
  1897  	// Flush MCache's to MCentral.
  1898  	for(pp=runtime·allp; p=*pp; pp++) {
  1899  		c = p->mcache;
  1900  		if(c==nil)
  1901  			continue;
  1902  		runtime·MCache_ReleaseAll(c);
  1903  	}
  1904  
  1905  	// Aggregate local stats.
  1906  	cachestats();
  1907  
  1908  	// Scan all spans and count number of alive objects.
  1909  	for(i = 0; i < runtime·mheap.nspan; i++) {
  1910  		s = runtime·mheap.allspans[i];
  1911  		if(s->state != MSpanInUse)
  1912  			continue;
  1913  		if(s->sizeclass == 0) {
  1914  			mstats.nmalloc++;
  1915  			mstats.alloc += s->elemsize;
  1916  		} else {
  1917  			mstats.nmalloc += s->ref;
  1918  			mstats.by_size[s->sizeclass].nmalloc += s->ref;
  1919  			mstats.alloc += s->ref*s->elemsize;
  1920  		}
  1921  	}
  1922  
  1923  	// Aggregate by size class.
  1924  	smallfree = 0;
  1925  	mstats.nfree = runtime·mheap.nlargefree;
  1926  	for(i = 0; i < nelem(mstats.by_size); i++) {
  1927  		mstats.nfree += runtime·mheap.nsmallfree[i];
  1928  		mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
  1929  		mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
  1930  		smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
  1931  	}
  1932  	mstats.nmalloc += mstats.nfree;
  1933  
  1934  	// Calculate derived stats.
  1935  	mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
  1936  	mstats.heap_alloc = mstats.alloc;
  1937  	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
  1938  }
  1939  
  1940  // Structure of arguments passed to function gc().
  1941  // This allows the arguments to be passed via runtime·mcall.
  1942  struct gc_args
  1943  {
  1944  	int64 start_time; // start time of GC in ns (just before stoptheworld)
  1945  };
  1946  
  1947  static void gc(struct gc_args *args);
  1948  static void mgc(G *gp);
  1949  
  1950  static int32
  1951  readgogc(void)
  1952  {
  1953  	byte *p;
  1954  
  1955  	p = runtime·getenv("GOGC");
  1956  	if(p == nil || p[0] == '\0')
  1957  		return 100;
  1958  	if(runtime·strcmp(p, (byte*)"off") == 0)
  1959  		return -1;
  1960  	return runtime·atoi(p);
  1961  }
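
// For reference, the mapping readgogc implements (values illustrative):
//
//	GOGC unset or empty  -> 100  (default: collect again when the heap doubles)
//	GOGC=off             -> -1   (collection disabled)
//	GOGC=200             -> 200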
  1962  
  1963  static FuncVal runfinqv = {runfinq};
  1964  
  1965  void
  1966  runtime·gc(int32 force)
  1967  {
  1968  	struct gc_args a;
  1969  	int32 i;
  1970  
  1971  	// The atomic operations are not atomic if the uint64s
  1972  	// are not aligned on uint64 boundaries. This has been
  1973  	// a problem in the past.
  1974  	if((((uintptr)&work.empty) & 7) != 0)
  1975  		runtime·throw("runtime: gc work buffer is misaligned");
  1976  	if((((uintptr)&work.full) & 7) != 0)
  1977  		runtime·throw("runtime: gc work buffer is misaligned");
  1978  
  1979  	// The gc is turned off (via enablegc) until
  1980  	// the bootstrap has completed.
  1981  	// Also, malloc gets called in the guts
  1982  	// of a number of libraries that might be
  1983  	// holding locks.  To avoid priority inversion
  1984  	// problems, don't bother trying to run gc
  1985  	// while holding a lock.  The next mallocgc
  1986  	// without a lock will do the gc instead.
  1987  	if(!mstats.enablegc || g == m->g0 || m->locks > 0 || runtime·panicking)
  1988  		return;
  1989  
  1990  	if(gcpercent == GcpercentUnknown) {	// first time through
  1991  		runtime·lock(&runtime·mheap);
  1992  		if(gcpercent == GcpercentUnknown)
  1993  			gcpercent = readgogc();
  1994  		runtime·unlock(&runtime·mheap);
  1995  	}
  1996  	if(gcpercent < 0)
  1997  		return;
  1998  
  1999  	runtime·semacquire(&runtime·worldsema, false);
  2000  	if(!force && mstats.heap_alloc < mstats.next_gc) {
  2001  		// typically threads which lost the race to grab
  2002  		// worldsema exit here when gc is done.
  2003  		runtime·semrelease(&runtime·worldsema);
  2004  		return;
  2005  	}
  2006  
  2007  	// Ok, we're doing it!  Stop everybody else
  2008  	a.start_time = runtime·nanotime();
  2009  	m->gcing = 1;
  2010  	runtime·stoptheworld();
  2011  	
  2012  	// Run gc on the g0 stack.  We do this so that the g stack
  2013  	// we're currently running on will no longer change.  Cuts
  2014  	// the root set down a bit (g0 stacks are not scanned, and
  2015  	// we don't need to scan gc's internal state).  Also an
  2016  	// enabler for copyable stacks.
  2017  	for(i = 0; i < (runtime·debug.gctrace > 1 ? 2 : 1); i++) {
  2018  		// switch to g0, call gc(&a), then switch back
  2019  		g->param = &a;
  2020  		g->status = Gwaiting;
  2021  		g->waitreason = "garbage collection";
  2022  		runtime·mcall(mgc);
  2023  		// record a new start time in case we're going around again
  2024  		a.start_time = runtime·nanotime();
  2025  	}
  2026  
  2027  	// all done
  2028  	m->gcing = 0;
  2029  	m->locks++;
  2030  	runtime·semrelease(&runtime·worldsema);
  2031  	runtime·starttheworld();
  2032  	m->locks--;
  2033  
  2034  	// now that gc is done, kick off finalizer thread if needed
  2035  	if(finq != nil) {
  2036  		runtime·lock(&finlock);
  2037  		// kick off or wake up goroutine to run queued finalizers
  2038  		if(fing == nil)
  2039  			fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
  2040  		else if(fingwait) {
  2041  			fingwait = 0;
  2042  			runtime·ready(fing);
  2043  		}
  2044  		runtime·unlock(&finlock);
  2045  	}
  2046  	// give the queued finalizers, if any, a chance to run
  2047  	runtime·gosched();
  2048  }
  2049  
  2050  static void
  2051  mgc(G *gp)
  2052  {
  2053  	gc(gp->param);
  2054  	gp->param = nil;
  2055  	gp->status = Grunning;
  2056  	runtime·gogo(&gp->sched);
  2057  }
  2058  
  2059  static void
  2060  gc(struct gc_args *args)
  2061  {
  2062  	int64 t0, t1, t2, t3, t4;
  2063  	uint64 heap0, heap1, obj0, obj1, ninstr;
  2064  	GCStats stats;
  2065  	M *mp;
  2066  	uint32 i;
  2067  	Eface eface;
  2068  
  2069  	t0 = args->start_time;
  2070  
  2071  	if(CollectStats)
  2072  		runtime·memclr((byte*)&gcstats, sizeof(gcstats));
  2073  
  2074  	for(mp=runtime·allm; mp; mp=mp->alllink)
  2075  		runtime·settype_flush(mp);
  2076  
  2077  	heap0 = 0;
  2078  	obj0 = 0;
  2079  	if(runtime·debug.gctrace) {
  2080  		updatememstats(nil);
  2081  		heap0 = mstats.heap_alloc;
  2082  		obj0 = mstats.nmalloc - mstats.nfree;
  2083  	}
  2084  
  2085  	m->locks++;	// disable gc during mallocs in parforalloc
  2086  	if(work.markfor == nil)
  2087  		work.markfor = runtime·parforalloc(MaxGcproc);
  2088  	if(work.sweepfor == nil)
  2089  		work.sweepfor = runtime·parforalloc(MaxGcproc);
  2090  	m->locks--;
  2091  
  2092  	if(itabtype == nil) {
  2093  		// get C pointer to the Go type "itab"
  2094  		runtime·gc_itab_ptr(&eface);
  2095  		itabtype = ((PtrType*)eface.type)->elem;
  2096  	}
  2097  
  2098  	work.nwait = 0;
  2099  	work.ndone = 0;
  2100  	work.debugmarkdone = 0;
  2101  	work.nproc = runtime·gcprocs();
  2102  	addroots();
  2103  	runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
  2104  	runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
  2105  	if(work.nproc > 1) {
  2106  		runtime·noteclear(&work.alldone);
  2107  		runtime·helpgc(work.nproc);
  2108  	}
  2109  
  2110  	t1 = runtime·nanotime();
  2111  
  2112  	gchelperstart();
  2113  	runtime·parfordo(work.markfor);
  2114  	scanblock(nil, nil, 0, true);
  2115  
  2116  	if(DebugMark) {
  2117  		for(i=0; i<work.nroot; i++)
  2118  			debug_scanblock(work.roots[i].p, work.roots[i].n);
  2119  		runtime·atomicstore(&work.debugmarkdone, 1);
  2120  	}
  2121  	t2 = runtime·nanotime();
  2122  
  2123  	runtime·parfordo(work.sweepfor);
  2124  	bufferList[m->helpgc].busy = 0;
  2125  	t3 = runtime·nanotime();
  2126  
  2127  	if(work.nproc > 1)
  2128  		runtime·notesleep(&work.alldone);
  2129  
  2130  	cachestats();
  2131  	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
  2132  
  2133  	t4 = runtime·nanotime();
  2134  	mstats.last_gc = t4;
  2135  	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
  2136  	mstats.pause_total_ns += t4 - t0;
  2137  	mstats.numgc++;
  2138  	if(mstats.debuggc)
  2139  		runtime·printf("pause %D\n", t4-t0);
  2140  
  2141  	if(runtime·debug.gctrace) {
  2142  		updatememstats(&stats);
  2143  		heap1 = mstats.heap_alloc;
  2144  		obj1 = mstats.nmalloc - mstats.nfree;
  2145  
  2146  		stats.nprocyield += work.sweepfor->nprocyield;
  2147  		stats.nosyield += work.sweepfor->nosyield;
  2148  		stats.nsleep += work.sweepfor->nsleep;
  2149  
  2150  		runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
  2151  				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
  2152  			mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
  2153  			heap0>>20, heap1>>20, obj0, obj1,
  2154  			mstats.nmalloc, mstats.nfree,
  2155  			stats.nhandoff, stats.nhandoffcnt,
  2156  			work.sweepfor->nsteal, work.sweepfor->nstealcnt,
  2157  			stats.nprocyield, stats.nosyield, stats.nsleep);
  2158  		if(CollectStats) {
  2159  			runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
  2160  				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
  2161  			if(gcstats.ptr.cnt != 0)
  2162  				runtime·printf("avg ptrbufsize: %D (%D/%D)\n",
  2163  					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
  2164  			if(gcstats.obj.cnt != 0)
  2165  				runtime·printf("avg nobj: %D (%D/%D)\n",
  2166  					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
  2167  			runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
  2168  
  2169  			runtime·printf("instruction counts:\n");
  2170  			ninstr = 0;
  2171  			for(i=0; i<nelem(gcstats.instr); i++) {
  2172  				runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]);
  2173  				ninstr += gcstats.instr[i];
  2174  			}
  2175  			runtime·printf("\ttotal:\t%D\n", ninstr);
  2176  
  2177  			runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
  2178  
  2179  			runtime·printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
  2180  			runtime·printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
  2181  		}
  2182  	}
  2183  
  2184  	runtime·MProf_GC();
  2185  }
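
// An illustrative line produced by the gctrace printf above (all numbers
// made up, format as in the code):
//
//	gc4(2): 1+3+0 ms, 4 -> 2 MB 12000 -> 6000 (30000-24000) objects, 5(120) handoff, 3(60) steal, 10/5/2 yields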
  2186  
  2187  void
  2188  runtime·ReadMemStats(MStats *stats)
  2189  {
  2190  	// Have to acquire worldsema to stop the world,
  2191  	// because stoptheworld can only be used by
  2192  	// one goroutine at a time, and there might be
  2193  	// a pending garbage collection already calling it.
  2194  	runtime·semacquire(&runtime·worldsema, false);
  2195  	m->gcing = 1;
  2196  	runtime·stoptheworld();
  2197  	updatememstats(nil);
  2198  	*stats = mstats;
  2199  	m->gcing = 0;
  2200  	m->locks++;
  2201  	runtime·semrelease(&runtime·worldsema);
  2202  	runtime·starttheworld();
  2203  	m->locks--;
  2204  }
  2205  
  2206  void
  2207  runtime∕debug·readGCStats(Slice *pauses)
  2208  {
  2209  	uint64 *p;
  2210  	uint32 i, n;
  2211  
  2212  	// Calling code in runtime/debug should make the slice large enough.
  2213  	if(pauses->cap < nelem(mstats.pause_ns)+3)
  2214  		runtime·throw("runtime: short slice passed to readGCStats");
  2215  
  2216  	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
  2217  	p = (uint64*)pauses->array;
  2218  	runtime·lock(&runtime·mheap);
  2219  	n = mstats.numgc;
  2220  	if(n > nelem(mstats.pause_ns))
  2221  		n = nelem(mstats.pause_ns);
  2222  	
  2223  	// The pause buffer is circular. The most recent pause is at
  2224  	// pause_ns[(numgc-1)%nelem(pause_ns)]; earlier pauses are found
  2225  	// by walking backward from there. We deliver the times
  2226  	// most recent first (in p[0]).
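	// For instance, assuming a 256-entry pause_ns buffer and numgc = 260:
	// p[0] = pause_ns[259%256] = pause_ns[3], p[1] = pause_ns[2], and so on.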
  2227  	for(i=0; i<n; i++)
  2228  		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
  2229  
  2230  	p[n] = mstats.last_gc;
  2231  	p[n+1] = mstats.numgc;
  2232  	p[n+2] = mstats.pause_total_ns;	
  2233  	runtime·unlock(&runtime·mheap);
  2234  	pauses->len = n+3;
  2235  }
  2236  
  2237  void
  2238  runtime∕debug·setGCPercent(intgo in, intgo out)
  2239  {
  2240  	runtime·lock(&runtime·mheap);
  2241  	if(gcpercent == GcpercentUnknown)
  2242  		gcpercent = readgogc();
  2243  	out = gcpercent;
  2244  	if(in < 0)
  2245  		in = -1;
  2246  	gcpercent = in;
  2247  	runtime·unlock(&runtime·mheap);
  2248  	FLUSH(&out);
  2249  }
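
// The ∕-named function above is the C body of a Go function in package
// runtime/debug.  A sketch of the Go-side declaration it implements (the
// exact spelling in runtime/debug may differ):
//
//	func setGCPercent(in int) (out int)
//
// The result travels back through the out parameter in the caller's frame,
// which is why FLUSH(&out) is required.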
  2250  
  2251  static void
  2252  gchelperstart(void)
  2253  {
  2254  	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
  2255  		runtime·throw("gchelperstart: bad m->helpgc");
  2256  	if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
  2257  		runtime·throw("gchelperstart: already busy");
  2258  	if(g != m->g0)
  2259  		runtime·throw("gchelper not running on g0 stack");
  2260  }
  2261  
  2262  static void
  2263  runfinq(void)
  2264  {
  2265  	Finalizer *f;
  2266  	FinBlock *fb, *next;
  2267  	byte *frame;
  2268  	uint32 framesz, framecap, i;
  2269  	Eface *ef, ef1;
  2270  
  2271  	frame = nil;
  2272  	framecap = 0;
  2273  	for(;;) {
  2274  		runtime·lock(&finlock);
  2275  		fb = finq;
  2276  		finq = nil;
  2277  		if(fb == nil) {
  2278  			fingwait = 1;
  2279  			runtime·park(runtime·unlock, &finlock, "finalizer wait");
  2280  			continue;
  2281  		}
  2282  		runtime·unlock(&finlock);
  2283  		if(raceenabled)
  2284  			runtime·racefingo();
  2285  		for(; fb; fb=next) {
  2286  			next = fb->next;
  2287  			for(i=0; i<fb->cnt; i++) {
  2288  				f = &fb->fin[i];
  2289  				framesz = sizeof(Eface) + f->nret;
  2290  				if(framecap < framesz) {
  2291  					runtime·free(frame);
  2292  				// The frame does not contain pointers interesting for GC;
  2293  				// all not yet finalized objects are reachable from finq.
  2294  					// If we do not mark it as FlagNoScan,
  2295  					// the last finalized object is not collected.
  2296  					frame = runtime·mallocgc(framesz, 0, FlagNoScan|FlagNoInvokeGC);
  2297  					framecap = framesz;
  2298  				}
  2299  				if(f->fint == nil)
  2300  					runtime·throw("missing type in runfinq");
  2301  				if(f->fint->kind == KindPtr) {
  2302  					// direct use of pointer
  2303  					*(void**)frame = f->arg;
  2304  				} else if(((InterfaceType*)f->fint)->mhdr.len == 0) {
  2305  					// convert to empty interface
  2306  					ef = (Eface*)frame;
  2307  					ef->type = f->ot;
  2308  					ef->data = f->arg;
  2309  				} else {
  2310  					// convert to interface with methods, via empty interface.
  2311  					ef1.type = f->ot;
  2312  					ef1.data = f->arg;
  2313  					if(!runtime·ifaceE2I2((InterfaceType*)f->fint, ef1, (Iface*)frame))
  2314  						runtime·throw("invalid type conversion in runfinq");
  2315  				}
  2316  				reflect·call(f->fn, frame, framesz);
  2317  				f->fn = nil;
  2318  				f->arg = nil;
  2319  				f->ot = nil;
  2320  			}
  2321  			fb->cnt = 0;
  2322  			fb->next = finc;
  2323  			finc = fb;
  2324  		}
  2325  		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible
  2326  	}
  2327  }
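
// How the Finalizer fields consumed above get populated, roughly: a call
// such as
//
//	runtime.SetFinalizer(x, func(p *T) { ... })
//
// on the Go side ends up queuing f->fn = the finalizer function, f->arg =
// the object, f->fint = the type of the finalizer's parameter, and f->ot =
// the object's pointer type.  The three cases above handle a pointer
// parameter, an empty-interface parameter, and a non-empty interface
// parameter.  (Sketch only; the queuing itself lives elsewhere in the
// runtime.)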
  2328  
  2329  // mark the block at v of size n as allocated.
  2330  // If noscan is true, mark it as not needing scanning.
  2331  void
  2332  runtime·markallocated(void *v, uintptr n, bool noscan)
  2333  {
  2334  	uintptr *b, obits, bits, off, shift;
  2335  
  2336  	if(0)
  2337  		runtime·printf("markallocated %p+%p\n", v, n);
  2338  
  2339  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2340  		runtime·throw("markallocated: bad pointer");
  2341  
  2342  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
  2343  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2344  	shift = off % wordsPerBitmapWord;
  2345  
  2346  	for(;;) {
  2347  		obits = *b;
  2348  		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
  2349  		if(noscan)
  2350  			bits |= bitNoScan<<shift;
  2351  		if(runtime·gomaxprocs == 1) {
  2352  			*b = bits;
  2353  			break;
  2354  		} else {
  2355  			// more than one goroutine is potentially running: use atomic op
  2356  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2357  				break;
  2358  		}
  2359  	}
  2360  }
  2361  
  2362  // mark the block at v of size n as freed.
  2363  void
  2364  runtime·markfreed(void *v, uintptr n)
  2365  {
  2366  	uintptr *b, obits, bits, off, shift;
  2367  
  2368  	if(0)
  2369  		runtime·printf("markfreed %p+%p\n", v, n);
  2370  
  2371  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2372  		runtime·throw("markfreed: bad pointer");
  2373  
  2374  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
  2375  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2376  	shift = off % wordsPerBitmapWord;
  2377  
  2378  	for(;;) {
  2379  		obits = *b;
  2380  		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  2381  		if(runtime·gomaxprocs == 1) {
  2382  			*b = bits;
  2383  			break;
  2384  		} else {
  2385  			// more than one goroutine is potentially running: use atomic op
  2386  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2387  				break;
  2388  		}
  2389  	}
  2390  }
  2391  
  2392  // check that the block at v of size n is marked freed.
  2393  void
  2394  runtime·checkfreed(void *v, uintptr n)
  2395  {
  2396  	uintptr *b, bits, off, shift;
  2397  
  2398  	if(!runtime·checking)
  2399  		return;
  2400  
  2401  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2402  		return;	// not allocated, so okay
  2403  
  2404  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
  2405  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2406  	shift = off % wordsPerBitmapWord;
  2407  
  2408  	bits = *b>>shift;
  2409  	if((bits & bitAllocated) != 0) {
  2410  		runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
  2411  			v, n, off, bits & bitMask);
  2412  		runtime·throw("checkfreed: not freed");
  2413  	}
  2414  }
  2415  
  2416  // mark the span of memory at v as having n blocks of the given size.
  2417  // if leftover is true, there is left over space at the end of the span.
  2418  void
  2419  runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
  2420  {
  2421  	uintptr *b, off, shift;
  2422  	byte *p;
  2423  
  2424  	if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2425  		runtime·throw("markspan: bad pointer");
  2426  
  2427  	p = v;
  2428  	if(leftover)	// mark a boundary just past end of last block too
  2429  		n++;
  2430  	for(; n-- > 0; p += size) {
  2431  		// Okay to use non-atomic ops here, because we control
  2432  		// the entire span, and each bitmap word has bits for only
  2433  		// one span, so no other goroutines are changing these
  2434  		// bitmap words.
  2435  		off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;  // word offset
  2436  		b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2437  		shift = off % wordsPerBitmapWord;
  2438  		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  2439  	}
  2440  }
  2441  
  2442  // unmark the span of memory at v of length n bytes.
  2443  void
  2444  runtime·unmarkspan(void *v, uintptr n)
  2445  {
  2446  	uintptr *p, *b, off;
  2447  
  2448  	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
  2449  		runtime·throw("unmarkspan: bad pointer");
  2450  
  2451  	p = v;
  2452  	off = p - (uintptr*)runtime·mheap.arena_start;  // word offset
  2453  	if(off % wordsPerBitmapWord != 0)
  2454  		runtime·throw("unmarkspan: unaligned pointer");
  2455  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2456  	n /= PtrSize;
  2457  	if(n%wordsPerBitmapWord != 0)
  2458  		runtime·throw("unmarkspan: unaligned length");
  2459  	// Okay to use non-atomic ops here, because we control
  2460  	// the entire span, and each bitmap word has bits for only
  2461  	// one span, so no other goroutines are changing these
  2462  	// bitmap words.
  2463  	n /= wordsPerBitmapWord;
  2464  	while(n-- > 0)
  2465  		*b-- = 0;
  2466  }
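
// A quick sizing example for the loop above (64-bit, so PtrSize is 8 and
// wordsPerBitmapWord is 16): clearing the bitmap for a 4096-byte span
// touches 4096/8 = 512 heap words, i.e. 512/16 = 32 bitmap words.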
  2467  
  2468  bool
  2469  runtime·blockspecial(void *v)
  2470  {
  2471  	uintptr *b, off, shift;
  2472  
  2473  	if(DebugMark)
  2474  		return true;
  2475  
  2476  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
  2477  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2478  	shift = off % wordsPerBitmapWord;
  2479  
  2480  	return (*b & (bitSpecial<<shift)) != 0;
  2481  }
  2482  
  2483  void
  2484  runtime·setblockspecial(void *v, bool s)
  2485  {
  2486  	uintptr *b, off, shift, bits, obits;
  2487  
  2488  	if(DebugMark)
  2489  		return;
  2490  
  2491  	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
  2492  	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
  2493  	shift = off % wordsPerBitmapWord;
  2494  
  2495  	for(;;) {
  2496  		obits = *b;
  2497  		if(s)
  2498  			bits = obits | (bitSpecial<<shift);
  2499  		else
  2500  			bits = obits & ~(bitSpecial<<shift);
  2501  		if(runtime·gomaxprocs == 1) {
  2502  			*b = bits;
  2503  			break;
  2504  		} else {
  2505  			// more than one goroutine is potentially running: use atomic op
  2506  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2507  				break;
  2508  		}
  2509  	}
  2510  }
  2511  
  2512  void
  2513  runtime·MHeap_MapBits(MHeap *h)
  2514  {
  2515  	// Caller has added extra mappings to the arena.
  2516  	// Add extra mappings of bitmap words as needed.
  2517  	// We allocate extra bitmap pieces in chunks of bitmapChunk.
  2518  	enum {
  2519  		bitmapChunk = 8192
  2520  	};
  2521  	uintptr n;
  2522  
  2523  	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
  2524  	n = ROUND(n, bitmapChunk);
  2525  	if(h->bitmap_mapped >= n)
  2526  		return;
  2527  
  2528  	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, &mstats.gc_sys);
  2529  	h->bitmap_mapped = n;
  2530  }
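
// Sizing sketch for the mapping above (64-bit, illustrative numbers): the
// bitmap needs one byte for every 16 heap bytes, so growing arena_used by
// 1MB adds about 64KB of bitmap; the total is rounded up to a multiple of
// the 8192-byte bitmapChunk and only the not-yet-mapped tail is passed to
// SysMap.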