github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/mgc0.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector.

#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
#include "mgc0.h"
#include "race.h"
#include "type.h"
#include "typekind.h"
#include "hashmap.h"

enum {
	Debug = 0,
	DebugMark = 0,  // run second pass to check mark
	CollectStats = 0,
	ScanStackByFrames = 0,
	IgnorePreciseGC = 0,

	// Four bits per word (see #defines below).
	wordsPerBitmapWord = sizeof(void*)*8/4,
	bitShift = sizeof(void*)*8/4,

	handoffThreshold = 4,
	IntermediateBufferCapacity = 64,

	// Bits in type information
	PRECISE = 1,
	LOOP = 2,
	PC_BITS = PRECISE | LOOP,
};

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there.  On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//	off = p - (uintptr*)mheap.arena_start;  // word offset
//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//	shift = off % wordsPerBitmapWord
//	bits = *b >> shift;
//	/* then test bits & bitAllocated, bits & bitMarked, etc. */
//
#define bitAllocated		((uintptr)1<<(bitShift*0))
#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
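
// As a worked example of the layout above: on a 64-bit system
// (bitShift = 16), the bits for the heap word at offset 40 from
// arena_start live in the bitmap word at arena_start - (40/16 + 1),
// at shift 40%16 = 8, so its four bits sit at bit positions
// 8 (bitAllocated), 24 (bitNoPointers), 40 (bitMarked), and 56 (bitSpecial).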

// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
//	runtime·semacquire(&runtime·worldsema);
//	m->gcing = 1;
//	runtime·stoptheworld();
//
//	... do stuff ...
//
//	m->gcing = 0;
//	runtime·semrelease(&runtime·worldsema);
//	runtime·starttheworld();
//
uint32 runtime·worldsema = 1;

static int32 gctrace;

typedef struct Obj Obj;
struct Obj
{
	byte	*p;	// data pointer
	uintptr	n;	// size of data in bytes
	uintptr	ti;	// type info
};

// The size of Workbuf is a multiple of PageSize (currently 2*PageSize).
typedef struct Workbuf Workbuf;
struct Workbuf
{
#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
	LFNode  node; // must be first
	uintptr nobj;
	Obj     obj[SIZE/sizeof(Obj) - 1];
	uint8   _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
#undef SIZE
};
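
// A back-of-the-envelope check of the size arithmetic (a sketch assuming
// a 64-bit system with PageSize 4096, sizeof(LFNode) == 16, sizeof(Obj) == 24):
//
//	SIZE = 2*4096 - 16 - 8 = 8168
//	nelem(obj) = 8168/24 - 1 = 339
//	sizeof(_padding) = 8168%24 + 24 = 32
//	sizeof(Workbuf) = 16 + 8 + 339*24 + 32 = 8192 = 2*PageSize
//
// scanblock checks at run time that sizeof(Workbuf) is a multiple of PageSize.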

typedef struct Finalizer Finalizer;
struct Finalizer
{
	FuncVal *fn;
	void *arg;
	uintptr nret;
};

typedef struct FinBlock FinBlock;
struct FinBlock
{
	FinBlock *alllink;
	FinBlock *next;
	int32 cnt;
	int32 cap;
	Finalizer fin[1];
};

extern byte data[];
extern byte edata[];
extern byte bss[];
extern byte ebss[];

extern byte gcdata[];
extern byte gcbss[];

static G *fing;
static FinBlock *finq; // list of finalizers that are to be executed
static FinBlock *finc; // cache of free blocks
static FinBlock *allfin; // list of all blocks
static Lock finlock;
static int32 fingwait;

static void runfinq(void);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void	putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
static void	gchelperstart(void);

static struct {
	uint64	full;  // lock-free list of full blocks
	uint64	empty; // lock-free list of empty blocks
	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
	uint32	nproc;
	volatile uint32	nwait;
	volatile uint32	ndone;
	volatile uint32 debugmarkdone;
	Note	alldone;
	ParFor	*markfor;
	ParFor	*sweepfor;

	Lock;
	byte	*chunk;
	uintptr	nchunk;

	Obj	*roots;
	uint32	nroot;
	uint32	rootcap;
} work;

enum {
	GC_DEFAULT_PTR = GC_NUM_INSTR,
	GC_MAP_NEXT,
	GC_CHAN,

	GC_NUM_INSTR2
};

static struct {
	struct {
		uint64 sum;
		uint64 cnt;
	} ptr;
	uint64 nbytes;
	struct {
		uint64 sum;
		uint64 cnt;
		uint64 notype;
		uint64 typelookup;
	} obj;
	uint64 rescan;
	uint64 rescanbytes;
	uint64 instr[GC_NUM_INSTR2];
	uint64 putempty;
	uint64 getfull;
} gcstats;

// markonly marks an object. It returns true if the object
// has been marked by this function, false otherwise.
// This function doesn't append the object to any buffer.
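// Its callers below use it for blocks whose contents need not be
// scanned, for example string data (GC_STRING) and hashmap subtables.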
static bool
markonly(void *obj)
{
	byte *p;
	uintptr *bitp, bits, shift, x, xbits, off;
	MSpan *s;
	PageID k;

	// Words outside the arena cannot be pointers.
	if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used)
		return false;

	// obj may be a pointer to a live object.
	// Try to find the beginning of the object.

	// Round down to word boundary.
	obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

	// Find bits for this word.
	off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start;
	bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	xbits = *bitp;
	bits = xbits >> shift;

	// Pointing at the beginning of a block?
	if((bits & (bitAllocated|bitBlockBoundary)) != 0)
		goto found;

	// Otherwise consult span table to find beginning.
	// (Manually inlined copy of MHeap_LookupMaybe.)
	k = (uintptr)obj>>PageShift;
	x = k;
	if(sizeof(void*) == 8)
		x -= (uintptr)runtime·mheap->arena_start>>PageShift;
	s = runtime·mheap->map[x];
	if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
		return false;
	p = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass == 0) {
		obj = p;
	} else {
		if((byte*)obj >= (byte*)s->limit)
			return false;
		uintptr size = s->elemsize;
		int32 i = ((byte*)obj - p)/size;
		obj = p+i*size;
	}

	// Now that we know the object header, reload bits.
	off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start;
	bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	xbits = *bitp;
	bits = xbits >> shift;

found:
	// Now we have bits, bitp, and shift correct for
	// obj pointing at the base of the object.
	// Only care about allocated and not marked.
	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
		return false;
	if(work.nproc == 1)
		*bitp |= bitMarked<<shift;
	else {
		for(;;) {
			x = *bitp;
			if(x & (bitMarked<<shift))
				return false;
			if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
				break;
		}
	}

	// The object is now marked
	return true;
}

// PtrTarget is a structure used by intermediate buffers.
// The intermediate buffers hold GC data before it
// is moved/flushed to the work buffer (Workbuf).
// The size of an intermediate buffer is very small,
// such as 32 or 64 elements.
typedef struct PtrTarget PtrTarget;
struct PtrTarget
{
	void *p;
	uintptr ti;
};

typedef struct BufferList BufferList;
struct BufferList
{
	PtrTarget ptrtarget[IntermediateBufferCapacity];
	Obj obj[IntermediateBufferCapacity];
	uint32 busy;
	byte pad[CacheLineSize];
};
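
// Note: each mark helper works on the bufferList entry indexed by
// m->helpgc (see scanblock); the pad field keeps entries on separate
// cache lines so that helpers do not contend via false sharing.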
#pragma dataflag 16  // no pointers
static BufferList bufferList[MaxGcproc];

static Type *itabtype;

static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);

// flushptrbuf moves data from the PtrTarget buffer to the work buffer.
// The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
// while the work buffer contains blocks which have been marked
// and are prepared to be scanned by the garbage collector.
//
// _wp, _wbuf, _nobj are input/output parameters describing the work buffer.
//
// A simplified drawing explaining how the todo list moves from one structure to another:
//
//     scanblock
//  (find pointers)
//    Obj ------> PtrTarget (pointer targets)
//     ↑          |
//     |          |
//     `----------'
//     flushptrbuf
//  (find block start, mark and enqueue)
static void
flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
	byte *p, *arena_start, *obj;
	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
	MSpan *s;
	PageID k;
	Obj *wp;
	Workbuf *wbuf;
	PtrTarget *ptrbuf_end;

	arena_start = runtime·mheap->arena_start;

	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	ptrbuf_end = *ptrbufpos;
	n = ptrbuf_end - ptrbuf;
	*ptrbufpos = ptrbuf;

	if(CollectStats) {
		runtime·xadd64(&gcstats.ptr.sum, n);
		runtime·xadd64(&gcstats.ptr.cnt, 1);
	}

	// If buffer is nearly full, get a new one.
	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
		if(wbuf != nil)
			wbuf->nobj = nobj;
		wbuf = getempty(wbuf);
		wp = wbuf->obj;
		nobj = 0;

		if(n >= nelem(wbuf->obj))
			runtime·throw("ptrbuf has to be smaller than WorkBuf");
	}

	// TODO(atom): This block is a branch of an if-then-else statement.
	//             The single-threaded branch may be added in a future CL.
	{
		// Multi-threaded version.

		while(ptrbuf < ptrbuf_end) {
			obj = ptrbuf->p;
			ti = ptrbuf->ti;
			ptrbuf++;

			// obj belongs to interval [mheap.arena_start, mheap.arena_used).
			if(Debug > 1) {
				if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used)
					runtime·throw("object is outside of mheap");
			}

			// obj may be a pointer to a live object.
			// Try to find the beginning of the object.

			// Round down to word boundary.
			if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
				obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
				ti = 0;
			}

			// Find bits for this word.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;

			// Pointing at the beginning of a block?
			if((bits & (bitAllocated|bitBlockBoundary)) != 0)
				goto found;

			ti = 0;

			// Pointing just past the beginning?
			// Scan backward a little to find a block boundary.
			for(j=shift; j-->0; ) {
				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
					obj = (byte*)obj - (shift-j)*PtrSize;
					shift = j;
					bits = xbits>>shift;
					goto found;
				}
			}

			// Otherwise consult span table to find beginning.
			// (Manually inlined copy of MHeap_LookupMaybe.)
			k = (uintptr)obj>>PageShift;
			x = k;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime·mheap->map[x];
			if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
				continue;
			p = (byte*)((uintptr)s->start<<PageShift);
			if(s->sizeclass == 0) {
				obj = p;
			} else {
				if((byte*)obj >= (byte*)s->limit)
					continue;
				size = s->elemsize;
				int32 i = ((byte*)obj - p)/size;
				obj = p+i*size;
			}

			// Now that we know the object header, reload bits.
			off = (uintptr*)obj - (uintptr*)arena_start;
			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
			shift = off % wordsPerBitmapWord;
			xbits = *bitp;
			bits = xbits >> shift;

		found:
			// Now we have bits, bitp, and shift correct for
			// obj pointing at the base of the object.
			// Only care about allocated and not marked.
			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
				continue;
			if(work.nproc == 1)
				*bitp |= bitMarked<<shift;
			else {
				for(;;) {
					x = *bitp;
					if(x & (bitMarked<<shift))
						goto continue_obj;
					if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
						break;
				}
			}

			// If object has no pointers, don't need to scan further.
			if((bits & bitNoPointers) != 0)
				continue;

			// Ask span about size class.
			// (Manually inlined copy of MHeap_Lookup.)
			x = (uintptr)obj >> PageShift;
			if(sizeof(void*) == 8)
				x -= (uintptr)arena_start>>PageShift;
			s = runtime·mheap->map[x];

			PREFETCH(obj);

			*wp = (Obj){obj, s->elemsize, ti};
			wp++;
			nobj++;
		continue_obj:;
		}

		// If another proc wants a pointer, give it some.
		if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
			wbuf->nobj = nobj;
			wbuf = handoff(wbuf);
			nobj = wbuf->nobj;
			wp = wbuf->obj + nobj;
		}
	}

	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

static void
flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
	uintptr nobj, off;
	Obj *wp, obj;
	Workbuf *wbuf;
	Obj *objbuf_end;

	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	objbuf_end = *objbufpos;
	*objbufpos = objbuf;

	while(objbuf < objbuf_end) {
		obj = *objbuf++;

		// Align obj.b to a word boundary.
		off = (uintptr)obj.p & (PtrSize-1);
		if(off != 0) {
			obj.p += PtrSize - off;
			obj.n -= PtrSize - off;
			obj.ti = 0;
		}

		if(obj.p == nil || obj.n == 0)
			continue;

		// If buffer is full, get a new one.
		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
			if(wbuf != nil)
				wbuf->nobj = nobj;
			wbuf = getempty(wbuf);
			wp = wbuf->obj;
			nobj = 0;
		}

		*wp = obj;
		wp++;
		nobj++;
	}

	// If another proc wants a pointer, give it some.
	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = wbuf->obj + nobj;
	}

	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

// Program that scans the whole block and treats every block element as a potential pointer
static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};

// Hashmap iterator program
static uintptr mapProg[2] = {0, GC_MAP_NEXT};

// Hchan program
static uintptr chanProg[2] = {0, GC_CHAN};
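
// For illustration only (a hypothetical program, not one emitted by the
// compiler here): following the interpreter in scanblock below, a 16-byte
// struct whose only pointer field is at offset 0, with pointee type info ti,
// would be described by
//
//	{16, GC_PTR, 0, ti, GC_END}
//
// where the leading word is the element size and the instructions follow.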

// Local variables of a program fragment or loop
typedef struct Frame Frame;
struct Frame {
	uintptr count, elemsize, b;
	uintptr *loop_or_ret;
};

// Sanity check for the derived type info objti.
static void
checkptr(void *obj, uintptr objti)
{
	uintptr *pc1, *pc2, type, tisize, i, j, x;
	byte *objstart;
	Type *t;
	MSpan *s;

	if(!Debug)
		runtime·throw("checkptr is debug only");

	if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used)
		return;
	type = runtime·gettype(obj);
	t = (Type*)(type & ~(uintptr)(PtrSize-1));
	if(t == nil)
		return;
	x = (uintptr)obj >> PageShift;
	if(sizeof(void*) == 8)
		x -= (uintptr)(runtime·mheap->arena_start)>>PageShift;
	s = runtime·mheap->map[x];
	objstart = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass != 0) {
		i = ((byte*)obj - objstart)/s->elemsize;
		objstart += i*s->elemsize;
	}
	tisize = *(uintptr*)objti;
	// Sanity check for object size: it should fit into the memory block.
	if((byte*)obj + tisize > objstart + s->elemsize)
		runtime·throw("invalid gc type info");
	if(obj != objstart)
		return;
	// If obj points to the beginning of the memory block,
	// check type info as well.
	if(t->string == nil ||
		// Gob allocates unsafe pointers for indirection.
		(runtime·strcmp(t->string->str, (byte*)"unsafe.Pointer") &&
		// Runtime and gc think differently about closures.
		runtime·strstr(t->string->str, (byte*)"struct { F uintptr") != t->string->str)) {
		pc1 = (uintptr*)objti;
		pc2 = (uintptr*)t->gc;
		// A simple best-effort check until first GC_END.
		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
			if(pc1[j] != pc2[j]) {
				runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
					t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]);
				runtime·throw("invalid gc type info");
			}
		}
	}
}

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left.  Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body.  Keeping an explicit work list is easier on the stack allocator and
// more efficient.
//
// wbuf: current work buffer
// wp:   storage for next queued pointer (write pointer)
// nobj: number of queued objects
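//
// When wbuf is non-nil the invariant is wp == wbuf->obj + nobj,
// i.e. wbuf->obj[0..nobj) holds the queued work.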
static void
scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
{
	byte *b, *arena_start, *arena_used;
	uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
	uintptr *pc, precise_type, nominal_size;
	uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret;
	void *obj;
	Type *t;
	Slice *sliceptr;
	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
	BufferList *scanbuffers;
	PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
	Obj *objbuf, *objbuf_end, *objbufpos;
	Eface *eface;
	Iface *iface;
	Hmap *hmap;
	MapType *maptype;
	bool mapkey_kind, mapval_kind;
	struct hash_gciter map_iter;
	struct hash_gciter_data d;
	Hchan *chan;
	ChanType *chantype;

	if(sizeof(Workbuf) % PageSize != 0)
		runtime·throw("scanblock: size of Workbuf is suboptimal");

	// Memory arena parameters.
	arena_start = runtime·mheap->arena_start;
	arena_used = runtime·mheap->arena_used;

	stack_ptr = stack+nelem(stack)-1;

	precise_type = false;
	nominal_size = 0;

	// Allocate ptrbuf
	{
		scanbuffers = &bufferList[m->helpgc];
		ptrbuf = &scanbuffers->ptrtarget[0];
		ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
		objbuf = &scanbuffers->obj[0];
		objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
	}

	ptrbufpos = ptrbuf;
	objbufpos = objbuf;

	// (Silence the compiler)
	map_ret = nil;
	mapkey_size = mapval_size = 0;
	mapkey_kind = mapval_kind = false;
	mapkey_ti = mapval_ti = 0;
	chan = nil;
	chantype = nil;
	chan_ret = nil;

	goto next_block;

	for(;;) {
		// Each iteration scans the block b of length n, queueing pointers in
		// the work buffer.
		if(Debug > 1) {
			runtime·printf("scanblock %p %D\n", b, (int64)n);
		}

		if(CollectStats) {
			runtime·xadd64(&gcstats.nbytes, n);
			runtime·xadd64(&gcstats.obj.sum, nobj);
			runtime·xadd64(&gcstats.obj.cnt, 1);
		}

		if(ti != 0) {
			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
			precise_type = (ti & PRECISE);
			stack_top.elemsize = pc[0];
			if(!precise_type)
				nominal_size = pc[0];
			if(ti & LOOP) {
				stack_top.count = 0;	// 0 means an infinite number of iterations
				stack_top.loop_or_ret = pc+1;
			} else {
				stack_top.count = 1;
			}
			if(Debug) {
				// Simple sanity check for provided type info ti:
				// The declared size of the object must not be larger than the actual size
				// (it can be smaller due to interior pointers).
				// It's difficult to make a comprehensive check due to interior pointers,
				// reflection, gob, etc.
				if(pc[0] > n) {
					runtime·printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
					runtime·throw("invalid gc type info");
				}
			}
		} else if(UseSpanType) {
			if(CollectStats)
				runtime·xadd64(&gcstats.obj.notype, 1);

			type = runtime·gettype(b);
			if(type != 0) {
				if(CollectStats)
					runtime·xadd64(&gcstats.obj.typelookup, 1);

				t = (Type*)(type & ~(uintptr)(PtrSize-1));
				switch(type & (PtrSize-1)) {
				case TypeInfo_SingleObject:
					pc = (uintptr*)t->gc;
					precise_type = true;  // type information about 'b' is precise
					stack_top.count = 1;
					stack_top.elemsize = pc[0];
					break;
				case TypeInfo_Array:
					pc = (uintptr*)t->gc;
					if(pc[0] == 0)
						goto next_block;
					precise_type = true;  // type information about 'b' is precise
					stack_top.count = 0;  // 0 means an infinite number of iterations
					stack_top.elemsize = pc[0];
					stack_top.loop_or_ret = pc+1;
					break;
				case TypeInfo_Map:
					hmap = (Hmap*)b;
					maptype = (MapType*)t;
					if(hash_gciter_init(hmap, &map_iter)) {
						mapkey_size = maptype->key->size;
						mapkey_kind = maptype->key->kind;
						mapkey_ti   = (uintptr)maptype->key->gc | PRECISE;
						mapval_size = maptype->elem->size;
						mapval_kind = maptype->elem->kind;
						mapval_ti   = (uintptr)maptype->elem->gc | PRECISE;

						map_ret = nil;
						pc = mapProg;
					} else {
						goto next_block;
					}
					break;
				case TypeInfo_Chan:
					chan = (Hchan*)b;
					chantype = (ChanType*)t;
					chan_ret = nil;
					pc = chanProg;
					break;
				default:
					runtime·throw("scanblock: invalid type");
					return;
				}
			} else {
				pc = defaultProg;
			}
		} else {
			pc = defaultProg;
		}

		if(IgnorePreciseGC)
			pc = defaultProg;

		pc++;
		stack_top.b = (uintptr)b;

		end_b = (uintptr)b + n - PtrSize;

	for(;;) {
		if(CollectStats)
			runtime·xadd64(&gcstats.instr[pc[0]], 1);

		obj = nil;
		objti = 0;
		switch(pc[0]) {
		case GC_PTR:
			obj = *(void**)(stack_top.b + pc[1]);
			objti = pc[2];
			pc += 3;
			if(Debug)
				checkptr(obj, objti);
			break;

		case GC_SLICE:
			sliceptr = (Slice*)(stack_top.b + pc[1]);
			if(sliceptr->cap != 0) {
				obj = sliceptr->array;
				objti = pc[2] | PRECISE | LOOP;
			}
			pc += 3;
			break;

		case GC_APTR:
			obj = *(void**)(stack_top.b + pc[1]);
			pc += 2;
			break;

		case GC_STRING:
			obj = *(void**)(stack_top.b + pc[1]);
			markonly(obj);
			pc += 2;
			continue;

		case GC_EFACE:
			eface = (Eface*)(stack_top.b + pc[1]);
			pc += 2;
			if(eface->type == nil)
				continue;

			// eface->type
			t = eface->type;
			if((void*)t >= arena_start && (void*)t < arena_used) {
				*ptrbufpos++ = (PtrTarget){t, 0};
				if(ptrbufpos == ptrbuf_end)
					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			}

			// eface->data
			if(eface->data >= arena_start && eface->data < arena_used) {
				if(t->size <= sizeof(void*)) {
					if((t->kind & KindNoPointers))
						continue;

					obj = eface->data;
					if((t->kind & ~KindNoPointers) == KindPtr)
						objti = (uintptr)((PtrType*)t)->elem->gc;
				} else {
					obj = eface->data;
					objti = (uintptr)t->gc;
				}
			}
			break;

		case GC_IFACE:
			iface = (Iface*)(stack_top.b + pc[1]);
			pc += 2;
			if(iface->tab == nil)
				continue;

			// iface->tab
			if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
				*ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
				if(ptrbufpos == ptrbuf_end)
					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			}

			// iface->data
			if(iface->data >= arena_start && iface->data < arena_used) {
				t = iface->tab->type;
				if(t->size <= sizeof(void*)) {
					if((t->kind & KindNoPointers))
						continue;

					obj = iface->data;
					if((t->kind & ~KindNoPointers) == KindPtr)
						objti = (uintptr)((PtrType*)t)->elem->gc;
				} else {
					obj = iface->data;
					objti = (uintptr)t->gc;
				}
			}
			break;

		case GC_DEFAULT_PTR:
			while(stack_top.b <= end_b) {
				obj = *(byte**)stack_top.b;
				stack_top.b += PtrSize;
				if(obj >= arena_start && obj < arena_used) {
					*ptrbufpos++ = (PtrTarget){obj, 0};
					if(ptrbufpos == ptrbuf_end)
						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
				}
			}
			goto next_block;

		case GC_END:
			if(--stack_top.count != 0) {
				// Next iteration of a loop if possible.
				stack_top.b += stack_top.elemsize;
				if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
					pc = stack_top.loop_or_ret;
					continue;
				}
				i = stack_top.b;
			} else {
				// Stack pop if possible.
				if(stack_ptr+1 < stack+nelem(stack)) {
					pc = stack_top.loop_or_ret;
					stack_top = *(++stack_ptr);
					continue;
				}
				i = (uintptr)b + nominal_size;
			}
			if(!precise_type) {
				// Quickly scan [b+i,b+n) for possible pointers.
				for(; i<=end_b; i+=PtrSize) {
					if(*(byte**)i != nil) {
						// Found a value that may be a pointer.
						// Do a rescan of the entire block.
						enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
						if(CollectStats) {
							runtime·xadd64(&gcstats.rescan, 1);
							runtime·xadd64(&gcstats.rescanbytes, n);
						}
						break;
					}
				}
			}
			goto next_block;

		case GC_ARRAY_START:
			i = stack_top.b + pc[1];
			count = pc[2];
			elemsize = pc[3];
			pc += 4;

			// Stack push.
			*stack_ptr-- = stack_top;
			stack_top = (Frame){count, elemsize, i, pc};
			continue;

		case GC_ARRAY_NEXT:
			if(--stack_top.count != 0) {
				stack_top.b += stack_top.elemsize;
				pc = stack_top.loop_or_ret;
			} else {
				// Stack pop.
				stack_top = *(++stack_ptr);
				pc += 1;
			}
			continue;

		case GC_CALL:
			// Stack push.
			*stack_ptr-- = stack_top;
			stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
			pc = (uintptr*)((byte*)pc + *(int32*)(pc+2));  // target of the CALL instruction
			continue;

		case GC_MAP_PTR:
			hmap = *(Hmap**)(stack_top.b + pc[1]);
			if(hmap == nil) {
				pc += 3;
				continue;
			}
			if(markonly(hmap)) {
				maptype = (MapType*)pc[2];
				if(hash_gciter_init(hmap, &map_iter)) {
					mapkey_size = maptype->key->size;
					mapkey_kind = maptype->key->kind;
					mapkey_ti   = (uintptr)maptype->key->gc | PRECISE;
					mapval_size = maptype->elem->size;
					mapval_kind = maptype->elem->kind;
					mapval_ti   = (uintptr)maptype->elem->gc | PRECISE;

					// Start mapProg.
					map_ret = pc+3;
					pc = mapProg+1;
				} else {
					pc += 3;
				}
			} else {
				pc += 3;
			}
			continue;

		case GC_MAP_NEXT:
			// Add all keys and values to buffers, mark all subtables.
			while(hash_gciter_next(&map_iter, &d)) {
				// buffers: reserve space for 2 objects.
				if(ptrbufpos+2 >= ptrbuf_end)
					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
				if(objbufpos+2 >= objbuf_end)
					flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);

				if(d.st != nil)
					markonly(d.st);

				if(d.key_data != nil) {
					if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
						if(!d.indirectkey)
							*objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
						else {
							if(Debug) {
								obj = *(void**)d.key_data;
								if(!(arena_start <= obj && obj < arena_used))
									runtime·throw("scanblock: inconsistent hashmap");
							}
							*ptrbufpos++ = (PtrTarget){*(void**)d.key_data, mapkey_ti};
						}
					}
					if(!(mapval_kind & KindNoPointers) || d.indirectval) {
						if(!d.indirectval)
							*objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
						else {
							if(Debug) {
								obj = *(void**)d.val_data;
								if(!(arena_start <= obj && obj < arena_used))
									runtime·throw("scanblock: inconsistent hashmap");
							}
							*ptrbufpos++ = (PtrTarget){*(void**)d.val_data, mapval_ti};
						}
					}
				}
			}
			if(map_ret == nil)
				goto next_block;
			pc = map_ret;
			continue;

		case GC_REGION:
			obj = (void*)(stack_top.b + pc[1]);
			size = pc[2];
			objti = pc[3];
			pc += 4;

			*objbufpos++ = (Obj){obj, size, objti};
			if(objbufpos == objbuf_end)
				flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
			continue;

		case GC_CHAN_PTR:
			// Similar to GC_MAP_PTR
			chan = *(Hchan**)(stack_top.b + pc[1]);
			if(chan == nil) {
				pc += 3;
				continue;
			}
			if(markonly(chan)) {
				chantype = (ChanType*)pc[2];
				if(!(chantype->elem->kind & KindNoPointers)) {
					// Start chanProg.
					chan_ret = pc+3;
					pc = chanProg+1;
					continue;
				}
			}
			pc += 3;
			continue;

		case GC_CHAN:
			// There are no heap pointers in struct Hchan,
			// so we can ignore the leading sizeof(Hchan) bytes.
			if(!(chantype->elem->kind & KindNoPointers)) {
				// Channel's buffer follows Hchan immediately in memory.
				// Size of buffer (cap(c)) is second int in the chan struct.
				n = ((uintgo*)chan)[1];
				if(n > 0) {
					// TODO(atom): split into two chunks so that only the
					// in-use part of the circular buffer is scanned.
					// (Channel routines zero the unused part, so the current
					// code does not lead to leaks, it's just a little inefficient.)
					*objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, n*chantype->elem->size,
						(uintptr)chantype->elem->gc | PRECISE | LOOP};
					if(objbufpos == objbuf_end)
						flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
				}
			}
			if(chan_ret == nil)
				goto next_block;
			pc = chan_ret;
			continue;

		default:
			runtime·throw("scanblock: invalid GC instruction");
			return;
		}

		if(obj >= arena_start && obj < arena_used) {
			*ptrbufpos++ = (PtrTarget){obj, objti};
			if(ptrbufpos == ptrbuf_end)
				flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
		}
	}

	next_block:
		// Done scanning [b, b+n).  Prepare for the next iteration of
		// the loop by setting b, n, ti to the parameters for the next block.

		if(nobj == 0) {
			flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
			flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);

			if(nobj == 0) {
				if(!keepworking) {
					if(wbuf)
						putempty(wbuf);
					goto endscan;
				}
				// Emptied our buffer: refill.
				wbuf = getfull(wbuf);
				if(wbuf == nil)
					goto endscan;
				nobj = wbuf->nobj;
				wp = wbuf->obj + wbuf->nobj;
			}
		}

		// Fetch b from the work buffer.
		--wp;
		b = wp->p;
		n = wp->n;
		ti = wp->ti;
		nobj--;
	}

endscan:;
}

// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, uintptr n)
{
	byte *obj, *p;
	void **vp;
	uintptr size, *bitp, bits, shift, i, xbits, off;
	MSpan *s;

	if(!DebugMark)
		runtime·throw("debug_scanblock without DebugMark");

	if((intptr)n < 0) {
		runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
		runtime·throw("debug_scanblock");
	}

	// Align b to a word boundary.
	off = (uintptr)b & (PtrSize-1);
	if(off != 0) {
		b += PtrSize - off;
		n -= PtrSize - off;
	}

	vp = (void**)b;
	n /= PtrSize;
	for(i=0; i<n; i++) {
		obj = (byte*)vp[i];

		// Words outside the arena cannot be pointers.
		if((byte*)obj < runtime·mheap->arena_start || (byte*)obj >= runtime·mheap->arena_used)
			continue;

		// Round down to word boundary.
		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

		// Consult span table to find beginning.
		s = runtime·MHeap_LookupMaybe(runtime·mheap, obj);
		if(s == nil)
			continue;

		p = (byte*)((uintptr)s->start<<PageShift);
		size = s->elemsize;
		if(s->sizeclass == 0) {
			obj = p;
		} else {
			if((byte*)obj >= (byte*)s->limit)
				continue;
			int32 i = ((byte*)obj - p)/size;
			obj = p+i*size;
		}

		// Now that we know the object header, reload bits.
		off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start;
		bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		xbits = *bitp;
		bits = xbits >> shift;

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// If not allocated or already marked, done.
		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
			continue;
		*bitp |= bitSpecial<<shift;
		if(!(bits & bitMarked))
			runtime·printf("found unmarked block %p in %p\n", obj, vp+i);

		// If object has no pointers, don't need to scan further.
		if((bits & bitNoPointers) != 0)
			continue;

		debug_scanblock(obj, size);
	}
}

// Append obj to the work buffer.
// _wbuf, _wp, _nobj are input/output parameters describing the work buffer.
static void
enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
{
	uintptr nobj, off;
	Obj *wp;
	Workbuf *wbuf;

	if(Debug > 1)
		runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);

	// Align obj.b to a word boundary.
	off = (uintptr)obj.p & (PtrSize-1);
	if(off != 0) {
		obj.p += PtrSize - off;
		obj.n -= PtrSize - off;
		obj.ti = 0;
	}

	if(obj.p == nil || obj.n == 0)
		return;

	// Load work buffer state
	wp = *_wp;
	wbuf = *_wbuf;
	nobj = *_nobj;

	// If another proc wants a pointer, give it some.
	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = wbuf->obj + nobj;
	}

	// If buffer is full, get a new one.
	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
		if(wbuf != nil)
			wbuf->nobj = nobj;
		wbuf = getempty(wbuf);
		wp = wbuf->obj;
		nobj = 0;
	}

	*wp = obj;
	wp++;
	nobj++;

	// Save work buffer state
	*_wp = wp;
	*_wbuf = wbuf;
	*_nobj = nobj;
}

static void
markroot(ParFor *desc, uint32 i)
{
	Obj *wp;
	Workbuf *wbuf;
	uintptr nobj;

	USED(&desc);
	wp = nil;
	wbuf = nil;
	nobj = 0;
	enqueue(work.roots[i], &wbuf, &wp, &nobj);
	scanblock(wbuf, wp, nobj, false);
}

// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
	if(b != nil)
		runtime·lfstackpush(&work.full, &b->node);
	b = (Workbuf*)runtime·lfstackpop(&work.empty);
	if(b == nil) {
		// Need to allocate.
		runtime·lock(&work);
		if(work.nchunk < sizeof *b) {
			work.nchunk = 1<<20;
			work.chunk = runtime·SysAlloc(work.nchunk);
			if(work.chunk == nil)
				runtime·throw("runtime: cannot allocate memory");
		}
		b = (Workbuf*)work.chunk;
		work.chunk += sizeof *b;
		work.nchunk -= sizeof *b;
		runtime·unlock(&work);
	}
	b->nobj = 0;
	return b;
}

static void
putempty(Workbuf *b)
{
	if(CollectStats)
		runtime·xadd64(&gcstats.putempty, 1);

	runtime·lfstackpush(&work.empty, &b->node);
}

// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
	int32 i;

	if(CollectStats)
		runtime·xadd64(&gcstats.getfull, 1);

	if(b != nil)
		runtime·lfstackpush(&work.empty, &b->node);
	b = (Workbuf*)runtime·lfstackpop(&work.full);
	if(b != nil || work.nproc == 1)
		return b;

	runtime·xadd(&work.nwait, +1);
	for(i=0;; i++) {
		if(work.full != 0) {
			runtime·xadd(&work.nwait, -1);
			b = (Workbuf*)runtime·lfstackpop(&work.full);
			if(b != nil)
				return b;
			runtime·xadd(&work.nwait, +1);
		}
		if(work.nwait == work.nproc)
			return nil;
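		// No work yet: back off progressively, spinning first,
		// then yielding the OS thread, then sleeping.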
  1333  		if(i < 10) {
  1334  			m->gcstats.nprocyield++;
  1335  			runtime·procyield(20);
  1336  		} else if(i < 20) {
  1337  			m->gcstats.nosyield++;
  1338  			runtime·osyield();
  1339  		} else {
  1340  			m->gcstats.nsleep++;
  1341  			runtime·usleep(100);
  1342  		}
  1343  	}
  1344  }
  1345  
  1346  static Workbuf*
  1347  handoff(Workbuf *b)
  1348  {
  1349  	int32 n;
  1350  	Workbuf *b1;
  1351  
  1352  	// Make new buffer with half of b's pointers.
  1353  	b1 = getempty(nil);
  1354  	n = b->nobj/2;
  1355  	b->nobj -= n;
  1356  	b1->nobj = n;
  1357  	runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
  1358  	m->gcstats.nhandoff++;
  1359  	m->gcstats.nhandoffcnt += n;
  1360  
  1361  	// Put b on full list - let first half of b get stolen.
  1362  	runtime·lfstackpush(&work.full, &b->node);
  1363  	return b1;
  1364  }
  1365  
  1366  static void
  1367  addroot(Obj obj)
  1368  {
  1369  	uint32 cap;
  1370  	Obj *new;
  1371  
  1372  	if(work.nroot >= work.rootcap) {
  1373  		cap = PageSize/sizeof(Obj);
  1374  		if(cap < 2*work.rootcap)
  1375  			cap = 2*work.rootcap;
  1376  		new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj));
  1377  		if(new == nil)
  1378  			runtime·throw("runtime: cannot allocate memory");
  1379  		if(work.roots != nil) {
  1380  			runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
  1381  			runtime·SysFree(work.roots, work.rootcap*sizeof(Obj));
  1382  		}
  1383  		work.roots = new;
  1384  		work.rootcap = cap;
  1385  	}
  1386  	work.roots[work.nroot] = obj;
  1387  	work.nroot++;
  1388  }
  1389  
  1390  // Scan a stack frame.  The doframe parameter is a signal that the previously
  1391  // scanned activation has an unknown argument size.  When *doframe is true the
  1392  // current activation must have its entire frame scanned.  Otherwise, only the
  1393  // locals need to be scanned.
  1394  static void
  1395  addframeroots(Func *f, byte*, byte *sp, void *doframe)
  1396  {
  1397  	uintptr outs;
  1398  
  1399  	if(thechar == '5')
  1400  		sp += sizeof(uintptr);
  1401  	if(f->locals == 0 || *(bool*)doframe == true)
  1402  		addroot((Obj){sp, f->frame - sizeof(uintptr), 0});
  1403  	else if(f->locals > 0) {
  1404  		outs = f->frame - sizeof(uintptr) - f->locals;
  1405  		addroot((Obj){sp + outs, f->locals, 0});
  1406  	}
  1407  	if(f->args > 0)
  1408  		addroot((Obj){sp + f->frame, f->args, 0});
  1409  	*(bool*)doframe = (f->args == ArgsSizeUnknown);
  1410  }
  1411  
  1412  static void
  1413  addstackroots(G *gp)
  1414  {
  1415  	M *mp;
  1416  	int32 n;
  1417  	Stktop *stk;
  1418  	byte *sp, *guard, *pc;
  1419  	Func *f;
  1420  	bool doframe;
  1421  
  1422  	stk = (Stktop*)gp->stackbase;
  1423  	guard = (byte*)gp->stackguard;
  1424  
  1425  	if(gp == g) {
  1426  		// Scanning our own stack: start at &gp.
  1427  		sp = runtime·getcallersp(&gp);
  1428  		pc = runtime·getcallerpc(&gp);
  1429  	} else if((mp = gp->m) != nil && mp->helpgc) {
  1430  		// gchelper's stack is in active use and has no interesting pointers.
  1431  		return;
  1432  	} else if(gp->gcstack != (uintptr)nil) {
  1433  		// Scanning another goroutine that is about to enter or might
  1434  		// have just exited a system call. It may be executing code such
  1435  		// as schedlock and may have needed to start a new stack segment.
  1436  		// Use the stack segment and stack pointer at the time of
  1437  		// the system call instead, since that won't change underfoot.
  1438  		sp = (byte*)gp->gcsp;
  1439  		pc = gp->gcpc;
  1440  		stk = (Stktop*)gp->gcstack;
  1441  		guard = (byte*)gp->gcguard;
  1442  	} else {
  1443  		// Scanning another goroutine's stack.
  1444  		// The goroutine is usually asleep (the world is stopped).
  1445  		sp = (byte*)gp->sched.sp;
  1446  		pc = gp->sched.pc;
  1447  		if(ScanStackByFrames && pc == (byte*)runtime·goexit && gp->fnstart != nil) {
  1448  			// The goroutine has not started. However, its incoming
  1449  			// arguments are live at the top of the stack and must
  1450  			// be scanned.  No other live values should be on the
  1451  			// stack.
  1452  			f = runtime·findfunc((uintptr)gp->fnstart->fn);
  1453  			if(f->args > 0) {
  1454  				if(thechar == '5')
  1455  					sp += sizeof(uintptr);
  1456  				addroot((Obj){sp, f->args, 0});
  1457  			}
  1458  			return;
  1459  		}
  1460  	}
  1461  	if (ScanStackByFrames) {
  1462  		doframe = false;
  1463  		runtime·gentraceback(pc, sp, nil, gp, 0, nil, 0x7fffffff, addframeroots, &doframe);
  1464  	} else {
  1465  		USED(pc);
  1466  		n = 0;
  1467  		while(stk) {
  1468  			if(sp < guard-StackGuard || (byte*)stk < sp) {
  1469  				runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
  1470  				runtime·throw("scanstack");
  1471  			}
  1472  			addroot((Obj){sp, (byte*)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
  1473  			sp = (byte*)stk->gobuf.sp;
  1474  			guard = stk->stackguard;
  1475  			stk = (Stktop*)stk->stackbase;
  1476  			n++;
  1477  		}
  1478  	}
  1479  }
  1480  
  1481  static void
  1482  addfinroots(void *v)
  1483  {
  1484  	uintptr size;
  1485  	void *base;
  1486  
  1487  	size = 0;
  1488  	if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
  1489  		runtime·throw("mark - finalizer inconsistency");
  1490  
  1491  	// do not mark the finalizer block itself.  just mark the things it points at.
  1492  	addroot((Obj){base, size, 0});
  1493  }
  1494  
  1495  static void
  1496  addroots(void)
  1497  {
  1498  	G *gp;
  1499  	FinBlock *fb;
  1500  	MSpan *s, **allspans;
  1501  	uint32 spanidx;
  1502  
  1503  	work.nroot = 0;
  1504  
  1505  	// data & bss
  1506  	// TODO(atom): load balancing
  1507  	addroot((Obj){data, edata - data, (uintptr)gcdata});
  1508  	addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
  1509  
  1510  	// MSpan.types
  1511  	allspans = runtime·mheap->allspans;
  1512  	for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) {
  1513  		s = allspans[spanidx];
  1514  		if(s->state == MSpanInUse) {
  1515  			// The garbage collector ignores type pointers stored in MSpan.types:
  1516  			//  - Compiler-generated types are stored outside of heap.
  1517  			//  - The reflect package has runtime-generated types cached in its data structures.
  1518  			//    The garbage collector relies on finding the references via that cache.
  1519  			switch(s->types.compression) {
  1520  			case MTypes_Empty:
  1521  			case MTypes_Single:
  1522  				break;
  1523  			case MTypes_Words:
  1524  			case MTypes_Bytes:
  1525  				markonly((byte*)s->types.data);
  1526  				break;
  1527  			}
  1528  		}
  1529  	}
  1530  
  1531  	// stacks
  1532  	for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
  1533  		switch(gp->status){
  1534  		default:
  1535  			runtime·printf("unexpected G.status %d\n", gp->status);
  1536  			runtime·throw("mark - bad status");
  1537  		case Gdead:
  1538  			break;
  1539  		case Grunning:
  1540  			if(gp != g)
  1541  				runtime·throw("mark - world not stopped");
  1542  			addstackroots(gp);
  1543  			break;
  1544  		case Grunnable:
  1545  		case Gsyscall:
  1546  		case Gwaiting:
  1547  			addstackroots(gp);
  1548  			break;
  1549  		}
  1550  	}
  1551  
  1552  	runtime·walkfintab(addfinroots);
  1553  
  1554  	for(fb=allfin; fb; fb=fb->alllink)
  1555  		addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
  1556  }
  1557  
  1558  static bool
  1559  handlespecial(byte *p, uintptr size)
  1560  {
  1561  	FuncVal *fn;
  1562  	uintptr nret;
  1563  	FinBlock *block;
  1564  	Finalizer *f;
  1565  
  1566  	if(!runtime·getfinalizer(p, true, &fn, &nret)) {
  1567  		runtime·setblockspecial(p, false);
  1568  		runtime·MProf_Free(p, size);
  1569  		return false;
  1570  	}
  1571  
  1572  	runtime·lock(&finlock);
  1573  	if(finq == nil || finq->cnt == finq->cap) {
  1574  		if(finc == nil) {
  1575  			finc = runtime·SysAlloc(PageSize);
  1576  			if(finc == nil)
  1577  				runtime·throw("runtime: cannot allocate memory");
  1578  			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
  1579  			finc->alllink = allfin;
  1580  			allfin = finc;
  1581  		}
  1582  		block = finc;
  1583  		finc = block->next;
  1584  		block->next = finq;
  1585  		finq = block;
  1586  	}
  1587  	f = &finq->fin[finq->cnt];
  1588  	finq->cnt++;
  1589  	f->fn = fn;
  1590  	f->nret = nret;
  1591  	f->arg = p;
  1592  	runtime·unlock(&finlock);
  1593  	return true;
  1594  }
  1595  
  1596  // Sweep frees or collects finalizers for blocks not marked in the mark phase.
  1597  // It clears the mark bits in preparation for the next GC round.
  1598  static void
  1599  sweepspan(ParFor *desc, uint32 idx)
  1600  {
  1601  	int32 cl, n, npages;
  1602  	uintptr size;
  1603  	byte *p;
  1604  	MCache *c;
  1605  	byte *arena_start;
  1606  	MLink head, *end;
  1607  	int32 nfree;
  1608  	byte *type_data;
  1609  	byte compression;
  1610  	uintptr type_data_inc;
  1611  	MSpan *s;
  1612  
  1613  	USED(&desc);
  1614  	s = runtime·mheap->allspans[idx];
  1615  	if(s->state != MSpanInUse)
  1616  		return;
  1617  	arena_start = runtime·mheap->arena_start;
  1618  	p = (byte*)(s->start << PageShift);
  1619  	cl = s->sizeclass;
  1620  	size = s->elemsize;
  1621  	if(cl == 0) {
  1622  		n = 1;
  1623  	} else {
  1624  		// Chunk full of small blocks.
  1625  		npages = runtime·class_to_allocnpages[cl];
  1626  		n = (npages << PageShift) / size;
  1627  	}
  1628  	nfree = 0;
  1629  	end = &head;
  1630  	c = m->mcache;
  1631  	
  1632  	type_data = (byte*)s->types.data;
  1633  	type_data_inc = sizeof(uintptr);
  1634  	compression = s->types.compression;
  1635  	switch(compression) {
  1636  	case MTypes_Bytes:
  1637  		type_data += 8*sizeof(uintptr);
  1638  		type_data_inc = 1;
  1639  		break;
  1640  	}
  1641  
  1642  	// Sweep through n objects of given size starting at p.
  1643  	// This thread owns the span now, so it can manipulate
  1644  	// the block bitmap without atomic operations.
  1645  	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
  1646  		uintptr off, *bitp, shift, bits;
  1647  
  1648  		off = (uintptr*)p - (uintptr*)arena_start;
  1649  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1650  		shift = off % wordsPerBitmapWord;
  1651  		bits = *bitp>>shift;
  1652  
  1653  		if((bits & bitAllocated) == 0)
  1654  			continue;
  1655  
  1656  		if((bits & bitMarked) != 0) {
  1657  			if(DebugMark) {
  1658  				if(!(bits & bitSpecial))
  1659  					runtime·printf("found spurious mark on %p\n", p);
  1660  				*bitp &= ~(bitSpecial<<shift);
  1661  			}
  1662  			*bitp &= ~(bitMarked<<shift);
  1663  			continue;
  1664  		}
  1665  
  1666  		// Special means it has a finalizer or is being profiled.
  1667  		// In DebugMark mode, the bit has been coopted so
  1668  		// we have to assume all blocks are special.
  1669  		if(DebugMark || (bits & bitSpecial) != 0) {
  1670  			if(handlespecial(p, size))
  1671  				continue;
  1672  		}
  1673  
  1674  		// Mark freed; restore block boundary bit.
  1675  		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  1676  
  1677  		if(cl == 0) {
  1678  			// Free large span.
  1679  			runtime·unmarkspan(p, 1<<PageShift);
  1680  			*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll;	// needs zeroing
  1681  			runtime·MHeap_Free(runtime·mheap, s, 1);
  1682  			c->local_alloc -= size;
  1683  			c->local_nfree++;
  1684  		} else {
  1685  			// Free small object.
  1686  			switch(compression) {
  1687  			case MTypes_Words:
  1688  				*(uintptr*)type_data = 0;
  1689  				break;
  1690  			case MTypes_Bytes:
  1691  				*(byte*)type_data = 0;
  1692  				break;
  1693  			}
  1694  			if(size > sizeof(uintptr))
  1695  				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
  1696  			
  1697  			end->next = (MLink*)p;
  1698  			end = (MLink*)p;
  1699  			nfree++;
  1700  		}
  1701  	}
  1702  
  1703  	if(nfree) {
  1704  		c->local_by_size[cl].nfree += nfree;
  1705  		c->local_alloc -= size * nfree;
  1706  		c->local_nfree += nfree;
  1707  		c->local_cachealloc -= nfree * size;
  1708  		c->local_objects -= nfree;
  1709  		runtime·MCentral_FreeSpan(&runtime·mheap->central[cl], s, nfree, head.next, end);
  1710  	}
  1711  }
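
// Design note: freed small objects are accumulated on the private
// head/end list above and handed back to MCentral in a single
// MCentral_FreeSpan call, so the central free list lock is taken once
// per swept span rather than once per freed object.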
  1712  
  1713  static void
  1714  dumpspan(uint32 idx)
  1715  {
  1716  	int32 sizeclass, n, npages, i, column;
  1717  	uintptr size;
  1718  	byte *p;
  1719  	byte *arena_start;
  1720  	MSpan *s;
  1721  	bool allocated, special;
  1722  
  1723  	s = runtime·mheap->allspans[idx];
  1724  	if(s->state != MSpanInUse)
  1725  		return;
  1726  	arena_start = runtime·mheap->arena_start;
  1727  	p = (byte*)(s->start << PageShift);
  1728  	sizeclass = s->sizeclass;
  1729  	size = s->elemsize;
  1730  	if(sizeclass == 0) {
  1731  		n = 1;
  1732  	} else {
  1733  		npages = runtime·class_to_allocnpages[sizeclass];
  1734  		n = (npages << PageShift) / size;
  1735  	}
  1736  	
  1737  	runtime·printf("%p .. %p:\n", p, p+n*size);
  1738  	column = 0;
  1739  	for(; n>0; n--, p+=size) {
  1740  		uintptr off, *bitp, shift, bits;
  1741  
  1742  		off = (uintptr*)p - (uintptr*)arena_start;
  1743  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1744  		shift = off % wordsPerBitmapWord;
  1745  		bits = *bitp>>shift;
  1746  
  1747  		allocated = ((bits & bitAllocated) != 0);
  1748  		special = ((bits & bitSpecial) != 0);
  1749  
  1750  		for(i=0; i<size; i+=sizeof(void*)) {
  1751  			if(column == 0) {
  1752  				runtime·printf("\t");
  1753  			}
  1754  			if(i == 0) {
  1755  				runtime·printf(allocated ? "(" : "[");
  1756  				runtime·printf(special ? "@" : "");
  1757  				runtime·printf("%p: ", p+i);
  1758  			} else {
  1759  				runtime·printf(" ");
  1760  			}
  1761  
  1762  			runtime·printf("%p", *(void**)(p+i));
  1763  
  1764  			if(i+sizeof(void*) >= size) {
  1765  				runtime·printf(allocated ? ") " : "] ");
  1766  			}
  1767  
  1768  			column++;
  1769  			if(column == 8) {
  1770  				runtime·printf("\n");
  1771  				column = 0;
  1772  			}
  1773  		}
  1774  	}
  1775  	runtime·printf("\n");
  1776  }
  1777  
  1778  // A debugging function to dump the contents of memory.
  1779  void
  1780  runtime·memorydump(void)
  1781  {
  1782  	uint32 spanidx;
  1783  
  1784  	for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) {
  1785  		dumpspan(spanidx);
  1786  	}
  1787  }
  1788  
  1789  void
  1790  runtime·gchelper(void)
  1791  {
  1792  	gchelperstart();
  1793  
  1794  	// parallel mark over the gc roots
  1795  	runtime·parfordo(work.markfor);
  1796  
  1797  	// help other threads scan secondary blocks
  1798  	scanblock(nil, nil, 0, true);
  1799  
  1800  	if(DebugMark) {
  1801  		// wait while the main thread executes mark(debug_scanblock)
  1802  		while(runtime·atomicload(&work.debugmarkdone) == 0)
  1803  			runtime·usleep(10);
  1804  	}
  1805  
  1806  	runtime·parfordo(work.sweepfor);
  1807  	bufferList[m->helpgc].busy = 0;
  1808  	if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
  1809  		runtime·notewakeup(&work.alldone);
  1810  }
  1811  
  1812  #define GcpercentUnknown (-2)
  1813  
  1814  // Initialized from $GOGC.  GOGC=off means no gc.
  1815  //
  1816  // Next gc is after we've allocated an extra amount of
  1817  // memory proportional to the amount already in use.
  1818  // If gcpercent=100 and we're using 4M, we'll gc again
  1819  // when we get to 8M.  This keeps the gc cost in linear
  1820  // proportion to the allocation cost.  Adjusting gcpercent
  1821  // just changes the linear constant (and also the amount of
  1822  // extra memory used).
  1823  static int32 gcpercent = GcpercentUnknown;
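
// A minimal sketch of the trigger arithmetic (illustrative; the real
// update happens in gc() below, after sweeping):
//
//	// with heap_alloc = 4<<20 and gcpercent = 100:
//	// next_gc = 4MB + 4MB*100/100 = 8MB
//	next_gc = mstats.heap_alloc + mstats.heap_alloc*gcpercent/100;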
  1824  
  1825  static void
  1826  cachestats(GCStats *stats)
  1827  {
  1828  	M *mp;
  1829  	MCache *c;
  1830  	P *p, **pp;
  1831  	int32 i;
  1832  	uint64 stacks_inuse;
  1833  	uint64 *src, *dst;
  1834  
  1835  	if(stats)
  1836  		runtime·memclr((byte*)stats, sizeof(*stats));
  1837  	stacks_inuse = 0;
  1838  	for(mp=runtime·allm; mp; mp=mp->alllink) {
  1839  		stacks_inuse += mp->stackinuse*FixedStack;
  1840  		if(stats) {
  1841  			src = (uint64*)&mp->gcstats;
  1842  			dst = (uint64*)stats;
  1843  			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
  1844  				dst[i] += src[i];
  1845  			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
  1846  		}
  1847  	}
  1848  	for(pp=runtime·allp; p=*pp; pp++) {
  1849  		c = p->mcache;
  1850  		if(c==nil)
  1851  			continue;
  1852  		runtime·purgecachedstats(c);
  1853  		for(i=0; i<nelem(c->local_by_size); i++) {
  1854  			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
  1855  			c->local_by_size[i].nmalloc = 0;
  1856  			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
  1857  			c->local_by_size[i].nfree = 0;
  1858  		}
  1859  	}
  1860  	mstats.stacks_inuse = stacks_inuse;
  1861  }
  1862  
  1863  // Structure of arguments passed to function gc().
  1864  // This allows the arguments to be passed via reflect·call.
  1865  struct gc_args
  1866  {
  1867  	int32 force;
  1868  };
  1869  
  1870  static void gc(struct gc_args *args);
  1871  
  1872  static int32
  1873  readgogc(void)
  1874  {
  1875  	byte *p;
  1876  
  1877  	p = runtime·getenv("GOGC");
  1878  	if(p == nil || p[0] == '\0')
  1879  		return 100;
  1880  	if(runtime·strcmp(p, (byte*)"off") == 0)
  1881  		return -1;
  1882  	return runtime·atoi(p);
  1883  }
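
// For example: an unset or empty GOGC yields the default of 100,
// GOGC=off returns -1 (collection disabled), and GOGC=200 doubles
// the allocation headroom between collections.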
  1884  
  1885  void
  1886  runtime·gc(int32 force)
  1887  {
  1888  	byte *p;
  1889  	struct gc_args a, *ap;
  1890  	FuncVal gcv;
  1891  
  1892  	// The atomic operations are not atomic if the uint64s
  1893  	// are not aligned on uint64 boundaries. This has been
  1894  	// a problem in the past.
  1895  	if((((uintptr)&work.empty) & 7) != 0)
  1896  		runtime·throw("runtime: gc work buffer is misaligned");
  1897  	if((((uintptr)&work.full) & 7) != 0)
  1898  		runtime·throw("runtime: gc work buffer is misaligned");
  1899  
  1900  	// The gc is turned off (via enablegc) until
  1901  	// the bootstrap has completed.
  1902  	// Also, malloc gets called in the guts
  1903  	// of a number of libraries that might be
  1904  	// holding locks.  To avoid priority inversion
  1905  	// problems, don't bother trying to run gc
  1906  	// while holding a lock.  The next mallocgc
  1907  	// without a lock will do the gc instead.
  1908  	if(!mstats.enablegc || m->locks > 0 || runtime·panicking)
  1909  		return;
  1910  
  1911  	if(gcpercent == GcpercentUnknown) {	// first time through
  1912  		gcpercent = readgogc();
  1913  
  1914  		p = runtime·getenv("GOGCTRACE");
  1915  		if(p != nil)
  1916  			gctrace = runtime·atoi(p);
  1917  	}
  1918  	if(gcpercent < 0)
  1919  		return;
  1920  
  1921  	// Run gc on a bigger stack to eliminate
  1922  	// a potentially large number of calls to runtime·morestack.
  1923  	a.force = force;
  1924  	ap = &a;
  1925  	m->moreframesize_minalloc = StackBig;
  1926  	gcv.fn = (void*)gc;
  1927  	reflect·call(&gcv, (byte*)&ap, sizeof(ap));
  1928  
  1929  	if(gctrace > 1 && !force) {
  1930  		a.force = 1;
  1931  		gc(&a);
  1932  	}
  1933  }
  1934  
  1935  static FuncVal runfinqv = {runfinq};
  1936  
  1937  static void
  1938  gc(struct gc_args *args)
  1939  {
  1940  	int64 t0, t1, t2, t3, t4;
  1941  	uint64 heap0, heap1, obj0, obj1, ninstr;
  1942  	GCStats stats;
  1943  	M *mp;
  1944  	uint32 i;
  1945  	Eface eface;
  1946  
  1947  	runtime·semacquire(&runtime·worldsema);
  1948  	if(!args->force && mstats.heap_alloc < mstats.next_gc) {
  1949  		runtime·semrelease(&runtime·worldsema);
  1950  		return;
  1951  	}
  1952  
  1953  	t0 = runtime·nanotime();
  1954  
  1955  	m->gcing = 1;
  1956  	runtime·stoptheworld();
  1957  
  1958  	if(CollectStats)
  1959  		runtime·memclr((byte*)&gcstats, sizeof(gcstats));
  1960  
  1961  	for(mp=runtime·allm; mp; mp=mp->alllink)
  1962  		runtime·settype_flush(mp, false);
  1963  
  1964  	heap0 = 0;
  1965  	obj0 = 0;
  1966  	if(gctrace) {
  1967  		cachestats(nil);
  1968  		heap0 = mstats.heap_alloc;
  1969  		obj0 = mstats.nmalloc - mstats.nfree;
  1970  	}
  1971  
  1972  	m->locks++;	// disable gc during mallocs in parforalloc
  1973  	if(work.markfor == nil)
  1974  		work.markfor = runtime·parforalloc(MaxGcproc);
  1975  	if(work.sweepfor == nil)
  1976  		work.sweepfor = runtime·parforalloc(MaxGcproc);
  1977  	m->locks--;
  1978  
  1979  	if(itabtype == nil) {
  1980  		// get C pointer to the Go type "itab"
  1981  		runtime·gc_itab_ptr(&eface);
  1982  		itabtype = ((PtrType*)eface.type)->elem;
  1983  	}
  1984  
  1985  	work.nwait = 0;
  1986  	work.ndone = 0;
  1987  	work.debugmarkdone = 0;
  1988  	work.nproc = runtime·gcprocs();
  1989  	addroots();
  1990  	runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
  1991  	runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap->nspan, nil, true, sweepspan);
  1992  	if(work.nproc > 1) {
  1993  		runtime·noteclear(&work.alldone);
  1994  		runtime·helpgc(work.nproc);
  1995  	}
  1996  
  1997  	t1 = runtime·nanotime();
  1998  
  1999  	gchelperstart();
  2000  	runtime·parfordo(work.markfor);
  2001  	scanblock(nil, nil, 0, true);
  2002  
  2003  	if(DebugMark) {
  2004  		for(i=0; i<work.nroot; i++)
  2005  			debug_scanblock(work.roots[i].p, work.roots[i].n);
  2006  		runtime·atomicstore(&work.debugmarkdone, 1);
  2007  	}
  2008  	t2 = runtime·nanotime();
  2009  
  2010  	runtime·parfordo(work.sweepfor);
  2011  	bufferList[m->helpgc].busy = 0;
  2012  	t3 = runtime·nanotime();
  2013  
  2014  	if(work.nproc > 1)
  2015  		runtime·notesleep(&work.alldone);
  2016  
  2017  	cachestats(&stats);
  2018  
  2019  	stats.nprocyield += work.sweepfor->nprocyield;
  2020  	stats.nosyield += work.sweepfor->nosyield;
  2021  	stats.nsleep += work.sweepfor->nsleep;
  2022  
  2023  	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
  2024  	m->gcing = 0;
  2025  
  2026  	if(finq != nil) {
  2027  		m->locks++;	// disable gc during the mallocs in newproc
  2028  		// kick off or wake up goroutine to run queued finalizers
  2029  		if(fing == nil)
  2030  			fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
  2031  		else if(fingwait) {
  2032  			fingwait = 0;
  2033  			runtime·ready(fing);
  2034  		}
  2035  		m->locks--;
  2036  	}
  2037  
  2038  	heap1 = mstats.heap_alloc;
  2039  	obj1 = mstats.nmalloc - mstats.nfree;
  2040  
  2041  	t4 = runtime·nanotime();
  2042  	mstats.last_gc = t4;
  2043  	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
  2044  	mstats.pause_total_ns += t4 - t0;
  2045  	mstats.numgc++;
  2046  	if(mstats.debuggc)
  2047  		runtime·printf("pause %D\n", t4-t0);
  2048  
  2049  	if(gctrace) {
  2050  		runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
  2051  				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
  2052  			mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
  2053  			heap0>>20, heap1>>20, obj0, obj1,
  2054  			mstats.nmalloc, mstats.nfree,
  2055  			stats.nhandoff, stats.nhandoffcnt,
  2056  			work.sweepfor->nsteal, work.sweepfor->nstealcnt,
  2057  			stats.nprocyield, stats.nosyield, stats.nsleep);
  2058  		if(CollectStats) {
  2059  			runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
  2060  				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
  2061  			if(gcstats.ptr.cnt != 0)
  2062  				runtime·printf("avg ptrbufsize: %D (%D/%D)\n",
  2063  					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
  2064  			if(gcstats.obj.cnt != 0)
  2065  				runtime·printf("avg nobj: %D (%D/%D)\n",
  2066  					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
  2067  			runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
  2068  
  2069  			runtime·printf("instruction counts:\n");
  2070  			ninstr = 0;
  2071  			for(i=0; i<nelem(gcstats.instr); i++) {
  2072  				runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]);
  2073  				ninstr += gcstats.instr[i];
  2074  			}
  2075  			runtime·printf("\ttotal:\t%D\n", ninstr);
  2076  
  2077  			runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
  2078  		}
  2079  	}
  2080  
  2081  	runtime·MProf_GC();
  2082  	runtime·semrelease(&runtime·worldsema);
  2083  	runtime·starttheworld();
  2084  
  2085  	// give the queued finalizers, if any, a chance to run
  2086  	if(finq != nil)
  2087  		runtime·gosched();
  2088  }
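
// With GOGCTRACE=1 the printf above emits one line per collection.
// A hypothetical line (every value illustrative) would read:
//
//	gc12(4): 2+1+0 ms, 4 -> 3 MB 30000 -> 20000 (50000-30000) objects,
//		5(120) handoff, 3(96) steal, 10/4/2 yields
//
// that is: mark, sweep, and stop/start times in ms; heap size before
// and after; live objects before and after (with cumulative mallocs
// and frees in parentheses); then contention counters from the
// parallel mark and sweep.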
  2089  
  2090  void
  2091  runtime·ReadMemStats(MStats *stats)
  2092  {
  2093  	// Have to acquire worldsema to stop the world,
  2094  	// because stoptheworld can only be used by
  2095  	// one goroutine at a time, and there might be
  2096  	// a pending garbage collection already calling it.
  2097  	runtime·semacquire(&runtime·worldsema);
  2098  	m->gcing = 1;
  2099  	runtime·stoptheworld();
  2100  	cachestats(nil);
  2101  	*stats = mstats;
  2102  	m->gcing = 0;
  2103  	runtime·semrelease(&runtime·worldsema);
  2104  	runtime·starttheworld();
  2105  }
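
// The Go-visible entry point is runtime.ReadMemStats; a typical
// (illustrative) use from Go code:
//
//	var ms runtime.MemStats
//	runtime.ReadMemStats(&ms)	// stops the world briefly
//	println(ms.HeapAlloc, ms.NumGC)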
  2106  
  2107  void
  2108  runtime∕debug·readGCStats(Slice *pauses)
  2109  {
  2110  	uint64 *p;
  2111  	uint32 i, n;
  2112  
  2113  	// Calling code in runtime/debug should make the slice large enough.
  2114  	if(pauses->cap < nelem(mstats.pause_ns)+3)
  2115  		runtime·throw("runtime: short slice passed to readGCStats");
  2116  
  2117  	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
  2118  	p = (uint64*)pauses->array;
  2119  	runtime·lock(runtime·mheap);
  2120  	n = mstats.numgc;
  2121  	if(n > nelem(mstats.pause_ns))
  2122  		n = nelem(mstats.pause_ns);
  2123  	
  2124  	// The pause buffer is circular. The most recent pause is at
  2125  	// pause_ns[(numgc-1)%nelem(pause_ns)]; earlier pauses are found
  2126  	// by stepping backward from there. We deliver the times
  2127  	// most recent first (in p[0]).
  2128  	for(i=0; i<n; i++)
  2129  		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
  2130  
  2131  	p[n] = mstats.last_gc;
  2132  	p[n+1] = mstats.numgc;
  2133  	p[n+2] = mstats.pause_total_ns;	
  2134  	runtime·unlock(runtime·mheap);
  2135  	pauses->len = n+3;
  2136  }
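
// Sketch of the returned layout (illustrative): after two collections
// with pauses p0 then p1, the caller sees
//
//	pauses = [p1, p0, last_gc, 2, p0+p1]	// len = n+3 = 5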
  2137  
  2138  void
  2139  runtime∕debug·setGCPercent(intgo in, intgo out)
  2140  {
  2141  	runtime·lock(runtime·mheap);
  2142  	if(gcpercent == GcpercentUnknown)
  2143  		gcpercent = readgogc();
  2144  	out = gcpercent;
  2145  	if(in < 0)
  2146  		in = -1;
  2147  	gcpercent = in;
  2148  	runtime·unlock(runtime·mheap);
  2149  	FLUSH(&out);
  2150  }
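
// The Go-visible wrapper is runtime/debug.SetGCPercent; a sketch of
// typical use (illustrative):
//
//	old := debug.SetGCPercent(200)	// collect after 200% heap growth
//	defer debug.SetGCPercent(old)	// restore the previous setting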
  2151  
  2152  static void
  2153  gchelperstart(void)
  2154  {
  2155  	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
  2156  		runtime·throw("gchelperstart: bad m->helpgc");
  2157  	if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
  2158  		runtime·throw("gchelperstart: already busy");
  2159  }
  2160  
  2161  static void
  2162  runfinq(void)
  2163  {
  2164  	Finalizer *f;
  2165  	FinBlock *fb, *next;
  2166  	byte *frame;
  2167  	uint32 framesz, framecap, i;
  2168  
  2169  	frame = nil;
  2170  	framecap = 0;
  2171  	for(;;) {
  2172  		// There's no need for a lock in this section
  2173  		// because it only conflicts with the garbage
  2174  		// collector, and the garbage collector only
  2175  		// runs when everyone else is stopped, and
  2176  		// runfinq only stops at the park() below or
  2177  		// during the calls in the for loop.
  2178  		fb = finq;
  2179  		finq = nil;
  2180  		if(fb == nil) {
  2181  			fingwait = 1;
  2182  			runtime·park(nil, nil, "finalizer wait");
  2183  			continue;
  2184  		}
  2185  		if(raceenabled)
  2186  			runtime·racefingo();
  2187  		for(; fb; fb=next) {
  2188  			next = fb->next;
  2189  			for(i=0; i<fb->cnt; i++) {
  2190  				f = &fb->fin[i];
  2191  				framesz = sizeof(uintptr) + f->nret;
  2192  				if(framecap < framesz) {
  2193  					runtime·free(frame);
  2194  					frame = runtime·mal(framesz);
  2195  					framecap = framesz;
  2196  				}
  2197  				*(void**)frame = f->arg;
  2198  				reflect·call(f->fn, frame, sizeof(uintptr) + f->nret);
  2199  				f->fn = nil;
  2200  				f->arg = nil;
  2201  			}
  2202  			fb->cnt = 0;
  2203  			fb->next = finc;
  2204  			finc = fb;
  2205  		}
  2206  		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible
  2207  	}
  2208  }
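
// Entries reach finq via runtime.SetFinalizer in user code; a sketch
// (T and releaseExternal are hypothetical):
//
//	runtime.SetFinalizer(obj, func(o *T) { releaseExternal(o) })
//
// handlespecial moves such entries onto finq during sweep, and the
// loop above invokes each one via reflect·call.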
  2209  
  2210  // mark the block at v of size n as allocated.
  2211  // If noptr is true, mark it as having no pointers.
  2212  void
  2213  runtime·markallocated(void *v, uintptr n, bool noptr)
  2214  {
  2215  	uintptr *b, obits, bits, off, shift;
  2216  
  2217  	if(0)
  2218  		runtime·printf("markallocated %p+%p\n", v, n);
  2219  
  2220  	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
  2221  		runtime·throw("markallocated: bad pointer");
  2222  
  2223  	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;  // word offset
  2224  	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2225  	shift = off % wordsPerBitmapWord;
  2226  
  2227  	for(;;) {
  2228  		obits = *b;
  2229  		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
  2230  		if(noptr)
  2231  			bits |= bitNoPointers<<shift;
  2232  		if(runtime·singleproc) {
  2233  			*b = bits;
  2234  			break;
  2235  		} else {
  2236  			// more than one goroutine is potentially running: use atomic op
  2237  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2238  				break;
  2239  		}
  2240  	}
  2241  }
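
// The loop above is the usual lock-free read-modify-write: reload *b,
// recompute the new word, and retry the casp whenever another
// goroutine has updated a neighboring object's bits in the meantime.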
  2242  
  2243  // mark the block at v of size n as freed.
  2244  void
  2245  runtime·markfreed(void *v, uintptr n)
  2246  {
  2247  	uintptr *b, obits, bits, off, shift;
  2248  
  2249  	if(0)
  2250  		runtime·printf("markfreed %p+%p\n", v, n);
  2251  
  2252  	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
  2253  		runtime·throw("markfreed: bad pointer");
  2254  
  2255  	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;  // word offset
  2256  	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2257  	shift = off % wordsPerBitmapWord;
  2258  
  2259  	for(;;) {
  2260  		obits = *b;
  2261  		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  2262  		if(runtime·singleproc) {
  2263  			*b = bits;
  2264  			break;
  2265  		} else {
  2266  			// more than one goroutine is potentially running: use atomic op
  2267  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2268  				break;
  2269  		}
  2270  	}
  2271  }
  2272  
  2273  // check that the block at v of size n is marked freed.
  2274  void
  2275  runtime·checkfreed(void *v, uintptr n)
  2276  {
  2277  	uintptr *b, bits, off, shift;
  2278  
  2279  	if(!runtime·checking)
  2280  		return;
  2281  
  2282  	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
  2283  		return;	// not allocated, so okay
  2284  
  2285  	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;  // word offset
  2286  	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2287  	shift = off % wordsPerBitmapWord;
  2288  
  2289  	bits = *b>>shift;
  2290  	if((bits & bitAllocated) != 0) {
  2291  		runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
  2292  			v, n, off, bits & bitMask);
  2293  		runtime·throw("checkfreed: not freed");
  2294  	}
  2295  }
  2296  
  2297  // mark the span of memory at v as having n blocks of the given size.
  2298  // if leftover is true, there is left over space at the end of the span.
  2299  void
  2300  runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
  2301  {
  2302  	uintptr *b, off, shift;
  2303  	byte *p;
  2304  
  2305  	if((byte*)v+size*n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
  2306  		runtime·throw("markspan: bad pointer");
  2307  
  2308  	p = v;
  2309  	if(leftover)	// mark a boundary just past end of last block too
  2310  		n++;
  2311  	for(; n-- > 0; p += size) {
  2312  		// Okay to use non-atomic ops here, because we control
  2313  		// the entire span, and each bitmap word has bits for only
  2314  		// one span, so no other goroutines are changing these
  2315  		// bitmap words.
  2316  		off = (uintptr*)p - (uintptr*)runtime·mheap->arena_start;  // word offset
  2317  		b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2318  		shift = off % wordsPerBitmapWord;
  2319  		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
  2320  	}
  2321  }
  2322  
  2323  // unmark the span of memory at v of length n bytes.
  2324  void
  2325  runtime·unmarkspan(void *v, uintptr n)
  2326  {
  2327  	uintptr *p, *b, off;
  2328  
  2329  	if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
  2330  		runtime·throw("unmarkspan: bad pointer");
  2331  
  2332  	p = v;
  2333  	off = p - (uintptr*)runtime·mheap->arena_start;  // word offset
  2334  	if(off % wordsPerBitmapWord != 0)
  2335  		runtime·throw("unmarkspan: unaligned pointer");
  2336  	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2337  	n /= PtrSize;
  2338  	if(n%wordsPerBitmapWord != 0)
  2339  		runtime·throw("unmarkspan: unaligned length");
  2340  	// Okay to use non-atomic ops here, because we control
  2341  	// the entire span, and each bitmap word has bits for only
  2342  	// one span, so no other goroutines are changing these
  2343  	// bitmap words.
  2344  	n /= wordsPerBitmapWord;
  2345  	while(n-- > 0)
  2346  		*b-- = 0;
  2347  }
  2348  
  2349  bool
  2350  runtime·blockspecial(void *v)
  2351  {
  2352  	uintptr *b, off, shift;
  2353  
  2354  	if(DebugMark)
  2355  		return true;
  2356  
  2357  	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;
  2358  	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2359  	shift = off % wordsPerBitmapWord;
  2360  
  2361  	return (*b & (bitSpecial<<shift)) != 0;
  2362  }
  2363  
  2364  void
  2365  runtime·setblockspecial(void *v, bool s)
  2366  {
  2367  	uintptr *b, off, shift, bits, obits;
  2368  
  2369  	if(DebugMark)
  2370  		return;
  2371  
  2372  	off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;
  2373  	b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
  2374  	shift = off % wordsPerBitmapWord;
  2375  
  2376  	for(;;) {
  2377  		obits = *b;
  2378  		if(s)
  2379  			bits = obits | (bitSpecial<<shift);
  2380  		else
  2381  			bits = obits & ~(bitSpecial<<shift);
  2382  		if(runtime·singleproc) {
  2383  			*b = bits;
  2384  			break;
  2385  		} else {
  2386  			// more than one goroutine is potentially running: use atomic op
  2387  			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
  2388  				break;
  2389  		}
  2390  	}
  2391  }
  2392  
  2393  void
  2394  runtime·MHeap_MapBits(MHeap *h)
  2395  {
  2396  	// Caller has added extra mappings to the arena.
  2397  	// Add extra mappings of bitmap words as needed.
  2398  	// We allocate extra bitmap pieces in chunks of bitmapChunk.
  2399  	enum {
  2400  		bitmapChunk = 8192
  2401  	};
  2402  	uintptr n;
  2403  
  2404  	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
  2405  	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
  2406  	if(h->bitmap_mapped >= n)
  2407  		return;
  2408  
  2409  	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped);
  2410  	h->bitmap_mapped = n;
  2411  }
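
// Worked example (assuming a 64-bit system, so wordsPerBitmapWord is
// 16): growing the arena by 1MB implies 1MB/16 = 64KB of new bitmap;
// (n+bitmapChunk-1) & ~(bitmapChunk-1) rounds the total need up to a
// multiple of 8192 bytes (e.g. n = 70000 -> 73728, which is 9*8192),
// and SysMap then maps only the delta beyond bitmap_mapped.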