github.com/llvm-mirror/llgo@v0.0.0-20190322182713-bf6f0a60fce1/third_party/gofrontend/libgo/runtime/mgc0.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Garbage collector (GC).
     6  //
     7  // GC is:
     8  // - mark&sweep
     9  // - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
    10  // - parallel (up to MaxGcproc threads)
    11  // - partially concurrent (mark is stop-the-world, while sweep is concurrent)
    12  // - non-moving/non-compacting
    13  // - full (non-partial)
    14  //
    15  // GC rate.
    16  // Next GC is after we've allocated an extra amount of memory proportional to
    17  // the amount already in use. The proportion is controlled by GOGC environment variable
    18  // (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
    19  // (this mark is tracked in next_gc variable). This keeps the GC cost in linear
    20  // proportion to the allocation cost. Adjusting GOGC just changes the linear constant
    21  // (and also the amount of extra memory used).
    22  //
    23  // Concurrent sweep.
    24  // The sweep phase proceeds concurrently with normal program execution.
    25  // The heap is swept span-by-span both lazily (when a goroutine needs another span)
    26  // and concurrently in a background goroutine (this helps programs that are not CPU bound).
    27  // However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
    28  // and so next_gc calculation is tricky and happens as follows.
    29  // At the end of the stop-the-world phase next_gc is conservatively set based on total
    30  // heap size; all spans are marked as "needs sweeping".
    31  // Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
    32  // The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
    33  // closer to the target value. However, this is not enough to avoid over-allocating memory.
    34  // Consider that a goroutine wants to allocate a new span for a large object and
    35  // there are no free swept spans, but there are small-object unswept spans.
    36  // If the goroutine naively allocates a new span, it can surpass the yet-unknown
    37  // target next_gc value. In order to prevent such cases (1) when a goroutine needs
    38  // to allocate a new small-object span, it sweeps small-object spans for the same
    39  // object size until it frees at least one object; (2) when a goroutine needs to
    40  // allocate a large-object span from the heap, it sweeps spans until it frees at least
    41  // that many pages into the heap. Together these two measures ensure that we don't surpass
    42  // target next_gc value by a large margin. There is an exception: if a goroutine sweeps
    43  // and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
    44  // but there can still be other one-page unswept spans which could be combined into a two-page span.
    45  // It's critical to ensure that no operations proceed on unswept spans (that would corrupt
    46  // mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
    47  // so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
    48  // When a goroutine explicitly frees an object or sets a finalizer, it ensures that
    49  // the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
    50  // The finalizer goroutine is kicked off only when all spans are swept.
    51  // When the next GC starts, it sweeps all not-yet-swept spans (if any).
    52  
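        // A minimal sketch of the pacing arithmetic described in the comment above
        // (example_next_gc is a hypothetical helper, not used by the collector):
        // with GOGC=100 and 4M of heap in use, the next collection is triggered at
        // 8M; during concurrent sweep, next_gc is then decremented by
        // GOGC*newly_freed_memory as spans are swept, moving it toward the final target.
        static unsigned long
        example_next_gc(unsigned long heap_in_use, unsigned long gogc)
        {
        	// example_next_gc(4<<20, 100) == 8<<20
        	return heap_in_use + heap_in_use*gogc/100;
        }
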
    53  #include <unistd.h>
    54  
    55  #include "runtime.h"
    56  #include "arch.h"
    57  #include "malloc.h"
    58  #include "mgc0.h"
    59  #include "chan.h"
    60  #include "go-type.h"
    61  
    62  // Map gccgo field names to gc field names.
    63  // Slice aka __go_open_array.
    64  #define array __values
    65  #define cap __capacity
    66  // Iface aka __go_interface
    67  #define tab __methods
    68  // Hmap aka __go_map
    69  typedef struct __go_map Hmap;
    70  // Type aka __go_type_descriptor
    71  #define string __reflection
    72  #define KindPtr GO_PTR
    73  #define KindNoPointers GO_NO_POINTERS
    74  #define kindMask GO_CODE_MASK
    75  // PtrType aka __go_ptr_type
    76  #define elem __element_type
    77  
    78  #ifdef USING_SPLIT_STACK
    79  
    80  extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
    81  				 void **);
    82  
    83  extern void * __splitstack_find_context (void *context[10], size_t *, void **,
    84  					 void **, void **);
    85  
    86  #endif
    87  
    88  enum {
    89  	Debug = 0,
    90  	CollectStats = 0,
    91  	ConcurrentSweep = 1,
    92  
    93  	WorkbufSize	= 16*1024,
    94  	FinBlockSize	= 4*1024,
    95  
    96  	handoffThreshold = 4,
    97  	IntermediateBufferCapacity = 64,
    98  
    99  	// Bits in type information
   100  	PRECISE = 1,
   101  	LOOP = 2,
   102  	PC_BITS = PRECISE | LOOP,
   103  
   104  	RootData	= 0,
   105  	RootBss		= 1,
   106  	RootFinalizers	= 2,
   107  	RootSpanTypes	= 3,
   108  	RootFlushCaches = 4,
   109  	RootCount	= 5,
   110  };
   111  
   112  #define GcpercentUnknown (-2)
   113  
   114  // Initialized from $GOGC.  GOGC=off means no gc.
   115  static int32 gcpercent = GcpercentUnknown;
   116  
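        // A minimal sketch of how gcpercent can be derived from $GOGC as described
        // above (example_read_gogc is a hypothetical helper; runtime_getenv,
        // runtime_strcmp and runtime_atoi are assumed to behave like their libc
        // counterparts): unset or empty means 100, "off" disables collection,
        // anything else is parsed as a number.
        static int32
        example_read_gogc(void)
        {
        	const byte *p;

        	p = runtime_getenv("GOGC");
        	if(p == nil || p[0] == '\0')
        		return 100;
        	if(runtime_strcmp((const char*)p, "off") == 0)
        		return -1;	// GC disabled
        	return runtime_atoi(p);
        }
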
   117  static FuncVal* poolcleanup;
   118  
   119  void sync_runtime_registerPoolCleanup(FuncVal*)
   120    __asm__ (GOSYM_PREFIX "sync.runtime_registerPoolCleanup");
   121  
   122  void
   123  sync_runtime_registerPoolCleanup(FuncVal *f)
   124  {
   125  	poolcleanup = f;
   126  }
   127  
   128  static void
   129  clearpools(void)
   130  {
   131  	P *p, **pp;
   132  	MCache *c;
   133  
   134  	// clear sync.Pools
   135  	if(poolcleanup != nil) {
   136  		__builtin_call_with_static_chain(poolcleanup->fn(),
   137  						 poolcleanup);
   138  	}
   139  
   140  	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
   141  		// clear tinyalloc pool
   142  		c = p->mcache;
   143  		if(c != nil) {
   144  			c->tiny = nil;
   145  			c->tinysize = 0;
   146  		}
   147  		// clear defer pools
   148  		p->deferpool = nil;
   149  	}
   150  }
   151  
   152  // Holding worldsema grants an M the right to try to stop the world.
   153  // The procedure is:
   154  //
   155  //	runtime_semacquire(&runtime_worldsema);
   156  //	m->gcing = 1;
   157  //	runtime_stoptheworld();
   158  //
   159  //	... do stuff ...
   160  //
   161  //	m->gcing = 0;
   162  //	runtime_semrelease(&runtime_worldsema);
   163  //	runtime_starttheworld();
   164  //
   165  uint32 runtime_worldsema = 1;
   166  
   167  typedef struct Workbuf Workbuf;
   168  struct Workbuf
   169  {
   170  #define SIZE (WorkbufSize-sizeof(LFNode)-sizeof(uintptr))
   171  	LFNode  node; // must be first
   172  	uintptr nobj;
   173  	Obj     obj[SIZE/sizeof(Obj) - 1];
   174  	uint8   _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
   175  #undef SIZE
   176  };
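
        // A compile-time version of the sanity check scanblock performs at run time
        // below (a sketch only; the typedef is otherwise unused): the SIZE
        // arithmetic above is intended to make sizeof(Workbuf) a multiple of
        // WorkbufSize (in practice, exactly WorkbufSize).
        typedef char workbuf_size_check[(sizeof(Workbuf) % WorkbufSize == 0) ? 1 : -1];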
   177  
   178  typedef struct Finalizer Finalizer;
   179  struct Finalizer
   180  {
   181  	FuncVal *fn;
   182  	void *arg;
   183  	const struct __go_func_type *ft;
   184  	const PtrType *ot;
   185  };
   186  
   187  typedef struct FinBlock FinBlock;
   188  struct FinBlock
   189  {
   190  	FinBlock *alllink;
   191  	FinBlock *next;
   192  	int32 cnt;
   193  	int32 cap;
   194  	Finalizer fin[1];
   195  };
   196  
   197  static Lock	finlock;	// protects the following variables
   198  static FinBlock	*finq;		// list of finalizers that are to be executed
   199  static FinBlock	*finc;		// cache of free blocks
   200  static FinBlock	*allfin;	// list of all blocks
   201  bool	runtime_fingwait;
   202  bool	runtime_fingwake;
   203  
   204  static Lock	gclock;
   205  static G*	fing;
   206  
   207  static void	runfinq(void*);
   208  static void	bgsweep(void*);
   209  static Workbuf* getempty(Workbuf*);
   210  static Workbuf* getfull(Workbuf*);
   211  static void	putempty(Workbuf*);
   212  static Workbuf* handoff(Workbuf*);
   213  static void	gchelperstart(void);
   214  static void	flushallmcaches(void);
   215  static void	addstackroots(G *gp, Workbuf **wbufp);
   216  
   217  static struct {
   218  	uint64	full;  // lock-free list of full blocks
   219  	uint64	empty; // lock-free list of empty blocks
   220  	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
   221  	uint32	nproc;
   222  	int64	tstart;
   223  	volatile uint32	nwait;
   224  	volatile uint32	ndone;
   225  	Note	alldone;
   226  	ParFor	*markfor;
   227  
   228  	Lock	lock;
   229  	byte	*chunk;
   230  	uintptr	nchunk;
   231  } work __attribute__((aligned(8)));
   232  
   233  enum {
   234  	GC_DEFAULT_PTR = GC_NUM_INSTR,
   235  	GC_CHAN,
   236  
   237  	GC_NUM_INSTR2
   238  };
   239  
   240  static struct {
   241  	struct {
   242  		uint64 sum;
   243  		uint64 cnt;
   244  	} ptr;
   245  	uint64 nbytes;
   246  	struct {
   247  		uint64 sum;
   248  		uint64 cnt;
   249  		uint64 notype;
   250  		uint64 typelookup;
   251  	} obj;
   252  	uint64 rescan;
   253  	uint64 rescanbytes;
   254  	uint64 instr[GC_NUM_INSTR2];
   255  	uint64 putempty;
   256  	uint64 getfull;
   257  	struct {
   258  		uint64 foundbit;
   259  		uint64 foundword;
   260  		uint64 foundspan;
   261  	} flushptrbuf;
   262  	struct {
   263  		uint64 foundbit;
   264  		uint64 foundword;
   265  		uint64 foundspan;
   266  	} markonly;
   267  	uint32 nbgsweep;
   268  	uint32 npausesweep;
   269  } gcstats;
   270  
   271  // markonly marks an object. It returns true if the object
   272  // has been marked by this function, false otherwise.
   273  // This function doesn't append the object to any buffer.
   274  static bool
   275  markonly(const void *obj)
   276  {
   277  	byte *p;
   278  	uintptr *bitp, bits, shift, x, xbits, off, j;
   279  	MSpan *s;
   280  	PageID k;
   281  
   282  	// Words outside the arena cannot be pointers.
   283  	if((const byte*)obj < runtime_mheap.arena_start || (const byte*)obj >= runtime_mheap.arena_used)
   284  		return false;
   285  
   286  	// obj may be a pointer to a live object.
   287  	// Try to find the beginning of the object.
   288  
   289  	// Round down to word boundary.
   290  	obj = (const void*)((uintptr)obj & ~((uintptr)PtrSize-1));
   291  
   292  	// Find bits for this word.
   293  	off = (const uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
   294  	bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
   295  	shift = off % wordsPerBitmapWord;
   296  	xbits = *bitp;
   297  	bits = xbits >> shift;
   298  
   299  	// Pointing at the beginning of a block?
   300  	if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
   301  		if(CollectStats)
   302  			runtime_xadd64(&gcstats.markonly.foundbit, 1);
   303  		goto found;
   304  	}
   305  
   306  	// Pointing just past the beginning?
   307  	// Scan backward a little to find a block boundary.
   308  	for(j=shift; j-->0; ) {
   309  		if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
   310  			shift = j;
   311  			bits = xbits>>shift;
   312  			if(CollectStats)
   313  				runtime_xadd64(&gcstats.markonly.foundword, 1);
   314  			goto found;
   315  		}
   316  	}
   317  
   318  	// Otherwise consult span table to find beginning.
   319  	// (Manually inlined copy of MHeap_LookupMaybe.)
   320  	k = (uintptr)obj>>PageShift;
   321  	x = k;
   322  	x -= (uintptr)runtime_mheap.arena_start>>PageShift;
   323  	s = runtime_mheap.spans[x];
   324  	if(s == nil || k < s->start || (const byte*)obj >= s->limit || s->state != MSpanInUse)
   325  		return false;
   326  	p = (byte*)((uintptr)s->start<<PageShift);
   327  	if(s->sizeclass == 0) {
   328  		obj = p;
   329  	} else {
   330  		uintptr size = s->elemsize;
   331  		int32 i = ((const byte*)obj - p)/size;
   332  		obj = p+i*size;
   333  	}
   334  
   335  	// Now that we know the object header, reload bits.
   336  	off = (const uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
   337  	bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
   338  	shift = off % wordsPerBitmapWord;
   339  	xbits = *bitp;
   340  	bits = xbits >> shift;
   341  	if(CollectStats)
   342  		runtime_xadd64(&gcstats.markonly.foundspan, 1);
   343  
   344  found:
   345  	// Now we have bits, bitp, and shift correct for
   346  	// obj pointing at the base of the object.
   347  	// Only care about allocated and not marked.
   348  	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
   349  		return false;
   350  	if(work.nproc == 1)
   351  		*bitp |= bitMarked<<shift;
   352  	else {
   353  		for(;;) {
   354  			x = *bitp;
   355  			if(x & (bitMarked<<shift))
   356  				return false;
   357  			if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
   358  				break;
   359  		}
   360  	}
   361  
   362  	// The object is now marked
   363  	return true;
   364  }
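
        // A minimal sketch of the heap-bitmap addressing used above and again in
        // flushptrbuf (example_bitmap_lookup is a hypothetical helper, never
        // called): the bitmap sits just below arena_start and grows downward,
        // describing wordsPerBitmapWord heap words per bitmap word.
        static void
        example_bitmap_lookup(byte *arena_start, void *p, uintptr **bitp, uintptr *shift)
        {
        	uintptr off;

        	off = (uintptr*)p - (uintptr*)arena_start;	// word index within the arena
        	*bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
        	*shift = off % wordsPerBitmapWord;
        	// The bits describing p are then (**bitp >> *shift).
        }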
   365  
   366  // PtrTarget is a structure used by intermediate buffers.
   367  // The intermediate buffers hold GC data before it
   368  // is moved/flushed to the work buffer (Workbuf).
   369  // The size of an intermediate buffer is very small,
   370  // such as 32 or 64 elements.
   371  typedef struct PtrTarget PtrTarget;
   372  struct PtrTarget
   373  {
   374  	void *p;
   375  	uintptr ti;
   376  };
   377  
   378  typedef	struct Scanbuf Scanbuf;
   379  struct	Scanbuf
   380  {
   381  	struct {
   382  		PtrTarget *begin;
   383  		PtrTarget *end;
   384  		PtrTarget *pos;
   385  	} ptr;
   386  	struct {
   387  		Obj *begin;
   388  		Obj *end;
   389  		Obj *pos;
   390  	} obj;
   391  	Workbuf *wbuf;
   392  	Obj *wp;
   393  	uintptr nobj;
   394  };
   395  
   396  typedef struct BufferList BufferList;
   397  struct BufferList
   398  {
   399  	PtrTarget ptrtarget[IntermediateBufferCapacity];
   400  	Obj obj[IntermediateBufferCapacity];
   401  	uint32 busy;
   402  	byte pad[CacheLineSize];
   403  };
   404  static BufferList bufferList[MaxGcproc];
   405  
   406  static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
   407  
   408  // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
   409  // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
   410  // while the work buffer contains blocks which have been marked
   411  // and are prepared to be scanned by the garbage collector.
   412  //
   413  // _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer.
   414  //
   415  // A simplified drawing explaining how the todo-list moves from one structure to another:
   416  //
   417  //     scanblock
   418  //  (find pointers)
   419  //    Obj ------> PtrTarget (pointer targets)
   420  //     ↑          |
   421  //     |          |
   422  //     `----------'
   423  //     flushptrbuf
   424  //  (find block start, mark and enqueue)
   425  static void
   426  flushptrbuf(Scanbuf *sbuf)
   427  {
   428  	byte *p, *arena_start, *obj;
   429  	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
   430  	MSpan *s;
   431  	PageID k;
   432  	Obj *wp;
   433  	Workbuf *wbuf;
   434  	PtrTarget *ptrbuf;
   435  	PtrTarget *ptrbuf_end;
   436  
   437  	arena_start = runtime_mheap.arena_start;
   438  
   439  	wp = sbuf->wp;
   440  	wbuf = sbuf->wbuf;
   441  	nobj = sbuf->nobj;
   442  
   443  	ptrbuf = sbuf->ptr.begin;
   444  	ptrbuf_end = sbuf->ptr.pos;
   445  	n = ptrbuf_end - sbuf->ptr.begin;
   446  	sbuf->ptr.pos = sbuf->ptr.begin;
   447  
   448  	if(CollectStats) {
   449  		runtime_xadd64(&gcstats.ptr.sum, n);
   450  		runtime_xadd64(&gcstats.ptr.cnt, 1);
   451  	}
   452  
   453  	// If buffer is nearly full, get a new one.
   454  	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
   455  		if(wbuf != nil)
   456  			wbuf->nobj = nobj;
   457  		wbuf = getempty(wbuf);
   458  		wp = wbuf->obj;
   459  		nobj = 0;
   460  
   461  		if(n >= nelem(wbuf->obj))
   462  			runtime_throw("ptrbuf has to be smaller than WorkBuf");
   463  	}
   464  
   465  	while(ptrbuf < ptrbuf_end) {
   466  		obj = ptrbuf->p;
   467  		ti = ptrbuf->ti;
   468  		ptrbuf++;
   469  
   470  		// obj belongs to interval [mheap.arena_start, mheap.arena_used).
   471  		if(Debug > 1) {
   472  			if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used)
   473  				runtime_throw("object is outside of mheap");
   474  		}
   475  
   476  		// obj may be a pointer to a live object.
   477  		// Try to find the beginning of the object.
   478  
   479  		// Round down to word boundary.
   480  		if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
   481  			obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
   482  			ti = 0;
   483  		}
   484  
   485  		// Find bits for this word.
   486  		off = (uintptr*)obj - (uintptr*)arena_start;
   487  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
   488  		shift = off % wordsPerBitmapWord;
   489  		xbits = *bitp;
   490  		bits = xbits >> shift;
   491  
   492  		// Pointing at the beginning of a block?
   493  		if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
   494  			if(CollectStats)
   495  				runtime_xadd64(&gcstats.flushptrbuf.foundbit, 1);
   496  			goto found;
   497  		}
   498  
   499  		ti = 0;
   500  
   501  		// Pointing just past the beginning?
   502  		// Scan backward a little to find a block boundary.
   503  		for(j=shift; j-->0; ) {
   504  			if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
   505  				obj = (byte*)obj - (shift-j)*PtrSize;
   506  				shift = j;
   507  				bits = xbits>>shift;
   508  				if(CollectStats)
   509  					runtime_xadd64(&gcstats.flushptrbuf.foundword, 1);
   510  				goto found;
   511  			}
   512  		}
   513  
   514  		// Otherwise consult span table to find beginning.
   515  		// (Manually inlined copy of MHeap_LookupMaybe.)
   516  		k = (uintptr)obj>>PageShift;
   517  		x = k;
   518  		x -= (uintptr)arena_start>>PageShift;
   519  		s = runtime_mheap.spans[x];
   520  		if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
   521  			continue;
   522  		p = (byte*)((uintptr)s->start<<PageShift);
   523  		if(s->sizeclass == 0) {
   524  			obj = p;
   525  		} else {
   526  			size = s->elemsize;
   527  			int32 i = ((byte*)obj - p)/size;
   528  			obj = p+i*size;
   529  		}
   530  
   531  		// Now that we know the object header, reload bits.
   532  		off = (uintptr*)obj - (uintptr*)arena_start;
   533  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
   534  		shift = off % wordsPerBitmapWord;
   535  		xbits = *bitp;
   536  		bits = xbits >> shift;
   537  		if(CollectStats)
   538  			runtime_xadd64(&gcstats.flushptrbuf.foundspan, 1);
   539  
   540  	found:
   541  		// Now we have bits, bitp, and shift correct for
   542  		// obj pointing at the base of the object.
   543  		// Only care about allocated and not marked.
   544  		if((bits & (bitAllocated|bitMarked)) != bitAllocated)
   545  			continue;
   546  		if(work.nproc == 1)
   547  			*bitp |= bitMarked<<shift;
   548  		else {
   549  			for(;;) {
   550  				x = *bitp;
   551  				if(x & (bitMarked<<shift))
   552  					goto continue_obj;
   553  				if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
   554  					break;
   555  			}
   556  		}
   557  
   558  		// If object has no pointers, don't need to scan further.
   559  		if((bits & bitScan) == 0)
   560  			continue;
   561  
   562  		// Ask span about size class.
   563  		// (Manually inlined copy of MHeap_Lookup.)
   564  		x = (uintptr)obj >> PageShift;
   565  		x -= (uintptr)arena_start>>PageShift;
   566  		s = runtime_mheap.spans[x];
   567  
   568  		PREFETCH(obj);
   569  
   570  		*wp = (Obj){obj, s->elemsize, ti};
   571  		wp++;
   572  		nobj++;
   573  	continue_obj:;
   574  	}
   575  
   576  	// If another proc wants a pointer, give it some.
   577  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
   578  		wbuf->nobj = nobj;
   579  		wbuf = handoff(wbuf);
   580  		nobj = wbuf->nobj;
   581  		wp = wbuf->obj + nobj;
   582  	}
   583  
   584  	sbuf->wp = wp;
   585  	sbuf->wbuf = wbuf;
   586  	sbuf->nobj = nobj;
   587  }
   588  
   589  static void
   590  flushobjbuf(Scanbuf *sbuf)
   591  {
   592  	uintptr nobj, off;
   593  	Obj *wp, obj;
   594  	Workbuf *wbuf;
   595  	Obj *objbuf;
   596  	Obj *objbuf_end;
   597  
   598  	wp = sbuf->wp;
   599  	wbuf = sbuf->wbuf;
   600  	nobj = sbuf->nobj;
   601  
   602  	objbuf = sbuf->obj.begin;
   603  	objbuf_end = sbuf->obj.pos;
   604  	sbuf->obj.pos = sbuf->obj.begin;
   605  
   606  	while(objbuf < objbuf_end) {
   607  		obj = *objbuf++;
   608  
   609  		// Align obj.p to a word boundary.
   610  		off = (uintptr)obj.p & (PtrSize-1);
   611  		if(off != 0) {
   612  			obj.p += PtrSize - off;
   613  			obj.n -= PtrSize - off;
   614  			obj.ti = 0;
   615  		}
   616  
   617  		if(obj.p == nil || obj.n == 0)
   618  			continue;
   619  
   620  		// If buffer is full, get a new one.
   621  		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
   622  			if(wbuf != nil)
   623  				wbuf->nobj = nobj;
   624  			wbuf = getempty(wbuf);
   625  			wp = wbuf->obj;
   626  			nobj = 0;
   627  		}
   628  
   629  		*wp = obj;
   630  		wp++;
   631  		nobj++;
   632  	}
   633  
   634  	// If another proc wants a pointer, give it some.
   635  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
   636  		wbuf->nobj = nobj;
   637  		wbuf = handoff(wbuf);
   638  		nobj = wbuf->nobj;
   639  		wp = wbuf->obj + nobj;
   640  	}
   641  
   642  	sbuf->wp = wp;
   643  	sbuf->wbuf = wbuf;
   644  	sbuf->nobj = nobj;
   645  }
   646  
   647  // Program that scans the whole block and treats every block element as a potential pointer
   648  static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
   649  
   650  // Hchan program
   651  static uintptr chanProg[2] = {0, GC_CHAN};
   652  
   653  // Local variables of a program fragment or loop
   654  typedef struct Frame Frame;
   655  struct Frame {
   656  	uintptr count, elemsize, b;
   657  	const uintptr *loop_or_ret;
   658  };
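
        // The type-info "programs" interpreted by scanblock below are word streams:
        // word 0 is the element size (stack_top.elemsize), followed by opcodes with
        // inline operands.  defaultProg above is the simplest case: element size
        // PtrSize followed by GC_DEFAULT_PTR, i.e. scan every word of the block as a
        // potential pointer.  A purely conservative program for a hypothetical
        // four-word element would differ only in the size word (illustrative only,
        // never referenced):
        static uintptr exampleConservativeProg[2] = {4*PtrSize, GC_DEFAULT_PTR};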
   659  
   660  // Sanity check for the derived type info objti.
   661  static void
   662  checkptr(void *obj, uintptr objti)
   663  {
   664  	uintptr *pc1, type, tisize, i, j, x;
   665  	const uintptr *pc2;
   666  	byte *objstart;
   667  	Type *t;
   668  	MSpan *s;
   669  
   670  	if(!Debug)
   671  		runtime_throw("checkptr is debug only");
   672  
   673  	if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
   674  		return;
   675  	type = runtime_gettype(obj);
   676  	t = (Type*)(type & ~(uintptr)(PtrSize-1));
   677  	if(t == nil)
   678  		return;
   679  	x = (uintptr)obj >> PageShift;
   680  	x -= (uintptr)(runtime_mheap.arena_start)>>PageShift;
   681  	s = runtime_mheap.spans[x];
   682  	objstart = (byte*)((uintptr)s->start<<PageShift);
   683  	if(s->sizeclass != 0) {
   684  		i = ((byte*)obj - objstart)/s->elemsize;
   685  		objstart += i*s->elemsize;
   686  	}
   687  	tisize = *(uintptr*)objti;
   688  	// Sanity check for object size: it should fit into the memory block.
   689  	if((byte*)obj + tisize > objstart + s->elemsize) {
   690  		runtime_printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
   691  			       *t->string, obj, tisize, objstart, s->elemsize);
   692  		runtime_throw("invalid gc type info");
   693  	}
   694  	if(obj != objstart)
   695  		return;
   696  	// If obj points to the beginning of the memory block,
   697  	// check type info as well.
   698  	if(t->string == nil ||
   699  		// Gob allocates unsafe pointers for indirection.
   700  		(runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") &&
   701  		// Runtime and gc think differently about closures.
   702  		 runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) {
   703  		pc1 = (uintptr*)objti;
   704  		pc2 = (const uintptr*)t->__gc;
   705  		// A simple best-effort check until first GC_END.
   706  		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
   707  			if(pc1[j] != pc2[j]) {
   708  				runtime_printf("invalid gc type info for '%s', type info %p [%d]=%p, block info %p [%d]=%p\n",
   709  					       t->string ? (const int8*)t->string->str : (const int8*)"?", pc1, (int32)j, pc1[j], pc2, (int32)j, pc2[j]);
   710  				runtime_throw("invalid gc type info");
   711  			}
   712  		}
   713  	}
   714  }
   715  
   716  // scanblock scans a block of n bytes starting at pointer b for references
   717  // to other objects, scanning any it finds recursively until there are no
   718  // unscanned objects left.  Instead of using an explicit recursion, it keeps
   719  // a work list in the Workbuf* structures and loops in the main function
   720  // body.  Keeping an explicit work list is easier on the stack allocator and
   721  // more efficient.
   722  static void
   723  scanblock(Workbuf *wbuf, bool keepworking)
   724  {
   725  	byte *b, *arena_start, *arena_used;
   726  	uintptr n, i, end_b, elemsize, size, ti, objti, count, type, nobj;
   727  	uintptr precise_type, nominal_size;
   728  	const uintptr *pc, *chan_ret;
   729  	uintptr chancap;
   730  	void *obj;
   731  	const Type *t, *et;
   732  	Slice *sliceptr;
   733  	String *stringptr;
   734  	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
   735  	BufferList *scanbuffers;
   736  	Scanbuf sbuf;
   737  	Eface *eface;
   738  	Iface *iface;
   739  	Hchan *chan;
   740  	const ChanType *chantype;
   741  	Obj *wp;
   742  
   743  	if(sizeof(Workbuf) % WorkbufSize != 0)
   744  		runtime_throw("scanblock: size of Workbuf is suboptimal");
   745  
   746  	// Memory arena parameters.
   747  	arena_start = runtime_mheap.arena_start;
   748  	arena_used = runtime_mheap.arena_used;
   749  
   750  	stack_ptr = stack+nelem(stack)-1;
   751  
   752  	precise_type = false;
   753  	nominal_size = 0;
   754  
   755  	if(wbuf) {
   756  		nobj = wbuf->nobj;
   757  		wp = &wbuf->obj[nobj];
   758  	} else {
   759  		nobj = 0;
   760  		wp = nil;
   761  	}
   762  
   763  	// Initialize sbuf
   764  	scanbuffers = &bufferList[runtime_m()->helpgc];
   765  
   766  	sbuf.ptr.begin = sbuf.ptr.pos = &scanbuffers->ptrtarget[0];
   767  	sbuf.ptr.end = sbuf.ptr.begin + nelem(scanbuffers->ptrtarget);
   768  
   769  	sbuf.obj.begin = sbuf.obj.pos = &scanbuffers->obj[0];
   770  	sbuf.obj.end = sbuf.obj.begin + nelem(scanbuffers->obj);
   771  
   772  	sbuf.wbuf = wbuf;
   773  	sbuf.wp = wp;
   774  	sbuf.nobj = nobj;
   775  
   776  	// (Silence the compiler)
   777  	chan = nil;
   778  	chantype = nil;
   779  	chan_ret = nil;
   780  
   781  	goto next_block;
   782  
   783  	for(;;) {
   784  		// Each iteration scans the block b of length n, queueing pointers in
   785  		// the work buffer.
   786  
   787  		if(CollectStats) {
   788  			runtime_xadd64(&gcstats.nbytes, n);
   789  			runtime_xadd64(&gcstats.obj.sum, sbuf.nobj);
   790  			runtime_xadd64(&gcstats.obj.cnt, 1);
   791  		}
   792  
   793  		if(ti != 0) {
   794  			if(Debug > 1) {
   795  				runtime_printf("scanblock %p %D ti %p\n", b, (int64)n, ti);
   796  			}
   797  			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
   798  			precise_type = (ti & PRECISE);
   799  			stack_top.elemsize = pc[0];
   800  			if(!precise_type)
   801  				nominal_size = pc[0];
   802  			if(ti & LOOP) {
   803  				stack_top.count = 0;	// 0 means an infinite number of iterations
   804  				stack_top.loop_or_ret = pc+1;
   805  			} else {
   806  				stack_top.count = 1;
   807  			}
   808  			if(Debug) {
   809  				// Simple sanity check for provided type info ti:
   810  				// The declared size of the object must not be larger than the actual size
   811  				// (it can be smaller due to interior pointers).
   812  				// It's difficult to make a comprehensive check due to interior pointers,
   813  				// reflection, gob, etc.
   814  				if(pc[0] > n) {
   815  					runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
   816  					runtime_throw("invalid gc type info");
   817  				}
   818  			}
   819  		} else if(UseSpanType) {
   820  			if(CollectStats)
   821  				runtime_xadd64(&gcstats.obj.notype, 1);
   822  
   823  			type = runtime_gettype(b);
   824  			if(type != 0) {
   825  				if(CollectStats)
   826  					runtime_xadd64(&gcstats.obj.typelookup, 1);
   827  
   828  				t = (Type*)(type & ~(uintptr)(PtrSize-1));
   829  				switch(type & (PtrSize-1)) {
   830  				case TypeInfo_SingleObject:
   831  					pc = (const uintptr*)t->__gc;
   832  					precise_type = true;  // type information about 'b' is precise
   833  					stack_top.count = 1;
   834  					stack_top.elemsize = pc[0];
   835  					break;
   836  				case TypeInfo_Array:
   837  					pc = (const uintptr*)t->__gc;
   838  					if(pc[0] == 0)
   839  						goto next_block;
   840  					precise_type = true;  // type information about 'b' is precise
   841  					stack_top.count = 0;  // 0 means an infinite number of iterations
   842  					stack_top.elemsize = pc[0];
   843  					stack_top.loop_or_ret = pc+1;
   844  					break;
   845  				case TypeInfo_Chan:
   846  					chan = (Hchan*)b;
   847  					chantype = (const ChanType*)t;
   848  					chan_ret = nil;
   849  					pc = chanProg;
   850  					break;
   851  				default:
   852  					if(Debug > 1)
   853  						runtime_printf("scanblock %p %D type %p %S\n", b, (int64)n, type, *t->string);
   854  					runtime_throw("scanblock: invalid type");
   855  					return;
   856  				}
   857  				if(Debug > 1)
   858  					runtime_printf("scanblock %p %D type %p %S pc=%p\n", b, (int64)n, type, *t->string, pc);
   859  			} else {
   860  				pc = defaultProg;
   861  				if(Debug > 1)
   862  					runtime_printf("scanblock %p %D unknown type\n", b, (int64)n);
   863  			}
   864  		} else {
   865  			pc = defaultProg;
   866  			if(Debug > 1)
   867  				runtime_printf("scanblock %p %D no span types\n", b, (int64)n);
   868  		}
   869  
   870  		if(IgnorePreciseGC)
   871  			pc = defaultProg;
   872  
   873  		pc++;
   874  		stack_top.b = (uintptr)b;
   875  		end_b = (uintptr)b + n - PtrSize;
   876  
   877  	for(;;) {
   878  		if(CollectStats)
   879  			runtime_xadd64(&gcstats.instr[pc[0]], 1);
   880  
   881  		obj = nil;
   882  		objti = 0;
   883  		switch(pc[0]) {
   884  		case GC_PTR:
   885  			obj = *(void**)(stack_top.b + pc[1]);
   886  			objti = pc[2];
   887  			if(Debug > 2)
   888  				runtime_printf("gc_ptr @%p: %p ti=%p\n", stack_top.b+pc[1], obj, objti);
   889  			pc += 3;
   890  			if(Debug)
   891  				checkptr(obj, objti);
   892  			break;
   893  
   894  		case GC_SLICE:
   895  			sliceptr = (Slice*)(stack_top.b + pc[1]);
   896  			if(Debug > 2)
   897  				runtime_printf("gc_slice @%p: %p/%D/%D\n", sliceptr, sliceptr->array, (int64)sliceptr->__count, (int64)sliceptr->cap);
   898  			if(sliceptr->cap != 0) {
   899  				obj = sliceptr->array;
   900  				// Can't use slice element type for scanning,
   901  				// because if it points to an array embedded
   902  				// in the beginning of a struct,
   903  				// we will scan the whole struct as the slice.
   904  				// So just obtain type info from heap.
   905  			}
   906  			pc += 3;
   907  			break;
   908  
   909  		case GC_APTR:
   910  			obj = *(void**)(stack_top.b + pc[1]);
   911  			if(Debug > 2)
   912  				runtime_printf("gc_aptr @%p: %p\n", stack_top.b+pc[1], obj);
   913  			pc += 2;
   914  			break;
   915  
   916  		case GC_STRING:
   917  			stringptr = (String*)(stack_top.b + pc[1]);
   918  			if(Debug > 2)
   919  				runtime_printf("gc_string @%p: %p/%D\n", stack_top.b+pc[1], stringptr->str, (int64)stringptr->len);
   920  			if(stringptr->len != 0)
   921  				markonly(stringptr->str);
   922  			pc += 2;
   923  			continue;
   924  
   925  		case GC_EFACE:
   926  			eface = (Eface*)(stack_top.b + pc[1]);
   927  			pc += 2;
   928  			if(Debug > 2)
   929  				runtime_printf("gc_eface @%p: %p %p\n", stack_top.b+pc[1], eface->__type_descriptor, eface->__object);
   930  			if(eface->__type_descriptor == nil)
   931  				continue;
   932  
   933  			// eface->type
   934  			t = eface->__type_descriptor;
   935  			if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
   936  				union { const Type *tc; Type *tr; } u;
   937  				u.tc = t;
   938  				*sbuf.ptr.pos++ = (PtrTarget){u.tr, 0};
   939  				if(sbuf.ptr.pos == sbuf.ptr.end)
   940  					flushptrbuf(&sbuf);
   941  			}
   942  
   943  			// eface->__object
   944  			if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
   945  				if(__go_is_pointer_type(t)) {
   946  					if((t->__code & KindNoPointers))
   947  						continue;
   948  
   949  					obj = eface->__object;
   950  					if((t->__code & kindMask) == KindPtr) {
   951  						// Only use type information if it is a pointer-containing type.
   952  						// This matches the GC programs written by cmd/gc/reflect.c's
   953  						// dgcsym1 in case TPTR32/case TPTR64. See rationale there.
   954  						et = ((const PtrType*)t)->elem;
   955  						if(!(et->__code & KindNoPointers))
   956  							objti = (uintptr)((const PtrType*)t)->elem->__gc;
   957  					}
   958  				} else {
   959  					obj = eface->__object;
   960  					objti = (uintptr)t->__gc;
   961  				}
   962  			}
   963  			break;
   964  
   965  		case GC_IFACE:
   966  			iface = (Iface*)(stack_top.b + pc[1]);
   967  			pc += 2;
   968  			if(Debug > 2)
   969  				runtime_printf("gc_iface @%p: %p/%p %p\n", stack_top.b+pc[1], iface->__methods[0], nil, iface->__object);
   970  			if(iface->tab == nil)
   971  				continue;
   972  			
   973  			// iface->tab
   974  			if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) {
   975  				*sbuf.ptr.pos++ = (PtrTarget){iface->tab, 0};
   976  				if(sbuf.ptr.pos == sbuf.ptr.end)
   977  					flushptrbuf(&sbuf);
   978  			}
   979  
   980  			// iface->data
   981  			if((byte*)iface->__object >= arena_start && (byte*)iface->__object < arena_used) {
   982  				t = (const Type*)iface->tab[0];
   983  				if(__go_is_pointer_type(t)) {
   984  					if((t->__code & KindNoPointers))
   985  						continue;
   986  
   987  					obj = iface->__object;
   988  					if((t->__code & kindMask) == KindPtr) {
   989  						// Only use type information if it is a pointer-containing type.
   990  						// This matches the GC programs written by cmd/gc/reflect.c's
   991  						// dgcsym1 in case TPTR32/case TPTR64. See rationale there.
   992  						et = ((const PtrType*)t)->elem;
   993  						if(!(et->__code & KindNoPointers))
   994  							objti = (uintptr)((const PtrType*)t)->elem->__gc;
   995  					}
   996  				} else {
   997  					obj = iface->__object;
   998  					objti = (uintptr)t->__gc;
   999  				}
  1000  			}
  1001  			break;
  1002  
  1003  		case GC_DEFAULT_PTR:
  1004  			while(stack_top.b <= end_b) {
  1005  				obj = *(byte**)stack_top.b;
  1006  				if(Debug > 2)
  1007  					runtime_printf("gc_default_ptr @%p: %p\n", stack_top.b, obj);
  1008  				stack_top.b += PtrSize;
  1009  				if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
  1010  					*sbuf.ptr.pos++ = (PtrTarget){obj, 0};
  1011  					if(sbuf.ptr.pos == sbuf.ptr.end)
  1012  						flushptrbuf(&sbuf);
  1013  				}
  1014  			}
  1015  			goto next_block;
  1016  
  1017  		case GC_END:
  1018  			if(--stack_top.count != 0) {
  1019  				// Next iteration of a loop if possible.
  1020  				stack_top.b += stack_top.elemsize;
  1021  				if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
  1022  					pc = stack_top.loop_or_ret;
  1023  					continue;
  1024  				}
  1025  				i = stack_top.b;
  1026  			} else {
  1027  				// Stack pop if possible.
  1028  				if(stack_ptr+1 < stack+nelem(stack)) {
  1029  					pc = stack_top.loop_or_ret;
  1030  					stack_top = *(++stack_ptr);
  1031  					continue;
  1032  				}
  1033  				i = (uintptr)b + nominal_size;
  1034  			}
  1035  			if(!precise_type) {
  1036  				// Quickly scan [b+i,b+n) for possible pointers.
  1037  				for(; i<=end_b; i+=PtrSize) {
  1038  					if(*(byte**)i != nil) {
  1039  						// Found a value that may be a pointer.
  1040  						// Do a rescan of the entire block.
  1041  						enqueue((Obj){b, n, 0}, &sbuf.wbuf, &sbuf.wp, &sbuf.nobj);
  1042  						if(CollectStats) {
  1043  							runtime_xadd64(&gcstats.rescan, 1);
  1044  							runtime_xadd64(&gcstats.rescanbytes, n);
  1045  						}
  1046  						break;
  1047  					}
  1048  				}
  1049  			}
  1050  			goto next_block;
  1051  
  1052  		case GC_ARRAY_START:
  1053  			i = stack_top.b + pc[1];
  1054  			count = pc[2];
  1055  			elemsize = pc[3];
  1056  			pc += 4;
  1057  
  1058  			// Stack push.
  1059  			*stack_ptr-- = stack_top;
  1060  			stack_top = (Frame){count, elemsize, i, pc};
  1061  			continue;
  1062  
  1063  		case GC_ARRAY_NEXT:
  1064  			if(--stack_top.count != 0) {
  1065  				stack_top.b += stack_top.elemsize;
  1066  				pc = stack_top.loop_or_ret;
  1067  			} else {
  1068  				// Stack pop.
  1069  				stack_top = *(++stack_ptr);
  1070  				pc += 1;
  1071  			}
  1072  			continue;
  1073  
  1074  		case GC_CALL:
  1075  			// Stack push.
  1076  			*stack_ptr-- = stack_top;
  1077  			stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
  1078  			pc = (const uintptr*)((const byte*)pc + *(const int32*)(pc+2));  // target of the CALL instruction
  1079  			continue;
  1080  
  1081  		case GC_REGION:
  1082  			obj = (void*)(stack_top.b + pc[1]);
  1083  			size = pc[2];
  1084  			objti = pc[3];
  1085  			pc += 4;
  1086  
  1087  			if(Debug > 2)
  1088  				runtime_printf("gc_region @%p: %D %p\n", stack_top.b+pc[1], (int64)size, objti);
  1089  			*sbuf.obj.pos++ = (Obj){obj, size, objti};
  1090  			if(sbuf.obj.pos == sbuf.obj.end)
  1091  				flushobjbuf(&sbuf);
  1092  			continue;
  1093  
  1094  		case GC_CHAN_PTR:
  1095  			chan = *(Hchan**)(stack_top.b + pc[1]);
  1096  			if(Debug > 2 && chan != nil)
  1097  				runtime_printf("gc_chan_ptr @%p: %p/%D/%D %p\n", stack_top.b+pc[1], chan, (int64)chan->qcount, (int64)chan->dataqsiz, pc[2]);
  1098  			if(chan == nil) {
  1099  				pc += 3;
  1100  				continue;
  1101  			}
  1102  			if(markonly(chan)) {
  1103  				chantype = (ChanType*)pc[2];
  1104  				if(!(chantype->elem->__code & KindNoPointers)) {
  1105  					// Start chanProg.
  1106  					chan_ret = pc+3;
  1107  					pc = chanProg+1;
  1108  					continue;
  1109  				}
  1110  			}
  1111  			pc += 3;
  1112  			continue;
  1113  
  1114  		case GC_CHAN:
  1115  			// There are no heap pointers in struct Hchan,
  1116  			// so we can ignore the leading sizeof(Hchan) bytes.
  1117  			if(!(chantype->elem->__code & KindNoPointers)) {
  1118  				// Channel's buffer follows Hchan immediately in memory.
  1119  				// Size of buffer (cap(c)) is second int in the chan struct.
  1120  				chancap = ((uintgo*)chan)[1];
  1121  				if(chancap > 0) {
  1122  					// TODO(atom): split into two chunks so that only the
  1123  					// in-use part of the circular buffer is scanned.
  1124  					// (Channel routines zero the unused part, so the current
  1125  					// code does not lead to leaks, it's just a little inefficient.)
  1126  					*sbuf.obj.pos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->__size,
  1127  						(uintptr)chantype->elem->__gc | PRECISE | LOOP};
  1128  					if(sbuf.obj.pos == sbuf.obj.end)
  1129  						flushobjbuf(&sbuf);
  1130  				}
  1131  			}
  1132  			if(chan_ret == nil)
  1133  				goto next_block;
  1134  			pc = chan_ret;
  1135  			continue;
  1136  
  1137  		default:
  1138  			runtime_printf("runtime: invalid GC instruction %p at %p\n", pc[0], pc);
  1139  			runtime_throw("scanblock: invalid GC instruction");
  1140  			return;
  1141  		}
  1142  
  1143  		if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
  1144  			*sbuf.ptr.pos++ = (PtrTarget){obj, objti};
  1145  			if(sbuf.ptr.pos == sbuf.ptr.end)
  1146  				flushptrbuf(&sbuf);
  1147  		}
  1148  	}
  1149  
  1150  	next_block:
  1151  		// Done scanning [b, b+n).  Prepare for the next iteration of
  1152  		// the loop by setting b, n, ti to the parameters for the next block.
  1153  
  1154  		if(sbuf.nobj == 0) {
  1155  			flushptrbuf(&sbuf);
  1156  			flushobjbuf(&sbuf);
  1157  
  1158  			if(sbuf.nobj == 0) {
  1159  				if(!keepworking) {
  1160  					if(sbuf.wbuf)
  1161  						putempty(sbuf.wbuf);
  1162  					return;
  1163  				}
  1164  				// Emptied our buffer: refill.
  1165  				sbuf.wbuf = getfull(sbuf.wbuf);
  1166  				if(sbuf.wbuf == nil)
  1167  					return;
  1168  				sbuf.nobj = sbuf.wbuf->nobj;
  1169  				sbuf.wp = sbuf.wbuf->obj + sbuf.wbuf->nobj;
  1170  			}
  1171  		}
  1172  
  1173  		// Fetch b from the work buffer.
  1174  		--sbuf.wp;
  1175  		b = sbuf.wp->p;
  1176  		n = sbuf.wp->n;
  1177  		ti = sbuf.wp->ti;
  1178  		sbuf.nobj--;
  1179  	}
  1180  }
  1181  
  1182  static struct root_list* roots;
  1183  
  1184  void
  1185  __go_register_gc_roots (struct root_list* r)
  1186  {
  1187  	// FIXME: This needs locking if multiple goroutines can call
  1188  	// dlopen simultaneously.
  1189  	r->next = roots;
  1190  	roots = r;
  1191  }
  1192  
  1193  // Append obj to the work buffer.
  1194  // _wbuf, _wp, _nobj are input/output parameters and specify the work buffer.
  1195  static void
  1196  enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
  1197  {
  1198  	uintptr nobj, off;
  1199  	Obj *wp;
  1200  	Workbuf *wbuf;
  1201  
  1202  	if(Debug > 1)
  1203  		runtime_printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
  1204  
  1205  	// Align obj.p to a word boundary.
  1206  	off = (uintptr)obj.p & (PtrSize-1);
  1207  	if(off != 0) {
  1208  		obj.p += PtrSize - off;
  1209  		obj.n -= PtrSize - off;
  1210  		obj.ti = 0;
  1211  	}
  1212  
  1213  	if(obj.p == nil || obj.n == 0)
  1214  		return;
  1215  
  1216  	// Load work buffer state
  1217  	wp = *_wp;
  1218  	wbuf = *_wbuf;
  1219  	nobj = *_nobj;
  1220  
  1221  	// If another proc wants a pointer, give it some.
  1222  	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
  1223  		wbuf->nobj = nobj;
  1224  		wbuf = handoff(wbuf);
  1225  		nobj = wbuf->nobj;
  1226  		wp = wbuf->obj + nobj;
  1227  	}
  1228  
  1229  	// If buffer is full, get a new one.
  1230  	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
  1231  		if(wbuf != nil)
  1232  			wbuf->nobj = nobj;
  1233  		wbuf = getempty(wbuf);
  1234  		wp = wbuf->obj;
  1235  		nobj = 0;
  1236  	}
  1237  
  1238  	*wp = obj;
  1239  	wp++;
  1240  	nobj++;
  1241  
  1242  	// Save work buffer state
  1243  	*_wp = wp;
  1244  	*_wbuf = wbuf;
  1245  	*_nobj = nobj;
  1246  }
  1247  
  1248  static void
  1249  enqueue1(Workbuf **wbufp, Obj obj)
  1250  {
  1251  	Workbuf *wbuf;
  1252  
  1253  	wbuf = *wbufp;
  1254  	if(wbuf->nobj >= nelem(wbuf->obj))
  1255  		*wbufp = wbuf = getempty(wbuf);
  1256  	wbuf->obj[wbuf->nobj++] = obj;
  1257  }
  1258  
  1259  static void
  1260  markroot(ParFor *desc, uint32 i)
  1261  {
  1262  	Workbuf *wbuf;
  1263  	FinBlock *fb;
  1264  	MHeap *h;
  1265  	MSpan **allspans, *s;
  1266  	uint32 spanidx, sg;
  1267  	G *gp;
  1268  	void *p;
  1269  
  1270  	USED(&desc);
  1271  	wbuf = getempty(nil);
  1272  	// Note: if you add a case here, please also update heapdump.c:dumproots.
  1273  	switch(i) {
  1274  	case RootData:
  1275  		// For gccgo this is both data and bss.
  1276  		{
  1277  			struct root_list *pl;
  1278  
  1279  			for(pl = roots; pl != nil; pl = pl->next) {
  1280  				struct root *pr = &pl->roots[0];
  1281  				while(1) {
  1282  					void *decl = pr->decl;
  1283  					if(decl == nil)
  1284  						break;
  1285  					enqueue1(&wbuf, (Obj){decl, pr->size, 0});
  1286  					pr++;
  1287  				}
  1288  			}
  1289  		}
  1290  		break;
  1291  
  1292  	case RootBss:
  1293  		// For gccgo we use this for all the other global roots.
  1294  		enqueue1(&wbuf, (Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
  1295  		enqueue1(&wbuf, (Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
  1296  		enqueue1(&wbuf, (Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
  1297  		enqueue1(&wbuf, (Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
  1298  		enqueue1(&wbuf, (Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
  1299  		enqueue1(&wbuf, (Obj){(byte*)&work, sizeof work, 0});
  1300  		runtime_proc_scan(&wbuf, enqueue1);
  1301  		runtime_MProf_Mark(&wbuf, enqueue1);
  1302  		runtime_time_scan(&wbuf, enqueue1);
  1303  		runtime_netpoll_scan(&wbuf, enqueue1);
  1304  		break;
  1305  
  1306  	case RootFinalizers:
  1307  		for(fb=allfin; fb; fb=fb->alllink)
  1308  			enqueue1(&wbuf, (Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
  1309  		break;
  1310  
  1311  	case RootSpanTypes:
  1312  		// mark span types and MSpan.specials (to walk spans only once)
  1313  		h = &runtime_mheap;
  1314  		sg = h->sweepgen;
  1315  		allspans = h->allspans;
  1316  		for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
  1317  			Special *sp;
  1318  			SpecialFinalizer *spf;
  1319  
  1320  			s = allspans[spanidx];
  1321  			if(s->sweepgen != sg) {
  1322  				runtime_printf("sweep %d %d\n", s->sweepgen, sg);
  1323  				runtime_throw("gc: unswept span");
  1324  			}
  1325  			if(s->state != MSpanInUse)
  1326  				continue;
  1327  			// The garbage collector ignores type pointers stored in MSpan.types:
  1328  			//  - Compiler-generated types are stored outside of heap.
  1329  			//  - The reflect package has runtime-generated types cached in its data structures.
  1330  			//    The garbage collector relies on finding the references via that cache.
  1331  			if(s->types.compression == MTypes_Words || s->types.compression == MTypes_Bytes)
  1332  				markonly((byte*)s->types.data);
  1333  			for(sp = s->specials; sp != nil; sp = sp->next) {
  1334  				if(sp->kind != KindSpecialFinalizer)
  1335  					continue;
  1336  				// don't mark finalized object, but scan it so we
  1337  				// retain everything it points to.
  1338  				spf = (SpecialFinalizer*)sp;
  1339  				// A finalizer can be set for an inner byte of an object; find the object beginning.
  1340  				p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
  1341  				enqueue1(&wbuf, (Obj){p, s->elemsize, 0});
  1342  				enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0});
  1343  				enqueue1(&wbuf, (Obj){(void*)&spf->ft, PtrSize, 0});
  1344  				enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0});
  1345  			}
  1346  		}
  1347  		break;
  1348  
  1349  	case RootFlushCaches:
  1350  		flushallmcaches();
  1351  		break;
  1352  
  1353  	default:
  1354  		// the rest is scanning goroutine stacks
  1355  		if(i - RootCount >= runtime_allglen)
  1356  			runtime_throw("markroot: bad index");
  1357  		gp = runtime_allg[i - RootCount];
  1358  		// remember when we first observed the G blocked;
  1359  		// needed only for traceback output
  1360  		if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0)
  1361  			gp->waitsince = work.tstart;
  1362  		addstackroots(gp, &wbuf);
  1363  		break;
  1364  		
  1365  	}
  1366  
  1367  	if(wbuf)
  1368  		scanblock(wbuf, false);
  1369  }
  1370  
  1371  static const FuncVal markroot_funcval = { (void *) markroot };
  1372  
  1373  // Get an empty work buffer off the work.empty list,
  1374  // allocating new buffers as needed.
  1375  static Workbuf*
  1376  getempty(Workbuf *b)
  1377  {
  1378  	if(b != nil)
  1379  		runtime_lfstackpush(&work.full, &b->node);
  1380  	b = (Workbuf*)runtime_lfstackpop(&work.empty);
  1381  	if(b == nil) {
  1382  		// Need to allocate.
  1383  		runtime_lock(&work.lock);
  1384  		if(work.nchunk < sizeof *b) {
  1385  			work.nchunk = 1<<20;
  1386  			work.chunk = runtime_SysAlloc(work.nchunk, &mstats.gc_sys);
  1387  			if(work.chunk == nil)
  1388  				runtime_throw("runtime: cannot allocate memory");
  1389  		}
  1390  		b = (Workbuf*)work.chunk;
  1391  		work.chunk += sizeof *b;
  1392  		work.nchunk -= sizeof *b;
  1393  		runtime_unlock(&work.lock);
  1394  	}
  1395  	b->nobj = 0;
  1396  	return b;
  1397  }
  1398  
  1399  static void
  1400  putempty(Workbuf *b)
  1401  {
  1402  	if(CollectStats)
  1403  		runtime_xadd64(&gcstats.putempty, 1);
  1404  
  1405  	runtime_lfstackpush(&work.empty, &b->node);
  1406  }
  1407  
  1408  // Get a full work buffer off the work.full list, or return nil.
  1409  static Workbuf*
  1410  getfull(Workbuf *b)
  1411  {
  1412  	M *m;
  1413  	int32 i;
  1414  
  1415  	if(CollectStats)
  1416  		runtime_xadd64(&gcstats.getfull, 1);
  1417  
  1418  	if(b != nil)
  1419  		runtime_lfstackpush(&work.empty, &b->node);
  1420  	b = (Workbuf*)runtime_lfstackpop(&work.full);
  1421  	if(b != nil || work.nproc == 1)
  1422  		return b;
  1423  
  1424  	m = runtime_m();
  1425  	runtime_xadd(&work.nwait, +1);
  1426  	for(i=0;; i++) {
  1427  		if(work.full != 0) {
  1428  			runtime_xadd(&work.nwait, -1);
  1429  			b = (Workbuf*)runtime_lfstackpop(&work.full);
  1430  			if(b != nil)
  1431  				return b;
  1432  			runtime_xadd(&work.nwait, +1);
  1433  		}
  1434  		if(work.nwait == work.nproc)
  1435  			return nil;
  1436  		if(i < 10) {
  1437  			m->gcstats.nprocyield++;
  1438  			runtime_procyield(20);
  1439  		} else if(i < 20) {
  1440  			m->gcstats.nosyield++;
  1441  			runtime_osyield();
  1442  		} else {
  1443  			m->gcstats.nsleep++;
  1444  			runtime_usleep(100);
  1445  		}
  1446  	}
  1447  }
  1448  
  1449  static Workbuf*
  1450  handoff(Workbuf *b)
  1451  {
  1452  	M *m;
  1453  	int32 n;
  1454  	Workbuf *b1;
  1455  
  1456  	m = runtime_m();
  1457  
  1458  	// Make new buffer with half of b's pointers.
  1459  	b1 = getempty(nil);
  1460  	n = b->nobj/2;
  1461  	b->nobj -= n;
  1462  	b1->nobj = n;
  1463  	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
  1464  	m->gcstats.nhandoff++;
  1465  	m->gcstats.nhandoffcnt += n;
  1466  
  1467  	// Put b on full list - let first half of b get stolen.
  1468  	runtime_lfstackpush(&work.full, &b->node);
  1469  	return b1;
  1470  }
  1471  
  1472  static void
  1473  addstackroots(G *gp, Workbuf **wbufp)
  1474  {
  1475  	switch(gp->status){
  1476  	default:
  1477  		runtime_printf("unexpected G.status %d (goroutine %p %D)\n", gp->status, gp, gp->goid);
  1478  		runtime_throw("mark - bad status");
  1479  	case Gdead:
  1480  		return;
  1481  	case Grunning:
  1482  		runtime_throw("mark - world not stopped");
  1483  	case Grunnable:
  1484  	case Gsyscall:
  1485  	case Gwaiting:
  1486  		break;
  1487  	}
  1488  
  1489  #ifdef USING_SPLIT_STACK
  1490  	M *mp;
  1491  	void* sp;
  1492  	size_t spsize;
  1493  	void* next_segment;
  1494  	void* next_sp;
  1495  	void* initial_sp;
  1496  
  1497  	if(gp == runtime_g()) {
  1498  		// Scanning our own stack.
  1499  		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
  1500  				       &next_sp, &initial_sp);
  1501  	} else if((mp = gp->m) != nil && mp->helpgc) {
  1502  		// gchelper's stack is in active use and has no interesting pointers.
  1503  		return;
  1504  	} else {
  1505  		// Scanning another goroutine's stack.
  1506  		// The goroutine is usually asleep (the world is stopped).
  1507  
  1508  		// The exception is that if the goroutine is about to enter or might
  1509  		// have just exited a system call, it may be executing code such
  1510  		// as schedlock and may have needed to start a new stack segment.
  1511  		// Use the stack segment and stack pointer at the time of
  1512  		// the system call instead, since that won't change underfoot.
  1513  		if(gp->gcstack != nil) {
  1514  			sp = gp->gcstack;
  1515  			spsize = gp->gcstack_size;
  1516  			next_segment = gp->gcnext_segment;
  1517  			next_sp = gp->gcnext_sp;
  1518  			initial_sp = gp->gcinitial_sp;
  1519  		} else {
  1520  			sp = __splitstack_find_context(&gp->stack_context[0],
  1521  						       &spsize, &next_segment,
  1522  						       &next_sp, &initial_sp);
  1523  		}
  1524  	}
  1525  	if(sp != nil) {
  1526  		enqueue1(wbufp, (Obj){sp, spsize, 0});
  1527  		while((sp = __splitstack_find(next_segment, next_sp,
  1528  					      &spsize, &next_segment,
  1529  					      &next_sp, &initial_sp)) != nil)
  1530  			enqueue1(wbufp, (Obj){sp, spsize, 0});
  1531  	}
  1532  #else
  1533  	M *mp;
  1534  	byte* bottom;
  1535  	byte* top;
  1536  
  1537  	if(gp == runtime_g()) {
  1538  		// Scanning our own stack.
  1539  		bottom = (byte*)&gp;
  1540  	} else if((mp = gp->m) != nil && mp->helpgc) {
  1541  		// gchelper's stack is in active use and has no interesting pointers.
  1542  		return;
  1543  	} else {
  1544  		// Scanning another goroutine's stack.
  1545  		// The goroutine is usually asleep (the world is stopped).
  1546  		bottom = (byte*)gp->gcnext_sp;
  1547  		if(bottom == nil)
  1548  			return;
  1549  	}
  1550  	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
  1551  	if(top > bottom)
  1552  		enqueue1(wbufp, (Obj){bottom, top - bottom, 0});
  1553  	else
  1554  		enqueue1(wbufp, (Obj){top, bottom - top, 0});
  1555  #endif
  1556  }
  1557  
  1558  void
  1559  runtime_queuefinalizer(void *p, FuncVal *fn, const FuncType *ft, const PtrType *ot)
  1560  {
  1561  	FinBlock *block;
  1562  	Finalizer *f;
  1563  
  1564  	runtime_lock(&finlock);
  1565  	if(finq == nil || finq->cnt == finq->cap) {
  1566  		if(finc == nil) {
  1567  			finc = runtime_persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
  1568  			finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
  1569  			finc->alllink = allfin;
  1570  			allfin = finc;
  1571  		}
  1572  		block = finc;
  1573  		finc = block->next;
  1574  		block->next = finq;
  1575  		finq = block;
  1576  	}
  1577  	f = &finq->fin[finq->cnt];
  1578  	finq->cnt++;
  1579  	f->fn = fn;
  1580  	f->ft = ft;
  1581  	f->ot = ot;
  1582  	f->arg = p;
  1583  	runtime_fingwake = true;
  1584  	runtime_unlock(&finlock);
  1585  }
  1586  
  1587  void
  1588  runtime_iterate_finq(void (*callback)(FuncVal*, void*, const FuncType*, const PtrType*))
  1589  {
  1590  	FinBlock *fb;
  1591  	Finalizer *f;
  1592  	int32 i;
  1593  
  1594  	for(fb = allfin; fb; fb = fb->alllink) {
  1595  		for(i = 0; i < fb->cnt; i++) {
  1596  			f = &fb->fin[i];
  1597  			callback(f->fn, f->arg, f->ft, f->ot);
  1598  		}
  1599  	}
  1600  }
  1601  
  1602  void
  1603  runtime_MSpan_EnsureSwept(MSpan *s)
  1604  {
  1605  	M *m = runtime_m();
  1606  	G *g = runtime_g();
  1607  	uint32 sg;
  1608  
  1609  	// Caller must disable preemption.
  1610  	// Otherwise when this function returns the span can become unswept again
  1611  	// (if GC is triggered on another goroutine).
  1612  	if(m->locks == 0 && m->mallocing == 0 && g != m->g0)
  1613  		runtime_throw("MSpan_EnsureSwept: m is not locked");
  1614  
  1615  	sg = runtime_mheap.sweepgen;
  1616  	if(runtime_atomicload(&s->sweepgen) == sg)
  1617  		return;
  1618  	if(runtime_cas(&s->sweepgen, sg-2, sg-1)) {
  1619  		runtime_MSpan_Sweep(s);
  1620  		return;
  1621  	}
  1622  	// unfortunate condition, and we don't have efficient means to wait
  1623  	while(runtime_atomicload(&s->sweepgen) != sg)
  1624  		runtime_osyield();  
  1625  }
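
        // The sweepgen convention relied on above, spelled out as a hypothetical
        // helper (a sketch, never called): relative to mheap.sweepgen (call it sg),
        // a span with sweepgen == sg-2 needs sweeping, sg-1 means it is currently
        // being swept, and sg means it has already been swept.
        static bool
        example_span_needs_sweep(MSpan *s)
        {
        	return runtime_atomicload(&s->sweepgen) == runtime_mheap.sweepgen - 2;
        }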
  1626  
  1627  // Sweep frees or collects finalizers for blocks not marked in the mark phase.
  1628  // It clears the mark bits in preparation for the next GC round.
  1629  // Returns true if the span was returned to heap.
  1630  bool
  1631  runtime_MSpan_Sweep(MSpan *s)
  1632  {
  1633  	M *m;
  1634  	int32 cl, n, npages, nfree;
  1635  	uintptr size, off, *bitp, shift, bits;
  1636  	uint32 sweepgen;
  1637  	byte *p;
  1638  	MCache *c;
  1639  	byte *arena_start;
  1640  	MLink head, *end;
  1641  	byte *type_data;
  1642  	byte compression;
  1643  	uintptr type_data_inc;
  1644  	MLink *x;
  1645  	Special *special, **specialp, *y;
  1646  	bool res, sweepgenset;
  1647  
  1648  	m = runtime_m();
  1649  
  1650  	// It's critical that we enter this function with preemption disabled,
  1651  	// GC must not start while we are in the middle of this function.
  1652  	if(m->locks == 0 && m->mallocing == 0 && runtime_g() != m->g0)
  1653  		runtime_throw("MSpan_Sweep: m is not locked");
  1654  	sweepgen = runtime_mheap.sweepgen;
  1655  	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
  1656  		runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
  1657  			s->state, s->sweepgen, sweepgen);
  1658  		runtime_throw("MSpan_Sweep: bad span state");
  1659  	}
  1660  	arena_start = runtime_mheap.arena_start;
  1661  	cl = s->sizeclass;
  1662  	size = s->elemsize;
  1663  	if(cl == 0) {
  1664  		n = 1;
  1665  	} else {
  1666  		// Chunk full of small blocks.
  1667  		npages = runtime_class_to_allocnpages[cl];
  1668  		n = (npages << PageShift) / size;
  1669  	}
  1670  	res = false;
  1671  	nfree = 0;
  1672  	end = &head;
  1673  	c = m->mcache;
  1674  	sweepgenset = false;
  1675  
  1676  	// mark any free objects in this span so we don't collect them
  1677  	for(x = s->freelist; x != nil; x = x->next) {
  1678  		// This is markonly(x) but faster because we don't need
  1679  		// atomic access and we're guaranteed to be pointing at
  1680  		// the head of a valid object.
  1681  		off = (uintptr*)x - (uintptr*)runtime_mheap.arena_start;
  1682  		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  1683  		shift = off % wordsPerBitmapWord;
  1684  		*bitp |= bitMarked<<shift;
  1685  	}
  1686  
  1687  	// Unlink & free special records for any objects we're about to free.
  1688  	specialp = &s->specials;
  1689  	special = *specialp;
  1690  	while(special != nil) {
  1691  		// A finalizer can be set for an inner byte of an object, find object beginning.
  1692  		p = (byte*)(s->start << PageShift) + special->offset/size*size;
  1693  		off = (uintptr*)p - (uintptr*)arena_start;
  1694  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1695  		shift = off % wordsPerBitmapWord;
  1696  		bits = *bitp>>shift;
  1697  		if((bits & (bitAllocated|bitMarked)) == bitAllocated) {
  1698  			// Find the exact byte for which the special was setup
  1699  			// (as opposed to object beginning).
  1700  			p = (byte*)(s->start << PageShift) + special->offset;
  1701  			// about to free object: splice out special record
  1702  			y = special;
  1703  			special = special->next;
  1704  			*specialp = special;
  1705  			if(!runtime_freespecial(y, p, size, false)) {
  1706  				// stop freeing of object if it has a finalizer
  1707  				*bitp |= bitMarked << shift;
  1708  			}
  1709  		} else {
  1710  			// object is still live: keep special record
  1711  			specialp = &special->next;
  1712  			special = *specialp;
  1713  		}
  1714  	}
  1715  
  1716  	type_data = (byte*)s->types.data;
  1717  	type_data_inc = sizeof(uintptr);
  1718  	compression = s->types.compression;
  1719  	switch(compression) {
  1720  	case MTypes_Bytes:
  1721  		type_data += 8*sizeof(uintptr);
  1722  		type_data_inc = 1;
  1723  		break;
  1724  	}
  1725  
  1726  	// Sweep through n objects of given size starting at p.
  1727  	// This thread owns the span now, so it can manipulate
  1728  	// the block bitmap without atomic operations.
  1729  	p = (byte*)(s->start << PageShift);
  1730  	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
  1731  		off = (uintptr*)p - (uintptr*)arena_start;
  1732  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1733  		shift = off % wordsPerBitmapWord;
  1734  		bits = *bitp>>shift;
  1735  
  1736  		if((bits & bitAllocated) == 0)
  1737  			continue;
  1738  
  1739  		if((bits & bitMarked) != 0) {
  1740  			*bitp &= ~(bitMarked<<shift);
  1741  			continue;
  1742  		}
  1743  
  1744  		if(runtime_debug.allocfreetrace)
  1745  			runtime_tracefree(p, size);
  1746  
  1747  		// Clear mark and scan bits.
  1748  		*bitp &= ~((bitScan|bitMarked)<<shift);
  1749  
  1750  		if(cl == 0) {
  1751  			// Free large span.
  1752  			runtime_unmarkspan(p, 1<<PageShift);
  1753  			s->needzero = 1;
  1754  			// important to set sweepgen before returning it to heap
  1755  			runtime_atomicstore(&s->sweepgen, sweepgen);
  1756  			sweepgenset = true;
  1757  			// See note about SysFault vs SysFree in malloc.goc.
  1758  			if(runtime_debug.efence)
  1759  				runtime_SysFault(p, size);
  1760  			else
  1761  				runtime_MHeap_Free(&runtime_mheap, s, 1);
  1762  			c->local_nlargefree++;
  1763  			c->local_largefree += size;
  1764  			runtime_xadd64(&mstats.next_gc, -(uint64)(size * (gcpercent + 100)/100));
  1765  			res = true;
  1766  		} else {
  1767  			// Free small object.
  1768  			switch(compression) {
  1769  			case MTypes_Words:
  1770  				*(uintptr*)type_data = 0;
  1771  				break;
  1772  			case MTypes_Bytes:
  1773  				*(byte*)type_data = 0;
  1774  				break;
  1775  			}
  1776  			if(size > 2*sizeof(uintptr))
  1777  				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
  1778  			else if(size > sizeof(uintptr))
  1779  				((uintptr*)p)[1] = 0;
  1780  
  1781  			end->next = (MLink*)p;
  1782  			end = (MLink*)p;
  1783  			nfree++;
  1784  		}
  1785  	}
  1786  
  1787  	// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
  1788  	// because of the potential for a concurrent free/SetFinalizer.
  1789  	// But we need to set it before we make the span available for allocation
  1790  	// (return it to heap or mcentral), because allocation code assumes that a
  1791  	// span is already swept if available for allocation.
  1792  
  1793  	if(!sweepgenset && nfree == 0) {
  1794  		// The span must be in our exclusive ownership until we update sweepgen,
  1795  		// check for potential races.
  1796  		if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
  1797  			runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
  1798  				s->state, s->sweepgen, sweepgen);
  1799  			runtime_throw("MSpan_Sweep: bad span state after sweep");
  1800  		}
  1801  		runtime_atomicstore(&s->sweepgen, sweepgen);
  1802  	}
  1803  	if(nfree > 0) {
  1804  		c->local_nsmallfree[cl] += nfree;
  1805  		c->local_cachealloc -= nfree * size;
  1806  		runtime_xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100));
  1807  		res = runtime_MCentral_FreeSpan(&runtime_mheap.central[cl].mcentral, s, nfree, head.next, end);
  1808  		// MCentral_FreeSpan updates sweepgen.
  1809  	}
  1810  	return res;
  1811  }
  1812  
  1813  // State of background sweep.
  1814  // Protected by gclock.
  1815  static struct
  1816  {
  1817  	G*	g;
  1818  	bool	parked;
  1819  
  1820  	MSpan**	spans;
  1821  	uint32	nspan;
  1822  	uint32	spanidx;
  1823  } sweep;
  1824  
  1825  // background sweeping goroutine
  1826  static void
  1827  bgsweep(void* dummy __attribute__ ((unused)))
  1828  {
  1829  	runtime_g()->issystem = 1;
  1830  	for(;;) {
  1831  		while(runtime_sweepone() != (uintptr)-1) {
  1832  			gcstats.nbgsweep++;
  1833  			runtime_gosched();
  1834  		}
  1835  		runtime_lock(&gclock);
  1836  		if(!runtime_mheap.sweepdone) {
  1837  			// This can happen if a GC ran between sweepone
  1838  			// returning -1 and our acquiring gclock.
  1839  			runtime_unlock(&gclock);
  1840  			continue;
  1841  		}
  1842  		sweep.parked = true;
  1843  		runtime_g()->isbackground = true;
  1844  		runtime_parkunlock(&gclock, "GC sweep wait");
  1845  		runtime_g()->isbackground = false;
  1846  	}
  1847  }
  1848  
  1849  // sweeps one span
  1850  // returns number of pages returned to heap, or -1 if there is nothing to sweep
  1851  uintptr
  1852  runtime_sweepone(void)
  1853  {
  1854  	M *m = runtime_m();
  1855  	MSpan *s;
  1856  	uint32 idx, sg;
  1857  	uintptr npages;
  1858  
  1859  	// increment locks to ensure that the goroutine is not preempted
  1860  	// in the middle of sweep thus leaving the span in an inconsistent state for next GC
  1861  	m->locks++;
  1862  	sg = runtime_mheap.sweepgen;
  1863  	for(;;) {
  1864  		idx = runtime_xadd(&sweep.spanidx, 1) - 1;
  1865  		if(idx >= sweep.nspan) {
  1866  			runtime_mheap.sweepdone = true;
  1867  			m->locks--;
  1868  			return (uintptr)-1;
  1869  		}
  1870  		s = sweep.spans[idx];
  1871  		if(s->state != MSpanInUse) {
  1872  			s->sweepgen = sg;
  1873  			continue;
  1874  		}
  1875  		if(s->sweepgen != sg-2 || !runtime_cas(&s->sweepgen, sg-2, sg-1))
  1876  			continue;
  1877  		if(s->incache)
  1878  			runtime_throw("sweep of incache span");
  1879  		npages = s->npages;
  1880  		if(!runtime_MSpan_Sweep(s))
  1881  			npages = 0;
  1882  		m->locks--;
  1883  		return npages;
  1884  	}
  1885  }
  1886  
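        // dumpspan prints the contents of the in-use span
        // runtime_mheap.allspans[idx], word by word, for debugging.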
  1887  static void
  1888  dumpspan(uint32 idx)
  1889  {
  1890  	int32 sizeclass, n, npages, i, column;
  1891  	uintptr size;
  1892  	byte *p;
  1893  	byte *arena_start;
  1894  	MSpan *s;
  1895  	bool allocated;
  1896  
  1897  	s = runtime_mheap.allspans[idx];
  1898  	if(s->state != MSpanInUse)
  1899  		return;
  1900  	arena_start = runtime_mheap.arena_start;
  1901  	p = (byte*)(s->start << PageShift);
  1902  	sizeclass = s->sizeclass;
  1903  	size = s->elemsize;
  1904  	if(sizeclass == 0) {
  1905  		n = 1;
  1906  	} else {
  1907  		npages = runtime_class_to_allocnpages[sizeclass];
  1908  		n = (npages << PageShift) / size;
  1909  	}
  1910  	
  1911  	runtime_printf("%p .. %p:\n", p, p+n*size);
  1912  	column = 0;
  1913  	for(; n>0; n--, p+=size) {
  1914  		uintptr off, *bitp, shift, bits;
  1915  
  1916  		off = (uintptr*)p - (uintptr*)arena_start;
  1917  		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  1918  		shift = off % wordsPerBitmapWord;
  1919  		bits = *bitp>>shift;
  1920  
  1921  		allocated = ((bits & bitAllocated) != 0);
  1922  
  1923  		for(i=0; (uint32)i<size; i+=sizeof(void*)) {
  1924  			if(column == 0) {
  1925  				runtime_printf("\t");
  1926  			}
  1927  			if(i == 0) {
  1928  				runtime_printf(allocated ? "(" : "[");
  1929  				runtime_printf("%p: ", p+i);
  1930  			} else {
  1931  				runtime_printf(" ");
  1932  			}
  1933  
  1934  			runtime_printf("%p", *(void**)(p+i));
  1935  
  1936  			if(i+sizeof(void*) >= size) {
  1937  				runtime_printf(allocated ? ") " : "] ");
  1938  			}
  1939  
  1940  			column++;
  1941  			if(column == 8) {
  1942  				runtime_printf("\n");
  1943  				column = 0;
  1944  			}
  1945  		}
  1946  	}
  1947  	runtime_printf("\n");
  1948  }
  1949  
  1950  // A debugging function to dump the contents of memory
  1951  void
  1952  runtime_memorydump(void)
  1953  {
  1954  	uint32 spanidx;
  1955  
  1956  	for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
  1957  		dumpspan(spanidx);
  1958  	}
  1959  }
  1960  
  1961  void
  1962  runtime_gchelper(void)
  1963  {
  1964  	uint32 nproc;
  1965  
  1966  	runtime_m()->traceback = 2;
  1967  	gchelperstart();
  1968  
  1969  	// parallel mark over gc roots
  1970  	runtime_parfordo(work.markfor);
  1971  
  1972  	// help other threads scan secondary blocks
  1973  	scanblock(nil, true);
  1974  
  1975  	bufferList[runtime_m()->helpgc].busy = 0;
  1976  	nproc = work.nproc;  // work.nproc can change right after we increment work.ndone
  1977  	if(runtime_xadd(&work.ndone, +1) == nproc-1)
  1978  		runtime_notewakeup(&work.alldone);
  1979  	runtime_m()->traceback = 0;
  1980  }
  1981  
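        // cachestats flushes the allocation statistics cached in each P's
        // MCache into the global statistics.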
  1982  static void
  1983  cachestats(void)
  1984  {
  1985  	MCache *c;
  1986  	P *p, **pp;
  1987  
  1988  	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
  1989  		c = p->mcache;
  1990  		if(c==nil)
  1991  			continue;
  1992  		runtime_purgecachedstats(c);
  1993  	}
  1994  }
  1995  
  1996  static void
  1997  flushallmcaches(void)
  1998  {
  1999  	P *p, **pp;
  2000  	MCache *c;
  2001  
  2002  	// Flush MCache's to MCentral.
  2003  	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
  2004  		c = p->mcache;
  2005  		if(c==nil)
  2006  			continue;
  2007  		runtime_MCache_ReleaseAll(c);
  2008  	}
  2009  }
  2010  
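        // updatememstats recomputes the derived memory statistics in mstats
        // (and, if stats is non-nil, aggregates the per-M GC stats into it)
        // by flushing all MCaches and scanning every span.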
  2011  void
  2012  runtime_updatememstats(GCStats *stats)
  2013  {
  2014  	M *mp;
  2015  	MSpan *s;
  2016  	uint32 i;
  2017  	uint64 stacks_inuse, smallfree;
  2018  	uint64 *src, *dst;
  2019  
  2020  	if(stats)
  2021  		runtime_memclr((byte*)stats, sizeof(*stats));
  2022  	stacks_inuse = 0;
  2023  	for(mp=runtime_allm; mp; mp=mp->alllink) {
  2024  		//stacks_inuse += mp->stackinuse*FixedStack;
  2025  		if(stats) {
  2026  			src = (uint64*)&mp->gcstats;
  2027  			dst = (uint64*)stats;
  2028  			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
  2029  				dst[i] += src[i];
  2030  			runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
  2031  		}
  2032  	}
  2033  	mstats.stacks_inuse = stacks_inuse;
  2034  	mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
  2035  	mstats.mspan_inuse = runtime_mheap.spanalloc.inuse;
  2036  	mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
  2037  		mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
  2038  	
  2039  	// Calculate memory allocator stats.
  2040  	// During program execution we only count number of frees and amount of freed memory.
  2041  	// Current number of alive objects in the heap and amount of alive heap memory
  2042  	// are calculated by scanning all spans.
  2043  	// Total number of mallocs is calculated as number of frees plus number of alive objects.
  2044  	// Similarly, total amount of allocated memory is calculated as amount of freed memory
  2045  	// plus amount of alive heap memory.
  2046  	mstats.alloc = 0;
  2047  	mstats.total_alloc = 0;
  2048  	mstats.nmalloc = 0;
  2049  	mstats.nfree = 0;
  2050  	for(i = 0; i < nelem(mstats.by_size); i++) {
  2051  		mstats.by_size[i].nmalloc = 0;
  2052  		mstats.by_size[i].nfree = 0;
  2053  	}
  2054  
  2055  	// Flush MCache's to MCentral.
  2056  	flushallmcaches();
  2057  
  2058  	// Aggregate local stats.
  2059  	cachestats();
  2060  
  2061  	// Scan all spans and count number of alive objects.
  2062  	for(i = 0; i < runtime_mheap.nspan; i++) {
  2063  		s = runtime_mheap.allspans[i];
  2064  		if(s->state != MSpanInUse)
  2065  			continue;
  2066  		if(s->sizeclass == 0) {
  2067  			mstats.nmalloc++;
  2068  			mstats.alloc += s->elemsize;
  2069  		} else {
  2070  			mstats.nmalloc += s->ref;
  2071  			mstats.by_size[s->sizeclass].nmalloc += s->ref;
  2072  			mstats.alloc += s->ref*s->elemsize;
  2073  		}
  2074  	}
  2075  
  2076  	// Aggregate by size class.
  2077  	smallfree = 0;
  2078  	mstats.nfree = runtime_mheap.nlargefree;
  2079  	for(i = 0; i < nelem(mstats.by_size); i++) {
  2080  		mstats.nfree += runtime_mheap.nsmallfree[i];
  2081  		mstats.by_size[i].nfree = runtime_mheap.nsmallfree[i];
  2082  		mstats.by_size[i].nmalloc += runtime_mheap.nsmallfree[i];
  2083  		smallfree += runtime_mheap.nsmallfree[i] * runtime_class_to_size[i];
  2084  	}
  2085  	mstats.nmalloc += mstats.nfree;
  2086  
  2087  	// Calculate derived stats.
  2088  	mstats.total_alloc = mstats.alloc + runtime_mheap.largefree + smallfree;
  2089  	mstats.heap_alloc = mstats.alloc;
  2090  	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
  2091  }
  2092  
  2093  // Structure of arguments passed to function gc().
  2094  // This allows the arguments to be passed via runtime_mcall.
  2095  struct gc_args
  2096  {
  2097  	int64 start_time; // start time of GC in ns (just before stoptheworld)
  2098  	bool  eagersweep;
  2099  };
  2100  
  2101  static void gc(struct gc_args *args);
  2102  static void mgc(G *gp);
  2103  
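        // readgogc parses the GOGC environment variable:
        // 100 if unset, -1 if set to "off", otherwise its integer value.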
  2104  static int32
  2105  readgogc(void)
  2106  {
  2107  	String s;
  2108  	const byte *p;
  2109  
  2110  	s = runtime_getenv("GOGC");
  2111  	if(s.len == 0)
  2112  		return 100;
  2113  	p = s.str;
  2114  	if(s.len == 3 && runtime_strcmp((const char *)p, "off") == 0)
  2115  		return -1;
  2116  	return runtime_atoi(p, s.len);
  2117  }
  2118  
  2119  // force = 1 - do GC regardless of current heap usage
  2120  // force = 2 - do GC and eagerly sweep all spans
  2121  void
  2122  runtime_gc(int32 force)
  2123  {
  2124  	M *m;
  2125  	G *g;
  2126  	struct gc_args a;
  2127  	int32 i;
  2128  
  2129  	// The atomic operations are not atomic if the uint64s
  2130  	// are not aligned on uint64 boundaries. This has been
  2131  	// a problem in the past.
  2132  	if((((uintptr)&work.empty) & 7) != 0)
  2133  		runtime_throw("runtime: gc work buffer is misaligned");
  2134  	if((((uintptr)&work.full) & 7) != 0)
  2135  		runtime_throw("runtime: gc work buffer is misaligned");
  2136  
  2137  	// Make sure all registers are saved on stack so that
  2138  	// scanstack sees them.
  2139  	__builtin_unwind_init();
  2140  
  2141  	// The gc is turned off (via enablegc) until
  2142  	// the bootstrap has completed.
  2143  	// Also, malloc gets called in the guts
  2144  	// of a number of libraries that might be
  2145  	// holding locks.  To avoid priority inversion
  2146  	// problems, don't bother trying to run gc
  2147  	// while holding a lock.  The next mallocgc
  2148  	// without a lock will do the gc instead.
  2149  	m = runtime_m();
  2150  	if(!mstats.enablegc || runtime_g() == m->g0 || m->locks > 0 || runtime_panicking)
  2151  		return;
  2152  
  2153  	if(gcpercent == GcpercentUnknown) {	// first time through
  2154  		runtime_lock(&runtime_mheap.lock);
  2155  		if(gcpercent == GcpercentUnknown)
  2156  			gcpercent = readgogc();
  2157  		runtime_unlock(&runtime_mheap.lock);
  2158  	}
  2159  	if(gcpercent < 0)
  2160  		return;
  2161  
  2162  	runtime_semacquire(&runtime_worldsema, false);
  2163  	if(force==0 && mstats.heap_alloc < mstats.next_gc) {
  2164  		// typically threads which lost the race to grab
  2165  		// worldsema exit here when gc is done.
  2166  		runtime_semrelease(&runtime_worldsema);
  2167  		return;
  2168  	}
  2169  
  2170  	// Ok, we're doing it!  Stop everybody else
  2171  	a.start_time = runtime_nanotime();
  2172  	a.eagersweep = force >= 2;
  2173  	m->gcing = 1;
  2174  	runtime_stoptheworld();
  2175  	
  2176  	clearpools();
  2177  
  2178  	// Run gc on the g0 stack.  We do this so that the g stack
  2179  	// we're currently running on will no longer change.  Cuts
  2180  	// the root set down a bit (g0 stacks are not scanned, and
  2181  	// we don't need to scan gc's internal state).  Also an
  2182  	// enabler for copyable stacks.
  2183  	for(i = 0; i < (runtime_debug.gctrace > 1 ? 2 : 1); i++) {
  2184  		if(i > 0)
  2185  			a.start_time = runtime_nanotime();
  2186  		// switch to g0, call gc(&a), then switch back
  2187  		g = runtime_g();
  2188  		g->param = &a;
  2189  		g->status = Gwaiting;
  2190  		g->waitreason = "garbage collection";
  2191  		runtime_mcall(mgc);
  2192  		m = runtime_m();
  2193  	}
  2194  
  2195  	// all done
  2196  	m->gcing = 0;
  2197  	m->locks++;
  2198  	runtime_semrelease(&runtime_worldsema);
  2199  	runtime_starttheworld();
  2200  	m->locks--;
  2201  
  2202  	// now that gc is done, kick off finalizer thread if needed
  2203  	if(!ConcurrentSweep) {
  2204  		// give the queued finalizers, if any, a chance to run
  2205  		runtime_gosched();
  2206  	} else {
  2207  		// For gccgo, let other goroutines run.
  2208  		runtime_gosched();
  2209  	}
  2210  }
  2211  
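        // mgc runs gc() on the g0 stack with the arguments stashed in
        // gp->param, then resumes gp.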
  2212  static void
  2213  mgc(G *gp)
  2214  {
  2215  	gc(gp->param);
  2216  	gp->param = nil;
  2217  	gp->status = Grunning;
  2218  	runtime_gogo(gp);
  2219  }
  2220  
  2221  static void
  2222  gc(struct gc_args *args)
  2223  {
  2224  	M *m;
  2225  	int64 t0, t1, t2, t3, t4;
  2226  	uint64 heap0, heap1, obj, ninstr;
  2227  	GCStats stats;
  2228  	uint32 i;
  2229  	// Eface eface;
  2230  
  2231  	m = runtime_m();
  2232  
  2233  	if(runtime_debug.allocfreetrace)
  2234  		runtime_tracegc();
  2235  
  2236  	m->traceback = 2;
  2237  	t0 = args->start_time;
  2238  	work.tstart = args->start_time;
  2239  
  2240  	if(CollectStats)
  2241  		runtime_memclr((byte*)&gcstats, sizeof(gcstats));
  2242  
  2243  	m->locks++;	// disable gc during mallocs in parforalloc
  2244  	if(work.markfor == nil)
  2245  		work.markfor = runtime_parforalloc(MaxGcproc);
  2246  	m->locks--;
  2247  
  2248  	t1 = 0;
  2249  	if(runtime_debug.gctrace)
  2250  		t1 = runtime_nanotime();
  2251  
  2252  	// Sweep whatever was not swept by bgsweep.
  2253  	while(runtime_sweepone() != (uintptr)-1)
  2254  		gcstats.npausesweep++;
  2255  
  2256  	work.nwait = 0;
  2257  	work.ndone = 0;
  2258  	work.nproc = runtime_gcprocs();
  2259  	runtime_parforsetup(work.markfor, work.nproc, RootCount + runtime_allglen, false, &markroot_funcval);
  2260  	if(work.nproc > 1) {
  2261  		runtime_noteclear(&work.alldone);
  2262  		runtime_helpgc(work.nproc);
  2263  	}
  2264  
  2265  	t2 = 0;
  2266  	if(runtime_debug.gctrace)
  2267  		t2 = runtime_nanotime();
  2268  
  2269  	gchelperstart();
  2270  	runtime_parfordo(work.markfor);
  2271  	scanblock(nil, true);
  2272  
  2273  	t3 = 0;
  2274  	if(runtime_debug.gctrace)
  2275  		t3 = runtime_nanotime();
  2276  
  2277  	bufferList[m->helpgc].busy = 0;
  2278  	if(work.nproc > 1)
  2279  		runtime_notesleep(&work.alldone);
  2280  
  2281  	cachestats();
  2282  	// next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
  2283  	// Estimate what the live heap size was after the previous GC (for tracing only).
  2284  	heap0 = mstats.next_gc*100/(gcpercent+100);
  2285  	// conservatively set next_gc to high value assuming that everything is live
  2286  	// concurrent/lazy sweep will reduce this number while discovering new garbage
  2287  	mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
  2288  
  2289  	t4 = runtime_nanotime();
  2290  	mstats.last_gc = runtime_unixnanotime();  // must be Unix time to make sense to user
  2291  	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
  2292  	mstats.pause_end[mstats.numgc%nelem(mstats.pause_end)] = mstats.last_gc;
  2293  	mstats.pause_total_ns += t4 - t0;
  2294  	mstats.numgc++;
  2295  	if(mstats.debuggc)
  2296  		runtime_printf("pause %D\n", t4-t0);
  2297  
  2298  	if(runtime_debug.gctrace) {
  2299  		heap1 = mstats.heap_alloc;
  2300  		runtime_updatememstats(&stats);
  2301  		if(heap1 != mstats.heap_alloc) {
  2302  			runtime_printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
  2303  			runtime_throw("mstats skew");
  2304  		}
  2305  		obj = mstats.nmalloc - mstats.nfree;
  2306  
  2307  		stats.nprocyield += work.markfor->nprocyield;
  2308  		stats.nosyield += work.markfor->nosyield;
  2309  		stats.nsleep += work.markfor->nsleep;
  2310  
  2311  		runtime_printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
  2312  				" %d/%d/%d sweeps,"
  2313  				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
  2314  			mstats.numgc, work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
  2315  			heap0>>20, heap1>>20, obj,
  2316  			mstats.nmalloc, mstats.nfree,
  2317  			sweep.nspan, gcstats.nbgsweep, gcstats.npausesweep,
  2318  			stats.nhandoff, stats.nhandoffcnt,
  2319  			work.markfor->nsteal, work.markfor->nstealcnt,
  2320  			stats.nprocyield, stats.nosyield, stats.nsleep);
  2321  		gcstats.nbgsweep = gcstats.npausesweep = 0;
  2322  		if(CollectStats) {
  2323  			runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
  2324  				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
  2325  			if(gcstats.ptr.cnt != 0)
  2326  				runtime_printf("avg ptrbufsize: %D (%D/%D)\n",
  2327  					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
  2328  			if(gcstats.obj.cnt != 0)
  2329  				runtime_printf("avg nobj: %D (%D/%D)\n",
  2330  					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
  2331  			runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
  2332  
  2333  			runtime_printf("instruction counts:\n");
  2334  			ninstr = 0;
  2335  			for(i=0; i<nelem(gcstats.instr); i++) {
  2336  				runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]);
  2337  				ninstr += gcstats.instr[i];
  2338  			}
  2339  			runtime_printf("\ttotal:\t%D\n", ninstr);
  2340  
  2341  			runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
  2342  
  2343  			runtime_printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
  2344  			runtime_printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
  2345  		}
  2346  	}
  2347  
  2348  	// We cache current runtime_mheap.allspans array in sweep.spans,
  2349  	// because the former can be resized and freed.
  2350  	// Otherwise we would need to take heap lock every time
  2351  	// we want to convert span index to span pointer.
  2352  
  2353  	// Free the old cached array if necessary.
  2354  	if(sweep.spans && sweep.spans != runtime_mheap.allspans)
  2355  		runtime_SysFree(sweep.spans, sweep.nspan*sizeof(sweep.spans[0]), &mstats.other_sys);
  2356  	// Cache the current array.
  2357  	runtime_mheap.sweepspans = runtime_mheap.allspans;
  2358  	runtime_mheap.sweepgen += 2;
  2359  	runtime_mheap.sweepdone = false;
  2360  	sweep.spans = runtime_mheap.allspans;
  2361  	sweep.nspan = runtime_mheap.nspan;
  2362  	sweep.spanidx = 0;
  2363  
  2364  	// Temporarily disable concurrent sweep, because we see failures on builders.
  2365  	if(ConcurrentSweep && !args->eagersweep) {
  2366  		runtime_lock(&gclock);
  2367  		if(sweep.g == nil)
  2368  			sweep.g = __go_go(bgsweep, nil);
  2369  		else if(sweep.parked) {
  2370  			sweep.parked = false;
  2371  			runtime_ready(sweep.g);
  2372  		}
  2373  		runtime_unlock(&gclock);
  2374  	} else {
  2375  		// Sweep all spans eagerly.
  2376  		while(runtime_sweepone() != (uintptr)-1)
  2377  			gcstats.npausesweep++;
  2378  		// Do an additional mProf_GC, because all 'free' events are now real as well.
  2379  		runtime_MProf_GC();
  2380  	}
  2381  
  2382  	runtime_MProf_GC();
  2383  	m->traceback = 0;
  2384  }
  2385  
  2386  extern uintptr runtime_sizeof_C_MStats
  2387    __asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");
  2388  
  2389  void runtime_ReadMemStats(MStats *)
  2390    __asm__ (GOSYM_PREFIX "runtime.ReadMemStats");
  2391  
  2392  void
  2393  runtime_ReadMemStats(MStats *stats)
  2394  {
  2395  	M *m;
  2396  
  2397  	// Have to acquire worldsema to stop the world,
  2398  	// because stoptheworld can only be used by
  2399  	// one goroutine at a time, and there might be
  2400  	// a pending garbage collection already calling it.
  2401  	runtime_semacquire(&runtime_worldsema, false);
  2402  	m = runtime_m();
  2403  	m->gcing = 1;
  2404  	runtime_stoptheworld();
  2405  	runtime_updatememstats(nil);
  2406  	// Size of the trailing by_size array differs between Go and C,
  2407  	// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
  2408  	runtime_memmove(stats, &mstats, runtime_sizeof_C_MStats);
  2409  	m->gcing = 0;
  2410  	m->locks++;
  2411  	runtime_semrelease(&runtime_worldsema);
  2412  	runtime_starttheworld();
  2413  	m->locks--;
  2414  }
  2415  
  2416  void runtime_debug_readGCStats(Slice*)
  2417    __asm__("runtime_debug.readGCStats");
  2418  
  2419  void
  2420  runtime_debug_readGCStats(Slice *pauses)
  2421  {
  2422  	uint64 *p;
  2423  	uint32 i, n;
  2424  
  2425  	// Calling code in runtime/debug should make the slice large enough.
  2426  	if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
  2427  		runtime_throw("runtime: short slice passed to readGCStats");
  2428  
  2429  	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
  2430  	p = (uint64*)pauses->array;
  2431  	runtime_lock(&runtime_mheap.lock);
  2432  	n = mstats.numgc;
  2433  	if(n > nelem(mstats.pause_ns))
  2434  		n = nelem(mstats.pause_ns);
  2435  	
  2436  	// The pause buffer is circular. The most recent pause is at
  2437  	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
  2438  	// from there to go back farther in time. We deliver the times
  2439  	// most recent first (in p[0]).
  2440  	for(i=0; i<n; i++)
  2441  		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
  2442  
  2443  	p[n] = mstats.last_gc;
  2444  	p[n+1] = mstats.numgc;
  2445  	p[n+2] = mstats.pause_total_ns;
  2446  	runtime_unlock(&runtime_mheap.lock);
  2447  	pauses->__count = n+3;
  2448  }
  2449  
  2450  int32
  2451  runtime_setgcpercent(int32 in)
        {
  2452  	int32 out;
  2453  
  2454  	runtime_lock(&runtime_mheap.lock);
  2455  	if(gcpercent == GcpercentUnknown)
  2456  		gcpercent = readgogc();
  2457  	out = gcpercent;
  2458  	if(in < 0)
  2459  		in = -1;
  2460  	gcpercent = in;
  2461  	runtime_unlock(&runtime_mheap.lock);
  2462  	return out;
  2463  }
  2464  
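        // gchelperstart checks that this M is set up as a GC helper running
        // on its g0 stack and claims the helper's scan buffer.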
  2465  static void
  2466  gchelperstart(void)
  2467  {
  2468  	M *m;
  2469  
  2470  	m = runtime_m();
  2471  	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
  2472  		runtime_throw("gchelperstart: bad m->helpgc");
  2473  	if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
  2474  		runtime_throw("gchelperstart: already busy");
  2475  	if(runtime_g() != m->g0)
  2476  		runtime_throw("gchelper not running on g0 stack");
  2477  }
  2478  
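        // runfinq is the body of the finalizer goroutine: it repeatedly takes
        // the queued finalizer blocks, invokes each finalizer via reflect_call
        // with an argument of the kind the finalizer expects, and returns the
        // emptied blocks to the free list.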
  2479  static void
  2480  runfinq(void* dummy __attribute__ ((unused)))
  2481  {
  2482  	Finalizer *f;
  2483  	FinBlock *fb, *next;
  2484  	uint32 i;
  2485  	Eface ef;
  2486  	Iface iface;
  2487  
  2488  	// This function blocks for long periods of time, and because it is written in C
  2489  	// we have no liveness information. Zero everything so that uninitialized pointers
  2490  	// do not cause memory leaks.
  2491  	f = nil;
  2492  	fb = nil;
  2493  	next = nil;
  2494  	i = 0;
  2495  	ef.__type_descriptor = nil;
  2496  	ef.__object = nil;
  2497  	
  2498  	// force flush to memory
  2499  	USED(&f);
  2500  	USED(&fb);
  2501  	USED(&next);
  2502  	USED(&i);
  2503  	USED(&ef);
  2504  
  2505  	for(;;) {
  2506  		runtime_lock(&finlock);
  2507  		fb = finq;
  2508  		finq = nil;
  2509  		if(fb == nil) {
  2510  			runtime_fingwait = true;
  2511  			runtime_g()->isbackground = true;
  2512  			runtime_parkunlock(&finlock, "finalizer wait");
  2513  			runtime_g()->isbackground = false;
  2514  			continue;
  2515  		}
  2516  		runtime_unlock(&finlock);
  2517  		for(; fb; fb=next) {
  2518  			next = fb->next;
  2519  			for(i=0; i<(uint32)fb->cnt; i++) {
  2520  				const Type *fint;
  2521  				void *param;
  2522  
  2523  				f = &fb->fin[i];
  2524  				fint = ((const Type**)f->ft->__in.array)[0];
  2525  				if((fint->__code & kindMask) == KindPtr) {
  2526  					// direct use of pointer
  2527  					param = &f->arg;
  2528  				} else if(((const InterfaceType*)fint)->__methods.__count == 0) {
  2529  					// convert to empty interface
  2530  					ef.__type_descriptor = (const Type*)f->ot;
  2531  					ef.__object = f->arg;
  2532  					param = &ef;
  2533  				} else {
  2534  					// convert to interface with methods
  2535  					iface.__methods = __go_convert_interface_2((const Type*)fint,
  2536  										   (const Type*)f->ot,
  2537  										   1);
  2538  					iface.__object = f->arg;
  2539  					if(iface.__methods == nil)
  2540  						runtime_throw("invalid type conversion in runfinq");
  2541  					param = &iface;
  2542  				}
  2543  				reflect_call(f->ft, f->fn, 0, 0, &param, nil);
  2544  				f->fn = nil;
  2545  				f->arg = nil;
  2546  				f->ot = nil;
  2547  			}
  2548  			fb->cnt = 0;
  2549  			runtime_lock(&finlock);
  2550  			fb->next = finc;
  2551  			finc = fb;
  2552  			runtime_unlock(&finlock);
  2553  		}
  2554  
  2555  		// Zero everything that's dead, to avoid memory leaks.
  2556  		// See comment at top of function.
  2557  		f = nil;
  2558  		fb = nil;
  2559  		next = nil;
  2560  		i = 0;
  2561  		ef.__type_descriptor = nil;
  2562  		ef.__object = nil;
  2563  		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
  2564  	}
  2565  }
  2566  
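        // createfing starts the finalizer goroutine (fing) if it is not
        // already running.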
  2567  void
  2568  runtime_createfing(void)
  2569  {
  2570  	if(fing != nil)
  2571  		return;
  2572  	// Here we use gclock instead of finlock,
  2573  	// because newproc1 can allocate, which can cause on-demand span sweep,
  2574  	// which can queue finalizers, which would deadlock.
  2575  	runtime_lock(&gclock);
  2576  	if(fing == nil)
  2577  		fing = __go_go(runfinq, nil);
  2578  	runtime_unlock(&gclock);
  2579  }
  2580  
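        // wakefing returns the finalizer goroutine if it is waiting and needs
        // to be woken, or nil otherwise.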
  2581  G*
  2582  runtime_wakefing(void)
  2583  {
  2584  	G *res;
  2585  
  2586  	res = nil;
  2587  	runtime_lock(&finlock);
  2588  	if(runtime_fingwait && runtime_fingwake) {
  2589  		runtime_fingwait = false;
  2590  		runtime_fingwake = false;
  2591  		res = fing;
  2592  	}
  2593  	runtime_unlock(&finlock);
  2594  	return res;
  2595  }
  2596  
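        // mark the block at v as ignored by the GC:
        // clear its allocated bit and record only a block boundary.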
  2597  void
  2598  runtime_marknogc(void *v)
  2599  {
  2600  	uintptr *b, off, shift;
  2601  
  2602  	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
  2603  	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  2604  	shift = off % wordsPerBitmapWord;
  2605  	*b = (*b & ~(bitAllocated<<shift)) | bitBlockBoundary<<shift;
  2606  }
  2607  
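        // mark the block at v as containing pointers that the GC must scan.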
  2608  void
  2609  runtime_markscan(void *v)
  2610  {
  2611  	uintptr *b, off, shift;
  2612  
  2613  	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
  2614  	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  2615  	shift = off % wordsPerBitmapWord;
  2616  	*b |= bitScan<<shift;
  2617  }
  2618  
  2619  // mark the block at v as freed.
  2620  void
  2621  runtime_markfreed(void *v)
  2622  {
  2623  	uintptr *b, off, shift;
  2624  
  2625  	if(0)
  2626  		runtime_printf("markfreed %p\n", v);
  2627  
  2628  	if((byte*)v > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
  2629  		runtime_throw("markfreed: bad pointer");
  2630  
  2631  	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
  2632  	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  2633  	shift = off % wordsPerBitmapWord;
  2634  	*b = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
  2635  }
  2636  
  2637  // check that the block at v of size n is marked freed.
  2638  void
  2639  runtime_checkfreed(void *v, uintptr n)
  2640  {
  2641  	uintptr *b, bits, off, shift;
  2642  
  2643  	if(!runtime_checking)
  2644  		return;
  2645  
  2646  	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
  2647  		return;	// not allocated, so okay
  2648  
  2649  	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
  2650  	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  2651  	shift = off % wordsPerBitmapWord;
  2652  
  2653  	bits = *b>>shift;
  2654  	if((bits & bitAllocated) != 0) {
  2655  		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
  2656  			v, n, off, bits & bitMask);
  2657  		runtime_throw("checkfreed: not freed");
  2658  	}
  2659  }
  2660  
  2661  // mark the span of memory at v as having n blocks of the given size.
  2662  // if leftover is true, there is left over space at the end of the span.
  2663  void
  2664  runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
  2665  {
  2666  	uintptr *b, *b0, off, shift, i, x;
  2667  	byte *p;
  2668  
  2669  	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
  2670  		runtime_throw("markspan: bad pointer");
  2671  
  2672  	if(runtime_checking) {
  2673  		// bits should be all zero at the start
  2674  		off = (byte*)v + size - runtime_mheap.arena_start;
  2675  		b = (uintptr*)(runtime_mheap.arena_start - off/wordsPerBitmapWord);
  2676  		for(i = 0; i < size/PtrSize/wordsPerBitmapWord; i++) {
  2677  			if(b[i] != 0)
  2678  				runtime_throw("markspan: span bits not zero");
  2679  		}
  2680  	}
  2681  
  2682  	p = v;
  2683  	if(leftover)	// mark a boundary just past end of last block too
  2684  		n++;
  2685  
  2686  	b0 = nil;
  2687  	x = 0;
  2688  	for(; n-- > 0; p += size) {
  2689  		// Okay to use non-atomic ops here, because we control
  2690  		// the entire span, and each bitmap word has bits for only
  2691  		// one span, so no other goroutines are changing these
  2692  		// bitmap words.
  2693  		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;  // word offset
  2694  		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  2695  		shift = off % wordsPerBitmapWord;
  2696  		if(b0 != b) {
  2697  			if(b0 != nil)
  2698  				*b0 = x;
  2699  			b0 = b;
  2700  			x = 0;
  2701  		}
  2702  		x |= bitAllocated<<shift;
  2703  	}
  2704  	*b0 = x;
  2705  }
  2706  
  2707  // unmark the span of memory at v of length n bytes.
  2708  void
  2709  runtime_unmarkspan(void *v, uintptr n)
  2710  {
  2711  	uintptr *p, *b, off;
  2712  
  2713  	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
  2714  		runtime_throw("unmarkspan: bad pointer");
  2715  
  2716  	p = v;
  2717  	off = p - (uintptr*)runtime_mheap.arena_start;  // word offset
  2718  	if(off % wordsPerBitmapWord != 0)
  2719  		runtime_throw("unmarkspan: unaligned pointer");
  2720  	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  2721  	n /= PtrSize;
  2722  	if(n%wordsPerBitmapWord != 0)
  2723  		runtime_throw("unmarkspan: unaligned length");
  2724  	// Okay to use non-atomic ops here, because we control
  2725  	// the entire span, and each bitmap word has bits for only
  2726  	// one span, so no other goroutines are changing these
  2727  	// bitmap words.
  2728  	n /= wordsPerBitmapWord;
  2729  	while(n-- > 0)
  2730  		*b-- = 0;
  2731  }
  2732  
  2733  void
  2734  runtime_MHeap_MapBits(MHeap *h)
  2735  {
  2736  	size_t page_size;
  2737  
  2738  	// Caller has added extra mappings to the arena.
  2739  	// Add extra mappings of bitmap words as needed.
  2740  	// We allocate extra bitmap pieces in chunks of bitmapChunk.
  2741  	enum {
  2742  		bitmapChunk = 8192
  2743  	};
  2744  	uintptr n;
  2745  
  2746  	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
  2747  	n = ROUND(n, bitmapChunk);
  2748  	n = ROUND(n, PageSize);
  2749  	page_size = getpagesize();
  2750  	n = ROUND(n, page_size);
  2751  	if(h->bitmap_mapped >= n)
  2752  		return;
  2753  
  2754  	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
  2755  	h->bitmap_mapped = n;
  2756  }
  2757  
  2758  // typedmemmove copies a value of type t to dst from src.
  2759  
  2760  extern void typedmemmove(const Type* td, void *dst, const void *src)
  2761    __asm__ (GOSYM_PREFIX "reflect.typedmemmove");
  2762  
  2763  void
  2764  typedmemmove(const Type* td, void *dst, const void *src)
  2765  {
  2766  	runtime_memmove(dst, src, td->__size);
  2767  }
  2768  
  2769  // typedslicecopy copies a slice of elemType values from src to dst,
  2770  // returning the number of elements copied.
  2771  
  2772  extern intgo typedslicecopy(const Type* elem, Slice dst, Slice src)
  2773    __asm__ (GOSYM_PREFIX "reflect.typedslicecopy");
  2774  
  2775  intgo
  2776  typedslicecopy(const Type* elem, Slice dst, Slice src)
  2777  {
  2778  	intgo n;
  2779  	void *dstp;
  2780  	void *srcp;
  2781  
  2782  	n = dst.__count;
  2783  	if (n > src.__count)
  2784  		n = src.__count;
  2785  	if (n == 0)
  2786  		return 0;
  2787  	dstp = dst.__values;
  2788  	srcp = src.__values;
  2789  	memmove(dstp, srcp, (uintptr_t)n * elem->__size);
  2790  	return n;
  2791  }