github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/proc.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "runtime.h"
     6  #include "arch_GOARCH.h"
     7  #include "malloc.h"
     8  #include "stack.h"
     9  #include "race.h"
    10  #include "type.h"
    11  
    12  // Goroutine scheduler
    13  // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
    14  //
    15  // The main concepts are:
    16  // G - goroutine.
    17  // M - worker thread, or machine.
    18  // P - processor, a resource that is required to execute Go code.
    19  //     M must have an associated P to execute Go code, however it can be
    20  //     blocked or in a syscall w/o an associated P.
    21  //
    22  // Design doc at http://golang.org/s/go11sched.
    23  
    24  typedef struct Sched Sched;
    25  struct Sched {
    26  	Lock;
    27  
    28  	uint64	goidgen;
    29  
    30  	M*	midle;	 // idle m's waiting for work
    31  	int32	nmidle;	 // number of idle m's waiting for work
    32  	int32	mlocked; // number of locked m's waiting for work
    33  	int32	mcount;	 // number of m's that have been created
    34  
    35  	P*	pidle;  // idle P's
    36  	uint32	npidle;
    37  	uint32	nmspinning;
    38  
    39  	// Global runnable queue.
    40  	G*	runqhead;
    41  	G*	runqtail;
    42  	int32	runqsize;
    43  
    44  	// Global cache of dead G's.
    45  	Lock	gflock;
    46  	G*	gfree;
    47  
    48  	int32	stopwait;
    49  	Note	stopnote;
    50  	uint32	sysmonwait;
    51  	Note	sysmonnote;
    52  	uint64	lastpoll;
    53  
    54  	int32	profilehz;	// cpu profiling rate
    55  };
    56  
    57  // The max value of GOMAXPROCS.
    58  // There are no fundamental restrictions on the value.
    59  enum { MaxGomaxprocs = 1<<8 };
    60  
    61  Sched	runtime·sched;
    62  int32	runtime·gomaxprocs;
    63  bool	runtime·singleproc;
    64  bool	runtime·iscgo;
    65  uint32	runtime·gcwaiting;
    66  M	runtime·m0;
    67  G	runtime·g0;	 // idle goroutine for m0
    68  G*	runtime·allg;
    69  G*	runtime·lastg;
    70  M*	runtime·allm;
    71  M*	runtime·extram;
    72  int8*	runtime·goos;
    73  int32	runtime·ncpu;
    74  static int32	newprocs;
    75  
    76  void runtime·mstart(void);
    77  static void runqput(P*, G*);
    78  static G* runqget(P*);
    79  static void runqgrow(P*);
    80  static G* runqsteal(P*, P*);
    81  static void mput(M*);
    82  static M* mget(void);
    83  static void mcommoninit(M*);
    84  static void schedule(void);
    85  static void procresize(int32);
    86  static void acquirep(P*);
    87  static P* releasep(void);
    88  static void newm(void(*)(void), P*);
    89  static void goidle(void);
    90  static void stopm(void);
    91  static void startm(P*, bool);
    92  static void handoffp(P*);
    93  static void wakep(void);
    94  static void stoplockedm(void);
    95  static void startlockedm(G*);
    96  static void sysmon(void);
    97  static uint32 retake(uint32*);
    98  static void inclocked(int32);
    99  static void checkdead(void);
   100  static void exitsyscall0(G*);
   101  static void park0(G*);
   102  static void gosched0(G*);
   103  static void goexit0(G*);
   104  static void gfput(P*, G*);
   105  static G* gfget(P*);
   106  static void gfpurge(P*);
   107  static void globrunqput(G*);
   108  static G* globrunqget(P*);
   109  static P* pidleget(void);
   110  static void pidleput(P*);
   111  static void injectglist(G*);
   112  
   113  // The bootstrap sequence is:
   114  //
   115  //	call osinit
   116  //	call schedinit
   117  //	make & queue new G
   118  //	call runtime·mstart
   119  //
   120  // The new G calls runtime·main.
   121  void
   122  runtime·schedinit(void)
   123  {
   124  	int32 n, procs;
   125  	byte *p;
   126  
   127  	m->nomemprof++;
   128  	runtime·mprofinit();
   129  	runtime·mallocinit();
   130  	mcommoninit(m);
   131  
   132  	runtime·goargs();
   133  	runtime·goenvs();
   134  
   135  	// For debugging:
   136  	// Allocate internal symbol table representation now,
   137  	// so that we don't need to call malloc when we crash.
   138  	// runtime·findfunc(0);
   139  
   140  	runtime·sched.lastpoll = runtime·nanotime();
   141  	procs = 1;
   142  	p = runtime·getenv("GOMAXPROCS");
   143  	if(p != nil && (n = runtime·atoi(p)) > 0) {
   144  		if(n > MaxGomaxprocs)
   145  			n = MaxGomaxprocs;
   146  		procs = n;
   147  	}
   148  	runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
   149  	procresize(procs);
   150  
   151  	mstats.enablegc = 1;
   152  	m->nomemprof--;
   153  
   154  	if(raceenabled)
   155  		g->racectx = runtime·raceinit();
   156  }
   157  
   158  extern void main·init(void);
   159  extern void main·main(void);
   160  
   161  static FuncVal scavenger = {runtime·MHeap_Scavenger};
   162  
   163  // The main goroutine.
   164  void
   165  runtime·main(void)
   166  {
   167  	newm(sysmon, nil);
   168  
   169  	// Lock the main goroutine onto this, the main OS thread,
   170  	// during initialization.  Most programs won't care, but a few
   171  	// do require certain calls to be made by the main thread.
   172  	// Those can arrange for main.main to run in the main thread
   173  	// by calling runtime.LockOSThread during initialization
   174  	// to preserve the lock.
   175  	runtime·lockOSThread();
   176  	if(m != &runtime·m0)
   177  		runtime·throw("runtime·main not on m0");
   178  	runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
   179  	main·init();
   180  	runtime·unlockOSThread();
   181  
   182  	main·main();
   183  	if(raceenabled)
   184  		runtime·racefini();
   185  
   186  	// Make racy client program work: if panicking on
   187  	// another goroutine at the same time as main returns,
   188  	// let the other goroutine finish printing the panic trace.
   189  	// Once it does, it will exit. See issue 3934.
   190  	if(runtime·panicking)
   191  		runtime·park(nil, nil, "panicwait");
   192  
   193  	runtime·exit(0);
   194  	for(;;)
   195  		*(int32*)runtime·main = 0;
   196  }
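
The comment above explains how a program keeps main.main on the main OS thread: the lock is held through package initialization, so an init function that calls runtime.LockOSThread inherits and preserves it. A minimal user-level sketch (illustrative only, not part of this file):

	package main

	import "runtime"

	func init() {
		// The main goroutine is still locked to the main OS thread while
		// init functions run; locking here preserves that lock so that
		// main.main keeps running on the main thread (needed by some
		// thread-affine C libraries, GUI toolkits, etc.).
		runtime.LockOSThread()
	}

	func main() {
		// ... calls that must happen on the main OS thread ...
	}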
   197  
   198  void
   199  runtime·goroutineheader(G *gp)
   200  {
   201  	int8 *status;
   202  
   203  	switch(gp->status) {
   204  	case Gidle:
   205  		status = "idle";
   206  		break;
   207  	case Grunnable:
   208  		status = "runnable";
   209  		break;
   210  	case Grunning:
   211  		status = "running";
   212  		break;
   213  	case Gsyscall:
   214  		status = "syscall";
   215  		break;
   216  	case Gwaiting:
   217  		if(gp->waitreason)
   218  			status = gp->waitreason;
   219  		else
   220  			status = "waiting";
   221  		break;
   222  	default:
   223  		status = "???";
   224  		break;
   225  	}
   226  	runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
   227  }
   228  
   229  void
   230  runtime·tracebackothers(G *me)
   231  {
   232  	G *gp;
   233  	int32 traceback;
   234  
   235  	traceback = runtime·gotraceback(nil);
   236  	for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
   237  		if(gp == me || gp->status == Gdead)
   238  			continue;
   239  		if(gp->issystem && traceback < 2)
   240  			continue;
   241  		runtime·printf("\n");
   242  		runtime·goroutineheader(gp);
   243  		runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp);
   244  	}
   245  }
   246  
   247  static void
   248  mcommoninit(M *mp)
   249  {
    250  	// If there is no mcache, runtime·callers() will crash,
    251  	// and we are most likely in the sysmon thread, so the stack is senseless anyway.
   252  	if(m->mcache)
   253  		runtime·callers(1, mp->createstack, nelem(mp->createstack));
   254  
   255  	mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
   256  
   257  	runtime·lock(&runtime·sched);
   258  	mp->id = runtime·sched.mcount++;
   259  
   260  	runtime·mpreinit(mp);
   261  
   262  	// Add to runtime·allm so garbage collector doesn't free m
   263  	// when it is just in a register or thread-local storage.
   264  	mp->alllink = runtime·allm;
   265  	// runtime·NumCgoCall() iterates over allm w/o schedlock,
   266  	// so we need to publish it safely.
   267  	runtime·atomicstorep(&runtime·allm, mp);
   268  	runtime·unlock(&runtime·sched);
   269  }
   270  
   271  // Mark gp ready to run.
   272  void
   273  runtime·ready(G *gp)
   274  {
   275  	// Mark runnable.
   276  	if(gp->status != Gwaiting) {
   277  		runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
   278  		runtime·throw("bad g->status in ready");
   279  	}
   280  	gp->status = Grunnable;
   281  	runqput(m->p, gp);
   282  	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
   283  		wakep();
   284  }
   285  
   286  int32
   287  runtime·gcprocs(void)
   288  {
   289  	int32 n;
   290  
   291  	// Figure out how many CPUs to use during GC.
   292  	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
   293  	runtime·lock(&runtime·sched);
   294  	n = runtime·gomaxprocs;
   295  	if(n > runtime·ncpu)
   296  		n = runtime·ncpu;
   297  	if(n > MaxGcproc)
   298  		n = MaxGcproc;
   299  	if(n > runtime·sched.nmidle+1) // one M is currently running
   300  		n = runtime·sched.nmidle+1;
   301  	runtime·unlock(&runtime·sched);
   302  	return n;
   303  }
   304  
   305  static bool
   306  needaddgcproc(void)
   307  {
   308  	int32 n;
   309  
   310  	runtime·lock(&runtime·sched);
   311  	n = runtime·gomaxprocs;
   312  	if(n > runtime·ncpu)
   313  		n = runtime·ncpu;
   314  	if(n > MaxGcproc)
   315  		n = MaxGcproc;
   316  	n -= runtime·sched.nmidle+1; // one M is currently running
   317  	runtime·unlock(&runtime·sched);
   318  	return n > 0;
   319  }
   320  
   321  void
   322  runtime·helpgc(int32 nproc)
   323  {
   324  	M *mp;
   325  	int32 n, pos;
   326  
   327  	runtime·lock(&runtime·sched);
   328  	pos = 0;
   329  	for(n = 1; n < nproc; n++) {  // one M is currently running
   330  		if(runtime·allp[pos]->mcache == m->mcache)
   331  			pos++;
   332  		mp = mget();
   333  		if(mp == nil)
   334  			runtime·throw("runtime·gcprocs inconsistency");
   335  		mp->helpgc = n;
   336  		mp->mcache = runtime·allp[pos]->mcache;
   337  		pos++;
   338  		runtime·notewakeup(&mp->park);
   339  	}
   340  	runtime·unlock(&runtime·sched);
   341  }
   342  
   343  void
   344  runtime·stoptheworld(void)
   345  {
   346  	int32 i;
   347  	uint32 s;
   348  	P *p;
   349  	bool wait;
   350  
   351  	runtime·lock(&runtime·sched);
   352  	runtime·sched.stopwait = runtime·gomaxprocs;
   353  	runtime·atomicstore((uint32*)&runtime·gcwaiting, 1);
   354  	// stop current P
   355  	m->p->status = Pgcstop;
   356  	runtime·sched.stopwait--;
   357  	// try to retake all P's in Psyscall status
   358  	for(i = 0; i < runtime·gomaxprocs; i++) {
   359  		p = runtime·allp[i];
   360  		s = p->status;
   361  		if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
   362  			runtime·sched.stopwait--;
   363  	}
   364  	// stop idle P's
   365  	while(p = pidleget()) {
   366  		p->status = Pgcstop;
   367  		runtime·sched.stopwait--;
   368  	}
   369  	wait = runtime·sched.stopwait > 0;
   370  	runtime·unlock(&runtime·sched);
   371  
    372  	// wait for remaining P's to stop voluntarily
   373  	if(wait) {
   374  		runtime·notesleep(&runtime·sched.stopnote);
   375  		runtime·noteclear(&runtime·sched.stopnote);
   376  	}
   377  	if(runtime·sched.stopwait)
   378  		runtime·throw("stoptheworld: not stopped");
   379  	for(i = 0; i < runtime·gomaxprocs; i++) {
   380  		p = runtime·allp[i];
   381  		if(p->status != Pgcstop)
   382  			runtime·throw("stoptheworld: not stopped");
   383  	}
   384  }
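
stoptheworld relies on a simple quiescence pattern: stopwait counts the P's that still have to stop, every P that stops decrements it, and whichever decrement reaches zero wakes the initiator through stopnote. A rough Go sketch of the same countdown-plus-note idea, with a channel standing in for the runtime's Note (illustrative only, not runtime code):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	var (
		stopwait int32                    // P's that still need to stop
		stopnote = make(chan struct{}, 1) // stand-in for the runtime Note
	)

	// stopOne is called by each worker as it reaches a safe point.
	func stopOne() {
		if atomic.AddInt32(&stopwait, -1) == 0 {
			stopnote <- struct{}{} // the last worker wakes the initiator
		}
	}

	func stopTheWorld(nprocs int32) {
		atomic.StoreInt32(&stopwait, nprocs)
		for i := int32(0); i < nprocs; i++ {
			go stopOne() // in the runtime, each P stops itself
		}
		<-stopnote // wait until the count reaches zero
		fmt.Println("world stopped")
	}

	func main() { stopTheWorld(4) }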
   385  
   386  static void
   387  mhelpgc(void)
   388  {
   389  	m->helpgc = -1;
   390  }
   391  
   392  void
   393  runtime·starttheworld(void)
   394  {
   395  	P *p, *p1;
   396  	M *mp;
   397  	G *gp;
   398  	bool add;
   399  
   400  	gp = runtime·netpoll(false);  // non-blocking
   401  	injectglist(gp);
   402  	add = needaddgcproc();
   403  	runtime·lock(&runtime·sched);
   404  	if(newprocs) {
   405  		procresize(newprocs);
   406  		newprocs = 0;
   407  	} else
   408  		procresize(runtime·gomaxprocs);
   409  	runtime·gcwaiting = 0;
   410  
   411  	p1 = nil;
   412  	while(p = pidleget()) {
   413  		// procresize() puts p's with work at the beginning of the list.
   414  		// Once we reach a p without a run queue, the rest don't have one either.
   415  		if(p->runqhead == p->runqtail) {
   416  			pidleput(p);
   417  			break;
   418  		}
   419  		mp = mget();
   420  		if(mp == nil) {
   421  			p->link = p1;
   422  			p1 = p;
   423  			continue;
   424  		}
   425  		if(mp->nextp)
   426  			runtime·throw("starttheworld: inconsistent mp->nextp");
   427  		mp->nextp = p;
   428  		runtime·notewakeup(&mp->park);
   429  	}
   430  	if(runtime·sched.sysmonwait) {
   431  		runtime·sched.sysmonwait = false;
   432  		runtime·notewakeup(&runtime·sched.sysmonnote);
   433  	}
   434  	runtime·unlock(&runtime·sched);
   435  
   436  	while(p1) {
   437  		p = p1;
   438  		p1 = p1->link;
   439  		add = false;
   440  		newm(nil, p);
   441  	}
   442  
   443  	if(add) {
   444  		// If GC could have used another helper proc, start one now,
   445  		// in the hope that it will be available next time.
   446  		// It would have been even better to start it before the collection,
   447  		// but doing so requires allocating memory, so it's tricky to
   448  		// coordinate.  This lazy approach works out in practice:
   449  		// we don't mind if the first couple gc rounds don't have quite
   450  		// the maximum number of procs.
   451  		newm(mhelpgc, nil);
   452  	}
   453  }
   454  
   455  // Called to start an M.
   456  void
   457  runtime·mstart(void)
   458  {
    459  	// The seh variable below is used by windows-386 only. Unfortunately, it needs
    460  	// to be located on the OS stack, and mstart runs on the OS stack
    461  	// for both m0 and m.
   462  	SEH seh;
   463  
   464  	if(g != m->g0)
   465  		runtime·throw("bad runtime·mstart");
   466  
   467  	// Record top of stack for use by mcall.
   468  	// Once we call schedule we're never coming back,
   469  	// so other calls can reuse this stack space.
   470  	runtime·gosave(&m->g0->sched);
   471  	m->g0->sched.pc = (void*)-1;  // make sure it is never used
   472  	m->seh = &seh;
   473  	runtime·asminit();
   474  	runtime·minit();
   475  
   476  	// Install signal handlers; after minit so that minit can
   477  	// prepare the thread to be able to handle the signals.
   478  	if(m == &runtime·m0) {
   479  		runtime·initsig();
   480  		if(runtime·iscgo)
   481  			runtime·newextram();
   482  	}
   483  	
   484  	if(m->mstartfn)
   485  		m->mstartfn();
   486  
   487  	if(m->helpgc) {
   488  		m->helpgc = 0;
   489  		stopm();
   490  	} else if(m != &runtime·m0) {
   491  		acquirep(m->nextp);
   492  		m->nextp = nil;
   493  	}
   494  	schedule();
   495  
   496  	// TODO(brainman): This point is never reached, because scheduler
   497  	// does not release os threads at the moment. But once this path
   498  	// is enabled, we must remove our seh here.
   499  }
   500  
   501  // When running with cgo, we call _cgo_thread_start
   502  // to start threads for us so that we can play nicely with
   503  // foreign code.
   504  void (*_cgo_thread_start)(void*);
   505  
   506  typedef struct CgoThreadStart CgoThreadStart;
   507  struct CgoThreadStart
   508  {
   509  	M *m;
   510  	G *g;
   511  	void (*fn)(void);
   512  };
   513  
   514  // Allocate a new m unassociated with any thread.
   515  // Can use p for allocation context if needed.
   516  M*
   517  runtime·allocm(P *p)
   518  {
   519  	M *mp;
   520  	static Type *mtype;  // The Go type M
   521  
   522  	m->locks++;  // disable GC because it can be called from sysmon
   523  	if(m->p == nil)
   524  		acquirep(p);  // temporarily borrow p for mallocs in this function
   525  	if(mtype == nil) {
   526  		Eface e;
   527  		runtime·gc_m_ptr(&e);
   528  		mtype = ((PtrType*)e.type)->elem;
   529  	}
   530  
   531  	mp = runtime·cnew(mtype);
   532  	mcommoninit(mp);
   533  
   534  	// In case of cgo, pthread_create will make us a stack.
    535  	// Windows will lay out the sched stack on the OS stack.
   536  	if(runtime·iscgo || Windows)
   537  		mp->g0 = runtime·malg(-1);
   538  	else
   539  		mp->g0 = runtime·malg(8192);
   540  
   541  	if(p == m->p)
   542  		releasep();
   543  	m->locks--;
   544  
   545  	return mp;
   546  }
   547  
   548  static M* lockextra(bool nilokay);
   549  static void unlockextra(M*);
   550  
   551  // needm is called when a cgo callback happens on a
   552  // thread without an m (a thread not created by Go).
   553  // In this case, needm is expected to find an m to use
   554  // and return with m, g initialized correctly.
   555  // Since m and g are not set now (likely nil, but see below)
   556  // needm is limited in what routines it can call. In particular
   557  // it can only call nosplit functions (textflag 7) and cannot
   558  // do any scheduling that requires an m.
   559  //
   560  // In order to avoid needing heavy lifting here, we adopt
   561  // the following strategy: there is a stack of available m's
   562  // that can be stolen. Using compare-and-swap
   563  // to pop from the stack has ABA races, so we simulate
   564  // a lock by doing an exchange (via casp) to steal the stack
   565  // head and replace the top pointer with MLOCKED (1).
   566  // This serves as a simple spin lock that we can use even
   567  // without an m. The thread that locks the stack in this way
   568  // unlocks the stack by storing a valid stack head pointer.
   569  //
   570  // In order to make sure that there is always an m structure
   571  // available to be stolen, we maintain the invariant that there
   572  // is always one more than needed. At the beginning of the
   573  // program (if cgo is in use) the list is seeded with a single m.
   574  // If needm finds that it has taken the last m off the list, its job
   575  // is - once it has installed its own m so that it can do things like
   576  // allocate memory - to create a spare m and put it on the list.
   577  //
   578  // Each of these extra m's also has a g0 and a curg that are
   579  // pressed into service as the scheduling stack and current
   580  // goroutine for the duration of the cgo callback.
   581  //
   582  // When the callback is done with the m, it calls dropm to
   583  // put the m back on the list.
   584  #pragma textflag 7
   585  void
   586  runtime·needm(byte x)
   587  {
   588  	M *mp;
   589  
   590  	// Lock extra list, take head, unlock popped list.
   591  	// nilokay=false is safe here because of the invariant above,
   592  	// that the extra list always contains or will soon contain
   593  	// at least one m.
   594  	mp = lockextra(false);
   595  
   596  	// Set needextram when we've just emptied the list,
   597  	// so that the eventual call into cgocallbackg will
   598  	// allocate a new m for the extra list. We delay the
   599  	// allocation until then so that it can be done
   600  	// after exitsyscall makes sure it is okay to be
   601  	// running at all (that is, there's no garbage collection
   602  	// running right now).
   603  	mp->needextram = mp->schedlink == nil;
   604  	unlockextra(mp->schedlink);
   605  
   606  	// Install m and g (= m->g0) and set the stack bounds
   607  	// to match the current stack. We don't actually know
   608  	// how big the stack is, like we don't know how big any
   609  	// scheduling stack is, but we assume there's at least 32 kB,
   610  	// which is more than enough for us.
   611  	runtime·setmg(mp, mp->g0);
   612  	g->stackbase = (uintptr)(&x + 1024);
   613  	g->stackguard = (uintptr)(&x - 32*1024);
   614  
   615  	// On windows/386, we need to put an SEH frame (two words)
   616  	// somewhere on the current stack. We are called
   617  	// from needm, and we know there is some available
   618  	// space one word into the argument frame. Use that.
   619  	m->seh = (SEH*)((uintptr*)&x + 1);
   620  
   621  	// Initialize this thread to use the m.
   622  	runtime·asminit();
   623  	runtime·minit();
   624  }
   625  
   626  // newextram allocates an m and puts it on the extra list.
   627  // It is called with a working local m, so that it can do things
   628  // like call schedlock and allocate.
   629  void
   630  runtime·newextram(void)
   631  {
   632  	M *mp, *mnext;
   633  	G *gp;
   634  
   635  	// Create extra goroutine locked to extra m.
   636  	// The goroutine is the context in which the cgo callback will run.
   637  	// The sched.pc will never be returned to, but setting it to
   638  	// runtime.goexit makes clear to the traceback routines where
   639  	// the goroutine stack ends.
   640  	mp = runtime·allocm(nil);
   641  	gp = runtime·malg(4096);
   642  	gp->sched.pc = (void*)runtime·goexit;
   643  	gp->sched.sp = gp->stackbase;
   644  	gp->sched.g = gp;
   645  	gp->status = Gsyscall;
   646  	mp->curg = gp;
   647  	mp->locked = LockInternal;
   648  	mp->lockedg = gp;
   649  	gp->lockedm = mp;
   650  	// put on allg for garbage collector
   651  	runtime·lock(&runtime·sched);
   652  	if(runtime·lastg == nil)
   653  		runtime·allg = gp;
   654  	else
   655  		runtime·lastg->alllink = gp;
   656  	runtime·lastg = gp;
   657  	runtime·unlock(&runtime·sched);
   658  	gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
   659  	if(raceenabled)
   660  		gp->racectx = runtime·racegostart(runtime·newextram);
   661  
   662  	// Add m to the extra list.
   663  	mnext = lockextra(true);
   664  	mp->schedlink = mnext;
   665  	unlockextra(mp);
   666  }
   667  
   668  // dropm is called when a cgo callback has called needm but is now
   669  // done with the callback and returning back into the non-Go thread.
   670  // It puts the current m back onto the extra list.
   671  //
   672  // The main expense here is the call to signalstack to release the
   673  // m's signal stack, and then the call to needm on the next callback
   674  // from this thread. It is tempting to try to save the m for next time,
   675  // which would eliminate both these costs, but there might not be
   676  // a next time: the current thread (which Go does not control) might exit.
   677  // If we saved the m for that thread, there would be an m leak each time
   678  // such a thread exited. Instead, we acquire and release an m on each
   679  // call. These should typically not be scheduling operations, just a few
   680  // atomics, so the cost should be small.
   681  //
   682  // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
   683  // variable using pthread_key_create. Unlike the pthread keys we already use
   684  // on OS X, this dummy key would never be read by Go code. It would exist
    685  // only so that we could register a thread-exit-time destructor.
   686  // That destructor would put the m back onto the extra list.
   687  // This is purely a performance optimization. The current version,
   688  // in which dropm happens on each cgo call, is still correct too.
   689  // We may have to keep the current version on systems with cgo
   690  // but without pthreads, like Windows.
   691  void
   692  runtime·dropm(void)
   693  {
   694  	M *mp, *mnext;
   695  
   696  	// Undo whatever initialization minit did during needm.
   697  	runtime·unminit();
   698  	m->seh = nil;  // reset dangling typed pointer
   699  
   700  	// Clear m and g, and return m to the extra list.
   701  	// After the call to setmg we can only call nosplit functions.
   702  	mp = m;
   703  	runtime·setmg(nil, nil);
   704  
   705  	mnext = lockextra(true);
   706  	mp->schedlink = mnext;
   707  	unlockextra(mp);
   708  }
   709  
   710  #define MLOCKED ((M*)1)
   711  
   712  // lockextra locks the extra list and returns the list head.
   713  // The caller must unlock the list by storing a new list head
   714  // to runtime.extram. If nilokay is true, then lockextra will
   715  // return a nil list head if that's what it finds. If nilokay is false,
   716  // lockextra will keep waiting until the list head is no longer nil.
   717  #pragma textflag 7
   718  static M*
   719  lockextra(bool nilokay)
   720  {
   721  	M *mp;
   722  	void (*yield)(void);
   723  
   724  	for(;;) {
   725  		mp = runtime·atomicloadp(&runtime·extram);
   726  		if(mp == MLOCKED) {
   727  			yield = runtime·osyield;
   728  			yield();
   729  			continue;
   730  		}
   731  		if(mp == nil && !nilokay) {
   732  			runtime·usleep(1);
   733  			continue;
   734  		}
   735  		if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
   736  			yield = runtime·osyield;
   737  			yield();
   738  			continue;
   739  		}
   740  		break;
   741  	}
   742  	return mp;
   743  }
   744  
   745  #pragma textflag 7
   746  static void
   747  unlockextra(M *mp)
   748  {
   749  	runtime·atomicstorep(&runtime·extram, mp);
   750  }
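
The lockextra/unlockextra pair implements the spin lock described above needm: the list head is stolen with a compare-and-swap that installs the sentinel MLOCKED, and storing a real head releases it. A rough Go translation of the trick, using a sentinel node in place of the (M*)1 value (illustrative only; the nilokay wait is omitted, and atomic.Pointer assumes Go 1.19+):

	package main

	import (
		"runtime"
		"sync/atomic"
	)

	type node struct{ next *node }

	var (
		head   atomic.Pointer[node] // list head; holds &locked while locked
		locked node                 // sentinel playing the role of MLOCKED
	)

	// lockList steals the whole list by swapping in the sentinel.
	func lockList() *node {
		for {
			h := head.Load()
			if h == &locked {
				runtime.Gosched() // someone else holds the lock; spin politely
				continue
			}
			if head.CompareAndSwap(h, &locked) {
				return h // we own the list until we store a new head
			}
		}
	}

	// unlockList publishes a new head, releasing the lock.
	func unlockList(h *node) { head.Store(h) }

	func main() {
		unlockList(&node{}) // seed the list with one node
		h := lockList()     // steal the head (list is now "locked")
		unlockList(h.next)  // put the remainder back, releasing the lock
	}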
   751  
   752  
   753  // Create a new m.  It will start off with a call to fn, or else the scheduler.
   754  static void
   755  newm(void(*fn)(void), P *p)
   756  {
   757  	M *mp;
   758  
   759  	mp = runtime·allocm(p);
   760  	mp->nextp = p;
   761  	mp->mstartfn = fn;
   762  
   763  	if(runtime·iscgo) {
   764  		CgoThreadStart ts;
   765  
   766  		if(_cgo_thread_start == nil)
   767  			runtime·throw("_cgo_thread_start missing");
   768  		ts.m = mp;
   769  		ts.g = mp->g0;
   770  		ts.fn = runtime·mstart;
   771  		runtime·asmcgocall(_cgo_thread_start, &ts);
   772  		return;
   773  	}
   774  	runtime·newosproc(mp, (byte*)mp->g0->stackbase);
   775  }
   776  
   777  // Stops execution of the current m until new work is available.
   778  // Returns with acquired P.
   779  static void
   780  stopm(void)
   781  {
   782  	if(m->locks)
   783  		runtime·throw("stopm holding locks");
   784  	if(m->p)
   785  		runtime·throw("stopm holding p");
   786  	if(m->spinning) {
   787  		m->spinning = false;
   788  		runtime·xadd(&runtime·sched.nmspinning, -1);
   789  	}
   790  
   791  retry:
   792  	runtime·lock(&runtime·sched);
   793  	mput(m);
   794  	runtime·unlock(&runtime·sched);
   795  	runtime·notesleep(&m->park);
   796  	runtime·noteclear(&m->park);
   797  	if(m->helpgc) {
   798  		runtime·gchelper();
   799  		m->helpgc = 0;
   800  		m->mcache = nil;
   801  		goto retry;
   802  	}
   803  	acquirep(m->nextp);
   804  	m->nextp = nil;
   805  }
   806  
   807  static void
   808  mspinning(void)
   809  {
   810  	m->spinning = true;
   811  }
   812  
   813  // Schedules some M to run the p (creates an M if necessary).
    814  // If p==nil, tries to get an idle P; if there are no idle P's, it does nothing.
   815  static void
   816  startm(P *p, bool spinning)
   817  {
   818  	M *mp;
   819  	void (*fn)(void);
   820  
   821  	runtime·lock(&runtime·sched);
   822  	if(p == nil) {
   823  		p = pidleget();
   824  		if(p == nil) {
   825  			runtime·unlock(&runtime·sched);
   826  			if(spinning)
   827  				runtime·xadd(&runtime·sched.nmspinning, -1);
   828  			return;
   829  		}
   830  	}
   831  	mp = mget();
   832  	runtime·unlock(&runtime·sched);
   833  	if(mp == nil) {
   834  		fn = nil;
   835  		if(spinning)
   836  			fn = mspinning;
   837  		newm(fn, p);
   838  		return;
   839  	}
   840  	if(mp->spinning)
   841  		runtime·throw("startm: m is spinning");
   842  	if(mp->nextp)
   843  		runtime·throw("startm: m has p");
   844  	mp->spinning = spinning;
   845  	mp->nextp = p;
   846  	runtime·notewakeup(&mp->park);
   847  }
   848  
   849  // Hands off P from syscall or locked M.
   850  static void
   851  handoffp(P *p)
   852  {
   853  	// if it has local work, start it straight away
   854  	if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
   855  		startm(p, false);
   856  		return;
   857  	}
   858  	// no local work, check that there are no spinning/idle M's,
   859  	// otherwise our help is not required
   860  	if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
   861  		runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
   862  		startm(p, true);
   863  		return;
   864  	}
   865  	runtime·lock(&runtime·sched);
   866  	if(runtime·gcwaiting) {
   867  		p->status = Pgcstop;
   868  		if(--runtime·sched.stopwait == 0)
   869  			runtime·notewakeup(&runtime·sched.stopnote);
   870  		runtime·unlock(&runtime·sched);
   871  		return;
   872  	}
   873  	if(runtime·sched.runqsize) {
   874  		runtime·unlock(&runtime·sched);
   875  		startm(p, false);
   876  		return;
   877  	}
   878  	// If this is the last running P and nobody is polling network,
    879  	// we need to wake up another M to poll the network.
   880  	if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
   881  		runtime·unlock(&runtime·sched);
   882  		startm(p, false);
   883  		return;
   884  	}
   885  	pidleput(p);
   886  	runtime·unlock(&runtime·sched);
   887  }
   888  
   889  // Tries to add one more P to execute G's.
   890  // Called when a G is made runnable (newproc, ready).
   891  static void
   892  wakep(void)
   893  {
   894  	// be conservative about spinning threads
   895  	if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
   896  		return;
   897  	startm(nil, true);
   898  }
   899  
   900  // Stops execution of the current m that is locked to a g until the g is runnable again.
   901  // Returns with acquired P.
   902  static void
   903  stoplockedm(void)
   904  {
   905  	P *p;
   906  
   907  	if(m->lockedg == nil || m->lockedg->lockedm != m)
   908  		runtime·throw("stoplockedm: inconsistent locking");
   909  	if(m->p) {
   910  		// Schedule another M to run this p.
   911  		p = releasep();
   912  		handoffp(p);
   913  	}
   914  	inclocked(1);
   915  	// Wait until another thread schedules lockedg again.
   916  	runtime·notesleep(&m->park);
   917  	runtime·noteclear(&m->park);
   918  	if(m->lockedg->status != Grunnable)
   919  		runtime·throw("stoplockedm: not runnable");
   920  	acquirep(m->nextp);
   921  	m->nextp = nil;
   922  }
   923  
   924  // Schedules the locked m to run the locked gp.
   925  static void
   926  startlockedm(G *gp)
   927  {
   928  	M *mp;
   929  	P *p;
   930  
   931  	mp = gp->lockedm;
   932  	if(mp == m)
   933  		runtime·throw("startlockedm: locked to me");
   934  	if(mp->nextp)
   935  		runtime·throw("startlockedm: m has p");
   936  	// directly handoff current P to the locked m
   937  	inclocked(-1);
   938  	p = releasep();
   939  	mp->nextp = p;
   940  	runtime·notewakeup(&mp->park);
   941  	stopm();
   942  }
   943  
   944  // Stops the current m for stoptheworld.
   945  // Returns when the world is restarted.
   946  static void
   947  gcstopm(void)
   948  {
   949  	P *p;
   950  
   951  	if(!runtime·gcwaiting)
   952  		runtime·throw("gcstopm: not waiting for gc");
   953  	if(m->spinning) {
   954  		m->spinning = false;
   955  		runtime·xadd(&runtime·sched.nmspinning, -1);
   956  	}
   957  	p = releasep();
   958  	runtime·lock(&runtime·sched);
   959  	p->status = Pgcstop;
   960  	if(--runtime·sched.stopwait == 0)
   961  		runtime·notewakeup(&runtime·sched.stopnote);
   962  	runtime·unlock(&runtime·sched);
   963  	stopm();
   964  }
   965  
   966  // Schedules gp to run on the current M.
   967  // Never returns.
   968  static void
   969  execute(G *gp)
   970  {
   971  	int32 hz;
   972  
   973  	if(gp->status != Grunnable) {
   974  		runtime·printf("execute: bad g status %d\n", gp->status);
   975  		runtime·throw("execute: bad g status");
   976  	}
   977  	gp->status = Grunning;
   978  	m->p->tick++;
   979  	m->curg = gp;
   980  	gp->m = m;
   981  
   982  	// Check whether the profiler needs to be turned on or off.
   983  	hz = runtime·sched.profilehz;
   984  	if(m->profilehz != hz)
   985  		runtime·resetcpuprofiler(hz);
   986  
   987  	if(gp->sched.pc == (byte*)runtime·goexit)  // kickoff
   988  		runtime·gogocallfn(&gp->sched, gp->fnstart);
   989  	runtime·gogo(&gp->sched, 0);
   990  }
   991  
   992  // Finds a runnable goroutine to execute.
   993  // Tries to steal from other P's, get g from global queue, poll network.
   994  static G*
   995  findrunnable(void)
   996  {
   997  	G *gp;
   998  	P *p;
   999  	int32 i;
  1000  
  1001  top:
  1002  	if(runtime·gcwaiting) {
  1003  		gcstopm();
  1004  		goto top;
  1005  	}
  1006  	// local runq
  1007  	gp = runqget(m->p);
  1008  	if(gp)
  1009  		return gp;
  1010  	// global runq
  1011  	if(runtime·sched.runqsize) {
  1012  		runtime·lock(&runtime·sched);
  1013  		gp = globrunqget(m->p);
  1014  		runtime·unlock(&runtime·sched);
  1015  		if(gp)
  1016  			return gp;
  1017  	}
  1018  	// poll network
  1019  	gp = runtime·netpoll(false);  // non-blocking
  1020  	if(gp) {
  1021  		injectglist(gp->schedlink);
  1022  		gp->status = Grunnable;
  1023  		return gp;
  1024  	}
  1025  	// If number of spinning M's >= number of busy P's, block.
  1026  	// This is necessary to prevent excessive CPU consumption
  1027  	// when GOMAXPROCS>>1 but the program parallelism is low.
  1028  	if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
  1029  		goto stop;
  1030  	if(!m->spinning) {
  1031  		m->spinning = true;
  1032  		runtime·xadd(&runtime·sched.nmspinning, 1);
  1033  	}
  1034  	// random steal from other P's
  1035  	for(i = 0; i < 2*runtime·gomaxprocs; i++) {
  1036  		if(runtime·gcwaiting)
  1037  			goto top;
  1038  		p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
  1039  		if(p == m->p)
  1040  			gp = runqget(p);
  1041  		else
  1042  			gp = runqsteal(m->p, p);
  1043  		if(gp)
  1044  			return gp;
  1045  	}
  1046  stop:
  1047  	// return P and block
  1048  	runtime·lock(&runtime·sched);
  1049  	if(runtime·gcwaiting) {
  1050  		runtime·unlock(&runtime·sched);
  1051  		goto top;
  1052  	}
  1053  	if(runtime·sched.runqsize) {
  1054  		gp = globrunqget(m->p);
  1055  		runtime·unlock(&runtime·sched);
  1056  		return gp;
  1057  	}
  1058  	p = releasep();
  1059  	pidleput(p);
  1060  	runtime·unlock(&runtime·sched);
  1061  	if(m->spinning) {
  1062  		m->spinning = false;
  1063  		runtime·xadd(&runtime·sched.nmspinning, -1);
  1064  	}
  1065  	// check all runqueues once again
  1066  	for(i = 0; i < runtime·gomaxprocs; i++) {
  1067  		p = runtime·allp[i];
  1068  		if(p && p->runqhead != p->runqtail) {
  1069  			runtime·lock(&runtime·sched);
  1070  			p = pidleget();
  1071  			runtime·unlock(&runtime·sched);
  1072  			if(p) {
  1073  				acquirep(p);
  1074  				goto top;
  1075  			}
  1076  			break;
  1077  		}
  1078  	}
  1079  	// poll network
  1080  	if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
  1081  		if(m->p)
  1082  			runtime·throw("findrunnable: netpoll with p");
  1083  		if(m->spinning)
  1084  			runtime·throw("findrunnable: netpoll with spinning");
  1085  		gp = runtime·netpoll(true);  // block until new work is available
  1086  		runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
  1087  		if(gp) {
  1088  			runtime·lock(&runtime·sched);
  1089  			p = pidleget();
  1090  			runtime·unlock(&runtime·sched);
  1091  			if(p) {
  1092  				acquirep(p);
  1093  				injectglist(gp->schedlink);
  1094  				gp->status = Grunnable;
  1095  				return gp;
  1096  			}
  1097  			injectglist(gp);
  1098  		}
  1099  	}
  1100  	stopm();
  1101  	goto top;
  1102  }
  1103  
  1104  // Injects the list of runnable G's into the scheduler.
  1105  // Can run concurrently with GC.
  1106  static void
  1107  injectglist(G *glist)
  1108  {
  1109  	int32 n;
  1110  	G *gp;
  1111  
  1112  	if(glist == nil)
  1113  		return;
  1114  	runtime·lock(&runtime·sched);
  1115  	for(n = 0; glist; n++) {
  1116  		gp = glist;
  1117  		glist = gp->schedlink;
  1118  		gp->status = Grunnable;
  1119  		globrunqput(gp);
  1120  	}
  1121  	runtime·unlock(&runtime·sched);
  1122  
  1123  	for(; n && runtime·sched.npidle; n--)
  1124  		startm(nil, false);
  1125  }
  1126  
  1127  // One round of scheduler: find a runnable goroutine and execute it.
  1128  // Never returns.
  1129  static void
  1130  schedule(void)
  1131  {
  1132  	G *gp;
  1133  
  1134  	if(m->locks)
  1135  		runtime·throw("schedule: holding locks");
  1136  
  1137  top:
  1138  	if(runtime·gcwaiting) {
  1139  		gcstopm();
  1140  		goto top;
  1141  	}
  1142  
  1143  	gp = runqget(m->p);
  1144  	if(gp == nil)
  1145  		gp = findrunnable();
  1146  
  1147  	if(m->spinning) {
  1148  		m->spinning = false;
  1149  		runtime·xadd(&runtime·sched.nmspinning, -1);
  1150  	}
  1151  
  1152  	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
   1153  	// so see if we need to wake up another M here.
  1154  	if (m->p->runqhead != m->p->runqtail &&
  1155  		runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
  1156  		runtime·atomicload(&runtime·sched.npidle) > 0)  // TODO: fast atomic
  1157  		wakep();
  1158  
  1159  	if(gp->lockedm) {
  1160  		startlockedm(gp);
  1161  		goto top;
  1162  	}
  1163  
  1164  	execute(gp);
  1165  }
  1166  
  1167  // Puts the current goroutine into a waiting state and unlocks the lock.
  1168  // The goroutine can be made runnable again by calling runtime·ready(gp).
  1169  void
  1170  runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason)
  1171  {
  1172  	m->waitlock = lock;
  1173  	m->waitunlockf = unlockf;
  1174  	g->waitreason = reason;
  1175  	runtime·mcall(park0);
  1176  }
  1177  
  1178  // runtime·park continuation on g0.
  1179  static void
  1180  park0(G *gp)
  1181  {
  1182  	gp->status = Gwaiting;
  1183  	gp->m = nil;
  1184  	m->curg = nil;
  1185  	if(m->waitunlockf) {
  1186  		m->waitunlockf(m->waitlock);
  1187  		m->waitunlockf = nil;
  1188  		m->waitlock = nil;
  1189  	}
  1190  	if(m->lockedg) {
  1191  		stoplockedm();
  1192  		execute(gp);  // Never returns.
  1193  	}
  1194  	schedule();
  1195  }
  1196  
  1197  // Scheduler yield.
  1198  void
  1199  runtime·gosched(void)
  1200  {
  1201  	runtime·mcall(gosched0);
  1202  }
  1203  
  1204  // runtime·gosched continuation on g0.
  1205  static void
  1206  gosched0(G *gp)
  1207  {
  1208  	gp->status = Grunnable;
  1209  	gp->m = nil;
  1210  	m->curg = nil;
  1211  	runtime·lock(&runtime·sched);
  1212  	globrunqput(gp);
  1213  	runtime·unlock(&runtime·sched);
  1214  	if(m->lockedg) {
  1215  		stoplockedm();
  1216  		execute(gp);  // Never returns.
  1217  	}
  1218  	schedule();
  1219  }
  1220  
  1221  // Finishes execution of the current goroutine.
  1222  void
  1223  runtime·goexit(void)
  1224  {
  1225  	if(raceenabled)
  1226  		runtime·racegoend();
  1227  	runtime·mcall(goexit0);
  1228  }
  1229  
  1230  // runtime·goexit continuation on g0.
  1231  static void
  1232  goexit0(G *gp)
  1233  {
  1234  	gp->status = Gdead;
  1235  	gp->m = nil;
  1236  	gp->lockedm = nil;
  1237  	m->curg = nil;
  1238  	m->lockedg = nil;
  1239  	if(m->locked & ~LockExternal) {
  1240  		runtime·printf("invalid m->locked = %d", m->locked);
  1241  		runtime·throw("internal lockOSThread error");
  1242  	}	
  1243  	m->locked = 0;
  1244  	runtime·unwindstack(gp, nil);
  1245  	gfput(m->p, gp);
  1246  	schedule();
  1247  }
  1248  
  1249  // The goroutine g is about to enter a system call.
  1250  // Record that it's not using the cpu anymore.
  1251  // This is called only from the go syscall library and cgocall,
  1252  // not from the low-level system calls used by the runtime.
  1253  //
  1254  // Entersyscall cannot split the stack: the runtime·gosave must
  1255  // make g->sched refer to the caller's stack segment, because
  1256  // entersyscall is going to return immediately after.
  1257  #pragma textflag 7
  1258  void
  1259  ·entersyscall(int32 dummy)
  1260  {
  1261  	if(m->profilehz > 0)
  1262  		runtime·setprof(false);
  1263  
  1264  	// Leave SP around for gc and traceback.
  1265  	g->sched.sp = (uintptr)runtime·getcallersp(&dummy);
  1266  	g->sched.pc = runtime·getcallerpc(&dummy);
  1267  	g->sched.g = g;
  1268  	g->gcsp = g->sched.sp;
  1269  	g->gcpc = g->sched.pc;
  1270  	g->gcstack = g->stackbase;
  1271  	g->gcguard = g->stackguard;
  1272  	g->status = Gsyscall;
  1273  	if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
  1274  		// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1275  		//	g->gcsp, g->gcguard-StackGuard, g->gcstack);
  1276  		runtime·throw("entersyscall");
  1277  	}
  1278  
  1279  	if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
  1280  		runtime·lock(&runtime·sched);
  1281  		if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1282  			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1283  			runtime·notewakeup(&runtime·sched.sysmonnote);
  1284  		}
  1285  		runtime·unlock(&runtime·sched);
  1286  		runtime·gosave(&g->sched);  // re-save for traceback
  1287  	}
  1288  
  1289  	m->mcache = nil;
  1290  	m->p->tick++;
  1291  	m->p->m = nil;
  1292  	runtime·atomicstore(&m->p->status, Psyscall);
  1293  	if(runtime·gcwaiting) {
  1294  		runtime·lock(&runtime·sched);
  1295  		if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
  1296  			if(--runtime·sched.stopwait == 0)
  1297  				runtime·notewakeup(&runtime·sched.stopnote);
  1298  		}
  1299  		runtime·unlock(&runtime·sched);
  1300  		runtime·gosave(&g->sched);  // re-save for traceback
  1301  	}
  1302  }
  1303  
  1304  // The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
  1305  #pragma textflag 7
  1306  void
  1307  ·entersyscallblock(int32 dummy)
  1308  {
  1309  	P *p;
  1310  
  1311  	if(m->profilehz > 0)
  1312  		runtime·setprof(false);
  1313  
  1314  	// Leave SP around for gc and traceback.
  1315  	g->sched.sp = (uintptr)runtime·getcallersp(&dummy);
  1316  	g->sched.pc = runtime·getcallerpc(&dummy);
  1317  	g->sched.g = g;
  1318  	g->gcsp = g->sched.sp;
  1319  	g->gcpc = g->sched.pc;
  1320  	g->gcstack = g->stackbase;
  1321  	g->gcguard = g->stackguard;
  1322  	g->status = Gsyscall;
  1323  	if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
  1324  		// runtime·printf("entersyscallblock inconsistent %p [%p,%p]\n",
  1325  		//	g->gcsp, g->gcguard-StackGuard, g->gcstack);
  1326  		runtime·throw("entersyscallblock");
  1327  	}
  1328  
  1329  	p = releasep();
  1330  	handoffp(p);
  1331  	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1332  		inclocked(1);
  1333  	runtime·gosave(&g->sched);  // re-save for traceback
  1334  }
  1335  
  1336  // The goroutine g exited its system call.
  1337  // Arrange for it to run on a cpu again.
  1338  // This is called only from the go syscall library, not
  1339  // from the low-level system calls used by the runtime.
  1340  void
  1341  runtime·exitsyscall(void)
  1342  {
  1343  	P *p;
  1344  
  1345  	// Check whether the profiler needs to be turned on.
  1346  	if(m->profilehz > 0)
  1347  		runtime·setprof(true);
  1348  
  1349  	// Try to re-acquire the last P.
  1350  	if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
  1351  		// There's a cpu for us, so we can run.
  1352  		m->mcache = m->p->mcache;
  1353  		m->p->m = m;
  1354  		m->p->tick++;
  1355  		g->status = Grunning;
  1356  		// Garbage collector isn't running (since we are),
  1357  		// so okay to clear gcstack and gcsp.
  1358  		g->gcstack = (uintptr)nil;
  1359  		g->gcsp = (uintptr)nil;
  1360  		return;
  1361  	}
  1362  
  1363  	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1364  		inclocked(-1);
  1365  	// Try to get any other idle P.
  1366  	m->p = nil;
  1367  	if(runtime·sched.pidle) {
  1368  		runtime·lock(&runtime·sched);
  1369  		p = pidleget();
  1370  		runtime·unlock(&runtime·sched);
  1371  		if(p) {
  1372  			acquirep(p);
  1373  			g->gcstack = (uintptr)nil;
  1374  			g->gcsp = (uintptr)nil;
  1375  			return;
  1376  		}
  1377  	}
  1378  
  1379  	// Call the scheduler.
  1380  	runtime·mcall(exitsyscall0);
  1381  
  1382  	// Scheduler returned, so we're allowed to run now.
  1383  	// Delete the gcstack information that we left for
  1384  	// the garbage collector during the system call.
  1385  	// Must wait until now because until gosched returns
  1386  	// we don't know for sure that the garbage collector
  1387  	// is not running.
  1388  	g->gcstack = (uintptr)nil;
  1389  	g->gcsp = (uintptr)nil;
  1390  }
  1391  
  1392  // runtime·exitsyscall slow path on g0.
  1393  // Failed to acquire P, enqueue gp as runnable.
  1394  static void
  1395  exitsyscall0(G *gp)
  1396  {
  1397  	P *p;
  1398  
  1399  	gp->status = Grunnable;
  1400  	gp->m = nil;
  1401  	m->curg = nil;
  1402  	runtime·lock(&runtime·sched);
  1403  	p = pidleget();
  1404  	if(p == nil)
  1405  		globrunqput(gp);
  1406  	runtime·unlock(&runtime·sched);
  1407  	if(p) {
  1408  		acquirep(p);
  1409  		execute(gp);  // Never returns.
  1410  	}
  1411  	if(m->lockedg) {
  1412  		// Wait until another thread schedules gp and so m again.
  1413  		stoplockedm();
  1414  		execute(gp);  // Never returns.
  1415  	}
  1416  	stopm();
  1417  	schedule();  // Never returns.
  1418  }
  1419  
  1420  // Hook used by runtime·malg to call runtime·stackalloc on the
  1421  // scheduler stack.  This exists because runtime·stackalloc insists
  1422  // on being called on the scheduler stack, to avoid trying to grow
  1423  // the stack while allocating a new stack segment.
  1424  static void
  1425  mstackalloc(G *gp)
  1426  {
  1427  	gp->param = runtime·stackalloc((uintptr)gp->param);
  1428  	runtime·gogo(&gp->sched, 0);
  1429  }
  1430  
  1431  // Allocate a new g, with a stack big enough for stacksize bytes.
  1432  G*
  1433  runtime·malg(int32 stacksize)
  1434  {
  1435  	G *newg;
  1436  	byte *stk;
  1437  
  1438  	if(StackTop < sizeof(Stktop)) {
  1439  		runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
  1440  		runtime·throw("runtime: bad stack.h");
  1441  	}
  1442  
  1443  	newg = runtime·malloc(sizeof(G));
  1444  	if(stacksize >= 0) {
  1445  		if(g == m->g0) {
  1446  			// running on scheduler stack already.
  1447  			stk = runtime·stackalloc(StackSystem + stacksize);
  1448  		} else {
  1449  			// have to call stackalloc on scheduler stack.
  1450  			g->param = (void*)(StackSystem + stacksize);
  1451  			runtime·mcall(mstackalloc);
  1452  			stk = g->param;
  1453  			g->param = nil;
  1454  		}
  1455  		newg->stack0 = (uintptr)stk;
  1456  		newg->stackguard = (uintptr)stk + StackGuard;
  1457  		newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
  1458  		runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
  1459  	}
  1460  	return newg;
  1461  }
  1462  
  1463  // Create a new g running fn with siz bytes of arguments.
  1464  // Put it on the queue of g's waiting to run.
  1465  // The compiler turns a go statement into a call to this.
  1466  // Cannot split the stack because it assumes that the arguments
  1467  // are available sequentially after &fn; they would not be
  1468  // copied if a stack split occurred.  It's OK for this to call
  1469  // functions that split the stack.
  1470  #pragma textflag 7
  1471  void
  1472  runtime·newproc(int32 siz, FuncVal* fn, ...)
  1473  {
  1474  	byte *argp;
  1475  
  1476  	if(thechar == '5')
  1477  		argp = (byte*)(&fn+2);  // skip caller's saved LR
  1478  	else
  1479  		argp = (byte*)(&fn+1);
  1480  	runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
  1481  }
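
For reference, this is the entry point behind the go statement: the compiler computes the argument size and passes a pointer to the arguments, which newproc1 below copies onto the new goroutine's stack. A user-level example of what gets lowered into such a call (illustrative only):

	package main

	import "fmt"

	func worker(id int, msg string, done chan<- struct{}) {
		fmt.Println(id, msg)
		done <- struct{}{}
	}

	func main() {
		done := make(chan struct{})
		// This statement becomes, roughly, a call to runtime·newproc with
		// the total size of (id, msg, done) and a pointer to those values.
		go worker(1, "hello", done)
		<-done
	}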
  1482  
  1483  // Create a new g running fn with narg bytes of arguments starting
  1484  // at argp and returning nret bytes of results.  callerpc is the
  1485  // address of the go statement that created this.  The new g is put
  1486  // on the queue of g's waiting to run.
  1487  G*
  1488  runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
  1489  {
  1490  	byte *sp;
  1491  	G *newg;
  1492  	int32 siz;
  1493  
  1494  //printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
  1495  	siz = narg + nret;
  1496  	siz = (siz+7) & ~7;
  1497  
  1498  	// We could instead create a secondary stack frame
  1499  	// and make it look like goexit was on the original but
  1500  	// the call to the actual goroutine function was split.
  1501  	// Not worth it: this is almost always an error.
  1502  	if(siz > StackMin - 1024)
  1503  		runtime·throw("runtime.newproc: function arguments too large for new goroutine");
  1504  
  1505  	if((newg = gfget(m->p)) != nil) {
  1506  		if(newg->stackguard - StackGuard != newg->stack0)
  1507  			runtime·throw("invalid stack in newg");
  1508  	} else {
  1509  		newg = runtime·malg(StackMin);
  1510  		runtime·lock(&runtime·sched);
  1511  		if(runtime·lastg == nil)
  1512  			runtime·allg = newg;
  1513  		else
  1514  			runtime·lastg->alllink = newg;
  1515  		runtime·lastg = newg;
  1516  		runtime·unlock(&runtime·sched);
  1517  	}
  1518  
  1519  	sp = (byte*)newg->stackbase;
  1520  	sp -= siz;
  1521  	runtime·memmove(sp, argp, narg);
  1522  	if(thechar == '5') {
  1523  		// caller's LR
  1524  		sp -= sizeof(void*);
  1525  		*(void**)sp = nil;
  1526  	}
  1527  
  1528  	newg->sched.sp = (uintptr)sp;
  1529  	newg->sched.pc = (byte*)runtime·goexit;
  1530  	newg->sched.g = newg;
  1531  	newg->fnstart = fn;
  1532  	newg->gopc = (uintptr)callerpc;
  1533  	newg->status = Grunnable;
  1534  	newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
  1535  	if(raceenabled)
  1536  		newg->racectx = runtime·racegostart(callerpc);
  1537  	runqput(m->p, newg);
  1538  
  1539  	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
  1540  		wakep();
  1541  	return newg;
  1542  }
  1543  
  1544  // Put on gfree list.
  1545  // If local list is too long, transfer a batch to the global list.
  1546  static void
  1547  gfput(P *p, G *gp)
  1548  {
  1549  	if(gp->stackguard - StackGuard != gp->stack0)
  1550  		runtime·throw("invalid stack in gfput");
  1551  	gp->schedlink = p->gfree;
  1552  	p->gfree = gp;
  1553  	p->gfreecnt++;
  1554  	if(p->gfreecnt >= 64) {
  1555  		runtime·lock(&runtime·sched.gflock);
  1556  		while(p->gfreecnt >= 32) {
  1557  			p->gfreecnt--;
  1558  			gp = p->gfree;
  1559  			p->gfree = gp->schedlink;
  1560  			gp->schedlink = runtime·sched.gfree;
  1561  			runtime·sched.gfree = gp;
  1562  		}
  1563  		runtime·unlock(&runtime·sched.gflock);
  1564  	}
  1565  }
  1566  
  1567  // Get from gfree list.
  1568  // If local list is empty, grab a batch from global list.
  1569  static G*
  1570  gfget(P *p)
  1571  {
  1572  	G *gp;
  1573  
  1574  retry:
  1575  	gp = p->gfree;
  1576  	if(gp == nil && runtime·sched.gfree) {
  1577  		runtime·lock(&runtime·sched.gflock);
  1578  		while(p->gfreecnt < 32 && runtime·sched.gfree) {
  1579  			p->gfreecnt++;
  1580  			gp = runtime·sched.gfree;
  1581  			runtime·sched.gfree = gp->schedlink;
  1582  			gp->schedlink = p->gfree;
  1583  			p->gfree = gp;
  1584  		}
  1585  		runtime·unlock(&runtime·sched.gflock);
  1586  		goto retry;
  1587  	}
  1588  	if(gp) {
  1589  		p->gfree = gp->schedlink;
  1590  		p->gfreecnt--;
  1591  	}
  1592  	return gp;
  1593  }
  1594  
  1595  // Purge all cached G's from gfree list to the global list.
  1596  static void
  1597  gfpurge(P *p)
  1598  {
  1599  	G *gp;
  1600  
  1601  	runtime·lock(&runtime·sched.gflock);
  1602  	while(p->gfreecnt) {
  1603  		p->gfreecnt--;
  1604  		gp = p->gfree;
  1605  		p->gfree = gp->schedlink;
  1606  		gp->schedlink = runtime·sched.gfree;
  1607  		runtime·sched.gfree = gp;
  1608  	}
  1609  	runtime·unlock(&runtime·sched.gflock);
  1610  }
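
gfput and gfget form a two-level free list: each P keeps up to 64 dead G's locally and spills them down to 32 into the locked global list, while gfget refills the local list from the global one in batches of up to 32. A generic Go sketch of the same batching pattern (thresholds mirror the code above; this is only an illustration, not runtime code):

	package main

	import (
		"fmt"
		"sync"
	)

	type g struct{ id int }

	type perP struct {
		local []*g // per-P list; in the runtime only the owning P touches it
	}

	var (
		globalMu   sync.Mutex
		globalFree []*g
	)

	// put mirrors gfput: keep items locally, spill a batch once over 64.
	func (c *perP) put(x *g) {
		c.local = append(c.local, x)
		if len(c.local) >= 64 {
			globalMu.Lock()
			for len(c.local) >= 32 {
				n := len(c.local) - 1
				globalFree = append(globalFree, c.local[n])
				c.local = c.local[:n]
			}
			globalMu.Unlock()
		}
	}

	// get mirrors gfget: use the local list, refilling up to 32 items
	// from the global list when the local one is empty.
	func (c *perP) get() *g {
		if len(c.local) == 0 {
			globalMu.Lock()
			for len(c.local) < 32 && len(globalFree) > 0 {
				n := len(globalFree) - 1
				c.local = append(c.local, globalFree[n])
				globalFree = globalFree[:n]
			}
			globalMu.Unlock()
		}
		if len(c.local) == 0 {
			return nil // caller allocates a fresh G in this case
		}
		n := len(c.local) - 1
		x := c.local[n]
		c.local = c.local[:n]
		return x
	}

	func main() {
		var c perP
		for i := 0; i < 100; i++ {
			c.put(&g{id: i})
		}
		fmt.Println("global:", len(globalFree), "next local:", c.get().id)
	}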
  1611  
  1612  void
  1613  runtime·Breakpoint(void)
  1614  {
  1615  	runtime·breakpoint();
  1616  }
  1617  
  1618  void
  1619  runtime·Gosched(void)
  1620  {
  1621  	runtime·gosched();
  1622  }
  1623  
  1624  // Implementation of runtime.GOMAXPROCS.
  1625  // delete when scheduler is even stronger
  1626  int32
  1627  runtime·gomaxprocsfunc(int32 n)
  1628  {
  1629  	int32 ret;
  1630  
  1631  	if(n > MaxGomaxprocs)
  1632  		n = MaxGomaxprocs;
  1633  	runtime·lock(&runtime·sched);
  1634  	ret = runtime·gomaxprocs;
  1635  	if(n <= 0 || n == ret) {
  1636  		runtime·unlock(&runtime·sched);
  1637  		return ret;
  1638  	}
  1639  	runtime·unlock(&runtime·sched);
  1640  
  1641  	runtime·semacquire(&runtime·worldsema);
  1642  	m->gcing = 1;
  1643  	runtime·stoptheworld();
  1644  	newprocs = n;
  1645  	m->gcing = 0;
  1646  	runtime·semrelease(&runtime·worldsema);
  1647  	runtime·starttheworld();
  1648  
  1649  	return ret;
  1650  }
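
runtime.GOMAXPROCS is the public wrapper around this function: a non-positive argument only reports the current value, and an actual change stops the world so that the next starttheworld can apply newprocs via procresize. A short usage example (illustrative only):

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// GOMAXPROCS(0) queries without resizing.
		fmt.Println("current:", runtime.GOMAXPROCS(0))

		// Setting a new value returns the previous one; under the hood the
		// runtime stops the world and procresize rebuilds the P array.
		old := runtime.GOMAXPROCS(2)
		fmt.Println("was:", old, "now:", runtime.GOMAXPROCS(0))
	}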
  1651  
  1652  static void
  1653  LockOSThread(void)
  1654  {
  1655  	m->lockedg = g;
  1656  	g->lockedm = m;
  1657  }
  1658  
  1659  void
  1660  runtime·LockOSThread(void)
  1661  {
  1662  	m->locked |= LockExternal;
  1663  	LockOSThread();
  1664  }
  1665  
  1666  void
  1667  runtime·lockOSThread(void)
  1668  {
  1669  	m->locked += LockInternal;
  1670  	LockOSThread();
  1671  }
  1672  
  1673  static void
  1674  UnlockOSThread(void)
  1675  {
  1676  	if(m->locked != 0)
  1677  		return;
  1678  	m->lockedg = nil;
  1679  	g->lockedm = nil;
  1680  }
  1681  
  1682  void
  1683  runtime·UnlockOSThread(void)
  1684  {
  1685  	m->locked &= ~LockExternal;
  1686  	UnlockOSThread();
  1687  }
  1688  
  1689  void
  1690  runtime·unlockOSThread(void)
  1691  {
  1692  	if(m->locked < LockInternal)
  1693  		runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
  1694  	m->locked -= LockInternal;
  1695  	UnlockOSThread();
  1696  }
  1697  
  1698  bool
  1699  runtime·lockedOSThread(void)
  1700  {
  1701  	return g->lockedm != nil && m->lockedg != nil;
  1702  }
  1703  
  1704  // for testing of callbacks
  1705  void
  1706  runtime·golockedOSThread(bool ret)
  1707  {
  1708  	ret = runtime·lockedOSThread();
  1709  	FLUSH(&ret);
  1710  }
  1711  
  1712  // for testing of wire, unwire
  1713  void
  1714  runtime·mid(uint32 ret)
  1715  {
  1716  	ret = m->id;
  1717  	FLUSH(&ret);
  1718  }
  1719  
  1720  void
  1721  runtime·NumGoroutine(intgo ret)
  1722  {
  1723  	ret = runtime·gcount();
  1724  	FLUSH(&ret);
  1725  }
  1726  
  1727  int32
  1728  runtime·gcount(void)
  1729  {
  1730  	G *gp;
  1731  	int32 n, s;
  1732  
  1733  	n = 0;
  1734  	runtime·lock(&runtime·sched);
  1735  	// TODO(dvyukov): runtime.NumGoroutine() is O(N).
  1736  	// We do not want to increment/decrement centralized counter in newproc/goexit,
  1737  	// just to make runtime.NumGoroutine() faster.
  1738  	// Compromise solution is to introduce per-P counters of active goroutines.
  1739  	for(gp = runtime·allg; gp; gp = gp->alllink) {
  1740  		s = gp->status;
  1741  		if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
  1742  			n++;
  1743  	}
  1744  	runtime·unlock(&runtime·sched);
  1745  	return n;
  1746  }
  1747  
  1748  int32
  1749  runtime·mcount(void)
  1750  {
  1751  	return runtime·sched.mcount;
  1752  }
  1753  
  1754  void
  1755  runtime·badmcall(void)  // called from assembly
  1756  {
  1757  	runtime·throw("runtime: mcall called on m->g0 stack");
  1758  }
  1759  
  1760  void
  1761  runtime·badmcall2(void)  // called from assembly
  1762  {
  1763  	runtime·throw("runtime: mcall function returned");
  1764  }
  1765  
  1766  static struct {
  1767  	Lock;
  1768  	void (*fn)(uintptr*, int32);
  1769  	int32 hz;
  1770  	uintptr pcbuf[100];
  1771  } prof;
  1772  
  1773  // Called if we receive a SIGPROF signal.
  1774  void
  1775  runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
  1776  {
  1777  	int32 n;
  1778  
  1779  	// Windows does profiling in a dedicated thread w/o m.
  1780  	if(!Windows && (m == nil || m->mcache == nil))
  1781  		return;
  1782  	if(prof.fn == nil || prof.hz == 0)
  1783  		return;
  1784  
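        	// prof.fn is re-checked under the lock: setcpuprofilerate can clear it
        	// concurrently, and the unlocked check above only exists to avoid taking
        	// the lock on every signal.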
  1785  	runtime·lock(&prof);
  1786  	if(prof.fn == nil) {
  1787  		runtime·unlock(&prof);
  1788  		return;
  1789  	}
  1790  	n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil);
  1791  	if(n > 0)
  1792  		prof.fn(prof.pcbuf, n);
  1793  	runtime·unlock(&prof);
  1794  }
  1795  
  1796  // Arrange to call fn with a traceback hz times a second.
  1797  void
  1798  runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
  1799  {
  1800  	// Force sane arguments.
  1801  	if(hz < 0)
  1802  		hz = 0;
  1803  	if(hz == 0)
  1804  		fn = nil;
  1805  	if(fn == nil)
  1806  		hz = 0;
  1807  
  1808  	// Stop the profiler on this cpu so that it is safe to lock prof.
  1809  	// If a profiling signal came in while we had prof locked,
  1810  	// it would deadlock.
  1811  	runtime·resetcpuprofiler(0);
  1812  
  1813  	runtime·lock(&prof);
  1814  	prof.fn = fn;
  1815  	prof.hz = hz;
  1816  	runtime·unlock(&prof);
  1817  	runtime·lock(&runtime·sched);
  1818  	runtime·sched.profilehz = hz;
  1819  	runtime·unlock(&runtime·sched);
  1820  
  1821  	if(hz != 0)
  1822  		runtime·resetcpuprofiler(hz);
  1823  }
  1824  
  1825  // Change the number of processors. The world is stopped and sched is locked.
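        // For example, shrinking from 4 P's to 2: every runnable G is first flushed from
        // the four local queues to the global queue, then dealt back round-robin starting
        // at allp[1]; allp[2] and allp[3] have their mcaches freed and are marked Pdead;
        // the current M acquires allp[0]; and allp[1] is put on the idle P list.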
  1826  static void
  1827  procresize(int32 new)
  1828  {
  1829  	int32 i, old;
  1830  	G *gp;
  1831  	P *p;
  1832  
  1833  	old = runtime·gomaxprocs;
  1834  	if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
  1835  		runtime·throw("procresize: invalid arg");
  1836  	// initialize new P's
  1837  	for(i = 0; i < new; i++) {
  1838  		p = runtime·allp[i];
  1839  		if(p == nil) {
  1840  			p = (P*)runtime·mallocgc(sizeof(*p), 0, 0, 1);
  1841  			p->status = Pgcstop;
  1842  			runtime·atomicstorep(&runtime·allp[i], p);
  1843  		}
  1844  		if(p->mcache == nil) {
  1845  			if(old==0 && i==0)
  1846  				p->mcache = m->mcache;  // bootstrap
  1847  			else
  1848  				p->mcache = runtime·allocmcache();
  1849  		}
  1850  		if(p->runq == nil) {
  1851  			p->runqsize = 128;
  1852  			p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
  1853  		}
  1854  	}
  1855  
  1856  	// redistribute runnable G's evenly
  1857  	for(i = 0; i < old; i++) {
  1858  		p = runtime·allp[i];
  1859  		while(gp = runqget(p))
  1860  			globrunqput(gp);
  1861  	}
  1862  	// start at 1 because the current M already executes some G and will acquire allp[0] below,
  1863  	// so if we have a spare G we want to put it into allp[1].
  1864  	for(i = 1; runtime·sched.runqhead; i++) {
  1865  		gp = runtime·sched.runqhead;
  1866  		runtime·sched.runqhead = gp->schedlink;
  1867  		runqput(runtime·allp[i%new], gp);
  1868  	}
  1869  	runtime·sched.runqtail = nil;
  1870  	runtime·sched.runqsize = 0;
  1871  
  1872  	// free unused P's
  1873  	for(i = new; i < old; i++) {
  1874  		p = runtime·allp[i];
  1875  		runtime·freemcache(p->mcache);
  1876  		p->mcache = nil;
  1877  		gfpurge(p);
  1878  		p->status = Pdead;
  1879  		// can't free P itself because it can be referenced by an M in syscall
  1880  	}
  1881  
  1882  	if(m->p)
  1883  		m->p->m = nil;
  1884  	m->p = nil;
  1885  	m->mcache = nil;
  1886  	p = runtime·allp[0];
  1887  	p->m = nil;
  1888  	p->status = Pidle;
  1889  	acquirep(p);
  1890  	for(i = new-1; i > 0; i--) {
  1891  		p = runtime·allp[i];
  1892  		p->status = Pidle;
  1893  		pidleput(p);
  1894  	}
  1895  	runtime·singleproc = new == 1;
  1896  	runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
  1897  }
  1898  
  1899  // Associate p and the current m.
  1900  static void
  1901  acquirep(P *p)
  1902  {
  1903  	if(m->p || m->mcache)
  1904  		runtime·throw("acquirep: already in go");
  1905  	if(p->m || p->status != Pidle) {
  1906  		runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
  1907  		runtime·throw("acquirep: invalid p state");
  1908  	}
  1909  	m->mcache = p->mcache;
  1910  	m->p = p;
  1911  	p->m = m;
  1912  	p->status = Prunning;
  1913  }
  1914  
  1915  // Disassociate p and the current m.
  1916  static P*
  1917  releasep(void)
  1918  {
  1919  	P *p;
  1920  
  1921  	if(m->p == nil || m->mcache == nil)
  1922  		runtime·throw("releasep: invalid arg");
  1923  	p = m->p;
  1924  	if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
  1925  		runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
  1926  			m, m->p, p->m, m->mcache, p->mcache, p->status);
  1927  		runtime·throw("releasep: invalid p state");
  1928  	}
  1929  	m->p = nil;
  1930  	m->mcache = nil;
  1931  	p->m = nil;
  1932  	p->status = Pidle;
  1933  	return p;
  1934  }
  1935  
  1936  static void
  1937  inclocked(int32 v)
  1938  {
  1939  	runtime·lock(&runtime·sched);
  1940  	runtime·sched.mlocked += v;
  1941  	if(v > 0)
  1942  		checkdead();
  1943  	runtime·unlock(&runtime·sched);
  1944  }
  1945  
  1946  // Check for a deadlock situation.
  1947  // The check is based on the number of running M's; if it is 0, we have a deadlock.
  1948  static void
  1949  checkdead(void)
  1950  {
  1951  	G *gp;
  1952  	int32 run, grunning, s;
  1953  
  1954  	// -1 for sysmon
  1955  	run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.mlocked - 1;
  1956  	if(run > 0)
  1957  		return;
  1958  	if(run < 0) {
  1959  		runtime·printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
  1960  			runtime·sched.nmidle, runtime·sched.mlocked, runtime·sched.mcount);
  1961  		runtime·throw("checkdead: inconsistent counts");
  1962  	}
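        	// With no working M's left, any non-background goroutine that is still alive
        	// should be blocked in Gwaiting; a goroutine found in Grunnable, Grunning or
        	// Gsyscall means the counters above are inconsistent.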
  1963  	grunning = 0;
  1964  	for(gp = runtime·allg; gp; gp = gp->alllink) {
  1965  		if(gp->isbackground)
  1966  			continue;
  1967  		s = gp->status;
  1968  		if(s == Gwaiting)
  1969  			grunning++;
  1970  		else if(s == Grunnable || s == Grunning || s == Gsyscall) {
  1971  			runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
  1972  			runtime·throw("checkdead: runnable g");
  1973  		}
  1974  	}
  1975  	if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
  1976  		runtime·exit(0);
  1977  	m->throwing = -1;  // do not dump full stacks
  1978  	runtime·throw("all goroutines are asleep - deadlock!");
  1979  }
  1980  
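        // The sysmon loop below sleeps between 20us and 10ms (doubling the delay after
        // 50 consecutive idle cycles), parks on sysmonnote while GC is in progress or
        // all P's are idle, keeps the network poller serviced, and retakes P's that
        // have been stuck in syscalls.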
  1981  static void
  1982  sysmon(void)
  1983  {
  1984  	uint32 idle, delay;
  1985  	int64 now, lastpoll;
  1986  	G *gp;
  1987  	uint32 ticks[MaxGomaxprocs];
  1988  
  1989  	idle = 0;  // how many cycles in succession we have not woken somebody up
  1990  	delay = 0;
  1991  	for(;;) {
  1992  		if(idle == 0)  // start with 20us sleep...
  1993  			delay = 20;
  1994  		else if(idle > 50)  // start doubling the sleep after 1ms...
  1995  			delay *= 2;
  1996  		if(delay > 10*1000)  // up to 10ms
  1997  			delay = 10*1000;
  1998  		runtime·usleep(delay);
  1999  		if(runtime·gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {  // TODO: fast atomic
  2000  			runtime·lock(&runtime·sched);
  2001  			if(runtime·atomicload(&runtime·gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
  2002  				runtime·atomicstore(&runtime·sched.sysmonwait, 1);
  2003  				runtime·unlock(&runtime·sched);
  2004  				runtime·notesleep(&runtime·sched.sysmonnote);
  2005  				runtime·noteclear(&runtime·sched.sysmonnote);
  2006  				idle = 0;
  2007  				delay = 20;
  2008  			} else
  2009  				runtime·unlock(&runtime·sched);
  2010  		}
  2011  		// poll network if it has not been polled for more than 10ms
  2012  		lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
  2013  		now = runtime·nanotime();
  2014  		if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
  2015  			gp = runtime·netpoll(false);  // non-blocking
  2016  			injectglist(gp);
  2017  		}
  2018  		// retake P's blocked in syscalls
  2019  		if(retake(ticks))
  2020  			idle = 0;
  2021  		else
  2022  			idle++;
  2023  	}
  2024  }
  2025  
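        // retake sweeps all P's once per sysmon cycle. ticks holds a per-P snapshot of
        // p->tick from the previous sweep; if the tick has not moved and the P is still
        // in Psyscall, the syscall has lasted at least one full sysmon interval, so the
        // P is CAS'ed to Pidle and handed off (handoffp). The retake is skipped when the
        // P's run queue is empty and there are already spinning M's or idle P's to pick
        // up any work.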
  2026  static uint32
  2027  retake(uint32 *ticks)
  2028  {
  2029  	uint32 i, s, n;
  2030  	int64 t;
  2031  	P *p;
  2032  
  2033  	n = 0;
  2034  	for(i = 0; i < runtime·gomaxprocs; i++) {
  2035  		p = runtime·allp[i];
  2036  		if(p==nil)
  2037  			continue;
  2038  		t = p->tick;
  2039  		if(ticks[i] != t) {
  2040  			ticks[i] = t;
  2041  			continue;
  2042  		}
  2043  		s = p->status;
  2044  		if(s != Psyscall)
  2045  			continue;
  2046  		if(p->runqhead == p->runqtail && runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0)  // TODO: fast atomic
  2047  			continue;
  2048  		// Need to decrement the number of idle locked M's (pretending that one
  2049  		// more is running) before the CAS. Otherwise the M from which we retake
  2050  		// can exit the syscall, increment nmidle and report a deadlock.
  2051  		inclocked(-1);
  2052  		if(runtime·cas(&p->status, s, Pidle)) {
  2053  			n++;
  2054  			handoffp(p);
  2055  		}
  2056  		inclocked(1);
  2057  	}
  2058  	return n;
  2059  }
  2060  
  2061  // Put mp on midle list.
  2062  // Sched must be locked.
  2063  static void
  2064  mput(M *mp)
  2065  {
  2066  	mp->schedlink = runtime·sched.midle;
  2067  	runtime·sched.midle = mp;
  2068  	runtime·sched.nmidle++;
  2069  	checkdead();
  2070  }
  2071  
  2072  // Try to get an m from midle list.
  2073  // Sched must be locked.
  2074  static M*
  2075  mget(void)
  2076  {
  2077  	M *mp;
  2078  
  2079  	if((mp = runtime·sched.midle) != nil){
  2080  		runtime·sched.midle = mp->schedlink;
  2081  		runtime·sched.nmidle--;
  2082  	}
  2083  	return mp;
  2084  }
  2085  
  2086  // Put gp on the global runnable queue.
  2087  // Sched must be locked.
  2088  static void
  2089  globrunqput(G *gp)
  2090  {
  2091  	gp->schedlink = nil;
  2092  	if(runtime·sched.runqtail)
  2093  		runtime·sched.runqtail->schedlink = gp;
  2094  	else
  2095  		runtime·sched.runqhead = gp;
  2096  	runtime·sched.runqtail = gp;
  2097  	runtime·sched.runqsize++;
  2098  }
  2099  
  2100  // Try to get a batch of G's from the global runnable queue.
  2101  // Sched must be locked.
  2102  static G*
  2103  globrunqget(P *p)
  2104  {
  2105  	G *gp, *gp1;
  2106  	int32 n;
  2107  
  2108  	if(runtime·sched.runqsize == 0)
  2109  		return nil;
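        	// Grab roughly this P's fair share of the global queue (at least one G);
        	// the first G is returned to the caller and the rest of the batch is
        	// pushed onto p's local queue below.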
  2110  	n = runtime·sched.runqsize/runtime·gomaxprocs+1;
  2111  	if(n > runtime·sched.runqsize)
  2112  		n = runtime·sched.runqsize;
  2113  	runtime·sched.runqsize -= n;
  2114  	if(runtime·sched.runqsize == 0)
  2115  		runtime·sched.runqtail = nil;
  2116  	gp = runtime·sched.runqhead;
  2117  	runtime·sched.runqhead = gp->schedlink;
  2118  	n--;
  2119  	while(n--) {
  2120  		gp1 = runtime·sched.runqhead;
  2121  		runtime·sched.runqhead = gp1->schedlink;
  2122  		runqput(p, gp1);
  2123  	}
  2124  	return gp;
  2125  }
  2126  
  2127  // Put p on the pidle list.
  2128  // Sched must be locked.
  2129  static void
  2130  pidleput(P *p)
  2131  {
  2132  	p->link = runtime·sched.pidle;
  2133  	runtime·sched.pidle = p;
  2134  	runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
  2135  }
  2136  
  2137  // Try to get a p from the pidle list.
  2138  // Sched must be locked.
  2139  static P*
  2140  pidleget(void)
  2141  {
  2142  	P *p;
  2143  
  2144  	p = runtime·sched.pidle;
  2145  	if(p) {
  2146  		runtime·sched.pidle = p->link;
  2147  		runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
  2148  	}
  2149  	return p;
  2150  }
  2151  
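        // The local runnable queue is a circular buffer of p->runqsize slots indexed by
        // p->runqhead (next G to take) and p->runqtail (next free slot):
        //	empty: runqtail == runqhead
        //	full:  runqtail == runqhead-1, or runqhead == 0 && runqtail == runqsize-1
        // One slot is always left unused so that full and empty are distinguishable;
        // runqput doubles the buffer via runqgrow when it finds the queue full.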
  2152  // Put g on local runnable queue.
  2153  // TODO(dvyukov): consider using lock-free queue.
  2154  static void
  2155  runqput(P *p, G *gp)
  2156  {
  2157  	int32 h, t, s;
  2158  
  2159  	runtime·lock(p);
  2160  retry:
  2161  	h = p->runqhead;
  2162  	t = p->runqtail;
  2163  	s = p->runqsize;
  2164  	if(t == h-1 || (h == 0 && t == s-1)) {
  2165  		runqgrow(p);
  2166  		goto retry;
  2167  	}
  2168  	p->runq[t++] = gp;
  2169  	if(t == s)
  2170  		t = 0;
  2171  	p->runqtail = t;
  2172  	runtime·unlock(p);
  2173  }
  2174  
  2175  // Get g from local runnable queue.
  2176  static G*
  2177  runqget(P *p)
  2178  {
  2179  	G *gp;
  2180  	int32 t, h, s;
  2181  
  2182  	if(p->runqhead == p->runqtail)
  2183  		return nil;
  2184  	runtime·lock(p);
  2185  	h = p->runqhead;
  2186  	t = p->runqtail;
  2187  	s = p->runqsize;
  2188  	if(t == h) {
  2189  		runtime·unlock(p);
  2190  		return nil;
  2191  	}
  2192  	gp = p->runq[h++];
  2193  	if(h == s)
  2194  		h = 0;
  2195  	p->runqhead = h;
  2196  	runtime·unlock(p);
  2197  	return gp;
  2198  }
  2199  
  2200  // Grow the local runnable queue.
  2201  // TODO(dvyukov): consider using a fixed-size array
  2202  // and transferring the excess to the global list (the local queue can grow way too big).
  2203  static void
  2204  runqgrow(P *p)
  2205  {
  2206  	G **q;
  2207  	int32 s, t, h, t2;
  2208  
  2209  	h = p->runqhead;
  2210  	t = p->runqtail;
  2211  	s = p->runqsize;
  2212  	t2 = 0;
  2213  	q = runtime·malloc(2*s*sizeof(*q));
  2214  	while(t != h) {
  2215  		q[t2++] = p->runq[h++];
  2216  		if(h == s)
  2217  			h = 0;
  2218  	}
  2219  	runtime·free(p->runq);
  2220  	p->runq = q;
  2221  	p->runqhead = 0;
  2222  	p->runqtail = t2;
  2223  	p->runqsize = 2*s;
  2224  }
  2225  
  2226  // Steal half of the elements from the local runnable queue of p2
  2227  // and put them onto the local runnable queue of p.
  2228  // Returns one of the stolen elements (or nil if it failed).
  2229  static G*
  2230  runqsteal(P *p, P *p2)
  2231  {
  2232  	G *gp, *gp1;
  2233  	int32 t, h, s, t2, h2, s2, c, i;
  2234  
  2235  	if(p2->runqhead == p2->runqtail)
  2236  		return nil;
  2237  	// lock the P's in address order to prevent deadlocks
  2238  	if(p < p2)
  2239  		runtime·lock(p);
  2240  	runtime·lock(p2);
  2241  	if(p2->runqhead == p2->runqtail) {
  2242  		runtime·unlock(p2);
  2243  		if(p < p2)
  2244  			runtime·unlock(p);
  2245  		return nil;
  2246  	}
  2247  	if(p >= p2)
  2248  		runtime·lock(p);
  2249  	// now we've locked both queues and know the victim is not empty
  2250  	h = p->runqhead;
  2251  	t = p->runqtail;
  2252  	s = p->runqsize;
  2253  	h2 = p2->runqhead;
  2254  	t2 = p2->runqtail;
  2255  	s2 = p2->runqsize;
  2256  	gp = p2->runq[h2++];  // return value
  2257  	if(h2 == s2)
  2258  		h2 = 0;
  2259  	// steal roughly half (the second case below counts the elements across the buffer wrap-around)
  2260  	if(t2 > h2)
  2261  		c = (t2 - h2) / 2;
  2262  	else
  2263  		c = (s2 - h2 + t2) / 2;
  2264  	// copy
  2265  	for(i = 0; i != c; i++) {
  2266  		// stop if the target queue is full (one slot is always kept free)
  2267  		if(t == h-1 || (h == 0 && t == s-1))
  2268  			break;
  2269  		// stop if the victim queue is empty
  2270  		if(t2 == h2)
  2271  			break;
  2272  		gp1 = p2->runq[h2++];
  2273  		if(h2 == s2)
  2274  			h2 = 0;
  2275  		p->runq[t++] = gp1;
  2276  		if(t == s)
  2277  			t = 0;
  2278  	}
  2279  	p->runqtail = t;
  2280  	p2->runqhead = h2;
  2281  	runtime·unlock(p2);
  2282  	runtime·unlock(p);
  2283  	return gp;
  2284  }
  2285  
  2286  void
  2287  runtime·testSchedLocalQueue(void)
  2288  {
  2289  	P p;
  2290  	G gs[1000];
  2291  	int32 i, j;
  2292  
  2293  	runtime·memclr((byte*)&p, sizeof(p));
  2294  	p.runqsize = 1;
  2295  	p.runqhead = 0;
  2296  	p.runqtail = 0;
  2297  	p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));
  2298  
  2299  	for(i = 0; i < nelem(gs); i++) {
  2300  		if(runqget(&p) != nil)
  2301  			runtime·throw("runq is not empty initially");
  2302  		for(j = 0; j < i; j++)
  2303  			runqput(&p, &gs[i]);
  2304  		for(j = 0; j < i; j++) {
  2305  			if(runqget(&p) != &gs[i]) {
  2306  				runtime·printf("bad element at iter %d/%d\n", i, j);
  2307  				runtime·throw("bad element");
  2308  			}
  2309  		}
  2310  		if(runqget(&p) != nil)
  2311  			runtime·throw("runq is not empty afterwards");
  2312  	}
  2313  }
  2314  
  2315  void
  2316  runtime·testSchedLocalQueueSteal(void)
  2317  {
  2318  	P p1, p2;
  2319  	G gs[1000], *gp;
  2320  	int32 i, j, s;
  2321  
  2322  	runtime·memclr((byte*)&p1, sizeof(p1));
  2323  	p1.runqsize = 1;
  2324  	p1.runqhead = 0;
  2325  	p1.runqtail = 0;
  2326  	p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq));
  2327  
  2328  	runtime·memclr((byte*)&p2, sizeof(p2));
  2329  	p2.runqsize = nelem(gs);
  2330  	p2.runqhead = 0;
  2331  	p2.runqtail = 0;
  2332  	p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));
  2333  
  2334  	for(i = 0; i < nelem(gs); i++) {
  2335  		for(j = 0; j < i; j++) {
  2336  			gs[j].sig = 0;
  2337  			runqput(&p1, &gs[j]);
  2338  		}
  2339  		gp = runqsteal(&p2, &p1);
  2340  		s = 0;
  2341  		if(gp) {
  2342  			s++;
  2343  			gp->sig++;
  2344  		}
  2345  		while(gp = runqget(&p2)) {
  2346  			s++;
  2347  			gp->sig++;
  2348  		}
  2349  		while(gp = runqget(&p1))
  2350  			gp->sig++;
  2351  		for(j = 0; j < i; j++) {
  2352  			if(gs[j].sig != 1) {
  2353  				runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
  2354  				runtime·throw("bad element");
  2355  			}
  2356  		}
  2357  		if(s != i/2 && s != i/2+1) {
  2358  			runtime·printf("bad steal %d, want %d or %d, iter %d\n",
  2359  				s, i/2, i/2+1, i);
  2360  			runtime·throw("bad steal");
  2361  		}
  2362  	}
  2363  }
  2364