github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/proc.c

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "runtime.h"
     6  #include "arch_GOARCH.h"
     7  #include "zaexperiment.h"
     8  #include "malloc.h"
     9  #include "stack.h"
    10  #include "race.h"
    11  #include "type.h"
    12  #include "../../cmd/ld/textflag.h"
    13  
    14  // Goroutine scheduler
    15  // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
    16  //
    17  // The main concepts are:
    18  // G - goroutine.
    19  // M - worker thread, or machine.
    20  // P - processor, a resource that is required to execute Go code.
    21  //     M must have an associated P to execute Go code; however, it can be
    22  //     blocked or in a syscall w/o an associated P.
    23  //
    24  // Design doc at http://golang.org/s/go11sched.
    25  
    26  typedef struct Sched Sched;
    27  struct Sched {
    28  	Lock;
    29  
    30  	uint64	goidgen;
    31  
    32  	M*	midle;	 // idle m's waiting for work
    33  	int32	nmidle;	 // number of idle m's waiting for work
    34  	int32	nmidlelocked; // number of locked m's waiting for work
    35  	int32	mcount;	 // number of m's that have been created
    36  	int32	maxmcount;	// maximum number of m's allowed (or die)
    37  
    38  	P*	pidle;  // idle P's
    39  	uint32	npidle;
    40  	uint32	nmspinning;
    41  
    42  	// Global runnable queue.
    43  	G*	runqhead;
    44  	G*	runqtail;
    45  	int32	runqsize;
    46  
    47  	// Global cache of dead G's.
    48  	Lock	gflock;
    49  	G*	gfree;
    50  
    51  	uint32	gcwaiting;	// gc is waiting to run
    52  	int32	stopwait;
    53  	Note	stopnote;
    54  	uint32	sysmonwait;
    55  	Note	sysmonnote;
    56  	uint64	lastpoll;
    57  
    58  	int32	profilehz;	// cpu profiling rate
    59  };
    60  
    61  // The max value of GOMAXPROCS.
    62  // There are no fundamental restrictions on the value.
    63  enum { MaxGomaxprocs = 1<<8 };
    64  
    65  Sched	runtime·sched;
    66  int32	runtime·gomaxprocs;
    67  uint32	runtime·needextram;
    68  bool	runtime·iscgo;
    69  M	runtime·m0;
    70  G	runtime·g0;	 // idle goroutine for m0
    71  G*	runtime·allg;
    72  G*	runtime·lastg;
    73  M*	runtime·allm;
    74  M*	runtime·extram;
    75  int8*	runtime·goos;
    76  int32	runtime·ncpu;
    77  static int32	newprocs;
    78  
    79  void runtime·mstart(void);
    80  static void runqput(P*, G*);
    81  static G* runqget(P*);
    82  static void runqgrow(P*);
    83  static G* runqsteal(P*, P*);
    84  static void mput(M*);
    85  static M* mget(void);
    86  static void mcommoninit(M*);
    87  static void schedule(void);
    88  static void procresize(int32);
    89  static void acquirep(P*);
    90  static P* releasep(void);
    91  static void newm(void(*)(void), P*);
    92  static void stopm(void);
    93  static void startm(P*, bool);
    94  static void handoffp(P*);
    95  static void wakep(void);
    96  static void stoplockedm(void);
    97  static void startlockedm(G*);
    98  static void sysmon(void);
    99  static uint32 retake(int64);
   100  static void incidlelocked(int32);
   101  static void checkdead(void);
   102  static void exitsyscall0(G*);
   103  static void park0(G*);
   104  static void goexit0(G*);
   105  static void gfput(P*, G*);
   106  static G* gfget(P*);
   107  static void gfpurge(P*);
   108  static void globrunqput(G*);
   109  static G* globrunqget(P*, int32);
   110  static P* pidleget(void);
   111  static void pidleput(P*);
   112  static void injectglist(G*);
   113  static bool preemptall(void);
   114  static bool preemptone(P*);
   115  static bool exitsyscallfast(void);
   116  static bool haveexperiment(int8*);
   117  
   118  // The bootstrap sequence is:
   119  //
   120  //	call osinit
   121  //	call schedinit
   122  //	make & queue new G
   123  //	call runtime·mstart
   124  //
   125  // The new G calls runtime·main.
   126  void
   127  runtime·schedinit(void)
   128  {
   129  	int32 n, procs;
   130  	byte *p;
   131  	Eface i;
   132  
   133  	runtime·sched.maxmcount = 10000;
   134  	runtime·precisestack = haveexperiment("precisestack");
   135  
   136  	runtime·mprofinit();
   137  	runtime·mallocinit();
   138  	mcommoninit(m);
   139  	
   140  	// Initialize the itable value for newErrorCString,
   141  	// so that the next time it gets called, possibly
   142  	// in a fault during a garbage collection, it will not
   143  // need to allocate memory.
   144  	runtime·newErrorCString(0, &i);
   145  
   146  	runtime·goargs();
   147  	runtime·goenvs();
   148  	runtime·parsedebugvars();
   149  
   150  	// Allocate the internal symbol table representation now; we need it for GC anyway.
   151  	runtime·symtabinit();
   152  
   153  	runtime·sched.lastpoll = runtime·nanotime();
   154  	procs = 1;
   155  	p = runtime·getenv("GOMAXPROCS");
   156  	if(p != nil && (n = runtime·atoi(p)) > 0) {
   157  		if(n > MaxGomaxprocs)
   158  			n = MaxGomaxprocs;
   159  		procs = n;
   160  	}
   161  	runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
   162  	procresize(procs);
   163  
   164  	mstats.enablegc = 1;
   165  
   166  	if(raceenabled)
   167  		g->racectx = runtime·raceinit();
   168  }
   169  
   170  extern void main·init(void);
   171  extern void main·main(void);
   172  
   173  static FuncVal scavenger = {runtime·MHeap_Scavenger};
   174  
   175  static FuncVal initDone = { runtime·unlockOSThread };
   176  
   177  // The main goroutine.
   178  void
   179  runtime·main(void)
   180  {
   181  	Defer d;
   182  	
   183  	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
   184  	// Using decimal instead of binary GB and MB because
   185  	// they look nicer in the stack overflow failure message.
   186  	if(sizeof(void*) == 8)
   187  		runtime·maxstacksize = 1000000000;
   188  	else
   189  		runtime·maxstacksize = 250000000;
   190  
   191  	newm(sysmon, nil);
   192  
   193  	// Lock the main goroutine onto this, the main OS thread,
   194  	// during initialization.  Most programs won't care, but a few
   195  	// do require certain calls to be made by the main thread.
   196  	// Those can arrange for main.main to run in the main thread
   197  	// by calling runtime.LockOSThread during initialization
   198  	// to preserve the lock.
   199  	runtime·lockOSThread();
   200  	
   201  	// Defer unlock so that runtime.Goexit during init does the unlock too.
   202  	d.fn = &initDone;
   203  	d.siz = 0;
   204  	d.link = g->defer;
   205  	d.argp = (void*)-1;
   206  	d.special = true;
   207  	d.free = false;
   208  	g->defer = &d;
   209  
   210  	if(m != &runtime·m0)
   211  		runtime·throw("runtime·main not on m0");
   212  	runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
   213  	main·init();
   214  
   215  	if(g->defer != &d || d.fn != &initDone)
   216  		runtime·throw("runtime: bad defer entry after init");
   217  	g->defer = d.link;
   218  	runtime·unlockOSThread();
   219  
   220  	main·main();
   221  	if(raceenabled)
   222  		runtime·racefini();
   223  
   224  	// Make racy client program work: if panicking on
   225  	// another goroutine at the same time as main returns,
   226  	// let the other goroutine finish printing the panic trace.
   227  	// Once it does, it will exit. See issue 3934.
   228  	if(runtime·panicking)
   229  		runtime·park(nil, nil, "panicwait");
   230  
   231  	runtime·exit(0);
   232  	for(;;)
   233  		*(int32*)runtime·main = 0;
   234  }
   235  
   236  void
   237  runtime·goroutineheader(G *gp)
   238  {
   239  	int8 *status;
   240  
   241  	switch(gp->status) {
   242  	case Gidle:
   243  		status = "idle";
   244  		break;
   245  	case Grunnable:
   246  		status = "runnable";
   247  		break;
   248  	case Grunning:
   249  		status = "running";
   250  		break;
   251  	case Gsyscall:
   252  		status = "syscall";
   253  		break;
   254  	case Gwaiting:
   255  		if(gp->waitreason)
   256  			status = gp->waitreason;
   257  		else
   258  			status = "waiting";
   259  		break;
   260  	default:
   261  		status = "???";
   262  		break;
   263  	}
   264  	runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
   265  }
   266  
   267  void
   268  runtime·tracebackothers(G *me)
   269  {
   270  	G *gp;
   271  	int32 traceback;
   272  
   273  	traceback = runtime·gotraceback(nil);
   274  	
   275  	// Show the current goroutine first, if we haven't already.
   276  	if((gp = m->curg) != nil && gp != me) {
   277  		runtime·printf("\n");
   278  		runtime·goroutineheader(gp);
   279  		runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   280  	}
   281  
   282  	for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
   283  		if(gp == me || gp == m->curg || gp->status == Gdead)
   284  			continue;
   285  		if(gp->issystem && traceback < 2)
   286  			continue;
   287  		runtime·printf("\n");
   288  		runtime·goroutineheader(gp);
   289  		if(gp->status == Grunning) {
   290  			runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
   291  			runtime·printcreatedby(gp);
   292  		} else
   293  			runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
   294  	}
   295  }
   296  
   297  static void
   298  checkmcount(void)
   299  {
   300  	// sched lock is held
   301  	if(runtime·sched.mcount > runtime·sched.maxmcount) {
   302  		runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
   303  		runtime·throw("thread exhaustion");
   304  	}
   305  }
   306  
   307  static void
   308  mcommoninit(M *mp)
   309  {
   310  	// If there is no mcache, runtime·callers() will crash,
   311  	// and we are most likely in the sysmon thread, so the stack is senseless anyway.
   312  	if(m->mcache)
   313  		runtime·callers(1, mp->createstack, nelem(mp->createstack));
   314  
   315  	mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
   316  
   317  	runtime·lock(&runtime·sched);
   318  	mp->id = runtime·sched.mcount++;
   319  	checkmcount();
   320  	runtime·mpreinit(mp);
   321  
   322  	// Add to runtime·allm so garbage collector doesn't free m
   323  	// when it is just in a register or thread-local storage.
   324  	mp->alllink = runtime·allm;
   325  	// runtime·NumCgoCall() iterates over allm w/o schedlock,
   326  	// so we need to publish it safely.
   327  	runtime·atomicstorep(&runtime·allm, mp);
   328  	runtime·unlock(&runtime·sched);
   329  }
   330  
   331  // Mark gp ready to run.
   332  void
   333  runtime·ready(G *gp)
   334  {
   335  	// Mark runnable.
   336  	m->locks++;  // disable preemption because it can be holding p in a local var
   337  	if(gp->status != Gwaiting) {
   338  		runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
   339  		runtime·throw("bad g->status in ready");
   340  	}
   341  	gp->status = Grunnable;
   342  	runqput(m->p, gp);
   343  	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
   344  		wakep();
   345  	m->locks--;
   346  	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   347  		g->stackguard0 = StackPreempt;
   348  }
   349  
   350  int32
   351  runtime·gcprocs(void)
   352  {
   353  	int32 n;
   354  
   355  	// Figure out how many CPUs to use during GC.
   356  	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
   357  	runtime·lock(&runtime·sched);
   358  	n = runtime·gomaxprocs;
   359  	if(n > runtime·ncpu)
   360  		n = runtime·ncpu;
   361  	if(n > MaxGcproc)
   362  		n = MaxGcproc;
   363  	if(n > runtime·sched.nmidle+1) // one M is currently running
   364  		n = runtime·sched.nmidle+1;
   365  	runtime·unlock(&runtime·sched);
   366  	return n;
   367  }
   368  
   369  static bool
   370  needaddgcproc(void)
   371  {
   372  	int32 n;
   373  
   374  	runtime·lock(&runtime·sched);
   375  	n = runtime·gomaxprocs;
   376  	if(n > runtime·ncpu)
   377  		n = runtime·ncpu;
   378  	if(n > MaxGcproc)
   379  		n = MaxGcproc;
   380  	n -= runtime·sched.nmidle+1; // one M is currently running
   381  	runtime·unlock(&runtime·sched);
   382  	return n > 0;
   383  }
   384  
   385  void
   386  runtime·helpgc(int32 nproc)
   387  {
   388  	M *mp;
   389  	int32 n, pos;
   390  
   391  	runtime·lock(&runtime·sched);
   392  	pos = 0;
   393  	for(n = 1; n < nproc; n++) {  // one M is currently running
   394  		if(runtime·allp[pos]->mcache == m->mcache)
   395  			pos++;
   396  		mp = mget();
   397  		if(mp == nil)
   398  			runtime·throw("runtime·gcprocs inconsistency");
   399  		mp->helpgc = n;
   400  		mp->mcache = runtime·allp[pos]->mcache;
   401  		pos++;
   402  		runtime·notewakeup(&mp->park);
   403  	}
   404  	runtime·unlock(&runtime·sched);
   405  }
   406  
   407  // Similar to stoptheworld but best-effort and can be called several times.
   408  // There is no reverse operation; it is used during crashing.
   409  // This function must not lock any mutexes.
   410  void
   411  runtime·freezetheworld(void)
   412  {
   413  	int32 i;
   414  
   415  	if(runtime·gomaxprocs == 1)
   416  		return;
   417  	// stopwait and preemption requests can be lost
   418  	// due to races with concurrently executing threads,
   419  	// so try several times
   420  	for(i = 0; i < 5; i++) {
   421  		// this should tell the scheduler to not start any new goroutines
   422  		runtime·sched.stopwait = 0x7fffffff;
   423  		runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   424  		// this should stop running goroutines
   425  		if(!preemptall())
   426  			break;  // no running goroutines
   427  		runtime·usleep(1000);
   428  	}
   429  	// to be sure
   430  	runtime·usleep(1000);
   431  	preemptall();
   432  	runtime·usleep(1000);
   433  }
   434  
   435  void
   436  runtime·stoptheworld(void)
   437  {
   438  	int32 i;
   439  	uint32 s;
   440  	P *p;
   441  	bool wait;
   442  
   443  	runtime·lock(&runtime·sched);
   444  	runtime·sched.stopwait = runtime·gomaxprocs;
   445  	runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
   446  	preemptall();
   447  	// stop current P
   448  	m->p->status = Pgcstop;
   449  	runtime·sched.stopwait--;
   450  	// try to retake all P's in Psyscall status
   451  	for(i = 0; i < runtime·gomaxprocs; i++) {
   452  		p = runtime·allp[i];
   453  		s = p->status;
   454  		if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
   455  			runtime·sched.stopwait--;
   456  	}
   457  	// stop idle P's
   458  	while(p = pidleget()) {
   459  		p->status = Pgcstop;
   460  		runtime·sched.stopwait--;
   461  	}
   462  	wait = runtime·sched.stopwait > 0;
   463  	runtime·unlock(&runtime·sched);
   464  
   465  	// wait for remaining P's to stop voluntarily
   466  	if(wait) {
   467  		for(;;) {
   468  			// wait for 100us, then try to re-preempt in case of any races
   469  			if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
   470  				runtime·noteclear(&runtime·sched.stopnote);
   471  				break;
   472  			}
   473  			preemptall();
   474  		}
   475  	}
   476  	if(runtime·sched.stopwait)
   477  		runtime·throw("stoptheworld: not stopped");
   478  	for(i = 0; i < runtime·gomaxprocs; i++) {
   479  		p = runtime·allp[i];
   480  		if(p->status != Pgcstop)
   481  			runtime·throw("stoptheworld: not stopped");
   482  	}
   483  }
   484  
   485  static void
   486  mhelpgc(void)
   487  {
   488  	m->helpgc = -1;
   489  }
   490  
   491  void
   492  runtime·starttheworld(void)
   493  {
   494  	P *p, *p1;
   495  	M *mp;
   496  	G *gp;
   497  	bool add;
   498  
   499  	m->locks++;  // disable preemption because it can be holding p in a local var
   500  	gp = runtime·netpoll(false);  // non-blocking
   501  	injectglist(gp);
   502  	add = needaddgcproc();
   503  	runtime·lock(&runtime·sched);
   504  	if(newprocs) {
   505  		procresize(newprocs);
   506  		newprocs = 0;
   507  	} else
   508  		procresize(runtime·gomaxprocs);
   509  	runtime·sched.gcwaiting = 0;
   510  
   511  	p1 = nil;
   512  	while(p = pidleget()) {
   513  		// procresize() puts p's with work at the beginning of the list.
   514  		// Once we reach a p without a run queue, the rest don't have one either.
   515  		if(p->runqhead == p->runqtail) {
   516  			pidleput(p);
   517  			break;
   518  		}
   519  		p->m = mget();
   520  		p->link = p1;
   521  		p1 = p;
   522  	}
   523  	if(runtime·sched.sysmonwait) {
   524  		runtime·sched.sysmonwait = false;
   525  		runtime·notewakeup(&runtime·sched.sysmonnote);
   526  	}
   527  	runtime·unlock(&runtime·sched);
   528  
   529  	while(p1) {
   530  		p = p1;
   531  		p1 = p1->link;
   532  		if(p->m) {
   533  			mp = p->m;
   534  			p->m = nil;
   535  			if(mp->nextp)
   536  				runtime·throw("starttheworld: inconsistent mp->nextp");
   537  			mp->nextp = p;
   538  			runtime·notewakeup(&mp->park);
   539  		} else {
   540  			// Start M to run P.  Do not start another M below.
   541  			newm(nil, p);
   542  			add = false;
   543  		}
   544  	}
   545  
   546  	if(add) {
   547  		// If GC could have used another helper proc, start one now,
   548  		// in the hope that it will be available next time.
   549  		// It would have been even better to start it before the collection,
   550  		// but doing so requires allocating memory, so it's tricky to
   551  		// coordinate.  This lazy approach works out in practice:
   552  		// we don't mind if the first couple gc rounds don't have quite
   553  		// the maximum number of procs.
   554  		newm(mhelpgc, nil);
   555  	}
   556  	m->locks--;
   557  	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   558  		g->stackguard0 = StackPreempt;
   559  }
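
        // Illustrative sketch (not from this file): a caller such as the garbage
        // collector brackets a collection with these two calls, roughly:
        //
        //	runtime·semacquire(&runtime·worldsema, false);
        //	m->gcing = 1;
        //	runtime·stoptheworld();
        //	... scan and free with the world stopped ...
        //	m->gcing = 0;
        //	runtime·semrelease(&runtime·worldsema);
        //	runtime·starttheworld();
        //
        // The helpers named here (worldsema, gcing) live in the collector, not in
        // this file; the sketch only shows the stop/start pairing.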
   560  
   561  // Called to start an M.
   562  void
   563  runtime·mstart(void)
   564  {
   565  #ifdef GOOS_windows
   566  #ifdef GOARCH_386
   567  	// The SEH frame is used by windows-386 only. Unfortunately, seh needs
   568  	// to be located on the OS stack, and mstart runs on the OS stack
   569  	// for both m0 and m.
   570  	SEH seh;
   571  #endif
   572  #endif
   573  
   574  	if(g != m->g0)
   575  		runtime·throw("bad runtime·mstart");
   576  
   577  	// Record top of stack for use by mcall.
   578  	// Once we call schedule we're never coming back,
   579  	// so other calls can reuse this stack space.
   580  	runtime·gosave(&m->g0->sched);
   581  	m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
   582  	m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
   583  #ifdef GOOS_windows
   584  #ifdef GOARCH_386
   585  	m->seh = &seh;
   586  #endif
   587  #endif
   588  	runtime·asminit();
   589  	runtime·minit();
   590  
   591  	// Install signal handlers; after minit so that minit can
   592  	// prepare the thread to be able to handle the signals.
   593  	if(m == &runtime·m0)
   594  		runtime·initsig();
   595  	
   596  	if(m->mstartfn)
   597  		m->mstartfn();
   598  
   599  	if(m->helpgc) {
   600  		m->helpgc = 0;
   601  		stopm();
   602  	} else if(m != &runtime·m0) {
   603  		acquirep(m->nextp);
   604  		m->nextp = nil;
   605  	}
   606  	schedule();
   607  
   608  	// TODO(brainman): This point is never reached, because scheduler
   609  	// does not release os threads at the moment. But once this path
   610  	// is enabled, we must remove our seh here.
   611  }
   612  
   613  // When running with cgo, we call _cgo_thread_start
   614  // to start threads for us so that we can play nicely with
   615  // foreign code.
   616  void (*_cgo_thread_start)(void*);
   617  
   618  typedef struct CgoThreadStart CgoThreadStart;
   619  struct CgoThreadStart
   620  {
   621  	M *m;
   622  	G *g;
   623  	void (*fn)(void);
   624  };
   625  
   626  // Allocate a new m unassociated with any thread.
   627  // Can use p for allocation context if needed.
   628  M*
   629  runtime·allocm(P *p)
   630  {
   631  	M *mp;
   632  	static Type *mtype;  // The Go type M
   633  
   634  	m->locks++;  // disable GC because it can be called from sysmon
   635  	if(m->p == nil)
   636  		acquirep(p);  // temporarily borrow p for mallocs in this function
   637  	if(mtype == nil) {
   638  		Eface e;
   639  		runtime·gc_m_ptr(&e);
   640  		mtype = ((PtrType*)e.type)->elem;
   641  	}
   642  
   643  	mp = runtime·cnew(mtype);
   644  	mcommoninit(mp);
   645  
   646  	// In case of cgo, pthread_create will make us a stack.
   647  	// Windows will lay out the sched stack on the OS stack.
   648  	if(runtime·iscgo || Windows)
   649  		mp->g0 = runtime·malg(-1);
   650  	else
   651  		mp->g0 = runtime·malg(8192);
   652  
   653  	if(p == m->p)
   654  		releasep();
   655  	m->locks--;
   656  	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
   657  		g->stackguard0 = StackPreempt;
   658  
   659  	return mp;
   660  }
   661  
   662  static M* lockextra(bool nilokay);
   663  static void unlockextra(M*);
   664  
   665  // needm is called when a cgo callback happens on a
   666  // thread without an m (a thread not created by Go).
   667  // In this case, needm is expected to find an m to use
   668  // and return with m, g initialized correctly.
   669  // Since m and g are not set now (likely nil, but see below)
   670  // needm is limited in what routines it can call. In particular
   671  // it can only call nosplit functions (textflag 7) and cannot
   672  // do any scheduling that requires an m.
   673  //
   674  // In order to avoid needing heavy lifting here, we adopt
   675  // the following strategy: there is a stack of available m's
   676  // that can be stolen. Using compare-and-swap
   677  // to pop from the stack has ABA races, so we simulate
   678  // a lock by doing an exchange (via casp) to steal the stack
   679  // head and replace the top pointer with MLOCKED (1).
   680  // This serves as a simple spin lock that we can use even
   681  // without an m. The thread that locks the stack in this way
   682  // unlocks the stack by storing a valid stack head pointer.
   683  //
   684  // In order to make sure that there is always an m structure
   685  // available to be stolen, we maintain the invariant that there
   686  // is always one more than needed. At the beginning of the
   687  // program (if cgo is in use) the list is seeded with a single m.
   688  // If needm finds that it has taken the last m off the list, its job
   689  // is - once it has installed its own m so that it can do things like
   690  // allocate memory - to create a spare m and put it on the list.
   691  //
   692  // Each of these extra m's also has a g0 and a curg that are
   693  // pressed into service as the scheduling stack and current
   694  // goroutine for the duration of the cgo callback.
   695  //
   696  // When the callback is done with the m, it calls dropm to
   697  // put the m back on the list.
   698  #pragma textflag NOSPLIT
   699  void
   700  runtime·needm(byte x)
   701  {
   702  	M *mp;
   703  
   704  	if(runtime·needextram) {
   705  		// Can happen if C/C++ code calls Go from a global ctor.
   706  		// Can not throw, because scheduler is not initialized yet.
   707  		runtime·write(2, "fatal error: cgo callback before cgo call\n",
   708  			sizeof("fatal error: cgo callback before cgo call\n")-1);
   709  		runtime·exit(1);
   710  	}
   711  
   712  	// Lock extra list, take head, unlock popped list.
   713  	// nilokay=false is safe here because of the invariant above,
   714  	// that the extra list always contains or will soon contain
   715  	// at least one m.
   716  	mp = lockextra(false);
   717  
   718  	// Set needextram when we've just emptied the list,
   719  	// so that the eventual call into cgocallbackg will
   720  	// allocate a new m for the extra list. We delay the
   721  	// allocation until then so that it can be done
   722  	// after exitsyscall makes sure it is okay to be
   723  	// running at all (that is, there's no garbage collection
   724  	// running right now).
   725  	mp->needextram = mp->schedlink == nil;
   726  	unlockextra(mp->schedlink);
   727  
   728  	// Install m and g (= m->g0) and set the stack bounds
   729  	// to match the current stack. We don't actually know
   730  	// how big the stack is, like we don't know how big any
   731  	// scheduling stack is, but we assume there's at least 32 kB,
   732  	// which is more than enough for us.
   733  	runtime·setmg(mp, mp->g0);
   734  	g->stackbase = (uintptr)(&x + 1024);
   735  	g->stackguard = (uintptr)(&x - 32*1024);
   736  	g->stackguard0 = g->stackguard;
   737  
   738  #ifdef GOOS_windows
   739  #ifdef GOARCH_386
   740  	// On windows/386, we need to put an SEH frame (two words)
   741  	// somewhere on the current stack. We are called from cgocallback_gofunc
   742  	// and we know that it will leave two unused words below m->curg->sched.sp.
   743  	// Use those.
   744  	m->seh = (SEH*)((uintptr*)&x + 1);
   745  #endif
   746  #endif
   747  
   748  	// Initialize this thread to use the m.
   749  	runtime·asminit();
   750  	runtime·minit();
   751  }
   752  
   753  // newextram allocates an m and puts it on the extra list.
   754  // It is called with a working local m, so that it can do things
   755  // like call schedlock and allocate.
   756  void
   757  runtime·newextram(void)
   758  {
   759  	M *mp, *mnext;
   760  	G *gp;
   761  
   762  	// Create extra goroutine locked to extra m.
   763  	// The goroutine is the context in which the cgo callback will run.
   764  	// The sched.pc will never be returned to, but setting it to
   765  	// runtime.goexit makes clear to the traceback routines where
   766  	// the goroutine stack ends.
   767  	mp = runtime·allocm(nil);
   768  	gp = runtime·malg(4096);
   769  	gp->sched.pc = (uintptr)runtime·goexit;
   770  	gp->sched.sp = gp->stackbase;
   771  	gp->sched.lr = 0;
   772  	gp->sched.g = gp;
   773  	gp->syscallpc = gp->sched.pc;
   774  	gp->syscallsp = gp->sched.sp;
   775  	gp->syscallstack = gp->stackbase;
   776  	gp->syscallguard = gp->stackguard;
   777  	gp->status = Gsyscall;
   778  	mp->curg = gp;
   779  	mp->locked = LockInternal;
   780  	mp->lockedg = gp;
   781  	gp->lockedm = mp;
   782  	gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
   783  	if(raceenabled)
   784  		gp->racectx = runtime·racegostart(runtime·newextram);
   785  	// put on allg for garbage collector
   786  	runtime·lock(&runtime·sched);
   787  	if(runtime·lastg == nil)
   788  		runtime·allg = gp;
   789  	else
   790  		runtime·lastg->alllink = gp;
   791  	runtime·lastg = gp;
   792  	runtime·unlock(&runtime·sched);
   793  
   794  	// Add m to the extra list.
   795  	mnext = lockextra(true);
   796  	mp->schedlink = mnext;
   797  	unlockextra(mp);
   798  }
   799  
   800  // dropm is called when a cgo callback has called needm but is now
   801  // done with the callback and returning back into the non-Go thread.
   802  // It puts the current m back onto the extra list.
   803  //
   804  // The main expense here is the call to signalstack to release the
   805  // m's signal stack, and then the call to needm on the next callback
   806  // from this thread. It is tempting to try to save the m for next time,
   807  // which would eliminate both these costs, but there might not be
   808  // a next time: the current thread (which Go does not control) might exit.
   809  // If we saved the m for that thread, there would be an m leak each time
   810  // such a thread exited. Instead, we acquire and release an m on each
   811  // call. These should typically not be scheduling operations, just a few
   812  // atomics, so the cost should be small.
   813  //
   814  // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
   815  // variable using pthread_key_create. Unlike the pthread keys we already use
   816  // on OS X, this dummy key would never be read by Go code. It would exist
   817  // only so that we could register a thread-exit-time destructor.
   818  // That destructor would put the m back onto the extra list.
   819  // This is purely a performance optimization. The current version,
   820  // in which dropm happens on each cgo call, is still correct too.
   821  // We may have to keep the current version on systems with cgo
   822  // but without pthreads, like Windows.
   823  void
   824  runtime·dropm(void)
   825  {
   826  	M *mp, *mnext;
   827  
   828  	// Undo whatever initialization minit did during needm.
   829  	runtime·unminit();
   830  
   831  #ifdef GOOS_windows
   832  #ifdef GOARCH_386
   833  	m->seh = nil;  // reset dangling typed pointer
   834  #endif
   835  #endif
   836  
   837  	// Clear m and g, and return m to the extra list.
   838  	// After the call to setmg we can only call nosplit functions.
   839  	mp = m;
   840  	runtime·setmg(nil, nil);
   841  
   842  	mnext = lockextra(true);
   843  	mp->schedlink = mnext;
   844  	unlockextra(mp);
   845  }
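
        // Illustrative sketch (an assumption, not from this file): the cgo callback
        // path pairs needm and dropm around the Go code it runs, roughly:
        //
        //	// on a thread that Go did not create
        //	runtime·needm();        // borrow an m (and g0) from the extra list
        //	... run the Go callback via cgocallbackg ...
        //	runtime·dropm();        // put the m back on the extra list
        //
        // The real sequence is driven from assembly (cgocallback_gofunc); the sketch
        // only shows the pairing and the list invariant described above needm.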
   846  
   847  #define MLOCKED ((M*)1)
   848  
   849  // lockextra locks the extra list and returns the list head.
   850  // The caller must unlock the list by storing a new list head
   851  // to runtime.extram. If nilokay is true, then lockextra will
   852  // return a nil list head if that's what it finds. If nilokay is false,
   853  // lockextra will keep waiting until the list head is no longer nil.
   854  #pragma textflag NOSPLIT
   855  static M*
   856  lockextra(bool nilokay)
   857  {
   858  	M *mp;
   859  	void (*yield)(void);
   860  
   861  	for(;;) {
   862  		mp = runtime·atomicloadp(&runtime·extram);
   863  		if(mp == MLOCKED) {
   864  			yield = runtime·osyield;
   865  			yield();
   866  			continue;
   867  		}
   868  		if(mp == nil && !nilokay) {
   869  			runtime·usleep(1);
   870  			continue;
   871  		}
   872  		if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
   873  			yield = runtime·osyield;
   874  			yield();
   875  			continue;
   876  		}
   877  		break;
   878  	}
   879  	return mp;
   880  }
   881  
   882  #pragma textflag NOSPLIT
   883  static void
   884  unlockextra(M *mp)
   885  {
   886  	runtime·atomicstorep(&runtime·extram, mp);
   887  }
   888  
   889  
   890  // Create a new m.  It will start off with a call to fn, or else the scheduler.
   891  static void
   892  newm(void(*fn)(void), P *p)
   893  {
   894  	M *mp;
   895  
   896  	mp = runtime·allocm(p);
   897  	mp->nextp = p;
   898  	mp->mstartfn = fn;
   899  
   900  	if(runtime·iscgo) {
   901  		CgoThreadStart ts;
   902  
   903  		if(_cgo_thread_start == nil)
   904  			runtime·throw("_cgo_thread_start missing");
   905  		ts.m = mp;
   906  		ts.g = mp->g0;
   907  		ts.fn = runtime·mstart;
   908  		runtime·asmcgocall(_cgo_thread_start, &ts);
   909  		return;
   910  	}
   911  	runtime·newosproc(mp, (byte*)mp->g0->stackbase);
   912  }
   913  
   914  // Stops execution of the current m until new work is available.
   915  // Returns with acquired P.
   916  static void
   917  stopm(void)
   918  {
   919  	if(m->locks)
   920  		runtime·throw("stopm holding locks");
   921  	if(m->p)
   922  		runtime·throw("stopm holding p");
   923  	if(m->spinning) {
   924  		m->spinning = false;
   925  		runtime·xadd(&runtime·sched.nmspinning, -1);
   926  	}
   927  
   928  retry:
   929  	runtime·lock(&runtime·sched);
   930  	mput(m);
   931  	runtime·unlock(&runtime·sched);
   932  	runtime·notesleep(&m->park);
   933  	runtime·noteclear(&m->park);
   934  	if(m->helpgc) {
   935  		runtime·gchelper();
   936  		m->helpgc = 0;
   937  		m->mcache = nil;
   938  		goto retry;
   939  	}
   940  	acquirep(m->nextp);
   941  	m->nextp = nil;
   942  }
   943  
   944  static void
   945  mspinning(void)
   946  {
   947  	m->spinning = true;
   948  }
   949  
   950  // Schedules some M to run the p (creates an M if necessary).
   951  // If p==nil, tries to get an idle P; if there are no idle P's, it returns without starting an M.
   952  static void
   953  startm(P *p, bool spinning)
   954  {
   955  	M *mp;
   956  	void (*fn)(void);
   957  
   958  	runtime·lock(&runtime·sched);
   959  	if(p == nil) {
   960  		p = pidleget();
   961  		if(p == nil) {
   962  			runtime·unlock(&runtime·sched);
   963  			if(spinning)
   964  				runtime·xadd(&runtime·sched.nmspinning, -1);
   965  			return;
   966  		}
   967  	}
   968  	mp = mget();
   969  	runtime·unlock(&runtime·sched);
   970  	if(mp == nil) {
   971  		fn = nil;
   972  		if(spinning)
   973  			fn = mspinning;
   974  		newm(fn, p);
   975  		return;
   976  	}
   977  	if(mp->spinning)
   978  		runtime·throw("startm: m is spinning");
   979  	if(mp->nextp)
   980  		runtime·throw("startm: m has p");
   981  	mp->spinning = spinning;
   982  	mp->nextp = p;
   983  	runtime·notewakeup(&mp->park);
   984  }
   985  
   986  // Hands off P from syscall or locked M.
   987  static void
   988  handoffp(P *p)
   989  {
   990  	// if it has local work, start it straight away
   991  	if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
   992  		startm(p, false);
   993  		return;
   994  	}
   995  	// no local work, check that there are no spinning/idle M's,
   996  	// otherwise our help is not required
   997  	if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
   998  		runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
   999  		startm(p, true);
  1000  		return;
  1001  	}
  1002  	runtime·lock(&runtime·sched);
  1003  	if(runtime·sched.gcwaiting) {
  1004  		p->status = Pgcstop;
  1005  		if(--runtime·sched.stopwait == 0)
  1006  			runtime·notewakeup(&runtime·sched.stopnote);
  1007  		runtime·unlock(&runtime·sched);
  1008  		return;
  1009  	}
  1010  	if(runtime·sched.runqsize) {
  1011  		runtime·unlock(&runtime·sched);
  1012  		startm(p, false);
  1013  		return;
  1014  	}
  1015  	// If this is the last running P and nobody is polling the network,
  1016  	// we need to wake up another M to poll the network.
  1017  	if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
  1018  		runtime·unlock(&runtime·sched);
  1019  		startm(p, false);
  1020  		return;
  1021  	}
  1022  	pidleput(p);
  1023  	runtime·unlock(&runtime·sched);
  1024  }
  1025  
  1026  // Tries to add one more P to execute G's.
  1027  // Called when a G is made runnable (newproc, ready).
  1028  static void
  1029  wakep(void)
  1030  {
  1031  	// be conservative about spinning threads
  1032  	if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
  1033  		return;
  1034  	startm(nil, true);
  1035  }
  1036  
  1037  // Stops execution of the current m that is locked to a g until the g is runnable again.
  1038  // Returns with acquired P.
  1039  static void
  1040  stoplockedm(void)
  1041  {
  1042  	P *p;
  1043  
  1044  	if(m->lockedg == nil || m->lockedg->lockedm != m)
  1045  		runtime·throw("stoplockedm: inconsistent locking");
  1046  	if(m->p) {
  1047  		// Schedule another M to run this p.
  1048  		p = releasep();
  1049  		handoffp(p);
  1050  	}
  1051  	incidlelocked(1);
  1052  	// Wait until another thread schedules lockedg again.
  1053  	runtime·notesleep(&m->park);
  1054  	runtime·noteclear(&m->park);
  1055  	if(m->lockedg->status != Grunnable)
  1056  		runtime·throw("stoplockedm: not runnable");
  1057  	acquirep(m->nextp);
  1058  	m->nextp = nil;
  1059  }
  1060  
  1061  // Schedules the locked m to run the locked gp.
  1062  static void
  1063  startlockedm(G *gp)
  1064  {
  1065  	M *mp;
  1066  	P *p;
  1067  
  1068  	mp = gp->lockedm;
  1069  	if(mp == m)
  1070  		runtime·throw("startlockedm: locked to me");
  1071  	if(mp->nextp)
  1072  		runtime·throw("startlockedm: m has p");
  1073  	// directly handoff current P to the locked m
  1074  	incidlelocked(-1);
  1075  	p = releasep();
  1076  	mp->nextp = p;
  1077  	runtime·notewakeup(&mp->park);
  1078  	stopm();
  1079  }
  1080  
  1081  // Stops the current m for stoptheworld.
  1082  // Returns when the world is restarted.
  1083  static void
  1084  gcstopm(void)
  1085  {
  1086  	P *p;
  1087  
  1088  	if(!runtime·sched.gcwaiting)
  1089  		runtime·throw("gcstopm: not waiting for gc");
  1090  	if(m->spinning) {
  1091  		m->spinning = false;
  1092  		runtime·xadd(&runtime·sched.nmspinning, -1);
  1093  	}
  1094  	p = releasep();
  1095  	runtime·lock(&runtime·sched);
  1096  	p->status = Pgcstop;
  1097  	if(--runtime·sched.stopwait == 0)
  1098  		runtime·notewakeup(&runtime·sched.stopnote);
  1099  	runtime·unlock(&runtime·sched);
  1100  	stopm();
  1101  }
  1102  
  1103  // Schedules gp to run on the current M.
  1104  // Never returns.
  1105  static void
  1106  execute(G *gp)
  1107  {
  1108  	int32 hz;
  1109  
  1110  	if(gp->status != Grunnable) {
  1111  		runtime·printf("execute: bad g status %d\n", gp->status);
  1112  		runtime·throw("execute: bad g status");
  1113  	}
  1114  	gp->status = Grunning;
  1115  	gp->preempt = false;
  1116  	gp->stackguard0 = gp->stackguard;
  1117  	m->p->schedtick++;
  1118  	m->curg = gp;
  1119  	gp->m = m;
  1120  
  1121  	// Check whether the profiler needs to be turned on or off.
  1122  	hz = runtime·sched.profilehz;
  1123  	if(m->profilehz != hz)
  1124  		runtime·resetcpuprofiler(hz);
  1125  
  1126  	runtime·gogo(&gp->sched);
  1127  }
  1128  
  1129  // Finds a runnable goroutine to execute.
  1130  // Tries to steal from other P's, get g from global queue, poll network.
  1131  static G*
  1132  findrunnable(void)
  1133  {
  1134  	G *gp;
  1135  	P *p;
  1136  	int32 i;
  1137  
  1138  top:
  1139  	if(runtime·sched.gcwaiting) {
  1140  		gcstopm();
  1141  		goto top;
  1142  	}
  1143  	// local runq
  1144  	gp = runqget(m->p);
  1145  	if(gp)
  1146  		return gp;
  1147  	// global runq
  1148  	if(runtime·sched.runqsize) {
  1149  		runtime·lock(&runtime·sched);
  1150  		gp = globrunqget(m->p, 0);
  1151  		runtime·unlock(&runtime·sched);
  1152  		if(gp)
  1153  			return gp;
  1154  	}
  1155  	// poll network
  1156  	gp = runtime·netpoll(false);  // non-blocking
  1157  	if(gp) {
  1158  		injectglist(gp->schedlink);
  1159  		gp->status = Grunnable;
  1160  		return gp;
  1161  	}
  1162  	// If number of spinning M's >= number of busy P's, block.
  1163  	// This is necessary to prevent excessive CPU consumption
  1164  	// when GOMAXPROCS>>1 but the program parallelism is low.
  1165  	if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
  1166  		goto stop;
  1167  	if(!m->spinning) {
  1168  		m->spinning = true;
  1169  		runtime·xadd(&runtime·sched.nmspinning, 1);
  1170  	}
  1171  	// random steal from other P's
  1172  	for(i = 0; i < 2*runtime·gomaxprocs; i++) {
  1173  		if(runtime·sched.gcwaiting)
  1174  			goto top;
  1175  		p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
  1176  		if(p == m->p)
  1177  			gp = runqget(p);
  1178  		else
  1179  			gp = runqsteal(m->p, p);
  1180  		if(gp)
  1181  			return gp;
  1182  	}
  1183  stop:
  1184  	// return P and block
  1185  	runtime·lock(&runtime·sched);
  1186  	if(runtime·sched.gcwaiting) {
  1187  		runtime·unlock(&runtime·sched);
  1188  		goto top;
  1189  	}
  1190  	if(runtime·sched.runqsize) {
  1191  		gp = globrunqget(m->p, 0);
  1192  		runtime·unlock(&runtime·sched);
  1193  		return gp;
  1194  	}
  1195  	p = releasep();
  1196  	pidleput(p);
  1197  	runtime·unlock(&runtime·sched);
  1198  	if(m->spinning) {
  1199  		m->spinning = false;
  1200  		runtime·xadd(&runtime·sched.nmspinning, -1);
  1201  	}
  1202  	// check all runqueues once again
  1203  	for(i = 0; i < runtime·gomaxprocs; i++) {
  1204  		p = runtime·allp[i];
  1205  		if(p && p->runqhead != p->runqtail) {
  1206  			runtime·lock(&runtime·sched);
  1207  			p = pidleget();
  1208  			runtime·unlock(&runtime·sched);
  1209  			if(p) {
  1210  				acquirep(p);
  1211  				goto top;
  1212  			}
  1213  			break;
  1214  		}
  1215  	}
  1216  	// poll network
  1217  	if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
  1218  		if(m->p)
  1219  			runtime·throw("findrunnable: netpoll with p");
  1220  		if(m->spinning)
  1221  			runtime·throw("findrunnable: netpoll with spinning");
  1222  		gp = runtime·netpoll(true);  // block until new work is available
  1223  		runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
  1224  		if(gp) {
  1225  			runtime·lock(&runtime·sched);
  1226  			p = pidleget();
  1227  			runtime·unlock(&runtime·sched);
  1228  			if(p) {
  1229  				acquirep(p);
  1230  				injectglist(gp->schedlink);
  1231  				gp->status = Grunnable;
  1232  				return gp;
  1233  			}
  1234  			injectglist(gp);
  1235  		}
  1236  	}
  1237  	stopm();
  1238  	goto top;
  1239  }
  1240  
  1241  static void
  1242  resetspinning(void)
  1243  {
  1244  	int32 nmspinning;
  1245  
  1246  	if(m->spinning) {
  1247  		m->spinning = false;
  1248  		nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
  1249  		if(nmspinning < 0)
  1250  			runtime·throw("findrunnable: negative nmspinning");
  1251  	} else
  1252  		nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
  1253  
  1254  	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
  1255  	// so see if we need to wakeup another P here.
  1256  	if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
  1257  		wakep();
  1258  }
  1259  
  1260  // Injects the list of runnable G's into the scheduler.
  1261  // Can run concurrently with GC.
  1262  static void
  1263  injectglist(G *glist)
  1264  {
  1265  	int32 n;
  1266  	G *gp;
  1267  
  1268  	if(glist == nil)
  1269  		return;
  1270  	runtime·lock(&runtime·sched);
  1271  	for(n = 0; glist; n++) {
  1272  		gp = glist;
  1273  		glist = gp->schedlink;
  1274  		gp->status = Grunnable;
  1275  		globrunqput(gp);
  1276  	}
  1277  	runtime·unlock(&runtime·sched);
  1278  
  1279  	for(; n && runtime·sched.npidle; n--)
  1280  		startm(nil, false);
  1281  }
  1282  
  1283  // One round of scheduler: find a runnable goroutine and execute it.
  1284  // Never returns.
  1285  static void
  1286  schedule(void)
  1287  {
  1288  	G *gp;
  1289  	uint32 tick;
  1290  
  1291  	if(m->locks)
  1292  		runtime·throw("schedule: holding locks");
  1293  
  1294  top:
  1295  	if(runtime·sched.gcwaiting) {
  1296  		gcstopm();
  1297  		goto top;
  1298  	}
  1299  
  1300  	gp = nil;
  1301  	// Check the global runnable queue once in a while to ensure fairness.
  1302  	// Otherwise two goroutines can completely occupy the local runqueue
  1303  	// by constantly respawning each other.
  1304  	tick = m->p->schedtick;
  1305  	// This is a fancy way to say tick%61==0;
  1306  	// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
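        	// The constant 0x4325c53f is 2^36/61 rounded up (1126548799), so
        	// ((uint64)tick*0x4325c53f)>>36 equals tick/61 for any uint32 tick,
        	// and the expression below computes tick - (tick/61)*61, i.e. tick%61.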
  1307  	if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
  1308  		runtime·lock(&runtime·sched);
  1309  		gp = globrunqget(m->p, 1);
  1310  		runtime·unlock(&runtime·sched);
  1311  		if(gp)
  1312  			resetspinning();
  1313  	}
  1314  	if(gp == nil) {
  1315  		gp = runqget(m->p);
  1316  		if(gp && m->spinning)
  1317  			runtime·throw("schedule: spinning with local work");
  1318  	}
  1319  	if(gp == nil) {
  1320  		gp = findrunnable();  // blocks until work is available
  1321  		resetspinning();
  1322  	}
  1323  
  1324  	if(gp->lockedm) {
  1325  		// Hands off own p to the locked m,
  1326  		// then blocks waiting for a new p.
  1327  		startlockedm(gp);
  1328  		goto top;
  1329  	}
  1330  
  1331  	execute(gp);
  1332  }
  1333  
  1334  // Puts the current goroutine into a waiting state and unlocks the lock.
  1335  // The goroutine can be made runnable again by calling runtime·ready(gp).
  1336  void
  1337  runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason)
  1338  {
  1339  	m->waitlock = lock;
  1340  	m->waitunlockf = unlockf;
  1341  	g->waitreason = reason;
  1342  	runtime·mcall(park0);
  1343  }
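
        // Illustrative use (an assumption, not from this file): callers typically
        // pass the lock protecting the wait condition, so that park0 releases it
        // only after the goroutine is parked. A channel receive, for example, does
        // roughly:
        //
        //	runtime·park(runtime·unlock, &c->lock, "chan receive");
        //
        // and the sender later calls runtime·ready(gp) to make it runnable again.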
  1344  
  1345  // runtime·park continuation on g0.
  1346  static void
  1347  park0(G *gp)
  1348  {
  1349  	gp->status = Gwaiting;
  1350  	gp->m = nil;
  1351  	m->curg = nil;
  1352  	if(m->waitunlockf) {
  1353  		m->waitunlockf(m->waitlock);
  1354  		m->waitunlockf = nil;
  1355  		m->waitlock = nil;
  1356  	}
  1357  	if(m->lockedg) {
  1358  		stoplockedm();
  1359  		execute(gp);  // Never returns.
  1360  	}
  1361  	schedule();
  1362  }
  1363  
  1364  // Scheduler yield.
  1365  void
  1366  runtime·gosched(void)
  1367  {
  1368  	runtime·mcall(runtime·gosched0);
  1369  }
  1370  
  1371  // runtime·gosched continuation on g0.
  1372  void
  1373  runtime·gosched0(G *gp)
  1374  {
  1375  	gp->status = Grunnable;
  1376  	gp->m = nil;
  1377  	m->curg = nil;
  1378  	runtime·lock(&runtime·sched);
  1379  	globrunqput(gp);
  1380  	runtime·unlock(&runtime·sched);
  1381  	if(m->lockedg) {
  1382  		stoplockedm();
  1383  		execute(gp);  // Never returns.
  1384  	}
  1385  	schedule();
  1386  }
  1387  
  1388  // Finishes execution of the current goroutine.
  1389  // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
  1390  // Since it does not return, it does not matter.  But if it is preempted
  1391  // at the split stack check, GC will complain about inconsistent sp.
  1392  #pragma textflag NOSPLIT
  1393  void
  1394  runtime·goexit(void)
  1395  {
  1396  	if(raceenabled)
  1397  		runtime·racegoend();
  1398  	runtime·mcall(goexit0);
  1399  }
  1400  
  1401  // runtime·goexit continuation on g0.
  1402  static void
  1403  goexit0(G *gp)
  1404  {
  1405  	gp->status = Gdead;
  1406  	gp->m = nil;
  1407  	gp->lockedm = nil;
  1408  	m->curg = nil;
  1409  	m->lockedg = nil;
  1410  	if(m->locked & ~LockExternal) {
  1411  		runtime·printf("invalid m->locked = %d\n", m->locked);
  1412  		runtime·throw("internal lockOSThread error");
  1413  	}	
  1414  	m->locked = 0;
  1415  	runtime·unwindstack(gp, nil);
  1416  	gfput(m->p, gp);
  1417  	schedule();
  1418  }
  1419  
  1420  #pragma textflag NOSPLIT
  1421  static void
  1422  save(void *pc, uintptr sp)
  1423  {
  1424  	g->sched.pc = (uintptr)pc;
  1425  	g->sched.sp = sp;
  1426  	g->sched.lr = 0;
  1427  	g->sched.ret = 0;
  1428  	g->sched.ctxt = 0;
  1429  	g->sched.g = g;
  1430  }
  1431  
  1432  // The goroutine g is about to enter a system call.
  1433  // Record that it's not using the cpu anymore.
  1434  // This is called only from the go syscall library and cgocall,
  1435  // not from the low-level system calls used by the runtime.
  1436  //
  1437  // Entersyscall cannot split the stack: the runtime·gosave must
  1438  // make g->sched refer to the caller's stack segment, because
  1439  // entersyscall is going to return immediately after.
  1440  #pragma textflag NOSPLIT
  1441  void
  1442  ·entersyscall(int32 dummy)
  1443  {
  1444  	// Disable preemption because during this function g is in Gsyscall status,
  1445  	// but can have inconsistent g->sched; do not let GC observe it.
  1446  	m->locks++;
  1447  
  1448  	// Leave SP around for GC and traceback.
  1449  	save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1450  	g->syscallsp = g->sched.sp;
  1451  	g->syscallpc = g->sched.pc;
  1452  	g->syscallstack = g->stackbase;
  1453  	g->syscallguard = g->stackguard;
  1454  	g->status = Gsyscall;
  1455  	if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1456  		// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1457  		//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1458  		runtime·throw("entersyscall");
  1459  	}
  1460  
  1461  	if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
  1462  		runtime·lock(&runtime·sched);
  1463  		if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1464  			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1465  			runtime·notewakeup(&runtime·sched.sysmonnote);
  1466  		}
  1467  		runtime·unlock(&runtime·sched);
  1468  		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1469  	}
  1470  
  1471  	m->mcache = nil;
  1472  	m->p->m = nil;
  1473  	runtime·atomicstore(&m->p->status, Psyscall);
  1474  	if(runtime·sched.gcwaiting) {
  1475  		runtime·lock(&runtime·sched);
  1476  		if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
  1477  			if(--runtime·sched.stopwait == 0)
  1478  				runtime·notewakeup(&runtime·sched.stopnote);
  1479  		}
  1480  		runtime·unlock(&runtime·sched);
  1481  		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1482  	}
  1483  
  1484  	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
  1485  	// We set stackguard to StackPreempt so that first split stack check calls morestack.
  1486  	// Morestack detects this case and throws.
  1487  	g->stackguard0 = StackPreempt;
  1488  	m->locks--;
  1489  }
  1490  
  1491  // The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
  1492  #pragma textflag NOSPLIT
  1493  void
  1494  ·entersyscallblock(int32 dummy)
  1495  {
  1496  	P *p;
  1497  
  1498  	m->locks++;  // see comment in entersyscall
  1499  
  1500  	// Leave SP around for GC and traceback.
  1501  	save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1502  	g->syscallsp = g->sched.sp;
  1503  	g->syscallpc = g->sched.pc;
  1504  	g->syscallstack = g->stackbase;
  1505  	g->syscallguard = g->stackguard;
  1506  	g->status = Gsyscall;
  1507  	if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
  1508  		// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
  1509  		//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
  1510  		runtime·throw("entersyscallblock");
  1511  	}
  1512  
  1513  	p = releasep();
  1514  	handoffp(p);
  1515  	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1516  		incidlelocked(1);
  1517  
  1518  	// Resave for traceback during blocked call.
  1519  	save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
  1520  
  1521  	g->stackguard0 = StackPreempt;  // see comment in entersyscall
  1522  	m->locks--;
  1523  }
  1524  
  1525  // The goroutine g exited its system call.
  1526  // Arrange for it to run on a cpu again.
  1527  // This is called only from the go syscall library, not
  1528  // from the low-level system calls used by the runtime.
  1529  #pragma textflag NOSPLIT
  1530  void
  1531  runtime·exitsyscall(void)
  1532  {
  1533  	m->locks++;  // see comment in entersyscall
  1534  
  1535  	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
  1536  		incidlelocked(-1);
  1537  
  1538  	if(exitsyscallfast()) {
  1539  		// There's a cpu for us, so we can run.
  1540  		m->p->syscalltick++;
  1541  		g->status = Grunning;
  1542  		// Garbage collector isn't running (since we are),
  1543  		// so okay to clear gcstack and gcsp.
  1544  		g->syscallstack = (uintptr)nil;
  1545  		g->syscallsp = (uintptr)nil;
  1546  		m->locks--;
  1547  		if(g->preempt) {
  1548  			// restore the preemption request in case we've cleared it in newstack
  1549  			g->stackguard0 = StackPreempt;
  1550  		} else {
  1551  			// otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock
  1552  			g->stackguard0 = g->stackguard;
  1553  		}
  1554  		return;
  1555  	}
  1556  
  1557  	m->locks--;
  1558  
  1559  	// Call the scheduler.
  1560  	runtime·mcall(exitsyscall0);
  1561  
  1562  	// Scheduler returned, so we're allowed to run now.
  1563  	// Delete the gcstack information that we left for
  1564  	// the garbage collector during the system call.
  1565  	// Must wait until now because until gosched returns
  1566  	// we don't know for sure that the garbage collector
  1567  	// is not running.
  1568  	g->syscallstack = (uintptr)nil;
  1569  	g->syscallsp = (uintptr)nil;
  1570  	m->p->syscalltick++;
  1571  }
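
        // Illustrative sketch (an assumption, not from this file): the syscall
        // package brackets each potentially blocking system call with these hooks,
        // roughly:
        //
        //	runtime·entersyscall();
        //	r = <raw kernel call>;
        //	runtime·exitsyscall();
        //
        // so that the P can be handed off or retaken by sysmon while the call blocks.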
  1572  
  1573  #pragma textflag NOSPLIT
  1574  static bool
  1575  exitsyscallfast(void)
  1576  {
  1577  	P *p;
  1578  
  1579  	// Freezetheworld sets stopwait but does not retake P's.
  1580  	if(runtime·sched.stopwait) {
  1581  		m->p = nil;
  1582  		return false;
  1583  	}
  1584  
  1585  	// Try to re-acquire the last P.
  1586  	if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
  1587  		// There's a cpu for us, so we can run.
  1588  		m->mcache = m->p->mcache;
  1589  		m->p->m = m;
  1590  		return true;
  1591  	}
  1592  	// Try to get any other idle P.
  1593  	m->p = nil;
  1594  	if(runtime·sched.pidle) {
  1595  		runtime·lock(&runtime·sched);
  1596  		p = pidleget();
  1597  		if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
  1598  			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1599  			runtime·notewakeup(&runtime·sched.sysmonnote);
  1600  		}
  1601  		runtime·unlock(&runtime·sched);
  1602  		if(p) {
  1603  			acquirep(p);
  1604  			return true;
  1605  		}
  1606  	}
  1607  	return false;
  1608  }
  1609  
  1610  // runtime·exitsyscall slow path on g0.
  1611  // Failed to acquire P, enqueue gp as runnable.
  1612  static void
  1613  exitsyscall0(G *gp)
  1614  {
  1615  	P *p;
  1616  
  1617  	gp->status = Grunnable;
  1618  	gp->m = nil;
  1619  	m->curg = nil;
  1620  	runtime·lock(&runtime·sched);
  1621  	p = pidleget();
  1622  	if(p == nil)
  1623  		globrunqput(gp);
  1624  	else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
  1625  		runtime·atomicstore(&runtime·sched.sysmonwait, 0);
  1626  		runtime·notewakeup(&runtime·sched.sysmonnote);
  1627  	}
  1628  	runtime·unlock(&runtime·sched);
  1629  	if(p) {
  1630  		acquirep(p);
  1631  		execute(gp);  // Never returns.
  1632  	}
  1633  	if(m->lockedg) {
  1634  		// Wait until another thread schedules gp and so m again.
  1635  		stoplockedm();
  1636  		execute(gp);  // Never returns.
  1637  	}
  1638  	stopm();
  1639  	schedule();  // Never returns.
  1640  }
  1641  
  1642  // Called from syscall package before fork.
  1643  void
  1644  syscall·runtime_BeforeFork(void)
  1645  {
  1646  	// Fork can hang if preempted with signals frequently enough (see issue 5517).
  1647  	// Ensure that we stay on the same M where we disable profiling.
  1648  	m->locks++;
  1649  	if(m->profilehz != 0)
  1650  		runtime·resetcpuprofiler(0);
  1651  }
  1652  
  1653  // Called from syscall package after fork in parent.
  1654  void
  1655  syscall·runtime_AfterFork(void)
  1656  {
  1657  	int32 hz;
  1658  
  1659  	hz = runtime·sched.profilehz;
  1660  	if(hz != 0)
  1661  		runtime·resetcpuprofiler(hz);
  1662  	m->locks--;
  1663  }
  1664  
  1665  // Hook used by runtime·malg to call runtime·stackalloc on the
  1666  // scheduler stack.  This exists because runtime·stackalloc insists
  1667  // on being called on the scheduler stack, to avoid trying to grow
  1668  // the stack while allocating a new stack segment.
  1669  static void
  1670  mstackalloc(G *gp)
  1671  {
  1672  	gp->param = runtime·stackalloc((uintptr)gp->param);
  1673  	runtime·gogo(&gp->sched);
  1674  }
  1675  
  1676  // Allocate a new g, with a stack big enough for stacksize bytes.
  1677  G*
  1678  runtime·malg(int32 stacksize)
  1679  {
  1680  	G *newg;
  1681  	byte *stk;
  1682  
  1683  	if(StackTop < sizeof(Stktop)) {
  1684  		runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
  1685  		runtime·throw("runtime: bad stack.h");
  1686  	}
  1687  
  1688  	newg = runtime·malloc(sizeof(G));
  1689  	if(stacksize >= 0) {
  1690  		if(g == m->g0) {
  1691  			// running on scheduler stack already.
  1692  			stk = runtime·stackalloc(StackSystem + stacksize);
  1693  		} else {
  1694  			// have to call stackalloc on scheduler stack.
  1695  			g->param = (void*)(StackSystem + stacksize);
  1696  			runtime·mcall(mstackalloc);
  1697  			stk = g->param;
  1698  			g->param = nil;
  1699  		}
  1700  		newg->stacksize = StackSystem + stacksize;
  1701  		newg->stack0 = (uintptr)stk;
  1702  		newg->stackguard = (uintptr)stk + StackGuard;
  1703  		newg->stackguard0 = newg->stackguard;
  1704  		newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
  1705  		runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
  1706  	}
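        	// With a stack allocated, the layout from low to high addresses is,
        	// roughly:
        	//	stack0 .. stack0+StackGuard       guard area (stackguard, stackguard0)
        	//	...                               usable stack
        	//	stackbase .. +sizeof(Stktop)      Stktop record at the very top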
  1707  	return newg;
  1708  }
  1709  
  1710  // Create a new g running fn with siz bytes of arguments.
  1711  // Put it on the queue of g's waiting to run.
  1712  // The compiler turns a go statement into a call to this.
  1713  // Cannot split the stack because it assumes that the arguments
  1714  // are available sequentially after &fn; they would not be
  1715  // copied if a stack split occurred.  It's OK for this to call
  1716  // functions that split the stack.
  1717  #pragma textflag NOSPLIT
  1718  void
  1719  runtime·newproc(int32 siz, FuncVal* fn, ...)
  1720  {
  1721  	byte *argp;
  1722  
  1723  	if(thechar == '5')
  1724  		argp = (byte*)(&fn+2);  // skip caller's saved LR
  1725  	else
  1726  		argp = (byte*)(&fn+1);
  1727  	runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
  1728  }
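        // For example (illustrative only, not the exact code the compiler emits),
        // a statement such as
        //	go f(x, y)
        // becomes, in effect,
        //	runtime·newproc(sizeof(x)+sizeof(y), f, x, y);
        // with the arguments laid out sequentially after fn on the caller's stack.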
  1729  
  1730  // Create a new g running fn with narg bytes of arguments starting
  1731  // at argp and returning nret bytes of results.  callerpc is the
  1732  // address of the go statement that created this.  The new g is put
  1733  // on the queue of g's waiting to run.
  1734  G*
  1735  runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
  1736  {
  1737  	byte *sp;
  1738  	G *newg;
  1739  	int32 siz;
  1740  
  1741  //runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
  1742  	m->locks++;  // disable preemption because it can be holding p in a local var
  1743  	siz = narg + nret;
  1744  	siz = (siz+7) & ~7;
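        	// Round the argument block up to an 8-byte boundary (e.g. 13 -> 16)
        	// so the copied arguments keep the new goroutine's stack aligned.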
  1745  
  1746  	// We could instead create a secondary stack frame
  1747  	// and make it look like goexit was on the original but
  1748  	// the call to the actual goroutine function was split.
  1749  	// Not worth it: this is almost always an error.
  1750  	if(siz > StackMin - 1024)
  1751  		runtime·throw("runtime.newproc: function arguments too large for new goroutine");
  1752  
  1753  	if((newg = gfget(m->p)) != nil) {
  1754  		if(newg->stackguard - StackGuard != newg->stack0)
  1755  			runtime·throw("invalid stack in newg");
  1756  	} else {
  1757  		newg = runtime·malg(StackMin);
  1758  		runtime·lock(&runtime·sched);
  1759  		if(runtime·lastg == nil)
  1760  			runtime·allg = newg;
  1761  		else
  1762  			runtime·lastg->alllink = newg;
  1763  		runtime·lastg = newg;
  1764  		runtime·unlock(&runtime·sched);
  1765  	}
  1766  
  1767  	sp = (byte*)newg->stackbase;
  1768  	sp -= siz;
  1769  	runtime·memmove(sp, argp, narg);
  1770  	if(thechar == '5') {
  1771  		// caller's LR
  1772  		sp -= sizeof(void*);
  1773  		*(void**)sp = nil;
  1774  	}
  1775  
  1776  	runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
  1777  	newg->sched.sp = (uintptr)sp;
  1778  	newg->sched.pc = (uintptr)runtime·goexit;
  1779  	newg->sched.g = newg;
  1780  	runtime·gostartcallfn(&newg->sched, fn);
  1781  	newg->gopc = (uintptr)callerpc;
  1782  	newg->status = Grunnable;
  1783  	newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
  1784  	newg->panicwrap = 0;
  1785  	if(raceenabled)
  1786  		newg->racectx = runtime·racegostart((void*)callerpc);
  1787  	runqput(m->p, newg);
  1788  
  1789  	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
  1790  		wakep();
  1791  	m->locks--;
  1792  	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
  1793  		g->stackguard0 = StackPreempt;
  1794  	return newg;
  1795  }
  1796  
  1797  // Put on gfree list.
  1798  // If local list is too long, transfer a batch to the global list.
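        // When the local list reaches 64 entries it is drained down to about half
        // that, which gives hysteresis: a goroutine churning near the threshold
        // does not take the global gflock on every put.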
  1799  static void
  1800  gfput(P *p, G *gp)
  1801  {
  1802  	if(gp->stackguard - StackGuard != gp->stack0)
  1803  		runtime·throw("invalid stack in gfput");
  1804  	gp->schedlink = p->gfree;
  1805  	p->gfree = gp;
  1806  	p->gfreecnt++;
  1807  	if(p->gfreecnt >= 64) {
  1808  		runtime·lock(&runtime·sched.gflock);
  1809  		while(p->gfreecnt >= 32) {
  1810  			p->gfreecnt--;
  1811  			gp = p->gfree;
  1812  			p->gfree = gp->schedlink;
  1813  			gp->schedlink = runtime·sched.gfree;
  1814  			runtime·sched.gfree = gp;
  1815  		}
  1816  		runtime·unlock(&runtime·sched.gflock);
  1817  	}
  1818  }
  1819  
  1820  // Get from gfree list.
  1821  // If local list is empty, grab a batch from global list.
  1822  static G*
  1823  gfget(P *p)
  1824  {
  1825  	G *gp;
  1826  
  1827  retry:
  1828  	gp = p->gfree;
  1829  	if(gp == nil && runtime·sched.gfree) {
  1830  		runtime·lock(&runtime·sched.gflock);
  1831  		while(p->gfreecnt < 32 && runtime·sched.gfree) {
  1832  			p->gfreecnt++;
  1833  			gp = runtime·sched.gfree;
  1834  			runtime·sched.gfree = gp->schedlink;
  1835  			gp->schedlink = p->gfree;
  1836  			p->gfree = gp;
  1837  		}
  1838  		runtime·unlock(&runtime·sched.gflock);
  1839  		goto retry;
  1840  	}
  1841  	if(gp) {
  1842  		p->gfree = gp->schedlink;
  1843  		p->gfreecnt--;
  1844  	}
  1845  	return gp;
  1846  }
  1847  
  1848  // Purge all cached G's from gfree list to the global list.
  1849  static void
  1850  gfpurge(P *p)
  1851  {
  1852  	G *gp;
  1853  
  1854  	runtime·lock(&runtime·sched.gflock);
  1855  	while(p->gfreecnt) {
  1856  		p->gfreecnt--;
  1857  		gp = p->gfree;
  1858  		p->gfree = gp->schedlink;
  1859  		gp->schedlink = runtime·sched.gfree;
  1860  		runtime·sched.gfree = gp;
  1861  	}
  1862  	runtime·unlock(&runtime·sched.gflock);
  1863  }
  1864  
  1865  void
  1866  runtime·Breakpoint(void)
  1867  {
  1868  	runtime·breakpoint();
  1869  }
  1870  
  1871  void
  1872  runtime·Gosched(void)
  1873  {
  1874  	runtime·gosched();
  1875  }
  1876  
  1877  // Implementation of runtime.GOMAXPROCS.
  1878  // delete when scheduler is even stronger
  1879  int32
  1880  runtime·gomaxprocsfunc(int32 n)
  1881  {
  1882  	int32 ret;
  1883  
  1884  	if(n > MaxGomaxprocs)
  1885  		n = MaxGomaxprocs;
  1886  	runtime·lock(&runtime·sched);
  1887  	ret = runtime·gomaxprocs;
  1888  	if(n <= 0 || n == ret) {
  1889  		runtime·unlock(&runtime·sched);
  1890  		return ret;
  1891  	}
  1892  	runtime·unlock(&runtime·sched);
  1893  
  1894  	runtime·semacquire(&runtime·worldsema, false);
  1895  	m->gcing = 1;
  1896  	runtime·stoptheworld();
  1897  	newprocs = n;
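        	// The new value is applied by runtime·starttheworld, which calls
        	// procresize with newprocs while the world is still stopped.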
  1898  	m->gcing = 0;
  1899  	runtime·semrelease(&runtime·worldsema);
  1900  	runtime·starttheworld();
  1901  
  1902  	return ret;
  1903  }
  1904  
  1905  // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
  1906  // after they modify m->locked. Do not allow preemption during this call,
  1907  // or else the m might be different in this function than in the caller.
  1908  #pragma textflag NOSPLIT
  1909  static void
  1910  lockOSThread(void)
  1911  {
  1912  	m->lockedg = g;
  1913  	g->lockedm = m;
  1914  }
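        // LockExternal is a single flag bit set on behalf of the user's
        // runtime.LockOSThread call, while LockInternal counts nested
        // runtime-internal lockOSThread calls; m->locked combines both, and the
        // thread stays wired to the goroutine until every lock has been released.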
  1915  
  1916  void
  1917  runtime·LockOSThread(void)
  1918  {
  1919  	m->locked |= LockExternal;
  1920  	lockOSThread();
  1921  }
  1922  
  1923  void
  1924  runtime·lockOSThread(void)
  1925  {
  1926  	m->locked += LockInternal;
  1927  	lockOSThread();
  1928  }
  1929  
  1930  
  1931  // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
  1932  // after they update m->locked. Do not allow preemption during this call,
  1933  // or else the m might be different in this function than in the caller.
  1934  #pragma textflag NOSPLIT
  1935  static void
  1936  unlockOSThread(void)
  1937  {
  1938  	if(m->locked != 0)
  1939  		return;
  1940  	m->lockedg = nil;
  1941  	g->lockedm = nil;
  1942  }
  1943  
  1944  void
  1945  runtime·UnlockOSThread(void)
  1946  {
  1947  	m->locked &= ~LockExternal;
  1948  	unlockOSThread();
  1949  }
  1950  
  1951  void
  1952  runtime·unlockOSThread(void)
  1953  {
  1954  	if(m->locked < LockInternal)
  1955  		runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
  1956  	m->locked -= LockInternal;
  1957  	unlockOSThread();
  1958  }
  1959  
  1960  bool
  1961  runtime·lockedOSThread(void)
  1962  {
  1963  	return g->lockedm != nil && m->lockedg != nil;
  1964  }
  1965  
  1966  // for testing of callbacks
  1967  void
  1968  runtime·golockedOSThread(bool ret)
  1969  {
  1970  	ret = runtime·lockedOSThread();
  1971  	FLUSH(&ret);
  1972  }
  1973  
  1974  void
  1975  runtime·NumGoroutine(intgo ret)
  1976  {
  1977  	ret = runtime·gcount();
  1978  	FLUSH(&ret);
  1979  }
  1980  
  1981  int32
  1982  runtime·gcount(void)
  1983  {
  1984  	G *gp;
  1985  	int32 n, s;
  1986  
  1987  	n = 0;
  1988  	runtime·lock(&runtime·sched);
  1989  	// TODO(dvyukov): runtime.NumGoroutine() is O(N).
  1990  	// We do not want to increment/decrement centralized counter in newproc/goexit,
  1991  	// just to make runtime.NumGoroutine() faster.
  1992  	// Compromise solution is to introduce per-P counters of active goroutines.
  1993  	for(gp = runtime·allg; gp; gp = gp->alllink) {
  1994  		s = gp->status;
  1995  		if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
  1996  			n++;
  1997  	}
  1998  	runtime·unlock(&runtime·sched);
  1999  	return n;
  2000  }
  2001  
  2002  int32
  2003  runtime·mcount(void)
  2004  {
  2005  	return runtime·sched.mcount;
  2006  }
  2007  
  2008  void
  2009  runtime·badmcall(void (*fn)(G*))  // called from assembly
  2010  {
  2011  	USED(fn); // TODO: print fn?
  2012  	runtime·throw("runtime: mcall called on m->g0 stack");
  2013  }
  2014  
  2015  void
  2016  runtime·badmcall2(void (*fn)(G*))  // called from assembly
  2017  {
  2018  	USED(fn);
  2019  	runtime·throw("runtime: mcall function returned");
  2020  }
  2021  
  2022  void
  2023  runtime·badreflectcall(void) // called from assembly
  2024  {
  2025  	runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
  2026  }
  2027  
  2028  static struct {
  2029  	Lock;
  2030  	void (*fn)(uintptr*, int32);
  2031  	int32 hz;
  2032  	uintptr pcbuf[100];
  2033  } prof;
  2034  
  2035  static void
  2036  System(void)
  2037  {
  2038  }
  2039  
  2040  // Called if we receive a SIGPROF signal.
  2041  void
  2042  runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
  2043  {
  2044  	int32 n;
  2045  	bool traceback;
  2046  
  2047  	if(prof.fn == nil || prof.hz == 0)
  2048  		return;
  2049  	traceback = true;
  2050  	// Windows does profiling in a dedicated thread w/o m.
  2051  	if(!Windows && (m == nil || m->mcache == nil))
  2052  		traceback = false;
  2053  	
  2054  	// Define that a "user g" is a user-created goroutine, and a "system g"
  2055  	// is one that is m->g0 or m->gsignal. We've only made sure that we
  2056  	// can unwind user g's, so exclude the system g's.
  2057  	//
  2058  	// It is not quite as easy as testing gp == m->curg (the current user g)
  2059  	// because we might be interrupted for profiling halfway through a
  2060  	// goroutine switch. The switch involves updating three (or four) values:
  2061  	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
  2062  	// because once it gets updated the new g is running.
  2063  	//
  2064  	// When switching from a user g to a system g, LR is not considered live,
  2065  	// so the update only affects g, SP, and PC. Since PC must be last, the
  2066  	// possible partial transitions in ordinary execution are (1) g alone is updated,
  2067  	// (2) both g and SP are updated, and (3) SP alone is updated.
  2068  	// If g is updated, we'll see a system g and not look closer.
  2069  	// If SP alone is updated, we can detect the partial transition by checking
  2070  	// whether the SP is within g's stack bounds. (We could also require that SP
  2071  	// be changed only after g, but the stack bounds check is needed by other
  2072  	// cases, so there is no need to impose an additional requirement.)
  2073  	//
  2074  	// There is one exceptional transition to a system g, not in ordinary execution.
  2075  	// When a signal arrives, the operating system starts the signal handler running
  2076  	// with an updated PC and SP. The g is updated last, at the beginning of the
  2077  	// handler. There are two reasons this is okay. First, until g is updated the
  2078  	// g and SP do not match, so the stack bounds check detects the partial transition.
  2079  	// Second, signal handlers currently run with signals disabled, so a profiling
  2080  	// signal cannot arrive during the handler.
  2081  	//
  2082  	// When switching from a system g to a user g, there are three possibilities.
  2083  	//
  2084  	// First, it may be that the g switch has no PC update, because the SP
  2085  	// either corresponds to a user g throughout (as in runtime.asmcgocall)
  2086  	// or because it has been arranged to look like a user g frame
  2087  	// (as in runtime.cgocallback_gofunc). In this case, since the entire
  2088  	// transition is a g+SP update, a partial transition updating just one of 
  2089  	// those will be detected by the stack bounds check.
  2090  	//
  2091  	// Second, when returning from a signal handler, the PC and SP updates
  2092  	// are performed by the operating system in an atomic update, so the g
  2093  	// update must be done before them. The stack bounds check detects
  2094  	// the partial transition here, and (again) signal handlers run with signals
  2095  	// disabled, so a profiling signal cannot arrive then anyway.
  2096  	//
  2097  	// Third, the common case: it may be that the switch updates g, SP, and PC
  2098  	// separately, as in runtime.gogo.
  2099  	//
  2100  	// Because runtime.gogo is the only instance, we check whether the PC lies
  2101  	// within that function and, if so, do not ask for a traceback. This approach
  2102  	// requires knowing the size of the runtime.gogo function, which we
  2103  	// record in arch_*.h and check in runtime_test.go.
  2104  	//
  2105  	// There is another apparently viable approach, recorded here in case
  2106  	// the "PC within runtime.gogo" check turns out not to be usable.
  2107  	// It would be possible to delay the update of either g or SP until immediately
  2108  	// before the PC update instruction. Then, because of the stack bounds check,
  2109  	// the only problematic interrupt point is just before that PC update instruction,
  2110  	// and the sigprof handler can detect that instruction and simulate stepping past
  2111  	// it in order to reach a consistent state. On ARM, the update of g must be made
  2112  	// in two places (in R10 and also in a TLS slot), so the delayed update would
  2113  	// need to be the SP update. The sigprof handler must read the instruction at
  2114  	// the current PC and if it was the known instruction (for example, JMP BX or 
  2115  	// MOV R2, PC), use that other register in place of the PC value.
  2116  	// The biggest drawback to this solution is that it requires that we can tell
  2117  	// whether it's safe to read from the memory pointed at by PC.
  2118  	// In a correct program, we can test PC == nil and otherwise read,
  2119  	// but if a profiling signal happens at the instant that a program executes
  2120  	// a bad jump (before the program manages to handle the resulting fault)
  2121  	// the profiling handler could fault trying to read nonexistent memory.
  2122  	//
  2123  	// To recap, there are no constraints on the assembly being used for the
  2124  	// transition. We simply require that g and SP match and that the PC is not
  2125  	// in runtime.gogo.
  2126  	//
  2127  	// On Windows, one m is sending reports about all the g's, so gp == m->curg
  2128  	// is not a useful comparison. The profilem function in os_windows.c has
  2129  	// already checked that gp is a user g.
  2130  	if(gp == nil ||
  2131  	   (!Windows && gp != m->curg) ||
  2132  	   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
  2133  	   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
  2134  		traceback = false;
  2135  
  2136  	// Race detector calls asmcgocall w/o entersyscall/exitsyscall,
  2137  	// so we cannot currently unwind through asmcgocall.
  2138  	if(m != nil && m->racecall)
  2139  		traceback = false;
  2140  
  2141  	runtime·lock(&prof);
  2142  	if(prof.fn == nil) {
  2143  		runtime·unlock(&prof);
  2144  		return;
  2145  	}
  2146  	n = 0;
  2147  	if(traceback)
  2148  		n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
  2149  	if(!traceback || n <= 0) {
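        		// Fall back to a two-entry stack: the interrupted PC plus an
        		// address inside the dummy System function above, so the sample
        		// shows up attributed to System in the profile.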
  2150  		n = 2;
  2151  		prof.pcbuf[0] = (uintptr)pc;
  2152  		prof.pcbuf[1] = (uintptr)System + 1;
  2153  	}
  2154  	prof.fn(prof.pcbuf, n);
  2155  	runtime·unlock(&prof);
  2156  }
  2157  
  2158  // Arrange to call fn with a traceback hz times a second.
  2159  void
  2160  runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
  2161  {
  2162  	// Force sane arguments.
  2163  	if(hz < 0)
  2164  		hz = 0;
  2165  	if(hz == 0)
  2166  		fn = nil;
  2167  	if(fn == nil)
  2168  		hz = 0;
  2169  
  2170  	// Disable preemption, otherwise we can be rescheduled to another thread
  2171  	// that has profiling enabled.
  2172  	m->locks++;
  2173  
  2174  	// Stop profiler on this thread so that it is safe to lock prof.
  2175  	// If a profiling signal came in while we had prof locked,
  2176  	// it would deadlock.
  2177  	runtime·resetcpuprofiler(0);
  2178  
  2179  	runtime·lock(&prof);
  2180  	prof.fn = fn;
  2181  	prof.hz = hz;
  2182  	runtime·unlock(&prof);
  2183  	runtime·lock(&runtime·sched);
  2184  	runtime·sched.profilehz = hz;
  2185  	runtime·unlock(&runtime·sched);
  2186  
  2187  	if(hz != 0)
  2188  		runtime·resetcpuprofiler(hz);
  2189  
  2190  	m->locks--;
  2191  }
  2192  
  2193  // Change number of processors.  The world is stopped, sched is locked.
  2194  static void
  2195  procresize(int32 new)
  2196  {
  2197  	int32 i, old;
  2198  	G *gp;
  2199  	P *p;
  2200  
  2201  	old = runtime·gomaxprocs;
  2202  	if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
  2203  		runtime·throw("procresize: invalid arg");
  2204  	// initialize new P's
  2205  	for(i = 0; i < new; i++) {
  2206  		p = runtime·allp[i];
  2207  		if(p == nil) {
  2208  			p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
  2209  			p->id = i;
  2210  			p->status = Pgcstop;
  2211  			runtime·atomicstorep(&runtime·allp[i], p);
  2212  		}
  2213  		if(p->mcache == nil) {
  2214  			if(old==0 && i==0)
  2215  				p->mcache = m->mcache;  // bootstrap
  2216  			else
  2217  				p->mcache = runtime·allocmcache();
  2218  		}
  2219  		if(p->runq == nil) {
  2220  			p->runqsize = 128;
  2221  			p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
  2222  		}
  2223  	}
  2224  
  2225  	// redistribute runnable G's evenly
  2226  	for(i = 0; i < old; i++) {
  2227  		p = runtime·allp[i];
  2228  		while(gp = runqget(p))
  2229  			globrunqput(gp);
  2230  	}
  2231  	// start at 1 because current M already executes some G and will acquire allp[0] below,
  2232  	// so if we have a spare G we want to put it into allp[1].
  2233  	for(i = 1; runtime·sched.runqhead; i++) {
  2234  		gp = runtime·sched.runqhead;
  2235  		runtime·sched.runqhead = gp->schedlink;
  2236  		runqput(runtime·allp[i%new], gp);
  2237  	}
  2238  	runtime·sched.runqtail = nil;
  2239  	runtime·sched.runqsize = 0;
  2240  
  2241  	// free unused P's
  2242  	for(i = new; i < old; i++) {
  2243  		p = runtime·allp[i];
  2244  		runtime·freemcache(p->mcache);
  2245  		p->mcache = nil;
  2246  		gfpurge(p);
  2247  		p->status = Pdead;
  2248  		// can't free P itself because it can be referenced by an M in syscall
  2249  	}
  2250  
  2251  	if(m->p)
  2252  		m->p->m = nil;
  2253  	m->p = nil;
  2254  	m->mcache = nil;
  2255  	p = runtime·allp[0];
  2256  	p->m = nil;
  2257  	p->status = Pidle;
  2258  	acquirep(p);
  2259  	for(i = new-1; i > 0; i--) {
  2260  		p = runtime·allp[i];
  2261  		p->status = Pidle;
  2262  		pidleput(p);
  2263  	}
  2264  	runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
  2265  }
  2266  
  2267  // Associate p and the current m.
  2268  static void
  2269  acquirep(P *p)
  2270  {
  2271  	if(m->p || m->mcache)
  2272  		runtime·throw("acquirep: already in go");
  2273  	if(p->m || p->status != Pidle) {
  2274  		runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
  2275  		runtime·throw("acquirep: invalid p state");
  2276  	}
  2277  	m->mcache = p->mcache;
  2278  	m->p = p;
  2279  	p->m = m;
  2280  	p->status = Prunning;
  2281  }
  2282  
  2283  // Disassociate p and the current m.
  2284  static P*
  2285  releasep(void)
  2286  {
  2287  	P *p;
  2288  
  2289  	if(m->p == nil || m->mcache == nil)
  2290  		runtime·throw("releasep: invalid arg");
  2291  	p = m->p;
  2292  	if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
  2293  		runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
  2294  			m, m->p, p->m, m->mcache, p->mcache, p->status);
  2295  		runtime·throw("releasep: invalid p state");
  2296  	}
  2297  	m->p = nil;
  2298  	m->mcache = nil;
  2299  	p->m = nil;
  2300  	p->status = Pidle;
  2301  	return p;
  2302  }
  2303  
  2304  static void
  2305  incidlelocked(int32 v)
  2306  {
  2307  	runtime·lock(&runtime·sched);
  2308  	runtime·sched.nmidlelocked += v;
  2309  	if(v > 0)
  2310  		checkdead();
  2311  	runtime·unlock(&runtime·sched);
  2312  }
  2313  
  2314  // Check for deadlock situation.
  2315  // The check is based on the number of running M's; if it is 0, the process is deadlocked.
  2316  static void
  2317  checkdead(void)
  2318  {
  2319  	G *gp;
  2320  	int32 run, grunning, s;
  2321  
  2322  	// -1 for sysmon
  2323  	run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
  2324  	if(run > 0)
  2325  		return;
  2326  	if(run < 0) {
  2327  		runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
  2328  			runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
  2329  		runtime·throw("checkdead: inconsistent counts");
  2330  	}
  2331  	grunning = 0;
  2332  	for(gp = runtime·allg; gp; gp = gp->alllink) {
  2333  		if(gp->isbackground)
  2334  			continue;
  2335  		s = gp->status;
  2336  		if(s == Gwaiting)
  2337  			grunning++;
  2338  		else if(s == Grunnable || s == Grunning || s == Gsyscall) {
  2339  			runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
  2340  			runtime·throw("checkdead: runnable g");
  2341  		}
  2342  	}
  2343  	if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
  2344  		runtime·exit(0);
  2345  	m->throwing = -1;  // do not dump full stacks
  2346  	runtime·throw("all goroutines are asleep - deadlock!");
  2347  }
  2348  
  2349  static void
  2350  sysmon(void)
  2351  {
  2352  	uint32 idle, delay;
  2353  	int64 now, lastpoll, lasttrace;
  2354  	G *gp;
  2355  
  2356  	lasttrace = 0;
  2357  	idle = 0;  // how many cycles in succession have gone by without waking anybody up
  2358  	delay = 0;
  2359  	for(;;) {
  2360  		if(idle == 0)  // start with 20us sleep...
  2361  			delay = 20;
  2362  		else if(idle > 50)  // start doubling the sleep after 1ms...
  2363  			delay *= 2;
  2364  		if(delay > 10*1000)  // up to 10ms
  2365  			delay = 10*1000;
  2366  		runtime·usleep(delay);
  2367  		if(runtime·debug.schedtrace <= 0 &&
  2368  			(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
  2369  			runtime·lock(&runtime·sched);
  2370  			if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
  2371  				runtime·atomicstore(&runtime·sched.sysmonwait, 1);
  2372  				runtime·unlock(&runtime·sched);
  2373  				runtime·notesleep(&runtime·sched.sysmonnote);
  2374  				runtime·noteclear(&runtime·sched.sysmonnote);
  2375  				idle = 0;
  2376  				delay = 20;
  2377  			} else
  2378  				runtime·unlock(&runtime·sched);
  2379  		}
  2380  		// poll network if not polled for more than 10ms
  2381  		lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
  2382  		now = runtime·nanotime();
  2383  		if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
  2384  			runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
  2385  			gp = runtime·netpoll(false);  // non-blocking
  2386  			if(gp) {
  2387  				// Need to decrement number of idle locked M's
  2388  				// (pretending that one more is running) before injectglist.
  2389  				// Otherwise it can lead to the following situation:
  2390  				// injectglist grabs all P's but before it starts M's to run the P's,
  2391  				// another M returns from syscall, finishes running its G,
  2392  				// observes that there is no work to do and no other running M's
  2393  				// and reports deadlock.
  2394  				incidlelocked(-1);
  2395  				injectglist(gp);
  2396  				incidlelocked(1);
  2397  			}
  2398  		}
  2399  		// retake P's blocked in syscalls
  2400  		// and preempt long running G's
  2401  		if(retake(now))
  2402  			idle = 0;
  2403  		else
  2404  			idle++;
  2405  
  2406  		if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
  2407  			lasttrace = now;
  2408  			runtime·schedtrace(runtime·debug.scheddetail);
  2409  		}
  2410  	}
  2411  }
  2412  
  2413  typedef struct Pdesc Pdesc;
  2414  struct Pdesc
  2415  {
  2416  	uint32	schedtick;
  2417  	int64	schedwhen;
  2418  	uint32	syscalltick;
  2419  	int64	syscallwhen;
  2420  };
  2421  static Pdesc pdesc[MaxGomaxprocs];
  2422  
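        // retake retakes P's that have sat in a syscall for at least one full
        // sysmon tick and preempts G's that have been running for more than 10ms,
        // returning the number of P's retaken.  pdesc remembers the tick counts
        // seen on the previous pass, so time spent in a state is only measured
        // once a whole pass has gone by without the tick changing.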
  2423  static uint32
  2424  retake(int64 now)
  2425  {
  2426  	uint32 i, s, n;
  2427  	int64 t;
  2428  	P *p;
  2429  	Pdesc *pd;
  2430  
  2431  	n = 0;
  2432  	for(i = 0; i < runtime·gomaxprocs; i++) {
  2433  		p = runtime·allp[i];
  2434  		if(p==nil)
  2435  			continue;
  2436  		pd = &pdesc[i];
  2437  		s = p->status;
  2438  		if(s == Psyscall) {
  2439  			// Retake P from syscall if it's there for more than 1 sysmon tick (20us).
  2440  			// But only if there is other work to do.
  2441  			t = p->syscalltick;
  2442  			if(pd->syscalltick != t) {
  2443  				pd->syscalltick = t;
  2444  				pd->syscallwhen = now;
  2445  				continue;
  2446  			}
  2447  			if(p->runqhead == p->runqtail &&
  2448  				runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0)
  2449  				continue;
  2450  			// Need to decrement number of idle locked M's
  2451  			// (pretending that one more is running) before the CAS.
  2452  			// Otherwise the M from which we retake can exit the syscall,
  2453  			// increment nmidle and report deadlock.
  2454  			incidlelocked(-1);
  2455  			if(runtime·cas(&p->status, s, Pidle)) {
  2456  				n++;
  2457  				handoffp(p);
  2458  			}
  2459  			incidlelocked(1);
  2460  		} else if(s == Prunning) {
  2461  			// Preempt G if it's running for more than 10ms.
  2462  			t = p->schedtick;
  2463  			if(pd->schedtick != t) {
  2464  				pd->schedtick = t;
  2465  				pd->schedwhen = now;
  2466  				continue;
  2467  			}
  2468  			if(pd->schedwhen + 10*1000*1000 > now)
  2469  				continue;
  2470  			preemptone(p);
  2471  		}
  2472  	}
  2473  	return n;
  2474  }
  2475  
  2476  // Tell all goroutines that they have been preempted and they should stop.
  2477  // This function is purely best-effort.  It can fail to inform a goroutine if a
  2478  // processor just started running it.
  2479  // No locks need to be held.
  2480  // Returns true if preemption request was issued to at least one goroutine.
  2481  static bool
  2482  preemptall(void)
  2483  {
  2484  	P *p;
  2485  	int32 i;
  2486  	bool res;
  2487  
  2488  	res = false;
  2489  	for(i = 0; i < runtime·gomaxprocs; i++) {
  2490  		p = runtime·allp[i];
  2491  		if(p == nil || p->status != Prunning)
  2492  			continue;
  2493  		res |= preemptone(p);
  2494  	}
  2495  	return res;
  2496  }
  2497  
  2498  // Tell the goroutine running on processor P to stop.
  2499  // This function is purely best-effort.  It can incorrectly fail to inform the
  2500  // goroutine.  It can inform the wrong goroutine.  Even if it informs the
  2501  // correct goroutine, that goroutine might ignore the request if it is
  2502  // simultaneously executing runtime·newstack.
  2503  // No lock needs to be held.
  2504  // Returns true if preemption request was issued.
  2505  static bool
  2506  preemptone(P *p)
  2507  {
  2508  	M *mp;
  2509  	G *gp;
  2510  
  2511  	mp = p->m;
  2512  	if(mp == nil || mp == m)
  2513  		return false;
  2514  	gp = mp->curg;
  2515  	if(gp == nil || gp == mp->g0)
  2516  		return false;
  2517  	gp->preempt = true;
  2518  	gp->stackguard0 = StackPreempt;
  2519  	return true;
  2520  }
  2521  
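        // runtime·schedtrace prints a one-line scheduler summary such as
        //	SCHED 1004ms: gomaxprocs=4 idleprocs=2 threads=6 idlethreads=3 runqueue=1 [0 1 0 2]
        // and, when detailed is true, per-P, per-M and per-G state as well.
        // It is invoked periodically from sysmon according to runtime·debug.schedtrace.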
  2522  void
  2523  runtime·schedtrace(bool detailed)
  2524  {
  2525  	static int64 starttime;
  2526  	int64 now;
  2527  	int64 id1, id2, id3;
  2528  	int32 i, q, t, h, s;
  2529  	int8 *fmt;
  2530  	M *mp, *lockedm;
  2531  	G *gp, *lockedg;
  2532  	P *p;
  2533  
  2534  	now = runtime·nanotime();
  2535  	if(starttime == 0)
  2536  		starttime = now;
  2537  
  2538  	runtime·lock(&runtime·sched);
  2539  	runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
  2540  		(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
  2541  		runtime·sched.nmidle, runtime·sched.runqsize);
  2542  	if(detailed) {
  2543  		runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
  2544  			runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
  2545  			runtime·sched.stopwait, runtime·sched.sysmonwait);
  2546  	}
  2547  	// We must be careful while reading data from P's, M's and G's.
  2548  	// Even if we hold schedlock, most data can be changed concurrently.
  2549  	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
  2550  	for(i = 0; i < runtime·gomaxprocs; i++) {
  2551  		p = runtime·allp[i];
  2552  		if(p == nil)
  2553  			continue;
  2554  		mp = p->m;
  2555  		t = p->runqtail;
  2556  		h = p->runqhead;
  2557  		s = p->runqsize;
  2558  		q = t - h;
  2559  		if(q < 0)
  2560  			q += s;
  2561  		if(detailed)
  2562  			runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n",
  2563  				i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt);
  2564  		else {
  2565  			// In non-detailed mode, print the lengths of the per-P run queues as:
  2566  			// [len1 len2 len3 len4]
  2567  			fmt = " %d";
  2568  			if(runtime·gomaxprocs == 1)
  2569  				fmt = " [%d]\n";
  2570  			else if(i == 0)
  2571  				fmt = " [%d";
  2572  			else if(i == runtime·gomaxprocs-1)
  2573  				fmt = " %d]\n";
  2574  			runtime·printf(fmt, q);
  2575  		}
  2576  	}
  2577  	if(!detailed) {
  2578  		runtime·unlock(&runtime·sched);
  2579  		return;
  2580  	}
  2581  	for(mp = runtime·allm; mp; mp = mp->alllink) {
  2582  		p = mp->p;
  2583  		gp = mp->curg;
  2584  		lockedg = mp->lockedg;
  2585  		id1 = -1;
  2586  		if(p)
  2587  			id1 = p->id;
  2588  		id2 = -1;
  2589  		if(gp)
  2590  			id2 = gp->goid;
  2591  		id3 = -1;
  2592  		if(lockedg)
  2593  			id3 = lockedg->goid;
  2594  		runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
  2595  			" locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
  2596  			mp->id, id1, id2,
  2597  			mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
  2598  			mp->spinning, id3);
  2599  	}
  2600  	for(gp = runtime·allg; gp; gp = gp->alllink) {
  2601  		mp = gp->m;
  2602  		lockedm = gp->lockedm;
  2603  		runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",
  2604  			gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
  2605  			lockedm ? lockedm->id : -1);
  2606  	}
  2607  	runtime·unlock(&runtime·sched);
  2608  }
  2609  
  2610  // Put mp on midle list.
  2611  // Sched must be locked.
  2612  static void
  2613  mput(M *mp)
  2614  {
  2615  	mp->schedlink = runtime·sched.midle;
  2616  	runtime·sched.midle = mp;
  2617  	runtime·sched.nmidle++;
  2618  	checkdead();
  2619  }
  2620  
  2621  // Try to get an m from midle list.
  2622  // Sched must be locked.
  2623  static M*
  2624  mget(void)
  2625  {
  2626  	M *mp;
  2627  
  2628  	if((mp = runtime·sched.midle) != nil){
  2629  		runtime·sched.midle = mp->schedlink;
  2630  		runtime·sched.nmidle--;
  2631  	}
  2632  	return mp;
  2633  }
  2634  
  2635  // Put gp on the global runnable queue.
  2636  // Sched must be locked.
  2637  static void
  2638  globrunqput(G *gp)
  2639  {
  2640  	gp->schedlink = nil;
  2641  	if(runtime·sched.runqtail)
  2642  		runtime·sched.runqtail->schedlink = gp;
  2643  	else
  2644  		runtime·sched.runqhead = gp;
  2645  	runtime·sched.runqtail = gp;
  2646  	runtime·sched.runqsize++;
  2647  }
  2648  
  2649  // Try to get a batch of G's from the global runnable queue.
  2650  // Sched must be locked.
  2651  static G*
  2652  globrunqget(P *p, int32 max)
  2653  {
  2654  	G *gp, *gp1;
  2655  	int32 n;
  2656  
  2657  	if(runtime·sched.runqsize == 0)
  2658  		return nil;
  2659  	n = runtime·sched.runqsize/runtime·gomaxprocs+1;
  2660  	if(n > runtime·sched.runqsize)
  2661  		n = runtime·sched.runqsize;
  2662  	if(max > 0 && n > max)
  2663  		n = max;
  2664  	runtime·sched.runqsize -= n;
  2665  	if(runtime·sched.runqsize == 0)
  2666  		runtime·sched.runqtail = nil;
  2667  	gp = runtime·sched.runqhead;
  2668  	runtime·sched.runqhead = gp->schedlink;
  2669  	n--;
  2670  	while(n--) {
  2671  		gp1 = runtime·sched.runqhead;
  2672  		runtime·sched.runqhead = gp1->schedlink;
  2673  		runqput(p, gp1);
  2674  	}
  2675  	return gp;
  2676  }
  2677  
  2678  // Put p on the pidle list.
  2679  // Sched must be locked.
  2680  static void
  2681  pidleput(P *p)
  2682  {
  2683  	p->link = runtime·sched.pidle;
  2684  	runtime·sched.pidle = p;
  2685  	runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
  2686  }
  2687  
  2688  // Try to get a p from the pidle list.
  2689  // Sched must be locked.
  2690  static P*
  2691  pidleget(void)
  2692  {
  2693  	P *p;
  2694  
  2695  	p = runtime·sched.pidle;
  2696  	if(p) {
  2697  		runtime·sched.pidle = p->link;
  2698  		runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
  2699  	}
  2700  	return p;
  2701  }
  2702  
  2703  // Put g on local runnable queue.
  2704  // TODO(dvyukov): consider using lock-free queue.
  2705  static void
  2706  runqput(P *p, G *gp)
  2707  {
  2708  	int32 h, t, s;
  2709  
  2710  	runtime·lock(p);
  2711  retry:
  2712  	h = p->runqhead;
  2713  	t = p->runqtail;
  2714  	s = p->runqsize;
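        	// One slot is always left empty so that a full queue (tail just behind
        	// head, modulo wrap-around) can be told apart from an empty one (tail
        	// equal to head); hence the check below grows the queue one slot early.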
  2715  	if(t == h-1 || (h == 0 && t == s-1)) {
  2716  		runqgrow(p);
  2717  		goto retry;
  2718  	}
  2719  	p->runq[t++] = gp;
  2720  	if(t == s)
  2721  		t = 0;
  2722  	p->runqtail = t;
  2723  	runtime·unlock(p);
  2724  }
  2725  
  2726  // Get g from local runnable queue.
  2727  static G*
  2728  runqget(P *p)
  2729  {
  2730  	G *gp;
  2731  	int32 t, h, s;
  2732  
  2733  	if(p->runqhead == p->runqtail)
  2734  		return nil;
  2735  	runtime·lock(p);
  2736  	h = p->runqhead;
  2737  	t = p->runqtail;
  2738  	s = p->runqsize;
  2739  	if(t == h) {
  2740  		runtime·unlock(p);
  2741  		return nil;
  2742  	}
  2743  	gp = p->runq[h++];
  2744  	if(h == s)
  2745  		h = 0;
  2746  	p->runqhead = h;
  2747  	runtime·unlock(p);
  2748  	return gp;
  2749  }
  2750  
  2751  // Grow local runnable queue.
  2752  // TODO(dvyukov): consider using fixed-size array
  2753  // and transfer excess to the global list (local queue can grow way too big).
  2754  static void
  2755  runqgrow(P *p)
  2756  {
  2757  	G **q;
  2758  	int32 s, t, h, t2;
  2759  
  2760  	h = p->runqhead;
  2761  	t = p->runqtail;
  2762  	s = p->runqsize;
  2763  	t2 = 0;
  2764  	q = runtime·malloc(2*s*sizeof(*q));
  2765  	while(t != h) {
  2766  		q[t2++] = p->runq[h++];
  2767  		if(h == s)
  2768  			h = 0;
  2769  	}
  2770  	runtime·free(p->runq);
  2771  	p->runq = q;
  2772  	p->runqhead = 0;
  2773  	p->runqtail = t2;
  2774  	p->runqsize = 2*s;
  2775  }
  2776  
  2777  // Steal half of elements from local runnable queue of p2
  2778  // and put onto local runnable queue of p.
  2779  // Returns one of the stolen elements (or nil if the steal failed).
  2780  static G*
  2781  runqsteal(P *p, P *p2)
  2782  {
  2783  	G *gp, *gp1;
  2784  	int32 t, h, s, t2, h2, s2, c, i;
  2785  
  2786  	if(p2->runqhead == p2->runqtail)
  2787  		return nil;
  2788  	// sort locks to prevent deadlocks
  2789  	if(p < p2)
  2790  		runtime·lock(p);
  2791  	runtime·lock(p2);
  2792  	if(p2->runqhead == p2->runqtail) {
  2793  		runtime·unlock(p2);
  2794  		if(p < p2)
  2795  			runtime·unlock(p);
  2796  		return nil;
  2797  	}
  2798  	if(p >= p2)
  2799  		runtime·lock(p);
  2800  	// now we've locked both queues and know the victim is not empty
  2801  	h = p->runqhead;
  2802  	t = p->runqtail;
  2803  	s = p->runqsize;
  2804  	h2 = p2->runqhead;
  2805  	t2 = p2->runqtail;
  2806  	s2 = p2->runqsize;
  2807  	gp = p2->runq[h2++];  // return value
  2808  	if(h2 == s2)
  2809  		h2 = 0;
  2810  	// steal roughly half
  2811  	if(t2 > h2)
  2812  		c = (t2 - h2) / 2;
  2813  	else
  2814  		c = (s2 - h2 + t2) / 2;
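        	// c is half of the elements still left in p2's circular buffer after
        	// gp was taken; the second form handles wrap-around.  For example,
        	// with s2=8, h2=6, t2=2 there are 8-6+2 = 4 elements left, so c = 2.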
  2815  	// copy
  2816  	for(i = 0; i != c; i++) {
  2817  		// the target queue is full?
  2818  		if(t == h-1 || (h == 0 && t == s-1))
  2819  			break;
  2820  		// the victim queue is empty?
  2821  		if(t2 == h2)
  2822  			break;
  2823  		gp1 = p2->runq[h2++];
  2824  		if(h2 == s2)
  2825  			h2 = 0;
  2826  		p->runq[t++] = gp1;
  2827  		if(t == s)
  2828  			t = 0;
  2829  	}
  2830  	p->runqtail = t;
  2831  	p2->runqhead = h2;
  2832  	runtime·unlock(p2);
  2833  	runtime·unlock(p);
  2834  	return gp;
  2835  }
  2836  
  2837  void
  2838  runtime·testSchedLocalQueue(void)
  2839  {
  2840  	P p;
  2841  	G gs[1000];
  2842  	int32 i, j;
  2843  
  2844  	runtime·memclr((byte*)&p, sizeof(p));
  2845  	p.runqsize = 1;
  2846  	p.runqhead = 0;
  2847  	p.runqtail = 0;
  2848  	p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));
  2849  
  2850  	for(i = 0; i < nelem(gs); i++) {
  2851  		if(runqget(&p) != nil)
  2852  			runtime·throw("runq is not empty initially");
  2853  		for(j = 0; j < i; j++)
  2854  			runqput(&p, &gs[i]);
  2855  		for(j = 0; j < i; j++) {
  2856  			if(runqget(&p) != &gs[i]) {
  2857  				runtime·printf("bad element at iter %d/%d\n", i, j);
  2858  				runtime·throw("bad element");
  2859  			}
  2860  		}
  2861  		if(runqget(&p) != nil)
  2862  			runtime·throw("runq is not empty afterwards");
  2863  	}
  2864  }
  2865  
  2866  void
  2867  runtime·testSchedLocalQueueSteal(void)
  2868  {
  2869  	P p1, p2;
  2870  	G gs[1000], *gp;
  2871  	int32 i, j, s;
  2872  
  2873  	runtime·memclr((byte*)&p1, sizeof(p1));
  2874  	p1.runqsize = 1;
  2875  	p1.runqhead = 0;
  2876  	p1.runqtail = 0;
  2877  	p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq));
  2878  
  2879  	runtime·memclr((byte*)&p2, sizeof(p2));
  2880  	p2.runqsize = nelem(gs);
  2881  	p2.runqhead = 0;
  2882  	p2.runqtail = 0;
  2883  	p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));
  2884  
  2885  	for(i = 0; i < nelem(gs); i++) {
  2886  		for(j = 0; j < i; j++) {
  2887  			gs[j].sig = 0;
  2888  			runqput(&p1, &gs[j]);
  2889  		}
  2890  		gp = runqsteal(&p2, &p1);
  2891  		s = 0;
  2892  		if(gp) {
  2893  			s++;
  2894  			gp->sig++;
  2895  		}
  2896  		while(gp = runqget(&p2)) {
  2897  			s++;
  2898  			gp->sig++;
  2899  		}
  2900  		while(gp = runqget(&p1))
  2901  			gp->sig++;
  2902  		for(j = 0; j < i; j++) {
  2903  			if(gs[j].sig != 1) {
  2904  				runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
  2905  				runtime·throw("bad element");
  2906  			}
  2907  		}
  2908  		if(s != i/2 && s != i/2+1) {
  2909  			runtime·printf("bad steal %d, want %d or %d, iter %d\n",
  2910  				s, i/2, i/2+1, i);
  2911  			runtime·throw("bad steal");
  2912  		}
  2913  	}
  2914  }
  2915  
  2916  extern void runtime·morestack(void);
  2917  
  2918  // Does f mark the top of a goroutine stack?
  2919  bool
  2920  runtime·topofstack(Func *f)
  2921  {
  2922  	return f->entry == (uintptr)runtime·goexit ||
  2923  		f->entry == (uintptr)runtime·mstart ||
  2924  		f->entry == (uintptr)runtime·mcall ||
  2925  		f->entry == (uintptr)runtime·morestack ||
  2926  		f->entry == (uintptr)runtime·lessstack ||
  2927  		f->entry == (uintptr)_rt0_go;
  2928  }
  2929  
  2930  void
  2931  runtime∕debug·setMaxThreads(intgo in, intgo out)
  2932  {
  2933  	runtime·lock(&runtime·sched);
  2934  	out = runtime·sched.maxmcount;
  2935  	runtime·sched.maxmcount = in;
  2936  	checkmcount();
  2937  	runtime·unlock(&runtime·sched);
  2938  	FLUSH(&out);
  2939  }
  2940  
  2941  static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
  2942  
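        // haveexperiment reports whether name appears in the comma-separated
        // GOEXPERIMENT list compiled into the runtime.  For example, if the
        // runtime was built with GOEXPERIMENT=foo,bar (hypothetical names),
        // haveexperiment("foo") and haveexperiment("bar") both return true.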
  2943  static bool
  2944  haveexperiment(int8 *name)
  2945  {
  2946  	int32 i, j;
  2947  	
  2948  	for(i=0; i<sizeof(experiment); i++) {
  2949  		if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
  2950  			for(j=0; name[j]; j++)
  2951  				if(experiment[i+j] != name[j])
  2952  					goto nomatch;
  2953  			if(experiment[i+j] != '\0' && experiment[i+j] != ',')
  2954  				goto nomatch;
  2955  			return 1;
  2956  		}
  2957  	nomatch:;
  2958  	}
  2959  	return 0;
  2960  }