github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libgo/runtime/proc.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <limits.h>
#include <signal.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>

#include "config.h"

#ifdef HAVE_DL_ITERATE_PHDR
#include <link.h>
#endif

#include "runtime.h"
#include "arch.h"
#include "defs.h"
#include "malloc.h"
#include "go-type.h"
#include "go-defer.h"

#ifdef USING_SPLIT_STACK

/* FIXME: These are not declared anywhere. */

extern void __splitstack_getcontext(void *context[10]);

extern void __splitstack_setcontext(void *context[10]);

extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);

extern void * __splitstack_resetcontext(void *context[10], size_t *);

extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
			       void **);

extern void __splitstack_block_signals (int *, int *);

extern void __splitstack_block_signals_context (void *context[10], int *,
						int *);

#endif

#ifndef PTHREAD_STACK_MIN
# define PTHREAD_STACK_MIN 8192
#endif

#if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
# define StackMin PTHREAD_STACK_MIN
#else
# define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
#endif

uintptr runtime_stacks_sys;

static void gtraceback(G*);

#ifdef __rtems__
#define __thread
#endif

static __thread G *g;
static __thread M *m;

#ifndef SETCONTEXT_CLOBBERS_TLS

static inline void
initcontext(void)
{
}

static inline void
fixcontext(ucontext_t *c __attribute__ ((unused)))
{
}

#else

# if defined(__x86_64__) && defined(__sun__)

// x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
// register to that of the thread which called getcontext. The effect
// is that the address of all __thread variables changes. This bug
// also affects pthread_self() and pthread_getspecific. We work
// around it by clobbering the context field directly to keep %fs the
// same.

static __thread greg_t fs;

static inline void
initcontext(void)
{
	ucontext_t c;

	getcontext(&c);
	fs = c.uc_mcontext.gregs[REG_FSBASE];
}

static inline void
fixcontext(ucontext_t* c)
{
	c->uc_mcontext.gregs[REG_FSBASE] = fs;
}

# elif defined(__NetBSD__)

// NetBSD has a bug: setcontext clobbers tlsbase, we need to save
// and restore it ourselves.

static __thread __greg_t tlsbase;

static inline void
initcontext(void)
{
	ucontext_t c;

	getcontext(&c);
	tlsbase = c.uc_mcontext._mc_tlsbase;
}

static inline void
fixcontext(ucontext_t* c)
{
	c->uc_mcontext._mc_tlsbase = tlsbase;
}

# elif defined(__sparc__)

static inline void
initcontext(void)
{
}

static inline void
fixcontext(ucontext_t *c)
{
	/* ??? Using
	     register unsigned long thread __asm__("%g7");
	     c->uc_mcontext.gregs[REG_G7] = thread;
	   results in
	     error: variable ‘thread’ might be clobbered by \
	       ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
	   which ought to be false, as %g7 is a fixed register.
*/ 146 147 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8) 148 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7])); 149 else 150 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7])); 151 } 152 153 # else 154 155 # error unknown case for SETCONTEXT_CLOBBERS_TLS 156 157 # endif 158 159 #endif 160 161 // We can not always refer to the TLS variables directly. The 162 // compiler will call tls_get_addr to get the address of the variable, 163 // and it may hold it in a register across a call to schedule. When 164 // we get back from the call we may be running in a different thread, 165 // in which case the register now points to the TLS variable for a 166 // different thread. We use non-inlinable functions to avoid this 167 // when necessary. 168 169 G* runtime_g(void) __attribute__ ((noinline, no_split_stack)); 170 171 G* 172 runtime_g(void) 173 { 174 return g; 175 } 176 177 M* runtime_m(void) __attribute__ ((noinline, no_split_stack)); 178 179 M* 180 runtime_m(void) 181 { 182 return m; 183 } 184 185 // Set m and g. 186 void 187 runtime_setmg(M* mp, G* gp) 188 { 189 m = mp; 190 g = gp; 191 } 192 193 // Start a new thread. 194 static void 195 runtime_newosproc(M *mp) 196 { 197 pthread_attr_t attr; 198 sigset_t clear, old; 199 pthread_t tid; 200 int ret; 201 202 if(pthread_attr_init(&attr) != 0) 203 runtime_throw("pthread_attr_init"); 204 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0) 205 runtime_throw("pthread_attr_setdetachstate"); 206 207 // Block signals during pthread_create so that the new thread 208 // starts with signals disabled. It will enable them in minit. 209 sigfillset(&clear); 210 211 #ifdef SIGTRAP 212 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux. 213 sigdelset(&clear, SIGTRAP); 214 #endif 215 216 sigemptyset(&old); 217 pthread_sigmask(SIG_BLOCK, &clear, &old); 218 ret = pthread_create(&tid, &attr, runtime_mstart, mp); 219 pthread_sigmask(SIG_SETMASK, &old, nil); 220 221 if (ret != 0) 222 runtime_throw("pthread_create"); 223 } 224 225 // First function run by a new goroutine. This replaces gogocall. 226 static void 227 kickoff(void) 228 { 229 void (*fn)(void*); 230 231 if(g->traceback != nil) 232 gtraceback(g); 233 234 fn = (void (*)(void*))(g->entry); 235 fn(g->param); 236 runtime_goexit(); 237 } 238 239 // Switch context to a different goroutine. This is like longjmp. 240 void runtime_gogo(G*) __attribute__ ((noinline)); 241 void 242 runtime_gogo(G* newg) 243 { 244 #ifdef USING_SPLIT_STACK 245 __splitstack_setcontext(&newg->stack_context[0]); 246 #endif 247 g = newg; 248 newg->fromgogo = true; 249 fixcontext(&newg->context); 250 setcontext(&newg->context); 251 runtime_throw("gogo setcontext returned"); 252 } 253 254 // Save context and call fn passing g as a parameter. This is like 255 // setjmp. Because getcontext always returns 0, unlike setjmp, we use 256 // g->fromgogo as a code. It will be true if we got here via 257 // setcontext. g == nil the first time this is called in a new m. 258 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline)); 259 void 260 runtime_mcall(void (*pfn)(G*)) 261 { 262 M *mp; 263 G *gp; 264 265 // Ensure that all registers are on the stack for the garbage 266 // collector. 
267 __builtin_unwind_init(); 268 269 mp = m; 270 gp = g; 271 if(gp == mp->g0) 272 runtime_throw("runtime: mcall called on m->g0 stack"); 273 274 if(gp != nil) { 275 276 #ifdef USING_SPLIT_STACK 277 __splitstack_getcontext(&g->stack_context[0]); 278 #else 279 gp->gcnext_sp = &pfn; 280 #endif 281 gp->fromgogo = false; 282 getcontext(&gp->context); 283 284 // When we return from getcontext, we may be running 285 // in a new thread. That means that m and g may have 286 // changed. They are global variables so we will 287 // reload them, but the addresses of m and g may be 288 // cached in our local stack frame, and those 289 // addresses may be wrong. Call functions to reload 290 // the values for this thread. 291 mp = runtime_m(); 292 gp = runtime_g(); 293 294 if(gp->traceback != nil) 295 gtraceback(gp); 296 } 297 if (gp == nil || !gp->fromgogo) { 298 #ifdef USING_SPLIT_STACK 299 __splitstack_setcontext(&mp->g0->stack_context[0]); 300 #endif 301 mp->g0->entry = (byte*)pfn; 302 mp->g0->param = gp; 303 304 // It's OK to set g directly here because this case 305 // can not occur if we got here via a setcontext to 306 // the getcontext call just above. 307 g = mp->g0; 308 309 fixcontext(&mp->g0->context); 310 setcontext(&mp->g0->context); 311 runtime_throw("runtime: mcall function returned"); 312 } 313 } 314 315 // Goroutine scheduler 316 // The scheduler's job is to distribute ready-to-run goroutines over worker threads. 317 // 318 // The main concepts are: 319 // G - goroutine. 320 // M - worker thread, or machine. 321 // P - processor, a resource that is required to execute Go code. 322 // M must have an associated P to execute Go code, however it can be 323 // blocked or in a syscall w/o an associated P. 324 // 325 // Design doc at http://golang.org/s/go11sched. 326 327 typedef struct Sched Sched; 328 struct Sched { 329 Lock lock; 330 331 uint64 goidgen; 332 M* midle; // idle m's waiting for work 333 int32 nmidle; // number of idle m's waiting for work 334 int32 nmidlelocked; // number of locked m's waiting for work 335 int32 mcount; // number of m's that have been created 336 int32 maxmcount; // maximum number of m's allowed (or die) 337 338 P* pidle; // idle P's 339 uint32 npidle; 340 uint32 nmspinning; 341 342 // Global runnable queue. 343 G* runqhead; 344 G* runqtail; 345 int32 runqsize; 346 347 // Global cache of dead G's. 348 Lock gflock; 349 G* gfree; 350 351 uint32 gcwaiting; // gc is waiting to run 352 int32 stopwait; 353 Note stopnote; 354 uint32 sysmonwait; 355 Note sysmonnote; 356 uint64 lastpoll; 357 358 int32 profilehz; // cpu profiling rate 359 }; 360 361 enum 362 { 363 // The max value of GOMAXPROCS. 364 // There are no fundamental restrictions on the value. 365 MaxGomaxprocs = 1<<8, 366 367 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once. 368 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 
369 GoidCacheBatch = 16, 370 }; 371 372 Sched runtime_sched; 373 int32 runtime_gomaxprocs; 374 uint32 runtime_needextram = 1; 375 bool runtime_iscgo = true; 376 M runtime_m0; 377 G runtime_g0; // idle goroutine for m0 378 G* runtime_lastg; 379 M* runtime_allm; 380 P** runtime_allp; 381 M* runtime_extram; 382 int8* runtime_goos; 383 int32 runtime_ncpu; 384 bool runtime_precisestack; 385 static int32 newprocs; 386 387 static Lock allglock; // the following vars are protected by this lock or by stoptheworld 388 G** runtime_allg; 389 uintptr runtime_allglen; 390 static uintptr allgcap; 391 392 void* runtime_mstart(void*); 393 static void runqput(P*, G*); 394 static G* runqget(P*); 395 static bool runqputslow(P*, G*, uint32, uint32); 396 static G* runqsteal(P*, P*); 397 static void mput(M*); 398 static M* mget(void); 399 static void mcommoninit(M*); 400 static void schedule(void); 401 static void procresize(int32); 402 static void acquirep(P*); 403 static P* releasep(void); 404 static void newm(void(*)(void), P*); 405 static void stopm(void); 406 static void startm(P*, bool); 407 static void handoffp(P*); 408 static void wakep(void); 409 static void stoplockedm(void); 410 static void startlockedm(G*); 411 static void sysmon(void); 412 static uint32 retake(int64); 413 static void incidlelocked(int32); 414 static void checkdead(void); 415 static void exitsyscall0(G*); 416 static void park0(G*); 417 static void goexit0(G*); 418 static void gfput(P*, G*); 419 static G* gfget(P*); 420 static void gfpurge(P*); 421 static void globrunqput(G*); 422 static void globrunqputbatch(G*, G*, int32); 423 static G* globrunqget(P*, int32); 424 static P* pidleget(void); 425 static void pidleput(P*); 426 static void injectglist(G*); 427 static bool preemptall(void); 428 static bool exitsyscallfast(void); 429 static void allgadd(G*); 430 431 // The bootstrap sequence is: 432 // 433 // call osinit 434 // call schedinit 435 // make & queue new G 436 // call runtime_mstart 437 // 438 // The new G calls runtime_main. 439 void 440 runtime_schedinit(void) 441 { 442 int32 n, procs; 443 const byte *p; 444 Eface i; 445 446 m = &runtime_m0; 447 g = &runtime_g0; 448 m->g0 = g; 449 m->curg = g; 450 g->m = m; 451 452 initcontext(); 453 454 runtime_sched.maxmcount = 10000; 455 runtime_precisestack = 0; 456 457 // runtime_symtabinit(); 458 runtime_mallocinit(); 459 mcommoninit(m); 460 461 // Initialize the itable value for newErrorCString, 462 // so that the next time it gets called, possibly 463 // in a fault during a garbage collection, it will not 464 // need to allocated memory. 465 runtime_newErrorCString(0, &i); 466 467 // Initialize the cached gotraceback value, since 468 // gotraceback calls getenv, which mallocs on Plan 9. 469 runtime_gotraceback(nil); 470 471 runtime_goargs(); 472 runtime_goenvs(); 473 runtime_parsedebugvars(); 474 475 runtime_sched.lastpoll = runtime_nanotime(); 476 procs = 1; 477 p = runtime_getenv("GOMAXPROCS"); 478 if(p != nil && (n = runtime_atoi(p)) > 0) { 479 if(n > MaxGomaxprocs) 480 n = MaxGomaxprocs; 481 procs = n; 482 } 483 runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0])); 484 procresize(procs); 485 486 // Can not enable GC until all roots are registered. 487 // mstats.enablegc = 1; 488 } 489 490 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main"); 491 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main"); 492 493 static void 494 initDone(void *arg __attribute__ ((unused))) { 495 runtime_unlockOSThread(); 496 }; 497 498 // The main goroutine. 
499 // Note: C frames in general are not copyable during stack growth, for two reasons: 500 // 1) We don't know where in a frame to find pointers to other stack locations. 501 // 2) There's no guarantee that globals or heap values do not point into the frame. 502 // 503 // The C frame for runtime.main is copyable, because: 504 // 1) There are no pointers to other stack locations in the frame 505 // (d.fn points at a global, d.link is nil, d.argp is -1). 506 // 2) The only pointer into this frame is from the defer chain, 507 // which is explicitly handled during stack copying. 508 void 509 runtime_main(void* dummy __attribute__((unused))) 510 { 511 Defer d; 512 _Bool frame; 513 514 newm(sysmon, nil); 515 516 // Lock the main goroutine onto this, the main OS thread, 517 // during initialization. Most programs won't care, but a few 518 // do require certain calls to be made by the main thread. 519 // Those can arrange for main.main to run in the main thread 520 // by calling runtime.LockOSThread during initialization 521 // to preserve the lock. 522 runtime_lockOSThread(); 523 524 // Defer unlock so that runtime.Goexit during init does the unlock too. 525 d.__pfn = initDone; 526 d.__next = g->defer; 527 d.__arg = (void*)-1; 528 d.__panic = g->panic; 529 d.__retaddr = nil; 530 d.__makefunc_can_recover = 0; 531 d.__frame = &frame; 532 d.__special = true; 533 g->defer = &d; 534 535 if(m != &runtime_m0) 536 runtime_throw("runtime_main not on m0"); 537 __go_go(runtime_MHeap_Scavenger, nil); 538 main_init(); 539 540 if(g->defer != &d || d.__pfn != initDone) 541 runtime_throw("runtime: bad defer entry after init"); 542 g->defer = d.__next; 543 runtime_unlockOSThread(); 544 545 // For gccgo we have to wait until after main is initialized 546 // to enable GC, because initializing main registers the GC 547 // roots. 548 mstats.enablegc = 1; 549 550 main_main(); 551 552 // Make racy client program work: if panicking on 553 // another goroutine at the same time as main returns, 554 // let the other goroutine finish printing the panic trace. 555 // Once it does, it will exit. See issue 3934. 
556 if(runtime_panicking) 557 runtime_park(nil, nil, "panicwait"); 558 559 runtime_exit(0); 560 for(;;) 561 *(int32*)0 = 0; 562 } 563 564 void 565 runtime_goroutineheader(G *gp) 566 { 567 const char *status; 568 int64 waitfor; 569 570 switch(gp->status) { 571 case Gidle: 572 status = "idle"; 573 break; 574 case Grunnable: 575 status = "runnable"; 576 break; 577 case Grunning: 578 status = "running"; 579 break; 580 case Gsyscall: 581 status = "syscall"; 582 break; 583 case Gwaiting: 584 if(gp->waitreason) 585 status = gp->waitreason; 586 else 587 status = "waiting"; 588 break; 589 default: 590 status = "???"; 591 break; 592 } 593 594 // approx time the G is blocked, in minutes 595 waitfor = 0; 596 if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0) 597 waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000); 598 599 if(waitfor < 1) 600 runtime_printf("goroutine %D [%s]:\n", gp->goid, status); 601 else 602 runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor); 603 } 604 605 void 606 runtime_printcreatedby(G *g) 607 { 608 if(g != nil && g->gopc != 0 && g->goid != 1) { 609 String fn; 610 String file; 611 intgo line; 612 613 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) { 614 runtime_printf("created by %S\n", fn); 615 runtime_printf("\t%S:%D\n", file, (int64) line); 616 } 617 } 618 } 619 620 struct Traceback 621 { 622 G* gp; 623 Location locbuf[TracebackMaxFrames]; 624 int32 c; 625 }; 626 627 void 628 runtime_tracebackothers(G * volatile me) 629 { 630 G * volatile gp; 631 Traceback tb; 632 int32 traceback; 633 volatile uintptr i; 634 635 tb.gp = me; 636 traceback = runtime_gotraceback(nil); 637 638 // Show the current goroutine first, if we haven't already. 639 if((gp = m->curg) != nil && gp != me) { 640 runtime_printf("\n"); 641 runtime_goroutineheader(gp); 642 gp->traceback = &tb; 643 644 #ifdef USING_SPLIT_STACK 645 __splitstack_getcontext(&me->stack_context[0]); 646 #endif 647 getcontext(&me->context); 648 649 if(gp->traceback != nil) { 650 runtime_gogo(gp); 651 } 652 653 runtime_printtrace(tb.locbuf, tb.c, false); 654 runtime_printcreatedby(gp); 655 } 656 657 runtime_lock(&allglock); 658 for(i = 0; i < runtime_allglen; i++) { 659 gp = runtime_allg[i]; 660 if(gp == me || gp == m->curg || gp->status == Gdead) 661 continue; 662 if(gp->issystem && traceback < 2) 663 continue; 664 runtime_printf("\n"); 665 runtime_goroutineheader(gp); 666 667 // Our only mechanism for doing a stack trace is 668 // _Unwind_Backtrace. And that only works for the 669 // current thread, not for other random goroutines. 670 // So we need to switch context to the goroutine, get 671 // the backtrace, and then switch back. 672 673 // This means that if g is running or in a syscall, we 674 // can't reliably print a stack trace. FIXME. 
675 676 if(gp->status == Grunning) { 677 runtime_printf("\tgoroutine running on other thread; stack unavailable\n"); 678 runtime_printcreatedby(gp); 679 } else if(gp->status == Gsyscall) { 680 runtime_printf("\tgoroutine in C code; stack unavailable\n"); 681 runtime_printcreatedby(gp); 682 } else { 683 gp->traceback = &tb; 684 685 #ifdef USING_SPLIT_STACK 686 __splitstack_getcontext(&me->stack_context[0]); 687 #endif 688 getcontext(&me->context); 689 690 if(gp->traceback != nil) { 691 runtime_gogo(gp); 692 } 693 694 runtime_printtrace(tb.locbuf, tb.c, false); 695 runtime_printcreatedby(gp); 696 } 697 } 698 runtime_unlock(&allglock); 699 } 700 701 static void 702 checkmcount(void) 703 { 704 // sched lock is held 705 if(runtime_sched.mcount > runtime_sched.maxmcount) { 706 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount); 707 runtime_throw("thread exhaustion"); 708 } 709 } 710 711 // Do a stack trace of gp, and then restore the context to 712 // gp->dotraceback. 713 714 static void 715 gtraceback(G* gp) 716 { 717 Traceback* traceback; 718 719 traceback = gp->traceback; 720 gp->traceback = nil; 721 traceback->c = runtime_callers(1, traceback->locbuf, 722 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false); 723 runtime_gogo(traceback->gp); 724 } 725 726 static void 727 mcommoninit(M *mp) 728 { 729 // If there is no mcache runtime_callers() will crash, 730 // and we are most likely in sysmon thread so the stack is senseless anyway. 731 if(m->mcache) 732 runtime_callers(1, mp->createstack, nelem(mp->createstack), false); 733 734 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks(); 735 736 runtime_lock(&runtime_sched.lock); 737 mp->id = runtime_sched.mcount++; 738 checkmcount(); 739 runtime_mpreinit(mp); 740 741 // Add to runtime_allm so garbage collector doesn't free m 742 // when it is just in a register or thread-local storage. 743 mp->alllink = runtime_allm; 744 // runtime_NumCgoCall() iterates over allm w/o schedlock, 745 // so we need to publish it safely. 746 runtime_atomicstorep(&runtime_allm, mp); 747 runtime_unlock(&runtime_sched.lock); 748 } 749 750 // Mark gp ready to run. 751 void 752 runtime_ready(G *gp) 753 { 754 // Mark runnable. 755 m->locks++; // disable preemption because it can be holding p in a local var 756 if(gp->status != Gwaiting) { 757 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status); 758 runtime_throw("bad g->status in ready"); 759 } 760 gp->status = Grunnable; 761 runqput(m->p, gp); 762 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic 763 wakep(); 764 m->locks--; 765 } 766 767 int32 768 runtime_gcprocs(void) 769 { 770 int32 n; 771 772 // Figure out how many CPUs to use during GC. 773 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. 774 runtime_lock(&runtime_sched.lock); 775 n = runtime_gomaxprocs; 776 if(n > runtime_ncpu) 777 n = runtime_ncpu > 0 ? 
runtime_ncpu : 1; 778 if(n > MaxGcproc) 779 n = MaxGcproc; 780 if(n > runtime_sched.nmidle+1) // one M is currently running 781 n = runtime_sched.nmidle+1; 782 runtime_unlock(&runtime_sched.lock); 783 return n; 784 } 785 786 static bool 787 needaddgcproc(void) 788 { 789 int32 n; 790 791 runtime_lock(&runtime_sched.lock); 792 n = runtime_gomaxprocs; 793 if(n > runtime_ncpu) 794 n = runtime_ncpu; 795 if(n > MaxGcproc) 796 n = MaxGcproc; 797 n -= runtime_sched.nmidle+1; // one M is currently running 798 runtime_unlock(&runtime_sched.lock); 799 return n > 0; 800 } 801 802 void 803 runtime_helpgc(int32 nproc) 804 { 805 M *mp; 806 int32 n, pos; 807 808 runtime_lock(&runtime_sched.lock); 809 pos = 0; 810 for(n = 1; n < nproc; n++) { // one M is currently running 811 if(runtime_allp[pos]->mcache == m->mcache) 812 pos++; 813 mp = mget(); 814 if(mp == nil) 815 runtime_throw("runtime_gcprocs inconsistency"); 816 mp->helpgc = n; 817 mp->mcache = runtime_allp[pos]->mcache; 818 pos++; 819 runtime_notewakeup(&mp->park); 820 } 821 runtime_unlock(&runtime_sched.lock); 822 } 823 824 // Similar to stoptheworld but best-effort and can be called several times. 825 // There is no reverse operation, used during crashing. 826 // This function must not lock any mutexes. 827 void 828 runtime_freezetheworld(void) 829 { 830 int32 i; 831 832 if(runtime_gomaxprocs == 1) 833 return; 834 // stopwait and preemption requests can be lost 835 // due to races with concurrently executing threads, 836 // so try several times 837 for(i = 0; i < 5; i++) { 838 // this should tell the scheduler to not start any new goroutines 839 runtime_sched.stopwait = 0x7fffffff; 840 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1); 841 // this should stop running goroutines 842 if(!preemptall()) 843 break; // no running goroutines 844 runtime_usleep(1000); 845 } 846 // to be sure 847 runtime_usleep(1000); 848 preemptall(); 849 runtime_usleep(1000); 850 } 851 852 void 853 runtime_stoptheworld(void) 854 { 855 int32 i; 856 uint32 s; 857 P *p; 858 bool wait; 859 860 runtime_lock(&runtime_sched.lock); 861 runtime_sched.stopwait = runtime_gomaxprocs; 862 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1); 863 preemptall(); 864 // stop current P 865 m->p->status = Pgcstop; 866 runtime_sched.stopwait--; 867 // try to retake all P's in Psyscall status 868 for(i = 0; i < runtime_gomaxprocs; i++) { 869 p = runtime_allp[i]; 870 s = p->status; 871 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop)) 872 runtime_sched.stopwait--; 873 } 874 // stop idle P's 875 while((p = pidleget()) != nil) { 876 p->status = Pgcstop; 877 runtime_sched.stopwait--; 878 } 879 wait = runtime_sched.stopwait > 0; 880 runtime_unlock(&runtime_sched.lock); 881 882 // wait for remaining P's to stop voluntarily 883 if(wait) { 884 runtime_notesleep(&runtime_sched.stopnote); 885 runtime_noteclear(&runtime_sched.stopnote); 886 } 887 if(runtime_sched.stopwait) 888 runtime_throw("stoptheworld: not stopped"); 889 for(i = 0; i < runtime_gomaxprocs; i++) { 890 p = runtime_allp[i]; 891 if(p->status != Pgcstop) 892 runtime_throw("stoptheworld: not stopped"); 893 } 894 } 895 896 static void 897 mhelpgc(void) 898 { 899 m->helpgc = -1; 900 } 901 902 void 903 runtime_starttheworld(void) 904 { 905 P *p, *p1; 906 M *mp; 907 G *gp; 908 bool add; 909 910 m->locks++; // disable preemption because it can be holding p in a local var 911 gp = runtime_netpoll(false); // non-blocking 912 injectglist(gp); 913 add = needaddgcproc(); 914 runtime_lock(&runtime_sched.lock); 915 if(newprocs) { 
		procresize(newprocs);
		newprocs = 0;
	} else
		procresize(runtime_gomaxprocs);
	runtime_sched.gcwaiting = 0;

	p1 = nil;
	while((p = pidleget()) != nil) {
		// procresize() puts p's with work at the beginning of the list.
		// Once we reach a p without a run queue, the rest don't have one either.
		if(p->runqhead == p->runqtail) {
			pidleput(p);
			break;
		}
		p->m = mget();
		p->link = p1;
		p1 = p;
	}
	if(runtime_sched.sysmonwait) {
		runtime_sched.sysmonwait = false;
		runtime_notewakeup(&runtime_sched.sysmonnote);
	}
	runtime_unlock(&runtime_sched.lock);

	while(p1) {
		p = p1;
		p1 = p1->link;
		if(p->m) {
			mp = p->m;
			p->m = nil;
			if(mp->nextp)
				runtime_throw("starttheworld: inconsistent mp->nextp");
			mp->nextp = p;
			runtime_notewakeup(&mp->park);
		} else {
			// Start M to run P. Do not start another M below.
			newm(nil, p);
			add = false;
		}
	}

	if(add) {
		// If GC could have used another helper proc, start one now,
		// in the hope that it will be available next time.
		// It would have been even better to start it before the collection,
		// but doing so requires allocating memory, so it's tricky to
		// coordinate. This lazy approach works out in practice:
		// we don't mind if the first couple gc rounds don't have quite
		// the maximum number of procs.
		newm(mhelpgc, nil);
	}
	m->locks--;
}

// Called to start an M.
void*
runtime_mstart(void* mp)
{
	m = (M*)mp;
	g = m->g0;

	initcontext();

	g->entry = nil;
	g->param = nil;

	// Record top of stack for use by mcall.
	// Once we call schedule we're never coming back,
	// so other calls can reuse this stack space.
#ifdef USING_SPLIT_STACK
	__splitstack_getcontext(&g->stack_context[0]);
#else
	g->gcinitial_sp = &mp;
	// Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
	// is the top of the stack, not the bottom.
	g->gcstack_size = 0;
	g->gcnext_sp = &mp;
#endif
	getcontext(&g->context);

	if(g->entry != nil) {
		// Got here from mcall.
		void (*pfn)(G*) = (void (*)(G*))g->entry;
		G* gp = (G*)g->param;
		pfn(gp);
		*(int*)0x21 = 0x21;
	}
	runtime_minit();

#ifdef USING_SPLIT_STACK
	{
		int dont_block_signals = 0;
		__splitstack_block_signals(&dont_block_signals, nil);
	}
#endif

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if(m == &runtime_m0)
		runtime_initsig();

	if(m->mstartfn)
		m->mstartfn();

	if(m->helpgc) {
		m->helpgc = 0;
		stopm();
	} else if(m != &runtime_m0) {
		acquirep(m->nextp);
		m->nextp = nil;
	}
	schedule();

	// TODO(brainman): This point is never reached, because scheduler
	// does not release os threads at the moment. But once this path
	// is enabled, we must remove our seh here.

	return nil;
}

typedef struct CgoThreadStart CgoThreadStart;
struct CgoThreadStart
{
	M *m;
	G *g;
	uintptr *tls;
	void (*fn)(void);
};

// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.
M*
runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
{
	M *mp;

	m->locks++;  // disable GC because it can be called from sysmon
	if(m->p == nil)
		acquirep(p);  // temporarily borrow p for mallocs in this function
#if 0
	if(mtype == nil) {
		Eface e;
		runtime_gc_m_ptr(&e);
		mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
	}
#endif

	mp = runtime_mal(sizeof *mp);
	mcommoninit(mp);
	mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);

	if(p == m->p)
		releasep();
	m->locks--;

	return mp;
}

static G*
allocg(void)
{
	G *gp;
	// static Type *gtype;

	// if(gtype == nil) {
	// 	Eface e;
	// 	runtime_gc_g_ptr(&e);
	// 	gtype = ((PtrType*)e.__type_descriptor)->__element_type;
	// }
	// gp = runtime_cnew(gtype);
	gp = runtime_malloc(sizeof(G));
	return gp;
}

static M* lockextra(bool nilokay);
static void unlockextra(M*);

// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via casp) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
//
// Unlike the gc toolchain, we start running on curg, since we are
// just going to return and let the caller continue.
void
runtime_needm(void)
{
	M *mp;

	if(runtime_needextram) {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can not throw, because scheduler is not initialized yet.
		int rv __attribute__((unused));
		rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
			sizeof("fatal error: cgo callback before cgo call\n")-1);
		runtime_exit(1);
	}

	// Lock extra list, take head, unlock popped list.
	// nilokay=false is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp = lockextra(false);

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp->needextram = mp->schedlink == nil;
	unlockextra(mp->schedlink);

	// Install m and g (= m->curg).
	runtime_setmg(mp, mp->curg);

	// Initialize g's context as in mstart.
	initcontext();
	g->status = Gsyscall;
	g->entry = nil;
	g->param = nil;
#ifdef USING_SPLIT_STACK
	__splitstack_getcontext(&g->stack_context[0]);
#else
	g->gcinitial_sp = &mp;
	g->gcstack = nil;
	g->gcstack_size = 0;
	g->gcnext_sp = &mp;
#endif
	getcontext(&g->context);

	if(g->entry != nil) {
		// Got here from mcall.
		void (*pfn)(G*) = (void (*)(G*))g->entry;
		G* gp = (G*)g->param;
		pfn(gp);
		*(int*)0x22 = 0x22;
	}

	// Initialize this thread to use the m.
	runtime_minit();

#ifdef USING_SPLIT_STACK
	{
		int dont_block_signals = 0;
		__splitstack_block_signals(&dont_block_signals, nil);
	}
#endif
}

// newextram allocates an m and puts it on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
void
runtime_newextram(void)
{
	M *mp, *mnext;
	G *gp;
	byte *g0_sp, *sp;
	size_t g0_spsize, spsize;

	// Create extra goroutine locked to extra m.
	// The goroutine is the context in which the cgo callback will run.
	// The sched.pc will never be returned to, but setting it to
	// runtime.goexit makes clear to the traceback routines where
	// the goroutine stack ends.
	mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
	gp = runtime_malg(StackMin, &sp, &spsize);
	gp->status = Gdead;
	mp->curg = gp;
	mp->locked = LockInternal;
	mp->lockedg = gp;
	gp->lockedm = mp;
	gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
	// put on allg for garbage collector
	allgadd(gp);

	// The context for gp will be set up in runtime_needm. But
	// here we need to set up the context for g0.
	getcontext(&mp->g0->context);
	mp->g0->context.uc_stack.ss_sp = g0_sp;
	mp->g0->context.uc_stack.ss_size = g0_spsize;
	makecontext(&mp->g0->context, kickoff, 0);

	// Add m to the extra list.
	mnext = lockextra(true);
	mp->schedlink = mnext;
	unlockextra(mp);
}

// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
// It puts the current m back onto the extra list.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
1245 // If we saved the m for that thread, there would be an m leak each time 1246 // such a thread exited. Instead, we acquire and release an m on each 1247 // call. These should typically not be scheduling operations, just a few 1248 // atomics, so the cost should be small. 1249 // 1250 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread 1251 // variable using pthread_key_create. Unlike the pthread keys we already use 1252 // on OS X, this dummy key would never be read by Go code. It would exist 1253 // only so that we could register at thread-exit-time destructor. 1254 // That destructor would put the m back onto the extra list. 1255 // This is purely a performance optimization. The current version, 1256 // in which dropm happens on each cgo call, is still correct too. 1257 // We may have to keep the current version on systems with cgo 1258 // but without pthreads, like Windows. 1259 void 1260 runtime_dropm(void) 1261 { 1262 M *mp, *mnext; 1263 1264 // Undo whatever initialization minit did during needm. 1265 runtime_unminit(); 1266 1267 // Clear m and g, and return m to the extra list. 1268 // After the call to setmg we can only call nosplit functions. 1269 mp = m; 1270 runtime_setmg(nil, nil); 1271 1272 mp->curg->status = Gdead; 1273 mp->curg->gcstack = nil; 1274 mp->curg->gcnext_sp = nil; 1275 1276 mnext = lockextra(true); 1277 mp->schedlink = mnext; 1278 unlockextra(mp); 1279 } 1280 1281 #define MLOCKED ((M*)1) 1282 1283 // lockextra locks the extra list and returns the list head. 1284 // The caller must unlock the list by storing a new list head 1285 // to runtime.extram. If nilokay is true, then lockextra will 1286 // return a nil list head if that's what it finds. If nilokay is false, 1287 // lockextra will keep waiting until the list head is no longer nil. 1288 static M* 1289 lockextra(bool nilokay) 1290 { 1291 M *mp; 1292 void (*yield)(void); 1293 1294 for(;;) { 1295 mp = runtime_atomicloadp(&runtime_extram); 1296 if(mp == MLOCKED) { 1297 yield = runtime_osyield; 1298 yield(); 1299 continue; 1300 } 1301 if(mp == nil && !nilokay) { 1302 runtime_usleep(1); 1303 continue; 1304 } 1305 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) { 1306 yield = runtime_osyield; 1307 yield(); 1308 continue; 1309 } 1310 break; 1311 } 1312 return mp; 1313 } 1314 1315 static void 1316 unlockextra(M *mp) 1317 { 1318 runtime_atomicstorep(&runtime_extram, mp); 1319 } 1320 1321 static int32 1322 countextra() 1323 { 1324 M *mp, *mc; 1325 int32 c; 1326 1327 for(;;) { 1328 mp = runtime_atomicloadp(&runtime_extram); 1329 if(mp == MLOCKED) { 1330 runtime_osyield(); 1331 continue; 1332 } 1333 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) { 1334 runtime_osyield(); 1335 continue; 1336 } 1337 c = 0; 1338 for(mc = mp; mc != nil; mc = mc->schedlink) 1339 c++; 1340 runtime_atomicstorep(&runtime_extram, mp); 1341 return c; 1342 } 1343 } 1344 1345 // Create a new m. It will start off with a call to fn, or else the scheduler. 1346 static void 1347 newm(void(*fn)(void), P *p) 1348 { 1349 M *mp; 1350 1351 mp = runtime_allocm(p, -1, nil, nil); 1352 mp->nextp = p; 1353 mp->mstartfn = fn; 1354 1355 runtime_newosproc(mp); 1356 } 1357 1358 // Stops execution of the current m until new work is available. 1359 // Returns with acquired P. 
1360 static void 1361 stopm(void) 1362 { 1363 if(m->locks) 1364 runtime_throw("stopm holding locks"); 1365 if(m->p) 1366 runtime_throw("stopm holding p"); 1367 if(m->spinning) { 1368 m->spinning = false; 1369 runtime_xadd(&runtime_sched.nmspinning, -1); 1370 } 1371 1372 retry: 1373 runtime_lock(&runtime_sched.lock); 1374 mput(m); 1375 runtime_unlock(&runtime_sched.lock); 1376 runtime_notesleep(&m->park); 1377 runtime_noteclear(&m->park); 1378 if(m->helpgc) { 1379 runtime_gchelper(); 1380 m->helpgc = 0; 1381 m->mcache = nil; 1382 goto retry; 1383 } 1384 acquirep(m->nextp); 1385 m->nextp = nil; 1386 } 1387 1388 static void 1389 mspinning(void) 1390 { 1391 m->spinning = true; 1392 } 1393 1394 // Schedules some M to run the p (creates an M if necessary). 1395 // If p==nil, tries to get an idle P, if no idle P's does nothing. 1396 static void 1397 startm(P *p, bool spinning) 1398 { 1399 M *mp; 1400 void (*fn)(void); 1401 1402 runtime_lock(&runtime_sched.lock); 1403 if(p == nil) { 1404 p = pidleget(); 1405 if(p == nil) { 1406 runtime_unlock(&runtime_sched.lock); 1407 if(spinning) 1408 runtime_xadd(&runtime_sched.nmspinning, -1); 1409 return; 1410 } 1411 } 1412 mp = mget(); 1413 runtime_unlock(&runtime_sched.lock); 1414 if(mp == nil) { 1415 fn = nil; 1416 if(spinning) 1417 fn = mspinning; 1418 newm(fn, p); 1419 return; 1420 } 1421 if(mp->spinning) 1422 runtime_throw("startm: m is spinning"); 1423 if(mp->nextp) 1424 runtime_throw("startm: m has p"); 1425 mp->spinning = spinning; 1426 mp->nextp = p; 1427 runtime_notewakeup(&mp->park); 1428 } 1429 1430 // Hands off P from syscall or locked M. 1431 static void 1432 handoffp(P *p) 1433 { 1434 // if it has local work, start it straight away 1435 if(p->runqhead != p->runqtail || runtime_sched.runqsize) { 1436 startm(p, false); 1437 return; 1438 } 1439 // no local work, check that there are no spinning/idle M's, 1440 // otherwise our help is not required 1441 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic 1442 runtime_cas(&runtime_sched.nmspinning, 0, 1)) { 1443 startm(p, true); 1444 return; 1445 } 1446 runtime_lock(&runtime_sched.lock); 1447 if(runtime_sched.gcwaiting) { 1448 p->status = Pgcstop; 1449 if(--runtime_sched.stopwait == 0) 1450 runtime_notewakeup(&runtime_sched.stopnote); 1451 runtime_unlock(&runtime_sched.lock); 1452 return; 1453 } 1454 if(runtime_sched.runqsize) { 1455 runtime_unlock(&runtime_sched.lock); 1456 startm(p, false); 1457 return; 1458 } 1459 // If this is the last running P and nobody is polling network, 1460 // need to wakeup another M to poll network. 1461 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) { 1462 runtime_unlock(&runtime_sched.lock); 1463 startm(p, false); 1464 return; 1465 } 1466 pidleput(p); 1467 runtime_unlock(&runtime_sched.lock); 1468 } 1469 1470 // Tries to add one more P to execute G's. 1471 // Called when a G is made runnable (newproc, ready). 1472 static void 1473 wakep(void) 1474 { 1475 // be conservative about spinning threads 1476 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1)) 1477 return; 1478 startm(nil, true); 1479 } 1480 1481 // Stops execution of the current m that is locked to a g until the g is runnable again. 1482 // Returns with acquired P. 
1483 static void 1484 stoplockedm(void) 1485 { 1486 P *p; 1487 1488 if(m->lockedg == nil || m->lockedg->lockedm != m) 1489 runtime_throw("stoplockedm: inconsistent locking"); 1490 if(m->p) { 1491 // Schedule another M to run this p. 1492 p = releasep(); 1493 handoffp(p); 1494 } 1495 incidlelocked(1); 1496 // Wait until another thread schedules lockedg again. 1497 runtime_notesleep(&m->park); 1498 runtime_noteclear(&m->park); 1499 if(m->lockedg->status != Grunnable) 1500 runtime_throw("stoplockedm: not runnable"); 1501 acquirep(m->nextp); 1502 m->nextp = nil; 1503 } 1504 1505 // Schedules the locked m to run the locked gp. 1506 static void 1507 startlockedm(G *gp) 1508 { 1509 M *mp; 1510 P *p; 1511 1512 mp = gp->lockedm; 1513 if(mp == m) 1514 runtime_throw("startlockedm: locked to me"); 1515 if(mp->nextp) 1516 runtime_throw("startlockedm: m has p"); 1517 // directly handoff current P to the locked m 1518 incidlelocked(-1); 1519 p = releasep(); 1520 mp->nextp = p; 1521 runtime_notewakeup(&mp->park); 1522 stopm(); 1523 } 1524 1525 // Stops the current m for stoptheworld. 1526 // Returns when the world is restarted. 1527 static void 1528 gcstopm(void) 1529 { 1530 P *p; 1531 1532 if(!runtime_sched.gcwaiting) 1533 runtime_throw("gcstopm: not waiting for gc"); 1534 if(m->spinning) { 1535 m->spinning = false; 1536 runtime_xadd(&runtime_sched.nmspinning, -1); 1537 } 1538 p = releasep(); 1539 runtime_lock(&runtime_sched.lock); 1540 p->status = Pgcstop; 1541 if(--runtime_sched.stopwait == 0) 1542 runtime_notewakeup(&runtime_sched.stopnote); 1543 runtime_unlock(&runtime_sched.lock); 1544 stopm(); 1545 } 1546 1547 // Schedules gp to run on the current M. 1548 // Never returns. 1549 static void 1550 execute(G *gp) 1551 { 1552 int32 hz; 1553 1554 if(gp->status != Grunnable) { 1555 runtime_printf("execute: bad g status %d\n", gp->status); 1556 runtime_throw("execute: bad g status"); 1557 } 1558 gp->status = Grunning; 1559 gp->waitsince = 0; 1560 m->p->schedtick++; 1561 m->curg = gp; 1562 gp->m = m; 1563 1564 // Check whether the profiler needs to be turned on or off. 1565 hz = runtime_sched.profilehz; 1566 if(m->profilehz != hz) 1567 runtime_resetcpuprofiler(hz); 1568 1569 runtime_gogo(gp); 1570 } 1571 1572 // Finds a runnable goroutine to execute. 1573 // Tries to steal from other P's, get g from global queue, poll network. 1574 static G* 1575 findrunnable(void) 1576 { 1577 G *gp; 1578 P *p; 1579 int32 i; 1580 1581 top: 1582 if(runtime_sched.gcwaiting) { 1583 gcstopm(); 1584 goto top; 1585 } 1586 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil) 1587 runtime_ready(gp); 1588 // local runq 1589 gp = runqget(m->p); 1590 if(gp) 1591 return gp; 1592 // global runq 1593 if(runtime_sched.runqsize) { 1594 runtime_lock(&runtime_sched.lock); 1595 gp = globrunqget(m->p, 0); 1596 runtime_unlock(&runtime_sched.lock); 1597 if(gp) 1598 return gp; 1599 } 1600 // poll network 1601 gp = runtime_netpoll(false); // non-blocking 1602 if(gp) { 1603 injectglist(gp->schedlink); 1604 gp->status = Grunnable; 1605 return gp; 1606 } 1607 // If number of spinning M's >= number of busy P's, block. 1608 // This is necessary to prevent excessive CPU consumption 1609 // when GOMAXPROCS>>1 but the program parallelism is low. 
1610 if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic 1611 goto stop; 1612 if(!m->spinning) { 1613 m->spinning = true; 1614 runtime_xadd(&runtime_sched.nmspinning, 1); 1615 } 1616 // random steal from other P's 1617 for(i = 0; i < 2*runtime_gomaxprocs; i++) { 1618 if(runtime_sched.gcwaiting) 1619 goto top; 1620 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs]; 1621 if(p == m->p) 1622 gp = runqget(p); 1623 else 1624 gp = runqsteal(m->p, p); 1625 if(gp) 1626 return gp; 1627 } 1628 stop: 1629 // return P and block 1630 runtime_lock(&runtime_sched.lock); 1631 if(runtime_sched.gcwaiting) { 1632 runtime_unlock(&runtime_sched.lock); 1633 goto top; 1634 } 1635 if(runtime_sched.runqsize) { 1636 gp = globrunqget(m->p, 0); 1637 runtime_unlock(&runtime_sched.lock); 1638 return gp; 1639 } 1640 p = releasep(); 1641 pidleput(p); 1642 runtime_unlock(&runtime_sched.lock); 1643 if(m->spinning) { 1644 m->spinning = false; 1645 runtime_xadd(&runtime_sched.nmspinning, -1); 1646 } 1647 // check all runqueues once again 1648 for(i = 0; i < runtime_gomaxprocs; i++) { 1649 p = runtime_allp[i]; 1650 if(p && p->runqhead != p->runqtail) { 1651 runtime_lock(&runtime_sched.lock); 1652 p = pidleget(); 1653 runtime_unlock(&runtime_sched.lock); 1654 if(p) { 1655 acquirep(p); 1656 goto top; 1657 } 1658 break; 1659 } 1660 } 1661 // poll network 1662 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) { 1663 if(m->p) 1664 runtime_throw("findrunnable: netpoll with p"); 1665 if(m->spinning) 1666 runtime_throw("findrunnable: netpoll with spinning"); 1667 gp = runtime_netpoll(true); // block until new work is available 1668 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime()); 1669 if(gp) { 1670 runtime_lock(&runtime_sched.lock); 1671 p = pidleget(); 1672 runtime_unlock(&runtime_sched.lock); 1673 if(p) { 1674 acquirep(p); 1675 injectglist(gp->schedlink); 1676 gp->status = Grunnable; 1677 return gp; 1678 } 1679 injectglist(gp); 1680 } 1681 } 1682 stopm(); 1683 goto top; 1684 } 1685 1686 static void 1687 resetspinning(void) 1688 { 1689 int32 nmspinning; 1690 1691 if(m->spinning) { 1692 m->spinning = false; 1693 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1); 1694 if(nmspinning < 0) 1695 runtime_throw("findrunnable: negative nmspinning"); 1696 } else 1697 nmspinning = runtime_atomicload(&runtime_sched.nmspinning); 1698 1699 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), 1700 // so see if we need to wakeup another P here. 1701 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0) 1702 wakep(); 1703 } 1704 1705 // Injects the list of runnable G's into the scheduler. 1706 // Can run concurrently with GC. 1707 static void 1708 injectglist(G *glist) 1709 { 1710 int32 n; 1711 G *gp; 1712 1713 if(glist == nil) 1714 return; 1715 runtime_lock(&runtime_sched.lock); 1716 for(n = 0; glist; n++) { 1717 gp = glist; 1718 glist = gp->schedlink; 1719 gp->status = Grunnable; 1720 globrunqput(gp); 1721 } 1722 runtime_unlock(&runtime_sched.lock); 1723 1724 for(; n && runtime_sched.npidle; n--) 1725 startm(nil, false); 1726 } 1727 1728 // One round of scheduler: find a runnable goroutine and execute it. 1729 // Never returns. 
1730 static void 1731 schedule(void) 1732 { 1733 G *gp; 1734 uint32 tick; 1735 1736 if(m->locks) 1737 runtime_throw("schedule: holding locks"); 1738 1739 top: 1740 if(runtime_sched.gcwaiting) { 1741 gcstopm(); 1742 goto top; 1743 } 1744 1745 gp = nil; 1746 // Check the global runnable queue once in a while to ensure fairness. 1747 // Otherwise two goroutines can completely occupy the local runqueue 1748 // by constantly respawning each other. 1749 tick = m->p->schedtick; 1750 // This is a fancy way to say tick%61==0, 1751 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors. 1752 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) { 1753 runtime_lock(&runtime_sched.lock); 1754 gp = globrunqget(m->p, 1); 1755 runtime_unlock(&runtime_sched.lock); 1756 if(gp) 1757 resetspinning(); 1758 } 1759 if(gp == nil) { 1760 gp = runqget(m->p); 1761 if(gp && m->spinning) 1762 runtime_throw("schedule: spinning with local work"); 1763 } 1764 if(gp == nil) { 1765 gp = findrunnable(); // blocks until work is available 1766 resetspinning(); 1767 } 1768 1769 if(gp->lockedm) { 1770 // Hands off own p to the locked m, 1771 // then blocks waiting for a new p. 1772 startlockedm(gp); 1773 goto top; 1774 } 1775 1776 execute(gp); 1777 } 1778 1779 // Puts the current goroutine into a waiting state and calls unlockf. 1780 // If unlockf returns false, the goroutine is resumed. 1781 void 1782 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason) 1783 { 1784 if(g->status != Grunning) 1785 runtime_throw("bad g status"); 1786 m->waitlock = lock; 1787 m->waitunlockf = unlockf; 1788 g->waitreason = reason; 1789 runtime_mcall(park0); 1790 } 1791 1792 static bool 1793 parkunlock(G *gp, void *lock) 1794 { 1795 USED(gp); 1796 runtime_unlock(lock); 1797 return true; 1798 } 1799 1800 // Puts the current goroutine into a waiting state and unlocks the lock. 1801 // The goroutine can be made runnable again by calling runtime_ready(gp). 1802 void 1803 runtime_parkunlock(Lock *lock, const char *reason) 1804 { 1805 runtime_park(parkunlock, lock, reason); 1806 } 1807 1808 // runtime_park continuation on g0. 1809 static void 1810 park0(G *gp) 1811 { 1812 bool ok; 1813 1814 gp->status = Gwaiting; 1815 gp->m = nil; 1816 m->curg = nil; 1817 if(m->waitunlockf) { 1818 ok = m->waitunlockf(gp, m->waitlock); 1819 m->waitunlockf = nil; 1820 m->waitlock = nil; 1821 if(!ok) { 1822 gp->status = Grunnable; 1823 execute(gp); // Schedule it back, never returns. 1824 } 1825 } 1826 if(m->lockedg) { 1827 stoplockedm(); 1828 execute(gp); // Never returns. 1829 } 1830 schedule(); 1831 } 1832 1833 // Scheduler yield. 1834 void 1835 runtime_gosched(void) 1836 { 1837 if(g->status != Grunning) 1838 runtime_throw("bad g status"); 1839 runtime_mcall(runtime_gosched0); 1840 } 1841 1842 // runtime_gosched continuation on g0. 1843 void 1844 runtime_gosched0(G *gp) 1845 { 1846 gp->status = Grunnable; 1847 gp->m = nil; 1848 m->curg = nil; 1849 runtime_lock(&runtime_sched.lock); 1850 globrunqput(gp); 1851 runtime_unlock(&runtime_sched.lock); 1852 if(m->lockedg) { 1853 stoplockedm(); 1854 execute(gp); // Never returns. 1855 } 1856 schedule(); 1857 } 1858 1859 // Finishes execution of the current goroutine. 1860 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack). 1861 // Since it does not return it does not matter. But if it is preempted 1862 // at the split stack check, GC will complain about inconsistent sp. 
1863 void runtime_goexit(void) __attribute__ ((noinline)); 1864 void 1865 runtime_goexit(void) 1866 { 1867 if(g->status != Grunning) 1868 runtime_throw("bad g status"); 1869 runtime_mcall(goexit0); 1870 } 1871 1872 // runtime_goexit continuation on g0. 1873 static void 1874 goexit0(G *gp) 1875 { 1876 gp->status = Gdead; 1877 gp->entry = nil; 1878 gp->m = nil; 1879 gp->lockedm = nil; 1880 gp->paniconfault = 0; 1881 gp->defer = nil; // should be true already but just in case. 1882 gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data. 1883 gp->writenbuf = 0; 1884 gp->writebuf = nil; 1885 gp->waitreason = nil; 1886 gp->param = nil; 1887 m->curg = nil; 1888 m->lockedg = nil; 1889 if(m->locked & ~LockExternal) { 1890 runtime_printf("invalid m->locked = %d\n", m->locked); 1891 runtime_throw("internal lockOSThread error"); 1892 } 1893 m->locked = 0; 1894 gfput(m->p, gp); 1895 schedule(); 1896 } 1897 1898 // The goroutine g is about to enter a system call. 1899 // Record that it's not using the cpu anymore. 1900 // This is called only from the go syscall library and cgocall, 1901 // not from the low-level system calls used by the runtime. 1902 // 1903 // Entersyscall cannot split the stack: the runtime_gosave must 1904 // make g->sched refer to the caller's stack segment, because 1905 // entersyscall is going to return immediately after. 1906 1907 void runtime_entersyscall(void) __attribute__ ((no_split_stack)); 1908 static void doentersyscall(void) __attribute__ ((no_split_stack, noinline)); 1909 1910 void 1911 runtime_entersyscall() 1912 { 1913 // Save the registers in the g structure so that any pointers 1914 // held in registers will be seen by the garbage collector. 1915 getcontext(&g->gcregs); 1916 1917 // Do the work in a separate function, so that this function 1918 // doesn't save any registers on its own stack. If this 1919 // function does save any registers, we might store the wrong 1920 // value in the call to getcontext. 1921 // 1922 // FIXME: This assumes that we do not need to save any 1923 // callee-saved registers to access the TLS variable g. We 1924 // don't want to put the ucontext_t on the stack because it is 1925 // large and we can not split the stack here. 1926 doentersyscall(); 1927 } 1928 1929 static void 1930 doentersyscall() 1931 { 1932 // Disable preemption because during this function g is in Gsyscall status, 1933 // but can have inconsistent g->sched, do not let GC observe it. 1934 m->locks++; 1935 1936 // Leave SP around for GC and traceback. 
1937 #ifdef USING_SPLIT_STACK 1938 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, 1939 &g->gcnext_segment, &g->gcnext_sp, 1940 &g->gcinitial_sp); 1941 #else 1942 { 1943 void *v; 1944 1945 g->gcnext_sp = (byte *) &v; 1946 } 1947 #endif 1948 1949 g->status = Gsyscall; 1950 1951 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic 1952 runtime_lock(&runtime_sched.lock); 1953 if(runtime_atomicload(&runtime_sched.sysmonwait)) { 1954 runtime_atomicstore(&runtime_sched.sysmonwait, 0); 1955 runtime_notewakeup(&runtime_sched.sysmonnote); 1956 } 1957 runtime_unlock(&runtime_sched.lock); 1958 } 1959 1960 m->mcache = nil; 1961 m->p->m = nil; 1962 runtime_atomicstore(&m->p->status, Psyscall); 1963 if(runtime_sched.gcwaiting) { 1964 runtime_lock(&runtime_sched.lock); 1965 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) { 1966 if(--runtime_sched.stopwait == 0) 1967 runtime_notewakeup(&runtime_sched.stopnote); 1968 } 1969 runtime_unlock(&runtime_sched.lock); 1970 } 1971 1972 m->locks--; 1973 } 1974 1975 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking. 1976 void 1977 runtime_entersyscallblock(void) 1978 { 1979 P *p; 1980 1981 m->locks++; // see comment in entersyscall 1982 1983 // Leave SP around for GC and traceback. 1984 #ifdef USING_SPLIT_STACK 1985 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, 1986 &g->gcnext_segment, &g->gcnext_sp, 1987 &g->gcinitial_sp); 1988 #else 1989 g->gcnext_sp = (byte *) &p; 1990 #endif 1991 1992 // Save the registers in the g structure so that any pointers 1993 // held in registers will be seen by the garbage collector. 1994 getcontext(&g->gcregs); 1995 1996 g->status = Gsyscall; 1997 1998 p = releasep(); 1999 handoffp(p); 2000 if(g->isbackground) // do not consider blocked scavenger for deadlock detection 2001 incidlelocked(1); 2002 2003 m->locks--; 2004 } 2005 2006 // The goroutine g exited its system call. 2007 // Arrange for it to run on a cpu again. 2008 // This is called only from the go syscall library, not 2009 // from the low-level system calls used by the runtime. 2010 void 2011 runtime_exitsyscall(void) 2012 { 2013 G *gp; 2014 2015 m->locks++; // see comment in entersyscall 2016 2017 gp = g; 2018 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection 2019 incidlelocked(-1); 2020 2021 g->waitsince = 0; 2022 if(exitsyscallfast()) { 2023 // There's a cpu for us, so we can run. 2024 m->p->syscalltick++; 2025 gp->status = Grunning; 2026 // Garbage collector isn't running (since we are), 2027 // so okay to clear gcstack and gcsp. 2028 #ifdef USING_SPLIT_STACK 2029 gp->gcstack = nil; 2030 #endif 2031 gp->gcnext_sp = nil; 2032 runtime_memclr(&gp->gcregs, sizeof gp->gcregs); 2033 m->locks--; 2034 return; 2035 } 2036 2037 m->locks--; 2038 2039 // Call the scheduler. 2040 runtime_mcall(exitsyscall0); 2041 2042 // Scheduler returned, so we're allowed to run now. 2043 // Delete the gcstack information that we left for 2044 // the garbage collector during the system call. 2045 // Must wait until now because until gosched returns 2046 // we don't know for sure that the garbage collector 2047 // is not running. 2048 #ifdef USING_SPLIT_STACK 2049 gp->gcstack = nil; 2050 #endif 2051 gp->gcnext_sp = nil; 2052 runtime_memclr(&gp->gcregs, sizeof gp->gcregs); 2053 2054 // Don't refer to m again, we might be running on a different 2055 // thread after returning from runtime_mcall. 
2056 runtime_m()->p->syscalltick++; 2057 } 2058 2059 static bool 2060 exitsyscallfast(void) 2061 { 2062 P *p; 2063 2064 // Freezetheworld sets stopwait but does not retake P's. 2065 if(runtime_sched.stopwait) { 2066 m->p = nil; 2067 return false; 2068 } 2069 2070 // Try to re-acquire the last P. 2071 if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) { 2072 // There's a cpu for us, so we can run. 2073 m->mcache = m->p->mcache; 2074 m->p->m = m; 2075 return true; 2076 } 2077 // Try to get any other idle P. 2078 m->p = nil; 2079 if(runtime_sched.pidle) { 2080 runtime_lock(&runtime_sched.lock); 2081 p = pidleget(); 2082 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) { 2083 runtime_atomicstore(&runtime_sched.sysmonwait, 0); 2084 runtime_notewakeup(&runtime_sched.sysmonnote); 2085 } 2086 runtime_unlock(&runtime_sched.lock); 2087 if(p) { 2088 acquirep(p); 2089 return true; 2090 } 2091 } 2092 return false; 2093 } 2094 2095 // runtime_exitsyscall slow path on g0. 2096 // Failed to acquire P, enqueue gp as runnable. 2097 static void 2098 exitsyscall0(G *gp) 2099 { 2100 P *p; 2101 2102 gp->status = Grunnable; 2103 gp->m = nil; 2104 m->curg = nil; 2105 runtime_lock(&runtime_sched.lock); 2106 p = pidleget(); 2107 if(p == nil) 2108 globrunqput(gp); 2109 else if(runtime_atomicload(&runtime_sched.sysmonwait)) { 2110 runtime_atomicstore(&runtime_sched.sysmonwait, 0); 2111 runtime_notewakeup(&runtime_sched.sysmonnote); 2112 } 2113 runtime_unlock(&runtime_sched.lock); 2114 if(p) { 2115 acquirep(p); 2116 execute(gp); // Never returns. 2117 } 2118 if(m->lockedg) { 2119 // Wait until another thread schedules gp and so m again. 2120 stoplockedm(); 2121 execute(gp); // Never returns. 2122 } 2123 stopm(); 2124 schedule(); // Never returns. 2125 } 2126 2127 // Called from syscall package before fork. 2128 void syscall_runtime_BeforeFork(void) 2129 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork"); 2130 void 2131 syscall_runtime_BeforeFork(void) 2132 { 2133 // Fork can hang if preempted with signals frequently enough (see issue 5517). 2134 // Ensure that we stay on the same M where we disable profiling. 2135 runtime_m()->locks++; 2136 if(runtime_m()->profilehz != 0) 2137 runtime_resetcpuprofiler(0); 2138 } 2139 2140 // Called from syscall package after fork in parent. 2141 void syscall_runtime_AfterFork(void) 2142 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork"); 2143 void 2144 syscall_runtime_AfterFork(void) 2145 { 2146 int32 hz; 2147 2148 hz = runtime_sched.profilehz; 2149 if(hz != 0) 2150 runtime_resetcpuprofiler(hz); 2151 runtime_m()->locks--; 2152 } 2153 2154 // Allocate a new g, with a stack big enough for stacksize bytes. 2155 G* 2156 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) 2157 { 2158 G *newg; 2159 2160 newg = allocg(); 2161 if(stacksize >= 0) { 2162 #if USING_SPLIT_STACK 2163 int dont_block_signals = 0; 2164 2165 *ret_stack = __splitstack_makecontext(stacksize, 2166 &newg->stack_context[0], 2167 ret_stacksize); 2168 __splitstack_block_signals_context(&newg->stack_context[0], 2169 &dont_block_signals, nil); 2170 #else 2171 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC); 2172 *ret_stacksize = stacksize; 2173 newg->gcinitial_sp = *ret_stack; 2174 newg->gcstack_size = stacksize; 2175 runtime_xadd(&runtime_stacks_sys, stacksize); 2176 #endif 2177 } 2178 return newg; 2179 } 2180 2181 /* For runtime package testing. */ 2182 2183 2184 // Create a new g running fn with siz bytes of arguments. 
2185 // Put it on the queue of g's waiting to run. 2186 // The compiler turns a go statement into a call to this. 2187 // Cannot split the stack because it assumes that the arguments 2188 // are available sequentially after &fn; they would not be 2189 // copied if a stack split occurred. It's OK for this to call 2190 // functions that split the stack. 2191 void runtime_testing_entersyscall(void) 2192 __asm__ (GOSYM_PREFIX "runtime.entersyscall"); 2193 void 2194 runtime_testing_entersyscall() 2195 { 2196 runtime_entersyscall(); 2197 } 2198 2199 void runtime_testing_exitsyscall(void) 2200 __asm__ (GOSYM_PREFIX "runtime.exitsyscall"); 2201 2202 void 2203 runtime_testing_exitsyscall() 2204 { 2205 runtime_exitsyscall(); 2206 } 2207 2208 G* 2209 __go_go(void (*fn)(void*), void* arg) 2210 { 2211 byte *sp; 2212 size_t spsize; 2213 G *newg; 2214 P *p; 2215 2216 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret); 2217 if(fn == nil) { 2218 m->throwing = -1; // do not dump full stacks 2219 runtime_throw("go of nil func value"); 2220 } 2221 m->locks++; // disable preemption because it can be holding p in a local var 2222 2223 p = m->p; 2224 if((newg = gfget(p)) != nil) { 2225 #ifdef USING_SPLIT_STACK 2226 int dont_block_signals = 0; 2227 2228 sp = __splitstack_resetcontext(&newg->stack_context[0], 2229 &spsize); 2230 __splitstack_block_signals_context(&newg->stack_context[0], 2231 &dont_block_signals, nil); 2232 #else 2233 sp = newg->gcinitial_sp; 2234 spsize = newg->gcstack_size; 2235 if(spsize == 0) 2236 runtime_throw("bad spsize in __go_go"); 2237 newg->gcnext_sp = sp; 2238 #endif 2239 } else { 2240 newg = runtime_malg(StackMin, &sp, &spsize); 2241 allgadd(newg); 2242 } 2243 2244 newg->entry = (byte*)fn; 2245 newg->param = arg; 2246 newg->gopc = (uintptr)__builtin_return_address(0); 2247 newg->status = Grunnable; 2248 if(p->goidcache == p->goidcacheend) { 2249 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch); 2250 p->goidcacheend = p->goidcache + GoidCacheBatch; 2251 } 2252 newg->goid = p->goidcache++; 2253 2254 { 2255 // Avoid warnings about variables clobbered by 2256 // longjmp. 2257 byte * volatile vsp = sp; 2258 size_t volatile vspsize = spsize; 2259 G * volatile vnewg = newg; 2260 2261 getcontext(&vnewg->context); 2262 vnewg->context.uc_stack.ss_sp = vsp; 2263 #ifdef MAKECONTEXT_STACK_TOP 2264 vnewg->context.uc_stack.ss_sp += vspsize; 2265 #endif 2266 vnewg->context.uc_stack.ss_size = vspsize; 2267 makecontext(&vnewg->context, kickoff, 0); 2268 2269 runqput(p, vnewg); 2270 2271 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic 2272 wakep(); 2273 m->locks--; 2274 return vnewg; 2275 } 2276 } 2277 2278 static void 2279 allgadd(G *gp) 2280 { 2281 G **new; 2282 uintptr cap; 2283 2284 runtime_lock(&allglock); 2285 if(runtime_allglen >= allgcap) { 2286 cap = 4096/sizeof(new[0]); 2287 if(cap < 2*allgcap) 2288 cap = 2*allgcap; 2289 new = runtime_malloc(cap*sizeof(new[0])); 2290 if(new == nil) 2291 runtime_throw("runtime: cannot allocate memory"); 2292 if(runtime_allg != nil) { 2293 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0])); 2294 runtime_free(runtime_allg); 2295 } 2296 runtime_allg = new; 2297 allgcap = cap; 2298 } 2299 runtime_allg[runtime_allglen++] = gp; 2300 runtime_unlock(&allglock); 2301 } 2302 2303 // Put on gfree list. 2304 // If local list is too long, transfer a batch to the global list. 
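// A worked example of the thresholds used below (only the constants 64 and
// 32 that are hard-coded in gfput/gfget are involved; nothing else is
// assumed): when a gfput brings a P's cache to 64 free G's, it immediately
// drains the local list under runtime_sched.gflock, moving 33 of them to
// runtime_sched.gfree and leaving 31 cached locally.  gfget refills in the
// opposite direction, pulling from the global list until the local count
// reaches 32 (or the global list empties) before retrying the local pop.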
2305 static void 2306 gfput(P *p, G *gp) 2307 { 2308 gp->schedlink = p->gfree; 2309 p->gfree = gp; 2310 p->gfreecnt++; 2311 if(p->gfreecnt >= 64) { 2312 runtime_lock(&runtime_sched.gflock); 2313 while(p->gfreecnt >= 32) { 2314 p->gfreecnt--; 2315 gp = p->gfree; 2316 p->gfree = gp->schedlink; 2317 gp->schedlink = runtime_sched.gfree; 2318 runtime_sched.gfree = gp; 2319 } 2320 runtime_unlock(&runtime_sched.gflock); 2321 } 2322 } 2323 2324 // Get from gfree list. 2325 // If local list is empty, grab a batch from global list. 2326 static G* 2327 gfget(P *p) 2328 { 2329 G *gp; 2330 2331 retry: 2332 gp = p->gfree; 2333 if(gp == nil && runtime_sched.gfree) { 2334 runtime_lock(&runtime_sched.gflock); 2335 while(p->gfreecnt < 32 && runtime_sched.gfree) { 2336 p->gfreecnt++; 2337 gp = runtime_sched.gfree; 2338 runtime_sched.gfree = gp->schedlink; 2339 gp->schedlink = p->gfree; 2340 p->gfree = gp; 2341 } 2342 runtime_unlock(&runtime_sched.gflock); 2343 goto retry; 2344 } 2345 if(gp) { 2346 p->gfree = gp->schedlink; 2347 p->gfreecnt--; 2348 } 2349 return gp; 2350 } 2351 2352 // Purge all cached G's from gfree list to the global list. 2353 static void 2354 gfpurge(P *p) 2355 { 2356 G *gp; 2357 2358 runtime_lock(&runtime_sched.gflock); 2359 while(p->gfreecnt) { 2360 p->gfreecnt--; 2361 gp = p->gfree; 2362 p->gfree = gp->schedlink; 2363 gp->schedlink = runtime_sched.gfree; 2364 runtime_sched.gfree = gp; 2365 } 2366 runtime_unlock(&runtime_sched.gflock); 2367 } 2368 2369 void 2370 runtime_Breakpoint(void) 2371 { 2372 runtime_breakpoint(); 2373 } 2374 2375 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched"); 2376 2377 void 2378 runtime_Gosched(void) 2379 { 2380 runtime_gosched(); 2381 } 2382 2383 // Implementation of runtime.GOMAXPROCS. 2384 // delete when scheduler is even stronger 2385 int32 2386 runtime_gomaxprocsfunc(int32 n) 2387 { 2388 int32 ret; 2389 2390 if(n > MaxGomaxprocs) 2391 n = MaxGomaxprocs; 2392 runtime_lock(&runtime_sched.lock); 2393 ret = runtime_gomaxprocs; 2394 if(n <= 0 || n == ret) { 2395 runtime_unlock(&runtime_sched.lock); 2396 return ret; 2397 } 2398 runtime_unlock(&runtime_sched.lock); 2399 2400 runtime_semacquire(&runtime_worldsema, false); 2401 m->gcing = 1; 2402 runtime_stoptheworld(); 2403 newprocs = n; 2404 m->gcing = 0; 2405 runtime_semrelease(&runtime_worldsema); 2406 runtime_starttheworld(); 2407 2408 return ret; 2409 } 2410 2411 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below 2412 // after they modify m->locked. Do not allow preemption during this call, 2413 // or else the m might be different in this function than in the caller. 2414 static void 2415 lockOSThread(void) 2416 { 2417 m->lockedg = g; 2418 g->lockedm = m; 2419 } 2420 2421 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread"); 2422 void 2423 runtime_LockOSThread(void) 2424 { 2425 m->locked |= LockExternal; 2426 lockOSThread(); 2427 } 2428 2429 void 2430 runtime_lockOSThread(void) 2431 { 2432 m->locked += LockInternal; 2433 lockOSThread(); 2434 } 2435 2436 2437 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below 2438 // after they update m->locked. Do not allow preemption during this call, 2439 // or else the m might be in different in this function than in the caller. 
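// How m->locked is encoded (inferred from the uses above and in goexit0;
// the constants themselves are defined elsewhere in the runtime):
// LockExternal is a single bit that runtime.LockOSThread sets and
// runtime.UnlockOSThread clears, so repeated external locks do not nest,
// while LockInternal is a count that runtime_lockOSThread adds and
// runtime_unlockOSThread subtracts, so internal locks must be balanced.
// unlockOSThread below only severs the m<->g binding once m->locked has
// dropped all the way to zero.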
2440 static void 2441 unlockOSThread(void) 2442 { 2443 if(m->locked != 0) 2444 return; 2445 m->lockedg = nil; 2446 g->lockedm = nil; 2447 } 2448 2449 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread"); 2450 2451 void 2452 runtime_UnlockOSThread(void) 2453 { 2454 m->locked &= ~LockExternal; 2455 unlockOSThread(); 2456 } 2457 2458 void 2459 runtime_unlockOSThread(void) 2460 { 2461 if(m->locked < LockInternal) 2462 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); 2463 m->locked -= LockInternal; 2464 unlockOSThread(); 2465 } 2466 2467 bool 2468 runtime_lockedOSThread(void) 2469 { 2470 return g->lockedm != nil && m->lockedg != nil; 2471 } 2472 2473 int32 2474 runtime_gcount(void) 2475 { 2476 G *gp; 2477 int32 n, s; 2478 uintptr i; 2479 2480 n = 0; 2481 runtime_lock(&allglock); 2482 // TODO(dvyukov): runtime.NumGoroutine() is O(N). 2483 // We do not want to increment/decrement centralized counter in newproc/goexit, 2484 // just to make runtime.NumGoroutine() faster. 2485 // Compromise solution is to introduce per-P counters of active goroutines. 2486 for(i = 0; i < runtime_allglen; i++) { 2487 gp = runtime_allg[i]; 2488 s = gp->status; 2489 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) 2490 n++; 2491 } 2492 runtime_unlock(&allglock); 2493 return n; 2494 } 2495 2496 int32 2497 runtime_mcount(void) 2498 { 2499 return runtime_sched.mcount; 2500 } 2501 2502 static struct { 2503 Lock lock; 2504 void (*fn)(uintptr*, int32); 2505 int32 hz; 2506 uintptr pcbuf[TracebackMaxFrames]; 2507 Location locbuf[TracebackMaxFrames]; 2508 } prof; 2509 2510 static void System(void) {} 2511 static void GC(void) {} 2512 2513 // Called if we receive a SIGPROF signal. 2514 void 2515 runtime_sigprof() 2516 { 2517 M *mp = m; 2518 int32 n, i; 2519 bool traceback; 2520 2521 if(prof.fn == nil || prof.hz == 0) 2522 return; 2523 2524 if(mp == nil) 2525 return; 2526 2527 // Profiling runs concurrently with GC, so it must not allocate. 2528 mp->mallocing++; 2529 2530 traceback = true; 2531 2532 if(mp->mcache == nil) 2533 traceback = false; 2534 2535 runtime_lock(&prof.lock); 2536 if(prof.fn == nil) { 2537 runtime_unlock(&prof.lock); 2538 mp->mallocing--; 2539 return; 2540 } 2541 n = 0; 2542 2543 if(runtime_atomicload(&runtime_in_callers) > 0) { 2544 // If SIGPROF arrived while already fetching runtime 2545 // callers we can have trouble on older systems 2546 // because the unwind library calls dl_iterate_phdr 2547 // which was not recursive in the past. 2548 traceback = false; 2549 } 2550 2551 if(traceback) { 2552 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false); 2553 for(i = 0; i < n; i++) 2554 prof.pcbuf[i] = prof.locbuf[i].pc; 2555 } 2556 if(!traceback || n <= 0) { 2557 n = 2; 2558 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n); 2559 if(mp->gcing || mp->helpgc) 2560 prof.pcbuf[1] = (uintptr)GC; 2561 else 2562 prof.pcbuf[1] = (uintptr)System; 2563 } 2564 prof.fn(prof.pcbuf, n); 2565 runtime_unlock(&prof.lock); 2566 mp->mallocing--; 2567 } 2568 2569 // Arrange to call fn with a traceback hz times a second. 2570 void 2571 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) 2572 { 2573 // Force sane arguments. 2574 if(hz < 0) 2575 hz = 0; 2576 if(hz == 0) 2577 fn = nil; 2578 if(fn == nil) 2579 hz = 0; 2580 2581 // Disable preemption, otherwise we can be rescheduled to another thread 2582 // that has profiling enabled. 2583 m->locks++; 2584 2585 // Stop profiler on this thread so that it is safe to lock prof. 
2586 // if a profiling signal came in while we had prof locked, 2587 // it would deadlock. 2588 runtime_resetcpuprofiler(0); 2589 2590 runtime_lock(&prof.lock); 2591 prof.fn = fn; 2592 prof.hz = hz; 2593 runtime_unlock(&prof.lock); 2594 runtime_lock(&runtime_sched.lock); 2595 runtime_sched.profilehz = hz; 2596 runtime_unlock(&runtime_sched.lock); 2597 2598 if(hz != 0) 2599 runtime_resetcpuprofiler(hz); 2600 2601 m->locks--; 2602 } 2603 2604 // Change number of processors. The world is stopped, sched is locked. 2605 static void 2606 procresize(int32 new) 2607 { 2608 int32 i, old; 2609 bool empty; 2610 G *gp; 2611 P *p; 2612 2613 old = runtime_gomaxprocs; 2614 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs) 2615 runtime_throw("procresize: invalid arg"); 2616 // initialize new P's 2617 for(i = 0; i < new; i++) { 2618 p = runtime_allp[i]; 2619 if(p == nil) { 2620 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC); 2621 p->id = i; 2622 p->status = Pgcstop; 2623 runtime_atomicstorep(&runtime_allp[i], p); 2624 } 2625 if(p->mcache == nil) { 2626 if(old==0 && i==0) 2627 p->mcache = m->mcache; // bootstrap 2628 else 2629 p->mcache = runtime_allocmcache(); 2630 } 2631 } 2632 2633 // redistribute runnable G's evenly 2634 // collect all runnable goroutines in global queue preserving FIFO order 2635 // FIFO order is required to ensure fairness even during frequent GCs 2636 // see http://golang.org/issue/7126 2637 empty = false; 2638 while(!empty) { 2639 empty = true; 2640 for(i = 0; i < old; i++) { 2641 p = runtime_allp[i]; 2642 if(p->runqhead == p->runqtail) 2643 continue; 2644 empty = false; 2645 // pop from tail of local queue 2646 p->runqtail--; 2647 gp = p->runq[p->runqtail%nelem(p->runq)]; 2648 // push onto head of global queue 2649 gp->schedlink = runtime_sched.runqhead; 2650 runtime_sched.runqhead = gp; 2651 if(runtime_sched.runqtail == nil) 2652 runtime_sched.runqtail = gp; 2653 runtime_sched.runqsize++; 2654 } 2655 } 2656 // fill local queues with at most nelem(p->runq)/2 goroutines 2657 // start at 1 because current M already executes some G and will acquire allp[0] below, 2658 // so if we have a spare G we want to put it into allp[1]. 2659 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) { 2660 gp = runtime_sched.runqhead; 2661 runtime_sched.runqhead = gp->schedlink; 2662 if(runtime_sched.runqhead == nil) 2663 runtime_sched.runqtail = nil; 2664 runtime_sched.runqsize--; 2665 runqput(runtime_allp[i%new], gp); 2666 } 2667 2668 // free unused P's 2669 for(i = new; i < old; i++) { 2670 p = runtime_allp[i]; 2671 runtime_freemcache(p->mcache); 2672 p->mcache = nil; 2673 gfpurge(p); 2674 p->status = Pdead; 2675 // can't free P itself because it can be referenced by an M in syscall 2676 } 2677 2678 if(m->p) 2679 m->p->m = nil; 2680 m->p = nil; 2681 m->mcache = nil; 2682 p = runtime_allp[0]; 2683 p->m = nil; 2684 p->status = Pidle; 2685 acquirep(p); 2686 for(i = new-1; i > 0; i--) { 2687 p = runtime_allp[i]; 2688 p->status = Pidle; 2689 pidleput(p); 2690 } 2691 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new); 2692 } 2693 2694 // Associate p and the current m. 2695 static void 2696 acquirep(P *p) 2697 { 2698 if(m->p || m->mcache) 2699 runtime_throw("acquirep: already in go"); 2700 if(p->m || p->status != Pidle) { 2701 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? 
p->m->id : 0, p->status);
2702 		runtime_throw("acquirep: invalid p state");
2703 	}
2704 	m->mcache = p->mcache;
2705 	m->p = p;
2706 	p->m = m;
2707 	p->status = Prunning;
2708 }
2709 
2710 // Disassociate p and the current m.
2711 static P*
2712 releasep(void)
2713 {
2714 	P *p;
2715 
2716 	if(m->p == nil || m->mcache == nil)
2717 		runtime_throw("releasep: invalid arg");
2718 	p = m->p;
2719 	if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2720 		runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2721 			m, m->p, p->m, m->mcache, p->mcache, p->status);
2722 		runtime_throw("releasep: invalid p state");
2723 	}
2724 	m->p = nil;
2725 	m->mcache = nil;
2726 	p->m = nil;
2727 	p->status = Pidle;
2728 	return p;
2729 }
2730 
2731 static void
2732 incidlelocked(int32 v)
2733 {
2734 	runtime_lock(&runtime_sched.lock);
2735 	runtime_sched.nmidlelocked += v;
2736 	if(v > 0)
2737 		checkdead();
2738 	runtime_unlock(&runtime_sched.lock);
2739 }
2740 
2741 // Check for deadlock situation.
2742 // The check is based on number of running M's, if 0 -> deadlock.
2743 static void
2744 checkdead(void)
2745 {
2746 	G *gp;
2747 	int32 run, grunning, s;
2748 	uintptr i;
2749 
2750 	// -1 for sysmon
2751 	run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2752 	if(run > 0)
2753 		return;
2754 	// If we are dying because of a signal caught on an already idle thread,
2755 	// freezetheworld will cause all running threads to block.
2756 	// And runtime will essentially enter into deadlock state,
2757 	// except that there is a thread that will call runtime_exit soon.
2758 	if(runtime_panicking > 0)
2759 		return;
2760 	if(run < 0) {
2761 		runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2762 			runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2763 		runtime_throw("checkdead: inconsistent counts");
2764 	}
2765 	grunning = 0;
2766 	runtime_lock(&allglock);
2767 	for(i = 0; i < runtime_allglen; i++) {
2768 		gp = runtime_allg[i];
2769 		if(gp->isbackground)
2770 			continue;
2771 		s = gp->status;
2772 		if(s == Gwaiting)
2773 			grunning++;
2774 		else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2775 			runtime_unlock(&allglock);
2776 			runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2777 			runtime_throw("checkdead: runnable g");
2778 		}
2779 	}
2780 	runtime_unlock(&allglock);
2781 	if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2782 		runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2783 	m->throwing = -1; // do not dump full stacks
2784 	runtime_throw("all goroutines are asleep - deadlock!");
2785 }
2786 
2787 static void
2788 sysmon(void)
2789 {
2790 	uint32 idle, delay;
2791 	int64 now, lastpoll, lasttrace;
2792 	G *gp;
2793 
2794 	lasttrace = 0;
2795 	idle = 0; // how many cycles in succession we had not woken up somebody
2796 	delay = 0;
2797 	for(;;) {
2798 		if(idle == 0) // start with 20us sleep...
2799 			delay = 20;
2800 		else if(idle > 50) // start doubling the sleep after 1ms...
2801 delay *= 2; 2802 if(delay > 10*1000) // up to 10ms 2803 delay = 10*1000; 2804 runtime_usleep(delay); 2805 if(runtime_debug.schedtrace <= 0 && 2806 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic 2807 runtime_lock(&runtime_sched.lock); 2808 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { 2809 runtime_atomicstore(&runtime_sched.sysmonwait, 1); 2810 runtime_unlock(&runtime_sched.lock); 2811 runtime_notesleep(&runtime_sched.sysmonnote); 2812 runtime_noteclear(&runtime_sched.sysmonnote); 2813 idle = 0; 2814 delay = 20; 2815 } else 2816 runtime_unlock(&runtime_sched.lock); 2817 } 2818 // poll network if not polled for more than 10ms 2819 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll); 2820 now = runtime_nanotime(); 2821 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) { 2822 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now); 2823 gp = runtime_netpoll(false); // non-blocking 2824 if(gp) { 2825 // Need to decrement number of idle locked M's 2826 // (pretending that one more is running) before injectglist. 2827 // Otherwise it can lead to the following situation: 2828 // injectglist grabs all P's but before it starts M's to run the P's, 2829 // another M returns from syscall, finishes running its G, 2830 // observes that there is no work to do and no other running M's 2831 // and reports deadlock. 2832 incidlelocked(-1); 2833 injectglist(gp); 2834 incidlelocked(1); 2835 } 2836 } 2837 // retake P's blocked in syscalls 2838 // and preempt long running G's 2839 if(retake(now)) 2840 idle = 0; 2841 else 2842 idle++; 2843 2844 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) { 2845 lasttrace = now; 2846 runtime_schedtrace(runtime_debug.scheddetail); 2847 } 2848 } 2849 } 2850 2851 typedef struct Pdesc Pdesc; 2852 struct Pdesc 2853 { 2854 uint32 schedtick; 2855 int64 schedwhen; 2856 uint32 syscalltick; 2857 int64 syscallwhen; 2858 }; 2859 static Pdesc pdesc[MaxGomaxprocs]; 2860 2861 static uint32 2862 retake(int64 now) 2863 { 2864 uint32 i, s, n; 2865 int64 t; 2866 P *p; 2867 Pdesc *pd; 2868 2869 n = 0; 2870 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) { 2871 p = runtime_allp[i]; 2872 if(p==nil) 2873 continue; 2874 pd = &pdesc[i]; 2875 s = p->status; 2876 if(s == Psyscall) { 2877 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). 2878 t = p->syscalltick; 2879 if(pd->syscalltick != t) { 2880 pd->syscalltick = t; 2881 pd->syscallwhen = now; 2882 continue; 2883 } 2884 // On the one hand we don't want to retake Ps if there is no other work to do, 2885 // but on the other hand we want to retake them eventually 2886 // because they can prevent the sysmon thread from deep sleep. 2887 if(p->runqhead == p->runqtail && 2888 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 && 2889 pd->syscallwhen + 10*1000*1000 > now) 2890 continue; 2891 // Need to decrement number of idle locked M's 2892 // (pretending that one more is running) before the CAS. 2893 // Otherwise the M from which we retake can exit the syscall, 2894 // increment nmidle and report deadlock. 2895 incidlelocked(-1); 2896 if(runtime_cas(&p->status, s, Pidle)) { 2897 n++; 2898 handoffp(p); 2899 } 2900 incidlelocked(1); 2901 } else if(s == Prunning) { 2902 // Preempt G if it's running for more than 10ms. 
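			// Note: in this gofrontend port the preemption request itself
			// appears to be disabled -- the preemptone() call below is
			// commented out and preemptall() further down unconditionally
			// returns false -- so this branch only keeps pd->schedtick and
			// pd->schedwhen up to date.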
2903 t = p->schedtick; 2904 if(pd->schedtick != t) { 2905 pd->schedtick = t; 2906 pd->schedwhen = now; 2907 continue; 2908 } 2909 if(pd->schedwhen + 10*1000*1000 > now) 2910 continue; 2911 // preemptone(p); 2912 } 2913 } 2914 return n; 2915 } 2916 2917 // Tell all goroutines that they have been preempted and they should stop. 2918 // This function is purely best-effort. It can fail to inform a goroutine if a 2919 // processor just started running it. 2920 // No locks need to be held. 2921 // Returns true if preemption request was issued to at least one goroutine. 2922 static bool 2923 preemptall(void) 2924 { 2925 return false; 2926 } 2927 2928 void 2929 runtime_schedtrace(bool detailed) 2930 { 2931 static int64 starttime; 2932 int64 now; 2933 int64 id1, id2, id3; 2934 int32 i, t, h; 2935 uintptr gi; 2936 const char *fmt; 2937 M *mp, *lockedm; 2938 G *gp, *lockedg; 2939 P *p; 2940 2941 now = runtime_nanotime(); 2942 if(starttime == 0) 2943 starttime = now; 2944 2945 runtime_lock(&runtime_sched.lock); 2946 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d", 2947 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount, 2948 runtime_sched.nmidle, runtime_sched.runqsize); 2949 if(detailed) { 2950 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n", 2951 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning, 2952 runtime_sched.stopwait, runtime_sched.sysmonwait); 2953 } 2954 // We must be careful while reading data from P's, M's and G's. 2955 // Even if we hold schedlock, most data can be changed concurrently. 2956 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. 2957 for(i = 0; i < runtime_gomaxprocs; i++) { 2958 p = runtime_allp[i]; 2959 if(p == nil) 2960 continue; 2961 mp = p->m; 2962 h = runtime_atomicload(&p->runqhead); 2963 t = runtime_atomicload(&p->runqtail); 2964 if(detailed) 2965 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n", 2966 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt); 2967 else { 2968 // In non-detailed mode format lengths of per-P run queues as: 2969 // [len1 len2 len3 len4] 2970 fmt = " %d"; 2971 if(runtime_gomaxprocs == 1) 2972 fmt = " [%d]\n"; 2973 else if(i == 0) 2974 fmt = " [%d"; 2975 else if(i == runtime_gomaxprocs-1) 2976 fmt = " %d]\n"; 2977 runtime_printf(fmt, t-h); 2978 } 2979 } 2980 if(!detailed) { 2981 runtime_unlock(&runtime_sched.lock); 2982 return; 2983 } 2984 for(mp = runtime_allm; mp; mp = mp->alllink) { 2985 p = mp->p; 2986 gp = mp->curg; 2987 lockedg = mp->lockedg; 2988 id1 = -1; 2989 if(p) 2990 id1 = p->id; 2991 id2 = -1; 2992 if(gp) 2993 id2 = gp->goid; 2994 id3 = -1; 2995 if(lockedg) 2996 id3 = lockedg->goid; 2997 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d" 2998 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n", 2999 mp->id, id1, id2, 3000 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc, 3001 mp->spinning, m->blocked, id3); 3002 } 3003 runtime_lock(&allglock); 3004 for(gi = 0; gi < runtime_allglen; gi++) { 3005 gp = runtime_allg[gi]; 3006 mp = gp->m; 3007 lockedm = gp->lockedm; 3008 runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n", 3009 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1, 3010 lockedm ? 
lockedm->id : -1);
3011 	}
3012 	runtime_unlock(&allglock);
3013 	runtime_unlock(&runtime_sched.lock);
3014 }
3015 
3016 // Put mp on midle list.
3017 // Sched must be locked.
3018 static void
3019 mput(M *mp)
3020 {
3021 	mp->schedlink = runtime_sched.midle;
3022 	runtime_sched.midle = mp;
3023 	runtime_sched.nmidle++;
3024 	checkdead();
3025 }
3026 
3027 // Try to get an m from midle list.
3028 // Sched must be locked.
3029 static M*
3030 mget(void)
3031 {
3032 	M *mp;
3033 
3034 	if((mp = runtime_sched.midle) != nil){
3035 		runtime_sched.midle = mp->schedlink;
3036 		runtime_sched.nmidle--;
3037 	}
3038 	return mp;
3039 }
3040 
3041 // Put gp on the global runnable queue.
3042 // Sched must be locked.
3043 static void
3044 globrunqput(G *gp)
3045 {
3046 	gp->schedlink = nil;
3047 	if(runtime_sched.runqtail)
3048 		runtime_sched.runqtail->schedlink = gp;
3049 	else
3050 		runtime_sched.runqhead = gp;
3051 	runtime_sched.runqtail = gp;
3052 	runtime_sched.runqsize++;
3053 }
3054 
3055 // Put a batch of runnable goroutines on the global runnable queue.
3056 // Sched must be locked.
3057 static void
3058 globrunqputbatch(G *ghead, G *gtail, int32 n)
3059 {
3060 	gtail->schedlink = nil;
3061 	if(runtime_sched.runqtail)
3062 		runtime_sched.runqtail->schedlink = ghead;
3063 	else
3064 		runtime_sched.runqhead = ghead;
3065 	runtime_sched.runqtail = gtail;
3066 	runtime_sched.runqsize += n;
3067 }
3068 
3069 // Try to get a batch of G's from the global runnable queue.
3070 // Sched must be locked.
3071 static G*
3072 globrunqget(P *p, int32 max)
3073 {
3074 	G *gp, *gp1;
3075 	int32 n;
3076 
3077 	if(runtime_sched.runqsize == 0)
3078 		return nil;
3079 	n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3080 	if(n > runtime_sched.runqsize)
3081 		n = runtime_sched.runqsize;
3082 	if(max > 0 && n > max)
3083 		n = max;
3084 	if((uint32)n > nelem(p->runq)/2)
3085 		n = nelem(p->runq)/2;
3086 	runtime_sched.runqsize -= n;
3087 	if(runtime_sched.runqsize == 0)
3088 		runtime_sched.runqtail = nil;
3089 	gp = runtime_sched.runqhead;
3090 	runtime_sched.runqhead = gp->schedlink;
3091 	n--;
3092 	while(n--) {
3093 		gp1 = runtime_sched.runqhead;
3094 		runtime_sched.runqhead = gp1->schedlink;
3095 		runqput(p, gp1);
3096 	}
3097 	return gp;
3098 }
3099 
3100 // Put p on pidle list.
3101 // Sched must be locked.
3102 static void
3103 pidleput(P *p)
3104 {
3105 	p->link = runtime_sched.pidle;
3106 	runtime_sched.pidle = p;
3107 	runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3108 }
3109 
3110 // Try to get a p from pidle list.
3111 // Sched must be locked.
3112 static P*
3113 pidleget(void)
3114 {
3115 	P *p;
3116 
3117 	p = runtime_sched.pidle;
3118 	if(p) {
3119 		runtime_sched.pidle = p->link;
3120 		runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3121 	}
3122 	return p;
3123 }
3124 
3125 // Try to put g on local runnable queue.
3126 // If it's full, put onto global queue.
3127 // Executed only by the owner P.
3128 static void
3129 runqput(P *p, G *gp)
3130 {
3131 	uint32 h, t;
3132 
3133 retry:
3134 	h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3135 	t = p->runqtail;
3136 	if(t - h < nelem(p->runq)) {
3137 		p->runq[t%nelem(p->runq)] = gp;
3138 		runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3139 		return;
3140 	}
3141 	if(runqputslow(p, gp, h, t))
3142 		return;
3143 	// the queue is not full now, so the put above must succeed
3144 	goto retry;
3145 }
3146 
3147 // Put g and a batch of work from local runnable queue on global queue.
3148 // Executed only by the owner P.
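// For illustration only (the actual capacity of p->runq is whatever proc.h
// declares; 256 is just an assumed figure): with a 256-slot ring, runqput
// takes the fast path while t - h < 256.  When the ring is full, runqputslow
// grabs the older half (128 G's), appends the incoming gp as a 129th, links
// them through schedlink and hands the whole chain to globrunqputbatch in a
// single runtime_sched.lock acquisition.  If the CAS on runqhead fails
// because consumers emptied some slots in the meantime, runqput simply
// retries the fast path.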
3149 static bool 3150 runqputslow(P *p, G *gp, uint32 h, uint32 t) 3151 { 3152 G *batch[nelem(p->runq)/2+1]; 3153 uint32 n, i; 3154 3155 // First, grab a batch from local queue. 3156 n = t-h; 3157 n = n/2; 3158 if(n != nelem(p->runq)/2) 3159 runtime_throw("runqputslow: queue is not full"); 3160 for(i=0; i<n; i++) 3161 batch[i] = p->runq[(h+i)%nelem(p->runq)]; 3162 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume 3163 return false; 3164 batch[n] = gp; 3165 // Link the goroutines. 3166 for(i=0; i<n; i++) 3167 batch[i]->schedlink = batch[i+1]; 3168 // Now put the batch on global queue. 3169 runtime_lock(&runtime_sched.lock); 3170 globrunqputbatch(batch[0], batch[n], n+1); 3171 runtime_unlock(&runtime_sched.lock); 3172 return true; 3173 } 3174 3175 // Get g from local runnable queue. 3176 // Executed only by the owner P. 3177 static G* 3178 runqget(P *p) 3179 { 3180 G *gp; 3181 uint32 t, h; 3182 3183 for(;;) { 3184 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers 3185 t = p->runqtail; 3186 if(t == h) 3187 return nil; 3188 gp = p->runq[h%nelem(p->runq)]; 3189 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume 3190 return gp; 3191 } 3192 } 3193 3194 // Grabs a batch of goroutines from local runnable queue. 3195 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines. 3196 // Can be executed by any P. 3197 static uint32 3198 runqgrab(P *p, G **batch) 3199 { 3200 uint32 t, h, n, i; 3201 3202 for(;;) { 3203 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers 3204 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer 3205 n = t-h; 3206 n = n - n/2; 3207 if(n == 0) 3208 break; 3209 if(n > nelem(p->runq)/2) // read inconsistent h and t 3210 continue; 3211 for(i=0; i<n; i++) 3212 batch[i] = p->runq[(h+i)%nelem(p->runq)]; 3213 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume 3214 break; 3215 } 3216 return n; 3217 } 3218 3219 // Steal half of elements from local runnable queue of p2 3220 // and put onto local runnable queue of p. 3221 // Returns one of the stolen elements (or nil if failed). 
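// A small worked example of the steal-half policy (purely illustrative, no
// new constants assumed): runqgrab computes n = (t-h) - (t-h)/2, i.e. it
// rounds up, so from a victim with 5 queued G's it copies 3.  runqsteal then
// keeps the last of those 3 as its return value, to be run right away, and
// stores the remaining 2 into p's own ring before publishing the new
// runqtail with a store-release.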
3222 static G* 3223 runqsteal(P *p, P *p2) 3224 { 3225 G *gp; 3226 G *batch[nelem(p->runq)/2]; 3227 uint32 t, h, n, i; 3228 3229 n = runqgrab(p2, batch); 3230 if(n == 0) 3231 return nil; 3232 n--; 3233 gp = batch[n]; 3234 if(n == 0) 3235 return gp; 3236 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers 3237 t = p->runqtail; 3238 if(t - h + n >= nelem(p->runq)) 3239 runtime_throw("runqsteal: runq overflow"); 3240 for(i=0; i<n; i++, t++) 3241 p->runq[t%nelem(p->runq)] = batch[i]; 3242 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption 3243 return gp; 3244 } 3245 3246 void runtime_testSchedLocalQueue(void) 3247 __asm__("runtime.testSchedLocalQueue"); 3248 3249 void 3250 runtime_testSchedLocalQueue(void) 3251 { 3252 P p; 3253 G gs[nelem(p.runq)]; 3254 int32 i, j; 3255 3256 runtime_memclr((byte*)&p, sizeof(p)); 3257 3258 for(i = 0; i < (int32)nelem(gs); i++) { 3259 if(runqget(&p) != nil) 3260 runtime_throw("runq is not empty initially"); 3261 for(j = 0; j < i; j++) 3262 runqput(&p, &gs[i]); 3263 for(j = 0; j < i; j++) { 3264 if(runqget(&p) != &gs[i]) { 3265 runtime_printf("bad element at iter %d/%d\n", i, j); 3266 runtime_throw("bad element"); 3267 } 3268 } 3269 if(runqget(&p) != nil) 3270 runtime_throw("runq is not empty afterwards"); 3271 } 3272 } 3273 3274 void runtime_testSchedLocalQueueSteal(void) 3275 __asm__("runtime.testSchedLocalQueueSteal"); 3276 3277 void 3278 runtime_testSchedLocalQueueSteal(void) 3279 { 3280 P p1, p2; 3281 G gs[nelem(p1.runq)], *gp; 3282 int32 i, j, s; 3283 3284 runtime_memclr((byte*)&p1, sizeof(p1)); 3285 runtime_memclr((byte*)&p2, sizeof(p2)); 3286 3287 for(i = 0; i < (int32)nelem(gs); i++) { 3288 for(j = 0; j < i; j++) { 3289 gs[j].sig = 0; 3290 runqput(&p1, &gs[j]); 3291 } 3292 gp = runqsteal(&p2, &p1); 3293 s = 0; 3294 if(gp) { 3295 s++; 3296 gp->sig++; 3297 } 3298 while((gp = runqget(&p2)) != nil) { 3299 s++; 3300 gp->sig++; 3301 } 3302 while((gp = runqget(&p1)) != nil) 3303 gp->sig++; 3304 for(j = 0; j < i; j++) { 3305 if(gs[j].sig != 1) { 3306 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i); 3307 runtime_throw("bad element"); 3308 } 3309 } 3310 if(s != i/2 && s != i/2+1) { 3311 runtime_printf("bad steal %d, want %d or %d, iter %d\n", 3312 s, i/2, i/2+1, i); 3313 runtime_throw("bad steal"); 3314 } 3315 } 3316 } 3317 3318 int32 3319 runtime_setmaxthreads(int32 in) 3320 { 3321 int32 out; 3322 3323 runtime_lock(&runtime_sched.lock); 3324 out = runtime_sched.maxmcount; 3325 runtime_sched.maxmcount = in; 3326 checkmcount(); 3327 runtime_unlock(&runtime_sched.lock); 3328 return out; 3329 } 3330 3331 void 3332 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj)) 3333 { 3334 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0}); 3335 } 3336 3337 // Return whether we are waiting for a GC. This gc toolchain uses 3338 // preemption instead. 3339 bool 3340 runtime_gcwaiting(void) 3341 { 3342 return runtime_sched.gcwaiting; 3343 }