github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/proc.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime 6 7 import ( 8 "internal/abi" 9 "internal/cpu" 10 "internal/goarch" 11 "internal/goexperiment" 12 "internal/goos" 13 "runtime/internal/atomic" 14 "runtime/internal/sys" 15 "unsafe" 16 ) 17 18 // set using cmd/go/internal/modload.ModInfoProg 19 var modinfo string 20 21 // Goroutine scheduler 22 // The scheduler's job is to distribute ready-to-run goroutines over worker threads. 23 // 24 // The main concepts are: 25 // G - goroutine. 26 // M - worker thread, or machine. 27 // P - processor, a resource that is required to execute Go code. 28 // M must have an associated P to execute Go code, however it can be 29 // blocked or in a syscall w/o an associated P. 30 // 31 // Design doc at https://golang.org/s/go11sched. 32 33 // Worker thread parking/unparking. 34 // We need to balance between keeping enough running worker threads to utilize 35 // available hardware parallelism and parking excessive running worker threads 36 // to conserve CPU resources and power. This is not simple for two reasons: 37 // (1) scheduler state is intentionally distributed (in particular, per-P work 38 // queues), so it is not possible to compute global predicates on fast paths; 39 // (2) for optimal thread management we would need to know the future (don't park 40 // a worker thread when a new goroutine will be readied in near future). 41 // 42 // Three rejected approaches that would work badly: 43 // 1. Centralize all scheduler state (would inhibit scalability). 44 // 2. Direct goroutine handoff. That is, when we ready a new goroutine and there 45 // is a spare P, unpark a thread and handoff it the thread and the goroutine. 46 // This would lead to thread state thrashing, as the thread that readied the 47 // goroutine can be out of work the very next moment, we will need to park it. 48 // Also, it would destroy locality of computation as we want to preserve 49 // dependent goroutines on the same thread; and introduce additional latency. 50 // 3. Unpark an additional thread whenever we ready a goroutine and there is an 51 // idle P, but don't do handoff. This would lead to excessive thread parking/ 52 // unparking as the additional threads will instantly park without discovering 53 // any work to do. 54 // 55 // The current approach: 56 // 57 // This approach applies to three primary sources of potential work: readying a 58 // goroutine, new/modified-earlier timers, and idle-priority GC. See below for 59 // additional details. 60 // 61 // We unpark an additional thread when we submit work if (this is wakep()): 62 // 1. There is an idle P, and 63 // 2. There are no "spinning" worker threads. 64 // 65 // A worker thread is considered spinning if it is out of local work and did 66 // not find work in the global run queue or netpoller; the spinning state is 67 // denoted in m.spinning and in sched.nmspinning. Threads unparked this way are 68 // also considered spinning; we don't do goroutine handoff so such threads are 69 // out of work initially. Spinning threads spin on looking for work in per-P 70 // run queues and timer heaps or from the GC before parking. If a spinning 71 // thread finds work it takes itself out of the spinning state and proceeds to 72 // execution. If it does not find work it takes itself out of the spinning 73 // state and then parks. 74 // 75 // If there is at least one spinning thread (sched.nmspinning>1), we don't 76 // unpark new threads when submitting work. To compensate for that, if the last 77 // spinning thread finds work and stops spinning, it must unpark a new spinning 78 // thread. This approach smooths out unjustified spikes of thread unparking, 79 // but at the same time guarantees eventual maximal CPU parallelism 80 // utilization. 81 // 82 // The main implementation complication is that we need to be very careful 83 // during spinning->non-spinning thread transition. This transition can race 84 // with submission of new work, and either one part or another needs to unpark 85 // another worker thread. If they both fail to do that, we can end up with 86 // semi-persistent CPU underutilization. 87 // 88 // The general pattern for submission is: 89 // 1. Submit work to the local or global run queue, timer heap, or GC state. 90 // 2. #StoreLoad-style memory barrier. 91 // 3. Check sched.nmspinning. 92 // 93 // The general pattern for spinning->non-spinning transition is: 94 // 1. Decrement nmspinning. 95 // 2. #StoreLoad-style memory barrier. 96 // 3. Check all per-P work queues and GC for new work. 97 // 98 // Note that all this complexity does not apply to global run queue as we are 99 // not sloppy about thread unparking when submitting to global queue. Also see 100 // comments for nmspinning manipulation. 101 // 102 // How these different sources of work behave varies, though it doesn't affect 103 // the synchronization approach: 104 // * Ready goroutine: this is an obvious source of work; the goroutine is 105 // immediately ready and must run on some thread eventually. 106 // * New/modified-earlier timer: The current timer implementation (see time.go) 107 // uses netpoll in a thread with no work available to wait for the soonest 108 // timer. If there is no thread waiting, we want a new spinning thread to go 109 // wait. 110 // * Idle-priority GC: The GC wakes a stopped idle thread to contribute to 111 // background GC work (note: currently disabled per golang.org/issue/19112). 112 // Also see golang.org/issue/44313, as this should be extended to all GC 113 // workers. 114 115 var ( 116 m0 m 117 g0 g 118 mcache0 *mcache 119 raceprocctx0 uintptr 120 raceFiniLock mutex 121 ) 122 123 // This slice records the initializing tasks that need to be 124 // done to start up the runtime. It is built by the linker. 125 var runtime_inittasks []*initTask 126 127 // main_init_done is a signal used by cgocallbackg that initialization 128 // has been completed. It is made before _cgo_notify_runtime_init_done, 129 // so all cgo calls can rely on it existing. When main_init is complete, 130 // it is closed, meaning cgocallbackg can reliably receive from it. 131 var main_init_done chan bool 132 133 //go:linkname main_main main.main 134 func main_main() 135 136 // mainStarted indicates that the main M has started. 137 var mainStarted bool 138 139 // runtimeInitTime is the nanotime() at which the runtime started. 140 var runtimeInitTime int64 141 142 // Value to use for signal mask for newly created M's. 143 var initSigmask sigset 144 145 // The main goroutine. 146 func main() { 147 mp := getg().m 148 149 // Racectx of m0->g0 is used only as the parent of the main goroutine. 150 // It must not be used for anything else. 151 mp.g0.racectx = 0 152 153 // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit. 154 // Using decimal instead of binary GB and MB because 155 // they look nicer in the stack overflow failure message. 156 if goarch.PtrSize == 8 { 157 maxstacksize = 1000000000 158 } else { 159 maxstacksize = 250000000 160 } 161 162 // An upper limit for max stack size. Used to avoid random crashes 163 // after calling SetMaxStack and trying to allocate a stack that is too big, 164 // since stackalloc works with 32-bit sizes. 165 maxstackceiling = 2 * maxstacksize 166 167 // Allow newproc to start new Ms. 168 mainStarted = true 169 170 if haveSysmon { 171 systemstack(func() { 172 newm(sysmon, nil, -1) 173 }) 174 } 175 176 // Lock the main goroutine onto this, the main OS thread, 177 // during initialization. Most programs won't care, but a few 178 // do require certain calls to be made by the main thread. 179 // Those can arrange for main.main to run in the main thread 180 // by calling runtime.LockOSThread during initialization 181 // to preserve the lock. 182 lockOSThread() 183 184 if mp != &m0 { 185 throw("runtime.main not on m0") 186 } 187 188 // Record when the world started. 189 // Must be before doInit for tracing init. 190 runtimeInitTime = nanotime() 191 if runtimeInitTime == 0 { 192 throw("nanotime returning zero") 193 } 194 195 if debug.inittrace != 0 { 196 inittrace.id = getg().goid 197 inittrace.active = true 198 } 199 200 doInit(runtime_inittasks) // Must be before defer. 201 202 // Defer unlock so that runtime.Goexit during init does the unlock too. 203 needUnlock := true 204 defer func() { 205 if needUnlock { 206 unlockOSThread() 207 } 208 }() 209 210 gcenable() 211 212 main_init_done = make(chan bool) 213 if iscgo { 214 if _cgo_pthread_key_created == nil { 215 throw("_cgo_pthread_key_created missing") 216 } 217 218 if _cgo_thread_start == nil { 219 throw("_cgo_thread_start missing") 220 } 221 if GOOS != "windows" { 222 if _cgo_setenv == nil { 223 throw("_cgo_setenv missing") 224 } 225 if _cgo_unsetenv == nil { 226 throw("_cgo_unsetenv missing") 227 } 228 } 229 if _cgo_notify_runtime_init_done == nil { 230 throw("_cgo_notify_runtime_init_done missing") 231 } 232 233 // Set the x_crosscall2_ptr C function pointer variable point to crosscall2. 234 if set_crosscall2 == nil { 235 throw("set_crosscall2 missing") 236 } 237 set_crosscall2() 238 239 // Start the template thread in case we enter Go from 240 // a C-created thread and need to create a new thread. 241 startTemplateThread() 242 cgocall(_cgo_notify_runtime_init_done, nil) 243 } 244 245 // Run the initializing tasks. Depending on build mode this 246 // list can arrive a few different ways, but it will always 247 // contain the init tasks computed by the linker for all the 248 // packages in the program (excluding those added at runtime 249 // by package plugin). Run through the modules in dependency 250 // order (the order they are initialized by the dynamic 251 // loader, i.e. they are added to the moduledata linked list). 252 for m := &firstmoduledata; m != nil; m = m.next { 253 doInit(m.inittasks) 254 } 255 256 // Disable init tracing after main init done to avoid overhead 257 // of collecting statistics in malloc and newproc 258 inittrace.active = false 259 260 close(main_init_done) 261 262 needUnlock = false 263 unlockOSThread() 264 265 if isarchive || islibrary { 266 // A program compiled with -buildmode=c-archive or c-shared 267 // has a main, but it is not executed. 268 return 269 } 270 fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime 271 fn() 272 if raceenabled { 273 runExitHooks(0) // run hooks now, since racefini does not return 274 racefini() 275 } 276 277 // Make racy client program work: if panicking on 278 // another goroutine at the same time as main returns, 279 // let the other goroutine finish printing the panic trace. 280 // Once it does, it will exit. See issues 3934 and 20018. 281 if runningPanicDefers.Load() != 0 { 282 // Running deferred functions should not take long. 283 for c := 0; c < 1000; c++ { 284 if runningPanicDefers.Load() == 0 { 285 break 286 } 287 Gosched() 288 } 289 } 290 if panicking.Load() != 0 { 291 gopark(nil, nil, waitReasonPanicWait, traceBlockForever, 1) 292 } 293 runExitHooks(0) 294 295 exit(0) 296 for { 297 var x *int32 298 *x = 0 299 } 300 } 301 302 // os_beforeExit is called from os.Exit(0). 303 // 304 //go:linkname os_beforeExit os.runtime_beforeExit 305 func os_beforeExit(exitCode int) { 306 runExitHooks(exitCode) 307 if exitCode == 0 && raceenabled { 308 racefini() 309 } 310 } 311 312 // start forcegc helper goroutine 313 func init() { 314 go forcegchelper() 315 } 316 317 func forcegchelper() { 318 forcegc.g = getg() 319 lockInit(&forcegc.lock, lockRankForcegc) 320 for { 321 lock(&forcegc.lock) 322 if forcegc.idle.Load() { 323 throw("forcegc: phase error") 324 } 325 forcegc.idle.Store(true) 326 goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceBlockSystemGoroutine, 1) 327 // this goroutine is explicitly resumed by sysmon 328 if debug.gctrace > 0 { 329 println("GC forced") 330 } 331 // Time-triggered, fully concurrent. 332 gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()}) 333 } 334 } 335 336 // Gosched yields the processor, allowing other goroutines to run. It does not 337 // suspend the current goroutine, so execution resumes automatically. 338 // 339 //go:nosplit 340 func Gosched() { 341 checkTimeouts() 342 mcall(gosched_m) 343 } 344 345 // goschedguarded yields the processor like gosched, but also checks 346 // for forbidden states and opts out of the yield in those cases. 347 // 348 //go:nosplit 349 func goschedguarded() { 350 mcall(goschedguarded_m) 351 } 352 353 // goschedIfBusy yields the processor like gosched, but only does so if 354 // there are no idle Ps or if we're on the only P and there's nothing in 355 // the run queue. In both cases, there is freely available idle time. 356 // 357 //go:nosplit 358 func goschedIfBusy() { 359 gp := getg() 360 // Call gosched if gp.preempt is set; we may be in a tight loop that 361 // doesn't otherwise yield. 362 if !gp.preempt && sched.npidle.Load() > 0 { 363 return 364 } 365 mcall(gosched_m) 366 } 367 368 // Puts the current goroutine into a waiting state and calls unlockf on the 369 // system stack. 370 // 371 // If unlockf returns false, the goroutine is resumed. 372 // 373 // unlockf must not access this G's stack, as it may be moved between 374 // the call to gopark and the call to unlockf. 375 // 376 // Note that because unlockf is called after putting the G into a waiting 377 // state, the G may have already been readied by the time unlockf is called 378 // unless there is external synchronization preventing the G from being 379 // readied. If unlockf returns false, it must guarantee that the G cannot be 380 // externally readied. 381 // 382 // Reason explains why the goroutine has been parked. It is displayed in stack 383 // traces and heap dumps. Reasons should be unique and descriptive. Do not 384 // re-use reasons, add new ones. 385 func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int) { 386 if reason != waitReasonSleep { 387 checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy 388 } 389 mp := acquirem() 390 gp := mp.curg 391 status := readgstatus(gp) 392 if status != _Grunning && status != _Gscanrunning { 393 throw("gopark: bad g status") 394 } 395 mp.waitlock = lock 396 mp.waitunlockf = unlockf 397 gp.waitreason = reason 398 mp.waitTraceBlockReason = traceReason 399 mp.waitTraceSkip = traceskip 400 releasem(mp) 401 // can't do anything that might move the G between Ms here. 402 mcall(park_m) 403 } 404 405 // Puts the current goroutine into a waiting state and unlocks the lock. 406 // The goroutine can be made runnable again by calling goready(gp). 407 func goparkunlock(lock *mutex, reason waitReason, traceReason traceBlockReason, traceskip int) { 408 gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceReason, traceskip) 409 } 410 411 func goready(gp *g, traceskip int) { 412 systemstack(func() { 413 ready(gp, traceskip, true) 414 }) 415 } 416 417 //go:nosplit 418 func acquireSudog() *sudog { 419 // Delicate dance: the semaphore implementation calls 420 // acquireSudog, acquireSudog calls new(sudog), 421 // new calls malloc, malloc can call the garbage collector, 422 // and the garbage collector calls the semaphore implementation 423 // in stopTheWorld. 424 // Break the cycle by doing acquirem/releasem around new(sudog). 425 // The acquirem/releasem increments m.locks during new(sudog), 426 // which keeps the garbage collector from being invoked. 427 mp := acquirem() 428 pp := mp.p.ptr() 429 if len(pp.sudogcache) == 0 { 430 lock(&sched.sudoglock) 431 // First, try to grab a batch from central cache. 432 for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil { 433 s := sched.sudogcache 434 sched.sudogcache = s.next 435 s.next = nil 436 pp.sudogcache = append(pp.sudogcache, s) 437 } 438 unlock(&sched.sudoglock) 439 // If the central cache is empty, allocate a new one. 440 if len(pp.sudogcache) == 0 { 441 pp.sudogcache = append(pp.sudogcache, new(sudog)) 442 } 443 } 444 n := len(pp.sudogcache) 445 s := pp.sudogcache[n-1] 446 pp.sudogcache[n-1] = nil 447 pp.sudogcache = pp.sudogcache[:n-1] 448 if s.elem != nil { 449 throw("acquireSudog: found s.elem != nil in cache") 450 } 451 releasem(mp) 452 return s 453 } 454 455 //go:nosplit 456 func releaseSudog(s *sudog) { 457 if s.elem != nil { 458 throw("runtime: sudog with non-nil elem") 459 } 460 if s.isSelect { 461 throw("runtime: sudog with non-false isSelect") 462 } 463 if s.next != nil { 464 throw("runtime: sudog with non-nil next") 465 } 466 if s.prev != nil { 467 throw("runtime: sudog with non-nil prev") 468 } 469 if s.waitlink != nil { 470 throw("runtime: sudog with non-nil waitlink") 471 } 472 if s.c != nil { 473 throw("runtime: sudog with non-nil c") 474 } 475 gp := getg() 476 if gp.param != nil { 477 throw("runtime: releaseSudog with non-nil gp.param") 478 } 479 mp := acquirem() // avoid rescheduling to another P 480 pp := mp.p.ptr() 481 if len(pp.sudogcache) == cap(pp.sudogcache) { 482 // Transfer half of local cache to the central cache. 483 var first, last *sudog 484 for len(pp.sudogcache) > cap(pp.sudogcache)/2 { 485 n := len(pp.sudogcache) 486 p := pp.sudogcache[n-1] 487 pp.sudogcache[n-1] = nil 488 pp.sudogcache = pp.sudogcache[:n-1] 489 if first == nil { 490 first = p 491 } else { 492 last.next = p 493 } 494 last = p 495 } 496 lock(&sched.sudoglock) 497 last.next = sched.sudogcache 498 sched.sudogcache = first 499 unlock(&sched.sudoglock) 500 } 501 pp.sudogcache = append(pp.sudogcache, s) 502 releasem(mp) 503 } 504 505 // called from assembly. 506 func badmcall(fn func(*g)) { 507 throw("runtime: mcall called on m->g0 stack") 508 } 509 510 func badmcall2(fn func(*g)) { 511 throw("runtime: mcall function returned") 512 } 513 514 func badreflectcall() { 515 panic(plainError("arg size to reflect.call more than 1GB")) 516 } 517 518 //go:nosplit 519 //go:nowritebarrierrec 520 func badmorestackg0() { 521 if !crashStackImplemented { 522 writeErrStr("fatal: morestack on g0\n") 523 return 524 } 525 526 g := getg() 527 switchToCrashStack(func() { 528 print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n") 529 g.m.traceback = 2 // include pc and sp in stack trace 530 traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0) 531 print("\n") 532 533 throw("morestack on g0") 534 }) 535 } 536 537 //go:nosplit 538 //go:nowritebarrierrec 539 func badmorestackgsignal() { 540 writeErrStr("fatal: morestack on gsignal\n") 541 } 542 543 //go:nosplit 544 func badctxt() { 545 throw("ctxt != 0") 546 } 547 548 // gcrash is a fake g that can be used when crashing due to bad 549 // stack conditions. 550 var gcrash g 551 552 var crashingG atomic.Pointer[g] 553 554 // Switch to crashstack and call fn, with special handling of 555 // concurrent and recursive cases. 556 // 557 // Nosplit as it is called in a bad stack condition (we know 558 // morestack would fail). 559 // 560 //go:nosplit 561 //go:nowritebarrierrec 562 func switchToCrashStack(fn func()) { 563 me := getg() 564 if crashingG.CompareAndSwapNoWB(nil, me) { 565 switchToCrashStack0(fn) // should never return 566 abort() 567 } 568 if crashingG.Load() == me { 569 // recursive crashing. too bad. 570 writeErrStr("fatal: recursive switchToCrashStack\n") 571 abort() 572 } 573 // Another g is crashing. Give it some time, hopefully it will finish traceback. 574 usleep_no_g(100) 575 writeErrStr("fatal: concurrent switchToCrashStack\n") 576 abort() 577 } 578 579 // Disable crash stack on Windows for now. Apparently, throwing an exception 580 // on a non-system-allocated crash stack causes EXCEPTION_STACK_OVERFLOW and 581 // hangs the process (see issue 63938). 582 const crashStackImplemented = (GOARCH == "386" || GOARCH == "amd64" || GOARCH == "arm" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "mips64" || GOARCH == "mips64le" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64" || GOARCH == "s390x" || GOARCH == "wasm") && GOOS != "windows" 583 584 //go:noescape 585 func switchToCrashStack0(fn func()) // in assembly 586 587 func lockedOSThread() bool { 588 gp := getg() 589 return gp.lockedm != 0 && gp.m.lockedg != 0 590 } 591 592 var ( 593 // allgs contains all Gs ever created (including dead Gs), and thus 594 // never shrinks. 595 // 596 // Access via the slice is protected by allglock or stop-the-world. 597 // Readers that cannot take the lock may (carefully!) use the atomic 598 // variables below. 599 allglock mutex 600 allgs []*g 601 602 // allglen and allgptr are atomic variables that contain len(allgs) and 603 // &allgs[0] respectively. Proper ordering depends on totally-ordered 604 // loads and stores. Writes are protected by allglock. 605 // 606 // allgptr is updated before allglen. Readers should read allglen 607 // before allgptr to ensure that allglen is always <= len(allgptr). New 608 // Gs appended during the race can be missed. For a consistent view of 609 // all Gs, allglock must be held. 610 // 611 // allgptr copies should always be stored as a concrete type or 612 // unsafe.Pointer, not uintptr, to ensure that GC can still reach it 613 // even if it points to a stale array. 614 allglen uintptr 615 allgptr **g 616 ) 617 618 func allgadd(gp *g) { 619 if readgstatus(gp) == _Gidle { 620 throw("allgadd: bad status Gidle") 621 } 622 623 lock(&allglock) 624 allgs = append(allgs, gp) 625 if &allgs[0] != allgptr { 626 atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0])) 627 } 628 atomic.Storeuintptr(&allglen, uintptr(len(allgs))) 629 unlock(&allglock) 630 } 631 632 // allGsSnapshot returns a snapshot of the slice of all Gs. 633 // 634 // The world must be stopped or allglock must be held. 635 func allGsSnapshot() []*g { 636 assertWorldStoppedOrLockHeld(&allglock) 637 638 // Because the world is stopped or allglock is held, allgadd 639 // cannot happen concurrently with this. allgs grows 640 // monotonically and existing entries never change, so we can 641 // simply return a copy of the slice header. For added safety, 642 // we trim everything past len because that can still change. 643 return allgs[:len(allgs):len(allgs)] 644 } 645 646 // atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex. 647 func atomicAllG() (**g, uintptr) { 648 length := atomic.Loaduintptr(&allglen) 649 ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr))) 650 return ptr, length 651 } 652 653 // atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG. 654 func atomicAllGIndex(ptr **g, i uintptr) *g { 655 return *(**g)(add(unsafe.Pointer(ptr), i*goarch.PtrSize)) 656 } 657 658 // forEachG calls fn on every G from allgs. 659 // 660 // forEachG takes a lock to exclude concurrent addition of new Gs. 661 func forEachG(fn func(gp *g)) { 662 lock(&allglock) 663 for _, gp := range allgs { 664 fn(gp) 665 } 666 unlock(&allglock) 667 } 668 669 // forEachGRace calls fn on every G from allgs. 670 // 671 // forEachGRace avoids locking, but does not exclude addition of new Gs during 672 // execution, which may be missed. 673 func forEachGRace(fn func(gp *g)) { 674 ptr, length := atomicAllG() 675 for i := uintptr(0); i < length; i++ { 676 gp := atomicAllGIndex(ptr, i) 677 fn(gp) 678 } 679 return 680 } 681 682 const ( 683 // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. 684 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 685 _GoidCacheBatch = 16 686 ) 687 688 // cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete 689 // value of the GODEBUG environment variable. 690 func cpuinit(env string) { 691 switch GOOS { 692 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": 693 cpu.DebugOptions = true 694 } 695 cpu.Initialize(env) 696 697 // Support cpu feature variables are used in code generated by the compiler 698 // to guard execution of instructions that can not be assumed to be always supported. 699 switch GOARCH { 700 case "386", "amd64": 701 x86HasPOPCNT = cpu.X86.HasPOPCNT 702 x86HasSSE41 = cpu.X86.HasSSE41 703 x86HasFMA = cpu.X86.HasFMA 704 705 case "arm": 706 armHasVFPv4 = cpu.ARM.HasVFPv4 707 708 case "arm64": 709 arm64HasATOMICS = cpu.ARM64.HasATOMICS 710 } 711 } 712 713 // getGodebugEarly extracts the environment variable GODEBUG from the environment on 714 // Unix-like operating systems and returns it. This function exists to extract GODEBUG 715 // early before much of the runtime is initialized. 716 func getGodebugEarly() string { 717 const prefix = "GODEBUG=" 718 var env string 719 switch GOOS { 720 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": 721 // Similar to goenv_unix but extracts the environment value for 722 // GODEBUG directly. 723 // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() 724 n := int32(0) 725 for argv_index(argv, argc+1+n) != nil { 726 n++ 727 } 728 729 for i := int32(0); i < n; i++ { 730 p := argv_index(argv, argc+1+i) 731 s := unsafe.String(p, findnull(p)) 732 733 if hasPrefix(s, prefix) { 734 env = gostring(p)[len(prefix):] 735 break 736 } 737 } 738 } 739 return env 740 } 741 742 // The bootstrap sequence is: 743 // 744 // call osinit 745 // call schedinit 746 // make & queue new G 747 // call runtime·mstart 748 // 749 // The new G calls runtime·main. 750 func schedinit() { 751 lockInit(&sched.lock, lockRankSched) 752 lockInit(&sched.sysmonlock, lockRankSysmon) 753 lockInit(&sched.deferlock, lockRankDefer) 754 lockInit(&sched.sudoglock, lockRankSudog) 755 lockInit(&deadlock, lockRankDeadlock) 756 lockInit(&paniclk, lockRankPanic) 757 lockInit(&allglock, lockRankAllg) 758 lockInit(&allpLock, lockRankAllp) 759 lockInit(&reflectOffs.lock, lockRankReflectOffs) 760 lockInit(&finlock, lockRankFin) 761 lockInit(&cpuprof.lock, lockRankCpuprof) 762 allocmLock.init(lockRankAllocmR, lockRankAllocmRInternal, lockRankAllocmW) 763 execLock.init(lockRankExecR, lockRankExecRInternal, lockRankExecW) 764 traceLockInit() 765 // Enforce that this lock is always a leaf lock. 766 // All of this lock's critical sections should be 767 // extremely short. 768 lockInit(&memstats.heapStats.noPLock, lockRankLeafRank) 769 770 // raceinit must be the first call to race detector. 771 // In particular, it must be done before mallocinit below calls racemapshadow. 772 gp := getg() 773 if raceenabled { 774 gp.racectx, raceprocctx0 = raceinit() 775 } 776 777 sched.maxmcount = 10000 778 crashFD.Store(^uintptr(0)) 779 780 // The world starts stopped. 781 worldStopped() 782 783 ticks.init() // run as early as possible 784 moduledataverify() 785 stackinit() 786 mallocinit() 787 godebug := getGodebugEarly() 788 initPageTrace(godebug) // must run after mallocinit but before anything allocates 789 cpuinit(godebug) // must run before alginit 790 randinit() // must run before alginit, mcommoninit 791 alginit() // maps, hash, rand must not be used before this call 792 mcommoninit(gp.m, -1) 793 modulesinit() // provides activeModules 794 typelinksinit() // uses maps, activeModules 795 itabsinit() // uses activeModules 796 stkobjinit() // must run before GC starts 797 798 sigsave(&gp.m.sigmask) 799 initSigmask = gp.m.sigmask 800 801 goargs() 802 goenvs() 803 secure() 804 checkfds() 805 parsedebugvars() 806 gcinit() 807 808 // Allocate stack space that can be used when crashing due to bad stack 809 // conditions, e.g. morestack on g0. 810 gcrash.stack = stackalloc(16384) 811 gcrash.stackguard0 = gcrash.stack.lo + 1000 812 gcrash.stackguard1 = gcrash.stack.lo + 1000 813 814 // if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile. 815 // Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is 816 // set to true by the linker, it means that nothing is consuming the profile, it is 817 // safe to set MemProfileRate to 0. 818 if disableMemoryProfiling { 819 MemProfileRate = 0 820 } 821 822 lock(&sched.lock) 823 sched.lastpoll.Store(nanotime()) 824 procs := ncpu 825 if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { 826 procs = n 827 } 828 if procresize(procs) != nil { 829 throw("unknown runnable goroutine during bootstrap") 830 } 831 unlock(&sched.lock) 832 833 // World is effectively started now, as P's can run. 834 worldStarted() 835 836 if buildVersion == "" { 837 // Condition should never trigger. This code just serves 838 // to ensure runtime·buildVersion is kept in the resulting binary. 839 buildVersion = "unknown" 840 } 841 if len(modinfo) == 1 { 842 // Condition should never trigger. This code just serves 843 // to ensure runtime·modinfo is kept in the resulting binary. 844 modinfo = "" 845 } 846 } 847 848 func dumpgstatus(gp *g) { 849 thisg := getg() 850 print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") 851 print("runtime: getg: g=", thisg, ", goid=", thisg.goid, ", g->atomicstatus=", readgstatus(thisg), "\n") 852 } 853 854 // sched.lock must be held. 855 func checkmcount() { 856 assertLockHeld(&sched.lock) 857 858 // Exclude extra M's, which are used for cgocallback from threads 859 // created in C. 860 // 861 // The purpose of the SetMaxThreads limit is to avoid accidental fork 862 // bomb from something like millions of goroutines blocking on system 863 // calls, causing the runtime to create millions of threads. By 864 // definition, this isn't a problem for threads created in C, so we 865 // exclude them from the limit. See https://go.dev/issue/60004. 866 count := mcount() - int32(extraMInUse.Load()) - int32(extraMLength.Load()) 867 if count > sched.maxmcount { 868 print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n") 869 throw("thread exhaustion") 870 } 871 } 872 873 // mReserveID returns the next ID to use for a new m. This new m is immediately 874 // considered 'running' by checkdead. 875 // 876 // sched.lock must be held. 877 func mReserveID() int64 { 878 assertLockHeld(&sched.lock) 879 880 if sched.mnext+1 < sched.mnext { 881 throw("runtime: thread ID overflow") 882 } 883 id := sched.mnext 884 sched.mnext++ 885 checkmcount() 886 return id 887 } 888 889 // Pre-allocated ID may be passed as 'id', or omitted by passing -1. 890 func mcommoninit(mp *m, id int64) { 891 gp := getg() 892 893 // g0 stack won't make sense for user (and is not necessary unwindable). 894 if gp != gp.m.g0 { 895 callers(1, mp.createstack[:]) 896 } 897 898 lock(&sched.lock) 899 900 if id >= 0 { 901 mp.id = id 902 } else { 903 mp.id = mReserveID() 904 } 905 906 mrandinit(mp) 907 908 mpreinit(mp) 909 if mp.gsignal != nil { 910 mp.gsignal.stackguard1 = mp.gsignal.stack.lo + stackGuard 911 } 912 913 // Add to allm so garbage collector doesn't free g->m 914 // when it is just in a register or thread-local storage. 915 mp.alllink = allm 916 917 // NumCgoCall() and others iterate over allm w/o schedlock, 918 // so we need to publish it safely. 919 atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp)) 920 unlock(&sched.lock) 921 922 // Allocate memory to hold a cgo traceback if the cgo call crashes. 923 if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" { 924 mp.cgoCallers = new(cgoCallers) 925 } 926 } 927 928 func (mp *m) becomeSpinning() { 929 mp.spinning = true 930 sched.nmspinning.Add(1) 931 sched.needspinning.Store(0) 932 } 933 934 func (mp *m) hasCgoOnStack() bool { 935 return mp.ncgo > 0 || mp.isextra 936 } 937 938 const ( 939 // osHasLowResTimer indicates that the platform's internal timer system has a low resolution, 940 // typically on the order of 1 ms or more. 941 osHasLowResTimer = GOOS == "windows" || GOOS == "openbsd" || GOOS == "netbsd" 942 943 // osHasLowResClockInt is osHasLowResClock but in integer form, so it can be used to create 944 // constants conditionally. 945 osHasLowResClockInt = goos.IsWindows 946 947 // osHasLowResClock indicates that timestamps produced by nanotime on the platform have a 948 // low resolution, typically on the order of 1 ms or more. 949 osHasLowResClock = osHasLowResClockInt > 0 950 ) 951 952 // Mark gp ready to run. 953 func ready(gp *g, traceskip int, next bool) { 954 status := readgstatus(gp) 955 956 // Mark runnable. 957 mp := acquirem() // disable preemption because it can be holding p in a local var 958 if status&^_Gscan != _Gwaiting { 959 dumpgstatus(gp) 960 throw("bad g->status in ready") 961 } 962 963 // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq 964 trace := traceAcquire() 965 casgstatus(gp, _Gwaiting, _Grunnable) 966 if trace.ok() { 967 trace.GoUnpark(gp, traceskip) 968 traceRelease(trace) 969 } 970 runqput(mp.p.ptr(), gp, next) 971 wakep() 972 releasem(mp) 973 } 974 975 // freezeStopWait is a large value that freezetheworld sets 976 // sched.stopwait to in order to request that all Gs permanently stop. 977 const freezeStopWait = 0x7fffffff 978 979 // freezing is set to non-zero if the runtime is trying to freeze the 980 // world. 981 var freezing atomic.Bool 982 983 // Similar to stopTheWorld but best-effort and can be called several times. 984 // There is no reverse operation, used during crashing. 985 // This function must not lock any mutexes. 986 func freezetheworld() { 987 freezing.Store(true) 988 if debug.dontfreezetheworld > 0 { 989 // Don't prempt Ps to stop goroutines. That will perturb 990 // scheduler state, making debugging more difficult. Instead, 991 // allow goroutines to continue execution. 992 // 993 // fatalpanic will tracebackothers to trace all goroutines. It 994 // is unsafe to trace a running goroutine, so tracebackothers 995 // will skip running goroutines. That is OK and expected, we 996 // expect users of dontfreezetheworld to use core files anyway. 997 // 998 // However, allowing the scheduler to continue running free 999 // introduces a race: a goroutine may be stopped when 1000 // tracebackothers checks its status, and then start running 1001 // later when we are in the middle of traceback, potentially 1002 // causing a crash. 1003 // 1004 // To mitigate this, when an M naturally enters the scheduler, 1005 // schedule checks if freezing is set and if so stops 1006 // execution. This guarantees that while Gs can transition from 1007 // running to stopped, they can never transition from stopped 1008 // to running. 1009 // 1010 // The sleep here allows racing Ms that missed freezing and are 1011 // about to run a G to complete the transition to running 1012 // before we start traceback. 1013 usleep(1000) 1014 return 1015 } 1016 1017 // stopwait and preemption requests can be lost 1018 // due to races with concurrently executing threads, 1019 // so try several times 1020 for i := 0; i < 5; i++ { 1021 // this should tell the scheduler to not start any new goroutines 1022 sched.stopwait = freezeStopWait 1023 sched.gcwaiting.Store(true) 1024 // this should stop running goroutines 1025 if !preemptall() { 1026 break // no running goroutines 1027 } 1028 usleep(1000) 1029 } 1030 // to be sure 1031 usleep(1000) 1032 preemptall() 1033 usleep(1000) 1034 } 1035 1036 // All reads and writes of g's status go through readgstatus, casgstatus 1037 // castogscanstatus, casfrom_Gscanstatus. 1038 // 1039 //go:nosplit 1040 func readgstatus(gp *g) uint32 { 1041 return gp.atomicstatus.Load() 1042 } 1043 1044 // The Gscanstatuses are acting like locks and this releases them. 1045 // If it proves to be a performance hit we should be able to make these 1046 // simple atomic stores but for now we are going to throw if 1047 // we see an inconsistent state. 1048 func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { 1049 success := false 1050 1051 // Check that transition is valid. 1052 switch oldval { 1053 default: 1054 print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n") 1055 dumpgstatus(gp) 1056 throw("casfrom_Gscanstatus:top gp->status is not in scan state") 1057 case _Gscanrunnable, 1058 _Gscanwaiting, 1059 _Gscanrunning, 1060 _Gscansyscall, 1061 _Gscanpreempted: 1062 if newval == oldval&^_Gscan { 1063 success = gp.atomicstatus.CompareAndSwap(oldval, newval) 1064 } 1065 } 1066 if !success { 1067 print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n") 1068 dumpgstatus(gp) 1069 throw("casfrom_Gscanstatus: gp->status is not in scan state") 1070 } 1071 releaseLockRank(lockRankGscan) 1072 } 1073 1074 // This will return false if the gp is not in the expected status and the cas fails. 1075 // This acts like a lock acquire while the casfromgstatus acts like a lock release. 1076 func castogscanstatus(gp *g, oldval, newval uint32) bool { 1077 switch oldval { 1078 case _Grunnable, 1079 _Grunning, 1080 _Gwaiting, 1081 _Gsyscall: 1082 if newval == oldval|_Gscan { 1083 r := gp.atomicstatus.CompareAndSwap(oldval, newval) 1084 if r { 1085 acquireLockRank(lockRankGscan) 1086 } 1087 return r 1088 1089 } 1090 } 1091 print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n") 1092 throw("castogscanstatus") 1093 panic("not reached") 1094 } 1095 1096 // casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track 1097 // various latencies on every transition instead of sampling them. 1098 var casgstatusAlwaysTrack = false 1099 1100 // If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus 1101 // and casfrom_Gscanstatus instead. 1102 // casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that 1103 // put it in the Gscan state is finished. 1104 // 1105 //go:nosplit 1106 func casgstatus(gp *g, oldval, newval uint32) { 1107 if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval { 1108 systemstack(func() { 1109 print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n") 1110 throw("casgstatus: bad incoming values") 1111 }) 1112 } 1113 1114 acquireLockRank(lockRankGscan) 1115 releaseLockRank(lockRankGscan) 1116 1117 // See https://golang.org/cl/21503 for justification of the yield delay. 1118 const yieldDelay = 5 * 1000 1119 var nextYield int64 1120 1121 // loop if gp->atomicstatus is in a scan state giving 1122 // GC time to finish and change the state to oldval. 1123 for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ { 1124 if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable { 1125 throw("casgstatus: waiting for Gwaiting but is Grunnable") 1126 } 1127 if i == 0 { 1128 nextYield = nanotime() + yieldDelay 1129 } 1130 if nanotime() < nextYield { 1131 for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ { 1132 procyield(1) 1133 } 1134 } else { 1135 osyield() 1136 nextYield = nanotime() + yieldDelay/2 1137 } 1138 } 1139 1140 if oldval == _Grunning { 1141 // Track every gTrackingPeriod time a goroutine transitions out of running. 1142 if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 { 1143 gp.tracking = true 1144 } 1145 gp.trackingSeq++ 1146 } 1147 if !gp.tracking { 1148 return 1149 } 1150 1151 // Handle various kinds of tracking. 1152 // 1153 // Currently: 1154 // - Time spent in runnable. 1155 // - Time spent blocked on a sync.Mutex or sync.RWMutex. 1156 switch oldval { 1157 case _Grunnable: 1158 // We transitioned out of runnable, so measure how much 1159 // time we spent in this state and add it to 1160 // runnableTime. 1161 now := nanotime() 1162 gp.runnableTime += now - gp.trackingStamp 1163 gp.trackingStamp = 0 1164 case _Gwaiting: 1165 if !gp.waitreason.isMutexWait() { 1166 // Not blocking on a lock. 1167 break 1168 } 1169 // Blocking on a lock, measure it. Note that because we're 1170 // sampling, we have to multiply by our sampling period to get 1171 // a more representative estimate of the absolute value. 1172 // gTrackingPeriod also represents an accurate sampling period 1173 // because we can only enter this state from _Grunning. 1174 now := nanotime() 1175 sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod) 1176 gp.trackingStamp = 0 1177 } 1178 switch newval { 1179 case _Gwaiting: 1180 if !gp.waitreason.isMutexWait() { 1181 // Not blocking on a lock. 1182 break 1183 } 1184 // Blocking on a lock. Write down the timestamp. 1185 now := nanotime() 1186 gp.trackingStamp = now 1187 case _Grunnable: 1188 // We just transitioned into runnable, so record what 1189 // time that happened. 1190 now := nanotime() 1191 gp.trackingStamp = now 1192 case _Grunning: 1193 // We're transitioning into running, so turn off 1194 // tracking and record how much time we spent in 1195 // runnable. 1196 gp.tracking = false 1197 sched.timeToRun.record(gp.runnableTime) 1198 gp.runnableTime = 0 1199 } 1200 } 1201 1202 // casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason. 1203 // 1204 // Use this over casgstatus when possible to ensure that a waitreason is set. 1205 func casGToWaiting(gp *g, old uint32, reason waitReason) { 1206 // Set the wait reason before calling casgstatus, because casgstatus will use it. 1207 gp.waitreason = reason 1208 casgstatus(gp, old, _Gwaiting) 1209 } 1210 1211 // casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable. 1212 // Returns old status. Cannot call casgstatus directly, because we are racing with an 1213 // async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus, 1214 // it might have become Grunnable by the time we get to the cas. If we called casgstatus, 1215 // it would loop waiting for the status to go back to Gwaiting, which it never will. 1216 // 1217 //go:nosplit 1218 func casgcopystack(gp *g) uint32 { 1219 for { 1220 oldstatus := readgstatus(gp) &^ _Gscan 1221 if oldstatus != _Gwaiting && oldstatus != _Grunnable { 1222 throw("copystack: bad status, not Gwaiting or Grunnable") 1223 } 1224 if gp.atomicstatus.CompareAndSwap(oldstatus, _Gcopystack) { 1225 return oldstatus 1226 } 1227 } 1228 } 1229 1230 // casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted. 1231 // 1232 // TODO(austin): This is the only status operation that both changes 1233 // the status and locks the _Gscan bit. Rethink this. 1234 func casGToPreemptScan(gp *g, old, new uint32) { 1235 if old != _Grunning || new != _Gscan|_Gpreempted { 1236 throw("bad g transition") 1237 } 1238 acquireLockRank(lockRankGscan) 1239 for !gp.atomicstatus.CompareAndSwap(_Grunning, _Gscan|_Gpreempted) { 1240 } 1241 } 1242 1243 // casGFromPreempted attempts to transition gp from _Gpreempted to 1244 // _Gwaiting. If successful, the caller is responsible for 1245 // re-scheduling gp. 1246 func casGFromPreempted(gp *g, old, new uint32) bool { 1247 if old != _Gpreempted || new != _Gwaiting { 1248 throw("bad g transition") 1249 } 1250 gp.waitreason = waitReasonPreempted 1251 return gp.atomicstatus.CompareAndSwap(_Gpreempted, _Gwaiting) 1252 } 1253 1254 // stwReason is an enumeration of reasons the world is stopping. 1255 type stwReason uint8 1256 1257 // Reasons to stop-the-world. 1258 // 1259 // Avoid reusing reasons and add new ones instead. 1260 const ( 1261 stwUnknown stwReason = iota // "unknown" 1262 stwGCMarkTerm // "GC mark termination" 1263 stwGCSweepTerm // "GC sweep termination" 1264 stwWriteHeapDump // "write heap dump" 1265 stwGoroutineProfile // "goroutine profile" 1266 stwGoroutineProfileCleanup // "goroutine profile cleanup" 1267 stwAllGoroutinesStack // "all goroutines stack trace" 1268 stwReadMemStats // "read mem stats" 1269 stwAllThreadsSyscall // "AllThreadsSyscall" 1270 stwGOMAXPROCS // "GOMAXPROCS" 1271 stwStartTrace // "start trace" 1272 stwStopTrace // "stop trace" 1273 stwForTestCountPagesInUse // "CountPagesInUse (test)" 1274 stwForTestReadMetricsSlow // "ReadMetricsSlow (test)" 1275 stwForTestReadMemStatsSlow // "ReadMemStatsSlow (test)" 1276 stwForTestPageCachePagesLeaked // "PageCachePagesLeaked (test)" 1277 stwForTestResetDebugLog // "ResetDebugLog (test)" 1278 ) 1279 1280 func (r stwReason) String() string { 1281 return stwReasonStrings[r] 1282 } 1283 1284 func (r stwReason) isGC() bool { 1285 return r == stwGCMarkTerm || r == stwGCSweepTerm 1286 } 1287 1288 // If you add to this list, also add it to src/internal/trace/parser.go. 1289 // If you change the values of any of the stw* constants, bump the trace 1290 // version number and make a copy of this. 1291 var stwReasonStrings = [...]string{ 1292 stwUnknown: "unknown", 1293 stwGCMarkTerm: "GC mark termination", 1294 stwGCSweepTerm: "GC sweep termination", 1295 stwWriteHeapDump: "write heap dump", 1296 stwGoroutineProfile: "goroutine profile", 1297 stwGoroutineProfileCleanup: "goroutine profile cleanup", 1298 stwAllGoroutinesStack: "all goroutines stack trace", 1299 stwReadMemStats: "read mem stats", 1300 stwAllThreadsSyscall: "AllThreadsSyscall", 1301 stwGOMAXPROCS: "GOMAXPROCS", 1302 stwStartTrace: "start trace", 1303 stwStopTrace: "stop trace", 1304 stwForTestCountPagesInUse: "CountPagesInUse (test)", 1305 stwForTestReadMetricsSlow: "ReadMetricsSlow (test)", 1306 stwForTestReadMemStatsSlow: "ReadMemStatsSlow (test)", 1307 stwForTestPageCachePagesLeaked: "PageCachePagesLeaked (test)", 1308 stwForTestResetDebugLog: "ResetDebugLog (test)", 1309 } 1310 1311 // worldStop provides context from the stop-the-world required by the 1312 // start-the-world. 1313 type worldStop struct { 1314 reason stwReason 1315 start int64 1316 } 1317 1318 // Temporary variable for stopTheWorld, when it can't write to the stack. 1319 // 1320 // Protected by worldsema. 1321 var stopTheWorldContext worldStop 1322 1323 // stopTheWorld stops all P's from executing goroutines, interrupting 1324 // all goroutines at GC safe points and records reason as the reason 1325 // for the stop. On return, only the current goroutine's P is running. 1326 // stopTheWorld must not be called from a system stack and the caller 1327 // must not hold worldsema. The caller must call startTheWorld when 1328 // other P's should resume execution. 1329 // 1330 // stopTheWorld is safe for multiple goroutines to call at the 1331 // same time. Each will execute its own stop, and the stops will 1332 // be serialized. 1333 // 1334 // This is also used by routines that do stack dumps. If the system is 1335 // in panic or being exited, this may not reliably stop all 1336 // goroutines. 1337 // 1338 // Returns the STW context. When starting the world, this context must be 1339 // passed to startTheWorld. 1340 func stopTheWorld(reason stwReason) worldStop { 1341 semacquire(&worldsema) 1342 gp := getg() 1343 gp.m.preemptoff = reason.String() 1344 systemstack(func() { 1345 // Mark the goroutine which called stopTheWorld preemptible so its 1346 // stack may be scanned. 1347 // This lets a mark worker scan us while we try to stop the world 1348 // since otherwise we could get in a mutual preemption deadlock. 1349 // We must not modify anything on the G stack because a stack shrink 1350 // may occur. A stack shrink is otherwise OK though because in order 1351 // to return from this function (and to leave the system stack) we 1352 // must have preempted all goroutines, including any attempting 1353 // to scan our stack, in which case, any stack shrinking will 1354 // have already completed by the time we exit. 1355 // 1356 // N.B. The execution tracer is not aware of this status 1357 // transition and handles it specially based on the 1358 // wait reason. 1359 casGToWaiting(gp, _Grunning, waitReasonStoppingTheWorld) 1360 stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack 1361 casgstatus(gp, _Gwaiting, _Grunning) 1362 }) 1363 return stopTheWorldContext 1364 } 1365 1366 // startTheWorld undoes the effects of stopTheWorld. 1367 // 1368 // w must be the worldStop returned by stopTheWorld. 1369 func startTheWorld(w worldStop) { 1370 systemstack(func() { startTheWorldWithSema(0, w) }) 1371 1372 // worldsema must be held over startTheWorldWithSema to ensure 1373 // gomaxprocs cannot change while worldsema is held. 1374 // 1375 // Release worldsema with direct handoff to the next waiter, but 1376 // acquirem so that semrelease1 doesn't try to yield our time. 1377 // 1378 // Otherwise if e.g. ReadMemStats is being called in a loop, 1379 // it might stomp on other attempts to stop the world, such as 1380 // for starting or ending GC. The operation this blocks is 1381 // so heavy-weight that we should just try to be as fair as 1382 // possible here. 1383 // 1384 // We don't want to just allow us to get preempted between now 1385 // and releasing the semaphore because then we keep everyone 1386 // (including, for example, GCs) waiting longer. 1387 mp := acquirem() 1388 mp.preemptoff = "" 1389 semrelease1(&worldsema, true, 0) 1390 releasem(mp) 1391 } 1392 1393 // stopTheWorldGC has the same effect as stopTheWorld, but blocks 1394 // until the GC is not running. It also blocks a GC from starting 1395 // until startTheWorldGC is called. 1396 func stopTheWorldGC(reason stwReason) worldStop { 1397 semacquire(&gcsema) 1398 return stopTheWorld(reason) 1399 } 1400 1401 // startTheWorldGC undoes the effects of stopTheWorldGC. 1402 // 1403 // w must be the worldStop returned by stopTheWorld. 1404 func startTheWorldGC(w worldStop) { 1405 startTheWorld(w) 1406 semrelease(&gcsema) 1407 } 1408 1409 // Holding worldsema grants an M the right to try to stop the world. 1410 var worldsema uint32 = 1 1411 1412 // Holding gcsema grants the M the right to block a GC, and blocks 1413 // until the current GC is done. In particular, it prevents gomaxprocs 1414 // from changing concurrently. 1415 // 1416 // TODO(mknyszek): Once gomaxprocs and the execution tracer can handle 1417 // being changed/enabled during a GC, remove this. 1418 var gcsema uint32 = 1 1419 1420 // stopTheWorldWithSema is the core implementation of stopTheWorld. 1421 // The caller is responsible for acquiring worldsema and disabling 1422 // preemption first and then should stopTheWorldWithSema on the system 1423 // stack: 1424 // 1425 // semacquire(&worldsema, 0) 1426 // m.preemptoff = "reason" 1427 // var stw worldStop 1428 // systemstack(func() { 1429 // stw = stopTheWorldWithSema(reason) 1430 // }) 1431 // 1432 // When finished, the caller must either call startTheWorld or undo 1433 // these three operations separately: 1434 // 1435 // m.preemptoff = "" 1436 // systemstack(func() { 1437 // now = startTheWorldWithSema(stw) 1438 // }) 1439 // semrelease(&worldsema) 1440 // 1441 // It is allowed to acquire worldsema once and then execute multiple 1442 // startTheWorldWithSema/stopTheWorldWithSema pairs. 1443 // Other P's are able to execute between successive calls to 1444 // startTheWorldWithSema and stopTheWorldWithSema. 1445 // Holding worldsema causes any other goroutines invoking 1446 // stopTheWorld to block. 1447 // 1448 // Returns the STW context. When starting the world, this context must be 1449 // passed to startTheWorldWithSema. 1450 func stopTheWorldWithSema(reason stwReason) worldStop { 1451 trace := traceAcquire() 1452 if trace.ok() { 1453 trace.STWStart(reason) 1454 traceRelease(trace) 1455 } 1456 gp := getg() 1457 1458 // If we hold a lock, then we won't be able to stop another M 1459 // that is blocked trying to acquire the lock. 1460 if gp.m.locks > 0 { 1461 throw("stopTheWorld: holding locks") 1462 } 1463 1464 lock(&sched.lock) 1465 start := nanotime() // exclude time waiting for sched.lock from start and total time metrics. 1466 sched.stopwait = gomaxprocs 1467 sched.gcwaiting.Store(true) 1468 preemptall() 1469 // stop current P 1470 gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic. 1471 sched.stopwait-- 1472 // try to retake all P's in Psyscall status 1473 trace = traceAcquire() 1474 for _, pp := range allp { 1475 s := pp.status 1476 if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) { 1477 if trace.ok() { 1478 trace.GoSysBlock(pp) 1479 trace.ProcSteal(pp, false) 1480 } 1481 pp.syscalltick++ 1482 sched.stopwait-- 1483 } 1484 } 1485 if trace.ok() { 1486 traceRelease(trace) 1487 } 1488 1489 // stop idle P's 1490 now := nanotime() 1491 for { 1492 pp, _ := pidleget(now) 1493 if pp == nil { 1494 break 1495 } 1496 pp.status = _Pgcstop 1497 sched.stopwait-- 1498 } 1499 wait := sched.stopwait > 0 1500 unlock(&sched.lock) 1501 1502 // wait for remaining P's to stop voluntarily 1503 if wait { 1504 for { 1505 // wait for 100us, then try to re-preempt in case of any races 1506 if notetsleep(&sched.stopnote, 100*1000) { 1507 noteclear(&sched.stopnote) 1508 break 1509 } 1510 preemptall() 1511 } 1512 } 1513 1514 startTime := nanotime() - start 1515 if reason.isGC() { 1516 sched.stwStoppingTimeGC.record(startTime) 1517 } else { 1518 sched.stwStoppingTimeOther.record(startTime) 1519 } 1520 1521 // sanity checks 1522 bad := "" 1523 if sched.stopwait != 0 { 1524 bad = "stopTheWorld: not stopped (stopwait != 0)" 1525 } else { 1526 for _, pp := range allp { 1527 if pp.status != _Pgcstop { 1528 bad = "stopTheWorld: not stopped (status != _Pgcstop)" 1529 } 1530 } 1531 } 1532 if freezing.Load() { 1533 // Some other thread is panicking. This can cause the 1534 // sanity checks above to fail if the panic happens in 1535 // the signal handler on a stopped thread. Either way, 1536 // we should halt this thread. 1537 lock(&deadlock) 1538 lock(&deadlock) 1539 } 1540 if bad != "" { 1541 throw(bad) 1542 } 1543 1544 worldStopped() 1545 1546 return worldStop{reason: reason, start: start} 1547 } 1548 1549 // reason is the same STW reason passed to stopTheWorld. start is the start 1550 // time returned by stopTheWorld. 1551 // 1552 // now is the current time; prefer to pass 0 to capture a fresh timestamp. 1553 // 1554 // stattTheWorldWithSema returns now. 1555 func startTheWorldWithSema(now int64, w worldStop) int64 { 1556 assertWorldStopped() 1557 1558 mp := acquirem() // disable preemption because it can be holding p in a local var 1559 if netpollinited() { 1560 list, delta := netpoll(0) // non-blocking 1561 injectglist(&list) 1562 netpollAdjustWaiters(delta) 1563 } 1564 lock(&sched.lock) 1565 1566 procs := gomaxprocs 1567 if newprocs != 0 { 1568 procs = newprocs 1569 newprocs = 0 1570 } 1571 p1 := procresize(procs) 1572 sched.gcwaiting.Store(false) 1573 if sched.sysmonwait.Load() { 1574 sched.sysmonwait.Store(false) 1575 notewakeup(&sched.sysmonnote) 1576 } 1577 unlock(&sched.lock) 1578 1579 worldStarted() 1580 1581 for p1 != nil { 1582 p := p1 1583 p1 = p1.link.ptr() 1584 if p.m != 0 { 1585 mp := p.m.ptr() 1586 p.m = 0 1587 if mp.nextp != 0 { 1588 throw("startTheWorld: inconsistent mp->nextp") 1589 } 1590 mp.nextp.set(p) 1591 notewakeup(&mp.park) 1592 } else { 1593 // Start M to run P. Do not start another M below. 1594 newm(nil, p, -1) 1595 } 1596 } 1597 1598 // Capture start-the-world time before doing clean-up tasks. 1599 if now == 0 { 1600 now = nanotime() 1601 } 1602 totalTime := now - w.start 1603 if w.reason.isGC() { 1604 sched.stwTotalTimeGC.record(totalTime) 1605 } else { 1606 sched.stwTotalTimeOther.record(totalTime) 1607 } 1608 trace := traceAcquire() 1609 if trace.ok() { 1610 trace.STWDone() 1611 traceRelease(trace) 1612 } 1613 1614 // Wakeup an additional proc in case we have excessive runnable goroutines 1615 // in local queues or in the global queue. If we don't, the proc will park itself. 1616 // If we have lots of excessive work, resetspinning will unpark additional procs as necessary. 1617 wakep() 1618 1619 releasem(mp) 1620 1621 return now 1622 } 1623 1624 // usesLibcall indicates whether this runtime performs system calls 1625 // via libcall. 1626 func usesLibcall() bool { 1627 switch GOOS { 1628 case "aix", "darwin", "illumos", "ios", "solaris", "windows": 1629 return true 1630 case "openbsd": 1631 return GOARCH != "mips64" 1632 } 1633 return false 1634 } 1635 1636 // mStackIsSystemAllocated indicates whether this runtime starts on a 1637 // system-allocated stack. 1638 func mStackIsSystemAllocated() bool { 1639 switch GOOS { 1640 case "aix", "darwin", "plan9", "illumos", "ios", "solaris", "windows": 1641 return true 1642 case "openbsd": 1643 return GOARCH != "mips64" 1644 } 1645 return false 1646 } 1647 1648 // mstart is the entry-point for new Ms. 1649 // It is written in assembly, uses ABI0, is marked TOPFRAME, and calls mstart0. 1650 func mstart() 1651 1652 // mstart0 is the Go entry-point for new Ms. 1653 // This must not split the stack because we may not even have stack 1654 // bounds set up yet. 1655 // 1656 // May run during STW (because it doesn't have a P yet), so write 1657 // barriers are not allowed. 1658 // 1659 //go:nosplit 1660 //go:nowritebarrierrec 1661 func mstart0() { 1662 gp := getg() 1663 1664 osStack := gp.stack.lo == 0 1665 if osStack { 1666 // Initialize stack bounds from system stack. 1667 // Cgo may have left stack size in stack.hi. 1668 // minit may update the stack bounds. 1669 // 1670 // Note: these bounds may not be very accurate. 1671 // We set hi to &size, but there are things above 1672 // it. The 1024 is supposed to compensate this, 1673 // but is somewhat arbitrary. 1674 size := gp.stack.hi 1675 if size == 0 { 1676 size = 16384 * sys.StackGuardMultiplier 1677 } 1678 gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size))) 1679 gp.stack.lo = gp.stack.hi - size + 1024 1680 } 1681 // Initialize stack guard so that we can start calling regular 1682 // Go code. 1683 gp.stackguard0 = gp.stack.lo + stackGuard 1684 // This is the g0, so we can also call go:systemstack 1685 // functions, which check stackguard1. 1686 gp.stackguard1 = gp.stackguard0 1687 mstart1() 1688 1689 // Exit this thread. 1690 if mStackIsSystemAllocated() { 1691 // Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate 1692 // the stack, but put it in gp.stack before mstart, 1693 // so the logic above hasn't set osStack yet. 1694 osStack = true 1695 } 1696 mexit(osStack) 1697 } 1698 1699 // The go:noinline is to guarantee the getcallerpc/getcallersp below are safe, 1700 // so that we can set up g0.sched to return to the call of mstart1 above. 1701 // 1702 //go:noinline 1703 func mstart1() { 1704 gp := getg() 1705 1706 if gp != gp.m.g0 { 1707 throw("bad runtime·mstart") 1708 } 1709 1710 // Set up m.g0.sched as a label returning to just 1711 // after the mstart1 call in mstart0 above, for use by goexit0 and mcall. 1712 // We're never coming back to mstart1 after we call schedule, 1713 // so other calls can reuse the current frame. 1714 // And goexit0 does a gogo that needs to return from mstart1 1715 // and let mstart0 exit the thread. 1716 gp.sched.g = guintptr(unsafe.Pointer(gp)) 1717 gp.sched.pc = getcallerpc() 1718 gp.sched.sp = getcallersp() 1719 1720 asminit() 1721 minit() 1722 1723 // Install signal handlers; after minit so that minit can 1724 // prepare the thread to be able to handle the signals. 1725 if gp.m == &m0 { 1726 mstartm0() 1727 } 1728 1729 if fn := gp.m.mstartfn; fn != nil { 1730 fn() 1731 } 1732 1733 if gp.m != &m0 { 1734 acquirep(gp.m.nextp.ptr()) 1735 gp.m.nextp = 0 1736 } 1737 schedule() 1738 } 1739 1740 // mstartm0 implements part of mstart1 that only runs on the m0. 1741 // 1742 // Write barriers are allowed here because we know the GC can't be 1743 // running yet, so they'll be no-ops. 1744 // 1745 //go:yeswritebarrierrec 1746 func mstartm0() { 1747 // Create an extra M for callbacks on threads not created by Go. 1748 // An extra M is also needed on Windows for callbacks created by 1749 // syscall.NewCallback. See issue #6751 for details. 1750 if (iscgo || GOOS == "windows") && !cgoHasExtraM { 1751 cgoHasExtraM = true 1752 newextram() 1753 } 1754 initsig(false) 1755 } 1756 1757 // mPark causes a thread to park itself, returning once woken. 1758 // 1759 //go:nosplit 1760 func mPark() { 1761 gp := getg() 1762 notesleep(&gp.m.park) 1763 noteclear(&gp.m.park) 1764 } 1765 1766 // mexit tears down and exits the current thread. 1767 // 1768 // Don't call this directly to exit the thread, since it must run at 1769 // the top of the thread stack. Instead, use gogo(&gp.m.g0.sched) to 1770 // unwind the stack to the point that exits the thread. 1771 // 1772 // It is entered with m.p != nil, so write barriers are allowed. It 1773 // will release the P before exiting. 1774 // 1775 //go:yeswritebarrierrec 1776 func mexit(osStack bool) { 1777 mp := getg().m 1778 1779 if mp == &m0 { 1780 // This is the main thread. Just wedge it. 1781 // 1782 // On Linux, exiting the main thread puts the process 1783 // into a non-waitable zombie state. On Plan 9, 1784 // exiting the main thread unblocks wait even though 1785 // other threads are still running. On Solaris we can 1786 // neither exitThread nor return from mstart. Other 1787 // bad things probably happen on other platforms. 1788 // 1789 // We could try to clean up this M more before wedging 1790 // it, but that complicates signal handling. 1791 handoffp(releasep()) 1792 lock(&sched.lock) 1793 sched.nmfreed++ 1794 checkdead() 1795 unlock(&sched.lock) 1796 mPark() 1797 throw("locked m0 woke up") 1798 } 1799 1800 sigblock(true) 1801 unminit() 1802 1803 // Free the gsignal stack. 1804 if mp.gsignal != nil { 1805 stackfree(mp.gsignal.stack) 1806 // On some platforms, when calling into VDSO (e.g. nanotime) 1807 // we store our g on the gsignal stack, if there is one. 1808 // Now the stack is freed, unlink it from the m, so we 1809 // won't write to it when calling VDSO code. 1810 mp.gsignal = nil 1811 } 1812 1813 // Remove m from allm. 1814 lock(&sched.lock) 1815 for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { 1816 if *pprev == mp { 1817 *pprev = mp.alllink 1818 goto found 1819 } 1820 } 1821 throw("m not found in allm") 1822 found: 1823 // Events must not be traced after this point. 1824 1825 // Delay reaping m until it's done with the stack. 1826 // 1827 // Put mp on the free list, though it will not be reaped while freeWait 1828 // is freeMWait. mp is no longer reachable via allm, so even if it is 1829 // on an OS stack, we must keep a reference to mp alive so that the GC 1830 // doesn't free mp while we are still using it. 1831 // 1832 // Note that the free list must not be linked through alllink because 1833 // some functions walk allm without locking, so may be using alllink. 1834 // 1835 // N.B. It's important that the M appears on the free list simultaneously 1836 // with it being removed so that the tracer can find it. 1837 mp.freeWait.Store(freeMWait) 1838 mp.freelink = sched.freem 1839 sched.freem = mp 1840 unlock(&sched.lock) 1841 1842 atomic.Xadd64(&ncgocall, int64(mp.ncgocall)) 1843 sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load()) 1844 1845 // Release the P. 1846 handoffp(releasep()) 1847 // After this point we must not have write barriers. 1848 1849 // Invoke the deadlock detector. This must happen after 1850 // handoffp because it may have started a new M to take our 1851 // P's work. 1852 lock(&sched.lock) 1853 sched.nmfreed++ 1854 checkdead() 1855 unlock(&sched.lock) 1856 1857 if GOOS == "darwin" || GOOS == "ios" { 1858 // Make sure pendingPreemptSignals is correct when an M exits. 1859 // For #41702. 1860 if mp.signalPending.Load() != 0 { 1861 pendingPreemptSignals.Add(-1) 1862 } 1863 } 1864 1865 // Destroy all allocated resources. After this is called, we may no 1866 // longer take any locks. 1867 mdestroy(mp) 1868 1869 if osStack { 1870 // No more uses of mp, so it is safe to drop the reference. 1871 mp.freeWait.Store(freeMRef) 1872 1873 // Return from mstart and let the system thread 1874 // library free the g0 stack and terminate the thread. 1875 return 1876 } 1877 1878 // mstart is the thread's entry point, so there's nothing to 1879 // return to. Exit the thread directly. exitThread will clear 1880 // m.freeWait when it's done with the stack and the m can be 1881 // reaped. 1882 exitThread(&mp.freeWait) 1883 } 1884 1885 // forEachP calls fn(p) for every P p when p reaches a GC safe point. 1886 // If a P is currently executing code, this will bring the P to a GC 1887 // safe point and execute fn on that P. If the P is not executing code 1888 // (it is idle or in a syscall), this will call fn(p) directly while 1889 // preventing the P from exiting its state. This does not ensure that 1890 // fn will run on every CPU executing Go code, but it acts as a global 1891 // memory barrier. GC uses this as a "ragged barrier." 1892 // 1893 // The caller must hold worldsema. fn must not refer to any 1894 // part of the current goroutine's stack, since the GC may move it. 1895 func forEachP(reason waitReason, fn func(*p)) { 1896 systemstack(func() { 1897 gp := getg().m.curg 1898 // Mark the user stack as preemptible so that it may be scanned. 1899 // Otherwise, our attempt to force all P's to a safepoint could 1900 // result in a deadlock as we attempt to preempt a worker that's 1901 // trying to preempt us (e.g. for a stack scan). 1902 // 1903 // N.B. The execution tracer is not aware of this status 1904 // transition and handles it specially based on the 1905 // wait reason. 1906 casGToWaiting(gp, _Grunning, reason) 1907 forEachPInternal(fn) 1908 casgstatus(gp, _Gwaiting, _Grunning) 1909 }) 1910 } 1911 1912 // forEachPInternal calls fn(p) for every P p when p reaches a GC safe point. 1913 // It is the internal implementation of forEachP. 1914 // 1915 // The caller must hold worldsema and either must ensure that a GC is not 1916 // running (otherwise this may deadlock with the GC trying to preempt this P) 1917 // or it must leave its goroutine in a preemptible state before it switches 1918 // to the systemstack. Due to these restrictions, prefer forEachP when possible. 1919 // 1920 //go:systemstack 1921 func forEachPInternal(fn func(*p)) { 1922 mp := acquirem() 1923 pp := getg().m.p.ptr() 1924 1925 lock(&sched.lock) 1926 if sched.safePointWait != 0 { 1927 throw("forEachP: sched.safePointWait != 0") 1928 } 1929 sched.safePointWait = gomaxprocs - 1 1930 sched.safePointFn = fn 1931 1932 // Ask all Ps to run the safe point function. 1933 for _, p2 := range allp { 1934 if p2 != pp { 1935 atomic.Store(&p2.runSafePointFn, 1) 1936 } 1937 } 1938 preemptall() 1939 1940 // Any P entering _Pidle or _Psyscall from now on will observe 1941 // p.runSafePointFn == 1 and will call runSafePointFn when 1942 // changing its status to _Pidle/_Psyscall. 1943 1944 // Run safe point function for all idle Ps. sched.pidle will 1945 // not change because we hold sched.lock. 1946 for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() { 1947 if atomic.Cas(&p.runSafePointFn, 1, 0) { 1948 fn(p) 1949 sched.safePointWait-- 1950 } 1951 } 1952 1953 wait := sched.safePointWait > 0 1954 unlock(&sched.lock) 1955 1956 // Run fn for the current P. 1957 fn(pp) 1958 1959 // Force Ps currently in _Psyscall into _Pidle and hand them 1960 // off to induce safe point function execution. 1961 for _, p2 := range allp { 1962 s := p2.status 1963 1964 // We need to be fine-grained about tracing here, since handoffp 1965 // might call into the tracer, and the tracer is non-reentrant. 1966 trace := traceAcquire() 1967 if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) { 1968 if trace.ok() { 1969 // It's important that we traceRelease before we call handoffp, which may also traceAcquire. 1970 trace.GoSysBlock(p2) 1971 trace.ProcSteal(p2, false) 1972 traceRelease(trace) 1973 } 1974 p2.syscalltick++ 1975 handoffp(p2) 1976 } else if trace.ok() { 1977 traceRelease(trace) 1978 } 1979 } 1980 1981 // Wait for remaining Ps to run fn. 1982 if wait { 1983 for { 1984 // Wait for 100us, then try to re-preempt in 1985 // case of any races. 1986 // 1987 // Requires system stack. 1988 if notetsleep(&sched.safePointNote, 100*1000) { 1989 noteclear(&sched.safePointNote) 1990 break 1991 } 1992 preemptall() 1993 } 1994 } 1995 if sched.safePointWait != 0 { 1996 throw("forEachP: not done") 1997 } 1998 for _, p2 := range allp { 1999 if p2.runSafePointFn != 0 { 2000 throw("forEachP: P did not run fn") 2001 } 2002 } 2003 2004 lock(&sched.lock) 2005 sched.safePointFn = nil 2006 unlock(&sched.lock) 2007 releasem(mp) 2008 } 2009 2010 // runSafePointFn runs the safe point function, if any, for this P. 2011 // This should be called like 2012 // 2013 // if getg().m.p.runSafePointFn != 0 { 2014 // runSafePointFn() 2015 // } 2016 // 2017 // runSafePointFn must be checked on any transition in to _Pidle or 2018 // _Psyscall to avoid a race where forEachP sees that the P is running 2019 // just before the P goes into _Pidle/_Psyscall and neither forEachP 2020 // nor the P run the safe-point function. 2021 func runSafePointFn() { 2022 p := getg().m.p.ptr() 2023 // Resolve the race between forEachP running the safe-point 2024 // function on this P's behalf and this P running the 2025 // safe-point function directly. 2026 if !atomic.Cas(&p.runSafePointFn, 1, 0) { 2027 return 2028 } 2029 sched.safePointFn(p) 2030 lock(&sched.lock) 2031 sched.safePointWait-- 2032 if sched.safePointWait == 0 { 2033 notewakeup(&sched.safePointNote) 2034 } 2035 unlock(&sched.lock) 2036 } 2037 2038 // When running with cgo, we call _cgo_thread_start 2039 // to start threads for us so that we can play nicely with 2040 // foreign code. 2041 var cgoThreadStart unsafe.Pointer 2042 2043 type cgothreadstart struct { 2044 g guintptr 2045 tls *uint64 2046 fn unsafe.Pointer 2047 } 2048 2049 // Allocate a new m unassociated with any thread. 2050 // Can use p for allocation context if needed. 2051 // fn is recorded as the new m's m.mstartfn. 2052 // id is optional pre-allocated m ID. Omit by passing -1. 2053 // 2054 // This function is allowed to have write barriers even if the caller 2055 // isn't because it borrows pp. 2056 // 2057 //go:yeswritebarrierrec 2058 func allocm(pp *p, fn func(), id int64) *m { 2059 allocmLock.rlock() 2060 2061 // The caller owns pp, but we may borrow (i.e., acquirep) it. We must 2062 // disable preemption to ensure it is not stolen, which would make the 2063 // caller lose ownership. 2064 acquirem() 2065 2066 gp := getg() 2067 if gp.m.p == 0 { 2068 acquirep(pp) // temporarily borrow p for mallocs in this function 2069 } 2070 2071 // Release the free M list. We need to do this somewhere and 2072 // this may free up a stack we can use. 2073 if sched.freem != nil { 2074 lock(&sched.lock) 2075 var newList *m 2076 for freem := sched.freem; freem != nil; { 2077 // Wait for freeWait to indicate that freem's stack is unused. 2078 wait := freem.freeWait.Load() 2079 if wait == freeMWait { 2080 next := freem.freelink 2081 freem.freelink = newList 2082 newList = freem 2083 freem = next 2084 continue 2085 } 2086 // Drop any remaining trace resources. 2087 // Ms can continue to emit events all the way until wait != freeMWait, 2088 // so it's only safe to call traceThreadDestroy at this point. 2089 if traceEnabled() || traceShuttingDown() { 2090 traceThreadDestroy(freem) 2091 } 2092 // Free the stack if needed. For freeMRef, there is 2093 // nothing to do except drop freem from the sched.freem 2094 // list. 2095 if wait == freeMStack { 2096 // stackfree must be on the system stack, but allocm is 2097 // reachable off the system stack transitively from 2098 // startm. 2099 systemstack(func() { 2100 stackfree(freem.g0.stack) 2101 }) 2102 } 2103 freem = freem.freelink 2104 } 2105 sched.freem = newList 2106 unlock(&sched.lock) 2107 } 2108 2109 mp := new(m) 2110 mp.mstartfn = fn 2111 mcommoninit(mp, id) 2112 2113 // In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack. 2114 // Windows and Plan 9 will layout sched stack on OS stack. 2115 if iscgo || mStackIsSystemAllocated() { 2116 mp.g0 = malg(-1) 2117 } else { 2118 mp.g0 = malg(16384 * sys.StackGuardMultiplier) 2119 } 2120 mp.g0.m = mp 2121 2122 if pp == gp.m.p.ptr() { 2123 releasep() 2124 } 2125 2126 releasem(gp.m) 2127 allocmLock.runlock() 2128 return mp 2129 } 2130 2131 // needm is called when a cgo callback happens on a 2132 // thread without an m (a thread not created by Go). 2133 // In this case, needm is expected to find an m to use 2134 // and return with m, g initialized correctly. 2135 // Since m and g are not set now (likely nil, but see below) 2136 // needm is limited in what routines it can call. In particular 2137 // it can only call nosplit functions (textflag 7) and cannot 2138 // do any scheduling that requires an m. 2139 // 2140 // In order to avoid needing heavy lifting here, we adopt 2141 // the following strategy: there is a stack of available m's 2142 // that can be stolen. Using compare-and-swap 2143 // to pop from the stack has ABA races, so we simulate 2144 // a lock by doing an exchange (via Casuintptr) to steal the stack 2145 // head and replace the top pointer with MLOCKED (1). 2146 // This serves as a simple spin lock that we can use even 2147 // without an m. The thread that locks the stack in this way 2148 // unlocks the stack by storing a valid stack head pointer. 2149 // 2150 // In order to make sure that there is always an m structure 2151 // available to be stolen, we maintain the invariant that there 2152 // is always one more than needed. At the beginning of the 2153 // program (if cgo is in use) the list is seeded with a single m. 2154 // If needm finds that it has taken the last m off the list, its job 2155 // is - once it has installed its own m so that it can do things like 2156 // allocate memory - to create a spare m and put it on the list. 2157 // 2158 // Each of these extra m's also has a g0 and a curg that are 2159 // pressed into service as the scheduling stack and current 2160 // goroutine for the duration of the cgo callback. 2161 // 2162 // It calls dropm to put the m back on the list, 2163 // 1. when the callback is done with the m in non-pthread platforms, 2164 // 2. or when the C thread exiting on pthread platforms. 2165 // 2166 // The signal argument indicates whether we're called from a signal 2167 // handler. 2168 // 2169 //go:nosplit 2170 func needm(signal bool) { 2171 if (iscgo || GOOS == "windows") && !cgoHasExtraM { 2172 // Can happen if C/C++ code calls Go from a global ctor. 2173 // Can also happen on Windows if a global ctor uses a 2174 // callback created by syscall.NewCallback. See issue #6751 2175 // for details. 2176 // 2177 // Can not throw, because scheduler is not initialized yet. 2178 writeErrStr("fatal error: cgo callback before cgo call\n") 2179 exit(1) 2180 } 2181 2182 // Save and block signals before getting an M. 2183 // The signal handler may call needm itself, 2184 // and we must avoid a deadlock. Also, once g is installed, 2185 // any incoming signals will try to execute, 2186 // but we won't have the sigaltstack settings and other data 2187 // set up appropriately until the end of minit, which will 2188 // unblock the signals. This is the same dance as when 2189 // starting a new m to run Go code via newosproc. 2190 var sigmask sigset 2191 sigsave(&sigmask) 2192 sigblock(false) 2193 2194 // getExtraM is safe here because of the invariant above, 2195 // that the extra list always contains or will soon contain 2196 // at least one m. 2197 mp, last := getExtraM() 2198 2199 // Set needextram when we've just emptied the list, 2200 // so that the eventual call into cgocallbackg will 2201 // allocate a new m for the extra list. We delay the 2202 // allocation until then so that it can be done 2203 // after exitsyscall makes sure it is okay to be 2204 // running at all (that is, there's no garbage collection 2205 // running right now). 2206 mp.needextram = last 2207 2208 // Store the original signal mask for use by minit. 2209 mp.sigmask = sigmask 2210 2211 // Install TLS on some platforms (previously setg 2212 // would do this if necessary). 2213 osSetupTLS(mp) 2214 2215 // Install g (= m->g0) and set the stack bounds 2216 // to match the current stack. 2217 setg(mp.g0) 2218 sp := getcallersp() 2219 callbackUpdateSystemStack(mp, sp, signal) 2220 2221 // Should mark we are already in Go now. 2222 // Otherwise, we may call needm again when we get a signal, before cgocallbackg1, 2223 // which means the extram list may be empty, that will cause a deadlock. 2224 mp.isExtraInC = false 2225 2226 // Initialize this thread to use the m. 2227 asminit() 2228 minit() 2229 2230 // Emit a trace event for this dead -> syscall transition, 2231 // but only in the new tracer and only if we're not in a signal handler. 2232 // 2233 // N.B. the tracer can run on a bare M just fine, we just have 2234 // to make sure to do this before setg(nil) and unminit. 2235 var trace traceLocker 2236 if goexperiment.ExecTracer2 && !signal { 2237 trace = traceAcquire() 2238 } 2239 2240 // mp.curg is now a real goroutine. 2241 casgstatus(mp.curg, _Gdead, _Gsyscall) 2242 sched.ngsys.Add(-1) 2243 2244 if goexperiment.ExecTracer2 && !signal { 2245 if trace.ok() { 2246 trace.GoCreateSyscall(mp.curg) 2247 traceRelease(trace) 2248 } 2249 } 2250 mp.isExtraInSig = signal 2251 } 2252 2253 // Acquire an extra m and bind it to the C thread when a pthread key has been created. 2254 // 2255 //go:nosplit 2256 func needAndBindM() { 2257 needm(false) 2258 2259 if _cgo_pthread_key_created != nil && *(*uintptr)(_cgo_pthread_key_created) != 0 { 2260 cgoBindM() 2261 } 2262 } 2263 2264 // newextram allocates m's and puts them on the extra list. 2265 // It is called with a working local m, so that it can do things 2266 // like call schedlock and allocate. 2267 func newextram() { 2268 c := extraMWaiters.Swap(0) 2269 if c > 0 { 2270 for i := uint32(0); i < c; i++ { 2271 oneNewExtraM() 2272 } 2273 } else if extraMLength.Load() == 0 { 2274 // Make sure there is at least one extra M. 2275 oneNewExtraM() 2276 } 2277 } 2278 2279 // oneNewExtraM allocates an m and puts it on the extra list. 2280 func oneNewExtraM() { 2281 // Create extra goroutine locked to extra m. 2282 // The goroutine is the context in which the cgo callback will run. 2283 // The sched.pc will never be returned to, but setting it to 2284 // goexit makes clear to the traceback routines where 2285 // the goroutine stack ends. 2286 mp := allocm(nil, nil, -1) 2287 gp := malg(4096) 2288 gp.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum 2289 gp.sched.sp = gp.stack.hi 2290 gp.sched.sp -= 4 * goarch.PtrSize // extra space in case of reads slightly beyond frame 2291 gp.sched.lr = 0 2292 gp.sched.g = guintptr(unsafe.Pointer(gp)) 2293 gp.syscallpc = gp.sched.pc 2294 gp.syscallsp = gp.sched.sp 2295 gp.stktopsp = gp.sched.sp 2296 // malg returns status as _Gidle. Change to _Gdead before 2297 // adding to allg where GC can see it. We use _Gdead to hide 2298 // this from tracebacks and stack scans since it isn't a 2299 // "real" goroutine until needm grabs it. 2300 casgstatus(gp, _Gidle, _Gdead) 2301 gp.m = mp 2302 mp.curg = gp 2303 mp.isextra = true 2304 // mark we are in C by default. 2305 mp.isExtraInC = true 2306 mp.lockedInt++ 2307 mp.lockedg.set(gp) 2308 gp.lockedm.set(mp) 2309 gp.goid = sched.goidgen.Add(1) 2310 if raceenabled { 2311 gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum) 2312 } 2313 trace := traceAcquire() 2314 if trace.ok() { 2315 trace.OneNewExtraM(gp) 2316 traceRelease(trace) 2317 } 2318 // put on allg for garbage collector 2319 allgadd(gp) 2320 2321 // gp is now on the allg list, but we don't want it to be 2322 // counted by gcount. It would be more "proper" to increment 2323 // sched.ngfree, but that requires locking. Incrementing ngsys 2324 // has the same effect. 2325 sched.ngsys.Add(1) 2326 2327 // Add m to the extra list. 2328 addExtraM(mp) 2329 } 2330 2331 // dropm puts the current m back onto the extra list. 2332 // 2333 // 1. On systems without pthreads, like Windows 2334 // dropm is called when a cgo callback has called needm but is now 2335 // done with the callback and returning back into the non-Go thread. 2336 // 2337 // The main expense here is the call to signalstack to release the 2338 // m's signal stack, and then the call to needm on the next callback 2339 // from this thread. It is tempting to try to save the m for next time, 2340 // which would eliminate both these costs, but there might not be 2341 // a next time: the current thread (which Go does not control) might exit. 2342 // If we saved the m for that thread, there would be an m leak each time 2343 // such a thread exited. Instead, we acquire and release an m on each 2344 // call. These should typically not be scheduling operations, just a few 2345 // atomics, so the cost should be small. 2346 // 2347 // 2. On systems with pthreads 2348 // dropm is called while a non-Go thread is exiting. 2349 // We allocate a pthread per-thread variable using pthread_key_create, 2350 // to register a thread-exit-time destructor. 2351 // And store the g into a thread-specific value associated with the pthread key, 2352 // when first return back to C. 2353 // So that the destructor would invoke dropm while the non-Go thread is exiting. 2354 // This is much faster since it avoids expensive signal-related syscalls. 2355 // 2356 // This always runs without a P, so //go:nowritebarrierrec is required. 2357 // 2358 // This may run with a different stack than was recorded in g0 (there is no 2359 // call to callbackUpdateSystemStack prior to dropm), so this must be 2360 // //go:nosplit to avoid the stack bounds check. 2361 // 2362 //go:nowritebarrierrec 2363 //go:nosplit 2364 func dropm() { 2365 // Clear m and g, and return m to the extra list. 2366 // After the call to setg we can only call nosplit functions 2367 // with no pointer manipulation. 2368 mp := getg().m 2369 2370 // Emit a trace event for this syscall -> dead transition, 2371 // but only in the new tracer. 2372 // 2373 // N.B. the tracer can run on a bare M just fine, we just have 2374 // to make sure to do this before setg(nil) and unminit. 2375 var trace traceLocker 2376 if goexperiment.ExecTracer2 && !mp.isExtraInSig { 2377 trace = traceAcquire() 2378 } 2379 2380 // Return mp.curg to dead state. 2381 casgstatus(mp.curg, _Gsyscall, _Gdead) 2382 mp.curg.preemptStop = false 2383 sched.ngsys.Add(1) 2384 2385 if goexperiment.ExecTracer2 && !mp.isExtraInSig { 2386 if trace.ok() { 2387 trace.GoDestroySyscall() 2388 traceRelease(trace) 2389 } 2390 } 2391 2392 if goexperiment.ExecTracer2 { 2393 // Trash syscalltick so that it doesn't line up with mp.old.syscalltick anymore. 2394 // 2395 // In the new tracer, we model needm and dropm and a goroutine being created and 2396 // destroyed respectively. The m then might get reused with a different procid but 2397 // still with a reference to oldp, and still with the same syscalltick. The next 2398 // time a G is "created" in needm, it'll return and quietly reacquire its P from a 2399 // different m with a different procid, which will confuse the trace parser. By 2400 // trashing syscalltick, we ensure that it'll appear as if we lost the P to the 2401 // tracer parser and that we just reacquired it. 2402 // 2403 // Trash the value by decrementing because that gets us as far away from the value 2404 // the syscall exit code expects as possible. Setting to zero is risky because 2405 // syscalltick could already be zero (and in fact, is initialized to zero). 2406 mp.syscalltick-- 2407 } 2408 2409 // Reset trace state unconditionally. This goroutine is being 'destroyed' 2410 // from the perspective of the tracer. 2411 mp.curg.trace.reset() 2412 2413 // Flush all the M's buffers. This is necessary because the M might 2414 // be used on a different thread with a different procid, so we have 2415 // to make sure we don't write into the same buffer. 2416 // 2417 // N.B. traceThreadDestroy is a no-op in the old tracer, so avoid the 2418 // unnecessary acquire/release of the lock. 2419 if goexperiment.ExecTracer2 && (traceEnabled() || traceShuttingDown()) { 2420 // Acquire sched.lock across thread destruction. One of the invariants of the tracer 2421 // is that a thread cannot disappear from the tracer's view (allm or freem) without 2422 // it noticing, so it requires that sched.lock be held over traceThreadDestroy. 2423 // 2424 // This isn't strictly necessary in this case, because this thread never leaves allm, 2425 // but the critical section is short and dropm is rare on pthread platforms, so just 2426 // take the lock and play it safe. traceThreadDestroy also asserts that the lock is held. 2427 lock(&sched.lock) 2428 traceThreadDestroy(mp) 2429 unlock(&sched.lock) 2430 } 2431 mp.isExtraInSig = false 2432 2433 // Block signals before unminit. 2434 // Unminit unregisters the signal handling stack (but needs g on some systems). 2435 // Setg(nil) clears g, which is the signal handler's cue not to run Go handlers. 2436 // It's important not to try to handle a signal between those two steps. 2437 sigmask := mp.sigmask 2438 sigblock(false) 2439 unminit() 2440 2441 setg(nil) 2442 2443 // Clear g0 stack bounds to ensure that needm always refreshes the 2444 // bounds when reusing this M. 2445 g0 := mp.g0 2446 g0.stack.hi = 0 2447 g0.stack.lo = 0 2448 g0.stackguard0 = 0 2449 g0.stackguard1 = 0 2450 2451 putExtraM(mp) 2452 2453 msigrestore(sigmask) 2454 } 2455 2456 // bindm store the g0 of the current m into a thread-specific value. 2457 // 2458 // We allocate a pthread per-thread variable using pthread_key_create, 2459 // to register a thread-exit-time destructor. 2460 // We are here setting the thread-specific value of the pthread key, to enable the destructor. 2461 // So that the pthread_key_destructor would dropm while the C thread is exiting. 2462 // 2463 // And the saved g will be used in pthread_key_destructor, 2464 // since the g stored in the TLS by Go might be cleared in some platforms, 2465 // before the destructor invoked, so, we restore g by the stored g, before dropm. 2466 // 2467 // We store g0 instead of m, to make the assembly code simpler, 2468 // since we need to restore g0 in runtime.cgocallback. 2469 // 2470 // On systems without pthreads, like Windows, bindm shouldn't be used. 2471 // 2472 // NOTE: this always runs without a P, so, nowritebarrierrec required. 2473 // 2474 //go:nosplit 2475 //go:nowritebarrierrec 2476 func cgoBindM() { 2477 if GOOS == "windows" || GOOS == "plan9" { 2478 fatal("bindm in unexpected GOOS") 2479 } 2480 g := getg() 2481 if g.m.g0 != g { 2482 fatal("the current g is not g0") 2483 } 2484 if _cgo_bindm != nil { 2485 asmcgocall(_cgo_bindm, unsafe.Pointer(g)) 2486 } 2487 } 2488 2489 // A helper function for EnsureDropM. 2490 func getm() uintptr { 2491 return uintptr(unsafe.Pointer(getg().m)) 2492 } 2493 2494 var ( 2495 // Locking linked list of extra M's, via mp.schedlink. Must be accessed 2496 // only via lockextra/unlockextra. 2497 // 2498 // Can't be atomic.Pointer[m] because we use an invalid pointer as a 2499 // "locked" sentinel value. M's on this list remain visible to the GC 2500 // because their mp.curg is on allgs. 2501 extraM atomic.Uintptr 2502 // Number of M's in the extraM list. 2503 extraMLength atomic.Uint32 2504 // Number of waiters in lockextra. 2505 extraMWaiters atomic.Uint32 2506 2507 // Number of extra M's in use by threads. 2508 extraMInUse atomic.Uint32 2509 ) 2510 2511 // lockextra locks the extra list and returns the list head. 2512 // The caller must unlock the list by storing a new list head 2513 // to extram. If nilokay is true, then lockextra will 2514 // return a nil list head if that's what it finds. If nilokay is false, 2515 // lockextra will keep waiting until the list head is no longer nil. 2516 // 2517 //go:nosplit 2518 func lockextra(nilokay bool) *m { 2519 const locked = 1 2520 2521 incr := false 2522 for { 2523 old := extraM.Load() 2524 if old == locked { 2525 osyield_no_g() 2526 continue 2527 } 2528 if old == 0 && !nilokay { 2529 if !incr { 2530 // Add 1 to the number of threads 2531 // waiting for an M. 2532 // This is cleared by newextram. 2533 extraMWaiters.Add(1) 2534 incr = true 2535 } 2536 usleep_no_g(1) 2537 continue 2538 } 2539 if extraM.CompareAndSwap(old, locked) { 2540 return (*m)(unsafe.Pointer(old)) 2541 } 2542 osyield_no_g() 2543 continue 2544 } 2545 } 2546 2547 //go:nosplit 2548 func unlockextra(mp *m, delta int32) { 2549 extraMLength.Add(delta) 2550 extraM.Store(uintptr(unsafe.Pointer(mp))) 2551 } 2552 2553 // Return an M from the extra M list. Returns last == true if the list becomes 2554 // empty because of this call. 2555 // 2556 // Spins waiting for an extra M, so caller must ensure that the list always 2557 // contains or will soon contain at least one M. 2558 // 2559 //go:nosplit 2560 func getExtraM() (mp *m, last bool) { 2561 mp = lockextra(false) 2562 extraMInUse.Add(1) 2563 unlockextra(mp.schedlink.ptr(), -1) 2564 return mp, mp.schedlink.ptr() == nil 2565 } 2566 2567 // Returns an extra M back to the list. mp must be from getExtraM. Newly 2568 // allocated M's should use addExtraM. 2569 // 2570 //go:nosplit 2571 func putExtraM(mp *m) { 2572 extraMInUse.Add(-1) 2573 addExtraM(mp) 2574 } 2575 2576 // Adds a newly allocated M to the extra M list. 2577 // 2578 //go:nosplit 2579 func addExtraM(mp *m) { 2580 mnext := lockextra(true) 2581 mp.schedlink.set(mnext) 2582 unlockextra(mp, 1) 2583 } 2584 2585 var ( 2586 // allocmLock is locked for read when creating new Ms in allocm and their 2587 // addition to allm. Thus acquiring this lock for write blocks the 2588 // creation of new Ms. 2589 allocmLock rwmutex 2590 2591 // execLock serializes exec and clone to avoid bugs or unspecified 2592 // behaviour around exec'ing while creating/destroying threads. See 2593 // issue #19546. 2594 execLock rwmutex 2595 ) 2596 2597 // These errors are reported (via writeErrStr) by some OS-specific 2598 // versions of newosproc and newosproc0. 2599 const ( 2600 failthreadcreate = "runtime: failed to create new OS thread\n" 2601 failallocatestack = "runtime: failed to allocate stack for the new OS thread\n" 2602 ) 2603 2604 // newmHandoff contains a list of m structures that need new OS threads. 2605 // This is used by newm in situations where newm itself can't safely 2606 // start an OS thread. 2607 var newmHandoff struct { 2608 lock mutex 2609 2610 // newm points to a list of M structures that need new OS 2611 // threads. The list is linked through m.schedlink. 2612 newm muintptr 2613 2614 // waiting indicates that wake needs to be notified when an m 2615 // is put on the list. 2616 waiting bool 2617 wake note 2618 2619 // haveTemplateThread indicates that the templateThread has 2620 // been started. This is not protected by lock. Use cas to set 2621 // to 1. 2622 haveTemplateThread uint32 2623 } 2624 2625 // Create a new m. It will start off with a call to fn, or else the scheduler. 2626 // fn needs to be static and not a heap allocated closure. 2627 // May run with m.p==nil, so write barriers are not allowed. 2628 // 2629 // id is optional pre-allocated m ID. Omit by passing -1. 2630 // 2631 //go:nowritebarrierrec 2632 func newm(fn func(), pp *p, id int64) { 2633 // allocm adds a new M to allm, but they do not start until created by 2634 // the OS in newm1 or the template thread. 2635 // 2636 // doAllThreadsSyscall requires that every M in allm will eventually 2637 // start and be signal-able, even with a STW. 2638 // 2639 // Disable preemption here until we start the thread to ensure that 2640 // newm is not preempted between allocm and starting the new thread, 2641 // ensuring that anything added to allm is guaranteed to eventually 2642 // start. 2643 acquirem() 2644 2645 mp := allocm(pp, fn, id) 2646 mp.nextp.set(pp) 2647 mp.sigmask = initSigmask 2648 if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" { 2649 // We're on a locked M or a thread that may have been 2650 // started by C. The kernel state of this thread may 2651 // be strange (the user may have locked it for that 2652 // purpose). We don't want to clone that into another 2653 // thread. Instead, ask a known-good thread to create 2654 // the thread for us. 2655 // 2656 // This is disabled on Plan 9. See golang.org/issue/22227. 2657 // 2658 // TODO: This may be unnecessary on Windows, which 2659 // doesn't model thread creation off fork. 2660 lock(&newmHandoff.lock) 2661 if newmHandoff.haveTemplateThread == 0 { 2662 throw("on a locked thread with no template thread") 2663 } 2664 mp.schedlink = newmHandoff.newm 2665 newmHandoff.newm.set(mp) 2666 if newmHandoff.waiting { 2667 newmHandoff.waiting = false 2668 notewakeup(&newmHandoff.wake) 2669 } 2670 unlock(&newmHandoff.lock) 2671 // The M has not started yet, but the template thread does not 2672 // participate in STW, so it will always process queued Ms and 2673 // it is safe to releasem. 2674 releasem(getg().m) 2675 return 2676 } 2677 newm1(mp) 2678 releasem(getg().m) 2679 } 2680 2681 func newm1(mp *m) { 2682 if iscgo { 2683 var ts cgothreadstart 2684 if _cgo_thread_start == nil { 2685 throw("_cgo_thread_start missing") 2686 } 2687 ts.g.set(mp.g0) 2688 ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0])) 2689 ts.fn = unsafe.Pointer(abi.FuncPCABI0(mstart)) 2690 if msanenabled { 2691 msanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts)) 2692 } 2693 if asanenabled { 2694 asanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts)) 2695 } 2696 execLock.rlock() // Prevent process clone. 2697 asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts)) 2698 execLock.runlock() 2699 return 2700 } 2701 execLock.rlock() // Prevent process clone. 2702 newosproc(mp) 2703 execLock.runlock() 2704 } 2705 2706 // startTemplateThread starts the template thread if it is not already 2707 // running. 2708 // 2709 // The calling thread must itself be in a known-good state. 2710 func startTemplateThread() { 2711 if GOARCH == "wasm" { // no threads on wasm yet 2712 return 2713 } 2714 2715 // Disable preemption to guarantee that the template thread will be 2716 // created before a park once haveTemplateThread is set. 2717 mp := acquirem() 2718 if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { 2719 releasem(mp) 2720 return 2721 } 2722 newm(templateThread, nil, -1) 2723 releasem(mp) 2724 } 2725 2726 // templateThread is a thread in a known-good state that exists solely 2727 // to start new threads in known-good states when the calling thread 2728 // may not be in a good state. 2729 // 2730 // Many programs never need this, so templateThread is started lazily 2731 // when we first enter a state that might lead to running on a thread 2732 // in an unknown state. 2733 // 2734 // templateThread runs on an M without a P, so it must not have write 2735 // barriers. 2736 // 2737 //go:nowritebarrierrec 2738 func templateThread() { 2739 lock(&sched.lock) 2740 sched.nmsys++ 2741 checkdead() 2742 unlock(&sched.lock) 2743 2744 for { 2745 lock(&newmHandoff.lock) 2746 for newmHandoff.newm != 0 { 2747 newm := newmHandoff.newm.ptr() 2748 newmHandoff.newm = 0 2749 unlock(&newmHandoff.lock) 2750 for newm != nil { 2751 next := newm.schedlink.ptr() 2752 newm.schedlink = 0 2753 newm1(newm) 2754 newm = next 2755 } 2756 lock(&newmHandoff.lock) 2757 } 2758 newmHandoff.waiting = true 2759 noteclear(&newmHandoff.wake) 2760 unlock(&newmHandoff.lock) 2761 notesleep(&newmHandoff.wake) 2762 } 2763 } 2764 2765 // Stops execution of the current m until new work is available. 2766 // Returns with acquired P. 2767 func stopm() { 2768 gp := getg() 2769 2770 if gp.m.locks != 0 { 2771 throw("stopm holding locks") 2772 } 2773 if gp.m.p != 0 { 2774 throw("stopm holding p") 2775 } 2776 if gp.m.spinning { 2777 throw("stopm spinning") 2778 } 2779 2780 lock(&sched.lock) 2781 mput(gp.m) 2782 unlock(&sched.lock) 2783 mPark() 2784 acquirep(gp.m.nextp.ptr()) 2785 gp.m.nextp = 0 2786 } 2787 2788 func mspinning() { 2789 // startm's caller incremented nmspinning. Set the new M's spinning. 2790 getg().m.spinning = true 2791 } 2792 2793 // Schedules some M to run the p (creates an M if necessary). 2794 // If p==nil, tries to get an idle P, if no idle P's does nothing. 2795 // May run with m.p==nil, so write barriers are not allowed. 2796 // If spinning is set, the caller has incremented nmspinning and must provide a 2797 // P. startm will set m.spinning in the newly started M. 2798 // 2799 // Callers passing a non-nil P must call from a non-preemptible context. See 2800 // comment on acquirem below. 2801 // 2802 // Argument lockheld indicates whether the caller already acquired the 2803 // scheduler lock. Callers holding the lock when making the call must pass 2804 // true. The lock might be temporarily dropped, but will be reacquired before 2805 // returning. 2806 // 2807 // Must not have write barriers because this may be called without a P. 2808 // 2809 //go:nowritebarrierrec 2810 func startm(pp *p, spinning, lockheld bool) { 2811 // Disable preemption. 2812 // 2813 // Every owned P must have an owner that will eventually stop it in the 2814 // event of a GC stop request. startm takes transient ownership of a P 2815 // (either from argument or pidleget below) and transfers ownership to 2816 // a started M, which will be responsible for performing the stop. 2817 // 2818 // Preemption must be disabled during this transient ownership, 2819 // otherwise the P this is running on may enter GC stop while still 2820 // holding the transient P, leaving that P in limbo and deadlocking the 2821 // STW. 2822 // 2823 // Callers passing a non-nil P must already be in non-preemptible 2824 // context, otherwise such preemption could occur on function entry to 2825 // startm. Callers passing a nil P may be preemptible, so we must 2826 // disable preemption before acquiring a P from pidleget below. 2827 mp := acquirem() 2828 if !lockheld { 2829 lock(&sched.lock) 2830 } 2831 if pp == nil { 2832 if spinning { 2833 // TODO(prattmic): All remaining calls to this function 2834 // with _p_ == nil could be cleaned up to find a P 2835 // before calling startm. 2836 throw("startm: P required for spinning=true") 2837 } 2838 pp, _ = pidleget(0) 2839 if pp == nil { 2840 if !lockheld { 2841 unlock(&sched.lock) 2842 } 2843 releasem(mp) 2844 return 2845 } 2846 } 2847 nmp := mget() 2848 if nmp == nil { 2849 // No M is available, we must drop sched.lock and call newm. 2850 // However, we already own a P to assign to the M. 2851 // 2852 // Once sched.lock is released, another G (e.g., in a syscall), 2853 // could find no idle P while checkdead finds a runnable G but 2854 // no running M's because this new M hasn't started yet, thus 2855 // throwing in an apparent deadlock. 2856 // This apparent deadlock is possible when startm is called 2857 // from sysmon, which doesn't count as a running M. 2858 // 2859 // Avoid this situation by pre-allocating the ID for the new M, 2860 // thus marking it as 'running' before we drop sched.lock. This 2861 // new M will eventually run the scheduler to execute any 2862 // queued G's. 2863 id := mReserveID() 2864 unlock(&sched.lock) 2865 2866 var fn func() 2867 if spinning { 2868 // The caller incremented nmspinning, so set m.spinning in the new M. 2869 fn = mspinning 2870 } 2871 newm(fn, pp, id) 2872 2873 if lockheld { 2874 lock(&sched.lock) 2875 } 2876 // Ownership transfer of pp committed by start in newm. 2877 // Preemption is now safe. 2878 releasem(mp) 2879 return 2880 } 2881 if !lockheld { 2882 unlock(&sched.lock) 2883 } 2884 if nmp.spinning { 2885 throw("startm: m is spinning") 2886 } 2887 if nmp.nextp != 0 { 2888 throw("startm: m has p") 2889 } 2890 if spinning && !runqempty(pp) { 2891 throw("startm: p has runnable gs") 2892 } 2893 // The caller incremented nmspinning, so set m.spinning in the new M. 2894 nmp.spinning = spinning 2895 nmp.nextp.set(pp) 2896 notewakeup(&nmp.park) 2897 // Ownership transfer of pp committed by wakeup. Preemption is now 2898 // safe. 2899 releasem(mp) 2900 } 2901 2902 // Hands off P from syscall or locked M. 2903 // Always runs without a P, so write barriers are not allowed. 2904 // 2905 //go:nowritebarrierrec 2906 func handoffp(pp *p) { 2907 // handoffp must start an M in any situation where 2908 // findrunnable would return a G to run on pp. 2909 2910 // if it has local work, start it straight away 2911 if !runqempty(pp) || sched.runqsize != 0 { 2912 startm(pp, false, false) 2913 return 2914 } 2915 // if there's trace work to do, start it straight away 2916 if (traceEnabled() || traceShuttingDown()) && traceReaderAvailable() != nil { 2917 startm(pp, false, false) 2918 return 2919 } 2920 // if it has GC work, start it straight away 2921 if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) { 2922 startm(pp, false, false) 2923 return 2924 } 2925 // no local work, check that there are no spinning/idle M's, 2926 // otherwise our help is not required 2927 if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic 2928 sched.needspinning.Store(0) 2929 startm(pp, true, false) 2930 return 2931 } 2932 lock(&sched.lock) 2933 if sched.gcwaiting.Load() { 2934 pp.status = _Pgcstop 2935 sched.stopwait-- 2936 if sched.stopwait == 0 { 2937 notewakeup(&sched.stopnote) 2938 } 2939 unlock(&sched.lock) 2940 return 2941 } 2942 if pp.runSafePointFn != 0 && atomic.Cas(&pp.runSafePointFn, 1, 0) { 2943 sched.safePointFn(pp) 2944 sched.safePointWait-- 2945 if sched.safePointWait == 0 { 2946 notewakeup(&sched.safePointNote) 2947 } 2948 } 2949 if sched.runqsize != 0 { 2950 unlock(&sched.lock) 2951 startm(pp, false, false) 2952 return 2953 } 2954 // If this is the last running P and nobody is polling network, 2955 // need to wakeup another M to poll network. 2956 if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 { 2957 unlock(&sched.lock) 2958 startm(pp, false, false) 2959 return 2960 } 2961 2962 // The scheduler lock cannot be held when calling wakeNetPoller below 2963 // because wakeNetPoller may call wakep which may call startm. 2964 when := pp.timers.wakeTime() 2965 pidleput(pp, 0) 2966 unlock(&sched.lock) 2967 2968 if when != 0 { 2969 wakeNetPoller(when) 2970 } 2971 } 2972 2973 // Tries to add one more P to execute G's. 2974 // Called when a G is made runnable (newproc, ready). 2975 // Must be called with a P. 2976 func wakep() { 2977 // Be conservative about spinning threads, only start one if none exist 2978 // already. 2979 if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) { 2980 return 2981 } 2982 2983 // Disable preemption until ownership of pp transfers to the next M in 2984 // startm. Otherwise preemption here would leave pp stuck waiting to 2985 // enter _Pgcstop. 2986 // 2987 // See preemption comment on acquirem in startm for more details. 2988 mp := acquirem() 2989 2990 var pp *p 2991 lock(&sched.lock) 2992 pp, _ = pidlegetSpinning(0) 2993 if pp == nil { 2994 if sched.nmspinning.Add(-1) < 0 { 2995 throw("wakep: negative nmspinning") 2996 } 2997 unlock(&sched.lock) 2998 releasem(mp) 2999 return 3000 } 3001 // Since we always have a P, the race in the "No M is available" 3002 // comment in startm doesn't apply during the small window between the 3003 // unlock here and lock in startm. A checkdead in between will always 3004 // see at least one running M (ours). 3005 unlock(&sched.lock) 3006 3007 startm(pp, true, false) 3008 3009 releasem(mp) 3010 } 3011 3012 // Stops execution of the current m that is locked to a g until the g is runnable again. 3013 // Returns with acquired P. 3014 func stoplockedm() { 3015 gp := getg() 3016 3017 if gp.m.lockedg == 0 || gp.m.lockedg.ptr().lockedm.ptr() != gp.m { 3018 throw("stoplockedm: inconsistent locking") 3019 } 3020 if gp.m.p != 0 { 3021 // Schedule another M to run this p. 3022 pp := releasep() 3023 handoffp(pp) 3024 } 3025 incidlelocked(1) 3026 // Wait until another thread schedules lockedg again. 3027 mPark() 3028 status := readgstatus(gp.m.lockedg.ptr()) 3029 if status&^_Gscan != _Grunnable { 3030 print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n") 3031 dumpgstatus(gp.m.lockedg.ptr()) 3032 throw("stoplockedm: not runnable") 3033 } 3034 acquirep(gp.m.nextp.ptr()) 3035 gp.m.nextp = 0 3036 } 3037 3038 // Schedules the locked m to run the locked gp. 3039 // May run during STW, so write barriers are not allowed. 3040 // 3041 //go:nowritebarrierrec 3042 func startlockedm(gp *g) { 3043 mp := gp.lockedm.ptr() 3044 if mp == getg().m { 3045 throw("startlockedm: locked to me") 3046 } 3047 if mp.nextp != 0 { 3048 throw("startlockedm: m has p") 3049 } 3050 // directly handoff current P to the locked m 3051 incidlelocked(-1) 3052 pp := releasep() 3053 mp.nextp.set(pp) 3054 notewakeup(&mp.park) 3055 stopm() 3056 } 3057 3058 // Stops the current m for stopTheWorld. 3059 // Returns when the world is restarted. 3060 func gcstopm() { 3061 gp := getg() 3062 3063 if !sched.gcwaiting.Load() { 3064 throw("gcstopm: not waiting for gc") 3065 } 3066 if gp.m.spinning { 3067 gp.m.spinning = false 3068 // OK to just drop nmspinning here, 3069 // startTheWorld will unpark threads as necessary. 3070 if sched.nmspinning.Add(-1) < 0 { 3071 throw("gcstopm: negative nmspinning") 3072 } 3073 } 3074 pp := releasep() 3075 lock(&sched.lock) 3076 pp.status = _Pgcstop 3077 sched.stopwait-- 3078 if sched.stopwait == 0 { 3079 notewakeup(&sched.stopnote) 3080 } 3081 unlock(&sched.lock) 3082 stopm() 3083 } 3084 3085 // Schedules gp to run on the current M. 3086 // If inheritTime is true, gp inherits the remaining time in the 3087 // current time slice. Otherwise, it starts a new time slice. 3088 // Never returns. 3089 // 3090 // Write barriers are allowed because this is called immediately after 3091 // acquiring a P in several places. 3092 // 3093 //go:yeswritebarrierrec 3094 func execute(gp *g, inheritTime bool) { 3095 mp := getg().m 3096 3097 if goroutineProfile.active { 3098 // Make sure that gp has had its stack written out to the goroutine 3099 // profile, exactly as it was when the goroutine profiler first stopped 3100 // the world. 3101 tryRecordGoroutineProfile(gp, osyield) 3102 } 3103 3104 // Assign gp.m before entering _Grunning so running Gs have an 3105 // M. 3106 mp.curg = gp 3107 gp.m = mp 3108 casgstatus(gp, _Grunnable, _Grunning) 3109 gp.waitsince = 0 3110 gp.preempt = false 3111 gp.stackguard0 = gp.stack.lo + stackGuard 3112 if !inheritTime { 3113 mp.p.ptr().schedtick++ 3114 } 3115 3116 // Check whether the profiler needs to be turned on or off. 3117 hz := sched.profilehz 3118 if mp.profilehz != hz { 3119 setThreadCPUProfiler(hz) 3120 } 3121 3122 trace := traceAcquire() 3123 if trace.ok() { 3124 // GoSysExit has to happen when we have a P, but before GoStart. 3125 // So we emit it here. 3126 if !goexperiment.ExecTracer2 && gp.syscallsp != 0 { 3127 trace.GoSysExit(true) 3128 } 3129 trace.GoStart() 3130 traceRelease(trace) 3131 } 3132 3133 gogo(&gp.sched) 3134 } 3135 3136 // Finds a runnable goroutine to execute. 3137 // Tries to steal from other P's, get g from local or global queue, poll network. 3138 // tryWakeP indicates that the returned goroutine is not normal (GC worker, trace 3139 // reader) so the caller should try to wake a P. 3140 func findRunnable() (gp *g, inheritTime, tryWakeP bool) { 3141 mp := getg().m 3142 3143 // The conditions here and in handoffp must agree: if 3144 // findrunnable would return a G to run, handoffp must start 3145 // an M. 3146 3147 top: 3148 pp := mp.p.ptr() 3149 if sched.gcwaiting.Load() { 3150 gcstopm() 3151 goto top 3152 } 3153 if pp.runSafePointFn != 0 { 3154 runSafePointFn() 3155 } 3156 3157 // now and pollUntil are saved for work stealing later, 3158 // which may steal timers. It's important that between now 3159 // and then, nothing blocks, so these numbers remain mostly 3160 // relevant. 3161 now, pollUntil, _ := pp.timers.check(0) 3162 3163 // Try to schedule the trace reader. 3164 if traceEnabled() || traceShuttingDown() { 3165 gp := traceReader() 3166 if gp != nil { 3167 trace := traceAcquire() 3168 casgstatus(gp, _Gwaiting, _Grunnable) 3169 if trace.ok() { 3170 trace.GoUnpark(gp, 0) 3171 traceRelease(trace) 3172 } 3173 return gp, false, true 3174 } 3175 } 3176 3177 // Try to schedule a GC worker. 3178 if gcBlackenEnabled != 0 { 3179 gp, tnow := gcController.findRunnableGCWorker(pp, now) 3180 if gp != nil { 3181 return gp, false, true 3182 } 3183 now = tnow 3184 } 3185 3186 // Check the global runnable queue once in a while to ensure fairness. 3187 // Otherwise two goroutines can completely occupy the local runqueue 3188 // by constantly respawning each other. 3189 if pp.schedtick%61 == 0 && sched.runqsize > 0 { 3190 lock(&sched.lock) 3191 gp := globrunqget(pp, 1) 3192 unlock(&sched.lock) 3193 if gp != nil { 3194 return gp, false, false 3195 } 3196 } 3197 3198 // Wake up the finalizer G. 3199 if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake { 3200 if gp := wakefing(); gp != nil { 3201 ready(gp, 0, true) 3202 } 3203 } 3204 if *cgo_yield != nil { 3205 asmcgocall(*cgo_yield, nil) 3206 } 3207 3208 // local runq 3209 if gp, inheritTime := runqget(pp); gp != nil { 3210 return gp, inheritTime, false 3211 } 3212 3213 // global runq 3214 if sched.runqsize != 0 { 3215 lock(&sched.lock) 3216 gp := globrunqget(pp, 0) 3217 unlock(&sched.lock) 3218 if gp != nil { 3219 return gp, false, false 3220 } 3221 } 3222 3223 // Poll network. 3224 // This netpoll is only an optimization before we resort to stealing. 3225 // We can safely skip it if there are no waiters or a thread is blocked 3226 // in netpoll already. If there is any kind of logical race with that 3227 // blocked thread (e.g. it has already returned from netpoll, but does 3228 // not set lastpoll yet), this thread will do blocking netpoll below 3229 // anyway. 3230 if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 { 3231 if list, delta := netpoll(0); !list.empty() { // non-blocking 3232 gp := list.pop() 3233 injectglist(&list) 3234 netpollAdjustWaiters(delta) 3235 trace := traceAcquire() 3236 casgstatus(gp, _Gwaiting, _Grunnable) 3237 if trace.ok() { 3238 trace.GoUnpark(gp, 0) 3239 traceRelease(trace) 3240 } 3241 return gp, false, false 3242 } 3243 } 3244 3245 // Spinning Ms: steal work from other Ps. 3246 // 3247 // Limit the number of spinning Ms to half the number of busy Ps. 3248 // This is necessary to prevent excessive CPU consumption when 3249 // GOMAXPROCS>>1 but the program parallelism is low. 3250 if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() { 3251 if !mp.spinning { 3252 mp.becomeSpinning() 3253 } 3254 3255 gp, inheritTime, tnow, w, newWork := stealWork(now) 3256 if gp != nil { 3257 // Successfully stole. 3258 return gp, inheritTime, false 3259 } 3260 if newWork { 3261 // There may be new timer or GC work; restart to 3262 // discover. 3263 goto top 3264 } 3265 3266 now = tnow 3267 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3268 // Earlier timer to wait for. 3269 pollUntil = w 3270 } 3271 } 3272 3273 // We have nothing to do. 3274 // 3275 // If we're in the GC mark phase, can safely scan and blacken objects, 3276 // and have work to do, run idle-time marking rather than give up the P. 3277 if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() { 3278 node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 3279 if node != nil { 3280 pp.gcMarkWorkerMode = gcMarkWorkerIdleMode 3281 gp := node.gp.ptr() 3282 3283 trace := traceAcquire() 3284 casgstatus(gp, _Gwaiting, _Grunnable) 3285 if trace.ok() { 3286 trace.GoUnpark(gp, 0) 3287 traceRelease(trace) 3288 } 3289 return gp, false, false 3290 } 3291 gcController.removeIdleMarkWorker() 3292 } 3293 3294 // wasm only: 3295 // If a callback returned and no other goroutine is awake, 3296 // then wake event handler goroutine which pauses execution 3297 // until a callback was triggered. 3298 gp, otherReady := beforeIdle(now, pollUntil) 3299 if gp != nil { 3300 trace := traceAcquire() 3301 casgstatus(gp, _Gwaiting, _Grunnable) 3302 if trace.ok() { 3303 trace.GoUnpark(gp, 0) 3304 traceRelease(trace) 3305 } 3306 return gp, false, false 3307 } 3308 if otherReady { 3309 goto top 3310 } 3311 3312 // Before we drop our P, make a snapshot of the allp slice, 3313 // which can change underfoot once we no longer block 3314 // safe-points. We don't need to snapshot the contents because 3315 // everything up to cap(allp) is immutable. 3316 allpSnapshot := allp 3317 // Also snapshot masks. Value changes are OK, but we can't allow 3318 // len to change out from under us. 3319 idlepMaskSnapshot := idlepMask 3320 timerpMaskSnapshot := timerpMask 3321 3322 // return P and block 3323 lock(&sched.lock) 3324 if sched.gcwaiting.Load() || pp.runSafePointFn != 0 { 3325 unlock(&sched.lock) 3326 goto top 3327 } 3328 if sched.runqsize != 0 { 3329 gp := globrunqget(pp, 0) 3330 unlock(&sched.lock) 3331 return gp, false, false 3332 } 3333 if !mp.spinning && sched.needspinning.Load() == 1 { 3334 // See "Delicate dance" comment below. 3335 mp.becomeSpinning() 3336 unlock(&sched.lock) 3337 goto top 3338 } 3339 if releasep() != pp { 3340 throw("findrunnable: wrong p") 3341 } 3342 now = pidleput(pp, now) 3343 unlock(&sched.lock) 3344 3345 // Delicate dance: thread transitions from spinning to non-spinning 3346 // state, potentially concurrently with submission of new work. We must 3347 // drop nmspinning first and then check all sources again (with 3348 // #StoreLoad memory barrier in between). If we do it the other way 3349 // around, another thread can submit work after we've checked all 3350 // sources but before we drop nmspinning; as a result nobody will 3351 // unpark a thread to run the work. 3352 // 3353 // This applies to the following sources of work: 3354 // 3355 // * Goroutines added to the global or a per-P run queue. 3356 // * New/modified-earlier timers on a per-P timer heap. 3357 // * Idle-priority GC work (barring golang.org/issue/19112). 3358 // 3359 // If we discover new work below, we need to restore m.spinning as a 3360 // signal for resetspinning to unpark a new worker thread (because 3361 // there can be more than one starving goroutine). 3362 // 3363 // However, if after discovering new work we also observe no idle Ps 3364 // (either here or in resetspinning), we have a problem. We may be 3365 // racing with a non-spinning M in the block above, having found no 3366 // work and preparing to release its P and park. Allowing that P to go 3367 // idle will result in loss of work conservation (idle P while there is 3368 // runnable work). This could result in complete deadlock in the 3369 // unlikely event that we discover new work (from netpoll) right as we 3370 // are racing with _all_ other Ps going idle. 3371 // 3372 // We use sched.needspinning to synchronize with non-spinning Ms going 3373 // idle. If needspinning is set when they are about to drop their P, 3374 // they abort the drop and instead become a new spinning M on our 3375 // behalf. If we are not racing and the system is truly fully loaded 3376 // then no spinning threads are required, and the next thread to 3377 // naturally become spinning will clear the flag. 3378 // 3379 // Also see "Worker thread parking/unparking" comment at the top of the 3380 // file. 3381 wasSpinning := mp.spinning 3382 if mp.spinning { 3383 mp.spinning = false 3384 if sched.nmspinning.Add(-1) < 0 { 3385 throw("findrunnable: negative nmspinning") 3386 } 3387 3388 // Note the for correctness, only the last M transitioning from 3389 // spinning to non-spinning must perform these rechecks to 3390 // ensure no missed work. However, the runtime has some cases 3391 // of transient increments of nmspinning that are decremented 3392 // without going through this path, so we must be conservative 3393 // and perform the check on all spinning Ms. 3394 // 3395 // See https://go.dev/issue/43997. 3396 3397 // Check global and P runqueues again. 3398 3399 lock(&sched.lock) 3400 if sched.runqsize != 0 { 3401 pp, _ := pidlegetSpinning(0) 3402 if pp != nil { 3403 gp := globrunqget(pp, 0) 3404 if gp == nil { 3405 throw("global runq empty with non-zero runqsize") 3406 } 3407 unlock(&sched.lock) 3408 acquirep(pp) 3409 mp.becomeSpinning() 3410 return gp, false, false 3411 } 3412 } 3413 unlock(&sched.lock) 3414 3415 pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot) 3416 if pp != nil { 3417 acquirep(pp) 3418 mp.becomeSpinning() 3419 goto top 3420 } 3421 3422 // Check for idle-priority GC work again. 3423 pp, gp := checkIdleGCNoP() 3424 if pp != nil { 3425 acquirep(pp) 3426 mp.becomeSpinning() 3427 3428 // Run the idle worker. 3429 pp.gcMarkWorkerMode = gcMarkWorkerIdleMode 3430 trace := traceAcquire() 3431 casgstatus(gp, _Gwaiting, _Grunnable) 3432 if trace.ok() { 3433 trace.GoUnpark(gp, 0) 3434 traceRelease(trace) 3435 } 3436 return gp, false, false 3437 } 3438 3439 // Finally, check for timer creation or expiry concurrently with 3440 // transitioning from spinning to non-spinning. 3441 // 3442 // Note that we cannot use checkTimers here because it calls 3443 // adjusttimers which may need to allocate memory, and that isn't 3444 // allowed when we don't have an active P. 3445 pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil) 3446 } 3447 3448 // Poll network until next timer. 3449 if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 { 3450 sched.pollUntil.Store(pollUntil) 3451 if mp.p != 0 { 3452 throw("findrunnable: netpoll with p") 3453 } 3454 if mp.spinning { 3455 throw("findrunnable: netpoll with spinning") 3456 } 3457 delay := int64(-1) 3458 if pollUntil != 0 { 3459 if now == 0 { 3460 now = nanotime() 3461 } 3462 delay = pollUntil - now 3463 if delay < 0 { 3464 delay = 0 3465 } 3466 } 3467 if faketime != 0 { 3468 // When using fake time, just poll. 3469 delay = 0 3470 } 3471 list, delta := netpoll(delay) // block until new work is available 3472 // Refresh now again, after potentially blocking. 3473 now = nanotime() 3474 sched.pollUntil.Store(0) 3475 sched.lastpoll.Store(now) 3476 if faketime != 0 && list.empty() { 3477 // Using fake time and nothing is ready; stop M. 3478 // When all M's stop, checkdead will call timejump. 3479 stopm() 3480 goto top 3481 } 3482 lock(&sched.lock) 3483 pp, _ := pidleget(now) 3484 unlock(&sched.lock) 3485 if pp == nil { 3486 injectglist(&list) 3487 netpollAdjustWaiters(delta) 3488 } else { 3489 acquirep(pp) 3490 if !list.empty() { 3491 gp := list.pop() 3492 injectglist(&list) 3493 netpollAdjustWaiters(delta) 3494 trace := traceAcquire() 3495 casgstatus(gp, _Gwaiting, _Grunnable) 3496 if trace.ok() { 3497 trace.GoUnpark(gp, 0) 3498 traceRelease(trace) 3499 } 3500 return gp, false, false 3501 } 3502 if wasSpinning { 3503 mp.becomeSpinning() 3504 } 3505 goto top 3506 } 3507 } else if pollUntil != 0 && netpollinited() { 3508 pollerPollUntil := sched.pollUntil.Load() 3509 if pollerPollUntil == 0 || pollerPollUntil > pollUntil { 3510 netpollBreak() 3511 } 3512 } 3513 stopm() 3514 goto top 3515 } 3516 3517 // pollWork reports whether there is non-background work this P could 3518 // be doing. This is a fairly lightweight check to be used for 3519 // background work loops, like idle GC. It checks a subset of the 3520 // conditions checked by the actual scheduler. 3521 func pollWork() bool { 3522 if sched.runqsize != 0 { 3523 return true 3524 } 3525 p := getg().m.p.ptr() 3526 if !runqempty(p) { 3527 return true 3528 } 3529 if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 { 3530 if list, delta := netpoll(0); !list.empty() { 3531 injectglist(&list) 3532 netpollAdjustWaiters(delta) 3533 return true 3534 } 3535 } 3536 return false 3537 } 3538 3539 // stealWork attempts to steal a runnable goroutine or timer from any P. 3540 // 3541 // If newWork is true, new work may have been readied. 3542 // 3543 // If now is not 0 it is the current time. stealWork returns the passed time or 3544 // the current time if now was passed as 0. 3545 func stealWork(now int64) (gp *g, inheritTime bool, rnow, pollUntil int64, newWork bool) { 3546 pp := getg().m.p.ptr() 3547 3548 ranTimer := false 3549 3550 const stealTries = 4 3551 for i := 0; i < stealTries; i++ { 3552 stealTimersOrRunNextG := i == stealTries-1 3553 3554 for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() { 3555 if sched.gcwaiting.Load() { 3556 // GC work may be available. 3557 return nil, false, now, pollUntil, true 3558 } 3559 p2 := allp[enum.position()] 3560 if pp == p2 { 3561 continue 3562 } 3563 3564 // Steal timers from p2. This call to checkTimers is the only place 3565 // where we might hold a lock on a different P's timers. We do this 3566 // once on the last pass before checking runnext because stealing 3567 // from the other P's runnext should be the last resort, so if there 3568 // are timers to steal do that first. 3569 // 3570 // We only check timers on one of the stealing iterations because 3571 // the time stored in now doesn't change in this loop and checking 3572 // the timers for each P more than once with the same value of now 3573 // is probably a waste of time. 3574 // 3575 // timerpMask tells us whether the P may have timers at all. If it 3576 // can't, no need to check at all. 3577 if stealTimersOrRunNextG && timerpMask.read(enum.position()) { 3578 tnow, w, ran := p2.timers.check(now) 3579 now = tnow 3580 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3581 pollUntil = w 3582 } 3583 if ran { 3584 // Running the timers may have 3585 // made an arbitrary number of G's 3586 // ready and added them to this P's 3587 // local run queue. That invalidates 3588 // the assumption of runqsteal 3589 // that it always has room to add 3590 // stolen G's. So check now if there 3591 // is a local G to run. 3592 if gp, inheritTime := runqget(pp); gp != nil { 3593 return gp, inheritTime, now, pollUntil, ranTimer 3594 } 3595 ranTimer = true 3596 } 3597 } 3598 3599 // Don't bother to attempt to steal if p2 is idle. 3600 if !idlepMask.read(enum.position()) { 3601 if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil { 3602 return gp, false, now, pollUntil, ranTimer 3603 } 3604 } 3605 } 3606 } 3607 3608 // No goroutines found to steal. Regardless, running a timer may have 3609 // made some goroutine ready that we missed. Indicate the next timer to 3610 // wait for. 3611 return nil, false, now, pollUntil, ranTimer 3612 } 3613 3614 // Check all Ps for a runnable G to steal. 3615 // 3616 // On entry we have no P. If a G is available to steal and a P is available, 3617 // the P is returned which the caller should acquire and attempt to steal the 3618 // work to. 3619 func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p { 3620 for id, p2 := range allpSnapshot { 3621 if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) { 3622 lock(&sched.lock) 3623 pp, _ := pidlegetSpinning(0) 3624 if pp == nil { 3625 // Can't get a P, don't bother checking remaining Ps. 3626 unlock(&sched.lock) 3627 return nil 3628 } 3629 unlock(&sched.lock) 3630 return pp 3631 } 3632 } 3633 3634 // No work available. 3635 return nil 3636 } 3637 3638 // Check all Ps for a timer expiring sooner than pollUntil. 3639 // 3640 // Returns updated pollUntil value. 3641 func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 { 3642 for id, p2 := range allpSnapshot { 3643 if timerpMaskSnapshot.read(uint32(id)) { 3644 w := p2.timers.wakeTime() 3645 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3646 pollUntil = w 3647 } 3648 } 3649 } 3650 3651 return pollUntil 3652 } 3653 3654 // Check for idle-priority GC, without a P on entry. 3655 // 3656 // If some GC work, a P, and a worker G are all available, the P and G will be 3657 // returned. The returned P has not been wired yet. 3658 func checkIdleGCNoP() (*p, *g) { 3659 // N.B. Since we have no P, gcBlackenEnabled may change at any time; we 3660 // must check again after acquiring a P. As an optimization, we also check 3661 // if an idle mark worker is needed at all. This is OK here, because if we 3662 // observe that one isn't needed, at least one is currently running. Even if 3663 // it stops running, its own journey into the scheduler should schedule it 3664 // again, if need be (at which point, this check will pass, if relevant). 3665 if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() { 3666 return nil, nil 3667 } 3668 if !gcMarkWorkAvailable(nil) { 3669 return nil, nil 3670 } 3671 3672 // Work is available; we can start an idle GC worker only if there is 3673 // an available P and available worker G. 3674 // 3675 // We can attempt to acquire these in either order, though both have 3676 // synchronization concerns (see below). Workers are almost always 3677 // available (see comment in findRunnableGCWorker for the one case 3678 // there may be none). Since we're slightly less likely to find a P, 3679 // check for that first. 3680 // 3681 // Synchronization: note that we must hold sched.lock until we are 3682 // committed to keeping it. Otherwise we cannot put the unnecessary P 3683 // back in sched.pidle without performing the full set of idle 3684 // transition checks. 3685 // 3686 // If we were to check gcBgMarkWorkerPool first, we must somehow handle 3687 // the assumption in gcControllerState.findRunnableGCWorker that an 3688 // empty gcBgMarkWorkerPool is only possible if gcMarkDone is running. 3689 lock(&sched.lock) 3690 pp, now := pidlegetSpinning(0) 3691 if pp == nil { 3692 unlock(&sched.lock) 3693 return nil, nil 3694 } 3695 3696 // Now that we own a P, gcBlackenEnabled can't change (as it requires STW). 3697 if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() { 3698 pidleput(pp, now) 3699 unlock(&sched.lock) 3700 return nil, nil 3701 } 3702 3703 node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 3704 if node == nil { 3705 pidleput(pp, now) 3706 unlock(&sched.lock) 3707 gcController.removeIdleMarkWorker() 3708 return nil, nil 3709 } 3710 3711 unlock(&sched.lock) 3712 3713 return pp, node.gp.ptr() 3714 } 3715 3716 // wakeNetPoller wakes up the thread sleeping in the network poller if it isn't 3717 // going to wake up before the when argument; or it wakes an idle P to service 3718 // timers and the network poller if there isn't one already. 3719 func wakeNetPoller(when int64) { 3720 if sched.lastpoll.Load() == 0 { 3721 // In findrunnable we ensure that when polling the pollUntil 3722 // field is either zero or the time to which the current 3723 // poll is expected to run. This can have a spurious wakeup 3724 // but should never miss a wakeup. 3725 pollerPollUntil := sched.pollUntil.Load() 3726 if pollerPollUntil == 0 || pollerPollUntil > when { 3727 netpollBreak() 3728 } 3729 } else { 3730 // There are no threads in the network poller, try to get 3731 // one there so it can handle new timers. 3732 if GOOS != "plan9" { // Temporary workaround - see issue #42303. 3733 wakep() 3734 } 3735 } 3736 } 3737 3738 func resetspinning() { 3739 gp := getg() 3740 if !gp.m.spinning { 3741 throw("resetspinning: not a spinning m") 3742 } 3743 gp.m.spinning = false 3744 nmspinning := sched.nmspinning.Add(-1) 3745 if nmspinning < 0 { 3746 throw("findrunnable: negative nmspinning") 3747 } 3748 // M wakeup policy is deliberately somewhat conservative, so check if we 3749 // need to wakeup another P here. See "Worker thread parking/unparking" 3750 // comment at the top of the file for details. 3751 wakep() 3752 } 3753 3754 // injectglist adds each runnable G on the list to some run queue, 3755 // and clears glist. If there is no current P, they are added to the 3756 // global queue, and up to npidle M's are started to run them. 3757 // Otherwise, for each idle P, this adds a G to the global queue 3758 // and starts an M. Any remaining G's are added to the current P's 3759 // local run queue. 3760 // This may temporarily acquire sched.lock. 3761 // Can run concurrently with GC. 3762 func injectglist(glist *gList) { 3763 if glist.empty() { 3764 return 3765 } 3766 trace := traceAcquire() 3767 if trace.ok() { 3768 for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() { 3769 trace.GoUnpark(gp, 0) 3770 } 3771 traceRelease(trace) 3772 } 3773 3774 // Mark all the goroutines as runnable before we put them 3775 // on the run queues. 3776 head := glist.head.ptr() 3777 var tail *g 3778 qsize := 0 3779 for gp := head; gp != nil; gp = gp.schedlink.ptr() { 3780 tail = gp 3781 qsize++ 3782 casgstatus(gp, _Gwaiting, _Grunnable) 3783 } 3784 3785 // Turn the gList into a gQueue. 3786 var q gQueue 3787 q.head.set(head) 3788 q.tail.set(tail) 3789 *glist = gList{} 3790 3791 startIdle := func(n int) { 3792 for i := 0; i < n; i++ { 3793 mp := acquirem() // See comment in startm. 3794 lock(&sched.lock) 3795 3796 pp, _ := pidlegetSpinning(0) 3797 if pp == nil { 3798 unlock(&sched.lock) 3799 releasem(mp) 3800 break 3801 } 3802 3803 startm(pp, false, true) 3804 unlock(&sched.lock) 3805 releasem(mp) 3806 } 3807 } 3808 3809 pp := getg().m.p.ptr() 3810 if pp == nil { 3811 lock(&sched.lock) 3812 globrunqputbatch(&q, int32(qsize)) 3813 unlock(&sched.lock) 3814 startIdle(qsize) 3815 return 3816 } 3817 3818 npidle := int(sched.npidle.Load()) 3819 var ( 3820 globq gQueue 3821 n int 3822 ) 3823 for n = 0; n < npidle && !q.empty(); n++ { 3824 g := q.pop() 3825 globq.pushBack(g) 3826 } 3827 if n > 0 { 3828 lock(&sched.lock) 3829 globrunqputbatch(&globq, int32(n)) 3830 unlock(&sched.lock) 3831 startIdle(n) 3832 qsize -= n 3833 } 3834 3835 if !q.empty() { 3836 runqputbatch(pp, &q, qsize) 3837 } 3838 3839 // Some P's might have become idle after we loaded `sched.npidle` 3840 // but before any goroutines were added to the queue, which could 3841 // lead to idle P's when there is work available in the global queue. 3842 // That could potentially last until other goroutines become ready 3843 // to run. That said, we need to find a way to hedge 3844 // 3845 // Calling wakep() here is the best bet, it will do nothing in the 3846 // common case (no racing on `sched.npidle`), while it could wake one 3847 // more P to execute G's, which might end up with >1 P's: the first one 3848 // wakes another P and so forth until there is no more work, but this 3849 // ought to be an extremely rare case. 3850 // 3851 // Also see "Worker thread parking/unparking" comment at the top of the file for details. 3852 wakep() 3853 } 3854 3855 // One round of scheduler: find a runnable goroutine and execute it. 3856 // Never returns. 3857 func schedule() { 3858 mp := getg().m 3859 3860 if mp.locks != 0 { 3861 throw("schedule: holding locks") 3862 } 3863 3864 if mp.lockedg != 0 { 3865 stoplockedm() 3866 execute(mp.lockedg.ptr(), false) // Never returns. 3867 } 3868 3869 // We should not schedule away from a g that is executing a cgo call, 3870 // since the cgo call is using the m's g0 stack. 3871 if mp.incgo { 3872 throw("schedule: in cgo") 3873 } 3874 3875 top: 3876 pp := mp.p.ptr() 3877 pp.preempt = false 3878 3879 // Safety check: if we are spinning, the run queue should be empty. 3880 // Check this before calling checkTimers, as that might call 3881 // goready to put a ready goroutine on the local run queue. 3882 if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) { 3883 throw("schedule: spinning with local work") 3884 } 3885 3886 gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available 3887 3888 if debug.dontfreezetheworld > 0 && freezing.Load() { 3889 // See comment in freezetheworld. We don't want to perturb 3890 // scheduler state, so we didn't gcstopm in findRunnable, but 3891 // also don't want to allow new goroutines to run. 3892 // 3893 // Deadlock here rather than in the findRunnable loop so if 3894 // findRunnable is stuck in a loop we don't perturb that 3895 // either. 3896 lock(&deadlock) 3897 lock(&deadlock) 3898 } 3899 3900 // This thread is going to run a goroutine and is not spinning anymore, 3901 // so if it was marked as spinning we need to reset it now and potentially 3902 // start a new spinning M. 3903 if mp.spinning { 3904 resetspinning() 3905 } 3906 3907 if sched.disable.user && !schedEnabled(gp) { 3908 // Scheduling of this goroutine is disabled. Put it on 3909 // the list of pending runnable goroutines for when we 3910 // re-enable user scheduling and look again. 3911 lock(&sched.lock) 3912 if schedEnabled(gp) { 3913 // Something re-enabled scheduling while we 3914 // were acquiring the lock. 3915 unlock(&sched.lock) 3916 } else { 3917 sched.disable.runnable.pushBack(gp) 3918 sched.disable.n++ 3919 unlock(&sched.lock) 3920 goto top 3921 } 3922 } 3923 3924 // If about to schedule a not-normal goroutine (a GCworker or tracereader), 3925 // wake a P if there is one. 3926 if tryWakeP { 3927 wakep() 3928 } 3929 if gp.lockedm != 0 { 3930 // Hands off own p to the locked m, 3931 // then blocks waiting for a new p. 3932 startlockedm(gp) 3933 goto top 3934 } 3935 3936 execute(gp, inheritTime) 3937 } 3938 3939 // dropg removes the association between m and the current goroutine m->curg (gp for short). 3940 // Typically a caller sets gp's status away from Grunning and then 3941 // immediately calls dropg to finish the job. The caller is also responsible 3942 // for arranging that gp will be restarted using ready at an 3943 // appropriate time. After calling dropg and arranging for gp to be 3944 // readied later, the caller can do other work but eventually should 3945 // call schedule to restart the scheduling of goroutines on this m. 3946 func dropg() { 3947 gp := getg() 3948 3949 setMNoWB(&gp.m.curg.m, nil) 3950 setGNoWB(&gp.m.curg, nil) 3951 } 3952 3953 func parkunlock_c(gp *g, lock unsafe.Pointer) bool { 3954 unlock((*mutex)(lock)) 3955 return true 3956 } 3957 3958 // park continuation on g0. 3959 func park_m(gp *g) { 3960 mp := getg().m 3961 3962 trace := traceAcquire() 3963 3964 // N.B. Not using casGToWaiting here because the waitreason is 3965 // set by park_m's caller. 3966 casgstatus(gp, _Grunning, _Gwaiting) 3967 if trace.ok() { 3968 trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip) 3969 traceRelease(trace) 3970 } 3971 3972 dropg() 3973 3974 if fn := mp.waitunlockf; fn != nil { 3975 ok := fn(gp, mp.waitlock) 3976 mp.waitunlockf = nil 3977 mp.waitlock = nil 3978 if !ok { 3979 trace := traceAcquire() 3980 casgstatus(gp, _Gwaiting, _Grunnable) 3981 if trace.ok() { 3982 trace.GoUnpark(gp, 2) 3983 traceRelease(trace) 3984 } 3985 execute(gp, true) // Schedule it back, never returns. 3986 } 3987 } 3988 schedule() 3989 } 3990 3991 func goschedImpl(gp *g, preempted bool) { 3992 trace := traceAcquire() 3993 status := readgstatus(gp) 3994 if status&^_Gscan != _Grunning { 3995 dumpgstatus(gp) 3996 throw("bad g status") 3997 } 3998 casgstatus(gp, _Grunning, _Grunnable) 3999 if trace.ok() { 4000 if preempted { 4001 trace.GoPreempt() 4002 } else { 4003 trace.GoSched() 4004 } 4005 traceRelease(trace) 4006 } 4007 4008 dropg() 4009 lock(&sched.lock) 4010 globrunqput(gp) 4011 unlock(&sched.lock) 4012 4013 if mainStarted { 4014 wakep() 4015 } 4016 4017 schedule() 4018 } 4019 4020 // Gosched continuation on g0. 4021 func gosched_m(gp *g) { 4022 goschedImpl(gp, false) 4023 } 4024 4025 // goschedguarded is a forbidden-states-avoided version of gosched_m. 4026 func goschedguarded_m(gp *g) { 4027 if !canPreemptM(gp.m) { 4028 gogo(&gp.sched) // never return 4029 } 4030 goschedImpl(gp, false) 4031 } 4032 4033 func gopreempt_m(gp *g) { 4034 goschedImpl(gp, true) 4035 } 4036 4037 // preemptPark parks gp and puts it in _Gpreempted. 4038 // 4039 //go:systemstack 4040 func preemptPark(gp *g) { 4041 status := readgstatus(gp) 4042 if status&^_Gscan != _Grunning { 4043 dumpgstatus(gp) 4044 throw("bad g status") 4045 } 4046 4047 if gp.asyncSafePoint { 4048 // Double-check that async preemption does not 4049 // happen in SPWRITE assembly functions. 4050 // isAsyncSafePoint must exclude this case. 4051 f := findfunc(gp.sched.pc) 4052 if !f.valid() { 4053 throw("preempt at unknown pc") 4054 } 4055 if f.flag&abi.FuncFlagSPWrite != 0 { 4056 println("runtime: unexpected SPWRITE function", funcname(f), "in async preempt") 4057 throw("preempt SPWRITE") 4058 } 4059 } 4060 4061 // Transition from _Grunning to _Gscan|_Gpreempted. We can't 4062 // be in _Grunning when we dropg because then we'd be running 4063 // without an M, but the moment we're in _Gpreempted, 4064 // something could claim this G before we've fully cleaned it 4065 // up. Hence, we set the scan bit to lock down further 4066 // transitions until we can dropg. 4067 casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted) 4068 dropg() 4069 4070 // Be careful about how we trace this next event. The ordering 4071 // is subtle. 4072 // 4073 // The moment we CAS into _Gpreempted, suspendG could CAS to 4074 // _Gwaiting, do its work, and ready the goroutine. All of 4075 // this could happen before we even get the chance to emit 4076 // an event. The end result is that the events could appear 4077 // out of order, and the tracer generally assumes the scheduler 4078 // takes care of the ordering between GoPark and GoUnpark. 4079 // 4080 // The answer here is simple: emit the event while we still hold 4081 // the _Gscan bit on the goroutine. We still need to traceAcquire 4082 // and traceRelease across the CAS because the tracer could be 4083 // what's calling suspendG in the first place, and we want the 4084 // CAS and event emission to appear atomic to the tracer. 4085 trace := traceAcquire() 4086 if trace.ok() { 4087 trace.GoPark(traceBlockPreempted, 0) 4088 } 4089 casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted) 4090 if trace.ok() { 4091 traceRelease(trace) 4092 } 4093 schedule() 4094 } 4095 4096 // goyield is like Gosched, but it: 4097 // - emits a GoPreempt trace event instead of a GoSched trace event 4098 // - puts the current G on the runq of the current P instead of the globrunq 4099 func goyield() { 4100 checkTimeouts() 4101 mcall(goyield_m) 4102 } 4103 4104 func goyield_m(gp *g) { 4105 trace := traceAcquire() 4106 pp := gp.m.p.ptr() 4107 casgstatus(gp, _Grunning, _Grunnable) 4108 if trace.ok() { 4109 trace.GoPreempt() 4110 traceRelease(trace) 4111 } 4112 dropg() 4113 runqput(pp, gp, false) 4114 schedule() 4115 } 4116 4117 // Finishes execution of the current goroutine. 4118 func goexit1() { 4119 if raceenabled { 4120 racegoend() 4121 } 4122 trace := traceAcquire() 4123 if trace.ok() { 4124 trace.GoEnd() 4125 traceRelease(trace) 4126 } 4127 mcall(goexit0) 4128 } 4129 4130 // goexit continuation on g0. 4131 func goexit0(gp *g) { 4132 gdestroy(gp) 4133 schedule() 4134 } 4135 4136 func gdestroy(gp *g) { 4137 mp := getg().m 4138 pp := mp.p.ptr() 4139 4140 casgstatus(gp, _Grunning, _Gdead) 4141 gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo)) 4142 if isSystemGoroutine(gp, false) { 4143 sched.ngsys.Add(-1) 4144 } 4145 gp.m = nil 4146 locked := gp.lockedm != 0 4147 gp.lockedm = 0 4148 mp.lockedg = 0 4149 gp.preemptStop = false 4150 gp.paniconfault = false 4151 gp._defer = nil // should be true already but just in case. 4152 gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. 4153 gp.writebuf = nil 4154 gp.waitreason = waitReasonZero 4155 gp.param = nil 4156 gp.labels = nil 4157 gp.timer = nil 4158 4159 if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 { 4160 // Flush assist credit to the global pool. This gives 4161 // better information to pacing if the application is 4162 // rapidly creating an exiting goroutines. 4163 assistWorkPerByte := gcController.assistWorkPerByte.Load() 4164 scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes)) 4165 gcController.bgScanCredit.Add(scanCredit) 4166 gp.gcAssistBytes = 0 4167 } 4168 4169 dropg() 4170 4171 if GOARCH == "wasm" { // no threads yet on wasm 4172 gfput(pp, gp) 4173 return 4174 } 4175 4176 if mp.lockedInt != 0 { 4177 print("invalid m->lockedInt = ", mp.lockedInt, "\n") 4178 throw("internal lockOSThread error") 4179 } 4180 gfput(pp, gp) 4181 if locked { 4182 // The goroutine may have locked this thread because 4183 // it put it in an unusual kernel state. Kill it 4184 // rather than returning it to the thread pool. 4185 4186 // Return to mstart, which will release the P and exit 4187 // the thread. 4188 if GOOS != "plan9" { // See golang.org/issue/22227. 4189 gogo(&mp.g0.sched) 4190 } else { 4191 // Clear lockedExt on plan9 since we may end up re-using 4192 // this thread. 4193 mp.lockedExt = 0 4194 } 4195 } 4196 } 4197 4198 // save updates getg().sched to refer to pc and sp so that a following 4199 // gogo will restore pc and sp. 4200 // 4201 // save must not have write barriers because invoking a write barrier 4202 // can clobber getg().sched. 4203 // 4204 //go:nosplit 4205 //go:nowritebarrierrec 4206 func save(pc, sp uintptr) { 4207 gp := getg() 4208 4209 if gp == gp.m.g0 || gp == gp.m.gsignal { 4210 // m.g0.sched is special and must describe the context 4211 // for exiting the thread. mstart1 writes to it directly. 4212 // m.gsignal.sched should not be used at all. 4213 // This check makes sure save calls do not accidentally 4214 // run in contexts where they'd write to system g's. 4215 throw("save on system g not allowed") 4216 } 4217 4218 gp.sched.pc = pc 4219 gp.sched.sp = sp 4220 gp.sched.lr = 0 4221 gp.sched.ret = 0 4222 // We need to ensure ctxt is zero, but can't have a write 4223 // barrier here. However, it should always already be zero. 4224 // Assert that. 4225 if gp.sched.ctxt != nil { 4226 badctxt() 4227 } 4228 } 4229 4230 // The goroutine g is about to enter a system call. 4231 // Record that it's not using the cpu anymore. 4232 // This is called only from the go syscall library and cgocall, 4233 // not from the low-level system calls used by the runtime. 4234 // 4235 // Entersyscall cannot split the stack: the save must 4236 // make g->sched refer to the caller's stack segment, because 4237 // entersyscall is going to return immediately after. 4238 // 4239 // Nothing entersyscall calls can split the stack either. 4240 // We cannot safely move the stack during an active call to syscall, 4241 // because we do not know which of the uintptr arguments are 4242 // really pointers (back into the stack). 4243 // In practice, this means that we make the fast path run through 4244 // entersyscall doing no-split things, and the slow path has to use systemstack 4245 // to run bigger things on the system stack. 4246 // 4247 // reentersyscall is the entry point used by cgo callbacks, where explicitly 4248 // saved SP and PC are restored. This is needed when exitsyscall will be called 4249 // from a function further up in the call stack than the parent, as g->syscallsp 4250 // must always point to a valid stack frame. entersyscall below is the normal 4251 // entry point for syscalls, which obtains the SP and PC from the caller. 4252 // 4253 // Syscall tracing (old tracer): 4254 // At the start of a syscall we emit traceGoSysCall to capture the stack trace. 4255 // If the syscall does not block, that is it, we do not emit any other events. 4256 // If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock; 4257 // when syscall returns we emit traceGoSysExit and when the goroutine starts running 4258 // (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart. 4259 // To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock, 4260 // we remember current value of syscalltick in m (gp.m.syscalltick = gp.m.p.ptr().syscalltick), 4261 // whoever emits traceGoSysBlock increments p.syscalltick afterwards; 4262 // and we wait for the increment before emitting traceGoSysExit. 4263 // Note that the increment is done even if tracing is not enabled, 4264 // because tracing can be enabled in the middle of syscall. We don't want the wait to hang. 4265 // 4266 //go:nosplit 4267 func reentersyscall(pc, sp uintptr) { 4268 trace := traceAcquire() 4269 gp := getg() 4270 4271 // Disable preemption because during this function g is in Gsyscall status, 4272 // but can have inconsistent g->sched, do not let GC observe it. 4273 gp.m.locks++ 4274 4275 // Entersyscall must not call any function that might split/grow the stack. 4276 // (See details in comment above.) 4277 // Catch calls that might, by replacing the stack guard with something that 4278 // will trip any stack check and leaving a flag to tell newstack to die. 4279 gp.stackguard0 = stackPreempt 4280 gp.throwsplit = true 4281 4282 // Leave SP around for GC and traceback. 4283 save(pc, sp) 4284 gp.syscallsp = sp 4285 gp.syscallpc = pc 4286 casgstatus(gp, _Grunning, _Gsyscall) 4287 if staticLockRanking { 4288 // When doing static lock ranking casgstatus can call 4289 // systemstack which clobbers g.sched. 4290 save(pc, sp) 4291 } 4292 if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 4293 systemstack(func() { 4294 print("entersyscall inconsistent ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 4295 throw("entersyscall") 4296 }) 4297 } 4298 4299 if trace.ok() { 4300 systemstack(func() { 4301 trace.GoSysCall() 4302 traceRelease(trace) 4303 }) 4304 // systemstack itself clobbers g.sched.{pc,sp} and we might 4305 // need them later when the G is genuinely blocked in a 4306 // syscall 4307 save(pc, sp) 4308 } 4309 4310 if sched.sysmonwait.Load() { 4311 systemstack(entersyscall_sysmon) 4312 save(pc, sp) 4313 } 4314 4315 if gp.m.p.ptr().runSafePointFn != 0 { 4316 // runSafePointFn may stack split if run on this stack 4317 systemstack(runSafePointFn) 4318 save(pc, sp) 4319 } 4320 4321 gp.m.syscalltick = gp.m.p.ptr().syscalltick 4322 pp := gp.m.p.ptr() 4323 pp.m = 0 4324 gp.m.oldp.set(pp) 4325 gp.m.p = 0 4326 atomic.Store(&pp.status, _Psyscall) 4327 if sched.gcwaiting.Load() { 4328 systemstack(entersyscall_gcwait) 4329 save(pc, sp) 4330 } 4331 4332 gp.m.locks-- 4333 } 4334 4335 // Standard syscall entry used by the go syscall library and normal cgo calls. 4336 // 4337 // This is exported via linkname to assembly in the syscall package and x/sys. 4338 // 4339 //go:nosplit 4340 //go:linkname entersyscall 4341 func entersyscall() { 4342 reentersyscall(getcallerpc(), getcallersp()) 4343 } 4344 4345 func entersyscall_sysmon() { 4346 lock(&sched.lock) 4347 if sched.sysmonwait.Load() { 4348 sched.sysmonwait.Store(false) 4349 notewakeup(&sched.sysmonnote) 4350 } 4351 unlock(&sched.lock) 4352 } 4353 4354 func entersyscall_gcwait() { 4355 gp := getg() 4356 pp := gp.m.oldp.ptr() 4357 4358 lock(&sched.lock) 4359 trace := traceAcquire() 4360 if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) { 4361 if trace.ok() { 4362 if goexperiment.ExecTracer2 { 4363 // This is a steal in the new tracer. While it's very likely 4364 // that we were the ones to put this P into _Psyscall, between 4365 // then and now it's totally possible it had been stolen and 4366 // then put back into _Psyscall for us to acquire here. In such 4367 // case ProcStop would be incorrect. 4368 // 4369 // TODO(mknyszek): Consider emitting a ProcStop instead when 4370 // gp.m.syscalltick == pp.syscalltick, since then we know we never 4371 // lost the P. 4372 trace.ProcSteal(pp, true) 4373 } else { 4374 trace.GoSysBlock(pp) 4375 trace.ProcStop(pp) 4376 } 4377 traceRelease(trace) 4378 } 4379 pp.syscalltick++ 4380 if sched.stopwait--; sched.stopwait == 0 { 4381 notewakeup(&sched.stopnote) 4382 } 4383 } else if trace.ok() { 4384 traceRelease(trace) 4385 } 4386 unlock(&sched.lock) 4387 } 4388 4389 // The same as entersyscall(), but with a hint that the syscall is blocking. 4390 // 4391 //go:nosplit 4392 func entersyscallblock() { 4393 gp := getg() 4394 4395 gp.m.locks++ // see comment in entersyscall 4396 gp.throwsplit = true 4397 gp.stackguard0 = stackPreempt // see comment in entersyscall 4398 gp.m.syscalltick = gp.m.p.ptr().syscalltick 4399 gp.m.p.ptr().syscalltick++ 4400 4401 // Leave SP around for GC and traceback. 4402 pc := getcallerpc() 4403 sp := getcallersp() 4404 save(pc, sp) 4405 gp.syscallsp = gp.sched.sp 4406 gp.syscallpc = gp.sched.pc 4407 if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 4408 sp1 := sp 4409 sp2 := gp.sched.sp 4410 sp3 := gp.syscallsp 4411 systemstack(func() { 4412 print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 4413 throw("entersyscallblock") 4414 }) 4415 } 4416 casgstatus(gp, _Grunning, _Gsyscall) 4417 if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 4418 systemstack(func() { 4419 print("entersyscallblock inconsistent ", hex(sp), " ", hex(gp.sched.sp), " ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 4420 throw("entersyscallblock") 4421 }) 4422 } 4423 4424 systemstack(entersyscallblock_handoff) 4425 4426 // Resave for traceback during blocked call. 4427 save(getcallerpc(), getcallersp()) 4428 4429 gp.m.locks-- 4430 } 4431 4432 func entersyscallblock_handoff() { 4433 trace := traceAcquire() 4434 if trace.ok() { 4435 trace.GoSysCall() 4436 trace.GoSysBlock(getg().m.p.ptr()) 4437 traceRelease(trace) 4438 } 4439 handoffp(releasep()) 4440 } 4441 4442 // The goroutine g exited its system call. 4443 // Arrange for it to run on a cpu again. 4444 // This is called only from the go syscall library, not 4445 // from the low-level system calls used by the runtime. 4446 // 4447 // Write barriers are not allowed because our P may have been stolen. 4448 // 4449 // This is exported via linkname to assembly in the syscall package. 4450 // 4451 //go:nosplit 4452 //go:nowritebarrierrec 4453 //go:linkname exitsyscall 4454 func exitsyscall() { 4455 gp := getg() 4456 4457 gp.m.locks++ // see comment in entersyscall 4458 if getcallersp() > gp.syscallsp { 4459 throw("exitsyscall: syscall frame is no longer valid") 4460 } 4461 4462 gp.waitsince = 0 4463 oldp := gp.m.oldp.ptr() 4464 gp.m.oldp = 0 4465 if exitsyscallfast(oldp) { 4466 // When exitsyscallfast returns success, we have a P so can now use 4467 // write barriers 4468 if goroutineProfile.active { 4469 // Make sure that gp has had its stack written out to the goroutine 4470 // profile, exactly as it was when the goroutine profiler first 4471 // stopped the world. 4472 systemstack(func() { 4473 tryRecordGoroutineProfileWB(gp) 4474 }) 4475 } 4476 trace := traceAcquire() 4477 if trace.ok() { 4478 lostP := oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick 4479 systemstack(func() { 4480 if goexperiment.ExecTracer2 { 4481 // Write out syscall exit eagerly in the experiment. 4482 // 4483 // It's important that we write this *after* we know whether we 4484 // lost our P or not (determined by exitsyscallfast). 4485 trace.GoSysExit(lostP) 4486 } 4487 if lostP { 4488 // We lost the P at some point, even though we got it back here. 4489 // Trace that we're starting again, because there was a traceGoSysBlock 4490 // call somewhere in exitsyscallfast (indicating that this goroutine 4491 // had blocked) and we're about to start running again. 4492 trace.GoStart() 4493 } 4494 }) 4495 } 4496 // There's a cpu for us, so we can run. 4497 gp.m.p.ptr().syscalltick++ 4498 // We need to cas the status and scan before resuming... 4499 casgstatus(gp, _Gsyscall, _Grunning) 4500 if trace.ok() { 4501 traceRelease(trace) 4502 } 4503 4504 // Garbage collector isn't running (since we are), 4505 // so okay to clear syscallsp. 4506 gp.syscallsp = 0 4507 gp.m.locks-- 4508 if gp.preempt { 4509 // restore the preemption request in case we've cleared it in newstack 4510 gp.stackguard0 = stackPreempt 4511 } else { 4512 // otherwise restore the real stackGuard, we've spoiled it in entersyscall/entersyscallblock 4513 gp.stackguard0 = gp.stack.lo + stackGuard 4514 } 4515 gp.throwsplit = false 4516 4517 if sched.disable.user && !schedEnabled(gp) { 4518 // Scheduling of this goroutine is disabled. 4519 Gosched() 4520 } 4521 4522 return 4523 } 4524 4525 if !goexperiment.ExecTracer2 { 4526 // In the old tracer, because we don't have a P we can't 4527 // actually record the true time we exited the syscall. 4528 // Record it. 4529 trace := traceAcquire() 4530 if trace.ok() { 4531 trace.RecordSyscallExitedTime(gp, oldp) 4532 traceRelease(trace) 4533 } 4534 } 4535 4536 gp.m.locks-- 4537 4538 // Call the scheduler. 4539 mcall(exitsyscall0) 4540 4541 // Scheduler returned, so we're allowed to run now. 4542 // Delete the syscallsp information that we left for 4543 // the garbage collector during the system call. 4544 // Must wait until now because until gosched returns 4545 // we don't know for sure that the garbage collector 4546 // is not running. 4547 gp.syscallsp = 0 4548 gp.m.p.ptr().syscalltick++ 4549 gp.throwsplit = false 4550 } 4551 4552 //go:nosplit 4553 func exitsyscallfast(oldp *p) bool { 4554 gp := getg() 4555 4556 // Freezetheworld sets stopwait but does not retake P's. 4557 if sched.stopwait == freezeStopWait { 4558 return false 4559 } 4560 4561 // Try to re-acquire the last P. 4562 trace := traceAcquire() 4563 if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) { 4564 // There's a cpu for us, so we can run. 4565 wirep(oldp) 4566 exitsyscallfast_reacquired(trace) 4567 if trace.ok() { 4568 traceRelease(trace) 4569 } 4570 return true 4571 } 4572 if trace.ok() { 4573 traceRelease(trace) 4574 } 4575 4576 // Try to get any other idle P. 4577 if sched.pidle != 0 { 4578 var ok bool 4579 systemstack(func() { 4580 ok = exitsyscallfast_pidle() 4581 if ok && !goexperiment.ExecTracer2 { 4582 trace := traceAcquire() 4583 if trace.ok() { 4584 if oldp != nil { 4585 // Wait till traceGoSysBlock event is emitted. 4586 // This ensures consistency of the trace (the goroutine is started after it is blocked). 4587 for oldp.syscalltick == gp.m.syscalltick { 4588 osyield() 4589 } 4590 } 4591 // In the experiment, we write this in exitsyscall. 4592 // Don't write it here unless the experiment is off. 4593 trace.GoSysExit(true) 4594 traceRelease(trace) 4595 } 4596 } 4597 }) 4598 if ok { 4599 return true 4600 } 4601 } 4602 return false 4603 } 4604 4605 // exitsyscallfast_reacquired is the exitsyscall path on which this G 4606 // has successfully reacquired the P it was running on before the 4607 // syscall. 4608 // 4609 //go:nosplit 4610 func exitsyscallfast_reacquired(trace traceLocker) { 4611 gp := getg() 4612 if gp.m.syscalltick != gp.m.p.ptr().syscalltick { 4613 if trace.ok() { 4614 // The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed). 4615 // traceGoSysBlock for this syscall was already emitted, 4616 // but here we effectively retake the p from the new syscall running on the same p. 4617 systemstack(func() { 4618 if goexperiment.ExecTracer2 { 4619 // In the experiment, we're stealing the P. It's treated 4620 // as if it temporarily stopped running. Then, start running. 4621 trace.ProcSteal(gp.m.p.ptr(), true) 4622 trace.ProcStart() 4623 } else { 4624 // Denote blocking of the new syscall. 4625 trace.GoSysBlock(gp.m.p.ptr()) 4626 // Denote completion of the current syscall. 4627 trace.GoSysExit(true) 4628 } 4629 }) 4630 } 4631 gp.m.p.ptr().syscalltick++ 4632 } 4633 } 4634 4635 func exitsyscallfast_pidle() bool { 4636 lock(&sched.lock) 4637 pp, _ := pidleget(0) 4638 if pp != nil && sched.sysmonwait.Load() { 4639 sched.sysmonwait.Store(false) 4640 notewakeup(&sched.sysmonnote) 4641 } 4642 unlock(&sched.lock) 4643 if pp != nil { 4644 acquirep(pp) 4645 return true 4646 } 4647 return false 4648 } 4649 4650 // exitsyscall slow path on g0. 4651 // Failed to acquire P, enqueue gp as runnable. 4652 // 4653 // Called via mcall, so gp is the calling g from this M. 4654 // 4655 //go:nowritebarrierrec 4656 func exitsyscall0(gp *g) { 4657 var trace traceLocker 4658 if goexperiment.ExecTracer2 { 4659 traceExitingSyscall() 4660 trace = traceAcquire() 4661 } 4662 casgstatus(gp, _Gsyscall, _Grunnable) 4663 if goexperiment.ExecTracer2 { 4664 traceExitedSyscall() 4665 if trace.ok() { 4666 // Write out syscall exit eagerly in the experiment. 4667 // 4668 // It's important that we write this *after* we know whether we 4669 // lost our P or not (determined by exitsyscallfast). 4670 trace.GoSysExit(true) 4671 traceRelease(trace) 4672 } 4673 } 4674 dropg() 4675 lock(&sched.lock) 4676 var pp *p 4677 if schedEnabled(gp) { 4678 pp, _ = pidleget(0) 4679 } 4680 var locked bool 4681 if pp == nil { 4682 globrunqput(gp) 4683 4684 // Below, we stoplockedm if gp is locked. globrunqput releases 4685 // ownership of gp, so we must check if gp is locked prior to 4686 // committing the release by unlocking sched.lock, otherwise we 4687 // could race with another M transitioning gp from unlocked to 4688 // locked. 4689 locked = gp.lockedm != 0 4690 } else if sched.sysmonwait.Load() { 4691 sched.sysmonwait.Store(false) 4692 notewakeup(&sched.sysmonnote) 4693 } 4694 unlock(&sched.lock) 4695 if pp != nil { 4696 acquirep(pp) 4697 execute(gp, false) // Never returns. 4698 } 4699 if locked { 4700 // Wait until another thread schedules gp and so m again. 4701 // 4702 // N.B. lockedm must be this M, as this g was running on this M 4703 // before entersyscall. 4704 stoplockedm() 4705 execute(gp, false) // Never returns. 4706 } 4707 stopm() 4708 schedule() // Never returns. 4709 } 4710 4711 // Called from syscall package before fork. 4712 // 4713 //go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork 4714 //go:nosplit 4715 func syscall_runtime_BeforeFork() { 4716 gp := getg().m.curg 4717 4718 // Block signals during a fork, so that the child does not run 4719 // a signal handler before exec if a signal is sent to the process 4720 // group. See issue #18600. 4721 gp.m.locks++ 4722 sigsave(&gp.m.sigmask) 4723 sigblock(false) 4724 4725 // This function is called before fork in syscall package. 4726 // Code between fork and exec must not allocate memory nor even try to grow stack. 4727 // Here we spoil g.stackguard0 to reliably detect any attempts to grow stack. 4728 // runtime_AfterFork will undo this in parent process, but not in child. 4729 gp.stackguard0 = stackFork 4730 } 4731 4732 // Called from syscall package after fork in parent. 4733 // 4734 //go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork 4735 //go:nosplit 4736 func syscall_runtime_AfterFork() { 4737 gp := getg().m.curg 4738 4739 // See the comments in beforefork. 4740 gp.stackguard0 = gp.stack.lo + stackGuard 4741 4742 msigrestore(gp.m.sigmask) 4743 4744 gp.m.locks-- 4745 } 4746 4747 // inForkedChild is true while manipulating signals in the child process. 4748 // This is used to avoid calling libc functions in case we are using vfork. 4749 var inForkedChild bool 4750 4751 // Called from syscall package after fork in child. 4752 // It resets non-sigignored signals to the default handler, and 4753 // restores the signal mask in preparation for the exec. 4754 // 4755 // Because this might be called during a vfork, and therefore may be 4756 // temporarily sharing address space with the parent process, this must 4757 // not change any global variables or calling into C code that may do so. 4758 // 4759 //go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild 4760 //go:nosplit 4761 //go:nowritebarrierrec 4762 func syscall_runtime_AfterForkInChild() { 4763 // It's OK to change the global variable inForkedChild here 4764 // because we are going to change it back. There is no race here, 4765 // because if we are sharing address space with the parent process, 4766 // then the parent process can not be running concurrently. 4767 inForkedChild = true 4768 4769 clearSignalHandlers() 4770 4771 // When we are the child we are the only thread running, 4772 // so we know that nothing else has changed gp.m.sigmask. 4773 msigrestore(getg().m.sigmask) 4774 4775 inForkedChild = false 4776 } 4777 4778 // pendingPreemptSignals is the number of preemption signals 4779 // that have been sent but not received. This is only used on Darwin. 4780 // For #41702. 4781 var pendingPreemptSignals atomic.Int32 4782 4783 // Called from syscall package before Exec. 4784 // 4785 //go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec 4786 func syscall_runtime_BeforeExec() { 4787 // Prevent thread creation during exec. 4788 execLock.lock() 4789 4790 // On Darwin, wait for all pending preemption signals to 4791 // be received. See issue #41702. 4792 if GOOS == "darwin" || GOOS == "ios" { 4793 for pendingPreemptSignals.Load() > 0 { 4794 osyield() 4795 } 4796 } 4797 } 4798 4799 // Called from syscall package after Exec. 4800 // 4801 //go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec 4802 func syscall_runtime_AfterExec() { 4803 execLock.unlock() 4804 } 4805 4806 // Allocate a new g, with a stack big enough for stacksize bytes. 4807 func malg(stacksize int32) *g { 4808 newg := new(g) 4809 if stacksize >= 0 { 4810 stacksize = round2(stackSystem + stacksize) 4811 systemstack(func() { 4812 newg.stack = stackalloc(uint32(stacksize)) 4813 }) 4814 newg.stackguard0 = newg.stack.lo + stackGuard 4815 newg.stackguard1 = ^uintptr(0) 4816 // Clear the bottom word of the stack. We record g 4817 // there on gsignal stack during VDSO on ARM and ARM64. 4818 *(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0 4819 } 4820 return newg 4821 } 4822 4823 // Create a new g running fn. 4824 // Put it on the queue of g's waiting to run. 4825 // The compiler turns a go statement into a call to this. 4826 func newproc(fn *funcval) { 4827 gp := getg() 4828 pc := getcallerpc() 4829 systemstack(func() { 4830 newg := newproc1(fn, gp, pc, false, waitReasonZero) 4831 4832 pp := getg().m.p.ptr() 4833 runqput(pp, newg, true) 4834 4835 if mainStarted { 4836 wakep() 4837 } 4838 }) 4839 } 4840 4841 // Create a new g in state _Grunnable (or _Gwaiting if parked is true), starting at fn. 4842 // callerpc is the address of the go statement that created this. The caller is responsible 4843 // for adding the new g to the scheduler. If parked is true, waitreason must be non-zero. 4844 func newproc1(fn *funcval, callergp *g, callerpc uintptr, parked bool, waitreason waitReason) *g { 4845 if fn == nil { 4846 fatal("go of nil func value") 4847 } 4848 4849 mp := acquirem() // disable preemption because we hold M and P in local vars. 4850 pp := mp.p.ptr() 4851 newg := gfget(pp) 4852 if newg == nil { 4853 newg = malg(stackMin) 4854 casgstatus(newg, _Gidle, _Gdead) 4855 allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack. 4856 } 4857 if newg.stack.hi == 0 { 4858 throw("newproc1: newg missing stack") 4859 } 4860 4861 if readgstatus(newg) != _Gdead { 4862 throw("newproc1: new g is not Gdead") 4863 } 4864 4865 totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame 4866 totalSize = alignUp(totalSize, sys.StackAlign) 4867 sp := newg.stack.hi - totalSize 4868 if usesLR { 4869 // caller's LR 4870 *(*uintptr)(unsafe.Pointer(sp)) = 0 4871 prepGoExitFrame(sp) 4872 } 4873 if GOARCH == "arm64" { 4874 // caller's FP 4875 *(*uintptr)(unsafe.Pointer(sp - goarch.PtrSize)) = 0 4876 } 4877 4878 memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched)) 4879 newg.sched.sp = sp 4880 newg.stktopsp = sp 4881 newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function 4882 newg.sched.g = guintptr(unsafe.Pointer(newg)) 4883 gostartcallfn(&newg.sched, fn) 4884 newg.parentGoid = callergp.goid 4885 newg.gopc = callerpc 4886 newg.ancestors = saveAncestors(callergp) 4887 newg.startpc = fn.fn 4888 if isSystemGoroutine(newg, false) { 4889 sched.ngsys.Add(1) 4890 } else { 4891 // Only user goroutines inherit pprof labels. 4892 if mp.curg != nil { 4893 newg.labels = mp.curg.labels 4894 } 4895 if goroutineProfile.active { 4896 // A concurrent goroutine profile is running. It should include 4897 // exactly the set of goroutines that were alive when the goroutine 4898 // profiler first stopped the world. That does not include newg, so 4899 // mark it as not needing a profile before transitioning it from 4900 // _Gdead. 4901 newg.goroutineProfiled.Store(goroutineProfileSatisfied) 4902 } 4903 } 4904 // Track initial transition? 4905 newg.trackingSeq = uint8(cheaprand()) 4906 if newg.trackingSeq%gTrackingPeriod == 0 { 4907 newg.tracking = true 4908 } 4909 gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo)) 4910 4911 // Get a goid and switch to runnable. Make all this atomic to the tracer. 4912 trace := traceAcquire() 4913 var status uint32 = _Grunnable 4914 if parked { 4915 status = _Gwaiting 4916 newg.waitreason = waitreason 4917 } 4918 casgstatus(newg, _Gdead, status) 4919 if pp.goidcache == pp.goidcacheend { 4920 // Sched.goidgen is the last allocated id, 4921 // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch]. 4922 // At startup sched.goidgen=0, so main goroutine receives goid=1. 4923 pp.goidcache = sched.goidgen.Add(_GoidCacheBatch) 4924 pp.goidcache -= _GoidCacheBatch - 1 4925 pp.goidcacheend = pp.goidcache + _GoidCacheBatch 4926 } 4927 newg.goid = pp.goidcache 4928 pp.goidcache++ 4929 newg.trace.reset() 4930 if trace.ok() { 4931 trace.GoCreate(newg, newg.startpc, parked) 4932 traceRelease(trace) 4933 } 4934 4935 // Set up race context. 4936 if raceenabled { 4937 newg.racectx = racegostart(callerpc) 4938 newg.raceignore = 0 4939 if newg.labels != nil { 4940 // See note in proflabel.go on labelSync's role in synchronizing 4941 // with the reads in the signal handler. 4942 racereleasemergeg(newg, unsafe.Pointer(&labelSync)) 4943 } 4944 } 4945 releasem(mp) 4946 4947 return newg 4948 } 4949 4950 // saveAncestors copies previous ancestors of the given caller g and 4951 // includes info for the current caller into a new set of tracebacks for 4952 // a g being created. 4953 func saveAncestors(callergp *g) *[]ancestorInfo { 4954 // Copy all prior info, except for the root goroutine (goid 0). 4955 if debug.tracebackancestors <= 0 || callergp.goid == 0 { 4956 return nil 4957 } 4958 var callerAncestors []ancestorInfo 4959 if callergp.ancestors != nil { 4960 callerAncestors = *callergp.ancestors 4961 } 4962 n := int32(len(callerAncestors)) + 1 4963 if n > debug.tracebackancestors { 4964 n = debug.tracebackancestors 4965 } 4966 ancestors := make([]ancestorInfo, n) 4967 copy(ancestors[1:], callerAncestors) 4968 4969 var pcs [tracebackInnerFrames]uintptr 4970 npcs := gcallers(callergp, 0, pcs[:]) 4971 ipcs := make([]uintptr, npcs) 4972 copy(ipcs, pcs[:]) 4973 ancestors[0] = ancestorInfo{ 4974 pcs: ipcs, 4975 goid: callergp.goid, 4976 gopc: callergp.gopc, 4977 } 4978 4979 ancestorsp := new([]ancestorInfo) 4980 *ancestorsp = ancestors 4981 return ancestorsp 4982 } 4983 4984 // Put on gfree list. 4985 // If local list is too long, transfer a batch to the global list. 4986 func gfput(pp *p, gp *g) { 4987 if readgstatus(gp) != _Gdead { 4988 throw("gfput: bad status (not Gdead)") 4989 } 4990 4991 stksize := gp.stack.hi - gp.stack.lo 4992 4993 if stksize != uintptr(startingStackSize) { 4994 // non-standard stack size - free it. 4995 stackfree(gp.stack) 4996 gp.stack.lo = 0 4997 gp.stack.hi = 0 4998 gp.stackguard0 = 0 4999 } 5000 5001 pp.gFree.push(gp) 5002 pp.gFree.n++ 5003 if pp.gFree.n >= 64 { 5004 var ( 5005 inc int32 5006 stackQ gQueue 5007 noStackQ gQueue 5008 ) 5009 for pp.gFree.n >= 32 { 5010 gp := pp.gFree.pop() 5011 pp.gFree.n-- 5012 if gp.stack.lo == 0 { 5013 noStackQ.push(gp) 5014 } else { 5015 stackQ.push(gp) 5016 } 5017 inc++ 5018 } 5019 lock(&sched.gFree.lock) 5020 sched.gFree.noStack.pushAll(noStackQ) 5021 sched.gFree.stack.pushAll(stackQ) 5022 sched.gFree.n += inc 5023 unlock(&sched.gFree.lock) 5024 } 5025 } 5026 5027 // Get from gfree list. 5028 // If local list is empty, grab a batch from global list. 5029 func gfget(pp *p) *g { 5030 retry: 5031 if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) { 5032 lock(&sched.gFree.lock) 5033 // Move a batch of free Gs to the P. 5034 for pp.gFree.n < 32 { 5035 // Prefer Gs with stacks. 5036 gp := sched.gFree.stack.pop() 5037 if gp == nil { 5038 gp = sched.gFree.noStack.pop() 5039 if gp == nil { 5040 break 5041 } 5042 } 5043 sched.gFree.n-- 5044 pp.gFree.push(gp) 5045 pp.gFree.n++ 5046 } 5047 unlock(&sched.gFree.lock) 5048 goto retry 5049 } 5050 gp := pp.gFree.pop() 5051 if gp == nil { 5052 return nil 5053 } 5054 pp.gFree.n-- 5055 if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) { 5056 // Deallocate old stack. We kept it in gfput because it was the 5057 // right size when the goroutine was put on the free list, but 5058 // the right size has changed since then. 5059 systemstack(func() { 5060 stackfree(gp.stack) 5061 gp.stack.lo = 0 5062 gp.stack.hi = 0 5063 gp.stackguard0 = 0 5064 }) 5065 } 5066 if gp.stack.lo == 0 { 5067 // Stack was deallocated in gfput or just above. Allocate a new one. 5068 systemstack(func() { 5069 gp.stack = stackalloc(startingStackSize) 5070 }) 5071 gp.stackguard0 = gp.stack.lo + stackGuard 5072 } else { 5073 if raceenabled { 5074 racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) 5075 } 5076 if msanenabled { 5077 msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) 5078 } 5079 if asanenabled { 5080 asanunpoison(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) 5081 } 5082 } 5083 return gp 5084 } 5085 5086 // Purge all cached G's from gfree list to the global list. 5087 func gfpurge(pp *p) { 5088 var ( 5089 inc int32 5090 stackQ gQueue 5091 noStackQ gQueue 5092 ) 5093 for !pp.gFree.empty() { 5094 gp := pp.gFree.pop() 5095 pp.gFree.n-- 5096 if gp.stack.lo == 0 { 5097 noStackQ.push(gp) 5098 } else { 5099 stackQ.push(gp) 5100 } 5101 inc++ 5102 } 5103 lock(&sched.gFree.lock) 5104 sched.gFree.noStack.pushAll(noStackQ) 5105 sched.gFree.stack.pushAll(stackQ) 5106 sched.gFree.n += inc 5107 unlock(&sched.gFree.lock) 5108 } 5109 5110 // Breakpoint executes a breakpoint trap. 5111 func Breakpoint() { 5112 breakpoint() 5113 } 5114 5115 // dolockOSThread is called by LockOSThread and lockOSThread below 5116 // after they modify m.locked. Do not allow preemption during this call, 5117 // or else the m might be different in this function than in the caller. 5118 // 5119 //go:nosplit 5120 func dolockOSThread() { 5121 if GOARCH == "wasm" { 5122 return // no threads on wasm yet 5123 } 5124 gp := getg() 5125 gp.m.lockedg.set(gp) 5126 gp.lockedm.set(gp.m) 5127 } 5128 5129 // LockOSThread wires the calling goroutine to its current operating system thread. 5130 // The calling goroutine will always execute in that thread, 5131 // and no other goroutine will execute in it, 5132 // until the calling goroutine has made as many calls to 5133 // [UnlockOSThread] as to LockOSThread. 5134 // If the calling goroutine exits without unlocking the thread, 5135 // the thread will be terminated. 5136 // 5137 // All init functions are run on the startup thread. Calling LockOSThread 5138 // from an init function will cause the main function to be invoked on 5139 // that thread. 5140 // 5141 // A goroutine should call LockOSThread before calling OS services or 5142 // non-Go library functions that depend on per-thread state. 5143 // 5144 //go:nosplit 5145 func LockOSThread() { 5146 if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" { 5147 // If we need to start a new thread from the locked 5148 // thread, we need the template thread. Start it now 5149 // while we're in a known-good state. 5150 startTemplateThread() 5151 } 5152 gp := getg() 5153 gp.m.lockedExt++ 5154 if gp.m.lockedExt == 0 { 5155 gp.m.lockedExt-- 5156 panic("LockOSThread nesting overflow") 5157 } 5158 dolockOSThread() 5159 } 5160 5161 //go:nosplit 5162 func lockOSThread() { 5163 getg().m.lockedInt++ 5164 dolockOSThread() 5165 } 5166 5167 // dounlockOSThread is called by UnlockOSThread and unlockOSThread below 5168 // after they update m->locked. Do not allow preemption during this call, 5169 // or else the m might be in different in this function than in the caller. 5170 // 5171 //go:nosplit 5172 func dounlockOSThread() { 5173 if GOARCH == "wasm" { 5174 return // no threads on wasm yet 5175 } 5176 gp := getg() 5177 if gp.m.lockedInt != 0 || gp.m.lockedExt != 0 { 5178 return 5179 } 5180 gp.m.lockedg = 0 5181 gp.lockedm = 0 5182 } 5183 5184 // UnlockOSThread undoes an earlier call to LockOSThread. 5185 // If this drops the number of active LockOSThread calls on the 5186 // calling goroutine to zero, it unwires the calling goroutine from 5187 // its fixed operating system thread. 5188 // If there are no active LockOSThread calls, this is a no-op. 5189 // 5190 // Before calling UnlockOSThread, the caller must ensure that the OS 5191 // thread is suitable for running other goroutines. If the caller made 5192 // any permanent changes to the state of the thread that would affect 5193 // other goroutines, it should not call this function and thus leave 5194 // the goroutine locked to the OS thread until the goroutine (and 5195 // hence the thread) exits. 5196 // 5197 //go:nosplit 5198 func UnlockOSThread() { 5199 gp := getg() 5200 if gp.m.lockedExt == 0 { 5201 return 5202 } 5203 gp.m.lockedExt-- 5204 dounlockOSThread() 5205 } 5206 5207 //go:nosplit 5208 func unlockOSThread() { 5209 gp := getg() 5210 if gp.m.lockedInt == 0 { 5211 systemstack(badunlockosthread) 5212 } 5213 gp.m.lockedInt-- 5214 dounlockOSThread() 5215 } 5216 5217 func badunlockosthread() { 5218 throw("runtime: internal error: misuse of lockOSThread/unlockOSThread") 5219 } 5220 5221 func gcount() int32 { 5222 n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - sched.ngsys.Load() 5223 for _, pp := range allp { 5224 n -= pp.gFree.n 5225 } 5226 5227 // All these variables can be changed concurrently, so the result can be inconsistent. 5228 // But at least the current goroutine is running. 5229 if n < 1 { 5230 n = 1 5231 } 5232 return n 5233 } 5234 5235 func mcount() int32 { 5236 return int32(sched.mnext - sched.nmfreed) 5237 } 5238 5239 var prof struct { 5240 signalLock atomic.Uint32 5241 5242 // Must hold signalLock to write. Reads may be lock-free, but 5243 // signalLock should be taken to synchronize with changes. 5244 hz atomic.Int32 5245 } 5246 5247 func _System() { _System() } 5248 func _ExternalCode() { _ExternalCode() } 5249 func _LostExternalCode() { _LostExternalCode() } 5250 func _GC() { _GC() } 5251 func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } 5252 func _LostContendedRuntimeLock() { _LostContendedRuntimeLock() } 5253 func _VDSO() { _VDSO() } 5254 5255 // Called if we receive a SIGPROF signal. 5256 // Called by the signal handler, may run during STW. 5257 // 5258 //go:nowritebarrierrec 5259 func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { 5260 if prof.hz.Load() == 0 { 5261 return 5262 } 5263 5264 // If mp.profilehz is 0, then profiling is not enabled for this thread. 5265 // We must check this to avoid a deadlock between setcpuprofilerate 5266 // and the call to cpuprof.add, below. 5267 if mp != nil && mp.profilehz == 0 { 5268 return 5269 } 5270 5271 // On mips{,le}/arm, 64bit atomics are emulated with spinlocks, in 5272 // runtime/internal/atomic. If SIGPROF arrives while the program is inside 5273 // the critical section, it creates a deadlock (when writing the sample). 5274 // As a workaround, create a counter of SIGPROFs while in critical section 5275 // to store the count, and pass it to sigprof.add() later when SIGPROF is 5276 // received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc). 5277 if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" { 5278 if f := findfunc(pc); f.valid() { 5279 if hasPrefix(funcname(f), "runtime/internal/atomic") { 5280 cpuprof.lostAtomic++ 5281 return 5282 } 5283 } 5284 if GOARCH == "arm" && goarm < 7 && GOOS == "linux" && pc&0xffff0000 == 0xffff0000 { 5285 // runtime/internal/atomic functions call into kernel 5286 // helpers on arm < 7. See 5287 // runtime/internal/atomic/sys_linux_arm.s. 5288 cpuprof.lostAtomic++ 5289 return 5290 } 5291 } 5292 5293 // Profiling runs concurrently with GC, so it must not allocate. 5294 // Set a trap in case the code does allocate. 5295 // Note that on windows, one thread takes profiles of all the 5296 // other threads, so mp is usually not getg().m. 5297 // In fact mp may not even be stopped. 5298 // See golang.org/issue/17165. 5299 getg().m.mallocing++ 5300 5301 var u unwinder 5302 var stk [maxCPUProfStack]uintptr 5303 n := 0 5304 if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 { 5305 cgoOff := 0 5306 // Check cgoCallersUse to make sure that we are not 5307 // interrupting other code that is fiddling with 5308 // cgoCallers. We are running in a signal handler 5309 // with all signals blocked, so we don't have to worry 5310 // about any other code interrupting us. 5311 if mp.cgoCallersUse.Load() == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 { 5312 for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 { 5313 cgoOff++ 5314 } 5315 n += copy(stk[:], mp.cgoCallers[:cgoOff]) 5316 mp.cgoCallers[0] = 0 5317 } 5318 5319 // Collect Go stack that leads to the cgo call. 5320 u.initAt(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, unwindSilentErrors) 5321 } else if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 { 5322 // Libcall, i.e. runtime syscall on windows. 5323 // Collect Go stack that leads to the call. 5324 u.initAt(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), unwindSilentErrors) 5325 } else if mp != nil && mp.vdsoSP != 0 { 5326 // VDSO call, e.g. nanotime1 on Linux. 5327 // Collect Go stack that leads to the call. 5328 u.initAt(mp.vdsoPC, mp.vdsoSP, 0, gp, unwindSilentErrors|unwindJumpStack) 5329 } else { 5330 u.initAt(pc, sp, lr, gp, unwindSilentErrors|unwindTrap|unwindJumpStack) 5331 } 5332 n += tracebackPCs(&u, 0, stk[n:]) 5333 5334 if n <= 0 { 5335 // Normal traceback is impossible or has failed. 5336 // Account it against abstract "System" or "GC". 5337 n = 2 5338 if inVDSOPage(pc) { 5339 pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum 5340 } else if pc > firstmoduledata.etext { 5341 // "ExternalCode" is better than "etext". 5342 pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum 5343 } 5344 stk[0] = pc 5345 if mp.preemptoff != "" { 5346 stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum 5347 } else { 5348 stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum 5349 } 5350 } 5351 5352 if prof.hz.Load() != 0 { 5353 // Note: it can happen on Windows that we interrupted a system thread 5354 // with no g, so gp could nil. The other nil checks are done out of 5355 // caution, but not expected to be nil in practice. 5356 var tagPtr *unsafe.Pointer 5357 if gp != nil && gp.m != nil && gp.m.curg != nil { 5358 tagPtr = &gp.m.curg.labels 5359 } 5360 cpuprof.add(tagPtr, stk[:n]) 5361 5362 gprof := gp 5363 var mp *m 5364 var pp *p 5365 if gp != nil && gp.m != nil { 5366 if gp.m.curg != nil { 5367 gprof = gp.m.curg 5368 } 5369 mp = gp.m 5370 pp = gp.m.p.ptr() 5371 } 5372 traceCPUSample(gprof, mp, pp, stk[:n]) 5373 } 5374 getg().m.mallocing-- 5375 } 5376 5377 // setcpuprofilerate sets the CPU profiling rate to hz times per second. 5378 // If hz <= 0, setcpuprofilerate turns off CPU profiling. 5379 func setcpuprofilerate(hz int32) { 5380 // Force sane arguments. 5381 if hz < 0 { 5382 hz = 0 5383 } 5384 5385 // Disable preemption, otherwise we can be rescheduled to another thread 5386 // that has profiling enabled. 5387 gp := getg() 5388 gp.m.locks++ 5389 5390 // Stop profiler on this thread so that it is safe to lock prof. 5391 // if a profiling signal came in while we had prof locked, 5392 // it would deadlock. 5393 setThreadCPUProfiler(0) 5394 5395 for !prof.signalLock.CompareAndSwap(0, 1) { 5396 osyield() 5397 } 5398 if prof.hz.Load() != hz { 5399 setProcessCPUProfiler(hz) 5400 prof.hz.Store(hz) 5401 } 5402 prof.signalLock.Store(0) 5403 5404 lock(&sched.lock) 5405 sched.profilehz = hz 5406 unlock(&sched.lock) 5407 5408 if hz != 0 { 5409 setThreadCPUProfiler(hz) 5410 } 5411 5412 gp.m.locks-- 5413 } 5414 5415 // init initializes pp, which may be a freshly allocated p or a 5416 // previously destroyed p, and transitions it to status _Pgcstop. 5417 func (pp *p) init(id int32) { 5418 pp.id = id 5419 pp.status = _Pgcstop 5420 pp.sudogcache = pp.sudogbuf[:0] 5421 pp.deferpool = pp.deferpoolbuf[:0] 5422 pp.wbBuf.reset() 5423 if pp.mcache == nil { 5424 if id == 0 { 5425 if mcache0 == nil { 5426 throw("missing mcache?") 5427 } 5428 // Use the bootstrap mcache0. Only one P will get 5429 // mcache0: the one with ID 0. 5430 pp.mcache = mcache0 5431 } else { 5432 pp.mcache = allocmcache() 5433 } 5434 } 5435 if raceenabled && pp.raceprocctx == 0 { 5436 if id == 0 { 5437 pp.raceprocctx = raceprocctx0 5438 raceprocctx0 = 0 // bootstrap 5439 } else { 5440 pp.raceprocctx = raceproccreate() 5441 } 5442 } 5443 lockInit(&pp.timers.mu, lockRankTimers) 5444 5445 // This P may get timers when it starts running. Set the mask here 5446 // since the P may not go through pidleget (notably P 0 on startup). 5447 timerpMask.set(id) 5448 // Similarly, we may not go through pidleget before this P starts 5449 // running if it is P 0 on startup. 5450 idlepMask.clear(id) 5451 } 5452 5453 // destroy releases all of the resources associated with pp and 5454 // transitions it to status _Pdead. 5455 // 5456 // sched.lock must be held and the world must be stopped. 5457 func (pp *p) destroy() { 5458 assertLockHeld(&sched.lock) 5459 assertWorldStopped() 5460 5461 // Move all runnable goroutines to the global queue 5462 for pp.runqhead != pp.runqtail { 5463 // Pop from tail of local queue 5464 pp.runqtail-- 5465 gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr() 5466 // Push onto head of global queue 5467 globrunqputhead(gp) 5468 } 5469 if pp.runnext != 0 { 5470 globrunqputhead(pp.runnext.ptr()) 5471 pp.runnext = 0 5472 } 5473 5474 // Move all timers to the local P. 5475 getg().m.p.ptr().timers.take(&pp.timers) 5476 5477 // Flush p's write barrier buffer. 5478 if gcphase != _GCoff { 5479 wbBufFlush1(pp) 5480 pp.gcw.dispose() 5481 } 5482 for i := range pp.sudogbuf { 5483 pp.sudogbuf[i] = nil 5484 } 5485 pp.sudogcache = pp.sudogbuf[:0] 5486 pp.pinnerCache = nil 5487 for j := range pp.deferpoolbuf { 5488 pp.deferpoolbuf[j] = nil 5489 } 5490 pp.deferpool = pp.deferpoolbuf[:0] 5491 systemstack(func() { 5492 for i := 0; i < pp.mspancache.len; i++ { 5493 // Safe to call since the world is stopped. 5494 mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i])) 5495 } 5496 pp.mspancache.len = 0 5497 lock(&mheap_.lock) 5498 pp.pcache.flush(&mheap_.pages) 5499 unlock(&mheap_.lock) 5500 }) 5501 freemcache(pp.mcache) 5502 pp.mcache = nil 5503 gfpurge(pp) 5504 traceProcFree(pp) 5505 if raceenabled { 5506 if pp.timers.raceCtx != 0 { 5507 // The race detector code uses a callback to fetch 5508 // the proc context, so arrange for that callback 5509 // to see the right thing. 5510 // This hack only works because we are the only 5511 // thread running. 5512 mp := getg().m 5513 phold := mp.p.ptr() 5514 mp.p.set(pp) 5515 5516 racectxend(pp.timers.raceCtx) 5517 pp.timers.raceCtx = 0 5518 5519 mp.p.set(phold) 5520 } 5521 raceprocdestroy(pp.raceprocctx) 5522 pp.raceprocctx = 0 5523 } 5524 pp.gcAssistTime = 0 5525 pp.status = _Pdead 5526 } 5527 5528 // Change number of processors. 5529 // 5530 // sched.lock must be held, and the world must be stopped. 5531 // 5532 // gcworkbufs must not be being modified by either the GC or the write barrier 5533 // code, so the GC must not be running if the number of Ps actually changes. 5534 // 5535 // Returns list of Ps with local work, they need to be scheduled by the caller. 5536 func procresize(nprocs int32) *p { 5537 assertLockHeld(&sched.lock) 5538 assertWorldStopped() 5539 5540 old := gomaxprocs 5541 if old < 0 || nprocs <= 0 { 5542 throw("procresize: invalid arg") 5543 } 5544 trace := traceAcquire() 5545 if trace.ok() { 5546 trace.Gomaxprocs(nprocs) 5547 traceRelease(trace) 5548 } 5549 5550 // update statistics 5551 now := nanotime() 5552 if sched.procresizetime != 0 { 5553 sched.totaltime += int64(old) * (now - sched.procresizetime) 5554 } 5555 sched.procresizetime = now 5556 5557 maskWords := (nprocs + 31) / 32 5558 5559 // Grow allp if necessary. 5560 if nprocs > int32(len(allp)) { 5561 // Synchronize with retake, which could be running 5562 // concurrently since it doesn't run on a P. 5563 lock(&allpLock) 5564 if nprocs <= int32(cap(allp)) { 5565 allp = allp[:nprocs] 5566 } else { 5567 nallp := make([]*p, nprocs) 5568 // Copy everything up to allp's cap so we 5569 // never lose old allocated Ps. 5570 copy(nallp, allp[:cap(allp)]) 5571 allp = nallp 5572 } 5573 5574 if maskWords <= int32(cap(idlepMask)) { 5575 idlepMask = idlepMask[:maskWords] 5576 timerpMask = timerpMask[:maskWords] 5577 } else { 5578 nidlepMask := make([]uint32, maskWords) 5579 // No need to copy beyond len, old Ps are irrelevant. 5580 copy(nidlepMask, idlepMask) 5581 idlepMask = nidlepMask 5582 5583 ntimerpMask := make([]uint32, maskWords) 5584 copy(ntimerpMask, timerpMask) 5585 timerpMask = ntimerpMask 5586 } 5587 unlock(&allpLock) 5588 } 5589 5590 // initialize new P's 5591 for i := old; i < nprocs; i++ { 5592 pp := allp[i] 5593 if pp == nil { 5594 pp = new(p) 5595 } 5596 pp.init(i) 5597 atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) 5598 } 5599 5600 gp := getg() 5601 if gp.m.p != 0 && gp.m.p.ptr().id < nprocs { 5602 // continue to use the current P 5603 gp.m.p.ptr().status = _Prunning 5604 gp.m.p.ptr().mcache.prepareForSweep() 5605 } else { 5606 // release the current P and acquire allp[0]. 5607 // 5608 // We must do this before destroying our current P 5609 // because p.destroy itself has write barriers, so we 5610 // need to do that from a valid P. 5611 if gp.m.p != 0 { 5612 trace := traceAcquire() 5613 if trace.ok() { 5614 // Pretend that we were descheduled 5615 // and then scheduled again to keep 5616 // the trace sane. 5617 trace.GoSched() 5618 trace.ProcStop(gp.m.p.ptr()) 5619 traceRelease(trace) 5620 } 5621 gp.m.p.ptr().m = 0 5622 } 5623 gp.m.p = 0 5624 pp := allp[0] 5625 pp.m = 0 5626 pp.status = _Pidle 5627 acquirep(pp) 5628 trace := traceAcquire() 5629 if trace.ok() { 5630 trace.GoStart() 5631 traceRelease(trace) 5632 } 5633 } 5634 5635 // g.m.p is now set, so we no longer need mcache0 for bootstrapping. 5636 mcache0 = nil 5637 5638 // release resources from unused P's 5639 for i := nprocs; i < old; i++ { 5640 pp := allp[i] 5641 pp.destroy() 5642 // can't free P itself because it can be referenced by an M in syscall 5643 } 5644 5645 // Trim allp. 5646 if int32(len(allp)) != nprocs { 5647 lock(&allpLock) 5648 allp = allp[:nprocs] 5649 idlepMask = idlepMask[:maskWords] 5650 timerpMask = timerpMask[:maskWords] 5651 unlock(&allpLock) 5652 } 5653 5654 var runnablePs *p 5655 for i := nprocs - 1; i >= 0; i-- { 5656 pp := allp[i] 5657 if gp.m.p.ptr() == pp { 5658 continue 5659 } 5660 pp.status = _Pidle 5661 if runqempty(pp) { 5662 pidleput(pp, now) 5663 } else { 5664 pp.m.set(mget()) 5665 pp.link.set(runnablePs) 5666 runnablePs = pp 5667 } 5668 } 5669 stealOrder.reset(uint32(nprocs)) 5670 var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 5671 atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs)) 5672 if old != nprocs { 5673 // Notify the limiter that the amount of procs has changed. 5674 gcCPULimiter.resetCapacity(now, nprocs) 5675 } 5676 return runnablePs 5677 } 5678 5679 // Associate p and the current m. 5680 // 5681 // This function is allowed to have write barriers even if the caller 5682 // isn't because it immediately acquires pp. 5683 // 5684 //go:yeswritebarrierrec 5685 func acquirep(pp *p) { 5686 // Do the part that isn't allowed to have write barriers. 5687 wirep(pp) 5688 5689 // Have p; write barriers now allowed. 5690 5691 // Perform deferred mcache flush before this P can allocate 5692 // from a potentially stale mcache. 5693 pp.mcache.prepareForSweep() 5694 5695 trace := traceAcquire() 5696 if trace.ok() { 5697 trace.ProcStart() 5698 traceRelease(trace) 5699 } 5700 } 5701 5702 // wirep is the first step of acquirep, which actually associates the 5703 // current M to pp. This is broken out so we can disallow write 5704 // barriers for this part, since we don't yet have a P. 5705 // 5706 //go:nowritebarrierrec 5707 //go:nosplit 5708 func wirep(pp *p) { 5709 gp := getg() 5710 5711 if gp.m.p != 0 { 5712 // Call on the systemstack to avoid a nosplit overflow build failure 5713 // on some platforms when built with -N -l. See #64113. 5714 systemstack(func() { 5715 throw("wirep: already in go") 5716 }) 5717 } 5718 if pp.m != 0 || pp.status != _Pidle { 5719 // Call on the systemstack to avoid a nosplit overflow build failure 5720 // on some platforms when built with -N -l. See #64113. 5721 systemstack(func() { 5722 id := int64(0) 5723 if pp.m != 0 { 5724 id = pp.m.ptr().id 5725 } 5726 print("wirep: p->m=", pp.m, "(", id, ") p->status=", pp.status, "\n") 5727 throw("wirep: invalid p state") 5728 }) 5729 } 5730 gp.m.p.set(pp) 5731 pp.m.set(gp.m) 5732 pp.status = _Prunning 5733 } 5734 5735 // Disassociate p and the current m. 5736 func releasep() *p { 5737 trace := traceAcquire() 5738 if trace.ok() { 5739 trace.ProcStop(getg().m.p.ptr()) 5740 traceRelease(trace) 5741 } 5742 return releasepNoTrace() 5743 } 5744 5745 // Disassociate p and the current m without tracing an event. 5746 func releasepNoTrace() *p { 5747 gp := getg() 5748 5749 if gp.m.p == 0 { 5750 throw("releasep: invalid arg") 5751 } 5752 pp := gp.m.p.ptr() 5753 if pp.m.ptr() != gp.m || pp.status != _Prunning { 5754 print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n") 5755 throw("releasep: invalid p state") 5756 } 5757 gp.m.p = 0 5758 pp.m = 0 5759 pp.status = _Pidle 5760 return pp 5761 } 5762 5763 func incidlelocked(v int32) { 5764 lock(&sched.lock) 5765 sched.nmidlelocked += v 5766 if v > 0 { 5767 checkdead() 5768 } 5769 unlock(&sched.lock) 5770 } 5771 5772 // Check for deadlock situation. 5773 // The check is based on number of running M's, if 0 -> deadlock. 5774 // sched.lock must be held. 5775 func checkdead() { 5776 assertLockHeld(&sched.lock) 5777 5778 // For -buildmode=c-shared or -buildmode=c-archive it's OK if 5779 // there are no running goroutines. The calling program is 5780 // assumed to be running. 5781 if islibrary || isarchive { 5782 return 5783 } 5784 5785 // If we are dying because of a signal caught on an already idle thread, 5786 // freezetheworld will cause all running threads to block. 5787 // And runtime will essentially enter into deadlock state, 5788 // except that there is a thread that will call exit soon. 5789 if panicking.Load() > 0 { 5790 return 5791 } 5792 5793 // If we are not running under cgo, but we have an extra M then account 5794 // for it. (It is possible to have an extra M on Windows without cgo to 5795 // accommodate callbacks created by syscall.NewCallback. See issue #6751 5796 // for details.) 5797 var run0 int32 5798 if !iscgo && cgoHasExtraM && extraMLength.Load() > 0 { 5799 run0 = 1 5800 } 5801 5802 run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys 5803 if run > run0 { 5804 return 5805 } 5806 if run < 0 { 5807 print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n") 5808 unlock(&sched.lock) 5809 throw("checkdead: inconsistent counts") 5810 } 5811 5812 grunning := 0 5813 forEachG(func(gp *g) { 5814 if isSystemGoroutine(gp, false) { 5815 return 5816 } 5817 s := readgstatus(gp) 5818 switch s &^ _Gscan { 5819 case _Gwaiting, 5820 _Gpreempted: 5821 grunning++ 5822 case _Grunnable, 5823 _Grunning, 5824 _Gsyscall: 5825 print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n") 5826 unlock(&sched.lock) 5827 throw("checkdead: runnable g") 5828 } 5829 }) 5830 if grunning == 0 { // possible if main goroutine calls runtime·Goexit() 5831 unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang 5832 fatal("no goroutines (main called runtime.Goexit) - deadlock!") 5833 } 5834 5835 // Maybe jump time forward for playground. 5836 if faketime != 0 { 5837 if when := timeSleepUntil(); when < maxWhen { 5838 faketime = when 5839 5840 // Start an M to steal the timer. 5841 pp, _ := pidleget(faketime) 5842 if pp == nil { 5843 // There should always be a free P since 5844 // nothing is running. 5845 unlock(&sched.lock) 5846 throw("checkdead: no p for timer") 5847 } 5848 mp := mget() 5849 if mp == nil { 5850 // There should always be a free M since 5851 // nothing is running. 5852 unlock(&sched.lock) 5853 throw("checkdead: no m for timer") 5854 } 5855 // M must be spinning to steal. We set this to be 5856 // explicit, but since this is the only M it would 5857 // become spinning on its own anyways. 5858 sched.nmspinning.Add(1) 5859 mp.spinning = true 5860 mp.nextp.set(pp) 5861 notewakeup(&mp.park) 5862 return 5863 } 5864 } 5865 5866 // There are no goroutines running, so we can look at the P's. 5867 for _, pp := range allp { 5868 if len(pp.timers.heap) > 0 { 5869 return 5870 } 5871 } 5872 5873 unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang 5874 fatal("all goroutines are asleep - deadlock!") 5875 } 5876 5877 // forcegcperiod is the maximum time in nanoseconds between garbage 5878 // collections. If we go this long without a garbage collection, one 5879 // is forced to run. 5880 // 5881 // This is a variable for testing purposes. It normally doesn't change. 5882 var forcegcperiod int64 = 2 * 60 * 1e9 5883 5884 // needSysmonWorkaround is true if the workaround for 5885 // golang.org/issue/42515 is needed on NetBSD. 5886 var needSysmonWorkaround bool = false 5887 5888 // haveSysmon indicates whether there is sysmon thread support. 5889 // 5890 // No threads on wasm yet, so no sysmon. 5891 const haveSysmon = GOARCH != "wasm" 5892 5893 // Always runs without a P, so write barriers are not allowed. 5894 // 5895 //go:nowritebarrierrec 5896 func sysmon() { 5897 lock(&sched.lock) 5898 sched.nmsys++ 5899 checkdead() 5900 unlock(&sched.lock) 5901 5902 lasttrace := int64(0) 5903 idle := 0 // how many cycles in succession we had not wokeup somebody 5904 delay := uint32(0) 5905 5906 for { 5907 if idle == 0 { // start with 20us sleep... 5908 delay = 20 5909 } else if idle > 50 { // start doubling the sleep after 1ms... 5910 delay *= 2 5911 } 5912 if delay > 10*1000 { // up to 10ms 5913 delay = 10 * 1000 5914 } 5915 usleep(delay) 5916 5917 // sysmon should not enter deep sleep if schedtrace is enabled so that 5918 // it can print that information at the right time. 5919 // 5920 // It should also not enter deep sleep if there are any active P's so 5921 // that it can retake P's from syscalls, preempt long running G's, and 5922 // poll the network if all P's are busy for long stretches. 5923 // 5924 // It should wakeup from deep sleep if any P's become active either due 5925 // to exiting a syscall or waking up due to a timer expiring so that it 5926 // can resume performing those duties. If it wakes from a syscall it 5927 // resets idle and delay as a bet that since it had retaken a P from a 5928 // syscall before, it may need to do it again shortly after the 5929 // application starts work again. It does not reset idle when waking 5930 // from a timer to avoid adding system load to applications that spend 5931 // most of their time sleeping. 5932 now := nanotime() 5933 if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) { 5934 lock(&sched.lock) 5935 if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs { 5936 syscallWake := false 5937 next := timeSleepUntil() 5938 if next > now { 5939 sched.sysmonwait.Store(true) 5940 unlock(&sched.lock) 5941 // Make wake-up period small enough 5942 // for the sampling to be correct. 5943 sleep := forcegcperiod / 2 5944 if next-now < sleep { 5945 sleep = next - now 5946 } 5947 shouldRelax := sleep >= osRelaxMinNS 5948 if shouldRelax { 5949 osRelax(true) 5950 } 5951 syscallWake = notetsleep(&sched.sysmonnote, sleep) 5952 if shouldRelax { 5953 osRelax(false) 5954 } 5955 lock(&sched.lock) 5956 sched.sysmonwait.Store(false) 5957 noteclear(&sched.sysmonnote) 5958 } 5959 if syscallWake { 5960 idle = 0 5961 delay = 20 5962 } 5963 } 5964 unlock(&sched.lock) 5965 } 5966 5967 lock(&sched.sysmonlock) 5968 // Update now in case we blocked on sysmonnote or spent a long time 5969 // blocked on schedlock or sysmonlock above. 5970 now = nanotime() 5971 5972 // trigger libc interceptors if needed 5973 if *cgo_yield != nil { 5974 asmcgocall(*cgo_yield, nil) 5975 } 5976 // poll network if not polled for more than 10ms 5977 lastpoll := sched.lastpoll.Load() 5978 if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now { 5979 sched.lastpoll.CompareAndSwap(lastpoll, now) 5980 list, delta := netpoll(0) // non-blocking - returns list of goroutines 5981 if !list.empty() { 5982 // Need to decrement number of idle locked M's 5983 // (pretending that one more is running) before injectglist. 5984 // Otherwise it can lead to the following situation: 5985 // injectglist grabs all P's but before it starts M's to run the P's, 5986 // another M returns from syscall, finishes running its G, 5987 // observes that there is no work to do and no other running M's 5988 // and reports deadlock. 5989 incidlelocked(-1) 5990 injectglist(&list) 5991 incidlelocked(1) 5992 netpollAdjustWaiters(delta) 5993 } 5994 } 5995 if GOOS == "netbsd" && needSysmonWorkaround { 5996 // netpoll is responsible for waiting for timer 5997 // expiration, so we typically don't have to worry 5998 // about starting an M to service timers. (Note that 5999 // sleep for timeSleepUntil above simply ensures sysmon 6000 // starts running again when that timer expiration may 6001 // cause Go code to run again). 6002 // 6003 // However, netbsd has a kernel bug that sometimes 6004 // misses netpollBreak wake-ups, which can lead to 6005 // unbounded delays servicing timers. If we detect this 6006 // overrun, then startm to get something to handle the 6007 // timer. 6008 // 6009 // See issue 42515 and 6010 // https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=50094. 6011 if next := timeSleepUntil(); next < now { 6012 startm(nil, false, false) 6013 } 6014 } 6015 if scavenger.sysmonWake.Load() != 0 { 6016 // Kick the scavenger awake if someone requested it. 6017 scavenger.wake() 6018 } 6019 // retake P's blocked in syscalls 6020 // and preempt long running G's 6021 if retake(now) != 0 { 6022 idle = 0 6023 } else { 6024 idle++ 6025 } 6026 // check if we need to force a GC 6027 if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() { 6028 lock(&forcegc.lock) 6029 forcegc.idle.Store(false) 6030 var list gList 6031 list.push(forcegc.g) 6032 injectglist(&list) 6033 unlock(&forcegc.lock) 6034 } 6035 if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now { 6036 lasttrace = now 6037 schedtrace(debug.scheddetail > 0) 6038 } 6039 unlock(&sched.sysmonlock) 6040 } 6041 } 6042 6043 type sysmontick struct { 6044 schedtick uint32 6045 schedwhen int64 6046 syscalltick uint32 6047 syscallwhen int64 6048 } 6049 6050 // forcePreemptNS is the time slice given to a G before it is 6051 // preempted. 6052 const forcePreemptNS = 10 * 1000 * 1000 // 10ms 6053 6054 func retake(now int64) uint32 { 6055 n := 0 6056 // Prevent allp slice changes. This lock will be completely 6057 // uncontended unless we're already stopping the world. 6058 lock(&allpLock) 6059 // We can't use a range loop over allp because we may 6060 // temporarily drop the allpLock. Hence, we need to re-fetch 6061 // allp each time around the loop. 6062 for i := 0; i < len(allp); i++ { 6063 pp := allp[i] 6064 if pp == nil { 6065 // This can happen if procresize has grown 6066 // allp but not yet created new Ps. 6067 continue 6068 } 6069 pd := &pp.sysmontick 6070 s := pp.status 6071 sysretake := false 6072 if s == _Prunning || s == _Psyscall { 6073 // Preempt G if it's running on the same schedtick for 6074 // too long. This could be from a single long-running 6075 // goroutine or a sequence of goroutines run via 6076 // runnext, which share a single schedtick time slice. 6077 t := int64(pp.schedtick) 6078 if int64(pd.schedtick) != t { 6079 pd.schedtick = uint32(t) 6080 pd.schedwhen = now 6081 } else if pd.schedwhen+forcePreemptNS <= now { 6082 preemptone(pp) 6083 // In case of syscall, preemptone() doesn't 6084 // work, because there is no M wired to P. 6085 sysretake = true 6086 } 6087 } 6088 if s == _Psyscall { 6089 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). 6090 t := int64(pp.syscalltick) 6091 if !sysretake && int64(pd.syscalltick) != t { 6092 pd.syscalltick = uint32(t) 6093 pd.syscallwhen = now 6094 continue 6095 } 6096 // On the one hand we don't want to retake Ps if there is no other work to do, 6097 // but on the other hand we want to retake them eventually 6098 // because they can prevent the sysmon thread from deep sleep. 6099 if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now { 6100 continue 6101 } 6102 // Drop allpLock so we can take sched.lock. 6103 unlock(&allpLock) 6104 // Need to decrement number of idle locked M's 6105 // (pretending that one more is running) before the CAS. 6106 // Otherwise the M from which we retake can exit the syscall, 6107 // increment nmidle and report deadlock. 6108 incidlelocked(-1) 6109 trace := traceAcquire() 6110 if atomic.Cas(&pp.status, s, _Pidle) { 6111 if trace.ok() { 6112 trace.GoSysBlock(pp) 6113 trace.ProcSteal(pp, false) 6114 traceRelease(trace) 6115 } 6116 n++ 6117 pp.syscalltick++ 6118 handoffp(pp) 6119 } else if trace.ok() { 6120 traceRelease(trace) 6121 } 6122 incidlelocked(1) 6123 lock(&allpLock) 6124 } 6125 } 6126 unlock(&allpLock) 6127 return uint32(n) 6128 } 6129 6130 // Tell all goroutines that they have been preempted and they should stop. 6131 // This function is purely best-effort. It can fail to inform a goroutine if a 6132 // processor just started running it. 6133 // No locks need to be held. 6134 // Returns true if preemption request was issued to at least one goroutine. 6135 func preemptall() bool { 6136 res := false 6137 for _, pp := range allp { 6138 if pp.status != _Prunning { 6139 continue 6140 } 6141 if preemptone(pp) { 6142 res = true 6143 } 6144 } 6145 return res 6146 } 6147 6148 // Tell the goroutine running on processor P to stop. 6149 // This function is purely best-effort. It can incorrectly fail to inform the 6150 // goroutine. It can inform the wrong goroutine. Even if it informs the 6151 // correct goroutine, that goroutine might ignore the request if it is 6152 // simultaneously executing newstack. 6153 // No lock needs to be held. 6154 // Returns true if preemption request was issued. 6155 // The actual preemption will happen at some point in the future 6156 // and will be indicated by the gp->status no longer being 6157 // Grunning 6158 func preemptone(pp *p) bool { 6159 mp := pp.m.ptr() 6160 if mp == nil || mp == getg().m { 6161 return false 6162 } 6163 gp := mp.curg 6164 if gp == nil || gp == mp.g0 { 6165 return false 6166 } 6167 6168 gp.preempt = true 6169 6170 // Every call in a goroutine checks for stack overflow by 6171 // comparing the current stack pointer to gp->stackguard0. 6172 // Setting gp->stackguard0 to StackPreempt folds 6173 // preemption into the normal stack overflow check. 6174 gp.stackguard0 = stackPreempt 6175 6176 // Request an async preemption of this P. 6177 if preemptMSupported && debug.asyncpreemptoff == 0 { 6178 pp.preempt = true 6179 preemptM(mp) 6180 } 6181 6182 return true 6183 } 6184 6185 var starttime int64 6186 6187 func schedtrace(detailed bool) { 6188 now := nanotime() 6189 if starttime == 0 { 6190 starttime = now 6191 } 6192 6193 lock(&sched.lock) 6194 print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) 6195 if detailed { 6196 print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n") 6197 } 6198 // We must be careful while reading data from P's, M's and G's. 6199 // Even if we hold schedlock, most data can be changed concurrently. 6200 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. 6201 for i, pp := range allp { 6202 mp := pp.m.ptr() 6203 h := atomic.Load(&pp.runqhead) 6204 t := atomic.Load(&pp.runqtail) 6205 if detailed { 6206 print(" P", i, ": status=", pp.status, " schedtick=", pp.schedtick, " syscalltick=", pp.syscalltick, " m=") 6207 if mp != nil { 6208 print(mp.id) 6209 } else { 6210 print("nil") 6211 } 6212 print(" runqsize=", t-h, " gfreecnt=", pp.gFree.n, " timerslen=", len(pp.timers.heap), "\n") 6213 } else { 6214 // In non-detailed mode format lengths of per-P run queues as: 6215 // [len1 len2 len3 len4] 6216 print(" ") 6217 if i == 0 { 6218 print("[") 6219 } 6220 print(t - h) 6221 if i == len(allp)-1 { 6222 print("]\n") 6223 } 6224 } 6225 } 6226 6227 if !detailed { 6228 unlock(&sched.lock) 6229 return 6230 } 6231 6232 for mp := allm; mp != nil; mp = mp.alllink { 6233 pp := mp.p.ptr() 6234 print(" M", mp.id, ": p=") 6235 if pp != nil { 6236 print(pp.id) 6237 } else { 6238 print("nil") 6239 } 6240 print(" curg=") 6241 if mp.curg != nil { 6242 print(mp.curg.goid) 6243 } else { 6244 print("nil") 6245 } 6246 print(" mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=") 6247 if lockedg := mp.lockedg.ptr(); lockedg != nil { 6248 print(lockedg.goid) 6249 } else { 6250 print("nil") 6251 } 6252 print("\n") 6253 } 6254 6255 forEachG(func(gp *g) { 6256 print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=") 6257 if gp.m != nil { 6258 print(gp.m.id) 6259 } else { 6260 print("nil") 6261 } 6262 print(" lockedm=") 6263 if lockedm := gp.lockedm.ptr(); lockedm != nil { 6264 print(lockedm.id) 6265 } else { 6266 print("nil") 6267 } 6268 print("\n") 6269 }) 6270 unlock(&sched.lock) 6271 } 6272 6273 // schedEnableUser enables or disables the scheduling of user 6274 // goroutines. 6275 // 6276 // This does not stop already running user goroutines, so the caller 6277 // should first stop the world when disabling user goroutines. 6278 func schedEnableUser(enable bool) { 6279 lock(&sched.lock) 6280 if sched.disable.user == !enable { 6281 unlock(&sched.lock) 6282 return 6283 } 6284 sched.disable.user = !enable 6285 if enable { 6286 n := sched.disable.n 6287 sched.disable.n = 0 6288 globrunqputbatch(&sched.disable.runnable, n) 6289 unlock(&sched.lock) 6290 for ; n != 0 && sched.npidle.Load() != 0; n-- { 6291 startm(nil, false, false) 6292 } 6293 } else { 6294 unlock(&sched.lock) 6295 } 6296 } 6297 6298 // schedEnabled reports whether gp should be scheduled. It returns 6299 // false is scheduling of gp is disabled. 6300 // 6301 // sched.lock must be held. 6302 func schedEnabled(gp *g) bool { 6303 assertLockHeld(&sched.lock) 6304 6305 if sched.disable.user { 6306 return isSystemGoroutine(gp, true) 6307 } 6308 return true 6309 } 6310 6311 // Put mp on midle list. 6312 // sched.lock must be held. 6313 // May run during STW, so write barriers are not allowed. 6314 // 6315 //go:nowritebarrierrec 6316 func mput(mp *m) { 6317 assertLockHeld(&sched.lock) 6318 6319 mp.schedlink = sched.midle 6320 sched.midle.set(mp) 6321 sched.nmidle++ 6322 checkdead() 6323 } 6324 6325 // Try to get an m from midle list. 6326 // sched.lock must be held. 6327 // May run during STW, so write barriers are not allowed. 6328 // 6329 //go:nowritebarrierrec 6330 func mget() *m { 6331 assertLockHeld(&sched.lock) 6332 6333 mp := sched.midle.ptr() 6334 if mp != nil { 6335 sched.midle = mp.schedlink 6336 sched.nmidle-- 6337 } 6338 return mp 6339 } 6340 6341 // Put gp on the global runnable queue. 6342 // sched.lock must be held. 6343 // May run during STW, so write barriers are not allowed. 6344 // 6345 //go:nowritebarrierrec 6346 func globrunqput(gp *g) { 6347 assertLockHeld(&sched.lock) 6348 6349 sched.runq.pushBack(gp) 6350 sched.runqsize++ 6351 } 6352 6353 // Put gp at the head of the global runnable queue. 6354 // sched.lock must be held. 6355 // May run during STW, so write barriers are not allowed. 6356 // 6357 //go:nowritebarrierrec 6358 func globrunqputhead(gp *g) { 6359 assertLockHeld(&sched.lock) 6360 6361 sched.runq.push(gp) 6362 sched.runqsize++ 6363 } 6364 6365 // Put a batch of runnable goroutines on the global runnable queue. 6366 // This clears *batch. 6367 // sched.lock must be held. 6368 // May run during STW, so write barriers are not allowed. 6369 // 6370 //go:nowritebarrierrec 6371 func globrunqputbatch(batch *gQueue, n int32) { 6372 assertLockHeld(&sched.lock) 6373 6374 sched.runq.pushBackAll(*batch) 6375 sched.runqsize += n 6376 *batch = gQueue{} 6377 } 6378 6379 // Try get a batch of G's from the global runnable queue. 6380 // sched.lock must be held. 6381 func globrunqget(pp *p, max int32) *g { 6382 assertLockHeld(&sched.lock) 6383 6384 if sched.runqsize == 0 { 6385 return nil 6386 } 6387 6388 n := sched.runqsize/gomaxprocs + 1 6389 if n > sched.runqsize { 6390 n = sched.runqsize 6391 } 6392 if max > 0 && n > max { 6393 n = max 6394 } 6395 if n > int32(len(pp.runq))/2 { 6396 n = int32(len(pp.runq)) / 2 6397 } 6398 6399 sched.runqsize -= n 6400 6401 gp := sched.runq.pop() 6402 n-- 6403 for ; n > 0; n-- { 6404 gp1 := sched.runq.pop() 6405 runqput(pp, gp1, false) 6406 } 6407 return gp 6408 } 6409 6410 // pMask is an atomic bitstring with one bit per P. 6411 type pMask []uint32 6412 6413 // read returns true if P id's bit is set. 6414 func (p pMask) read(id uint32) bool { 6415 word := id / 32 6416 mask := uint32(1) << (id % 32) 6417 return (atomic.Load(&p[word]) & mask) != 0 6418 } 6419 6420 // set sets P id's bit. 6421 func (p pMask) set(id int32) { 6422 word := id / 32 6423 mask := uint32(1) << (id % 32) 6424 atomic.Or(&p[word], mask) 6425 } 6426 6427 // clear clears P id's bit. 6428 func (p pMask) clear(id int32) { 6429 word := id / 32 6430 mask := uint32(1) << (id % 32) 6431 atomic.And(&p[word], ^mask) 6432 } 6433 6434 // pidleput puts p on the _Pidle list. now must be a relatively recent call 6435 // to nanotime or zero. Returns now or the current time if now was zero. 6436 // 6437 // This releases ownership of p. Once sched.lock is released it is no longer 6438 // safe to use p. 6439 // 6440 // sched.lock must be held. 6441 // 6442 // May run during STW, so write barriers are not allowed. 6443 // 6444 //go:nowritebarrierrec 6445 func pidleput(pp *p, now int64) int64 { 6446 assertLockHeld(&sched.lock) 6447 6448 if !runqempty(pp) { 6449 throw("pidleput: P has non-empty run queue") 6450 } 6451 if now == 0 { 6452 now = nanotime() 6453 } 6454 if pp.timers.len.Load() == 0 { 6455 timerpMask.clear(pp.id) 6456 } 6457 idlepMask.set(pp.id) 6458 pp.link = sched.pidle 6459 sched.pidle.set(pp) 6460 sched.npidle.Add(1) 6461 if !pp.limiterEvent.start(limiterEventIdle, now) { 6462 throw("must be able to track idle limiter event") 6463 } 6464 return now 6465 } 6466 6467 // pidleget tries to get a p from the _Pidle list, acquiring ownership. 6468 // 6469 // sched.lock must be held. 6470 // 6471 // May run during STW, so write barriers are not allowed. 6472 // 6473 //go:nowritebarrierrec 6474 func pidleget(now int64) (*p, int64) { 6475 assertLockHeld(&sched.lock) 6476 6477 pp := sched.pidle.ptr() 6478 if pp != nil { 6479 // Timer may get added at any time now. 6480 if now == 0 { 6481 now = nanotime() 6482 } 6483 timerpMask.set(pp.id) 6484 idlepMask.clear(pp.id) 6485 sched.pidle = pp.link 6486 sched.npidle.Add(-1) 6487 pp.limiterEvent.stop(limiterEventIdle, now) 6488 } 6489 return pp, now 6490 } 6491 6492 // pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership. 6493 // This is called by spinning Ms (or callers than need a spinning M) that have 6494 // found work. If no P is available, this must synchronized with non-spinning 6495 // Ms that may be preparing to drop their P without discovering this work. 6496 // 6497 // sched.lock must be held. 6498 // 6499 // May run during STW, so write barriers are not allowed. 6500 // 6501 //go:nowritebarrierrec 6502 func pidlegetSpinning(now int64) (*p, int64) { 6503 assertLockHeld(&sched.lock) 6504 6505 pp, now := pidleget(now) 6506 if pp == nil { 6507 // See "Delicate dance" comment in findrunnable. We found work 6508 // that we cannot take, we must synchronize with non-spinning 6509 // Ms that may be preparing to drop their P. 6510 sched.needspinning.Store(1) 6511 return nil, now 6512 } 6513 6514 return pp, now 6515 } 6516 6517 // runqempty reports whether pp has no Gs on its local run queue. 6518 // It never returns true spuriously. 6519 func runqempty(pp *p) bool { 6520 // Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail, 6521 // 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext. 6522 // Simply observing that runqhead == runqtail and then observing that runqnext == nil 6523 // does not mean the queue is empty. 6524 for { 6525 head := atomic.Load(&pp.runqhead) 6526 tail := atomic.Load(&pp.runqtail) 6527 runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext))) 6528 if tail == atomic.Load(&pp.runqtail) { 6529 return head == tail && runnext == 0 6530 } 6531 } 6532 } 6533 6534 // To shake out latent assumptions about scheduling order, 6535 // we introduce some randomness into scheduling decisions 6536 // when running with the race detector. 6537 // The need for this was made obvious by changing the 6538 // (deterministic) scheduling order in Go 1.5 and breaking 6539 // many poorly-written tests. 6540 // With the randomness here, as long as the tests pass 6541 // consistently with -race, they shouldn't have latent scheduling 6542 // assumptions. 6543 const randomizeScheduler = raceenabled 6544 6545 // runqput tries to put g on the local runnable queue. 6546 // If next is false, runqput adds g to the tail of the runnable queue. 6547 // If next is true, runqput puts g in the pp.runnext slot. 6548 // If the run queue is full, runnext puts g on the global queue. 6549 // Executed only by the owner P. 6550 func runqput(pp *p, gp *g, next bool) { 6551 if !haveSysmon && next { 6552 // A runnext goroutine shares the same time slice as the 6553 // current goroutine (inheritTime from runqget). To prevent a 6554 // ping-pong pair of goroutines from starving all others, we 6555 // depend on sysmon to preempt "long-running goroutines". That 6556 // is, any set of goroutines sharing the same time slice. 6557 // 6558 // If there is no sysmon, we must avoid runnext entirely or 6559 // risk starvation. 6560 next = false 6561 } 6562 if randomizeScheduler && next && randn(2) == 0 { 6563 next = false 6564 } 6565 6566 if next { 6567 retryNext: 6568 oldnext := pp.runnext 6569 if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) { 6570 goto retryNext 6571 } 6572 if oldnext == 0 { 6573 return 6574 } 6575 // Kick the old runnext out to the regular run queue. 6576 gp = oldnext.ptr() 6577 } 6578 6579 retry: 6580 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 6581 t := pp.runqtail 6582 if t-h < uint32(len(pp.runq)) { 6583 pp.runq[t%uint32(len(pp.runq))].set(gp) 6584 atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption 6585 return 6586 } 6587 if runqputslow(pp, gp, h, t) { 6588 return 6589 } 6590 // the queue is not full, now the put above must succeed 6591 goto retry 6592 } 6593 6594 // Put g and a batch of work from local runnable queue on global queue. 6595 // Executed only by the owner P. 6596 func runqputslow(pp *p, gp *g, h, t uint32) bool { 6597 var batch [len(pp.runq)/2 + 1]*g 6598 6599 // First, grab a batch from local queue. 6600 n := t - h 6601 n = n / 2 6602 if n != uint32(len(pp.runq)/2) { 6603 throw("runqputslow: queue is not full") 6604 } 6605 for i := uint32(0); i < n; i++ { 6606 batch[i] = pp.runq[(h+i)%uint32(len(pp.runq))].ptr() 6607 } 6608 if !atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 6609 return false 6610 } 6611 batch[n] = gp 6612 6613 if randomizeScheduler { 6614 for i := uint32(1); i <= n; i++ { 6615 j := cheaprandn(i + 1) 6616 batch[i], batch[j] = batch[j], batch[i] 6617 } 6618 } 6619 6620 // Link the goroutines. 6621 for i := uint32(0); i < n; i++ { 6622 batch[i].schedlink.set(batch[i+1]) 6623 } 6624 var q gQueue 6625 q.head.set(batch[0]) 6626 q.tail.set(batch[n]) 6627 6628 // Now put the batch on global queue. 6629 lock(&sched.lock) 6630 globrunqputbatch(&q, int32(n+1)) 6631 unlock(&sched.lock) 6632 return true 6633 } 6634 6635 // runqputbatch tries to put all the G's on q on the local runnable queue. 6636 // If the queue is full, they are put on the global queue; in that case 6637 // this will temporarily acquire the scheduler lock. 6638 // Executed only by the owner P. 6639 func runqputbatch(pp *p, q *gQueue, qsize int) { 6640 h := atomic.LoadAcq(&pp.runqhead) 6641 t := pp.runqtail 6642 n := uint32(0) 6643 for !q.empty() && t-h < uint32(len(pp.runq)) { 6644 gp := q.pop() 6645 pp.runq[t%uint32(len(pp.runq))].set(gp) 6646 t++ 6647 n++ 6648 } 6649 qsize -= int(n) 6650 6651 if randomizeScheduler { 6652 off := func(o uint32) uint32 { 6653 return (pp.runqtail + o) % uint32(len(pp.runq)) 6654 } 6655 for i := uint32(1); i < n; i++ { 6656 j := cheaprandn(i + 1) 6657 pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)] 6658 } 6659 } 6660 6661 atomic.StoreRel(&pp.runqtail, t) 6662 if !q.empty() { 6663 lock(&sched.lock) 6664 globrunqputbatch(q, int32(qsize)) 6665 unlock(&sched.lock) 6666 } 6667 } 6668 6669 // Get g from local runnable queue. 6670 // If inheritTime is true, gp should inherit the remaining time in the 6671 // current time slice. Otherwise, it should start a new time slice. 6672 // Executed only by the owner P. 6673 func runqget(pp *p) (gp *g, inheritTime bool) { 6674 // If there's a runnext, it's the next G to run. 6675 next := pp.runnext 6676 // If the runnext is non-0 and the CAS fails, it could only have been stolen by another P, 6677 // because other Ps can race to set runnext to 0, but only the current P can set it to non-0. 6678 // Hence, there's no need to retry this CAS if it fails. 6679 if next != 0 && pp.runnext.cas(next, 0) { 6680 return next.ptr(), true 6681 } 6682 6683 for { 6684 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 6685 t := pp.runqtail 6686 if t == h { 6687 return nil, false 6688 } 6689 gp := pp.runq[h%uint32(len(pp.runq))].ptr() 6690 if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume 6691 return gp, false 6692 } 6693 } 6694 } 6695 6696 // runqdrain drains the local runnable queue of pp and returns all goroutines in it. 6697 // Executed only by the owner P. 6698 func runqdrain(pp *p) (drainQ gQueue, n uint32) { 6699 oldNext := pp.runnext 6700 if oldNext != 0 && pp.runnext.cas(oldNext, 0) { 6701 drainQ.pushBack(oldNext.ptr()) 6702 n++ 6703 } 6704 6705 retry: 6706 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 6707 t := pp.runqtail 6708 qn := t - h 6709 if qn == 0 { 6710 return 6711 } 6712 if qn > uint32(len(pp.runq)) { // read inconsistent h and t 6713 goto retry 6714 } 6715 6716 if !atomic.CasRel(&pp.runqhead, h, h+qn) { // cas-release, commits consume 6717 goto retry 6718 } 6719 6720 // We've inverted the order in which it gets G's from the local P's runnable queue 6721 // and then advances the head pointer because we don't want to mess up the statuses of G's 6722 // while runqdrain() and runqsteal() are running in parallel. 6723 // Thus we should advance the head pointer before draining the local P into a gQueue, 6724 // so that we can update any gp.schedlink only after we take the full ownership of G, 6725 // meanwhile, other P's can't access to all G's in local P's runnable queue and steal them. 6726 // See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details. 6727 for i := uint32(0); i < qn; i++ { 6728 gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr() 6729 drainQ.pushBack(gp) 6730 n++ 6731 } 6732 return 6733 } 6734 6735 // Grabs a batch of goroutines from pp's runnable queue into batch. 6736 // Batch is a ring buffer starting at batchHead. 6737 // Returns number of grabbed goroutines. 6738 // Can be executed by any P. 6739 func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { 6740 for { 6741 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 6742 t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer 6743 n := t - h 6744 n = n - n/2 6745 if n == 0 { 6746 if stealRunNextG { 6747 // Try to steal from pp.runnext. 6748 if next := pp.runnext; next != 0 { 6749 if pp.status == _Prunning { 6750 // Sleep to ensure that pp isn't about to run the g 6751 // we are about to steal. 6752 // The important use case here is when the g running 6753 // on pp ready()s another g and then almost 6754 // immediately blocks. Instead of stealing runnext 6755 // in this window, back off to give pp a chance to 6756 // schedule runnext. This will avoid thrashing gs 6757 // between different Ps. 6758 // A sync chan send/recv takes ~50ns as of time of 6759 // writing, so 3us gives ~50x overshoot. 6760 if !osHasLowResTimer { 6761 usleep(3) 6762 } else { 6763 // On some platforms system timer granularity is 6764 // 1-15ms, which is way too much for this 6765 // optimization. So just yield. 6766 osyield() 6767 } 6768 } 6769 if !pp.runnext.cas(next, 0) { 6770 continue 6771 } 6772 batch[batchHead%uint32(len(batch))] = next 6773 return 1 6774 } 6775 } 6776 return 0 6777 } 6778 if n > uint32(len(pp.runq)/2) { // read inconsistent h and t 6779 continue 6780 } 6781 for i := uint32(0); i < n; i++ { 6782 g := pp.runq[(h+i)%uint32(len(pp.runq))] 6783 batch[(batchHead+i)%uint32(len(batch))] = g 6784 } 6785 if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 6786 return n 6787 } 6788 } 6789 } 6790 6791 // Steal half of elements from local runnable queue of p2 6792 // and put onto local runnable queue of p. 6793 // Returns one of the stolen elements (or nil if failed). 6794 func runqsteal(pp, p2 *p, stealRunNextG bool) *g { 6795 t := pp.runqtail 6796 n := runqgrab(p2, &pp.runq, t, stealRunNextG) 6797 if n == 0 { 6798 return nil 6799 } 6800 n-- 6801 gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr() 6802 if n == 0 { 6803 return gp 6804 } 6805 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 6806 if t-h+n >= uint32(len(pp.runq)) { 6807 throw("runqsteal: runq overflow") 6808 } 6809 atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption 6810 return gp 6811 } 6812 6813 // A gQueue is a dequeue of Gs linked through g.schedlink. A G can only 6814 // be on one gQueue or gList at a time. 6815 type gQueue struct { 6816 head guintptr 6817 tail guintptr 6818 } 6819 6820 // empty reports whether q is empty. 6821 func (q *gQueue) empty() bool { 6822 return q.head == 0 6823 } 6824 6825 // push adds gp to the head of q. 6826 func (q *gQueue) push(gp *g) { 6827 gp.schedlink = q.head 6828 q.head.set(gp) 6829 if q.tail == 0 { 6830 q.tail.set(gp) 6831 } 6832 } 6833 6834 // pushBack adds gp to the tail of q. 6835 func (q *gQueue) pushBack(gp *g) { 6836 gp.schedlink = 0 6837 if q.tail != 0 { 6838 q.tail.ptr().schedlink.set(gp) 6839 } else { 6840 q.head.set(gp) 6841 } 6842 q.tail.set(gp) 6843 } 6844 6845 // pushBackAll adds all Gs in q2 to the tail of q. After this q2 must 6846 // not be used. 6847 func (q *gQueue) pushBackAll(q2 gQueue) { 6848 if q2.tail == 0 { 6849 return 6850 } 6851 q2.tail.ptr().schedlink = 0 6852 if q.tail != 0 { 6853 q.tail.ptr().schedlink = q2.head 6854 } else { 6855 q.head = q2.head 6856 } 6857 q.tail = q2.tail 6858 } 6859 6860 // pop removes and returns the head of queue q. It returns nil if 6861 // q is empty. 6862 func (q *gQueue) pop() *g { 6863 gp := q.head.ptr() 6864 if gp != nil { 6865 q.head = gp.schedlink 6866 if q.head == 0 { 6867 q.tail = 0 6868 } 6869 } 6870 return gp 6871 } 6872 6873 // popList takes all Gs in q and returns them as a gList. 6874 func (q *gQueue) popList() gList { 6875 stack := gList{q.head} 6876 *q = gQueue{} 6877 return stack 6878 } 6879 6880 // A gList is a list of Gs linked through g.schedlink. A G can only be 6881 // on one gQueue or gList at a time. 6882 type gList struct { 6883 head guintptr 6884 } 6885 6886 // empty reports whether l is empty. 6887 func (l *gList) empty() bool { 6888 return l.head == 0 6889 } 6890 6891 // push adds gp to the head of l. 6892 func (l *gList) push(gp *g) { 6893 gp.schedlink = l.head 6894 l.head.set(gp) 6895 } 6896 6897 // pushAll prepends all Gs in q to l. 6898 func (l *gList) pushAll(q gQueue) { 6899 if !q.empty() { 6900 q.tail.ptr().schedlink = l.head 6901 l.head = q.head 6902 } 6903 } 6904 6905 // pop removes and returns the head of l. If l is empty, it returns nil. 6906 func (l *gList) pop() *g { 6907 gp := l.head.ptr() 6908 if gp != nil { 6909 l.head = gp.schedlink 6910 } 6911 return gp 6912 } 6913 6914 //go:linkname setMaxThreads runtime/debug.setMaxThreads 6915 func setMaxThreads(in int) (out int) { 6916 lock(&sched.lock) 6917 out = int(sched.maxmcount) 6918 if in > 0x7fffffff { // MaxInt32 6919 sched.maxmcount = 0x7fffffff 6920 } else { 6921 sched.maxmcount = int32(in) 6922 } 6923 checkmcount() 6924 unlock(&sched.lock) 6925 return 6926 } 6927 6928 //go:nosplit 6929 func procPin() int { 6930 gp := getg() 6931 mp := gp.m 6932 6933 mp.locks++ 6934 return int(mp.p.ptr().id) 6935 } 6936 6937 //go:nosplit 6938 func procUnpin() { 6939 gp := getg() 6940 gp.m.locks-- 6941 } 6942 6943 //go:linkname sync_runtime_procPin sync.runtime_procPin 6944 //go:nosplit 6945 func sync_runtime_procPin() int { 6946 return procPin() 6947 } 6948 6949 //go:linkname sync_runtime_procUnpin sync.runtime_procUnpin 6950 //go:nosplit 6951 func sync_runtime_procUnpin() { 6952 procUnpin() 6953 } 6954 6955 //go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin 6956 //go:nosplit 6957 func sync_atomic_runtime_procPin() int { 6958 return procPin() 6959 } 6960 6961 //go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin 6962 //go:nosplit 6963 func sync_atomic_runtime_procUnpin() { 6964 procUnpin() 6965 } 6966 6967 // Active spinning for sync.Mutex. 6968 // 6969 //go:linkname sync_runtime_canSpin sync.runtime_canSpin 6970 //go:nosplit 6971 func sync_runtime_canSpin(i int) bool { 6972 // sync.Mutex is cooperative, so we are conservative with spinning. 6973 // Spin only few times and only if running on a multicore machine and 6974 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. 6975 // As opposed to runtime mutex we don't do passive spinning here, 6976 // because there can be work on global runq or on other Ps. 6977 if i >= active_spin || ncpu <= 1 || gomaxprocs <= sched.npidle.Load()+sched.nmspinning.Load()+1 { 6978 return false 6979 } 6980 if p := getg().m.p.ptr(); !runqempty(p) { 6981 return false 6982 } 6983 return true 6984 } 6985 6986 //go:linkname sync_runtime_doSpin sync.runtime_doSpin 6987 //go:nosplit 6988 func sync_runtime_doSpin() { 6989 procyield(active_spin_cnt) 6990 } 6991 6992 var stealOrder randomOrder 6993 6994 // randomOrder/randomEnum are helper types for randomized work stealing. 6995 // They allow to enumerate all Ps in different pseudo-random orders without repetitions. 6996 // The algorithm is based on the fact that if we have X such that X and GOMAXPROCS 6997 // are coprime, then a sequences of (i + X) % GOMAXPROCS gives the required enumeration. 6998 type randomOrder struct { 6999 count uint32 7000 coprimes []uint32 7001 } 7002 7003 type randomEnum struct { 7004 i uint32 7005 count uint32 7006 pos uint32 7007 inc uint32 7008 } 7009 7010 func (ord *randomOrder) reset(count uint32) { 7011 ord.count = count 7012 ord.coprimes = ord.coprimes[:0] 7013 for i := uint32(1); i <= count; i++ { 7014 if gcd(i, count) == 1 { 7015 ord.coprimes = append(ord.coprimes, i) 7016 } 7017 } 7018 } 7019 7020 func (ord *randomOrder) start(i uint32) randomEnum { 7021 return randomEnum{ 7022 count: ord.count, 7023 pos: i % ord.count, 7024 inc: ord.coprimes[i/ord.count%uint32(len(ord.coprimes))], 7025 } 7026 } 7027 7028 func (enum *randomEnum) done() bool { 7029 return enum.i == enum.count 7030 } 7031 7032 func (enum *randomEnum) next() { 7033 enum.i++ 7034 enum.pos = (enum.pos + enum.inc) % enum.count 7035 } 7036 7037 func (enum *randomEnum) position() uint32 { 7038 return enum.pos 7039 } 7040 7041 func gcd(a, b uint32) uint32 { 7042 for b != 0 { 7043 a, b = b, a%b 7044 } 7045 return a 7046 } 7047 7048 // An initTask represents the set of initializations that need to be done for a package. 7049 // Keep in sync with ../../test/noinit.go:initTask 7050 type initTask struct { 7051 state uint32 // 0 = uninitialized, 1 = in progress, 2 = done 7052 nfns uint32 7053 // followed by nfns pcs, uintptr sized, one per init function to run 7054 } 7055 7056 // inittrace stores statistics for init functions which are 7057 // updated by malloc and newproc when active is true. 7058 var inittrace tracestat 7059 7060 type tracestat struct { 7061 active bool // init tracing activation status 7062 id uint64 // init goroutine id 7063 allocs uint64 // heap allocations 7064 bytes uint64 // heap allocated bytes 7065 } 7066 7067 func doInit(ts []*initTask) { 7068 for _, t := range ts { 7069 doInit1(t) 7070 } 7071 } 7072 7073 func doInit1(t *initTask) { 7074 switch t.state { 7075 case 2: // fully initialized 7076 return 7077 case 1: // initialization in progress 7078 throw("recursive call during initialization - linker skew") 7079 default: // not initialized yet 7080 t.state = 1 // initialization in progress 7081 7082 var ( 7083 start int64 7084 before tracestat 7085 ) 7086 7087 if inittrace.active { 7088 start = nanotime() 7089 // Load stats non-atomically since tracinit is updated only by this init goroutine. 7090 before = inittrace 7091 } 7092 7093 if t.nfns == 0 { 7094 // We should have pruned all of these in the linker. 7095 throw("inittask with no functions") 7096 } 7097 7098 firstFunc := add(unsafe.Pointer(t), 8) 7099 for i := uint32(0); i < t.nfns; i++ { 7100 p := add(firstFunc, uintptr(i)*goarch.PtrSize) 7101 f := *(*func())(unsafe.Pointer(&p)) 7102 f() 7103 } 7104 7105 if inittrace.active { 7106 end := nanotime() 7107 // Load stats non-atomically since tracinit is updated only by this init goroutine. 7108 after := inittrace 7109 7110 f := *(*func())(unsafe.Pointer(&firstFunc)) 7111 pkg := funcpkgpath(findfunc(abi.FuncPCABIInternal(f))) 7112 7113 var sbuf [24]byte 7114 print("init ", pkg, " @") 7115 print(string(fmtNSAsMS(sbuf[:], uint64(start-runtimeInitTime))), " ms, ") 7116 print(string(fmtNSAsMS(sbuf[:], uint64(end-start))), " ms clock, ") 7117 print(string(itoa(sbuf[:], after.bytes-before.bytes)), " bytes, ") 7118 print(string(itoa(sbuf[:], after.allocs-before.allocs)), " allocs") 7119 print("\n") 7120 } 7121 7122 t.state = 2 // initialization done 7123 } 7124 }