// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/abi"
	"internal/cpu"
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// set using cmd/go/internal/modload.ModInfoProg
var modinfo string

// Goroutine scheduler
// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
//
// The main concepts are:
// G - goroutine.
// M - worker thread, or machine.
// P - processor, a resource that is required to execute Go code.
//     M must have an associated P to execute Go code, however it can be
//     blocked or in a syscall w/o an associated P.
//
// Design doc at https://golang.org/s/go11sched.

// Worker thread parking/unparking.
// We need to balance between keeping enough running worker threads to utilize
// available hardware parallelism and parking excessive running worker threads
// to conserve CPU resources and power. This is not simple for two reasons:
// (1) scheduler state is intentionally distributed (in particular, per-P work
// queues), so it is not possible to compute global predicates on fast paths;
// (2) for optimal thread management we would need to know the future (don't park
// a worker thread when a new goroutine will be readied in near future).
//
// Three rejected approaches that would work badly:
// 1. Centralize all scheduler state (would inhibit scalability).
// 2. Direct goroutine handoff. That is, when we ready a new goroutine and there
//    is a spare P, unpark a thread and hand off the P and the goroutine to it.
//    This would lead to thread state thrashing, as the thread that readied the
//    goroutine can be out of work the very next moment, and we would then need
//    to park it. Also, it would destroy locality of computation as we want to
//    preserve dependent goroutines on the same thread; and introduce additional
//    latency.
// 3. Unpark an additional thread whenever we ready a goroutine and there is an
//    idle P, but don't do handoff. This would lead to excessive thread parking/
//    unparking as the additional threads will instantly park without discovering
//    any work to do.
//
// The current approach:
//
// This approach applies to three primary sources of potential work: readying a
// goroutine, new/modified-earlier timers, and idle-priority GC. See below for
// additional details.
//
// We unpark an additional thread when we submit work if (this is wakep()):
// 1. There is an idle P, and
// 2. There are no "spinning" worker threads.
//
// A worker thread is considered spinning if it is out of local work and did
// not find work in the global run queue or netpoller; the spinning state is
// denoted in m.spinning and in sched.nmspinning. Threads unparked this way are
// also considered spinning; we don't do goroutine handoff so such threads are
// out of work initially. Spinning threads spin on looking for work in per-P
// run queues and timer heaps or from the GC before parking. If a spinning
// thread finds work it takes itself out of the spinning state and proceeds to
// execution. If it does not find work it takes itself out of the spinning
// state and then parks.
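// The sketch below condenses the unpark rule above (and the matching recheck
// performed by the last spinning thread, described further down) into a toy
// model. toySched, pending, idleP, nmspinning and startWorker are invented
// names; only the shape of the checks mirrors wakep and the
// spinning->non-spinning transition, not the real implementation.
//
//	// Assumes: import "sync/atomic"
//	type toySched struct {
//		pending    atomic.Int64 // stand-in for queued work
//		idleP      atomic.Int32 // stand-in for sched.npidle
//		nmspinning atomic.Int32 // stand-in for sched.nmspinning
//	}
//
//	// submit publishes one unit of work, then wakes a worker only if some P
//	// is idle and nobody is already spinning (the wakep rule).
//	func (s *toySched) submit(startWorker func()) {
//		s.pending.Add(1) // 1. submit work (the atomics provide the barrier)
//		if s.idleP.Load() > 0 && s.nmspinning.Load() == 0 {
//			s.nmspinning.Add(1) // the new worker starts out spinning
//			startWorker()
//		}
//	}
//
//	// stopSpinning is the other side of the race: the last spinning worker
//	// must recheck for work after decrementing the counter and wake a
//	// replacement if it finds any.
//	func (s *toySched) stopSpinning(startWorker func()) {
//		if s.nmspinning.Add(-1) == 0 && s.pending.Load() > 0 {
//			s.nmspinning.Add(1)
//			startWorker()
//		}
//	}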
//
// If there is at least one spinning thread (sched.nmspinning>0), we don't
// unpark new threads when submitting work. To compensate for that, if the last
// spinning thread finds work and stops spinning, it must unpark a new spinning
// thread. This approach smooths out unjustified spikes of thread unparking,
// but at the same time guarantees eventual maximal CPU parallelism
// utilization.
//
// The main implementation complication is that we need to be very careful
// during spinning->non-spinning thread transition. This transition can race
// with submission of new work, and either one part or another needs to unpark
// another worker thread. If they both fail to do that, we can end up with
// semi-persistent CPU underutilization.
//
// The general pattern for submission is:
// 1. Submit work to the local run queue, timer heap, or GC state.
// 2. #StoreLoad-style memory barrier.
// 3. Check sched.nmspinning.
//
// The general pattern for spinning->non-spinning transition is:
// 1. Decrement nmspinning.
// 2. #StoreLoad-style memory barrier.
// 3. Check all per-P work queues and GC for new work.
//
// Note that all this complexity does not apply to the global run queue, as we
// are not sloppy about thread unparking when submitting to the global queue.
// Also see comments for nmspinning manipulation.
//
// How these different sources of work behave varies, though it doesn't affect
// the synchronization approach:
// * Ready goroutine: this is an obvious source of work; the goroutine is
//   immediately ready and must run on some thread eventually.
// * New/modified-earlier timer: The current timer implementation (see time.go)
//   uses netpoll in a thread with no work available to wait for the soonest
//   timer. If there is no thread waiting, we want a new spinning thread to go
//   wait.
// * Idle-priority GC: The GC wakes a stopped idle thread to contribute to
//   background GC work (note: currently disabled per golang.org/issue/19112).
//   Also see golang.org/issue/44313, as this should be extended to all GC
//   workers.

var (
	m0           m
	g0           g
	mcache0      *mcache
	raceprocctx0 uintptr
)

//go:linkname runtime_inittask runtime..inittask
var runtime_inittask initTask

//go:linkname main_inittask main..inittask
var main_inittask initTask

// main_init_done is a signal used by cgocallbackg that initialization
// has been completed. It is made before _cgo_notify_runtime_init_done,
// so all cgo calls can rely on it existing. When main_init is complete,
// it is closed, meaning cgocallbackg can reliably receive from it.
var main_init_done chan bool

//go:linkname main_main main.main
func main_main()

// mainStarted indicates that the main M has started.
var mainStarted bool

// runtimeInitTime is the nanotime() at which the runtime started.
var runtimeInitTime int64

// Value to use for signal mask for newly created M's.
var initSigmask sigset

// The main goroutine.
func main() {
	mp := getg().m

	// Racectx of m0->g0 is used only as the parent of the main goroutine.
	// It must not be used for anything else.
	mp.g0.racectx = 0

	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
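	// The limits set just below are what runtime/debug.SetMaxStack adjusts
	// from user code (maxstackceiling caps how far it can be raised). A
	// minimal example of lowering the per-goroutine stack limit:
	//
	//	package main
	//
	//	import "runtime/debug"
	//
	//	func main() {
	//		// Lower the limit to 32 MB; the previous limit (1 GB on
	//		// 64-bit, 250 MB on 32-bit) is returned.
	//		old := debug.SetMaxStack(32 << 20)
	//		println("previous max stack:", old)
	//		// A goroutine that now grows its stack past 32 MB dies with
	//		// a "goroutine stack exceeds ...-byte limit" fatal error.
	//	}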
155 if goarch.PtrSize == 8 { 156 maxstacksize = 1000000000 157 } else { 158 maxstacksize = 250000000 159 } 160 161 // An upper limit for max stack size. Used to avoid random crashes 162 // after calling SetMaxStack and trying to allocate a stack that is too big, 163 // since stackalloc works with 32-bit sizes. 164 maxstackceiling = 2 * maxstacksize 165 166 // Allow newproc to start new Ms. 167 mainStarted = true 168 169 if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon 170 systemstack(func() { 171 newm(sysmon, nil, -1) 172 }) 173 } 174 175 // Lock the main goroutine onto this, the main OS thread, 176 // during initialization. Most programs won't care, but a few 177 // do require certain calls to be made by the main thread. 178 // Those can arrange for main.main to run in the main thread 179 // by calling runtime.LockOSThread during initialization 180 // to preserve the lock. 181 lockOSThread() 182 183 if mp != &m0 { 184 throw("runtime.main not on m0") 185 } 186 187 // Record when the world started. 188 // Must be before doInit for tracing init. 189 runtimeInitTime = nanotime() 190 if runtimeInitTime == 0 { 191 throw("nanotime returning zero") 192 } 193 194 if debug.inittrace != 0 { 195 inittrace.id = getg().goid 196 inittrace.active = true 197 } 198 199 doInit(&runtime_inittask) // Must be before defer. 200 201 // Defer unlock so that runtime.Goexit during init does the unlock too. 202 needUnlock := true 203 defer func() { 204 if needUnlock { 205 unlockOSThread() 206 } 207 }() 208 209 gcenable() 210 211 main_init_done = make(chan bool) 212 if iscgo { 213 if _cgo_thread_start == nil { 214 throw("_cgo_thread_start missing") 215 } 216 if GOOS != "windows" { 217 if _cgo_setenv == nil { 218 throw("_cgo_setenv missing") 219 } 220 if _cgo_unsetenv == nil { 221 throw("_cgo_unsetenv missing") 222 } 223 } 224 if _cgo_notify_runtime_init_done == nil { 225 throw("_cgo_notify_runtime_init_done missing") 226 } 227 // Start the template thread in case we enter Go from 228 // a C-created thread and need to create a new thread. 229 startTemplateThread() 230 cgocall(_cgo_notify_runtime_init_done, nil) 231 } 232 233 doInit(&main_inittask) 234 235 // Disable init tracing after main init done to avoid overhead 236 // of collecting statistics in malloc and newproc 237 inittrace.active = false 238 239 close(main_init_done) 240 241 needUnlock = false 242 unlockOSThread() 243 244 if isarchive || islibrary { 245 // A program compiled with -buildmode=c-archive or c-shared 246 // has a main, but it is not executed. 247 return 248 } 249 fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime 250 fn() 251 if raceenabled { 252 runExitHooks(0) // run hooks now, since racefini does not return 253 racefini() 254 } 255 256 // Make racy client program work: if panicking on 257 // another goroutine at the same time as main returns, 258 // let the other goroutine finish printing the panic trace. 259 // Once it does, it will exit. See issues 3934 and 20018. 260 if runningPanicDefers.Load() != 0 { 261 // Running deferred functions should not take long. 262 for c := 0; c < 1000; c++ { 263 if runningPanicDefers.Load() == 0 { 264 break 265 } 266 Gosched() 267 } 268 } 269 if panicking.Load() != 0 { 270 gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1) 271 } 272 runExitHooks(0) 273 274 exit(0) 275 for { 276 var x *int32 277 *x = 0 278 } 279 } 280 281 // os_beforeExit is called from os.Exit(0). 
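// The lockOSThread call in main above is what makes the init-time
// runtime.LockOSThread pattern work: a program that needs main.main on the
// main OS thread (for example to drive a C GUI library) takes the lock in an
// init function and simply never releases it. Hypothetical user code, not
// part of the runtime:
//
//	package main
//
//	import "runtime"
//
//	func init() {
//		// Runs before main.main; because the lock is still held when
//		// main.main starts, the runtime keeps it on the main thread.
//		runtime.LockOSThread()
//	}
//
//	func main() {
//		// ... calls that must happen on the main thread ...
//	}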
282 // 283 //go:linkname os_beforeExit os.runtime_beforeExit 284 func os_beforeExit(exitCode int) { 285 runExitHooks(exitCode) 286 if exitCode == 0 && raceenabled { 287 racefini() 288 } 289 } 290 291 // start forcegc helper goroutine 292 func init() { 293 go forcegchelper() 294 } 295 296 func forcegchelper() { 297 forcegc.g = getg() 298 lockInit(&forcegc.lock, lockRankForcegc) 299 for { 300 lock(&forcegc.lock) 301 if forcegc.idle.Load() { 302 throw("forcegc: phase error") 303 } 304 forcegc.idle.Store(true) 305 goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceEvGoBlock, 1) 306 // this goroutine is explicitly resumed by sysmon 307 if debug.gctrace > 0 { 308 println("GC forced") 309 } 310 // Time-triggered, fully concurrent. 311 gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()}) 312 } 313 } 314 315 //go:nosplit 316 317 // Gosched yields the processor, allowing other goroutines to run. It does not 318 // suspend the current goroutine, so execution resumes automatically. 319 func Gosched() { 320 checkTimeouts() 321 mcall(gosched_m) 322 } 323 324 // goschedguarded yields the processor like gosched, but also checks 325 // for forbidden states and opts out of the yield in those cases. 326 // 327 //go:nosplit 328 func goschedguarded() { 329 mcall(goschedguarded_m) 330 } 331 332 // goschedIfBusy yields the processor like gosched, but only does so if 333 // there are no idle Ps or if we're on the only P and there's nothing in 334 // the run queue. In both cases, there is freely available idle time. 335 // 336 //go:nosplit 337 func goschedIfBusy() { 338 if sched.npidle.Load() > 0 { 339 return 340 } 341 mcall(gosched_m) 342 } 343 344 // Puts the current goroutine into a waiting state and calls unlockf on the 345 // system stack. 346 // 347 // If unlockf returns false, the goroutine is resumed. 348 // 349 // unlockf must not access this G's stack, as it may be moved between 350 // the call to gopark and the call to unlockf. 351 // 352 // Note that because unlockf is called after putting the G into a waiting 353 // state, the G may have already been readied by the time unlockf is called 354 // unless there is external synchronization preventing the G from being 355 // readied. If unlockf returns false, it must guarantee that the G cannot be 356 // externally readied. 357 // 358 // Reason explains why the goroutine has been parked. It is displayed in stack 359 // traces and heap dumps. Reasons should be unique and descriptive. Do not 360 // re-use reasons, add new ones. 361 func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) { 362 if reason != waitReasonSleep { 363 checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy 364 } 365 mp := acquirem() 366 gp := mp.curg 367 status := readgstatus(gp) 368 if status != _Grunning && status != _Gscanrunning { 369 throw("gopark: bad g status") 370 } 371 mp.waitlock = lock 372 mp.waitunlockf = unlockf 373 gp.waitreason = reason 374 mp.waittraceev = traceEv 375 mp.waittraceskip = traceskip 376 releasem(mp) 377 // can't do anything that might move the G between Ms here. 378 mcall(park_m) 379 } 380 381 // Puts the current goroutine into a waiting state and unlocks the lock. 382 // The goroutine can be made runnable again by calling goready(gp). 
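// gopark, goparkunlock and goready above are the building blocks for the
// runtime's blocking primitives. The sketch below shows the usual pairing on
// a hypothetical wait queue; waitq1, sleep and wakeOne are invented names,
// and a real primitive would pass a descriptive waitReason instead of
// waitReasonZero.
//
//	type waitq1 struct {
//		lock mutex
//		gp   *g
//	}
//
//	func (w *waitq1) sleep() {
//		lock(&w.lock)
//		w.gp = getg()
//		// The runtime drops w.lock only after this G is parked, so a
//		// concurrent wakeOne cannot observe the G half-asleep.
//		goparkunlock(&w.lock, waitReasonZero, traceEvGoBlock, 1)
//	}
//
//	func (w *waitq1) wakeOne() {
//		lock(&w.lock)
//		gp := w.gp
//		w.gp = nil
//		unlock(&w.lock)
//		if gp != nil {
//			goready(gp, 1) // marks gp runnable and calls wakep
//		}
//	}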
383 func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) { 384 gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip) 385 } 386 387 func goready(gp *g, traceskip int) { 388 systemstack(func() { 389 ready(gp, traceskip, true) 390 }) 391 } 392 393 //go:nosplit 394 func acquireSudog() *sudog { 395 // Delicate dance: the semaphore implementation calls 396 // acquireSudog, acquireSudog calls new(sudog), 397 // new calls malloc, malloc can call the garbage collector, 398 // and the garbage collector calls the semaphore implementation 399 // in stopTheWorld. 400 // Break the cycle by doing acquirem/releasem around new(sudog). 401 // The acquirem/releasem increments m.locks during new(sudog), 402 // which keeps the garbage collector from being invoked. 403 mp := acquirem() 404 pp := mp.p.ptr() 405 if len(pp.sudogcache) == 0 { 406 lock(&sched.sudoglock) 407 // First, try to grab a batch from central cache. 408 for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil { 409 s := sched.sudogcache 410 sched.sudogcache = s.next 411 s.next = nil 412 pp.sudogcache = append(pp.sudogcache, s) 413 } 414 unlock(&sched.sudoglock) 415 // If the central cache is empty, allocate a new one. 416 if len(pp.sudogcache) == 0 { 417 pp.sudogcache = append(pp.sudogcache, new(sudog)) 418 } 419 } 420 n := len(pp.sudogcache) 421 s := pp.sudogcache[n-1] 422 pp.sudogcache[n-1] = nil 423 pp.sudogcache = pp.sudogcache[:n-1] 424 if s.elem != nil { 425 throw("acquireSudog: found s.elem != nil in cache") 426 } 427 releasem(mp) 428 return s 429 } 430 431 //go:nosplit 432 func releaseSudog(s *sudog) { 433 if s.elem != nil { 434 throw("runtime: sudog with non-nil elem") 435 } 436 if s.isSelect { 437 throw("runtime: sudog with non-false isSelect") 438 } 439 if s.next != nil { 440 throw("runtime: sudog with non-nil next") 441 } 442 if s.prev != nil { 443 throw("runtime: sudog with non-nil prev") 444 } 445 if s.waitlink != nil { 446 throw("runtime: sudog with non-nil waitlink") 447 } 448 if s.c != nil { 449 throw("runtime: sudog with non-nil c") 450 } 451 gp := getg() 452 if gp.param != nil { 453 throw("runtime: releaseSudog with non-nil gp.param") 454 } 455 mp := acquirem() // avoid rescheduling to another P 456 pp := mp.p.ptr() 457 if len(pp.sudogcache) == cap(pp.sudogcache) { 458 // Transfer half of local cache to the central cache. 
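// acquireSudog and releaseSudog above implement a two-level free list: a
// per-P slice refilled from, and spilled to, a central lock-protected list,
// always in half-capacity batches so a get/put cycle near the boundary does
// not bounce to the central list every time. A toy sketch of the same shape
// (node, cache and get are invented names; the central list would normally
// be guarded by a lock shared between workers):
//
//	type node struct{ next *node }
//
//	type cache struct {
//		local   []*node // per-worker; create with make([]*node, 0, 32)
//		central *node   // shared singly linked free list
//	}
//
//	func (c *cache) get() *node {
//		if len(c.local) == 0 {
//			// Refill to half capacity from the central list.
//			for len(c.local) < cap(c.local)/2 && c.central != nil {
//				n := c.central
//				c.central = n.next
//				n.next = nil
//				c.local = append(c.local, n)
//			}
//			if len(c.local) == 0 {
//				c.local = append(c.local, new(node))
//			}
//		}
//		n := c.local[len(c.local)-1]
//		c.local[len(c.local)-1] = nil
//		c.local = c.local[:len(c.local)-1]
//		return n
//	}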
459 var first, last *sudog 460 for len(pp.sudogcache) > cap(pp.sudogcache)/2 { 461 n := len(pp.sudogcache) 462 p := pp.sudogcache[n-1] 463 pp.sudogcache[n-1] = nil 464 pp.sudogcache = pp.sudogcache[:n-1] 465 if first == nil { 466 first = p 467 } else { 468 last.next = p 469 } 470 last = p 471 } 472 lock(&sched.sudoglock) 473 last.next = sched.sudogcache 474 sched.sudogcache = first 475 unlock(&sched.sudoglock) 476 } 477 pp.sudogcache = append(pp.sudogcache, s) 478 releasem(mp) 479 } 480 481 // called from assembly 482 func badmcall(fn func(*g)) { 483 throw("runtime: mcall called on m->g0 stack") 484 } 485 486 func badmcall2(fn func(*g)) { 487 throw("runtime: mcall function returned") 488 } 489 490 func badreflectcall() { 491 panic(plainError("arg size to reflect.call more than 1GB")) 492 } 493 494 var badmorestackg0Msg = "fatal: morestack on g0\n" 495 496 //go:nosplit 497 //go:nowritebarrierrec 498 func badmorestackg0() { 499 sp := stringStructOf(&badmorestackg0Msg) 500 write(2, sp.str, int32(sp.len)) 501 } 502 503 var badmorestackgsignalMsg = "fatal: morestack on gsignal\n" 504 505 //go:nosplit 506 //go:nowritebarrierrec 507 func badmorestackgsignal() { 508 sp := stringStructOf(&badmorestackgsignalMsg) 509 write(2, sp.str, int32(sp.len)) 510 } 511 512 //go:nosplit 513 func badctxt() { 514 throw("ctxt != 0") 515 } 516 517 func lockedOSThread() bool { 518 gp := getg() 519 return gp.lockedm != 0 && gp.m.lockedg != 0 520 } 521 522 var ( 523 // allgs contains all Gs ever created (including dead Gs), and thus 524 // never shrinks. 525 // 526 // Access via the slice is protected by allglock or stop-the-world. 527 // Readers that cannot take the lock may (carefully!) use the atomic 528 // variables below. 529 allglock mutex 530 allgs []*g 531 532 // allglen and allgptr are atomic variables that contain len(allgs) and 533 // &allgs[0] respectively. Proper ordering depends on totally-ordered 534 // loads and stores. Writes are protected by allglock. 535 // 536 // allgptr is updated before allglen. Readers should read allglen 537 // before allgptr to ensure that allglen is always <= len(allgptr). New 538 // Gs appended during the race can be missed. For a consistent view of 539 // all Gs, allglock must be held. 540 // 541 // allgptr copies should always be stored as a concrete type or 542 // unsafe.Pointer, not uintptr, to ensure that GC can still reach it 543 // even if it points to a stale array. 544 allglen uintptr 545 allgptr **g 546 ) 547 548 func allgadd(gp *g) { 549 if readgstatus(gp) == _Gidle { 550 throw("allgadd: bad status Gidle") 551 } 552 553 lock(&allglock) 554 allgs = append(allgs, gp) 555 if &allgs[0] != allgptr { 556 atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0])) 557 } 558 atomic.Storeuintptr(&allglen, uintptr(len(allgs))) 559 unlock(&allglock) 560 } 561 562 // allGsSnapshot returns a snapshot of the slice of all Gs. 563 // 564 // The world must be stopped or allglock must be held. 565 func allGsSnapshot() []*g { 566 assertWorldStoppedOrLockHeld(&allglock) 567 568 // Because the world is stopped or allglock is held, allgadd 569 // cannot happen concurrently with this. allgs grows 570 // monotonically and existing entries never change, so we can 571 // simply return a copy of the slice header. For added safety, 572 // we trim everything past len because that can still change. 573 return allgs[:len(allgs):len(allgs)] 574 } 575 576 // atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex. 
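// The allgptr/allglen ordering rule above (publish the array pointer before
// the length, read the length before the pointer) is what lets lock-free
// readers index safely into a growing slice. A toy version of the pattern,
// with invented names and sync/atomic in place of the runtime's internal
// atomics:
//
//	// Assumes: import ("sync" "sync/atomic")
//	var (
//		mu       sync.Mutex           // serializes writers
//		elems    []*int               // append-only; entries never change
//		elemsPtr atomic.Pointer[*int] // &elems[0]
//		elemsLen atomic.Uintptr
//	)
//
//	func add(p *int) {
//		mu.Lock()
//		elems = append(elems, p)
//		elemsPtr.Store(&elems[0])           // 1. publish (possibly new) array
//		elemsLen.Store(uintptr(len(elems))) // 2. then publish the new length
//		mu.Unlock()
//	}
//
//	func snapshot() (**int, uintptr) {
//		n := elemsLen.Load() // length first ...
//		p := elemsPtr.Load() // ... then pointer: n never exceeds its array
//		return p, n
//	}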
577 func atomicAllG() (**g, uintptr) { 578 length := atomic.Loaduintptr(&allglen) 579 ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr))) 580 return ptr, length 581 } 582 583 // atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG. 584 func atomicAllGIndex(ptr **g, i uintptr) *g { 585 return *(**g)(add(unsafe.Pointer(ptr), i*goarch.PtrSize)) 586 } 587 588 // forEachG calls fn on every G from allgs. 589 // 590 // forEachG takes a lock to exclude concurrent addition of new Gs. 591 func forEachG(fn func(gp *g)) { 592 lock(&allglock) 593 for _, gp := range allgs { 594 fn(gp) 595 } 596 unlock(&allglock) 597 } 598 599 // forEachGRace calls fn on every G from allgs. 600 // 601 // forEachGRace avoids locking, but does not exclude addition of new Gs during 602 // execution, which may be missed. 603 func forEachGRace(fn func(gp *g)) { 604 ptr, length := atomicAllG() 605 for i := uintptr(0); i < length; i++ { 606 gp := atomicAllGIndex(ptr, i) 607 fn(gp) 608 } 609 return 610 } 611 612 const ( 613 // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. 614 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 615 _GoidCacheBatch = 16 616 ) 617 618 // cpuinit extracts the environment variable GODEBUG from the environment on 619 // Unix-like operating systems and calls internal/cpu.Initialize. 620 func cpuinit() { 621 const prefix = "GODEBUG=" 622 var env string 623 624 switch GOOS { 625 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": 626 cpu.DebugOptions = true 627 628 // Similar to goenv_unix but extracts the environment value for 629 // GODEBUG directly. 630 // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() 631 n := int32(0) 632 for argv_index(argv, argc+1+n) != nil { 633 n++ 634 } 635 636 for i := int32(0); i < n; i++ { 637 p := argv_index(argv, argc+1+i) 638 s := unsafe.String(p, findnull(p)) 639 640 if hasPrefix(s, prefix) { 641 env = gostring(p)[len(prefix):] 642 break 643 } 644 } 645 } 646 647 cpu.Initialize(env) 648 649 // Support cpu feature variables are used in code generated by the compiler 650 // to guard execution of instructions that can not be assumed to be always supported. 651 switch GOARCH { 652 case "386", "amd64": 653 x86HasPOPCNT = cpu.X86.HasPOPCNT 654 x86HasSSE41 = cpu.X86.HasSSE41 655 x86HasFMA = cpu.X86.HasFMA 656 657 case "arm": 658 armHasVFPv4 = cpu.ARM.HasVFPv4 659 660 case "arm64": 661 arm64HasATOMICS = cpu.ARM64.HasATOMICS 662 } 663 } 664 665 // The bootstrap sequence is: 666 // 667 // call osinit 668 // call schedinit 669 // make & queue new G 670 // call runtime·mstart 671 // 672 // The new G calls runtime·main. 673 func schedinit() { 674 lockInit(&sched.lock, lockRankSched) 675 lockInit(&sched.sysmonlock, lockRankSysmon) 676 lockInit(&sched.deferlock, lockRankDefer) 677 lockInit(&sched.sudoglock, lockRankSudog) 678 lockInit(&deadlock, lockRankDeadlock) 679 lockInit(&paniclk, lockRankPanic) 680 lockInit(&allglock, lockRankAllg) 681 lockInit(&allpLock, lockRankAllp) 682 lockInit(&reflectOffs.lock, lockRankReflectOffs) 683 lockInit(&finlock, lockRankFin) 684 lockInit(&trace.bufLock, lockRankTraceBuf) 685 lockInit(&trace.stringsLock, lockRankTraceStrings) 686 lockInit(&trace.lock, lockRankTrace) 687 lockInit(&cpuprof.lock, lockRankCpuprof) 688 lockInit(&trace.stackTab.lock, lockRankTraceStackTab) 689 // Enforce that this lock is always a leaf lock. 
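// cpuinit above has to dig GODEBUG out of the raw argv/environ block because
// general goenvs() cannot yet run before it (see the TODO there). Ordinary
// code sees the same variable through the os package; a toy equivalent of the
// prefix scan (godebugValue is an invented name; assumes "os" and "strings"):
//
//	func godebugValue() string {
//		const prefix = "GODEBUG="
//		for _, kv := range os.Environ() {
//			if strings.HasPrefix(kv, prefix) {
//				return kv[len(prefix):]
//			}
//		}
//		return ""
//	}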
690 // All of this lock's critical sections should be 691 // extremely short. 692 lockInit(&memstats.heapStats.noPLock, lockRankLeafRank) 693 694 // raceinit must be the first call to race detector. 695 // In particular, it must be done before mallocinit below calls racemapshadow. 696 gp := getg() 697 if raceenabled { 698 gp.racectx, raceprocctx0 = raceinit() 699 } 700 701 sched.maxmcount = 10000 702 703 // The world starts stopped. 704 worldStopped() 705 706 moduledataverify() 707 stackinit() 708 mallocinit() 709 cpuinit() // must run before alginit 710 alginit() // maps, hash, fastrand must not be used before this call 711 fastrandinit() // must run before mcommoninit 712 mcommoninit(gp.m, -1) 713 modulesinit() // provides activeModules 714 typelinksinit() // uses maps, activeModules 715 itabsinit() // uses activeModules 716 stkobjinit() // must run before GC starts 717 718 sigsave(&gp.m.sigmask) 719 initSigmask = gp.m.sigmask 720 721 goargs() 722 goenvs() 723 parsedebugvars() 724 gcinit() 725 726 // if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile. 727 // Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is 728 // set to true by the linker, it means that nothing is consuming the profile, it is 729 // safe to set MemProfileRate to 0. 730 if disableMemoryProfiling { 731 MemProfileRate = 0 732 } 733 734 lock(&sched.lock) 735 sched.lastpoll.Store(nanotime()) 736 procs := ncpu 737 if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { 738 procs = n 739 } 740 if procresize(procs) != nil { 741 throw("unknown runnable goroutine during bootstrap") 742 } 743 unlock(&sched.lock) 744 745 // World is effectively started now, as P's can run. 746 worldStarted() 747 748 // For cgocheck > 1, we turn on the write barrier at all times 749 // and check all pointer writes. We can't do this until after 750 // procresize because the write barrier needs a P. 751 if debug.cgocheck > 1 { 752 writeBarrier.cgo = true 753 writeBarrier.enabled = true 754 for _, pp := range allp { 755 pp.wbBuf.reset() 756 } 757 } 758 759 if buildVersion == "" { 760 // Condition should never trigger. This code just serves 761 // to ensure runtime·buildVersion is kept in the resulting binary. 762 buildVersion = "unknown" 763 } 764 if len(modinfo) == 1 { 765 // Condition should never trigger. This code just serves 766 // to ensure runtime·modinfo is kept in the resulting binary. 767 modinfo = "" 768 } 769 } 770 771 func dumpgstatus(gp *g) { 772 thisg := getg() 773 print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") 774 print("runtime: getg: g=", thisg, ", goid=", thisg.goid, ", g->atomicstatus=", readgstatus(thisg), "\n") 775 } 776 777 // sched.lock must be held. 778 func checkmcount() { 779 assertLockHeld(&sched.lock) 780 781 if mcount() > sched.maxmcount { 782 print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n") 783 throw("thread exhaustion") 784 } 785 } 786 787 // mReserveID returns the next ID to use for a new m. This new m is immediately 788 // considered 'running' by checkdead. 789 // 790 // sched.lock must be held. 791 func mReserveID() int64 { 792 assertLockHeld(&sched.lock) 793 794 if sched.mnext+1 < sched.mnext { 795 throw("runtime: thread ID overflow") 796 } 797 id := sched.mnext 798 sched.mnext++ 799 checkmcount() 800 return id 801 } 802 803 // Pre-allocated ID may be passed as 'id', or omitted by passing -1. 
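// schedinit above seeds the P count from ncpu and the GOMAXPROCS environment
// variable before the first procresize. From user code the same knob is
// runtime.GOMAXPROCS:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		// An argument of 0 only reports the current setting.
//		fmt.Println("GOMAXPROCS:", runtime.GOMAXPROCS(0))
//		// A positive argument resizes the P set and returns the
//		// previous value.
//		prev := runtime.GOMAXPROCS(2)
//		fmt.Println("was:", prev)
//	}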
804 func mcommoninit(mp *m, id int64) { 805 gp := getg() 806 807 // g0 stack won't make sense for user (and is not necessary unwindable). 808 if gp != gp.m.g0 { 809 callers(1, mp.createstack[:]) 810 } 811 812 lock(&sched.lock) 813 814 if id >= 0 { 815 mp.id = id 816 } else { 817 mp.id = mReserveID() 818 } 819 820 lo := uint32(int64Hash(uint64(mp.id), fastrandseed)) 821 hi := uint32(int64Hash(uint64(cputicks()), ^fastrandseed)) 822 if lo|hi == 0 { 823 hi = 1 824 } 825 // Same behavior as for 1.17. 826 // TODO: Simplify ths. 827 if goarch.BigEndian { 828 mp.fastrand = uint64(lo)<<32 | uint64(hi) 829 } else { 830 mp.fastrand = uint64(hi)<<32 | uint64(lo) 831 } 832 833 mpreinit(mp) 834 if mp.gsignal != nil { 835 mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard 836 } 837 838 // Add to allm so garbage collector doesn't free g->m 839 // when it is just in a register or thread-local storage. 840 mp.alllink = allm 841 842 // NumCgoCall() iterates over allm w/o schedlock, 843 // so we need to publish it safely. 844 atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp)) 845 unlock(&sched.lock) 846 847 // Allocate memory to hold a cgo traceback if the cgo call crashes. 848 if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" { 849 mp.cgoCallers = new(cgoCallers) 850 } 851 } 852 853 func (mp *m) becomeSpinning() { 854 mp.spinning = true 855 sched.nmspinning.Add(1) 856 sched.needspinning.Store(0) 857 } 858 859 var fastrandseed uintptr 860 861 func fastrandinit() { 862 s := (*[unsafe.Sizeof(fastrandseed)]byte)(unsafe.Pointer(&fastrandseed))[:] 863 getRandomData(s) 864 } 865 866 // Mark gp ready to run. 867 func ready(gp *g, traceskip int, next bool) { 868 if trace.enabled { 869 traceGoUnpark(gp, traceskip) 870 } 871 872 status := readgstatus(gp) 873 874 // Mark runnable. 875 mp := acquirem() // disable preemption because it can be holding p in a local var 876 if status&^_Gscan != _Gwaiting { 877 dumpgstatus(gp) 878 throw("bad g->status in ready") 879 } 880 881 // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq 882 casgstatus(gp, _Gwaiting, _Grunnable) 883 runqput(mp.p.ptr(), gp, next) 884 wakep() 885 releasem(mp) 886 } 887 888 // freezeStopWait is a large value that freezetheworld sets 889 // sched.stopwait to in order to request that all Gs permanently stop. 890 const freezeStopWait = 0x7fffffff 891 892 // freezing is set to non-zero if the runtime is trying to freeze the 893 // world. 894 var freezing atomic.Bool 895 896 // Similar to stopTheWorld but best-effort and can be called several times. 897 // There is no reverse operation, used during crashing. 898 // This function must not lock any mutexes. 899 func freezetheworld() { 900 freezing.Store(true) 901 // stopwait and preemption requests can be lost 902 // due to races with concurrently executing threads, 903 // so try several times 904 for i := 0; i < 5; i++ { 905 // this should tell the scheduler to not start any new goroutines 906 sched.stopwait = freezeStopWait 907 sched.gcwaiting.Store(true) 908 // this should stop running goroutines 909 if !preemptall() { 910 break // no running goroutines 911 } 912 usleep(1000) 913 } 914 // to be sure 915 usleep(1000) 916 preemptall() 917 usleep(1000) 918 } 919 920 // All reads and writes of g's status go through readgstatus, casgstatus 921 // castogscanstatus, casfrom_Gscanstatus. 922 // 923 //go:nosplit 924 func readgstatus(gp *g) uint32 { 925 return gp.atomicstatus.Load() 926 } 927 928 // The Gscanstatuses are acting like locks and this releases them. 
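// The _Gscan bit described above behaves like a try-lock on the status word:
// castogscanstatus "acquires" a G by atomically setting the bit, and
// casfrom_Gscanstatus releases it. A toy CAS-guarded bit lock in the same
// spirit (scanBit and the function names are invented; assumes "sync/atomic"):
//
//	const scanBit = 1 << 12
//
//	// tryAcquire succeeds only if the word still holds the expected base
//	// state, so a concurrent state change makes the caller retry.
//	func tryAcquire(status *atomic.Uint32, base uint32) bool {
//		return status.CompareAndSwap(base, base|scanBit)
//	}
//
//	func release(status *atomic.Uint32, base uint32) {
//		if !status.CompareAndSwap(base|scanBit, base) {
//			panic("status word changed while the scan bit was held")
//		}
//	}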
929 // If it proves to be a performance hit we should be able to make these 930 // simple atomic stores but for now we are going to throw if 931 // we see an inconsistent state. 932 func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { 933 success := false 934 935 // Check that transition is valid. 936 switch oldval { 937 default: 938 print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n") 939 dumpgstatus(gp) 940 throw("casfrom_Gscanstatus:top gp->status is not in scan state") 941 case _Gscanrunnable, 942 _Gscanwaiting, 943 _Gscanrunning, 944 _Gscansyscall, 945 _Gscanpreempted: 946 if newval == oldval&^_Gscan { 947 success = gp.atomicstatus.CompareAndSwap(oldval, newval) 948 } 949 } 950 if !success { 951 print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n") 952 dumpgstatus(gp) 953 throw("casfrom_Gscanstatus: gp->status is not in scan state") 954 } 955 releaseLockRank(lockRankGscan) 956 } 957 958 // This will return false if the gp is not in the expected status and the cas fails. 959 // This acts like a lock acquire while the casfromgstatus acts like a lock release. 960 func castogscanstatus(gp *g, oldval, newval uint32) bool { 961 switch oldval { 962 case _Grunnable, 963 _Grunning, 964 _Gwaiting, 965 _Gsyscall: 966 if newval == oldval|_Gscan { 967 r := gp.atomicstatus.CompareAndSwap(oldval, newval) 968 if r { 969 acquireLockRank(lockRankGscan) 970 } 971 return r 972 973 } 974 } 975 print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n") 976 throw("castogscanstatus") 977 panic("not reached") 978 } 979 980 // casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track 981 // various latencies on every transition instead of sampling them. 982 var casgstatusAlwaysTrack = false 983 984 // If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus 985 // and casfrom_Gscanstatus instead. 986 // casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that 987 // put it in the Gscan state is finished. 988 // 989 //go:nosplit 990 func casgstatus(gp *g, oldval, newval uint32) { 991 if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval { 992 systemstack(func() { 993 print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n") 994 throw("casgstatus: bad incoming values") 995 }) 996 } 997 998 acquireLockRank(lockRankGscan) 999 releaseLockRank(lockRankGscan) 1000 1001 // See https://golang.org/cl/21503 for justification of the yield delay. 1002 const yieldDelay = 5 * 1000 1003 var nextYield int64 1004 1005 // loop if gp->atomicstatus is in a scan state giving 1006 // GC time to finish and change the state to oldval. 1007 for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ { 1008 if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable { 1009 throw("casgstatus: waiting for Gwaiting but is Grunnable") 1010 } 1011 if i == 0 { 1012 nextYield = nanotime() + yieldDelay 1013 } 1014 if nanotime() < nextYield { 1015 for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ { 1016 procyield(1) 1017 } 1018 } else { 1019 osyield() 1020 nextYield = nanotime() + yieldDelay/2 1021 } 1022 } 1023 1024 if oldval == _Grunning { 1025 // Track every gTrackingPeriod time a goroutine transitions out of running. 
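// The retry loop in casgstatus above spins briefly with procyield and then
// falls back to osyield, so a thread that was descheduled while holding the
// Gscan "lock" gets a chance to run and finish. Outside the runtime the same
// shape looks like this (casWithBackoff is an invented name; assumes
// "runtime" and "sync/atomic"):
//
//	func casWithBackoff(v *atomic.Uint32, old, new uint32) {
//		for i := 1; !v.CompareAndSwap(old, new); i++ {
//			if i%10 != 0 {
//				continue // brief busy retry, like procyield
//			}
//			runtime.Gosched() // then give up the thread, like osyield
//		}
//	}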
1026 if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 { 1027 gp.tracking = true 1028 } 1029 gp.trackingSeq++ 1030 } 1031 if !gp.tracking { 1032 return 1033 } 1034 1035 // Handle various kinds of tracking. 1036 // 1037 // Currently: 1038 // - Time spent in runnable. 1039 // - Time spent blocked on a sync.Mutex or sync.RWMutex. 1040 switch oldval { 1041 case _Grunnable: 1042 // We transitioned out of runnable, so measure how much 1043 // time we spent in this state and add it to 1044 // runnableTime. 1045 now := nanotime() 1046 gp.runnableTime += now - gp.trackingStamp 1047 gp.trackingStamp = 0 1048 case _Gwaiting: 1049 if !gp.waitreason.isMutexWait() { 1050 // Not blocking on a lock. 1051 break 1052 } 1053 // Blocking on a lock, measure it. Note that because we're 1054 // sampling, we have to multiply by our sampling period to get 1055 // a more representative estimate of the absolute value. 1056 // gTrackingPeriod also represents an accurate sampling period 1057 // because we can only enter this state from _Grunning. 1058 now := nanotime() 1059 sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod) 1060 gp.trackingStamp = 0 1061 } 1062 switch newval { 1063 case _Gwaiting: 1064 if !gp.waitreason.isMutexWait() { 1065 // Not blocking on a lock. 1066 break 1067 } 1068 // Blocking on a lock. Write down the timestamp. 1069 now := nanotime() 1070 gp.trackingStamp = now 1071 case _Grunnable: 1072 // We just transitioned into runnable, so record what 1073 // time that happened. 1074 now := nanotime() 1075 gp.trackingStamp = now 1076 case _Grunning: 1077 // We're transitioning into running, so turn off 1078 // tracking and record how much time we spent in 1079 // runnable. 1080 gp.tracking = false 1081 sched.timeToRun.record(gp.runnableTime) 1082 gp.runnableTime = 0 1083 } 1084 } 1085 1086 // casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason. 1087 // 1088 // Use this over casgstatus when possible to ensure that a waitreason is set. 1089 func casGToWaiting(gp *g, old uint32, reason waitReason) { 1090 // Set the wait reason before calling casgstatus, because casgstatus will use it. 1091 gp.waitreason = reason 1092 casgstatus(gp, old, _Gwaiting) 1093 } 1094 1095 // casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable. 1096 // Returns old status. Cannot call casgstatus directly, because we are racing with an 1097 // async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus, 1098 // it might have become Grunnable by the time we get to the cas. If we called casgstatus, 1099 // it would loop waiting for the status to go back to Gwaiting, which it never will. 1100 // 1101 //go:nosplit 1102 func casgcopystack(gp *g) uint32 { 1103 for { 1104 oldstatus := readgstatus(gp) &^ _Gscan 1105 if oldstatus != _Gwaiting && oldstatus != _Grunnable { 1106 throw("copystack: bad status, not Gwaiting or Grunnable") 1107 } 1108 if gp.atomicstatus.CompareAndSwap(oldstatus, _Gcopystack) { 1109 return oldstatus 1110 } 1111 } 1112 } 1113 1114 // casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted. 1115 // 1116 // TODO(austin): This is the only status operation that both changes 1117 // the status and locks the _Gscan bit. Rethink this. 
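// The mutex-wait accounting above records only one transition in every
// gTrackingPeriod and multiplies the measured duration back up, trading
// accuracy for near-zero cost on the hot path. The estimator, reduced to a
// toy (samplePeriod and estimator are invented names):
//
//	const samplePeriod = 8
//
//	type estimator struct {
//		seq   uint64
//		total int64 // estimated sum of all durations, in nanoseconds
//	}
//
//	func (e *estimator) observe(d int64) {
//		if e.seq%samplePeriod == 0 {
//			// One sampled duration stands in for samplePeriod of them.
//			e.total += d * samplePeriod
//		}
//		e.seq++
//	}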
1118 func casGToPreemptScan(gp *g, old, new uint32) { 1119 if old != _Grunning || new != _Gscan|_Gpreempted { 1120 throw("bad g transition") 1121 } 1122 acquireLockRank(lockRankGscan) 1123 for !gp.atomicstatus.CompareAndSwap(_Grunning, _Gscan|_Gpreempted) { 1124 } 1125 } 1126 1127 // casGFromPreempted attempts to transition gp from _Gpreempted to 1128 // _Gwaiting. If successful, the caller is responsible for 1129 // re-scheduling gp. 1130 func casGFromPreempted(gp *g, old, new uint32) bool { 1131 if old != _Gpreempted || new != _Gwaiting { 1132 throw("bad g transition") 1133 } 1134 gp.waitreason = waitReasonPreempted 1135 return gp.atomicstatus.CompareAndSwap(_Gpreempted, _Gwaiting) 1136 } 1137 1138 // stopTheWorld stops all P's from executing goroutines, interrupting 1139 // all goroutines at GC safe points and records reason as the reason 1140 // for the stop. On return, only the current goroutine's P is running. 1141 // stopTheWorld must not be called from a system stack and the caller 1142 // must not hold worldsema. The caller must call startTheWorld when 1143 // other P's should resume execution. 1144 // 1145 // stopTheWorld is safe for multiple goroutines to call at the 1146 // same time. Each will execute its own stop, and the stops will 1147 // be serialized. 1148 // 1149 // This is also used by routines that do stack dumps. If the system is 1150 // in panic or being exited, this may not reliably stop all 1151 // goroutines. 1152 func stopTheWorld(reason string) { 1153 semacquire(&worldsema) 1154 gp := getg() 1155 gp.m.preemptoff = reason 1156 systemstack(func() { 1157 // Mark the goroutine which called stopTheWorld preemptible so its 1158 // stack may be scanned. 1159 // This lets a mark worker scan us while we try to stop the world 1160 // since otherwise we could get in a mutual preemption deadlock. 1161 // We must not modify anything on the G stack because a stack shrink 1162 // may occur. A stack shrink is otherwise OK though because in order 1163 // to return from this function (and to leave the system stack) we 1164 // must have preempted all goroutines, including any attempting 1165 // to scan our stack, in which case, any stack shrinking will 1166 // have already completed by the time we exit. 1167 // Don't provide a wait reason because we're still executing. 1168 casGToWaiting(gp, _Grunning, waitReasonStoppingTheWorld) 1169 stopTheWorldWithSema() 1170 casgstatus(gp, _Gwaiting, _Grunning) 1171 }) 1172 } 1173 1174 // startTheWorld undoes the effects of stopTheWorld. 1175 func startTheWorld() { 1176 systemstack(func() { startTheWorldWithSema(false) }) 1177 1178 // worldsema must be held over startTheWorldWithSema to ensure 1179 // gomaxprocs cannot change while worldsema is held. 1180 // 1181 // Release worldsema with direct handoff to the next waiter, but 1182 // acquirem so that semrelease1 doesn't try to yield our time. 1183 // 1184 // Otherwise if e.g. ReadMemStats is being called in a loop, 1185 // it might stomp on other attempts to stop the world, such as 1186 // for starting or ending GC. The operation this blocks is 1187 // so heavy-weight that we should just try to be as fair as 1188 // possible here. 1189 // 1190 // We don't want to just allow us to get preempted between now 1191 // and releasing the semaphore because then we keep everyone 1192 // (including, for example, GCs) waiting longer. 
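// ReadMemStats, mentioned above, is one of the user-visible operations that
// takes worldsema and briefly stops the world, which is why calling it in a
// tight loop competes with the GC for these semaphores:
//
//	package main
//
//	import "runtime"
//
//	func main() {
//		var ms runtime.MemStats
//		// Stops the world, copies the statistics, starts the world again.
//		runtime.ReadMemStats(&ms)
//		println("heap in use:", ms.HeapInuse)
//	}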
1193 mp := acquirem() 1194 mp.preemptoff = "" 1195 semrelease1(&worldsema, true, 0) 1196 releasem(mp) 1197 } 1198 1199 // stopTheWorldGC has the same effect as stopTheWorld, but blocks 1200 // until the GC is not running. It also blocks a GC from starting 1201 // until startTheWorldGC is called. 1202 func stopTheWorldGC(reason string) { 1203 semacquire(&gcsema) 1204 stopTheWorld(reason) 1205 } 1206 1207 // startTheWorldGC undoes the effects of stopTheWorldGC. 1208 func startTheWorldGC() { 1209 startTheWorld() 1210 semrelease(&gcsema) 1211 } 1212 1213 // Holding worldsema grants an M the right to try to stop the world. 1214 var worldsema uint32 = 1 1215 1216 // Holding gcsema grants the M the right to block a GC, and blocks 1217 // until the current GC is done. In particular, it prevents gomaxprocs 1218 // from changing concurrently. 1219 // 1220 // TODO(mknyszek): Once gomaxprocs and the execution tracer can handle 1221 // being changed/enabled during a GC, remove this. 1222 var gcsema uint32 = 1 1223 1224 // stopTheWorldWithSema is the core implementation of stopTheWorld. 1225 // The caller is responsible for acquiring worldsema and disabling 1226 // preemption first and then should stopTheWorldWithSema on the system 1227 // stack: 1228 // 1229 // semacquire(&worldsema, 0) 1230 // m.preemptoff = "reason" 1231 // systemstack(stopTheWorldWithSema) 1232 // 1233 // When finished, the caller must either call startTheWorld or undo 1234 // these three operations separately: 1235 // 1236 // m.preemptoff = "" 1237 // systemstack(startTheWorldWithSema) 1238 // semrelease(&worldsema) 1239 // 1240 // It is allowed to acquire worldsema once and then execute multiple 1241 // startTheWorldWithSema/stopTheWorldWithSema pairs. 1242 // Other P's are able to execute between successive calls to 1243 // startTheWorldWithSema and stopTheWorldWithSema. 1244 // Holding worldsema causes any other goroutines invoking 1245 // stopTheWorld to block. 1246 func stopTheWorldWithSema() { 1247 gp := getg() 1248 1249 // If we hold a lock, then we won't be able to stop another M 1250 // that is blocked trying to acquire the lock. 1251 if gp.m.locks > 0 { 1252 throw("stopTheWorld: holding locks") 1253 } 1254 1255 lock(&sched.lock) 1256 sched.stopwait = gomaxprocs 1257 sched.gcwaiting.Store(true) 1258 preemptall() 1259 // stop current P 1260 gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic. 
1261 sched.stopwait-- 1262 // try to retake all P's in Psyscall status 1263 for _, pp := range allp { 1264 s := pp.status 1265 if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) { 1266 if trace.enabled { 1267 traceGoSysBlock(pp) 1268 traceProcStop(pp) 1269 } 1270 pp.syscalltick++ 1271 sched.stopwait-- 1272 } 1273 } 1274 // stop idle P's 1275 now := nanotime() 1276 for { 1277 pp, _ := pidleget(now) 1278 if pp == nil { 1279 break 1280 } 1281 pp.status = _Pgcstop 1282 sched.stopwait-- 1283 } 1284 wait := sched.stopwait > 0 1285 unlock(&sched.lock) 1286 1287 // wait for remaining P's to stop voluntarily 1288 if wait { 1289 for { 1290 // wait for 100us, then try to re-preempt in case of any races 1291 if notetsleep(&sched.stopnote, 100*1000) { 1292 noteclear(&sched.stopnote) 1293 break 1294 } 1295 preemptall() 1296 } 1297 } 1298 1299 // sanity checks 1300 bad := "" 1301 if sched.stopwait != 0 { 1302 bad = "stopTheWorld: not stopped (stopwait != 0)" 1303 } else { 1304 for _, pp := range allp { 1305 if pp.status != _Pgcstop { 1306 bad = "stopTheWorld: not stopped (status != _Pgcstop)" 1307 } 1308 } 1309 } 1310 if freezing.Load() { 1311 // Some other thread is panicking. This can cause the 1312 // sanity checks above to fail if the panic happens in 1313 // the signal handler on a stopped thread. Either way, 1314 // we should halt this thread. 1315 lock(&deadlock) 1316 lock(&deadlock) 1317 } 1318 if bad != "" { 1319 throw(bad) 1320 } 1321 1322 worldStopped() 1323 } 1324 1325 func startTheWorldWithSema(emitTraceEvent bool) int64 { 1326 assertWorldStopped() 1327 1328 mp := acquirem() // disable preemption because it can be holding p in a local var 1329 if netpollinited() { 1330 list := netpoll(0) // non-blocking 1331 injectglist(&list) 1332 } 1333 lock(&sched.lock) 1334 1335 procs := gomaxprocs 1336 if newprocs != 0 { 1337 procs = newprocs 1338 newprocs = 0 1339 } 1340 p1 := procresize(procs) 1341 sched.gcwaiting.Store(false) 1342 if sched.sysmonwait.Load() { 1343 sched.sysmonwait.Store(false) 1344 notewakeup(&sched.sysmonnote) 1345 } 1346 unlock(&sched.lock) 1347 1348 worldStarted() 1349 1350 for p1 != nil { 1351 p := p1 1352 p1 = p1.link.ptr() 1353 if p.m != 0 { 1354 mp := p.m.ptr() 1355 p.m = 0 1356 if mp.nextp != 0 { 1357 throw("startTheWorld: inconsistent mp->nextp") 1358 } 1359 mp.nextp.set(p) 1360 notewakeup(&mp.park) 1361 } else { 1362 // Start M to run P. Do not start another M below. 1363 newm(nil, p, -1) 1364 } 1365 } 1366 1367 // Capture start-the-world time before doing clean-up tasks. 1368 startTime := nanotime() 1369 if emitTraceEvent { 1370 traceGCSTWDone() 1371 } 1372 1373 // Wakeup an additional proc in case we have excessive runnable goroutines 1374 // in local queues or in the global queue. If we don't, the proc will park itself. 1375 // If we have lots of excessive work, resetspinning will unpark additional procs as necessary. 1376 wakep() 1377 1378 releasem(mp) 1379 1380 return startTime 1381 } 1382 1383 // usesLibcall indicates whether this runtime performs system calls 1384 // via libcall. 1385 func usesLibcall() bool { 1386 switch GOOS { 1387 case "aix", "darwin", "illumos", "ios", "solaris", "windows": 1388 return true 1389 case "openbsd": 1390 return GOARCH == "386" || GOARCH == "amd64" || GOARCH == "arm" || GOARCH == "arm64" 1391 } 1392 return false 1393 } 1394 1395 // mStackIsSystemAllocated indicates whether this runtime starts on a 1396 // system-allocated stack. 
1397 func mStackIsSystemAllocated() bool { 1398 switch GOOS { 1399 case "aix", "darwin", "plan9", "illumos", "ios", "solaris", "windows": 1400 return true 1401 case "openbsd": 1402 switch GOARCH { 1403 case "386", "amd64", "arm", "arm64": 1404 return true 1405 } 1406 } 1407 return false 1408 } 1409 1410 // mstart is the entry-point for new Ms. 1411 // It is written in assembly, uses ABI0, is marked TOPFRAME, and calls mstart0. 1412 func mstart() 1413 1414 // mstart0 is the Go entry-point for new Ms. 1415 // This must not split the stack because we may not even have stack 1416 // bounds set up yet. 1417 // 1418 // May run during STW (because it doesn't have a P yet), so write 1419 // barriers are not allowed. 1420 // 1421 //go:nosplit 1422 //go:nowritebarrierrec 1423 func mstart0() { 1424 gp := getg() 1425 1426 osStack := gp.stack.lo == 0 1427 if osStack { 1428 // Initialize stack bounds from system stack. 1429 // Cgo may have left stack size in stack.hi. 1430 // minit may update the stack bounds. 1431 // 1432 // Note: these bounds may not be very accurate. 1433 // We set hi to &size, but there are things above 1434 // it. The 1024 is supposed to compensate this, 1435 // but is somewhat arbitrary. 1436 size := gp.stack.hi 1437 if size == 0 { 1438 size = 8192 * sys.StackGuardMultiplier 1439 } 1440 gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size))) 1441 gp.stack.lo = gp.stack.hi - size + 1024 1442 } 1443 // Initialize stack guard so that we can start calling regular 1444 // Go code. 1445 gp.stackguard0 = gp.stack.lo + _StackGuard 1446 // This is the g0, so we can also call go:systemstack 1447 // functions, which check stackguard1. 1448 gp.stackguard1 = gp.stackguard0 1449 mstart1() 1450 1451 // Exit this thread. 1452 if mStackIsSystemAllocated() { 1453 // Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate 1454 // the stack, but put it in gp.stack before mstart, 1455 // so the logic above hasn't set osStack yet. 1456 osStack = true 1457 } 1458 mexit(osStack) 1459 } 1460 1461 // The go:noinline is to guarantee the getcallerpc/getcallersp below are safe, 1462 // so that we can set up g0.sched to return to the call of mstart1 above. 1463 // 1464 //go:noinline 1465 func mstart1() { 1466 gp := getg() 1467 1468 if gp != gp.m.g0 { 1469 throw("bad runtime·mstart") 1470 } 1471 1472 // Set up m.g0.sched as a label returning to just 1473 // after the mstart1 call in mstart0 above, for use by goexit0 and mcall. 1474 // We're never coming back to mstart1 after we call schedule, 1475 // so other calls can reuse the current frame. 1476 // And goexit0 does a gogo that needs to return from mstart1 1477 // and let mstart0 exit the thread. 1478 gp.sched.g = guintptr(unsafe.Pointer(gp)) 1479 gp.sched.pc = getcallerpc() 1480 gp.sched.sp = getcallersp() 1481 1482 asminit() 1483 minit() 1484 1485 // Install signal handlers; after minit so that minit can 1486 // prepare the thread to be able to handle the signals. 1487 if gp.m == &m0 { 1488 mstartm0() 1489 } 1490 1491 if fn := gp.m.mstartfn; fn != nil { 1492 fn() 1493 } 1494 1495 if gp.m != &m0 { 1496 acquirep(gp.m.nextp.ptr()) 1497 gp.m.nextp = 0 1498 } 1499 schedule() 1500 } 1501 1502 // mstartm0 implements part of mstart1 that only runs on the m0. 1503 // 1504 // Write barriers are allowed here because we know the GC can't be 1505 // running yet, so they'll be no-ops. 1506 // 1507 //go:yeswritebarrierrec 1508 func mstartm0() { 1509 // Create an extra M for callbacks on threads not created by Go. 
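// mstart0 above estimates the bounds of a system-allocated stack from the
// address of a local variable when the OS didn't report the real bounds. The
// same trick outside the runtime (approxStackBase is an invented name;
// assumes "unsafe"):
//
//	// approxStackBase returns an address inside the caller's stack frame.
//	// mstart0 runs in the outermost frame of a new thread, so for it such
//	// an address is close to the high end of the whole stack; it then
//	// subtracts an assumed size (plus 1024 bytes of slack) for the low end.
//	func approxStackBase() uintptr {
//		var marker byte
//		return uintptr(unsafe.Pointer(&marker))
//	}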
1510 // An extra M is also needed on Windows for callbacks created by 1511 // syscall.NewCallback. See issue #6751 for details. 1512 if (iscgo || GOOS == "windows") && !cgoHasExtraM { 1513 cgoHasExtraM = true 1514 newextram() 1515 } 1516 initsig(false) 1517 } 1518 1519 // mPark causes a thread to park itself, returning once woken. 1520 // 1521 //go:nosplit 1522 func mPark() { 1523 gp := getg() 1524 notesleep(&gp.m.park) 1525 noteclear(&gp.m.park) 1526 } 1527 1528 // mexit tears down and exits the current thread. 1529 // 1530 // Don't call this directly to exit the thread, since it must run at 1531 // the top of the thread stack. Instead, use gogo(&gp.m.g0.sched) to 1532 // unwind the stack to the point that exits the thread. 1533 // 1534 // It is entered with m.p != nil, so write barriers are allowed. It 1535 // will release the P before exiting. 1536 // 1537 //go:yeswritebarrierrec 1538 func mexit(osStack bool) { 1539 mp := getg().m 1540 1541 if mp == &m0 { 1542 // This is the main thread. Just wedge it. 1543 // 1544 // On Linux, exiting the main thread puts the process 1545 // into a non-waitable zombie state. On Plan 9, 1546 // exiting the main thread unblocks wait even though 1547 // other threads are still running. On Solaris we can 1548 // neither exitThread nor return from mstart. Other 1549 // bad things probably happen on other platforms. 1550 // 1551 // We could try to clean up this M more before wedging 1552 // it, but that complicates signal handling. 1553 handoffp(releasep()) 1554 lock(&sched.lock) 1555 sched.nmfreed++ 1556 checkdead() 1557 unlock(&sched.lock) 1558 mPark() 1559 throw("locked m0 woke up") 1560 } 1561 1562 sigblock(true) 1563 unminit() 1564 1565 // Free the gsignal stack. 1566 if mp.gsignal != nil { 1567 stackfree(mp.gsignal.stack) 1568 // On some platforms, when calling into VDSO (e.g. nanotime) 1569 // we store our g on the gsignal stack, if there is one. 1570 // Now the stack is freed, unlink it from the m, so we 1571 // won't write to it when calling VDSO code. 1572 mp.gsignal = nil 1573 } 1574 1575 // Remove m from allm. 1576 lock(&sched.lock) 1577 for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { 1578 if *pprev == mp { 1579 *pprev = mp.alllink 1580 goto found 1581 } 1582 } 1583 throw("m not found in allm") 1584 found: 1585 // Delay reaping m until it's done with the stack. 1586 // 1587 // Put mp on the free list, though it will not be reaped while freeWait 1588 // is freeMWait. mp is no longer reachable via allm, so even if it is 1589 // on an OS stack, we must keep a reference to mp alive so that the GC 1590 // doesn't free mp while we are still using it. 1591 // 1592 // Note that the free list must not be linked through alllink because 1593 // some functions walk allm without locking, so may be using alllink. 1594 mp.freeWait.Store(freeMWait) 1595 mp.freelink = sched.freem 1596 sched.freem = mp 1597 unlock(&sched.lock) 1598 1599 atomic.Xadd64(&ncgocall, int64(mp.ncgocall)) 1600 1601 // Release the P. 1602 handoffp(releasep()) 1603 // After this point we must not have write barriers. 1604 1605 // Invoke the deadlock detector. This must happen after 1606 // handoffp because it may have started a new M to take our 1607 // P's work. 1608 lock(&sched.lock) 1609 sched.nmfreed++ 1610 checkdead() 1611 unlock(&sched.lock) 1612 1613 if GOOS == "darwin" || GOOS == "ios" { 1614 // Make sure pendingPreemptSignals is correct when an M exits. 1615 // For #41702. 
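// mexit above unlinks mp from allm by walking a pointer to the link field
// rather than keeping a separate "previous" node, which removes the head
// special case. The same idiom on a toy list (node and remove are invented
// names):
//
//	type node struct {
//		next *node
//	}
//
//	func remove(head **node, target *node) bool {
//		for pp := head; *pp != nil; pp = &(*pp).next {
//			if *pp == target {
//				*pp = target.next // head and interior nodes alike
//				return true
//			}
//		}
//		return false
//	}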
1616 if mp.signalPending.Load() != 0 { 1617 pendingPreemptSignals.Add(-1) 1618 } 1619 } 1620 1621 // Destroy all allocated resources. After this is called, we may no 1622 // longer take any locks. 1623 mdestroy(mp) 1624 1625 if osStack { 1626 // No more uses of mp, so it is safe to drop the reference. 1627 mp.freeWait.Store(freeMRef) 1628 1629 // Return from mstart and let the system thread 1630 // library free the g0 stack and terminate the thread. 1631 return 1632 } 1633 1634 // mstart is the thread's entry point, so there's nothing to 1635 // return to. Exit the thread directly. exitThread will clear 1636 // m.freeWait when it's done with the stack and the m can be 1637 // reaped. 1638 exitThread(&mp.freeWait) 1639 } 1640 1641 // forEachP calls fn(p) for every P p when p reaches a GC safe point. 1642 // If a P is currently executing code, this will bring the P to a GC 1643 // safe point and execute fn on that P. If the P is not executing code 1644 // (it is idle or in a syscall), this will call fn(p) directly while 1645 // preventing the P from exiting its state. This does not ensure that 1646 // fn will run on every CPU executing Go code, but it acts as a global 1647 // memory barrier. GC uses this as a "ragged barrier." 1648 // 1649 // The caller must hold worldsema. 1650 // 1651 //go:systemstack 1652 func forEachP(fn func(*p)) { 1653 mp := acquirem() 1654 pp := getg().m.p.ptr() 1655 1656 lock(&sched.lock) 1657 if sched.safePointWait != 0 { 1658 throw("forEachP: sched.safePointWait != 0") 1659 } 1660 sched.safePointWait = gomaxprocs - 1 1661 sched.safePointFn = fn 1662 1663 // Ask all Ps to run the safe point function. 1664 for _, p2 := range allp { 1665 if p2 != pp { 1666 atomic.Store(&p2.runSafePointFn, 1) 1667 } 1668 } 1669 preemptall() 1670 1671 // Any P entering _Pidle or _Psyscall from now on will observe 1672 // p.runSafePointFn == 1 and will call runSafePointFn when 1673 // changing its status to _Pidle/_Psyscall. 1674 1675 // Run safe point function for all idle Ps. sched.pidle will 1676 // not change because we hold sched.lock. 1677 for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() { 1678 if atomic.Cas(&p.runSafePointFn, 1, 0) { 1679 fn(p) 1680 sched.safePointWait-- 1681 } 1682 } 1683 1684 wait := sched.safePointWait > 0 1685 unlock(&sched.lock) 1686 1687 // Run fn for the current P. 1688 fn(pp) 1689 1690 // Force Ps currently in _Psyscall into _Pidle and hand them 1691 // off to induce safe point function execution. 1692 for _, p2 := range allp { 1693 s := p2.status 1694 if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) { 1695 if trace.enabled { 1696 traceGoSysBlock(p2) 1697 traceProcStop(p2) 1698 } 1699 p2.syscalltick++ 1700 handoffp(p2) 1701 } 1702 } 1703 1704 // Wait for remaining Ps to run fn. 1705 if wait { 1706 for { 1707 // Wait for 100us, then try to re-preempt in 1708 // case of any races. 1709 // 1710 // Requires system stack. 1711 if notetsleep(&sched.safePointNote, 100*1000) { 1712 noteclear(&sched.safePointNote) 1713 break 1714 } 1715 preemptall() 1716 } 1717 } 1718 if sched.safePointWait != 0 { 1719 throw("forEachP: not done") 1720 } 1721 for _, p2 := range allp { 1722 if p2.runSafePointFn != 0 { 1723 throw("forEachP: P did not run fn") 1724 } 1725 } 1726 1727 lock(&sched.lock) 1728 sched.safePointFn = nil 1729 unlock(&sched.lock) 1730 releasem(mp) 1731 } 1732 1733 // runSafePointFn runs the safe point function, if any, for this P. 
1734 // This should be called like 1735 // 1736 // if getg().m.p.runSafePointFn != 0 { 1737 // runSafePointFn() 1738 // } 1739 // 1740 // runSafePointFn must be checked on any transition in to _Pidle or 1741 // _Psyscall to avoid a race where forEachP sees that the P is running 1742 // just before the P goes into _Pidle/_Psyscall and neither forEachP 1743 // nor the P run the safe-point function. 1744 func runSafePointFn() { 1745 p := getg().m.p.ptr() 1746 // Resolve the race between forEachP running the safe-point 1747 // function on this P's behalf and this P running the 1748 // safe-point function directly. 1749 if !atomic.Cas(&p.runSafePointFn, 1, 0) { 1750 return 1751 } 1752 sched.safePointFn(p) 1753 lock(&sched.lock) 1754 sched.safePointWait-- 1755 if sched.safePointWait == 0 { 1756 notewakeup(&sched.safePointNote) 1757 } 1758 unlock(&sched.lock) 1759 } 1760 1761 // When running with cgo, we call _cgo_thread_start 1762 // to start threads for us so that we can play nicely with 1763 // foreign code. 1764 var cgoThreadStart unsafe.Pointer 1765 1766 type cgothreadstart struct { 1767 g guintptr 1768 tls *uint64 1769 fn unsafe.Pointer 1770 } 1771 1772 // Allocate a new m unassociated with any thread. 1773 // Can use p for allocation context if needed. 1774 // fn is recorded as the new m's m.mstartfn. 1775 // id is optional pre-allocated m ID. Omit by passing -1. 1776 // 1777 // This function is allowed to have write barriers even if the caller 1778 // isn't because it borrows pp. 1779 // 1780 //go:yeswritebarrierrec 1781 func allocm(pp *p, fn func(), id int64) *m { 1782 allocmLock.rlock() 1783 1784 // The caller owns pp, but we may borrow (i.e., acquirep) it. We must 1785 // disable preemption to ensure it is not stolen, which would make the 1786 // caller lose ownership. 1787 acquirem() 1788 1789 gp := getg() 1790 if gp.m.p == 0 { 1791 acquirep(pp) // temporarily borrow p for mallocs in this function 1792 } 1793 1794 // Release the free M list. We need to do this somewhere and 1795 // this may free up a stack we can use. 1796 if sched.freem != nil { 1797 lock(&sched.lock) 1798 var newList *m 1799 for freem := sched.freem; freem != nil; { 1800 wait := freem.freeWait.Load() 1801 if wait == freeMWait { 1802 next := freem.freelink 1803 freem.freelink = newList 1804 newList = freem 1805 freem = next 1806 continue 1807 } 1808 // Free the stack if needed. For freeMRef, there is 1809 // nothing to do except drop freem from the sched.freem 1810 // list. 1811 if wait == freeMStack { 1812 // stackfree must be on the system stack, but allocm is 1813 // reachable off the system stack transitively from 1814 // startm. 1815 systemstack(func() { 1816 stackfree(freem.g0.stack) 1817 }) 1818 } 1819 freem = freem.freelink 1820 } 1821 sched.freem = newList 1822 unlock(&sched.lock) 1823 } 1824 1825 mp := new(m) 1826 mp.mstartfn = fn 1827 mcommoninit(mp, id) 1828 1829 // In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack. 1830 // Windows and Plan 9 will layout sched stack on OS stack. 1831 if iscgo || mStackIsSystemAllocated() { 1832 mp.g0 = malg(-1) 1833 } else { 1834 mp.g0 = malg(8192 * sys.StackGuardMultiplier) 1835 } 1836 mp.g0.m = mp 1837 1838 if pp == gp.m.p.ptr() { 1839 releasep() 1840 } 1841 1842 releasem(gp.m) 1843 allocmLock.runlock() 1844 return mp 1845 } 1846 1847 // needm is called when a cgo callback happens on a 1848 // thread without an m (a thread not created by Go). 
1849 // In this case, needm is expected to find an m to use 1850 // and return with m, g initialized correctly. 1851 // Since m and g are not set now (likely nil, but see below) 1852 // needm is limited in what routines it can call. In particular 1853 // it can only call nosplit functions (textflag 7) and cannot 1854 // do any scheduling that requires an m. 1855 // 1856 // In order to avoid needing heavy lifting here, we adopt 1857 // the following strategy: there is a stack of available m's 1858 // that can be stolen. Using compare-and-swap 1859 // to pop from the stack has ABA races, so we simulate 1860 // a lock by doing an exchange (via Casuintptr) to steal the stack 1861 // head and replace the top pointer with MLOCKED (1). 1862 // This serves as a simple spin lock that we can use even 1863 // without an m. The thread that locks the stack in this way 1864 // unlocks the stack by storing a valid stack head pointer. 1865 // 1866 // In order to make sure that there is always an m structure 1867 // available to be stolen, we maintain the invariant that there 1868 // is always one more than needed. At the beginning of the 1869 // program (if cgo is in use) the list is seeded with a single m. 1870 // If needm finds that it has taken the last m off the list, its job 1871 // is - once it has installed its own m so that it can do things like 1872 // allocate memory - to create a spare m and put it on the list. 1873 // 1874 // Each of these extra m's also has a g0 and a curg that are 1875 // pressed into service as the scheduling stack and current 1876 // goroutine for the duration of the cgo callback. 1877 // 1878 // When the callback is done with the m, it calls dropm to 1879 // put the m back on the list. 1880 // 1881 //go:nosplit 1882 func needm() { 1883 if (iscgo || GOOS == "windows") && !cgoHasExtraM { 1884 // Can happen if C/C++ code calls Go from a global ctor. 1885 // Can also happen on Windows if a global ctor uses a 1886 // callback created by syscall.NewCallback. See issue #6751 1887 // for details. 1888 // 1889 // Can not throw, because scheduler is not initialized yet. 1890 write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback))) 1891 exit(1) 1892 } 1893 1894 // Save and block signals before getting an M. 1895 // The signal handler may call needm itself, 1896 // and we must avoid a deadlock. Also, once g is installed, 1897 // any incoming signals will try to execute, 1898 // but we won't have the sigaltstack settings and other data 1899 // set up appropriately until the end of minit, which will 1900 // unblock the signals. This is the same dance as when 1901 // starting a new m to run Go code via newosproc. 1902 var sigmask sigset 1903 sigsave(&sigmask) 1904 sigblock(false) 1905 1906 // Lock extra list, take head, unlock popped list. 1907 // nilokay=false is safe here because of the invariant above, 1908 // that the extra list always contains or will soon contain 1909 // at least one m. 1910 mp := lockextra(false) 1911 1912 // Set needextram when we've just emptied the list, 1913 // so that the eventual call into cgocallbackg will 1914 // allocate a new m for the extra list. We delay the 1915 // allocation until then so that it can be done 1916 // after exitsyscall makes sure it is okay to be 1917 // running at all (that is, there's no garbage collection 1918 // running right now). 1919 mp.needextram = mp.schedlink == 0 1920 extraMCount-- 1921 unlockextra(mp.schedlink.ptr()) 1922 1923 // Store the original signal mask for use by minit. 
1924 mp.sigmask = sigmask 1925 1926 // Install TLS on some platforms (previously setg 1927 // would do this if necessary). 1928 osSetupTLS(mp) 1929 1930 // Install g (= m->g0) and set the stack bounds 1931 // to match the current stack. We don't actually know 1932 // how big the stack is, like we don't know how big any 1933 // scheduling stack is, but we assume there's at least 32 kB, 1934 // which is more than enough for us. 1935 setg(mp.g0) 1936 gp := getg() 1937 gp.stack.hi = getcallersp() + 1024 1938 gp.stack.lo = getcallersp() - 32*1024 1939 gp.stackguard0 = gp.stack.lo + _StackGuard 1940 1941 // Initialize this thread to use the m. 1942 asminit() 1943 minit() 1944 1945 // mp.curg is now a real goroutine. 1946 casgstatus(mp.curg, _Gdead, _Gsyscall) 1947 sched.ngsys.Add(-1) 1948 } 1949 1950 var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n") 1951 1952 // newextram allocates m's and puts them on the extra list. 1953 // It is called with a working local m, so that it can do things 1954 // like call schedlock and allocate. 1955 func newextram() { 1956 c := extraMWaiters.Swap(0) 1957 if c > 0 { 1958 for i := uint32(0); i < c; i++ { 1959 oneNewExtraM() 1960 } 1961 } else { 1962 // Make sure there is at least one extra M. 1963 mp := lockextra(true) 1964 unlockextra(mp) 1965 if mp == nil { 1966 oneNewExtraM() 1967 } 1968 } 1969 } 1970 1971 // oneNewExtraM allocates an m and puts it on the extra list. 1972 func oneNewExtraM() { 1973 // Create extra goroutine locked to extra m. 1974 // The goroutine is the context in which the cgo callback will run. 1975 // The sched.pc will never be returned to, but setting it to 1976 // goexit makes clear to the traceback routines where 1977 // the goroutine stack ends. 1978 mp := allocm(nil, nil, -1) 1979 gp := malg(4096) 1980 gp.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum 1981 gp.sched.sp = gp.stack.hi 1982 gp.sched.sp -= 4 * goarch.PtrSize // extra space in case of reads slightly beyond frame 1983 gp.sched.lr = 0 1984 gp.sched.g = guintptr(unsafe.Pointer(gp)) 1985 gp.syscallpc = gp.sched.pc 1986 gp.syscallsp = gp.sched.sp 1987 gp.stktopsp = gp.sched.sp 1988 // malg returns status as _Gidle. Change to _Gdead before 1989 // adding to allg where GC can see it. We use _Gdead to hide 1990 // this from tracebacks and stack scans since it isn't a 1991 // "real" goroutine until needm grabs it. 1992 casgstatus(gp, _Gidle, _Gdead) 1993 gp.m = mp 1994 mp.curg = gp 1995 mp.isextra = true 1996 mp.lockedInt++ 1997 mp.lockedg.set(gp) 1998 gp.lockedm.set(mp) 1999 gp.goid = sched.goidgen.Add(1) 2000 gp.sysblocktraced = true 2001 if raceenabled { 2002 gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum) 2003 } 2004 if trace.enabled { 2005 // Trigger two trace events for the locked g in the extra m, 2006 // since the next event of the g will be traceEvGoSysExit in exitsyscall, 2007 // while calling from C thread to Go. 2008 traceGoCreate(gp, 0) // no start pc 2009 gp.traceseq++ 2010 traceEvent(traceEvGoInSyscall, -1, gp.goid) 2011 } 2012 // put on allg for garbage collector 2013 allgadd(gp) 2014 2015 // gp is now on the allg list, but we don't want it to be 2016 // counted by gcount. It would be more "proper" to increment 2017 // sched.ngfree, but that requires locking. Incrementing ngsys 2018 // has the same effect. 2019 sched.ngsys.Add(1) 2020 2021 // Add m to the extra list. 
2022 mnext := lockextra(true) 2023 mp.schedlink.set(mnext) 2024 extraMCount++ 2025 unlockextra(mp) 2026 } 2027 2028 // dropm is called when a cgo callback has called needm but is now 2029 // done with the callback and returning back into the non-Go thread. 2030 // It puts the current m back onto the extra list. 2031 // 2032 // The main expense here is the call to signalstack to release the 2033 // m's signal stack, and then the call to needm on the next callback 2034 // from this thread. It is tempting to try to save the m for next time, 2035 // which would eliminate both these costs, but there might not be 2036 // a next time: the current thread (which Go does not control) might exit. 2037 // If we saved the m for that thread, there would be an m leak each time 2038 // such a thread exited. Instead, we acquire and release an m on each 2039 // call. These should typically not be scheduling operations, just a few 2040 // atomics, so the cost should be small. 2041 // 2042 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread 2043 // variable using pthread_key_create. Unlike the pthread keys we already use 2044 // on OS X, this dummy key would never be read by Go code. It would exist 2045 // only so that we could register at thread-exit-time destructor. 2046 // That destructor would put the m back onto the extra list. 2047 // This is purely a performance optimization. The current version, 2048 // in which dropm happens on each cgo call, is still correct too. 2049 // We may have to keep the current version on systems with cgo 2050 // but without pthreads, like Windows. 2051 func dropm() { 2052 // Clear m and g, and return m to the extra list. 2053 // After the call to setg we can only call nosplit functions 2054 // with no pointer manipulation. 2055 mp := getg().m 2056 2057 // Return mp.curg to dead state. 2058 casgstatus(mp.curg, _Gsyscall, _Gdead) 2059 mp.curg.preemptStop = false 2060 sched.ngsys.Add(1) 2061 2062 // Block signals before unminit. 2063 // Unminit unregisters the signal handling stack (but needs g on some systems). 2064 // Setg(nil) clears g, which is the signal handler's cue not to run Go handlers. 2065 // It's important not to try to handle a signal between those two steps. 2066 sigmask := mp.sigmask 2067 sigblock(false) 2068 unminit() 2069 2070 mnext := lockextra(true) 2071 extraMCount++ 2072 mp.schedlink.set(mnext) 2073 2074 setg(nil) 2075 2076 // Commit the release of mp. 2077 unlockextra(mp) 2078 2079 msigrestore(sigmask) 2080 } 2081 2082 // A helper function for EnsureDropM. 2083 func getm() uintptr { 2084 return uintptr(unsafe.Pointer(getg().m)) 2085 } 2086 2087 var extram atomic.Uintptr 2088 var extraMCount uint32 // Protected by lockextra 2089 var extraMWaiters atomic.Uint32 2090 2091 // lockextra locks the extra list and returns the list head. 2092 // The caller must unlock the list by storing a new list head 2093 // to extram. If nilokay is true, then lockextra will 2094 // return a nil list head if that's what it finds. If nilokay is false, 2095 // lockextra will keep waiting until the list head is no longer nil. 2096 // 2097 //go:nosplit 2098 func lockextra(nilokay bool) *m { 2099 const locked = 1 2100 2101 incr := false 2102 for { 2103 old := extram.Load() 2104 if old == locked { 2105 osyield_no_g() 2106 continue 2107 } 2108 if old == 0 && !nilokay { 2109 if !incr { 2110 // Add 1 to the number of threads 2111 // waiting for an M. 2112 // This is cleared by newextram. 
2113 extraMWaiters.Add(1) 2114 incr = true 2115 } 2116 usleep_no_g(1) 2117 continue 2118 } 2119 if extram.CompareAndSwap(old, locked) { 2120 return (*m)(unsafe.Pointer(old)) 2121 } 2122 osyield_no_g() 2123 continue 2124 } 2125 } 2126 2127 //go:nosplit 2128 func unlockextra(mp *m) { 2129 extram.Store(uintptr(unsafe.Pointer(mp))) 2130 } 2131 2132 var ( 2133 // allocmLock is locked for read when creating new Ms in allocm and their 2134 // addition to allm. Thus acquiring this lock for write blocks the 2135 // creation of new Ms. 2136 allocmLock rwmutex 2137 2138 // execLock serializes exec and clone to avoid bugs or unspecified 2139 // behaviour around exec'ing while creating/destroying threads. See 2140 // issue #19546. 2141 execLock rwmutex 2142 ) 2143 2144 // newmHandoff contains a list of m structures that need new OS threads. 2145 // This is used by newm in situations where newm itself can't safely 2146 // start an OS thread. 2147 var newmHandoff struct { 2148 lock mutex 2149 2150 // newm points to a list of M structures that need new OS 2151 // threads. The list is linked through m.schedlink. 2152 newm muintptr 2153 2154 // waiting indicates that wake needs to be notified when an m 2155 // is put on the list. 2156 waiting bool 2157 wake note 2158 2159 // haveTemplateThread indicates that the templateThread has 2160 // been started. This is not protected by lock. Use cas to set 2161 // to 1. 2162 haveTemplateThread uint32 2163 } 2164 2165 // Create a new m. It will start off with a call to fn, or else the scheduler. 2166 // fn needs to be static and not a heap allocated closure. 2167 // May run with m.p==nil, so write barriers are not allowed. 2168 // 2169 // id is optional pre-allocated m ID. Omit by passing -1. 2170 // 2171 //go:nowritebarrierrec 2172 func newm(fn func(), pp *p, id int64) { 2173 // allocm adds a new M to allm, but they do not start until created by 2174 // the OS in newm1 or the template thread. 2175 // 2176 // doAllThreadsSyscall requires that every M in allm will eventually 2177 // start and be signal-able, even with a STW. 2178 // 2179 // Disable preemption here until we start the thread to ensure that 2180 // newm is not preempted between allocm and starting the new thread, 2181 // ensuring that anything added to allm is guaranteed to eventually 2182 // start. 2183 acquirem() 2184 2185 mp := allocm(pp, fn, id) 2186 mp.nextp.set(pp) 2187 mp.sigmask = initSigmask 2188 if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" { 2189 // We're on a locked M or a thread that may have been 2190 // started by C. The kernel state of this thread may 2191 // be strange (the user may have locked it for that 2192 // purpose). We don't want to clone that into another 2193 // thread. Instead, ask a known-good thread to create 2194 // the thread for us. 2195 // 2196 // This is disabled on Plan 9. See golang.org/issue/22227. 2197 // 2198 // TODO: This may be unnecessary on Windows, which 2199 // doesn't model thread creation off fork. 2200 lock(&newmHandoff.lock) 2201 if newmHandoff.haveTemplateThread == 0 { 2202 throw("on a locked thread with no template thread") 2203 } 2204 mp.schedlink = newmHandoff.newm 2205 newmHandoff.newm.set(mp) 2206 if newmHandoff.waiting { 2207 newmHandoff.waiting = false 2208 notewakeup(&newmHandoff.wake) 2209 } 2210 unlock(&newmHandoff.lock) 2211 // The M has not started yet, but the template thread does not 2212 // participate in STW, so it will always process queued Ms and 2213 // it is safe to releasem. 
2214 releasem(getg().m) 2215 return 2216 } 2217 newm1(mp) 2218 releasem(getg().m) 2219 } 2220 2221 func newm1(mp *m) { 2222 if iscgo { 2223 var ts cgothreadstart 2224 if _cgo_thread_start == nil { 2225 throw("_cgo_thread_start missing") 2226 } 2227 ts.g.set(mp.g0) 2228 ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0])) 2229 ts.fn = unsafe.Pointer(abi.FuncPCABI0(mstart)) 2230 if msanenabled { 2231 msanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts)) 2232 } 2233 if asanenabled { 2234 asanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts)) 2235 } 2236 execLock.rlock() // Prevent process clone. 2237 asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts)) 2238 execLock.runlock() 2239 return 2240 } 2241 execLock.rlock() // Prevent process clone. 2242 newosproc(mp) 2243 execLock.runlock() 2244 } 2245 2246 // startTemplateThread starts the template thread if it is not already 2247 // running. 2248 // 2249 // The calling thread must itself be in a known-good state. 2250 func startTemplateThread() { 2251 if GOARCH == "wasm" { // no threads on wasm yet 2252 return 2253 } 2254 2255 // Disable preemption to guarantee that the template thread will be 2256 // created before a park once haveTemplateThread is set. 2257 mp := acquirem() 2258 if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { 2259 releasem(mp) 2260 return 2261 } 2262 newm(templateThread, nil, -1) 2263 releasem(mp) 2264 } 2265 2266 // templateThread is a thread in a known-good state that exists solely 2267 // to start new threads in known-good states when the calling thread 2268 // may not be in a good state. 2269 // 2270 // Many programs never need this, so templateThread is started lazily 2271 // when we first enter a state that might lead to running on a thread 2272 // in an unknown state. 2273 // 2274 // templateThread runs on an M without a P, so it must not have write 2275 // barriers. 2276 // 2277 //go:nowritebarrierrec 2278 func templateThread() { 2279 lock(&sched.lock) 2280 sched.nmsys++ 2281 checkdead() 2282 unlock(&sched.lock) 2283 2284 for { 2285 lock(&newmHandoff.lock) 2286 for newmHandoff.newm != 0 { 2287 newm := newmHandoff.newm.ptr() 2288 newmHandoff.newm = 0 2289 unlock(&newmHandoff.lock) 2290 for newm != nil { 2291 next := newm.schedlink.ptr() 2292 newm.schedlink = 0 2293 newm1(newm) 2294 newm = next 2295 } 2296 lock(&newmHandoff.lock) 2297 } 2298 newmHandoff.waiting = true 2299 noteclear(&newmHandoff.wake) 2300 unlock(&newmHandoff.lock) 2301 notesleep(&newmHandoff.wake) 2302 } 2303 } 2304 2305 // Stops execution of the current m until new work is available. 2306 // Returns with acquired P. 2307 func stopm() { 2308 gp := getg() 2309 2310 if gp.m.locks != 0 { 2311 throw("stopm holding locks") 2312 } 2313 if gp.m.p != 0 { 2314 throw("stopm holding p") 2315 } 2316 if gp.m.spinning { 2317 throw("stopm spinning") 2318 } 2319 2320 lock(&sched.lock) 2321 mput(gp.m) 2322 unlock(&sched.lock) 2323 mPark() 2324 acquirep(gp.m.nextp.ptr()) 2325 gp.m.nextp = 0 2326 } 2327 2328 func mspinning() { 2329 // startm's caller incremented nmspinning. Set the new M's spinning. 2330 getg().m.spinning = true 2331 } 2332 2333 // Schedules some M to run the p (creates an M if necessary). 2334 // If p==nil, tries to get an idle P, if no idle P's does nothing. 2335 // May run with m.p==nil, so write barriers are not allowed. 2336 // If spinning is set, the caller has incremented nmspinning and must provide a 2337 // P. startm will set m.spinning in the newly started M. 2338 // 2339 // Callers passing a non-nil P must call from a non-preemptible context. 
See 2340 // comment on acquirem below. 2341 // 2342 // Must not have write barriers because this may be called without a P. 2343 // 2344 //go:nowritebarrierrec 2345 func startm(pp *p, spinning bool) { 2346 // Disable preemption. 2347 // 2348 // Every owned P must have an owner that will eventually stop it in the 2349 // event of a GC stop request. startm takes transient ownership of a P 2350 // (either from argument or pidleget below) and transfers ownership to 2351 // a started M, which will be responsible for performing the stop. 2352 // 2353 // Preemption must be disabled during this transient ownership, 2354 // otherwise the P this is running on may enter GC stop while still 2355 // holding the transient P, leaving that P in limbo and deadlocking the 2356 // STW. 2357 // 2358 // Callers passing a non-nil P must already be in non-preemptible 2359 // context, otherwise such preemption could occur on function entry to 2360 // startm. Callers passing a nil P may be preemptible, so we must 2361 // disable preemption before acquiring a P from pidleget below. 2362 mp := acquirem() 2363 lock(&sched.lock) 2364 if pp == nil { 2365 if spinning { 2366 // TODO(prattmic): All remaining calls to this function 2367 // with _p_ == nil could be cleaned up to find a P 2368 // before calling startm. 2369 throw("startm: P required for spinning=true") 2370 } 2371 pp, _ = pidleget(0) 2372 if pp == nil { 2373 unlock(&sched.lock) 2374 releasem(mp) 2375 return 2376 } 2377 } 2378 nmp := mget() 2379 if nmp == nil { 2380 // No M is available, we must drop sched.lock and call newm. 2381 // However, we already own a P to assign to the M. 2382 // 2383 // Once sched.lock is released, another G (e.g., in a syscall), 2384 // could find no idle P while checkdead finds a runnable G but 2385 // no running M's because this new M hasn't started yet, thus 2386 // throwing in an apparent deadlock. 2387 // 2388 // Avoid this situation by pre-allocating the ID for the new M, 2389 // thus marking it as 'running' before we drop sched.lock. This 2390 // new M will eventually run the scheduler to execute any 2391 // queued G's. 2392 id := mReserveID() 2393 unlock(&sched.lock) 2394 2395 var fn func() 2396 if spinning { 2397 // The caller incremented nmspinning, so set m.spinning in the new M. 2398 fn = mspinning 2399 } 2400 newm(fn, pp, id) 2401 // Ownership transfer of pp committed by start in newm. 2402 // Preemption is now safe. 2403 releasem(mp) 2404 return 2405 } 2406 unlock(&sched.lock) 2407 if nmp.spinning { 2408 throw("startm: m is spinning") 2409 } 2410 if nmp.nextp != 0 { 2411 throw("startm: m has p") 2412 } 2413 if spinning && !runqempty(pp) { 2414 throw("startm: p has runnable gs") 2415 } 2416 // The caller incremented nmspinning, so set m.spinning in the new M. 2417 nmp.spinning = spinning 2418 nmp.nextp.set(pp) 2419 notewakeup(&nmp.park) 2420 // Ownership transfer of pp committed by wakeup. Preemption is now 2421 // safe. 2422 releasem(mp) 2423 } 2424 2425 // Hands off P from syscall or locked M. 2426 // Always runs without a P, so write barriers are not allowed. 2427 // 2428 //go:nowritebarrierrec 2429 func handoffp(pp *p) { 2430 // handoffp must start an M in any situation where 2431 // findrunnable would return a G to run on pp. 
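// In outline, the checks that follow reduce to roughly this sketch (simplified,
// with hypothetical helper names such as localWork and gcMarkWork; the real body
// below additionally handles tracing, GC stops, safe-point functions, and the
// netpoll wakeup):
//
//	if localWork(pp) || globalWork() {
//		startm(pp, false) // a runnable G exists for pp right now
//		return
//	}
//	if gcMarkWork(pp) {
//		startm(pp, false) // GC needs the P
//		return
//	}
//	if noSpinningOrIdleMs() && sched.nmspinning.CompareAndSwap(0, 1) {
//		startm(pp, true) // start a spinning M to look for work
//		return
//	}
//	pidleput(pp, 0) // nothing to do; pp goes idle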
2432 2433 // if it has local work, start it straight away 2434 if !runqempty(pp) || sched.runqsize != 0 { 2435 startm(pp, false) 2436 return 2437 } 2438 // if there's trace work to do, start it straight away 2439 if (trace.enabled || trace.shutdown) && traceReaderAvailable() != nil { 2440 startm(pp, false) 2441 return 2442 } 2443 // if it has GC work, start it straight away 2444 if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) { 2445 startm(pp, false) 2446 return 2447 } 2448 // no local work, check that there are no spinning/idle M's, 2449 // otherwise our help is not required 2450 if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic 2451 sched.needspinning.Store(0) 2452 startm(pp, true) 2453 return 2454 } 2455 lock(&sched.lock) 2456 if sched.gcwaiting.Load() { 2457 pp.status = _Pgcstop 2458 sched.stopwait-- 2459 if sched.stopwait == 0 { 2460 notewakeup(&sched.stopnote) 2461 } 2462 unlock(&sched.lock) 2463 return 2464 } 2465 if pp.runSafePointFn != 0 && atomic.Cas(&pp.runSafePointFn, 1, 0) { 2466 sched.safePointFn(pp) 2467 sched.safePointWait-- 2468 if sched.safePointWait == 0 { 2469 notewakeup(&sched.safePointNote) 2470 } 2471 } 2472 if sched.runqsize != 0 { 2473 unlock(&sched.lock) 2474 startm(pp, false) 2475 return 2476 } 2477 // If this is the last running P and nobody is polling network, 2478 // need to wakeup another M to poll network. 2479 if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 { 2480 unlock(&sched.lock) 2481 startm(pp, false) 2482 return 2483 } 2484 2485 // The scheduler lock cannot be held when calling wakeNetPoller below 2486 // because wakeNetPoller may call wakep which may call startm. 2487 when := nobarrierWakeTime(pp) 2488 pidleput(pp, 0) 2489 unlock(&sched.lock) 2490 2491 if when != 0 { 2492 wakeNetPoller(when) 2493 } 2494 } 2495 2496 // Tries to add one more P to execute G's. 2497 // Called when a G is made runnable (newproc, ready). 2498 // Must be called with a P. 2499 func wakep() { 2500 // Be conservative about spinning threads, only start one if none exist 2501 // already. 2502 if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) { 2503 return 2504 } 2505 2506 // Disable preemption until ownership of pp transfers to the next M in 2507 // startm. Otherwise preemption here would leave pp stuck waiting to 2508 // enter _Pgcstop. 2509 // 2510 // See preemption comment on acquirem in startm for more details. 2511 mp := acquirem() 2512 2513 var pp *p 2514 lock(&sched.lock) 2515 pp, _ = pidlegetSpinning(0) 2516 if pp == nil { 2517 if sched.nmspinning.Add(-1) < 0 { 2518 throw("wakep: negative nmspinning") 2519 } 2520 unlock(&sched.lock) 2521 releasem(mp) 2522 return 2523 } 2524 // Since we always have a P, the race in the "No M is available" 2525 // comment in startm doesn't apply during the small window between the 2526 // unlock here and lock in startm. A checkdead in between will always 2527 // see at least one running M (ours). 2528 unlock(&sched.lock) 2529 2530 startm(pp, true) 2531 2532 releasem(mp) 2533 } 2534 2535 // Stops execution of the current m that is locked to a g until the g is runnable again. 2536 // Returns with acquired P. 2537 func stoplockedm() { 2538 gp := getg() 2539 2540 if gp.m.lockedg == 0 || gp.m.lockedg.ptr().lockedm.ptr() != gp.m { 2541 throw("stoplockedm: inconsistent locking") 2542 } 2543 if gp.m.p != 0 { 2544 // Schedule another M to run this p. 
2545 pp := releasep() 2546 handoffp(pp) 2547 } 2548 incidlelocked(1) 2549 // Wait until another thread schedules lockedg again. 2550 mPark() 2551 status := readgstatus(gp.m.lockedg.ptr()) 2552 if status&^_Gscan != _Grunnable { 2553 print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n") 2554 dumpgstatus(gp.m.lockedg.ptr()) 2555 throw("stoplockedm: not runnable") 2556 } 2557 acquirep(gp.m.nextp.ptr()) 2558 gp.m.nextp = 0 2559 } 2560 2561 // Schedules the locked m to run the locked gp. 2562 // May run during STW, so write barriers are not allowed. 2563 // 2564 //go:nowritebarrierrec 2565 func startlockedm(gp *g) { 2566 mp := gp.lockedm.ptr() 2567 if mp == getg().m { 2568 throw("startlockedm: locked to me") 2569 } 2570 if mp.nextp != 0 { 2571 throw("startlockedm: m has p") 2572 } 2573 // directly handoff current P to the locked m 2574 incidlelocked(-1) 2575 pp := releasep() 2576 mp.nextp.set(pp) 2577 notewakeup(&mp.park) 2578 stopm() 2579 } 2580 2581 // Stops the current m for stopTheWorld. 2582 // Returns when the world is restarted. 2583 func gcstopm() { 2584 gp := getg() 2585 2586 if !sched.gcwaiting.Load() { 2587 throw("gcstopm: not waiting for gc") 2588 } 2589 if gp.m.spinning { 2590 gp.m.spinning = false 2591 // OK to just drop nmspinning here, 2592 // startTheWorld will unpark threads as necessary. 2593 if sched.nmspinning.Add(-1) < 0 { 2594 throw("gcstopm: negative nmspinning") 2595 } 2596 } 2597 pp := releasep() 2598 lock(&sched.lock) 2599 pp.status = _Pgcstop 2600 sched.stopwait-- 2601 if sched.stopwait == 0 { 2602 notewakeup(&sched.stopnote) 2603 } 2604 unlock(&sched.lock) 2605 stopm() 2606 } 2607 2608 // Schedules gp to run on the current M. 2609 // If inheritTime is true, gp inherits the remaining time in the 2610 // current time slice. Otherwise, it starts a new time slice. 2611 // Never returns. 2612 // 2613 // Write barriers are allowed because this is called immediately after 2614 // acquiring a P in several places. 2615 // 2616 //go:yeswritebarrierrec 2617 func execute(gp *g, inheritTime bool) { 2618 mp := getg().m 2619 2620 if goroutineProfile.active { 2621 // Make sure that gp has had its stack written out to the goroutine 2622 // profile, exactly as it was when the goroutine profiler first stopped 2623 // the world. 2624 tryRecordGoroutineProfile(gp, osyield) 2625 } 2626 2627 // Assign gp.m before entering _Grunning so running Gs have an 2628 // M. 2629 mp.curg = gp 2630 gp.m = mp 2631 casgstatus(gp, _Grunnable, _Grunning) 2632 gp.waitsince = 0 2633 gp.preempt = false 2634 gp.stackguard0 = gp.stack.lo + _StackGuard 2635 if !inheritTime { 2636 mp.p.ptr().schedtick++ 2637 } 2638 2639 // Check whether the profiler needs to be turned on or off. 2640 hz := sched.profilehz 2641 if mp.profilehz != hz { 2642 setThreadCPUProfiler(hz) 2643 } 2644 2645 if trace.enabled { 2646 // GoSysExit has to happen when we have a P, but before GoStart. 2647 // So we emit it here. 2648 if gp.syscallsp != 0 && gp.sysblocktraced { 2649 traceGoSysExit(gp.sysexitticks) 2650 } 2651 traceGoStart() 2652 } 2653 2654 gogo(&gp.sched) 2655 } 2656 2657 // Finds a runnable goroutine to execute. 2658 // Tries to steal from other P's, get g from local or global queue, poll network. 2659 // tryWakeP indicates that the returned goroutine is not normal (GC worker, trace 2660 // reader) so the caller should try to wake a P. 
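// As an orientation aid before the real implementation, the overall search order
// reduces to roughly the following sketch (hypothetical helper names; the code
// below interleaves these steps with trace-reader and GC-worker scheduling, timer
// checks, a periodic fairness check on the global queue, and the careful
// spinning->non-spinning handshake described later):
//
//	for {
//		if gp := localRunq(pp); gp != nil {          // per-P run queue
//			return gp
//		}
//		if gp := globalRunq(); gp != nil {           // sched.runq, under sched.lock
//			return gp
//		}
//		if gp := netpollNonBlocking(); gp != nil {   // already-ready network I/O
//			return gp
//		}
//		if gp := stealWorkFromOtherPs(); gp != nil { // work stealing
//			return gp
//		}
//		releasePAndPark() // drop the P, stop the M, retry when woken
//	}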
2661 func findRunnable() (gp *g, inheritTime, tryWakeP bool) { 2662 mp := getg().m 2663 2664 // The conditions here and in handoffp must agree: if 2665 // findrunnable would return a G to run, handoffp must start 2666 // an M. 2667 2668 top: 2669 pp := mp.p.ptr() 2670 if sched.gcwaiting.Load() { 2671 gcstopm() 2672 goto top 2673 } 2674 if pp.runSafePointFn != 0 { 2675 runSafePointFn() 2676 } 2677 2678 // now and pollUntil are saved for work stealing later, 2679 // which may steal timers. It's important that between now 2680 // and then, nothing blocks, so these numbers remain mostly 2681 // relevant. 2682 now, pollUntil, _ := checkTimers(pp, 0) 2683 2684 // Try to schedule the trace reader. 2685 if trace.enabled || trace.shutdown { 2686 gp := traceReader() 2687 if gp != nil { 2688 casgstatus(gp, _Gwaiting, _Grunnable) 2689 traceGoUnpark(gp, 0) 2690 return gp, false, true 2691 } 2692 } 2693 2694 // Try to schedule a GC worker. 2695 if gcBlackenEnabled != 0 { 2696 gp, tnow := gcController.findRunnableGCWorker(pp, now) 2697 if gp != nil { 2698 return gp, false, true 2699 } 2700 now = tnow 2701 } 2702 2703 // Check the global runnable queue once in a while to ensure fairness. 2704 // Otherwise two goroutines can completely occupy the local runqueue 2705 // by constantly respawning each other. 2706 if pp.schedtick%61 == 0 && sched.runqsize > 0 { 2707 lock(&sched.lock) 2708 gp := globrunqget(pp, 1) 2709 unlock(&sched.lock) 2710 if gp != nil { 2711 return gp, false, false 2712 } 2713 } 2714 2715 // Wake up the finalizer G. 2716 if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake { 2717 if gp := wakefing(); gp != nil { 2718 ready(gp, 0, true) 2719 } 2720 } 2721 if *cgo_yield != nil { 2722 asmcgocall(*cgo_yield, nil) 2723 } 2724 2725 // local runq 2726 if gp, inheritTime := runqget(pp); gp != nil { 2727 return gp, inheritTime, false 2728 } 2729 2730 // global runq 2731 if sched.runqsize != 0 { 2732 lock(&sched.lock) 2733 gp := globrunqget(pp, 0) 2734 unlock(&sched.lock) 2735 if gp != nil { 2736 return gp, false, false 2737 } 2738 } 2739 2740 // Poll network. 2741 // This netpoll is only an optimization before we resort to stealing. 2742 // We can safely skip it if there are no waiters or a thread is blocked 2743 // in netpoll already. If there is any kind of logical race with that 2744 // blocked thread (e.g. it has already returned from netpoll, but does 2745 // not set lastpoll yet), this thread will do blocking netpoll below 2746 // anyway. 2747 if netpollinited() && netpollWaiters.Load() > 0 && sched.lastpoll.Load() != 0 { 2748 if list := netpoll(0); !list.empty() { // non-blocking 2749 gp := list.pop() 2750 injectglist(&list) 2751 casgstatus(gp, _Gwaiting, _Grunnable) 2752 if trace.enabled { 2753 traceGoUnpark(gp, 0) 2754 } 2755 return gp, false, false 2756 } 2757 } 2758 2759 // Spinning Ms: steal work from other Ps. 2760 // 2761 // Limit the number of spinning Ms to half the number of busy Ps. 2762 // This is necessary to prevent excessive CPU consumption when 2763 // GOMAXPROCS>>1 but the program parallelism is low. 2764 if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() { 2765 if !mp.spinning { 2766 mp.becomeSpinning() 2767 } 2768 2769 gp, inheritTime, tnow, w, newWork := stealWork(now) 2770 if gp != nil { 2771 // Successfully stole. 2772 return gp, inheritTime, false 2773 } 2774 if newWork { 2775 // There may be new timer or GC work; restart to 2776 // discover. 
2777 goto top 2778 } 2779 2780 now = tnow 2781 if w != 0 && (pollUntil == 0 || w < pollUntil) { 2782 // Earlier timer to wait for. 2783 pollUntil = w 2784 } 2785 } 2786 2787 // We have nothing to do. 2788 // 2789 // If we're in the GC mark phase, can safely scan and blacken objects, 2790 // and have work to do, run idle-time marking rather than give up the P. 2791 if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() { 2792 node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 2793 if node != nil { 2794 pp.gcMarkWorkerMode = gcMarkWorkerIdleMode 2795 gp := node.gp.ptr() 2796 casgstatus(gp, _Gwaiting, _Grunnable) 2797 if trace.enabled { 2798 traceGoUnpark(gp, 0) 2799 } 2800 return gp, false, false 2801 } 2802 gcController.removeIdleMarkWorker() 2803 } 2804 2805 // wasm only: 2806 // If a callback returned and no other goroutine is awake, 2807 // then wake event handler goroutine which pauses execution 2808 // until a callback was triggered. 2809 gp, otherReady := beforeIdle(now, pollUntil) 2810 if gp != nil { 2811 casgstatus(gp, _Gwaiting, _Grunnable) 2812 if trace.enabled { 2813 traceGoUnpark(gp, 0) 2814 } 2815 return gp, false, false 2816 } 2817 if otherReady { 2818 goto top 2819 } 2820 2821 // Before we drop our P, make a snapshot of the allp slice, 2822 // which can change underfoot once we no longer block 2823 // safe-points. We don't need to snapshot the contents because 2824 // everything up to cap(allp) is immutable. 2825 allpSnapshot := allp 2826 // Also snapshot masks. Value changes are OK, but we can't allow 2827 // len to change out from under us. 2828 idlepMaskSnapshot := idlepMask 2829 timerpMaskSnapshot := timerpMask 2830 2831 // return P and block 2832 lock(&sched.lock) 2833 if sched.gcwaiting.Load() || pp.runSafePointFn != 0 { 2834 unlock(&sched.lock) 2835 goto top 2836 } 2837 if sched.runqsize != 0 { 2838 gp := globrunqget(pp, 0) 2839 unlock(&sched.lock) 2840 return gp, false, false 2841 } 2842 if !mp.spinning && sched.needspinning.Load() == 1 { 2843 // See "Delicate dance" comment below. 2844 mp.becomeSpinning() 2845 unlock(&sched.lock) 2846 goto top 2847 } 2848 if releasep() != pp { 2849 throw("findrunnable: wrong p") 2850 } 2851 now = pidleput(pp, now) 2852 unlock(&sched.lock) 2853 2854 // Delicate dance: thread transitions from spinning to non-spinning 2855 // state, potentially concurrently with submission of new work. We must 2856 // drop nmspinning first and then check all sources again (with 2857 // #StoreLoad memory barrier in between). If we do it the other way 2858 // around, another thread can submit work after we've checked all 2859 // sources but before we drop nmspinning; as a result nobody will 2860 // unpark a thread to run the work. 2861 // 2862 // This applies to the following sources of work: 2863 // 2864 // * Goroutines added to a per-P run queue. 2865 // * New/modified-earlier timers on a per-P timer heap. 2866 // * Idle-priority GC work (barring golang.org/issue/19112). 2867 // 2868 // If we discover new work below, we need to restore m.spinning as a 2869 // signal for resetspinning to unpark a new worker thread (because 2870 // there can be more than one starving goroutine). 2871 // 2872 // However, if after discovering new work we also observe no idle Ps 2873 // (either here or in resetspinning), we have a problem. We may be 2874 // racing with a non-spinning M in the block above, having found no 2875 // work and preparing to release its P and park. 
Allowing that P to go 2876 // idle will result in loss of work conservation (idle P while there is 2877 // runnable work). This could result in complete deadlock in the 2878 // unlikely event that we discover new work (from netpoll) right as we 2879 // are racing with _all_ other Ps going idle. 2880 // 2881 // We use sched.needspinning to synchronize with non-spinning Ms going 2882 // idle. If needspinning is set when they are about to drop their P, 2883 // they abort the drop and instead become a new spinning M on our 2884 // behalf. If we are not racing and the system is truly fully loaded 2885 // then no spinning threads are required, and the next thread to 2886 // naturally become spinning will clear the flag. 2887 // 2888 // Also see "Worker thread parking/unparking" comment at the top of the 2889 // file. 2890 wasSpinning := mp.spinning 2891 if mp.spinning { 2892 mp.spinning = false 2893 if sched.nmspinning.Add(-1) < 0 { 2894 throw("findrunnable: negative nmspinning") 2895 } 2896 2897 // Note the for correctness, only the last M transitioning from 2898 // spinning to non-spinning must perform these rechecks to 2899 // ensure no missed work. However, the runtime has some cases 2900 // of transient increments of nmspinning that are decremented 2901 // without going through this path, so we must be conservative 2902 // and perform the check on all spinning Ms. 2903 // 2904 // See https://go.dev/issue/43997. 2905 2906 // Check all runqueues once again. 2907 pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot) 2908 if pp != nil { 2909 acquirep(pp) 2910 mp.becomeSpinning() 2911 goto top 2912 } 2913 2914 // Check for idle-priority GC work again. 2915 pp, gp := checkIdleGCNoP() 2916 if pp != nil { 2917 acquirep(pp) 2918 mp.becomeSpinning() 2919 2920 // Run the idle worker. 2921 pp.gcMarkWorkerMode = gcMarkWorkerIdleMode 2922 casgstatus(gp, _Gwaiting, _Grunnable) 2923 if trace.enabled { 2924 traceGoUnpark(gp, 0) 2925 } 2926 return gp, false, false 2927 } 2928 2929 // Finally, check for timer creation or expiry concurrently with 2930 // transitioning from spinning to non-spinning. 2931 // 2932 // Note that we cannot use checkTimers here because it calls 2933 // adjusttimers which may need to allocate memory, and that isn't 2934 // allowed when we don't have an active P. 2935 pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil) 2936 } 2937 2938 // Poll network until next timer. 2939 if netpollinited() && (netpollWaiters.Load() > 0 || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 { 2940 sched.pollUntil.Store(pollUntil) 2941 if mp.p != 0 { 2942 throw("findrunnable: netpoll with p") 2943 } 2944 if mp.spinning { 2945 throw("findrunnable: netpoll with spinning") 2946 } 2947 // Refresh now. 2948 now = nanotime() 2949 delay := int64(-1) 2950 if pollUntil != 0 { 2951 delay = pollUntil - now 2952 if delay < 0 { 2953 delay = 0 2954 } 2955 } 2956 if faketime != 0 { 2957 // When using fake time, just poll. 2958 delay = 0 2959 } 2960 list := netpoll(delay) // block until new work is available 2961 sched.pollUntil.Store(0) 2962 sched.lastpoll.Store(now) 2963 if faketime != 0 && list.empty() { 2964 // Using fake time and nothing is ready; stop M. 2965 // When all M's stop, checkdead will call timejump. 
2966 stopm() 2967 goto top 2968 } 2969 lock(&sched.lock) 2970 pp, _ := pidleget(now) 2971 unlock(&sched.lock) 2972 if pp == nil { 2973 injectglist(&list) 2974 } else { 2975 acquirep(pp) 2976 if !list.empty() { 2977 gp := list.pop() 2978 injectglist(&list) 2979 casgstatus(gp, _Gwaiting, _Grunnable) 2980 if trace.enabled { 2981 traceGoUnpark(gp, 0) 2982 } 2983 return gp, false, false 2984 } 2985 if wasSpinning { 2986 mp.becomeSpinning() 2987 } 2988 goto top 2989 } 2990 } else if pollUntil != 0 && netpollinited() { 2991 pollerPollUntil := sched.pollUntil.Load() 2992 if pollerPollUntil == 0 || pollerPollUntil > pollUntil { 2993 netpollBreak() 2994 } 2995 } 2996 stopm() 2997 goto top 2998 } 2999 3000 // pollWork reports whether there is non-background work this P could 3001 // be doing. This is a fairly lightweight check to be used for 3002 // background work loops, like idle GC. It checks a subset of the 3003 // conditions checked by the actual scheduler. 3004 func pollWork() bool { 3005 if sched.runqsize != 0 { 3006 return true 3007 } 3008 p := getg().m.p.ptr() 3009 if !runqempty(p) { 3010 return true 3011 } 3012 if netpollinited() && netpollWaiters.Load() > 0 && sched.lastpoll.Load() != 0 { 3013 if list := netpoll(0); !list.empty() { 3014 injectglist(&list) 3015 return true 3016 } 3017 } 3018 return false 3019 } 3020 3021 // stealWork attempts to steal a runnable goroutine or timer from any P. 3022 // 3023 // If newWork is true, new work may have been readied. 3024 // 3025 // If now is not 0 it is the current time. stealWork returns the passed time or 3026 // the current time if now was passed as 0. 3027 func stealWork(now int64) (gp *g, inheritTime bool, rnow, pollUntil int64, newWork bool) { 3028 pp := getg().m.p.ptr() 3029 3030 ranTimer := false 3031 3032 const stealTries = 4 3033 for i := 0; i < stealTries; i++ { 3034 stealTimersOrRunNextG := i == stealTries-1 3035 3036 for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() { 3037 if sched.gcwaiting.Load() { 3038 // GC work may be available. 3039 return nil, false, now, pollUntil, true 3040 } 3041 p2 := allp[enum.position()] 3042 if pp == p2 { 3043 continue 3044 } 3045 3046 // Steal timers from p2. This call to checkTimers is the only place 3047 // where we might hold a lock on a different P's timers. We do this 3048 // once on the last pass before checking runnext because stealing 3049 // from the other P's runnext should be the last resort, so if there 3050 // are timers to steal do that first. 3051 // 3052 // We only check timers on one of the stealing iterations because 3053 // the time stored in now doesn't change in this loop and checking 3054 // the timers for each P more than once with the same value of now 3055 // is probably a waste of time. 3056 // 3057 // timerpMask tells us whether the P may have timers at all. If it 3058 // can't, no need to check at all. 3059 if stealTimersOrRunNextG && timerpMask.read(enum.position()) { 3060 tnow, w, ran := checkTimers(p2, now) 3061 now = tnow 3062 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3063 pollUntil = w 3064 } 3065 if ran { 3066 // Running the timers may have 3067 // made an arbitrary number of G's 3068 // ready and added them to this P's 3069 // local run queue. That invalidates 3070 // the assumption of runqsteal 3071 // that it always has room to add 3072 // stolen G's. So check now if there 3073 // is a local G to run. 
3074 if gp, inheritTime := runqget(pp); gp != nil { 3075 return gp, inheritTime, now, pollUntil, ranTimer 3076 } 3077 ranTimer = true 3078 } 3079 } 3080 3081 // Don't bother to attempt to steal if p2 is idle. 3082 if !idlepMask.read(enum.position()) { 3083 if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil { 3084 return gp, false, now, pollUntil, ranTimer 3085 } 3086 } 3087 } 3088 } 3089 3090 // No goroutines found to steal. Regardless, running a timer may have 3091 // made some goroutine ready that we missed. Indicate the next timer to 3092 // wait for. 3093 return nil, false, now, pollUntil, ranTimer 3094 } 3095 3096 // Check all Ps for a runnable G to steal. 3097 // 3098 // On entry we have no P. If a G is available to steal and a P is available, 3099 // the P is returned which the caller should acquire and attempt to steal the 3100 // work to. 3101 func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p { 3102 for id, p2 := range allpSnapshot { 3103 if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) { 3104 lock(&sched.lock) 3105 pp, _ := pidlegetSpinning(0) 3106 if pp == nil { 3107 // Can't get a P, don't bother checking remaining Ps. 3108 unlock(&sched.lock) 3109 return nil 3110 } 3111 unlock(&sched.lock) 3112 return pp 3113 } 3114 } 3115 3116 // No work available. 3117 return nil 3118 } 3119 3120 // Check all Ps for a timer expiring sooner than pollUntil. 3121 // 3122 // Returns updated pollUntil value. 3123 func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 { 3124 for id, p2 := range allpSnapshot { 3125 if timerpMaskSnapshot.read(uint32(id)) { 3126 w := nobarrierWakeTime(p2) 3127 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3128 pollUntil = w 3129 } 3130 } 3131 } 3132 3133 return pollUntil 3134 } 3135 3136 // Check for idle-priority GC, without a P on entry. 3137 // 3138 // If some GC work, a P, and a worker G are all available, the P and G will be 3139 // returned. The returned P has not been wired yet. 3140 func checkIdleGCNoP() (*p, *g) { 3141 // N.B. Since we have no P, gcBlackenEnabled may change at any time; we 3142 // must check again after acquiring a P. As an optimization, we also check 3143 // if an idle mark worker is needed at all. This is OK here, because if we 3144 // observe that one isn't needed, at least one is currently running. Even if 3145 // it stops running, its own journey into the scheduler should schedule it 3146 // again, if need be (at which point, this check will pass, if relevant). 3147 if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() { 3148 return nil, nil 3149 } 3150 if !gcMarkWorkAvailable(nil) { 3151 return nil, nil 3152 } 3153 3154 // Work is available; we can start an idle GC worker only if there is 3155 // an available P and available worker G. 3156 // 3157 // We can attempt to acquire these in either order, though both have 3158 // synchronization concerns (see below). Workers are almost always 3159 // available (see comment in findRunnableGCWorker for the one case 3160 // there may be none). Since we're slightly less likely to find a P, 3161 // check for that first. 3162 // 3163 // Synchronization: note that we must hold sched.lock until we are 3164 // committed to keeping it. Otherwise we cannot put the unnecessary P 3165 // back in sched.pidle without performing the full set of idle 3166 // transition checks. 
3167 // 3168 // If we were to check gcBgMarkWorkerPool first, we must somehow handle 3169 // the assumption in gcControllerState.findRunnableGCWorker that an 3170 // empty gcBgMarkWorkerPool is only possible if gcMarkDone is running. 3171 lock(&sched.lock) 3172 pp, now := pidlegetSpinning(0) 3173 if pp == nil { 3174 unlock(&sched.lock) 3175 return nil, nil 3176 } 3177 3178 // Now that we own a P, gcBlackenEnabled can't change (as it requires STW). 3179 if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() { 3180 pidleput(pp, now) 3181 unlock(&sched.lock) 3182 return nil, nil 3183 } 3184 3185 node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 3186 if node == nil { 3187 pidleput(pp, now) 3188 unlock(&sched.lock) 3189 gcController.removeIdleMarkWorker() 3190 return nil, nil 3191 } 3192 3193 unlock(&sched.lock) 3194 3195 return pp, node.gp.ptr() 3196 } 3197 3198 // wakeNetPoller wakes up the thread sleeping in the network poller if it isn't 3199 // going to wake up before the when argument; or it wakes an idle P to service 3200 // timers and the network poller if there isn't one already. 3201 func wakeNetPoller(when int64) { 3202 if sched.lastpoll.Load() == 0 { 3203 // In findrunnable we ensure that when polling the pollUntil 3204 // field is either zero or the time to which the current 3205 // poll is expected to run. This can have a spurious wakeup 3206 // but should never miss a wakeup. 3207 pollerPollUntil := sched.pollUntil.Load() 3208 if pollerPollUntil == 0 || pollerPollUntil > when { 3209 netpollBreak() 3210 } 3211 } else { 3212 // There are no threads in the network poller, try to get 3213 // one there so it can handle new timers. 3214 if GOOS != "plan9" { // Temporary workaround - see issue #42303. 3215 wakep() 3216 } 3217 } 3218 } 3219 3220 func resetspinning() { 3221 gp := getg() 3222 if !gp.m.spinning { 3223 throw("resetspinning: not a spinning m") 3224 } 3225 gp.m.spinning = false 3226 nmspinning := sched.nmspinning.Add(-1) 3227 if nmspinning < 0 { 3228 throw("findrunnable: negative nmspinning") 3229 } 3230 // M wakeup policy is deliberately somewhat conservative, so check if we 3231 // need to wakeup another P here. See "Worker thread parking/unparking" 3232 // comment at the top of the file for details. 3233 wakep() 3234 } 3235 3236 // injectglist adds each runnable G on the list to some run queue, 3237 // and clears glist. If there is no current P, they are added to the 3238 // global queue, and up to npidle M's are started to run them. 3239 // Otherwise, for each idle P, this adds a G to the global queue 3240 // and starts an M. Any remaining G's are added to the current P's 3241 // local run queue. 3242 // This may temporarily acquire sched.lock. 3243 // Can run concurrently with GC. 3244 func injectglist(glist *gList) { 3245 if glist.empty() { 3246 return 3247 } 3248 if trace.enabled { 3249 for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() { 3250 traceGoUnpark(gp, 0) 3251 } 3252 } 3253 3254 // Mark all the goroutines as runnable before we put them 3255 // on the run queues. 3256 head := glist.head.ptr() 3257 var tail *g 3258 qsize := 0 3259 for gp := head; gp != nil; gp = gp.schedlink.ptr() { 3260 tail = gp 3261 qsize++ 3262 casgstatus(gp, _Gwaiting, _Grunnable) 3263 } 3264 3265 // Turn the gList into a gQueue. 3266 var q gQueue 3267 q.head.set(head) 3268 q.tail.set(tail) 3269 *glist = gList{} 3270 3271 startIdle := func(n int) { 3272 for i := 0; i < n; i++ { 3273 mp := acquirem() // See comment in startm. 
3274 lock(&sched.lock) 3275 3276 pp, _ := pidlegetSpinning(0) 3277 if pp == nil { 3278 unlock(&sched.lock) 3279 releasem(mp) 3280 break 3281 } 3282 3283 unlock(&sched.lock) 3284 startm(pp, false) 3285 releasem(mp) 3286 } 3287 } 3288 3289 pp := getg().m.p.ptr() 3290 if pp == nil { 3291 lock(&sched.lock) 3292 globrunqputbatch(&q, int32(qsize)) 3293 unlock(&sched.lock) 3294 startIdle(qsize) 3295 return 3296 } 3297 3298 npidle := int(sched.npidle.Load()) 3299 var globq gQueue 3300 var n int 3301 for n = 0; n < npidle && !q.empty(); n++ { 3302 g := q.pop() 3303 globq.pushBack(g) 3304 } 3305 if n > 0 { 3306 lock(&sched.lock) 3307 globrunqputbatch(&globq, int32(n)) 3308 unlock(&sched.lock) 3309 startIdle(n) 3310 qsize -= n 3311 } 3312 3313 if !q.empty() { 3314 runqputbatch(pp, &q, qsize) 3315 } 3316 } 3317 3318 // One round of scheduler: find a runnable goroutine and execute it. 3319 // Never returns. 3320 func schedule() { 3321 mp := getg().m 3322 3323 if mp.locks != 0 { 3324 throw("schedule: holding locks") 3325 } 3326 3327 if mp.lockedg != 0 { 3328 stoplockedm() 3329 execute(mp.lockedg.ptr(), false) // Never returns. 3330 } 3331 3332 // We should not schedule away from a g that is executing a cgo call, 3333 // since the cgo call is using the m's g0 stack. 3334 if mp.incgo { 3335 throw("schedule: in cgo") 3336 } 3337 3338 top: 3339 pp := mp.p.ptr() 3340 pp.preempt = false 3341 3342 // Safety check: if we are spinning, the run queue should be empty. 3343 // Check this before calling checkTimers, as that might call 3344 // goready to put a ready goroutine on the local run queue. 3345 if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) { 3346 throw("schedule: spinning with local work") 3347 } 3348 3349 gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available 3350 3351 // This thread is going to run a goroutine and is not spinning anymore, 3352 // so if it was marked as spinning we need to reset it now and potentially 3353 // start a new spinning M. 3354 if mp.spinning { 3355 resetspinning() 3356 } 3357 3358 if sched.disable.user && !schedEnabled(gp) { 3359 // Scheduling of this goroutine is disabled. Put it on 3360 // the list of pending runnable goroutines for when we 3361 // re-enable user scheduling and look again. 3362 lock(&sched.lock) 3363 if schedEnabled(gp) { 3364 // Something re-enabled scheduling while we 3365 // were acquiring the lock. 3366 unlock(&sched.lock) 3367 } else { 3368 sched.disable.runnable.pushBack(gp) 3369 sched.disable.n++ 3370 unlock(&sched.lock) 3371 goto top 3372 } 3373 } 3374 3375 // If about to schedule a not-normal goroutine (a GCworker or tracereader), 3376 // wake a P if there is one. 3377 if tryWakeP { 3378 wakep() 3379 } 3380 if gp.lockedm != 0 { 3381 // Hands off own p to the locked m, 3382 // then blocks waiting for a new p. 3383 startlockedm(gp) 3384 goto top 3385 } 3386 3387 execute(gp, inheritTime) 3388 } 3389 3390 // dropg removes the association between m and the current goroutine m->curg (gp for short). 3391 // Typically a caller sets gp's status away from Grunning and then 3392 // immediately calls dropg to finish the job. The caller is also responsible 3393 // for arranging that gp will be restarted using ready at an 3394 // appropriate time. After calling dropg and arranging for gp to be 3395 // readied later, the caller can do other work but eventually should 3396 // call schedule to restart the scheduling of goroutines on this m. 
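// Concretely, the typical parking sequence has this shape (it is the shape
// park_m below follows):
//
//	casgstatus(gp, _Grunning, _Gwaiting) // leave _Grunning first
//	dropg()                              // detach gp from this M
//	// ... make gp findable so that a later ready(gp, ...) can requeue it ...
//	schedule()                           // this M moves on to other work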
3397 func dropg() { 3398 gp := getg() 3399 3400 setMNoWB(&gp.m.curg.m, nil) 3401 setGNoWB(&gp.m.curg, nil) 3402 } 3403 3404 // checkTimers runs any timers for the P that are ready. 3405 // If now is not 0 it is the current time. 3406 // It returns the passed time or the current time if now was passed as 0. 3407 // and the time when the next timer should run or 0 if there is no next timer, 3408 // and reports whether it ran any timers. 3409 // If the time when the next timer should run is not 0, 3410 // it is always larger than the returned time. 3411 // We pass now in and out to avoid extra calls of nanotime. 3412 // 3413 //go:yeswritebarrierrec 3414 func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) { 3415 // If it's not yet time for the first timer, or the first adjusted 3416 // timer, then there is nothing to do. 3417 next := pp.timer0When.Load() 3418 nextAdj := pp.timerModifiedEarliest.Load() 3419 if next == 0 || (nextAdj != 0 && nextAdj < next) { 3420 next = nextAdj 3421 } 3422 3423 if next == 0 { 3424 // No timers to run or adjust. 3425 return now, 0, false 3426 } 3427 3428 if now == 0 { 3429 now = nanotime() 3430 } 3431 if now < next { 3432 // Next timer is not ready to run, but keep going 3433 // if we would clear deleted timers. 3434 // This corresponds to the condition below where 3435 // we decide whether to call clearDeletedTimers. 3436 if pp != getg().m.p.ptr() || int(pp.deletedTimers.Load()) <= int(pp.numTimers.Load()/4) { 3437 return now, next, false 3438 } 3439 } 3440 3441 lock(&pp.timersLock) 3442 3443 if len(pp.timers) > 0 { 3444 adjusttimers(pp, now) 3445 for len(pp.timers) > 0 { 3446 // Note that runtimer may temporarily unlock 3447 // pp.timersLock. 3448 if tw := runtimer(pp, now); tw != 0 { 3449 if tw > 0 { 3450 pollUntil = tw 3451 } 3452 break 3453 } 3454 ran = true 3455 } 3456 } 3457 3458 // If this is the local P, and there are a lot of deleted timers, 3459 // clear them out. We only do this for the local P to reduce 3460 // lock contention on timersLock. 3461 if pp == getg().m.p.ptr() && int(pp.deletedTimers.Load()) > len(pp.timers)/4 { 3462 clearDeletedTimers(pp) 3463 } 3464 3465 unlock(&pp.timersLock) 3466 3467 return now, pollUntil, ran 3468 } 3469 3470 func parkunlock_c(gp *g, lock unsafe.Pointer) bool { 3471 unlock((*mutex)(lock)) 3472 return true 3473 } 3474 3475 // park continuation on g0. 3476 func park_m(gp *g) { 3477 mp := getg().m 3478 3479 if trace.enabled { 3480 traceGoPark(mp.waittraceev, mp.waittraceskip) 3481 } 3482 3483 // N.B. Not using casGToWaiting here because the waitreason is 3484 // set by park_m's caller. 3485 casgstatus(gp, _Grunning, _Gwaiting) 3486 dropg() 3487 3488 if fn := mp.waitunlockf; fn != nil { 3489 ok := fn(gp, mp.waitlock) 3490 mp.waitunlockf = nil 3491 mp.waitlock = nil 3492 if !ok { 3493 if trace.enabled { 3494 traceGoUnpark(gp, 2) 3495 } 3496 casgstatus(gp, _Gwaiting, _Grunnable) 3497 execute(gp, true) // Schedule it back, never returns. 3498 } 3499 } 3500 schedule() 3501 } 3502 3503 func goschedImpl(gp *g) { 3504 status := readgstatus(gp) 3505 if status&^_Gscan != _Grunning { 3506 dumpgstatus(gp) 3507 throw("bad g status") 3508 } 3509 casgstatus(gp, _Grunning, _Grunnable) 3510 dropg() 3511 lock(&sched.lock) 3512 globrunqput(gp) 3513 unlock(&sched.lock) 3514 3515 schedule() 3516 } 3517 3518 // Gosched continuation on g0. 
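// The effect of this path is observable from ordinary Go code. A minimal,
// self-contained illustration (an example program, not part of the runtime):
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//		"sync/atomic"
//	)
//
//	func main() {
//		runtime.GOMAXPROCS(1)
//		var ran atomic.Bool
//		go func() { ran.Store(true) }()
//		// Gosched moves this G to _Grunnable and onto the global run queue
//		// (goschedImpl above), normally giving the freshly created goroutine
//		// in the local run queue a turn on the single P.
//		runtime.Gosched()
//		fmt.Println(ran.Load()) // usually prints "true"
//	}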
3519 func gosched_m(gp *g) { 3520 if trace.enabled { 3521 traceGoSched() 3522 } 3523 goschedImpl(gp) 3524 } 3525 3526 // goschedguarded is a forbidden-states-avoided version of gosched_m 3527 func goschedguarded_m(gp *g) { 3528 3529 if !canPreemptM(gp.m) { 3530 gogo(&gp.sched) // never return 3531 } 3532 3533 if trace.enabled { 3534 traceGoSched() 3535 } 3536 goschedImpl(gp) 3537 } 3538 3539 func gopreempt_m(gp *g) { 3540 if trace.enabled { 3541 traceGoPreempt() 3542 } 3543 goschedImpl(gp) 3544 } 3545 3546 // preemptPark parks gp and puts it in _Gpreempted. 3547 // 3548 //go:systemstack 3549 func preemptPark(gp *g) { 3550 if trace.enabled { 3551 traceGoPark(traceEvGoBlock, 0) 3552 } 3553 status := readgstatus(gp) 3554 if status&^_Gscan != _Grunning { 3555 dumpgstatus(gp) 3556 throw("bad g status") 3557 } 3558 3559 if gp.asyncSafePoint { 3560 // Double-check that async preemption does not 3561 // happen in SPWRITE assembly functions. 3562 // isAsyncSafePoint must exclude this case. 3563 f := findfunc(gp.sched.pc) 3564 if !f.valid() { 3565 throw("preempt at unknown pc") 3566 } 3567 if f.flag&funcFlag_SPWRITE != 0 { 3568 println("runtime: unexpected SPWRITE function", funcname(f), "in async preempt") 3569 throw("preempt SPWRITE") 3570 } 3571 } 3572 3573 // Transition from _Grunning to _Gscan|_Gpreempted. We can't 3574 // be in _Grunning when we dropg because then we'd be running 3575 // without an M, but the moment we're in _Gpreempted, 3576 // something could claim this G before we've fully cleaned it 3577 // up. Hence, we set the scan bit to lock down further 3578 // transitions until we can dropg. 3579 casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted) 3580 dropg() 3581 casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted) 3582 schedule() 3583 } 3584 3585 // goyield is like Gosched, but it: 3586 // - emits a GoPreempt trace event instead of a GoSched trace event 3587 // - puts the current G on the runq of the current P instead of the globrunq 3588 func goyield() { 3589 checkTimeouts() 3590 mcall(goyield_m) 3591 } 3592 3593 func goyield_m(gp *g) { 3594 if trace.enabled { 3595 traceGoPreempt() 3596 } 3597 pp := gp.m.p.ptr() 3598 casgstatus(gp, _Grunning, _Grunnable) 3599 dropg() 3600 runqput(pp, gp, false) 3601 schedule() 3602 } 3603 3604 // Finishes execution of the current goroutine. 3605 func goexit1() { 3606 if raceenabled { 3607 racegoend() 3608 } 3609 if trace.enabled { 3610 traceGoEnd() 3611 } 3612 mcall(goexit0) 3613 } 3614 3615 // goexit continuation on g0. 3616 func goexit0(gp *g) { 3617 mp := getg().m 3618 pp := mp.p.ptr() 3619 3620 casgstatus(gp, _Grunning, _Gdead) 3621 gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo)) 3622 if isSystemGoroutine(gp, false) { 3623 sched.ngsys.Add(-1) 3624 } 3625 gp.m = nil 3626 locked := gp.lockedm != 0 3627 gp.lockedm = 0 3628 mp.lockedg = 0 3629 gp.preemptStop = false 3630 gp.paniconfault = false 3631 gp._defer = nil // should be true already but just in case. 3632 gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. 3633 gp.writebuf = nil 3634 gp.waitreason = waitReasonZero 3635 gp.param = nil 3636 gp.labels = nil 3637 gp.timer = nil 3638 3639 if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 { 3640 // Flush assist credit to the global pool. This gives 3641 // better information to pacing if the application is 3642 // rapidly creating an exiting goroutines. 
3643 assistWorkPerByte := gcController.assistWorkPerByte.Load() 3644 scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes)) 3645 gcController.bgScanCredit.Add(scanCredit) 3646 gp.gcAssistBytes = 0 3647 } 3648 3649 dropg() 3650 3651 if GOARCH == "wasm" { // no threads yet on wasm 3652 gfput(pp, gp) 3653 schedule() // never returns 3654 } 3655 3656 if mp.lockedInt != 0 { 3657 print("invalid m->lockedInt = ", mp.lockedInt, "\n") 3658 throw("internal lockOSThread error") 3659 } 3660 gfput(pp, gp) 3661 if locked { 3662 // The goroutine may have locked this thread because 3663 // it put it in an unusual kernel state. Kill it 3664 // rather than returning it to the thread pool. 3665 3666 // Return to mstart, which will release the P and exit 3667 // the thread. 3668 if GOOS != "plan9" { // See golang.org/issue/22227. 3669 gogo(&mp.g0.sched) 3670 } else { 3671 // Clear lockedExt on plan9 since we may end up re-using 3672 // this thread. 3673 mp.lockedExt = 0 3674 } 3675 } 3676 schedule() 3677 } 3678 3679 // save updates getg().sched to refer to pc and sp so that a following 3680 // gogo will restore pc and sp. 3681 // 3682 // save must not have write barriers because invoking a write barrier 3683 // can clobber getg().sched. 3684 // 3685 //go:nosplit 3686 //go:nowritebarrierrec 3687 func save(pc, sp uintptr) { 3688 gp := getg() 3689 3690 if gp == gp.m.g0 || gp == gp.m.gsignal { 3691 // m.g0.sched is special and must describe the context 3692 // for exiting the thread. mstart1 writes to it directly. 3693 // m.gsignal.sched should not be used at all. 3694 // This check makes sure save calls do not accidentally 3695 // run in contexts where they'd write to system g's. 3696 throw("save on system g not allowed") 3697 } 3698 3699 gp.sched.pc = pc 3700 gp.sched.sp = sp 3701 gp.sched.lr = 0 3702 gp.sched.ret = 0 3703 // We need to ensure ctxt is zero, but can't have a write 3704 // barrier here. However, it should always already be zero. 3705 // Assert that. 3706 if gp.sched.ctxt != nil { 3707 badctxt() 3708 } 3709 } 3710 3711 // The goroutine g is about to enter a system call. 3712 // Record that it's not using the cpu anymore. 3713 // This is called only from the go syscall library and cgocall, 3714 // not from the low-level system calls used by the runtime. 3715 // 3716 // Entersyscall cannot split the stack: the save must 3717 // make g->sched refer to the caller's stack segment, because 3718 // entersyscall is going to return immediately after. 3719 // 3720 // Nothing entersyscall calls can split the stack either. 3721 // We cannot safely move the stack during an active call to syscall, 3722 // because we do not know which of the uintptr arguments are 3723 // really pointers (back into the stack). 3724 // In practice, this means that we make the fast path run through 3725 // entersyscall doing no-split things, and the slow path has to use systemstack 3726 // to run bigger things on the system stack. 3727 // 3728 // reentersyscall is the entry point used by cgo callbacks, where explicitly 3729 // saved SP and PC are restored. This is needed when exitsyscall will be called 3730 // from a function further up in the call stack than the parent, as g->syscallsp 3731 // must always point to a valid stack frame. entersyscall below is the normal 3732 // entry point for syscalls, which obtains the SP and PC from the caller. 3733 // 3734 // Syscall tracing: 3735 // At the start of a syscall we emit traceGoSysCall to capture the stack trace. 
3736 // If the syscall does not block, that is it, we do not emit any other events. 3737 // If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock; 3738 // when syscall returns we emit traceGoSysExit and when the goroutine starts running 3739 // (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart. 3740 // To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock, 3741 // we remember current value of syscalltick in m (gp.m.syscalltick = gp.m.p.ptr().syscalltick), 3742 // whoever emits traceGoSysBlock increments p.syscalltick afterwards; 3743 // and we wait for the increment before emitting traceGoSysExit. 3744 // Note that the increment is done even if tracing is not enabled, 3745 // because tracing can be enabled in the middle of syscall. We don't want the wait to hang. 3746 // 3747 //go:nosplit 3748 func reentersyscall(pc, sp uintptr) { 3749 gp := getg() 3750 3751 // Disable preemption because during this function g is in Gsyscall status, 3752 // but can have inconsistent g->sched, do not let GC observe it. 3753 gp.m.locks++ 3754 3755 // Entersyscall must not call any function that might split/grow the stack. 3756 // (See details in comment above.) 3757 // Catch calls that might, by replacing the stack guard with something that 3758 // will trip any stack check and leaving a flag to tell newstack to die. 3759 gp.stackguard0 = stackPreempt 3760 gp.throwsplit = true 3761 3762 // Leave SP around for GC and traceback. 3763 save(pc, sp) 3764 gp.syscallsp = sp 3765 gp.syscallpc = pc 3766 casgstatus(gp, _Grunning, _Gsyscall) 3767 if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 3768 systemstack(func() { 3769 print("entersyscall inconsistent ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 3770 throw("entersyscall") 3771 }) 3772 } 3773 3774 if trace.enabled { 3775 systemstack(traceGoSysCall) 3776 // systemstack itself clobbers g.sched.{pc,sp} and we might 3777 // need them later when the G is genuinely blocked in a 3778 // syscall 3779 save(pc, sp) 3780 } 3781 3782 if sched.sysmonwait.Load() { 3783 systemstack(entersyscall_sysmon) 3784 save(pc, sp) 3785 } 3786 3787 if gp.m.p.ptr().runSafePointFn != 0 { 3788 // runSafePointFn may stack split if run on this stack 3789 systemstack(runSafePointFn) 3790 save(pc, sp) 3791 } 3792 3793 gp.m.syscalltick = gp.m.p.ptr().syscalltick 3794 gp.sysblocktraced = true 3795 pp := gp.m.p.ptr() 3796 pp.m = 0 3797 gp.m.oldp.set(pp) 3798 gp.m.p = 0 3799 atomic.Store(&pp.status, _Psyscall) 3800 if sched.gcwaiting.Load() { 3801 systemstack(entersyscall_gcwait) 3802 save(pc, sp) 3803 } 3804 3805 gp.m.locks-- 3806 } 3807 3808 // Standard syscall entry used by the go syscall library and normal cgo calls. 3809 // 3810 // This is exported via linkname to assembly in the syscall package and x/sys. 
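// A user-level sketch (hypothetical standalone program, Unix-only because it
// uses package syscall directly) of why the entersyscall machinery matters:
// while one goroutine is blocked in a system call, its P is handed off or
// retaken by sysmon, so other goroutines keep running even with GOMAXPROCS(1).
package main

import (
	"fmt"
	"runtime"
	"syscall"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)
	fds := make([]int, 2)
	if err := syscall.Pipe(fds); err != nil {
		panic(err)
	}
	go func() {
		buf := make([]byte, 1)
		syscall.Read(fds[0], buf) // blocks in read(2); entersyscall/exitsyscall wrap it
		fmt.Println("reader returned from the syscall")
	}()
	for i := 0; i < 3; i++ {
		time.Sleep(10 * time.Millisecond)
		fmt.Println("main still running", i) // the blocked reader does not hold the only P
	}
	syscall.Write(fds[1], []byte{1})
	time.Sleep(10 * time.Millisecond) // give the reader a chance to print
}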
3811 // 3812 //go:nosplit 3813 //go:linkname entersyscall 3814 func entersyscall() { 3815 reentersyscall(getcallerpc(), getcallersp()) 3816 } 3817 3818 func entersyscall_sysmon() { 3819 lock(&sched.lock) 3820 if sched.sysmonwait.Load() { 3821 sched.sysmonwait.Store(false) 3822 notewakeup(&sched.sysmonnote) 3823 } 3824 unlock(&sched.lock) 3825 } 3826 3827 func entersyscall_gcwait() { 3828 gp := getg() 3829 pp := gp.m.oldp.ptr() 3830 3831 lock(&sched.lock) 3832 if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) { 3833 if trace.enabled { 3834 traceGoSysBlock(pp) 3835 traceProcStop(pp) 3836 } 3837 pp.syscalltick++ 3838 if sched.stopwait--; sched.stopwait == 0 { 3839 notewakeup(&sched.stopnote) 3840 } 3841 } 3842 unlock(&sched.lock) 3843 } 3844 3845 // The same as entersyscall(), but with a hint that the syscall is blocking. 3846 // 3847 //go:nosplit 3848 func entersyscallblock() { 3849 gp := getg() 3850 3851 gp.m.locks++ // see comment in entersyscall 3852 gp.throwsplit = true 3853 gp.stackguard0 = stackPreempt // see comment in entersyscall 3854 gp.m.syscalltick = gp.m.p.ptr().syscalltick 3855 gp.sysblocktraced = true 3856 gp.m.p.ptr().syscalltick++ 3857 3858 // Leave SP around for GC and traceback. 3859 pc := getcallerpc() 3860 sp := getcallersp() 3861 save(pc, sp) 3862 gp.syscallsp = gp.sched.sp 3863 gp.syscallpc = gp.sched.pc 3864 if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 3865 sp1 := sp 3866 sp2 := gp.sched.sp 3867 sp3 := gp.syscallsp 3868 systemstack(func() { 3869 print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 3870 throw("entersyscallblock") 3871 }) 3872 } 3873 casgstatus(gp, _Grunning, _Gsyscall) 3874 if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp { 3875 systemstack(func() { 3876 print("entersyscallblock inconsistent ", hex(sp), " ", hex(gp.sched.sp), " ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") 3877 throw("entersyscallblock") 3878 }) 3879 } 3880 3881 systemstack(entersyscallblock_handoff) 3882 3883 // Resave for traceback during blocked call. 3884 save(getcallerpc(), getcallersp()) 3885 3886 gp.m.locks-- 3887 } 3888 3889 func entersyscallblock_handoff() { 3890 if trace.enabled { 3891 traceGoSysCall() 3892 traceGoSysBlock(getg().m.p.ptr()) 3893 } 3894 handoffp(releasep()) 3895 } 3896 3897 // The goroutine g exited its system call. 3898 // Arrange for it to run on a cpu again. 3899 // This is called only from the go syscall library, not 3900 // from the low-level system calls used by the runtime. 3901 // 3902 // Write barriers are not allowed because our P may have been stolen. 3903 // 3904 // This is exported via linkname to assembly in the syscall package. 3905 // 3906 //go:nosplit 3907 //go:nowritebarrierrec 3908 //go:linkname exitsyscall 3909 func exitsyscall() { 3910 gp := getg() 3911 3912 gp.m.locks++ // see comment in entersyscall 3913 if getcallersp() > gp.syscallsp { 3914 throw("exitsyscall: syscall frame is no longer valid") 3915 } 3916 3917 gp.waitsince = 0 3918 oldp := gp.m.oldp.ptr() 3919 gp.m.oldp = 0 3920 if exitsyscallfast(oldp) { 3921 // When exitsyscallfast returns success, we have a P so can now use 3922 // write barriers 3923 if goroutineProfile.active { 3924 // Make sure that gp has had its stack written out to the goroutine 3925 // profile, exactly as it was when the goroutine profiler first 3926 // stopped the world. 
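// The syscall tracing scheme described above (traceGoSysCall, traceGoSysBlock,
// traceGoSysExit) is observable from user code with runtime/trace. A sketch
// (hypothetical standalone program, separate from the surrounding runtime code):
// collect a trace and inspect the syscall events with `go tool trace trace.out`.
package main

import (
	"os"
	"runtime/trace"
	"time"
)

func main() {
	f, err := os.Create("trace.out")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	if err := trace.Start(f); err != nil {
		panic(err)
	}
	// Do some work that enters system calls (file I/O here).
	for i := 0; i < 100; i++ {
		os.WriteFile(os.DevNull, []byte("x"), 0o644)
	}
	time.Sleep(50 * time.Millisecond)
	trace.Stop()
}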
3927 systemstack(func() { 3928 tryRecordGoroutineProfileWB(gp) 3929 }) 3930 } 3931 if trace.enabled { 3932 if oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick { 3933 systemstack(traceGoStart) 3934 } 3935 } 3936 // There's a cpu for us, so we can run. 3937 gp.m.p.ptr().syscalltick++ 3938 // We need to cas the status and scan before resuming... 3939 casgstatus(gp, _Gsyscall, _Grunning) 3940 3941 // Garbage collector isn't running (since we are), 3942 // so okay to clear syscallsp. 3943 gp.syscallsp = 0 3944 gp.m.locks-- 3945 if gp.preempt { 3946 // restore the preemption request in case we've cleared it in newstack 3947 gp.stackguard0 = stackPreempt 3948 } else { 3949 // otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock 3950 gp.stackguard0 = gp.stack.lo + _StackGuard 3951 } 3952 gp.throwsplit = false 3953 3954 if sched.disable.user && !schedEnabled(gp) { 3955 // Scheduling of this goroutine is disabled. 3956 Gosched() 3957 } 3958 3959 return 3960 } 3961 3962 gp.sysexitticks = 0 3963 if trace.enabled { 3964 // Wait till traceGoSysBlock event is emitted. 3965 // This ensures consistency of the trace (the goroutine is started after it is blocked). 3966 for oldp != nil && oldp.syscalltick == gp.m.syscalltick { 3967 osyield() 3968 } 3969 // We can't trace syscall exit right now because we don't have a P. 3970 // Tracing code can invoke write barriers that cannot run without a P. 3971 // So instead we remember the syscall exit time and emit the event 3972 // in execute when we have a P. 3973 gp.sysexitticks = cputicks() 3974 } 3975 3976 gp.m.locks-- 3977 3978 // Call the scheduler. 3979 mcall(exitsyscall0) 3980 3981 // Scheduler returned, so we're allowed to run now. 3982 // Delete the syscallsp information that we left for 3983 // the garbage collector during the system call. 3984 // Must wait until now because until gosched returns 3985 // we don't know for sure that the garbage collector 3986 // is not running. 3987 gp.syscallsp = 0 3988 gp.m.p.ptr().syscalltick++ 3989 gp.throwsplit = false 3990 } 3991 3992 //go:nosplit 3993 func exitsyscallfast(oldp *p) bool { 3994 gp := getg() 3995 3996 // Freezetheworld sets stopwait but does not retake P's. 3997 if sched.stopwait == freezeStopWait { 3998 return false 3999 } 4000 4001 // Try to re-acquire the last P. 4002 if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) { 4003 // There's a cpu for us, so we can run. 4004 wirep(oldp) 4005 exitsyscallfast_reacquired() 4006 return true 4007 } 4008 4009 // Try to get any other idle P. 4010 if sched.pidle != 0 { 4011 var ok bool 4012 systemstack(func() { 4013 ok = exitsyscallfast_pidle() 4014 if ok && trace.enabled { 4015 if oldp != nil { 4016 // Wait till traceGoSysBlock event is emitted. 4017 // This ensures consistency of the trace (the goroutine is started after it is blocked). 4018 for oldp.syscalltick == gp.m.syscalltick { 4019 osyield() 4020 } 4021 } 4022 traceGoSysExit(0) 4023 } 4024 }) 4025 if ok { 4026 return true 4027 } 4028 } 4029 return false 4030 } 4031 4032 // exitsyscallfast_reacquired is the exitsyscall path on which this G 4033 // has successfully reacquired the P it was running on before the 4034 // syscall. 4035 // 4036 //go:nosplit 4037 func exitsyscallfast_reacquired() { 4038 gp := getg() 4039 if gp.m.syscalltick != gp.m.p.ptr().syscalltick { 4040 if trace.enabled { 4041 // The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed). 
4042 // traceGoSysBlock for this syscall was already emitted, 4043 // but here we effectively retake the p from the new syscall running on the same p. 4044 systemstack(func() { 4045 // Denote blocking of the new syscall. 4046 traceGoSysBlock(gp.m.p.ptr()) 4047 // Denote completion of the current syscall. 4048 traceGoSysExit(0) 4049 }) 4050 } 4051 gp.m.p.ptr().syscalltick++ 4052 } 4053 } 4054 4055 func exitsyscallfast_pidle() bool { 4056 lock(&sched.lock) 4057 pp, _ := pidleget(0) 4058 if pp != nil && sched.sysmonwait.Load() { 4059 sched.sysmonwait.Store(false) 4060 notewakeup(&sched.sysmonnote) 4061 } 4062 unlock(&sched.lock) 4063 if pp != nil { 4064 acquirep(pp) 4065 return true 4066 } 4067 return false 4068 } 4069 4070 // exitsyscall slow path on g0. 4071 // Failed to acquire P, enqueue gp as runnable. 4072 // 4073 // Called via mcall, so gp is the calling g from this M. 4074 // 4075 //go:nowritebarrierrec 4076 func exitsyscall0(gp *g) { 4077 casgstatus(gp, _Gsyscall, _Grunnable) 4078 dropg() 4079 lock(&sched.lock) 4080 var pp *p 4081 if schedEnabled(gp) { 4082 pp, _ = pidleget(0) 4083 } 4084 var locked bool 4085 if pp == nil { 4086 globrunqput(gp) 4087 4088 // Below, we stoplockedm if gp is locked. globrunqput releases 4089 // ownership of gp, so we must check if gp is locked prior to 4090 // committing the release by unlocking sched.lock, otherwise we 4091 // could race with another M transitioning gp from unlocked to 4092 // locked. 4093 locked = gp.lockedm != 0 4094 } else if sched.sysmonwait.Load() { 4095 sched.sysmonwait.Store(false) 4096 notewakeup(&sched.sysmonnote) 4097 } 4098 unlock(&sched.lock) 4099 if pp != nil { 4100 acquirep(pp) 4101 execute(gp, false) // Never returns. 4102 } 4103 if locked { 4104 // Wait until another thread schedules gp and so m again. 4105 // 4106 // N.B. lockedm must be this M, as this g was running on this M 4107 // before entersyscall. 4108 stoplockedm() 4109 execute(gp, false) // Never returns. 4110 } 4111 stopm() 4112 schedule() // Never returns. 4113 } 4114 4115 // Called from syscall package before fork. 4116 // 4117 //go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork 4118 //go:nosplit 4119 func syscall_runtime_BeforeFork() { 4120 gp := getg().m.curg 4121 4122 // Block signals during a fork, so that the child does not run 4123 // a signal handler before exec if a signal is sent to the process 4124 // group. See issue #18600. 4125 gp.m.locks++ 4126 sigsave(&gp.m.sigmask) 4127 sigblock(false) 4128 4129 // This function is called before fork in syscall package. 4130 // Code between fork and exec must not allocate memory nor even try to grow stack. 4131 // Here we spoil g->_StackGuard to reliably detect any attempts to grow stack. 4132 // runtime_AfterFork will undo this in parent process, but not in child. 4133 gp.stackguard0 = stackFork 4134 } 4135 4136 // Called from syscall package after fork in parent. 4137 // 4138 //go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork 4139 //go:nosplit 4140 func syscall_runtime_AfterFork() { 4141 gp := getg().m.curg 4142 4143 // See the comments in beforefork. 4144 gp.stackguard0 = gp.stack.lo + _StackGuard 4145 4146 msigrestore(gp.m.sigmask) 4147 4148 gp.m.locks-- 4149 } 4150 4151 // inForkedChild is true while manipulating signals in the child process. 4152 // This is used to avoid calling libc functions in case we are using vfork. 4153 var inForkedChild bool 4154 4155 // Called from syscall package after fork in child. 
4156 // It resets non-sigignored signals to the default handler, and 4157 // restores the signal mask in preparation for the exec. 4158 // 4159 // Because this might be called during a vfork, and therefore may be 4160 // temporarily sharing address space with the parent process, this must 4161 // not change any global variables or calling into C code that may do so. 4162 // 4163 //go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild 4164 //go:nosplit 4165 //go:nowritebarrierrec 4166 func syscall_runtime_AfterForkInChild() { 4167 // It's OK to change the global variable inForkedChild here 4168 // because we are going to change it back. There is no race here, 4169 // because if we are sharing address space with the parent process, 4170 // then the parent process can not be running concurrently. 4171 inForkedChild = true 4172 4173 clearSignalHandlers() 4174 4175 // When we are the child we are the only thread running, 4176 // so we know that nothing else has changed gp.m.sigmask. 4177 msigrestore(getg().m.sigmask) 4178 4179 inForkedChild = false 4180 } 4181 4182 // pendingPreemptSignals is the number of preemption signals 4183 // that have been sent but not received. This is only used on Darwin. 4184 // For #41702. 4185 var pendingPreemptSignals atomic.Int32 4186 4187 // Called from syscall package before Exec. 4188 // 4189 //go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec 4190 func syscall_runtime_BeforeExec() { 4191 // Prevent thread creation during exec. 4192 execLock.lock() 4193 4194 // On Darwin, wait for all pending preemption signals to 4195 // be received. See issue #41702. 4196 if GOOS == "darwin" || GOOS == "ios" { 4197 for pendingPreemptSignals.Load() > 0 { 4198 osyield() 4199 } 4200 } 4201 } 4202 4203 // Called from syscall package after Exec. 4204 // 4205 //go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec 4206 func syscall_runtime_AfterExec() { 4207 execLock.unlock() 4208 } 4209 4210 // Allocate a new g, with a stack big enough for stacksize bytes. 4211 func malg(stacksize int32) *g { 4212 newg := new(g) 4213 if stacksize >= 0 { 4214 stacksize = round2(_StackSystem + stacksize) 4215 systemstack(func() { 4216 newg.stack = stackalloc(uint32(stacksize)) 4217 }) 4218 newg.stackguard0 = newg.stack.lo + _StackGuard 4219 newg.stackguard1 = ^uintptr(0) 4220 // Clear the bottom word of the stack. We record g 4221 // there on gsignal stack during VDSO on ARM and ARM64. 4222 *(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0 4223 } 4224 return newg 4225 } 4226 4227 // Create a new g running fn. 4228 // Put it on the queue of g's waiting to run. 4229 // The compiler turns a go statement into a call to this. 4230 func newproc(fn *funcval) { 4231 gp := getg() 4232 pc := getcallerpc() 4233 systemstack(func() { 4234 newg := newproc1(fn, gp, pc) 4235 4236 pp := getg().m.p.ptr() 4237 runqput(pp, newg, true) 4238 4239 if mainStarted { 4240 wakep() 4241 } 4242 }) 4243 } 4244 4245 // Create a new g in state _Grunnable, starting at fn. callerpc is the 4246 // address of the go statement that created this. The caller is responsible 4247 // for adding the new g to the scheduler. 4248 func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g { 4249 if fn == nil { 4250 fatal("go of nil func value") 4251 } 4252 4253 mp := acquirem() // disable preemption because we hold M and P in local vars. 
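// newproc above is the runtime half of the go statement: per its comment, the
// compiler lowers `go f(...)` into a call to newproc with the funcval to run.
// A trivial user-level illustration (hypothetical standalone program): the
// closure below is the fn that newproc/newproc1 turn into a _Grunnable g on the
// current P's run queue.
package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	wg.Add(1)
	msg := "hello from a freshly created g"
	go func() { // lowered to a runtime.newproc call by the compiler
		defer wg.Done()
		fmt.Println(msg)
	}()
	wg.Wait()
}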
4254 pp := mp.p.ptr() 4255 newg := gfget(pp) 4256 if newg == nil { 4257 newg = malg(_StackMin) 4258 casgstatus(newg, _Gidle, _Gdead) 4259 allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack. 4260 } 4261 if newg.stack.hi == 0 { 4262 throw("newproc1: newg missing stack") 4263 } 4264 4265 if readgstatus(newg) != _Gdead { 4266 throw("newproc1: new g is not Gdead") 4267 } 4268 4269 totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame 4270 totalSize = alignUp(totalSize, sys.StackAlign) 4271 sp := newg.stack.hi - totalSize 4272 spArg := sp 4273 if usesLR { 4274 // caller's LR 4275 *(*uintptr)(unsafe.Pointer(sp)) = 0 4276 prepGoExitFrame(sp) 4277 spArg += sys.MinFrameSize 4278 } 4279 4280 memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched)) 4281 newg.sched.sp = sp 4282 newg.stktopsp = sp 4283 newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function 4284 newg.sched.g = guintptr(unsafe.Pointer(newg)) 4285 gostartcallfn(&newg.sched, fn) 4286 newg.gopc = callerpc 4287 newg.ancestors = saveAncestors(callergp) 4288 newg.startpc = fn.fn 4289 if isSystemGoroutine(newg, false) { 4290 sched.ngsys.Add(1) 4291 } else { 4292 // Only user goroutines inherit pprof labels. 4293 if mp.curg != nil { 4294 newg.labels = mp.curg.labels 4295 } 4296 if goroutineProfile.active { 4297 // A concurrent goroutine profile is running. It should include 4298 // exactly the set of goroutines that were alive when the goroutine 4299 // profiler first stopped the world. That does not include newg, so 4300 // mark it as not needing a profile before transitioning it from 4301 // _Gdead. 4302 newg.goroutineProfiled.Store(goroutineProfileSatisfied) 4303 } 4304 } 4305 // Track initial transition? 4306 newg.trackingSeq = uint8(fastrand()) 4307 if newg.trackingSeq%gTrackingPeriod == 0 { 4308 newg.tracking = true 4309 } 4310 casgstatus(newg, _Gdead, _Grunnable) 4311 gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo)) 4312 4313 if pp.goidcache == pp.goidcacheend { 4314 // Sched.goidgen is the last allocated id, 4315 // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch]. 4316 // At startup sched.goidgen=0, so main goroutine receives goid=1. 4317 pp.goidcache = sched.goidgen.Add(_GoidCacheBatch) 4318 pp.goidcache -= _GoidCacheBatch - 1 4319 pp.goidcacheend = pp.goidcache + _GoidCacheBatch 4320 } 4321 newg.goid = pp.goidcache 4322 pp.goidcache++ 4323 if raceenabled { 4324 newg.racectx = racegostart(callerpc) 4325 if newg.labels != nil { 4326 // See note in proflabel.go on labelSync's role in synchronizing 4327 // with the reads in the signal handler. 4328 racereleasemergeg(newg, unsafe.Pointer(&labelSync)) 4329 } 4330 } 4331 if trace.enabled { 4332 traceGoCreate(newg, newg.startpc) 4333 } 4334 releasem(mp) 4335 4336 return newg 4337 } 4338 4339 // saveAncestors copies previous ancestors of the given caller g and 4340 // includes infor for the current caller into a new set of tracebacks for 4341 // a g being created. 4342 func saveAncestors(callergp *g) *[]ancestorInfo { 4343 // Copy all prior info, except for the root goroutine (goid 0). 
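// saveAncestors only records creation stacks when the tracebackancestors
// GODEBUG setting is non-zero (the check just below). A user-level sketch
// (hypothetical standalone program): run it as GODEBUG=tracebackancestors=5 and
// the panic traceback also shows sections like "[originating from goroutine N]"
// for the goroutines that created the crashing one.
package main

func child() {
	panic("boom") // with tracebackancestors set, ancestry of this g is printed too
}

func parent() {
	go child()
}

func main() {
	go parent()
	select {} // block main so the panic in child terminates the program
}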
4344 if debug.tracebackancestors <= 0 || callergp.goid == 0 { 4345 return nil 4346 } 4347 var callerAncestors []ancestorInfo 4348 if callergp.ancestors != nil { 4349 callerAncestors = *callergp.ancestors 4350 } 4351 n := int32(len(callerAncestors)) + 1 4352 if n > debug.tracebackancestors { 4353 n = debug.tracebackancestors 4354 } 4355 ancestors := make([]ancestorInfo, n) 4356 copy(ancestors[1:], callerAncestors) 4357 4358 var pcs [_TracebackMaxFrames]uintptr 4359 npcs := gcallers(callergp, 0, pcs[:]) 4360 ipcs := make([]uintptr, npcs) 4361 copy(ipcs, pcs[:]) 4362 ancestors[0] = ancestorInfo{ 4363 pcs: ipcs, 4364 goid: callergp.goid, 4365 gopc: callergp.gopc, 4366 } 4367 4368 ancestorsp := new([]ancestorInfo) 4369 *ancestorsp = ancestors 4370 return ancestorsp 4371 } 4372 4373 // Put on gfree list. 4374 // If local list is too long, transfer a batch to the global list. 4375 func gfput(pp *p, gp *g) { 4376 if readgstatus(gp) != _Gdead { 4377 throw("gfput: bad status (not Gdead)") 4378 } 4379 4380 stksize := gp.stack.hi - gp.stack.lo 4381 4382 if stksize != uintptr(startingStackSize) { 4383 // non-standard stack size - free it. 4384 stackfree(gp.stack) 4385 gp.stack.lo = 0 4386 gp.stack.hi = 0 4387 gp.stackguard0 = 0 4388 } 4389 4390 pp.gFree.push(gp) 4391 pp.gFree.n++ 4392 if pp.gFree.n >= 64 { 4393 var ( 4394 inc int32 4395 stackQ gQueue 4396 noStackQ gQueue 4397 ) 4398 for pp.gFree.n >= 32 { 4399 gp := pp.gFree.pop() 4400 pp.gFree.n-- 4401 if gp.stack.lo == 0 { 4402 noStackQ.push(gp) 4403 } else { 4404 stackQ.push(gp) 4405 } 4406 inc++ 4407 } 4408 lock(&sched.gFree.lock) 4409 sched.gFree.noStack.pushAll(noStackQ) 4410 sched.gFree.stack.pushAll(stackQ) 4411 sched.gFree.n += inc 4412 unlock(&sched.gFree.lock) 4413 } 4414 } 4415 4416 // Get from gfree list. 4417 // If local list is empty, grab a batch from global list. 4418 func gfget(pp *p) *g { 4419 retry: 4420 if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) { 4421 lock(&sched.gFree.lock) 4422 // Move a batch of free Gs to the P. 4423 for pp.gFree.n < 32 { 4424 // Prefer Gs with stacks. 4425 gp := sched.gFree.stack.pop() 4426 if gp == nil { 4427 gp = sched.gFree.noStack.pop() 4428 if gp == nil { 4429 break 4430 } 4431 } 4432 sched.gFree.n-- 4433 pp.gFree.push(gp) 4434 pp.gFree.n++ 4435 } 4436 unlock(&sched.gFree.lock) 4437 goto retry 4438 } 4439 gp := pp.gFree.pop() 4440 if gp == nil { 4441 return nil 4442 } 4443 pp.gFree.n-- 4444 if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) { 4445 // Deallocate old stack. We kept it in gfput because it was the 4446 // right size when the goroutine was put on the free list, but 4447 // the right size has changed since then. 4448 systemstack(func() { 4449 stackfree(gp.stack) 4450 gp.stack.lo = 0 4451 gp.stack.hi = 0 4452 gp.stackguard0 = 0 4453 }) 4454 } 4455 if gp.stack.lo == 0 { 4456 // Stack was deallocated in gfput or just above. Allocate a new one. 4457 systemstack(func() { 4458 gp.stack = stackalloc(startingStackSize) 4459 }) 4460 gp.stackguard0 = gp.stack.lo + _StackGuard 4461 } else { 4462 if raceenabled { 4463 racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) 4464 } 4465 if msanenabled { 4466 msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) 4467 } 4468 if asanenabled { 4469 asanunpoison(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo) 4470 } 4471 } 4472 return gp 4473 } 4474 4475 // Purge all cached G's from gfree list to the global list. 
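// A simplified sketch (illustrative only, not the runtime's code) of the
// batching pattern gfput and gfget use above: keep a cheap per-worker free
// list and only touch the lock-protected global list in batches, spilling once
// the local list exceeds 64 entries and refilling while it holds fewer than 32.
package main

import (
	"fmt"
	"sync"
)

type item struct{ id int }

type globalFree struct {
	mu    sync.Mutex
	items []*item
}

type localFree struct {
	shared *globalFree
	items  []*item
}

// put mirrors gfput: push locally, then spill a batch to the global list.
func (c *localFree) put(it *item) {
	c.items = append(c.items, it)
	if len(c.items) >= 64 {
		c.shared.mu.Lock()
		for len(c.items) >= 32 {
			n := len(c.items) - 1
			c.shared.items = append(c.shared.items, c.items[n])
			c.items = c.items[:n]
		}
		c.shared.mu.Unlock()
	}
}

// get mirrors gfget: refill a batch from the global list when the local one is empty.
func (c *localFree) get() *item {
	if len(c.items) == 0 {
		c.shared.mu.Lock()
		for len(c.items) < 32 && len(c.shared.items) > 0 {
			n := len(c.shared.items) - 1
			c.items = append(c.items, c.shared.items[n])
			c.shared.items = c.shared.items[:n]
		}
		c.shared.mu.Unlock()
	}
	if len(c.items) == 0 {
		return nil
	}
	n := len(c.items) - 1
	it := c.items[n]
	c.items = c.items[:n]
	return it
}

func main() {
	shared := &globalFree{}
	c := &localFree{shared: shared}
	for i := 0; i < 100; i++ {
		c.put(&item{id: i})
	}
	fmt.Println("local:", len(c.items), "global:", len(shared.items))
	for c.get() != nil {
	}
	fmt.Println("drained; global:", len(shared.items))
}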
4476 func gfpurge(pp *p) { 4477 var ( 4478 inc int32 4479 stackQ gQueue 4480 noStackQ gQueue 4481 ) 4482 for !pp.gFree.empty() { 4483 gp := pp.gFree.pop() 4484 pp.gFree.n-- 4485 if gp.stack.lo == 0 { 4486 noStackQ.push(gp) 4487 } else { 4488 stackQ.push(gp) 4489 } 4490 inc++ 4491 } 4492 lock(&sched.gFree.lock) 4493 sched.gFree.noStack.pushAll(noStackQ) 4494 sched.gFree.stack.pushAll(stackQ) 4495 sched.gFree.n += inc 4496 unlock(&sched.gFree.lock) 4497 } 4498 4499 // Breakpoint executes a breakpoint trap. 4500 func Breakpoint() { 4501 breakpoint() 4502 } 4503 4504 // dolockOSThread is called by LockOSThread and lockOSThread below 4505 // after they modify m.locked. Do not allow preemption during this call, 4506 // or else the m might be different in this function than in the caller. 4507 // 4508 //go:nosplit 4509 func dolockOSThread() { 4510 if GOARCH == "wasm" { 4511 return // no threads on wasm yet 4512 } 4513 gp := getg() 4514 gp.m.lockedg.set(gp) 4515 gp.lockedm.set(gp.m) 4516 } 4517 4518 //go:nosplit 4519 4520 // LockOSThread wires the calling goroutine to its current operating system thread. 4521 // The calling goroutine will always execute in that thread, 4522 // and no other goroutine will execute in it, 4523 // until the calling goroutine has made as many calls to 4524 // UnlockOSThread as to LockOSThread. 4525 // If the calling goroutine exits without unlocking the thread, 4526 // the thread will be terminated. 4527 // 4528 // All init functions are run on the startup thread. Calling LockOSThread 4529 // from an init function will cause the main function to be invoked on 4530 // that thread. 4531 // 4532 // A goroutine should call LockOSThread before calling OS services or 4533 // non-Go library functions that depend on per-thread state. 4534 func LockOSThread() { 4535 if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" { 4536 // If we need to start a new thread from the locked 4537 // thread, we need the template thread. Start it now 4538 // while we're in a known-good state. 4539 startTemplateThread() 4540 } 4541 gp := getg() 4542 gp.m.lockedExt++ 4543 if gp.m.lockedExt == 0 { 4544 gp.m.lockedExt-- 4545 panic("LockOSThread nesting overflow") 4546 } 4547 dolockOSThread() 4548 } 4549 4550 //go:nosplit 4551 func lockOSThread() { 4552 getg().m.lockedInt++ 4553 dolockOSThread() 4554 } 4555 4556 // dounlockOSThread is called by UnlockOSThread and unlockOSThread below 4557 // after they update m->locked. Do not allow preemption during this call, 4558 // or else the m might be in different in this function than in the caller. 4559 // 4560 //go:nosplit 4561 func dounlockOSThread() { 4562 if GOARCH == "wasm" { 4563 return // no threads on wasm yet 4564 } 4565 gp := getg() 4566 if gp.m.lockedInt != 0 || gp.m.lockedExt != 0 { 4567 return 4568 } 4569 gp.m.lockedg = 0 4570 gp.lockedm = 0 4571 } 4572 4573 //go:nosplit 4574 4575 // UnlockOSThread undoes an earlier call to LockOSThread. 4576 // If this drops the number of active LockOSThread calls on the 4577 // calling goroutine to zero, it unwires the calling goroutine from 4578 // its fixed operating system thread. 4579 // If there are no active LockOSThread calls, this is a no-op. 4580 // 4581 // Before calling UnlockOSThread, the caller must ensure that the OS 4582 // thread is suitable for running other goroutines. 
If the caller made 4583 // any permanent changes to the state of the thread that would affect 4584 // other goroutines, it should not call this function and thus leave 4585 // the goroutine locked to the OS thread until the goroutine (and 4586 // hence the thread) exits. 4587 func UnlockOSThread() { 4588 gp := getg() 4589 if gp.m.lockedExt == 0 { 4590 return 4591 } 4592 gp.m.lockedExt-- 4593 dounlockOSThread() 4594 } 4595 4596 //go:nosplit 4597 func unlockOSThread() { 4598 gp := getg() 4599 if gp.m.lockedInt == 0 { 4600 systemstack(badunlockosthread) 4601 } 4602 gp.m.lockedInt-- 4603 dounlockOSThread() 4604 } 4605 4606 func badunlockosthread() { 4607 throw("runtime: internal error: misuse of lockOSThread/unlockOSThread") 4608 } 4609 4610 func gcount() int32 { 4611 n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - sched.ngsys.Load() 4612 for _, pp := range allp { 4613 n -= pp.gFree.n 4614 } 4615 4616 // All these variables can be changed concurrently, so the result can be inconsistent. 4617 // But at least the current goroutine is running. 4618 if n < 1 { 4619 n = 1 4620 } 4621 return n 4622 } 4623 4624 func mcount() int32 { 4625 return int32(sched.mnext - sched.nmfreed) 4626 } 4627 4628 var prof struct { 4629 signalLock atomic.Uint32 4630 4631 // Must hold signalLock to write. Reads may be lock-free, but 4632 // signalLock should be taken to synchronize with changes. 4633 hz atomic.Int32 4634 } 4635 4636 func _System() { _System() } 4637 func _ExternalCode() { _ExternalCode() } 4638 func _LostExternalCode() { _LostExternalCode() } 4639 func _GC() { _GC() } 4640 func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } 4641 func _VDSO() { _VDSO() } 4642 4643 // Called if we receive a SIGPROF signal. 4644 // Called by the signal handler, may run during STW. 4645 // 4646 //go:nowritebarrierrec 4647 func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { 4648 if prof.hz.Load() == 0 { 4649 return 4650 } 4651 4652 // If mp.profilehz is 0, then profiling is not enabled for this thread. 4653 // We must check this to avoid a deadlock between setcpuprofilerate 4654 // and the call to cpuprof.add, below. 4655 if mp != nil && mp.profilehz == 0 { 4656 return 4657 } 4658 4659 // On mips{,le}/arm, 64bit atomics are emulated with spinlocks, in 4660 // runtime/internal/atomic. If SIGPROF arrives while the program is inside 4661 // the critical section, it creates a deadlock (when writing the sample). 4662 // As a workaround, create a counter of SIGPROFs while in critical section 4663 // to store the count, and pass it to sigprof.add() later when SIGPROF is 4664 // received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc). 4665 if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" { 4666 if f := findfunc(pc); f.valid() { 4667 if hasPrefix(funcname(f), "runtime/internal/atomic") { 4668 cpuprof.lostAtomic++ 4669 return 4670 } 4671 } 4672 if GOARCH == "arm" && goarm < 7 && GOOS == "linux" && pc&0xffff0000 == 0xffff0000 { 4673 // runtime/internal/atomic functions call into kernel 4674 // helpers on arm < 7. See 4675 // runtime/internal/atomic/sys_linux_arm.s. 4676 cpuprof.lostAtomic++ 4677 return 4678 } 4679 } 4680 4681 // Profiling runs concurrently with GC, so it must not allocate. 4682 // Set a trap in case the code does allocate. 4683 // Note that on windows, one thread takes profiles of all the 4684 // other threads, so mp is usually not getg().m. 4685 // In fact mp may not even be stopped. 4686 // See golang.org/issue/17165. 
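// A user-level sketch (hypothetical standalone program, separate from the
// surrounding runtime code) of the LockOSThread / UnlockOSThread pairing
// documented above: wire a goroutine to one OS thread for work that depends on
// per-thread state, then release the wiring with a matching unlock.
package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		runtime.LockOSThread()
		defer runtime.UnlockOSThread() // as many unlocks as locks releases the wiring
		// Calls that rely on per-thread state (thread-local storage in a C
		// library, OS APIs that must stay on one thread, ...) would go here.
		fmt.Println("running wired to a single OS thread")
		close(done)
	}()
	<-done
}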
4687 getg().m.mallocing++ 4688 4689 var stk [maxCPUProfStack]uintptr 4690 n := 0 4691 if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 { 4692 cgoOff := 0 4693 // Check cgoCallersUse to make sure that we are not 4694 // interrupting other code that is fiddling with 4695 // cgoCallers. We are running in a signal handler 4696 // with all signals blocked, so we don't have to worry 4697 // about any other code interrupting us. 4698 if mp.cgoCallersUse.Load() == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 { 4699 for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 { 4700 cgoOff++ 4701 } 4702 copy(stk[:], mp.cgoCallers[:cgoOff]) 4703 mp.cgoCallers[0] = 0 4704 } 4705 4706 // Collect Go stack that leads to the cgo call. 4707 n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[cgoOff], len(stk)-cgoOff, nil, nil, 0) 4708 if n > 0 { 4709 n += cgoOff 4710 } 4711 } else { 4712 n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack) 4713 } 4714 4715 if n <= 0 { 4716 // Normal traceback is impossible or has failed. 4717 // See if it falls into several common cases. 4718 n = 0 4719 if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 { 4720 // Libcall, i.e. runtime syscall on windows. 4721 // Collect Go stack that leads to the call. 4722 n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0) 4723 } 4724 if n == 0 && mp != nil && mp.vdsoSP != 0 { 4725 n = gentraceback(mp.vdsoPC, mp.vdsoSP, 0, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack) 4726 } 4727 if n == 0 { 4728 // If all of the above has failed, account it against abstract "System" or "GC". 4729 n = 2 4730 if inVDSOPage(pc) { 4731 pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum 4732 } else if pc > firstmoduledata.etext { 4733 // "ExternalCode" is better than "etext". 4734 pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum 4735 } 4736 stk[0] = pc 4737 if mp.preemptoff != "" { 4738 stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum 4739 } else { 4740 stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum 4741 } 4742 } 4743 } 4744 4745 if prof.hz.Load() != 0 { 4746 // Note: it can happen on Windows that we interrupted a system thread 4747 // with no g, so gp could nil. The other nil checks are done out of 4748 // caution, but not expected to be nil in practice. 4749 var tagPtr *unsafe.Pointer 4750 if gp != nil && gp.m != nil && gp.m.curg != nil { 4751 tagPtr = &gp.m.curg.labels 4752 } 4753 cpuprof.add(tagPtr, stk[:n]) 4754 4755 gprof := gp 4756 var pp *p 4757 if gp != nil && gp.m != nil { 4758 if gp.m.curg != nil { 4759 gprof = gp.m.curg 4760 } 4761 pp = gp.m.p.ptr() 4762 } 4763 traceCPUSample(gprof, pp, stk[:n]) 4764 } 4765 getg().m.mallocing-- 4766 } 4767 4768 // setcpuprofilerate sets the CPU profiling rate to hz times per second. 4769 // If hz <= 0, setcpuprofilerate turns off CPU profiling. 4770 func setcpuprofilerate(hz int32) { 4771 // Force sane arguments. 4772 if hz < 0 { 4773 hz = 0 4774 } 4775 4776 // Disable preemption, otherwise we can be rescheduled to another thread 4777 // that has profiling enabled. 4778 gp := getg() 4779 gp.m.locks++ 4780 4781 // Stop profiler on this thread so that it is safe to lock prof. 4782 // if a profiling signal came in while we had prof locked, 4783 // it would deadlock. 
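// sigprof and setcpuprofilerate are normally driven from user code through
// runtime/pprof. A minimal sketch (hypothetical standalone program, separate
// from the surrounding runtime code): StartCPUProfile enables the SIGPROF-based
// sampler (100 Hz by default), and each signal is handled by sigprof above.
package main

import (
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("cpu.prof")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil {
		panic(err)
	}
	defer pprof.StopCPUProfile()

	// Burn some CPU so there is something to sample;
	// analyze afterwards with: go tool pprof cpu.prof
	sum := 0
	for i := 0; i < 50_000_000; i++ {
		sum += i % 7
	}
	_ = sum
}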
4784 setThreadCPUProfiler(0) 4785 4786 for !prof.signalLock.CompareAndSwap(0, 1) { 4787 osyield() 4788 } 4789 if prof.hz.Load() != hz { 4790 setProcessCPUProfiler(hz) 4791 prof.hz.Store(hz) 4792 } 4793 prof.signalLock.Store(0) 4794 4795 lock(&sched.lock) 4796 sched.profilehz = hz 4797 unlock(&sched.lock) 4798 4799 if hz != 0 { 4800 setThreadCPUProfiler(hz) 4801 } 4802 4803 gp.m.locks-- 4804 } 4805 4806 // init initializes pp, which may be a freshly allocated p or a 4807 // previously destroyed p, and transitions it to status _Pgcstop. 4808 func (pp *p) init(id int32) { 4809 pp.id = id 4810 pp.status = _Pgcstop 4811 pp.sudogcache = pp.sudogbuf[:0] 4812 pp.deferpool = pp.deferpoolbuf[:0] 4813 pp.wbBuf.reset() 4814 if pp.mcache == nil { 4815 if id == 0 { 4816 if mcache0 == nil { 4817 throw("missing mcache?") 4818 } 4819 // Use the bootstrap mcache0. Only one P will get 4820 // mcache0: the one with ID 0. 4821 pp.mcache = mcache0 4822 } else { 4823 pp.mcache = allocmcache() 4824 } 4825 } 4826 if raceenabled && pp.raceprocctx == 0 { 4827 if id == 0 { 4828 pp.raceprocctx = raceprocctx0 4829 raceprocctx0 = 0 // bootstrap 4830 } else { 4831 pp.raceprocctx = raceproccreate() 4832 } 4833 } 4834 lockInit(&pp.timersLock, lockRankTimers) 4835 4836 // This P may get timers when it starts running. Set the mask here 4837 // since the P may not go through pidleget (notably P 0 on startup). 4838 timerpMask.set(id) 4839 // Similarly, we may not go through pidleget before this P starts 4840 // running if it is P 0 on startup. 4841 idlepMask.clear(id) 4842 } 4843 4844 // destroy releases all of the resources associated with pp and 4845 // transitions it to status _Pdead. 4846 // 4847 // sched.lock must be held and the world must be stopped. 4848 func (pp *p) destroy() { 4849 assertLockHeld(&sched.lock) 4850 assertWorldStopped() 4851 4852 // Move all runnable goroutines to the global queue 4853 for pp.runqhead != pp.runqtail { 4854 // Pop from tail of local queue 4855 pp.runqtail-- 4856 gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr() 4857 // Push onto head of global queue 4858 globrunqputhead(gp) 4859 } 4860 if pp.runnext != 0 { 4861 globrunqputhead(pp.runnext.ptr()) 4862 pp.runnext = 0 4863 } 4864 if len(pp.timers) > 0 { 4865 plocal := getg().m.p.ptr() 4866 // The world is stopped, but we acquire timersLock to 4867 // protect against sysmon calling timeSleepUntil. 4868 // This is the only case where we hold the timersLock of 4869 // more than one P, so there are no deadlock concerns. 4870 lock(&plocal.timersLock) 4871 lock(&pp.timersLock) 4872 moveTimers(plocal, pp.timers) 4873 pp.timers = nil 4874 pp.numTimers.Store(0) 4875 pp.deletedTimers.Store(0) 4876 pp.timer0When.Store(0) 4877 unlock(&pp.timersLock) 4878 unlock(&plocal.timersLock) 4879 } 4880 // Flush p's write barrier buffer. 4881 if gcphase != _GCoff { 4882 wbBufFlush1(pp) 4883 pp.gcw.dispose() 4884 } 4885 for i := range pp.sudogbuf { 4886 pp.sudogbuf[i] = nil 4887 } 4888 pp.sudogcache = pp.sudogbuf[:0] 4889 for j := range pp.deferpoolbuf { 4890 pp.deferpoolbuf[j] = nil 4891 } 4892 pp.deferpool = pp.deferpoolbuf[:0] 4893 systemstack(func() { 4894 for i := 0; i < pp.mspancache.len; i++ { 4895 // Safe to call since the world is stopped. 
4896 mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i])) 4897 } 4898 pp.mspancache.len = 0 4899 lock(&mheap_.lock) 4900 pp.pcache.flush(&mheap_.pages) 4901 unlock(&mheap_.lock) 4902 }) 4903 freemcache(pp.mcache) 4904 pp.mcache = nil 4905 gfpurge(pp) 4906 traceProcFree(pp) 4907 if raceenabled { 4908 if pp.timerRaceCtx != 0 { 4909 // The race detector code uses a callback to fetch 4910 // the proc context, so arrange for that callback 4911 // to see the right thing. 4912 // This hack only works because we are the only 4913 // thread running. 4914 mp := getg().m 4915 phold := mp.p.ptr() 4916 mp.p.set(pp) 4917 4918 racectxend(pp.timerRaceCtx) 4919 pp.timerRaceCtx = 0 4920 4921 mp.p.set(phold) 4922 } 4923 raceprocdestroy(pp.raceprocctx) 4924 pp.raceprocctx = 0 4925 } 4926 pp.gcAssistTime = 0 4927 pp.status = _Pdead 4928 } 4929 4930 // Change number of processors. 4931 // 4932 // sched.lock must be held, and the world must be stopped. 4933 // 4934 // gcworkbufs must not be being modified by either the GC or the write barrier 4935 // code, so the GC must not be running if the number of Ps actually changes. 4936 // 4937 // Returns list of Ps with local work, they need to be scheduled by the caller. 4938 func procresize(nprocs int32) *p { 4939 assertLockHeld(&sched.lock) 4940 assertWorldStopped() 4941 4942 old := gomaxprocs 4943 if old < 0 || nprocs <= 0 { 4944 throw("procresize: invalid arg") 4945 } 4946 if trace.enabled { 4947 traceGomaxprocs(nprocs) 4948 } 4949 4950 // update statistics 4951 now := nanotime() 4952 if sched.procresizetime != 0 { 4953 sched.totaltime += int64(old) * (now - sched.procresizetime) 4954 } 4955 sched.procresizetime = now 4956 4957 maskWords := (nprocs + 31) / 32 4958 4959 // Grow allp if necessary. 4960 if nprocs > int32(len(allp)) { 4961 // Synchronize with retake, which could be running 4962 // concurrently since it doesn't run on a P. 4963 lock(&allpLock) 4964 if nprocs <= int32(cap(allp)) { 4965 allp = allp[:nprocs] 4966 } else { 4967 nallp := make([]*p, nprocs) 4968 // Copy everything up to allp's cap so we 4969 // never lose old allocated Ps. 4970 copy(nallp, allp[:cap(allp)]) 4971 allp = nallp 4972 } 4973 4974 if maskWords <= int32(cap(idlepMask)) { 4975 idlepMask = idlepMask[:maskWords] 4976 timerpMask = timerpMask[:maskWords] 4977 } else { 4978 nidlepMask := make([]uint32, maskWords) 4979 // No need to copy beyond len, old Ps are irrelevant. 4980 copy(nidlepMask, idlepMask) 4981 idlepMask = nidlepMask 4982 4983 ntimerpMask := make([]uint32, maskWords) 4984 copy(ntimerpMask, timerpMask) 4985 timerpMask = ntimerpMask 4986 } 4987 unlock(&allpLock) 4988 } 4989 4990 // initialize new P's 4991 for i := old; i < nprocs; i++ { 4992 pp := allp[i] 4993 if pp == nil { 4994 pp = new(p) 4995 } 4996 pp.init(i) 4997 atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) 4998 } 4999 5000 gp := getg() 5001 if gp.m.p != 0 && gp.m.p.ptr().id < nprocs { 5002 // continue to use the current P 5003 gp.m.p.ptr().status = _Prunning 5004 gp.m.p.ptr().mcache.prepareForSweep() 5005 } else { 5006 // release the current P and acquire allp[0]. 5007 // 5008 // We must do this before destroying our current P 5009 // because p.destroy itself has write barriers, so we 5010 // need to do that from a valid P. 5011 if gp.m.p != 0 { 5012 if trace.enabled { 5013 // Pretend that we were descheduled 5014 // and then scheduled again to keep 5015 // the trace sane. 
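// procresize runs with the world stopped whenever the number of Ps changes. A
// user-level sketch (hypothetical standalone program): runtime.GOMAXPROCS both
// queries and resizes the set of Ps that p.init and p.destroy manage.
package main

import (
	"fmt"
	"runtime"
)

func main() {
	fmt.Println("current Ps:", runtime.GOMAXPROCS(0)) // an argument of 0 only queries
	prev := runtime.GOMAXPROCS(2)                     // stop the world, procresize(2), restart
	fmt.Println("was:", prev, "now:", runtime.GOMAXPROCS(0))
	runtime.GOMAXPROCS(prev) // restore the previous setting
}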
5016 traceGoSched() 5017 traceProcStop(gp.m.p.ptr()) 5018 } 5019 gp.m.p.ptr().m = 0 5020 } 5021 gp.m.p = 0 5022 pp := allp[0] 5023 pp.m = 0 5024 pp.status = _Pidle 5025 acquirep(pp) 5026 if trace.enabled { 5027 traceGoStart() 5028 } 5029 } 5030 5031 // g.m.p is now set, so we no longer need mcache0 for bootstrapping. 5032 mcache0 = nil 5033 5034 // release resources from unused P's 5035 for i := nprocs; i < old; i++ { 5036 pp := allp[i] 5037 pp.destroy() 5038 // can't free P itself because it can be referenced by an M in syscall 5039 } 5040 5041 // Trim allp. 5042 if int32(len(allp)) != nprocs { 5043 lock(&allpLock) 5044 allp = allp[:nprocs] 5045 idlepMask = idlepMask[:maskWords] 5046 timerpMask = timerpMask[:maskWords] 5047 unlock(&allpLock) 5048 } 5049 5050 var runnablePs *p 5051 for i := nprocs - 1; i >= 0; i-- { 5052 pp := allp[i] 5053 if gp.m.p.ptr() == pp { 5054 continue 5055 } 5056 pp.status = _Pidle 5057 if runqempty(pp) { 5058 pidleput(pp, now) 5059 } else { 5060 pp.m.set(mget()) 5061 pp.link.set(runnablePs) 5062 runnablePs = pp 5063 } 5064 } 5065 stealOrder.reset(uint32(nprocs)) 5066 var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 5067 atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs)) 5068 if old != nprocs { 5069 // Notify the limiter that the amount of procs has changed. 5070 gcCPULimiter.resetCapacity(now, nprocs) 5071 } 5072 return runnablePs 5073 } 5074 5075 // Associate p and the current m. 5076 // 5077 // This function is allowed to have write barriers even if the caller 5078 // isn't because it immediately acquires pp. 5079 // 5080 //go:yeswritebarrierrec 5081 func acquirep(pp *p) { 5082 // Do the part that isn't allowed to have write barriers. 5083 wirep(pp) 5084 5085 // Have p; write barriers now allowed. 5086 5087 // Perform deferred mcache flush before this P can allocate 5088 // from a potentially stale mcache. 5089 pp.mcache.prepareForSweep() 5090 5091 if trace.enabled { 5092 traceProcStart() 5093 } 5094 } 5095 5096 // wirep is the first step of acquirep, which actually associates the 5097 // current M to pp. This is broken out so we can disallow write 5098 // barriers for this part, since we don't yet have a P. 5099 // 5100 //go:nowritebarrierrec 5101 //go:nosplit 5102 func wirep(pp *p) { 5103 gp := getg() 5104 5105 if gp.m.p != 0 { 5106 throw("wirep: already in go") 5107 } 5108 if pp.m != 0 || pp.status != _Pidle { 5109 id := int64(0) 5110 if pp.m != 0 { 5111 id = pp.m.ptr().id 5112 } 5113 print("wirep: p->m=", pp.m, "(", id, ") p->status=", pp.status, "\n") 5114 throw("wirep: invalid p state") 5115 } 5116 gp.m.p.set(pp) 5117 pp.m.set(gp.m) 5118 pp.status = _Prunning 5119 } 5120 5121 // Disassociate p and the current m. 5122 func releasep() *p { 5123 gp := getg() 5124 5125 if gp.m.p == 0 { 5126 throw("releasep: invalid arg") 5127 } 5128 pp := gp.m.p.ptr() 5129 if pp.m.ptr() != gp.m || pp.status != _Prunning { 5130 print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n") 5131 throw("releasep: invalid p state") 5132 } 5133 if trace.enabled { 5134 traceProcStop(gp.m.p.ptr()) 5135 } 5136 gp.m.p = 0 5137 pp.m = 0 5138 pp.status = _Pidle 5139 return pp 5140 } 5141 5142 func incidlelocked(v int32) { 5143 lock(&sched.lock) 5144 sched.nmidlelocked += v 5145 if v > 0 { 5146 checkdead() 5147 } 5148 unlock(&sched.lock) 5149 } 5150 5151 // Check for deadlock situation. 5152 // The check is based on number of running M's, if 0 -> deadlock. 5153 // sched.lock must be held. 
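// checkdead, just below, is what reports the familiar fatal error when every
// goroutine is blocked. A user-level sketch (hypothetical standalone program)
// that trips it: main blocks on a channel that nothing will ever send on.
package main

func main() {
	ch := make(chan int)
	<-ch // fatal error: all goroutines are asleep - deadlock!
}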
5154 func checkdead() { 5155 assertLockHeld(&sched.lock) 5156 5157 // For -buildmode=c-shared or -buildmode=c-archive it's OK if 5158 // there are no running goroutines. The calling program is 5159 // assumed to be running. 5160 if islibrary || isarchive { 5161 return 5162 } 5163 5164 // If we are dying because of a signal caught on an already idle thread, 5165 // freezetheworld will cause all running threads to block. 5166 // And runtime will essentially enter into deadlock state, 5167 // except that there is a thread that will call exit soon. 5168 if panicking.Load() > 0 { 5169 return 5170 } 5171 5172 // If we are not running under cgo, but we have an extra M then account 5173 // for it. (It is possible to have an extra M on Windows without cgo to 5174 // accommodate callbacks created by syscall.NewCallback. See issue #6751 5175 // for details.) 5176 var run0 int32 5177 if !iscgo && cgoHasExtraM { 5178 mp := lockextra(true) 5179 haveExtraM := extraMCount > 0 5180 unlockextra(mp) 5181 if haveExtraM { 5182 run0 = 1 5183 } 5184 } 5185 5186 run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys 5187 if run > run0 { 5188 return 5189 } 5190 if run < 0 { 5191 print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n") 5192 throw("checkdead: inconsistent counts") 5193 } 5194 5195 grunning := 0 5196 forEachG(func(gp *g) { 5197 if isSystemGoroutine(gp, false) { 5198 return 5199 } 5200 s := readgstatus(gp) 5201 switch s &^ _Gscan { 5202 case _Gwaiting, 5203 _Gpreempted: 5204 grunning++ 5205 case _Grunnable, 5206 _Grunning, 5207 _Gsyscall: 5208 print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n") 5209 throw("checkdead: runnable g") 5210 } 5211 }) 5212 if grunning == 0 { // possible if main goroutine calls runtime·Goexit() 5213 unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang 5214 fatal("no goroutines (main called runtime.Goexit) - deadlock!") 5215 } 5216 5217 // Maybe jump time forward for playground. 5218 if faketime != 0 { 5219 if when := timeSleepUntil(); when < maxWhen { 5220 faketime = when 5221 5222 // Start an M to steal the timer. 5223 pp, _ := pidleget(faketime) 5224 if pp == nil { 5225 // There should always be a free P since 5226 // nothing is running. 5227 throw("checkdead: no p for timer") 5228 } 5229 mp := mget() 5230 if mp == nil { 5231 // There should always be a free M since 5232 // nothing is running. 5233 throw("checkdead: no m for timer") 5234 } 5235 // M must be spinning to steal. We set this to be 5236 // explicit, but since this is the only M it would 5237 // become spinning on its own anyways. 5238 sched.nmspinning.Add(1) 5239 mp.spinning = true 5240 mp.nextp.set(pp) 5241 notewakeup(&mp.park) 5242 return 5243 } 5244 } 5245 5246 // There are no goroutines running, so we can look at the P's. 5247 for _, pp := range allp { 5248 if len(pp.timers) > 0 { 5249 return 5250 } 5251 } 5252 5253 unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang 5254 fatal("all goroutines are asleep - deadlock!") 5255 } 5256 5257 // forcegcperiod is the maximum time in nanoseconds between garbage 5258 // collections. If we go this long without a garbage collection, one 5259 // is forced to run. 5260 // 5261 // This is a variable for testing purposes. It normally doesn't change. 5262 var forcegcperiod int64 = 2 * 60 * 1e9 5263 5264 // needSysmonWorkaround is true if the workaround for 5265 // golang.org/issue/42515 is needed on NetBSD. 
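// forcegcperiod above is why a collection still happens roughly every two
// minutes even when the pacer is effectively off. A user-level sketch
// (hypothetical standalone program; it deliberately sleeps for over two
// minutes): after an initial GC, heap-growth-triggered collections are disabled,
// yet MemStats.NumGC still advances once sysmon fires the time-based trigger.
package main

import (
	"fmt"
	"runtime"
	"runtime/debug"
	"time"
)

func main() {
	runtime.GC()           // make sure at least one cycle has completed
	debug.SetGCPercent(-1) // disable heap-growth-triggered GC

	var before, after runtime.MemStats
	runtime.ReadMemStats(&before)
	time.Sleep(130 * time.Second) // longer than forcegcperiod (2 minutes)
	runtime.ReadMemStats(&after)

	fmt.Println("GC cycles before:", before.NumGC, "after:", after.NumGC)
}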
5266 var needSysmonWorkaround bool = false 5267 5268 // Always runs without a P, so write barriers are not allowed. 5269 // 5270 //go:nowritebarrierrec 5271 func sysmon() { 5272 lock(&sched.lock) 5273 sched.nmsys++ 5274 checkdead() 5275 unlock(&sched.lock) 5276 5277 lasttrace := int64(0) 5278 idle := 0 // how many cycles in succession we had not wokeup somebody 5279 delay := uint32(0) 5280 5281 for { 5282 if idle == 0 { // start with 20us sleep... 5283 delay = 20 5284 } else if idle > 50 { // start doubling the sleep after 1ms... 5285 delay *= 2 5286 } 5287 if delay > 10*1000 { // up to 10ms 5288 delay = 10 * 1000 5289 } 5290 usleep(delay) 5291 5292 // sysmon should not enter deep sleep if schedtrace is enabled so that 5293 // it can print that information at the right time. 5294 // 5295 // It should also not enter deep sleep if there are any active P's so 5296 // that it can retake P's from syscalls, preempt long running G's, and 5297 // poll the network if all P's are busy for long stretches. 5298 // 5299 // It should wakeup from deep sleep if any P's become active either due 5300 // to exiting a syscall or waking up due to a timer expiring so that it 5301 // can resume performing those duties. If it wakes from a syscall it 5302 // resets idle and delay as a bet that since it had retaken a P from a 5303 // syscall before, it may need to do it again shortly after the 5304 // application starts work again. It does not reset idle when waking 5305 // from a timer to avoid adding system load to applications that spend 5306 // most of their time sleeping. 5307 now := nanotime() 5308 if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) { 5309 lock(&sched.lock) 5310 if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs { 5311 syscallWake := false 5312 next := timeSleepUntil() 5313 if next > now { 5314 sched.sysmonwait.Store(true) 5315 unlock(&sched.lock) 5316 // Make wake-up period small enough 5317 // for the sampling to be correct. 5318 sleep := forcegcperiod / 2 5319 if next-now < sleep { 5320 sleep = next - now 5321 } 5322 shouldRelax := sleep >= osRelaxMinNS 5323 if shouldRelax { 5324 osRelax(true) 5325 } 5326 syscallWake = notetsleep(&sched.sysmonnote, sleep) 5327 if shouldRelax { 5328 osRelax(false) 5329 } 5330 lock(&sched.lock) 5331 sched.sysmonwait.Store(false) 5332 noteclear(&sched.sysmonnote) 5333 } 5334 if syscallWake { 5335 idle = 0 5336 delay = 20 5337 } 5338 } 5339 unlock(&sched.lock) 5340 } 5341 5342 lock(&sched.sysmonlock) 5343 // Update now in case we blocked on sysmonnote or spent a long time 5344 // blocked on schedlock or sysmonlock above. 5345 now = nanotime() 5346 5347 // trigger libc interceptors if needed 5348 if *cgo_yield != nil { 5349 asmcgocall(*cgo_yield, nil) 5350 } 5351 // poll network if not polled for more than 10ms 5352 lastpoll := sched.lastpoll.Load() 5353 if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now { 5354 sched.lastpoll.CompareAndSwap(lastpoll, now) 5355 list := netpoll(0) // non-blocking - returns list of goroutines 5356 if !list.empty() { 5357 // Need to decrement number of idle locked M's 5358 // (pretending that one more is running) before injectglist. 5359 // Otherwise it can lead to the following situation: 5360 // injectglist grabs all P's but before it starts M's to run the P's, 5361 // another M returns from syscall, finishes running its G, 5362 // observes that there is no work to do and no other running M's 5363 // and reports deadlock. 
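// A small sketch (illustrative only, not the runtime's code) of the sleep
// back-off at the top of sysmon's loop above: 20us while work keeps turning up,
// doubling after ~50 idle cycles (about 1ms of sleeping), capped at 10ms so an
// idle process costs very little CPU.
package main

import "fmt"

func main() {
	delay := uint32(0)
	for idle := 0; idle <= 60; idle++ {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
		if idle%10 == 0 || idle > 50 {
			fmt.Printf("idle=%d sleep=%dus\n", idle, delay)
		}
	}
}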
5364 incidlelocked(-1) 5365 injectglist(&list) 5366 incidlelocked(1) 5367 } 5368 } 5369 if GOOS == "netbsd" && needSysmonWorkaround { 5370 // netpoll is responsible for waiting for timer 5371 // expiration, so we typically don't have to worry 5372 // about starting an M to service timers. (Note that 5373 // sleep for timeSleepUntil above simply ensures sysmon 5374 // starts running again when that timer expiration may 5375 // cause Go code to run again). 5376 // 5377 // However, netbsd has a kernel bug that sometimes 5378 // misses netpollBreak wake-ups, which can lead to 5379 // unbounded delays servicing timers. If we detect this 5380 // overrun, then startm to get something to handle the 5381 // timer. 5382 // 5383 // See issue 42515 and 5384 // https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=50094. 5385 if next := timeSleepUntil(); next < now { 5386 startm(nil, false) 5387 } 5388 } 5389 if scavenger.sysmonWake.Load() != 0 { 5390 // Kick the scavenger awake if someone requested it. 5391 scavenger.wake() 5392 } 5393 // retake P's blocked in syscalls 5394 // and preempt long running G's 5395 if retake(now) != 0 { 5396 idle = 0 5397 } else { 5398 idle++ 5399 } 5400 // check if we need to force a GC 5401 if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() { 5402 lock(&forcegc.lock) 5403 forcegc.idle.Store(false) 5404 var list gList 5405 list.push(forcegc.g) 5406 injectglist(&list) 5407 unlock(&forcegc.lock) 5408 } 5409 if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now { 5410 lasttrace = now 5411 schedtrace(debug.scheddetail > 0) 5412 } 5413 unlock(&sched.sysmonlock) 5414 } 5415 } 5416 5417 type sysmontick struct { 5418 schedtick uint32 5419 schedwhen int64 5420 syscalltick uint32 5421 syscallwhen int64 5422 } 5423 5424 // forcePreemptNS is the time slice given to a G before it is 5425 // preempted. 5426 const forcePreemptNS = 10 * 1000 * 1000 // 10ms 5427 5428 func retake(now int64) uint32 { 5429 n := 0 5430 // Prevent allp slice changes. This lock will be completely 5431 // uncontended unless we're already stopping the world. 5432 lock(&allpLock) 5433 // We can't use a range loop over allp because we may 5434 // temporarily drop the allpLock. Hence, we need to re-fetch 5435 // allp each time around the loop. 5436 for i := 0; i < len(allp); i++ { 5437 pp := allp[i] 5438 if pp == nil { 5439 // This can happen if procresize has grown 5440 // allp but not yet created new Ps. 5441 continue 5442 } 5443 pd := &pp.sysmontick 5444 s := pp.status 5445 sysretake := false 5446 if s == _Prunning || s == _Psyscall { 5447 // Preempt G if it's running for too long. 5448 t := int64(pp.schedtick) 5449 if int64(pd.schedtick) != t { 5450 pd.schedtick = uint32(t) 5451 pd.schedwhen = now 5452 } else if pd.schedwhen+forcePreemptNS <= now { 5453 preemptone(pp) 5454 // In case of syscall, preemptone() doesn't 5455 // work, because there is no M wired to P. 5456 sysretake = true 5457 } 5458 } 5459 if s == _Psyscall { 5460 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). 5461 t := int64(pp.syscalltick) 5462 if !sysretake && int64(pd.syscalltick) != t { 5463 pd.syscalltick = uint32(t) 5464 pd.syscallwhen = now 5465 continue 5466 } 5467 // On the one hand we don't want to retake Ps if there is no other work to do, 5468 // but on the other hand we want to retake them eventually 5469 // because they can prevent the sysmon thread from deep sleep. 
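// A user-level sketch (hypothetical standalone program, separate from the
// surrounding runtime code) of what retake and preemptone provide: on platforms
// with async preemption enabled, even a tight loop with no call sites is
// preempted after roughly forcePreemptNS (10ms), so other goroutines still run
// under GOMAXPROCS(1). With GODEBUG=asyncpreemptoff=1 the spinning goroutine
// below could hold the only P for much longer.
package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)
	go func() {
		for { // no function calls, so no cooperative preemption points
		}
	}()
	start := time.Now()
	time.Sleep(50 * time.Millisecond) // still gets to run thanks to forced preemption
	fmt.Println("main ran despite the spinning goroutine, after", time.Since(start))
}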
5470 if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now { 5471 continue 5472 } 5473 // Drop allpLock so we can take sched.lock. 5474 unlock(&allpLock) 5475 // Need to decrement number of idle locked M's 5476 // (pretending that one more is running) before the CAS. 5477 // Otherwise the M from which we retake can exit the syscall, 5478 // increment nmidle and report deadlock. 5479 incidlelocked(-1) 5480 if atomic.Cas(&pp.status, s, _Pidle) { 5481 if trace.enabled { 5482 traceGoSysBlock(pp) 5483 traceProcStop(pp) 5484 } 5485 n++ 5486 pp.syscalltick++ 5487 handoffp(pp) 5488 } 5489 incidlelocked(1) 5490 lock(&allpLock) 5491 } 5492 } 5493 unlock(&allpLock) 5494 return uint32(n) 5495 } 5496 5497 // Tell all goroutines that they have been preempted and they should stop. 5498 // This function is purely best-effort. It can fail to inform a goroutine if a 5499 // processor just started running it. 5500 // No locks need to be held. 5501 // Returns true if preemption request was issued to at least one goroutine. 5502 func preemptall() bool { 5503 res := false 5504 for _, pp := range allp { 5505 if pp.status != _Prunning { 5506 continue 5507 } 5508 if preemptone(pp) { 5509 res = true 5510 } 5511 } 5512 return res 5513 } 5514 5515 // Tell the goroutine running on processor P to stop. 5516 // This function is purely best-effort. It can incorrectly fail to inform the 5517 // goroutine. It can inform the wrong goroutine. Even if it informs the 5518 // correct goroutine, that goroutine might ignore the request if it is 5519 // simultaneously executing newstack. 5520 // No lock needs to be held. 5521 // Returns true if preemption request was issued. 5522 // The actual preemption will happen at some point in the future 5523 // and will be indicated by the gp->status no longer being 5524 // Grunning 5525 func preemptone(pp *p) bool { 5526 mp := pp.m.ptr() 5527 if mp == nil || mp == getg().m { 5528 return false 5529 } 5530 gp := mp.curg 5531 if gp == nil || gp == mp.g0 { 5532 return false 5533 } 5534 5535 gp.preempt = true 5536 5537 // Every call in a goroutine checks for stack overflow by 5538 // comparing the current stack pointer to gp->stackguard0. 5539 // Setting gp->stackguard0 to StackPreempt folds 5540 // preemption into the normal stack overflow check. 5541 gp.stackguard0 = stackPreempt 5542 5543 // Request an async preemption of this P. 5544 if preemptMSupported && debug.asyncpreemptoff == 0 { 5545 pp.preempt = true 5546 preemptM(mp) 5547 } 5548 5549 return true 5550 } 5551 5552 var starttime int64 5553 5554 func schedtrace(detailed bool) { 5555 now := nanotime() 5556 if starttime == 0 { 5557 starttime = now 5558 } 5559 5560 lock(&sched.lock) 5561 print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) 5562 if detailed { 5563 print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n") 5564 } 5565 // We must be careful while reading data from P's, M's and G's. 5566 // Even if we hold schedlock, most data can be changed concurrently. 5567 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. 
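	// For reference, with GODEBUG=schedtrace=1000 the print above plus the per-P
	// loop below emit one such line every second. With illustrative (not captured)
	// values and GOMAXPROCS=4 it looks like:
	//
	//	SCHED 2013ms: gomaxprocs=4 idleprocs=3 threads=9 spinningthreads=0 needspinning=0 idlethreads=4 runqueue=0 [1 0 0 0]
	//
	// Adding GODEBUG=scheddetail=1 switches to the detailed per-P/M/G form
	// printed by the loops below instead.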
5568 for i, pp := range allp { 5569 mp := pp.m.ptr() 5570 h := atomic.Load(&pp.runqhead) 5571 t := atomic.Load(&pp.runqtail) 5572 if detailed { 5573 print(" P", i, ": status=", pp.status, " schedtick=", pp.schedtick, " syscalltick=", pp.syscalltick, " m=") 5574 if mp != nil { 5575 print(mp.id) 5576 } else { 5577 print("nil") 5578 } 5579 print(" runqsize=", t-h, " gfreecnt=", pp.gFree.n, " timerslen=", len(pp.timers), "\n") 5580 } else { 5581 // In non-detailed mode format lengths of per-P run queues as: 5582 // [len1 len2 len3 len4] 5583 print(" ") 5584 if i == 0 { 5585 print("[") 5586 } 5587 print(t - h) 5588 if i == len(allp)-1 { 5589 print("]\n") 5590 } 5591 } 5592 } 5593 5594 if !detailed { 5595 unlock(&sched.lock) 5596 return 5597 } 5598 5599 for mp := allm; mp != nil; mp = mp.alllink { 5600 pp := mp.p.ptr() 5601 print(" M", mp.id, ": p=") 5602 if pp != nil { 5603 print(pp.id) 5604 } else { 5605 print("nil") 5606 } 5607 print(" curg=") 5608 if mp.curg != nil { 5609 print(mp.curg.goid) 5610 } else { 5611 print("nil") 5612 } 5613 print(" mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=") 5614 if lockedg := mp.lockedg.ptr(); lockedg != nil { 5615 print(lockedg.goid) 5616 } else { 5617 print("nil") 5618 } 5619 print("\n") 5620 } 5621 5622 forEachG(func(gp *g) { 5623 print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=") 5624 if gp.m != nil { 5625 print(gp.m.id) 5626 } else { 5627 print("nil") 5628 } 5629 print(" lockedm=") 5630 if lockedm := gp.lockedm.ptr(); lockedm != nil { 5631 print(lockedm.id) 5632 } else { 5633 print("nil") 5634 } 5635 print("\n") 5636 }) 5637 unlock(&sched.lock) 5638 } 5639 5640 // schedEnableUser enables or disables the scheduling of user 5641 // goroutines. 5642 // 5643 // This does not stop already running user goroutines, so the caller 5644 // should first stop the world when disabling user goroutines. 5645 func schedEnableUser(enable bool) { 5646 lock(&sched.lock) 5647 if sched.disable.user == !enable { 5648 unlock(&sched.lock) 5649 return 5650 } 5651 sched.disable.user = !enable 5652 if enable { 5653 n := sched.disable.n 5654 sched.disable.n = 0 5655 globrunqputbatch(&sched.disable.runnable, n) 5656 unlock(&sched.lock) 5657 for ; n != 0 && sched.npidle.Load() != 0; n-- { 5658 startm(nil, false) 5659 } 5660 } else { 5661 unlock(&sched.lock) 5662 } 5663 } 5664 5665 // schedEnabled reports whether gp should be scheduled. It returns 5666 // false if scheduling of gp is disabled. 5667 // 5668 // sched.lock must be held. 5669 func schedEnabled(gp *g) bool { 5670 assertLockHeld(&sched.lock) 5671 5672 if sched.disable.user { 5673 return isSystemGoroutine(gp, true) 5674 } 5675 return true 5676 } 5677 5678 // Put mp on midle list. 5679 // sched.lock must be held. 5680 // May run during STW, so write barriers are not allowed. 5681 // 5682 //go:nowritebarrierrec 5683 func mput(mp *m) { 5684 assertLockHeld(&sched.lock) 5685 5686 mp.schedlink = sched.midle 5687 sched.midle.set(mp) 5688 sched.nmidle++ 5689 checkdead() 5690 } 5691 5692 // Try to get an m from midle list. 5693 // sched.lock must be held. 5694 // May run during STW, so write barriers are not allowed.
5695 // 5696 //go:nowritebarrierrec 5697 func mget() *m { 5698 assertLockHeld(&sched.lock) 5699 5700 mp := sched.midle.ptr() 5701 if mp != nil { 5702 sched.midle = mp.schedlink 5703 sched.nmidle-- 5704 } 5705 return mp 5706 } 5707 5708 // Put gp on the global runnable queue. 5709 // sched.lock must be held. 5710 // May run during STW, so write barriers are not allowed. 5711 // 5712 //go:nowritebarrierrec 5713 func globrunqput(gp *g) { 5714 assertLockHeld(&sched.lock) 5715 5716 sched.runq.pushBack(gp) 5717 sched.runqsize++ 5718 } 5719 5720 // Put gp at the head of the global runnable queue. 5721 // sched.lock must be held. 5722 // May run during STW, so write barriers are not allowed. 5723 // 5724 //go:nowritebarrierrec 5725 func globrunqputhead(gp *g) { 5726 assertLockHeld(&sched.lock) 5727 5728 sched.runq.push(gp) 5729 sched.runqsize++ 5730 } 5731 5732 // Put a batch of runnable goroutines on the global runnable queue. 5733 // This clears *batch. 5734 // sched.lock must be held. 5735 // May run during STW, so write barriers are not allowed. 5736 // 5737 //go:nowritebarrierrec 5738 func globrunqputbatch(batch *gQueue, n int32) { 5739 assertLockHeld(&sched.lock) 5740 5741 sched.runq.pushBackAll(*batch) 5742 sched.runqsize += n 5743 *batch = gQueue{} 5744 } 5745 5746 // Try to get a batch of G's from the global runnable queue. 5747 // sched.lock must be held. 5748 func globrunqget(pp *p, max int32) *g { 5749 assertLockHeld(&sched.lock) 5750 5751 if sched.runqsize == 0 { 5752 return nil 5753 } 5754 5755 n := sched.runqsize/gomaxprocs + 1 5756 if n > sched.runqsize { 5757 n = sched.runqsize 5758 } 5759 if max > 0 && n > max { 5760 n = max 5761 } 5762 if n > int32(len(pp.runq))/2 { 5763 n = int32(len(pp.runq)) / 2 5764 } 5765 5766 sched.runqsize -= n 5767 5768 gp := sched.runq.pop() 5769 n-- 5770 for ; n > 0; n-- { 5771 gp1 := sched.runq.pop() 5772 runqput(pp, gp1, false) 5773 } 5774 return gp 5775 } 5776 5777 // pMask is an atomic bitstring with one bit per P. 5778 type pMask []uint32 5779 5780 // read returns true if P id's bit is set. 5781 func (p pMask) read(id uint32) bool { 5782 word := id / 32 5783 mask := uint32(1) << (id % 32) 5784 return (atomic.Load(&p[word]) & mask) != 0 5785 } 5786 5787 // set sets P id's bit. 5788 func (p pMask) set(id int32) { 5789 word := id / 32 5790 mask := uint32(1) << (id % 32) 5791 atomic.Or(&p[word], mask) 5792 } 5793 5794 // clear clears P id's bit. 5795 func (p pMask) clear(id int32) { 5796 word := id / 32 5797 mask := uint32(1) << (id % 32) 5798 atomic.And(&p[word], ^mask) 5799 } 5800 5801 // updateTimerPMask clears pp's timer mask if it has no timers on its heap. 5802 // 5803 // Ideally, the timer mask would be kept immediately consistent on any timer 5804 // operations. Unfortunately, updating a shared global data structure in the 5805 // timer hot path adds too much overhead in applications frequently switching 5806 // between no timers and some timers. 5807 // 5808 // As a compromise, the timer mask is updated only on pidleget / pidleput. A 5809 // running P (returned by pidleget) may add a timer at any time, so its mask 5810 // must be set. An idle P (passed to pidleput) cannot add new timers while 5811 // idle, so if it has no timers at that time, its mask may be cleared. 5812 // 5813 // Thus, we get the following effects on timer-stealing in findrunnable: 5814 // 5815 // - Idle Ps with no timers when they go idle are never checked in findrunnable 5816 // (for work- or timer-stealing; this is the ideal case).
5817 // - Running Ps must always be checked. 5818 // - Idle Ps whose timers are stolen must continue to be checked until they run 5819 // again, even after timer expiration. 5820 // 5821 // When the P starts running again, the mask should be set, as a timer may be 5822 // added at any time. 5823 // 5824 // TODO(prattmic): Additional targeted updates may improve the above cases. 5825 // e.g., updating the mask when stealing a timer. 5826 func updateTimerPMask(pp *p) { 5827 if pp.numTimers.Load() > 0 { 5828 return 5829 } 5830 5831 // Looks like there are no timers, however another P may transiently 5832 // decrement numTimers when handling a timerModified timer in 5833 // checkTimers. We must take timersLock to serialize with these changes. 5834 lock(&pp.timersLock) 5835 if pp.numTimers.Load() == 0 { 5836 timerpMask.clear(pp.id) 5837 } 5838 unlock(&pp.timersLock) 5839 } 5840 5841 // pidleput puts p on the _Pidle list. now must be a relatively recent call 5842 // to nanotime or zero. Returns now or the current time if now was zero. 5843 // 5844 // This releases ownership of p. Once sched.lock is released it is no longer 5845 // safe to use p. 5846 // 5847 // sched.lock must be held. 5848 // 5849 // May run during STW, so write barriers are not allowed. 5850 // 5851 //go:nowritebarrierrec 5852 func pidleput(pp *p, now int64) int64 { 5853 assertLockHeld(&sched.lock) 5854 5855 if !runqempty(pp) { 5856 throw("pidleput: P has non-empty run queue") 5857 } 5858 if now == 0 { 5859 now = nanotime() 5860 } 5861 updateTimerPMask(pp) // clear if there are no timers. 5862 idlepMask.set(pp.id) 5863 pp.link = sched.pidle 5864 sched.pidle.set(pp) 5865 sched.npidle.Add(1) 5866 if !pp.limiterEvent.start(limiterEventIdle, now) { 5867 throw("must be able to track idle limiter event") 5868 } 5869 return now 5870 } 5871 5872 // pidleget tries to get a p from the _Pidle list, acquiring ownership. 5873 // 5874 // sched.lock must be held. 5875 // 5876 // May run during STW, so write barriers are not allowed. 5877 // 5878 //go:nowritebarrierrec 5879 func pidleget(now int64) (*p, int64) { 5880 assertLockHeld(&sched.lock) 5881 5882 pp := sched.pidle.ptr() 5883 if pp != nil { 5884 // Timer may get added at any time now. 5885 if now == 0 { 5886 now = nanotime() 5887 } 5888 timerpMask.set(pp.id) 5889 idlepMask.clear(pp.id) 5890 sched.pidle = pp.link 5891 sched.npidle.Add(-1) 5892 pp.limiterEvent.stop(limiterEventIdle, now) 5893 } 5894 return pp, now 5895 } 5896 5897 // pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership. 5898 // This is called by spinning Ms (or callers that need a spinning M) that have 5899 // found work. If no P is available, this must be synchronized with non-spinning 5900 // Ms that may be preparing to drop their P without discovering this work. 5901 // 5902 // sched.lock must be held. 5903 // 5904 // May run during STW, so write barriers are not allowed. 5905 // 5906 //go:nowritebarrierrec 5907 func pidlegetSpinning(now int64) (*p, int64) { 5908 assertLockHeld(&sched.lock) 5909 5910 pp, now := pidleget(now) 5911 if pp == nil { 5912 // See "Delicate dance" comment in findrunnable. We found work 5913 // that we cannot take, we must synchronize with non-spinning 5914 // Ms that may be preparing to drop their P. 5915 sched.needspinning.Store(1) 5916 return nil, now 5917 } 5918 5919 return pp, now 5920 } 5921 5922 // runqempty reports whether pp has no Gs on its local run queue. 5923 // It never returns true spuriously.
5924 func runqempty(pp *p) bool { 5925 // Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail, 5926 // 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext. 5927 // Simply observing that runqhead == runqtail and then observing that runqnext == nil 5928 // does not mean the queue is empty. 5929 for { 5930 head := atomic.Load(&pp.runqhead) 5931 tail := atomic.Load(&pp.runqtail) 5932 runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext))) 5933 if tail == atomic.Load(&pp.runqtail) { 5934 return head == tail && runnext == 0 5935 } 5936 } 5937 } 5938 5939 // To shake out latent assumptions about scheduling order, 5940 // we introduce some randomness into scheduling decisions 5941 // when running with the race detector. 5942 // The need for this was made obvious by changing the 5943 // (deterministic) scheduling order in Go 1.5 and breaking 5944 // many poorly-written tests. 5945 // With the randomness here, as long as the tests pass 5946 // consistently with -race, they shouldn't have latent scheduling 5947 // assumptions. 5948 const randomizeScheduler = raceenabled 5949 5950 // runqput tries to put g on the local runnable queue. 5951 // If next is false, runqput adds g to the tail of the runnable queue. 5952 // If next is true, runqput puts g in the pp.runnext slot. 5953 // If the run queue is full, runqput puts g on the global queue. 5954 // Executed only by the owner P. 5955 func runqput(pp *p, gp *g, next bool) { 5956 if randomizeScheduler && next && fastrandn(2) == 0 { 5957 next = false 5958 } 5959 5960 if next { 5961 retryNext: 5962 oldnext := pp.runnext 5963 if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) { 5964 goto retryNext 5965 } 5966 if oldnext == 0 { 5967 return 5968 } 5969 // Kick the old runnext out to the regular run queue. 5970 gp = oldnext.ptr() 5971 } 5972 5973 retry: 5974 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 5975 t := pp.runqtail 5976 if t-h < uint32(len(pp.runq)) { 5977 pp.runq[t%uint32(len(pp.runq))].set(gp) 5978 atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption 5979 return 5980 } 5981 if runqputslow(pp, gp, h, t) { 5982 return 5983 } 5984 // the queue is not full, now the put above must succeed 5985 goto retry 5986 } 5987 5988 // Put g and a batch of work from local runnable queue on global queue. 5989 // Executed only by the owner P. 5990 func runqputslow(pp *p, gp *g, h, t uint32) bool { 5991 var batch [len(pp.runq)/2 + 1]*g 5992 5993 // First, grab a batch from local queue. 5994 n := t - h 5995 n = n / 2 5996 if n != uint32(len(pp.runq)/2) { 5997 throw("runqputslow: queue is not full") 5998 } 5999 for i := uint32(0); i < n; i++ { 6000 batch[i] = pp.runq[(h+i)%uint32(len(pp.runq))].ptr() 6001 } 6002 if !atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 6003 return false 6004 } 6005 batch[n] = gp 6006 6007 if randomizeScheduler { 6008 for i := uint32(1); i <= n; i++ { 6009 j := fastrandn(i + 1) 6010 batch[i], batch[j] = batch[j], batch[i] 6011 } 6012 } 6013 6014 // Link the goroutines. 6015 for i := uint32(0); i < n; i++ { 6016 batch[i].schedlink.set(batch[i+1]) 6017 } 6018 var q gQueue 6019 q.head.set(batch[0]) 6020 q.tail.set(batch[n]) 6021 6022 // Now put the batch on global queue. 6023 lock(&sched.lock) 6024 globrunqputbatch(&q, int32(n+1)) 6025 unlock(&sched.lock) 6026 return true 6027 } 6028 6029 // runqputbatch tries to put all the G's on q on the local runnable queue.
6030 // If the queue is full, they are put on the global queue; in that case 6031 // this will temporarily acquire the scheduler lock. 6032 // Executed only by the owner P. 6033 func runqputbatch(pp *p, q *gQueue, qsize int) { 6034 h := atomic.LoadAcq(&pp.runqhead) 6035 t := pp.runqtail 6036 n := uint32(0) 6037 for !q.empty() && t-h < uint32(len(pp.runq)) { 6038 gp := q.pop() 6039 pp.runq[t%uint32(len(pp.runq))].set(gp) 6040 t++ 6041 n++ 6042 } 6043 qsize -= int(n) 6044 6045 if randomizeScheduler { 6046 off := func(o uint32) uint32 { 6047 return (pp.runqtail + o) % uint32(len(pp.runq)) 6048 } 6049 for i := uint32(1); i < n; i++ { 6050 j := fastrandn(i + 1) 6051 pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)] 6052 } 6053 } 6054 6055 atomic.StoreRel(&pp.runqtail, t) 6056 if !q.empty() { 6057 lock(&sched.lock) 6058 globrunqputbatch(q, int32(qsize)) 6059 unlock(&sched.lock) 6060 } 6061 } 6062 6063 // Get g from local runnable queue. 6064 // If inheritTime is true, gp should inherit the remaining time in the 6065 // current time slice. Otherwise, it should start a new time slice. 6066 // Executed only by the owner P. 6067 func runqget(pp *p) (gp *g, inheritTime bool) { 6068 // If there's a runnext, it's the next G to run. 6069 next := pp.runnext 6070 // If the runnext is non-0 and the CAS fails, it could only have been stolen by another P, 6071 // because other Ps can race to set runnext to 0, but only the current P can set it to non-0. 6072 // Hence, there's no need to retry this CAS if it fails. 6073 if next != 0 && pp.runnext.cas(next, 0) { 6074 return next.ptr(), true 6075 } 6076 6077 for { 6078 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 6079 t := pp.runqtail 6080 if t == h { 6081 return nil, false 6082 } 6083 gp := pp.runq[h%uint32(len(pp.runq))].ptr() 6084 if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume 6085 return gp, false 6086 } 6087 } 6088 } 6089 6090 // runqdrain drains the local runnable queue of pp and returns all goroutines in it. 6091 // Executed only by the owner P. 6092 func runqdrain(pp *p) (drainQ gQueue, n uint32) { 6093 oldNext := pp.runnext 6094 if oldNext != 0 && pp.runnext.cas(oldNext, 0) { 6095 drainQ.pushBack(oldNext.ptr()) 6096 n++ 6097 } 6098 6099 retry: 6100 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 6101 t := pp.runqtail 6102 qn := t - h 6103 if qn == 0 { 6104 return 6105 } 6106 if qn > uint32(len(pp.runq)) { // read inconsistent h and t 6107 goto retry 6108 } 6109 6110 if !atomic.CasRel(&pp.runqhead, h, h+qn) { // cas-release, commits consume 6111 goto retry 6112 } 6113 6114 // We've inverted the usual order: the head pointer is advanced first and the 6115 // G's are collected into drainQ only afterwards, because we don't want to race 6116 // on the G's while runqdrain() and runqsteal() are running in parallel. 6117 // Advancing the head pointer before draining the local P into a gQueue means 6118 // gp.schedlink is only updated after we have taken full ownership of the G's; 6119 // other P's can no longer see or steal those G's from the local run queue. 6120 // See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details. 6121 for i := uint32(0); i < qn; i++ { 6122 gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr() 6123 drainQ.pushBack(gp) 6124 n++ 6125 } 6126 return 6127 } 6128 6129 // Grabs a batch of goroutines from pp's runnable queue into batch.
6130 // Batch is a ring buffer starting at batchHead. 6131 // Returns number of grabbed goroutines. 6132 // Can be executed by any P. 6133 func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { 6134 for { 6135 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers 6136 t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer 6137 n := t - h 6138 n = n - n/2 6139 if n == 0 { 6140 if stealRunNextG { 6141 // Try to steal from pp.runnext. 6142 if next := pp.runnext; next != 0 { 6143 if pp.status == _Prunning { 6144 // Sleep to ensure that pp isn't about to run the g 6145 // we are about to steal. 6146 // The important use case here is when the g running 6147 // on pp ready()s another g and then almost 6148 // immediately blocks. Instead of stealing runnext 6149 // in this window, back off to give pp a chance to 6150 // schedule runnext. This will avoid thrashing gs 6151 // between different Ps. 6152 // A sync chan send/recv takes ~50ns as of time of 6153 // writing, so 3us gives ~50x overshoot. 6154 if GOOS != "windows" && GOOS != "openbsd" && GOOS != "netbsd" { 6155 usleep(3) 6156 } else { 6157 // On some platforms system timer granularity is 6158 // 1-15ms, which is way too much for this 6159 // optimization. So just yield. 6160 osyield() 6161 } 6162 } 6163 if !pp.runnext.cas(next, 0) { 6164 continue 6165 } 6166 batch[batchHead%uint32(len(batch))] = next 6167 return 1 6168 } 6169 } 6170 return 0 6171 } 6172 if n > uint32(len(pp.runq)/2) { // read inconsistent h and t 6173 continue 6174 } 6175 for i := uint32(0); i < n; i++ { 6176 g := pp.runq[(h+i)%uint32(len(pp.runq))] 6177 batch[(batchHead+i)%uint32(len(batch))] = g 6178 } 6179 if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume 6180 return n 6181 } 6182 } 6183 } 6184 6185 // Steal half of elements from local runnable queue of p2 6186 // and put onto local runnable queue of p. 6187 // Returns one of the stolen elements (or nil if failed). 6188 func runqsteal(pp, p2 *p, stealRunNextG bool) *g { 6189 t := pp.runqtail 6190 n := runqgrab(p2, &pp.runq, t, stealRunNextG) 6191 if n == 0 { 6192 return nil 6193 } 6194 n-- 6195 gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr() 6196 if n == 0 { 6197 return gp 6198 } 6199 h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers 6200 if t-h+n >= uint32(len(pp.runq)) { 6201 throw("runqsteal: runq overflow") 6202 } 6203 atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption 6204 return gp 6205 } 6206 6207 // A gQueue is a dequeue of Gs linked through g.schedlink. A G can only 6208 // be on one gQueue or gList at a time. 6209 type gQueue struct { 6210 head guintptr 6211 tail guintptr 6212 } 6213 6214 // empty reports whether q is empty. 6215 func (q *gQueue) empty() bool { 6216 return q.head == 0 6217 } 6218 6219 // push adds gp to the head of q. 6220 func (q *gQueue) push(gp *g) { 6221 gp.schedlink = q.head 6222 q.head.set(gp) 6223 if q.tail == 0 { 6224 q.tail.set(gp) 6225 } 6226 } 6227 6228 // pushBack adds gp to the tail of q. 6229 func (q *gQueue) pushBack(gp *g) { 6230 gp.schedlink = 0 6231 if q.tail != 0 { 6232 q.tail.ptr().schedlink.set(gp) 6233 } else { 6234 q.head.set(gp) 6235 } 6236 q.tail.set(gp) 6237 } 6238 6239 // pushBackAll adds all Gs in q2 to the tail of q. After this q2 must 6240 // not be used. 
6241 func (q *gQueue) pushBackAll(q2 gQueue) { 6242 if q2.tail == 0 { 6243 return 6244 } 6245 q2.tail.ptr().schedlink = 0 6246 if q.tail != 0 { 6247 q.tail.ptr().schedlink = q2.head 6248 } else { 6249 q.head = q2.head 6250 } 6251 q.tail = q2.tail 6252 } 6253 6254 // pop removes and returns the head of queue q. It returns nil if 6255 // q is empty. 6256 func (q *gQueue) pop() *g { 6257 gp := q.head.ptr() 6258 if gp != nil { 6259 q.head = gp.schedlink 6260 if q.head == 0 { 6261 q.tail = 0 6262 } 6263 } 6264 return gp 6265 } 6266 6267 // popList takes all Gs in q and returns them as a gList. 6268 func (q *gQueue) popList() gList { 6269 stack := gList{q.head} 6270 *q = gQueue{} 6271 return stack 6272 } 6273 6274 // A gList is a list of Gs linked through g.schedlink. A G can only be 6275 // on one gQueue or gList at a time. 6276 type gList struct { 6277 head guintptr 6278 } 6279 6280 // empty reports whether l is empty. 6281 func (l *gList) empty() bool { 6282 return l.head == 0 6283 } 6284 6285 // push adds gp to the head of l. 6286 func (l *gList) push(gp *g) { 6287 gp.schedlink = l.head 6288 l.head.set(gp) 6289 } 6290 6291 // pushAll prepends all Gs in q to l. 6292 func (l *gList) pushAll(q gQueue) { 6293 if !q.empty() { 6294 q.tail.ptr().schedlink = l.head 6295 l.head = q.head 6296 } 6297 } 6298 6299 // pop removes and returns the head of l. If l is empty, it returns nil. 6300 func (l *gList) pop() *g { 6301 gp := l.head.ptr() 6302 if gp != nil { 6303 l.head = gp.schedlink 6304 } 6305 return gp 6306 } 6307 6308 //go:linkname setMaxThreads runtime/debug.setMaxThreads 6309 func setMaxThreads(in int) (out int) { 6310 lock(&sched.lock) 6311 out = int(sched.maxmcount) 6312 if in > 0x7fffffff { // MaxInt32 6313 sched.maxmcount = 0x7fffffff 6314 } else { 6315 sched.maxmcount = int32(in) 6316 } 6317 checkmcount() 6318 unlock(&sched.lock) 6319 return 6320 } 6321 6322 //go:nosplit 6323 func procPin() int { 6324 gp := getg() 6325 mp := gp.m 6326 6327 mp.locks++ 6328 return int(mp.p.ptr().id) 6329 } 6330 6331 //go:nosplit 6332 func procUnpin() { 6333 gp := getg() 6334 gp.m.locks-- 6335 } 6336 6337 //go:linkname sync_runtime_procPin sync.runtime_procPin 6338 //go:nosplit 6339 func sync_runtime_procPin() int { 6340 return procPin() 6341 } 6342 6343 //go:linkname sync_runtime_procUnpin sync.runtime_procUnpin 6344 //go:nosplit 6345 func sync_runtime_procUnpin() { 6346 procUnpin() 6347 } 6348 6349 //go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin 6350 //go:nosplit 6351 func sync_atomic_runtime_procPin() int { 6352 return procPin() 6353 } 6354 6355 //go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin 6356 //go:nosplit 6357 func sync_atomic_runtime_procUnpin() { 6358 procUnpin() 6359 } 6360 6361 // Active spinning for sync.Mutex. 6362 // 6363 //go:linkname sync_runtime_canSpin sync.runtime_canSpin 6364 //go:nosplit 6365 func sync_runtime_canSpin(i int) bool { 6366 // sync.Mutex is cooperative, so we are conservative with spinning. 6367 // Spin only few times and only if running on a multicore machine and 6368 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. 6369 // As opposed to runtime mutex we don't do passive spinning here, 6370 // because there can be work on global runq or on other Ps. 
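	// Callers are expected to use these two hooks roughly as in the following
	// illustrative sketch (this is not the actual sync.Mutex code, just the
	// intended spin-then-block pattern):
	//
	//	iter := 0
	//	for sync_runtime_canSpin(iter) {
	//		sync_runtime_doSpin() // a short burst of PAUSE instructions via procyield
	//		iter++
	//		// ... re-check the lock word and try to acquire it ...
	//	}
	//	// give up spinning and queue/block instead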
6371 if i >= active_spin || ncpu <= 1 || gomaxprocs <= sched.npidle.Load()+sched.nmspinning.Load()+1 { 6372 return false 6373 } 6374 if p := getg().m.p.ptr(); !runqempty(p) { 6375 return false 6376 } 6377 return true 6378 } 6379 6380 //go:linkname sync_runtime_doSpin sync.runtime_doSpin 6381 //go:nosplit 6382 func sync_runtime_doSpin() { 6383 procyield(active_spin_cnt) 6384 } 6385 6386 var stealOrder randomOrder 6387 6388 // randomOrder/randomEnum are helper types for randomized work stealing. 6389 // They allow enumerating all Ps in different pseudo-random orders without repetitions. 6390 // The algorithm is based on the fact that if we have X such that X and GOMAXPROCS 6391 // are coprime, then the sequence (i + X) % GOMAXPROCS gives the required enumeration. For example, with count=6 the coprimes are {1, 5}; starting at pos=2 with inc=5 visits 2, 1, 0, 5, 4, 3. 6392 type randomOrder struct { 6393 count uint32 6394 coprimes []uint32 6395 } 6396 6397 type randomEnum struct { 6398 i uint32 6399 count uint32 6400 pos uint32 6401 inc uint32 6402 } 6403 6404 func (ord *randomOrder) reset(count uint32) { 6405 ord.count = count 6406 ord.coprimes = ord.coprimes[:0] 6407 for i := uint32(1); i <= count; i++ { 6408 if gcd(i, count) == 1 { 6409 ord.coprimes = append(ord.coprimes, i) 6410 } 6411 } 6412 } 6413 6414 func (ord *randomOrder) start(i uint32) randomEnum { 6415 return randomEnum{ 6416 count: ord.count, 6417 pos: i % ord.count, 6418 inc: ord.coprimes[i/ord.count%uint32(len(ord.coprimes))], 6419 } 6420 } 6421 6422 func (enum *randomEnum) done() bool { 6423 return enum.i == enum.count 6424 } 6425 6426 func (enum *randomEnum) next() { 6427 enum.i++ 6428 enum.pos = (enum.pos + enum.inc) % enum.count 6429 } 6430 6431 func (enum *randomEnum) position() uint32 { 6432 return enum.pos 6433 } 6434 6435 func gcd(a, b uint32) uint32 { 6436 for b != 0 { 6437 a, b = b, a%b 6438 } 6439 return a 6440 } 6441 6442 // An initTask represents the set of initializations that need to be done for a package. 6443 // Keep in sync with ../../test/initempty.go:initTask 6444 type initTask struct { 6445 // TODO: pack the first 3 fields more tightly? 6446 state uintptr // 0 = uninitialized, 1 = in progress, 2 = done 6447 ndeps uintptr 6448 nfns uintptr 6449 // followed by ndeps instances of an *initTask, one per package depended on 6450 // followed by nfns pcs, one per init function to run 6451 } 6452 6453 // inittrace stores statistics for init functions which are 6454 // updated by malloc and newproc when active is true. 6455 var inittrace tracestat 6456 6457 type tracestat struct { 6458 active bool // init tracing activation status 6459 id uint64 // init goroutine id 6460 allocs uint64 // heap allocations 6461 bytes uint64 // heap allocated bytes 6462 } 6463 6464 func doInit(t *initTask) { 6465 switch t.state { 6466 case 2: // fully initialized 6467 return 6468 case 1: // initialization in progress 6469 throw("recursive call during initialization - linker skew") 6470 default: // not initialized yet 6471 t.state = 1 // initialization in progress 6472 6473 for i := uintptr(0); i < t.ndeps; i++ { 6474 p := add(unsafe.Pointer(t), (3+i)*goarch.PtrSize) 6475 t2 := *(**initTask)(p) 6476 doInit(t2) 6477 } 6478 6479 if t.nfns == 0 { 6480 t.state = 2 // initialization done 6481 return 6482 } 6483 6484 var ( 6485 start int64 6486 before tracestat 6487 ) 6488 6489 if inittrace.active { 6490 start = nanotime() 6491 // Load stats non-atomically since inittrace is updated only by this init goroutine.
6492 before = inittrace 6493 } 6494 6495 firstFunc := add(unsafe.Pointer(t), (3+t.ndeps)*goarch.PtrSize) 6496 for i := uintptr(0); i < t.nfns; i++ { 6497 p := add(firstFunc, i*goarch.PtrSize) 6498 f := *(*func())(unsafe.Pointer(&p)) 6499 f() 6500 } 6501 6502 if inittrace.active { 6503 end := nanotime() 6504 // Load stats non-atomically since inittrace is updated only by this init goroutine. 6505 after := inittrace 6506 6507 f := *(*func())(unsafe.Pointer(&firstFunc)) 6508 pkg := funcpkgpath(findfunc(abi.FuncPCABIInternal(f))) 6509 6510 var sbuf [24]byte 6511 print("init ", pkg, " @") 6512 print(string(fmtNSAsMS(sbuf[:], uint64(start-runtimeInitTime))), " ms, ") 6513 print(string(fmtNSAsMS(sbuf[:], uint64(end-start))), " ms clock, ") 6514 print(string(itoa(sbuf[:], after.bytes-before.bytes)), " bytes, ") 6515 print(string(itoa(sbuf[:], after.allocs-before.allocs)), " allocs") 6516 print("\n") 6517 } 6518 6519 t.state = 2 // initialization done 6520 } 6521 }
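
// The per-P run queues manipulated by runqput, runqget, runqgrab and runqsteal
// above are bounded ring buffers with a single producer (the owner P) and
// CAS-coordinated consumers. The sketch below is NOT part of the runtime: it is
// a standalone, simplified illustration of the same head/tail protocol using
// sync/atomic (the runtime uses its internal atomic package with explicit
// LoadAcq/StoreRel/CasRel) and plain ints instead of guintptr.
//
//	package main
//
//	import (
//		"fmt"
//		"sync/atomic"
//	)
//
//	// ring mirrors the shape of p.runq: a fixed-size buffer indexed by
//	// free-running head/tail counters. Only the owner advances tail;
//	// head is advanced by CAS so that the owner and concurrent stealers
//	// agree on who consumed each slot.
//	type ring struct {
//		head atomic.Uint32 // next slot to consume
//		tail atomic.Uint32 // next slot to fill (owner only)
//		buf  [256]int
//	}
//
//	// put appends v and reports false when the ring is full (the runtime
//	// would then spill half of the queue to the global run queue, see
//	// runqputslow).
//	func (r *ring) put(v int) bool {
//		h := r.head.Load() // runtime: atomic.LoadAcq
//		t := r.tail.Load()
//		if t-h >= uint32(len(r.buf)) {
//			return false
//		}
//		r.buf[t%uint32(len(r.buf))] = v
//		r.tail.Store(t + 1) // runtime: atomic.StoreRel, publishes the slot
//		return true
//	}
//
//	// get removes one element, competing with other consumers via a CAS
//	// on head, the same way runqget and runqgrab commit their consumption.
//	func (r *ring) get() (int, bool) {
//		for {
//			h := r.head.Load()
//			t := r.tail.Load()
//			if t == h {
//				return 0, false
//			}
//			v := r.buf[h%uint32(len(r.buf))]
//			if r.head.CompareAndSwap(h, h+1) { // runtime: atomic.CasRel
//				return v, true
//			}
//		}
//	}
//
//	func main() {
//		var r ring
//		for i := 1; i <= 3; i++ {
//			r.put(i)
//		}
//		for {
//			v, ok := r.get()
//			if !ok {
//				break
//			}
//			fmt.Println(v) // prints 1, 2, 3 in FIFO order
//		}
//	}
//
// As in the runtime, correctness relies on the owner checking t-h against
// len(buf) before filling a slot, so a slot is only reused after head has
// already moved past it.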