github.com/stingnevermore/go@v0.0.0-20180120041312-3810f5bfed72/src/runtime/mgc.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Garbage collector (GC). 6 // 7 // The GC runs concurrently with mutator threads, is type accurate (aka precise), allows multiple 8 // GC thread to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is 9 // non-generational and non-compacting. Allocation is done using size segregated per P allocation 10 // areas to minimize fragmentation while eliminating locks in the common case. 11 // 12 // The algorithm decomposes into several steps. 13 // This is a high level description of the algorithm being used. For an overview of GC a good 14 // place to start is Richard Jones' gchandbook.org. 15 // 16 // The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see 17 // Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978. 18 // On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 19 // 966-975. 20 // For journal quality proofs that these steps are complete, correct, and terminate see 21 // Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world. 22 // Concurrency and Computation: Practice and Experience 15(3-5), 2003. 23 // 24 // 1. GC performs sweep termination. 25 // 26 // a. Stop the world. This causes all Ps to reach a GC safe-point. 27 // 28 // b. Sweep any unswept spans. There will only be unswept spans if 29 // this GC cycle was forced before the expected time. 30 // 31 // 2. GC performs the "mark 1" sub-phase. In this sub-phase, Ps are 32 // allowed to locally cache parts of the work queue. 33 // 34 // a. Prepare for the mark phase by setting gcphase to _GCmark 35 // (from _GCoff), enabling the write barrier, enabling mutator 36 // assists, and enqueueing root mark jobs. No objects may be 37 // scanned until all Ps have enabled the write barrier, which is 38 // accomplished using STW. 39 // 40 // b. Start the world. From this point, GC work is done by mark 41 // workers started by the scheduler and by assists performed as 42 // part of allocation. The write barrier shades both the 43 // overwritten pointer and the new pointer value for any pointer 44 // writes (see mbarrier.go for details). Newly allocated objects 45 // are immediately marked black. 46 // 47 // c. GC performs root marking jobs. This includes scanning all 48 // stacks, shading all globals, and shading any heap pointers in 49 // off-heap runtime data structures. Scanning a stack stops a 50 // goroutine, shades any pointers found on its stack, and then 51 // resumes the goroutine. 52 // 53 // d. GC drains the work queue of grey objects, scanning each grey 54 // object to black and shading all pointers found in the object 55 // (which in turn may add those pointers to the work queue). 56 // 57 // 3. Once the global work queue is empty (but local work queue caches 58 // may still contain work), GC performs the "mark 2" sub-phase. 59 // 60 // a. GC stops all workers, disables local work queue caches, 61 // flushes each P's local work queue cache to the global work queue 62 // cache, and reenables workers. 63 // 64 // b. GC again drains the work queue, as in 2d above. 65 // 66 // 4. Once the work queue is empty, GC performs mark termination. 67 // 68 // a. Stop the world. 69 // 70 // b. 
Set gcphase to _GCmarktermination, and disable workers and 71 // assists. 72 // 73 // c. Drain any remaining work from the work queue (typically there 74 // will be none). 75 // 76 // d. Perform other housekeeping like flushing mcaches. 77 // 78 // 5. GC performs the sweep phase. 79 // 80 // a. Prepare for the sweep phase by setting gcphase to _GCoff, 81 // setting up sweep state and disabling the write barrier. 82 // 83 // b. Start the world. From this point on, newly allocated objects 84 // are white, and allocating sweeps spans before use if necessary. 85 // 86 // c. GC does concurrent sweeping in the background and in response 87 // to allocation. See description below. 88 // 89 // 6. When sufficient allocation has taken place, replay the sequence 90 // starting with 1 above. See discussion of GC rate below. 91 92 // Concurrent sweep. 93 // 94 // The sweep phase proceeds concurrently with normal program execution. 95 // The heap is swept span-by-span both lazily (when a goroutine needs another span) 96 // and concurrently in a background goroutine (this helps programs that are not CPU bound). 97 // At the end of STW mark termination all spans are marked as "needs sweeping". 98 // 99 // The background sweeper goroutine simply sweeps spans one-by-one. 100 // 101 // To avoid requesting more OS memory while there are unswept spans, when a 102 // goroutine needs another span, it first attempts to reclaim that much memory 103 // by sweeping. When a goroutine needs to allocate a new small-object span, it 104 // sweeps small-object spans for the same object size until it frees at least 105 // one object. When a goroutine needs to allocate large-object span from heap, 106 // it sweeps spans until it frees at least that many pages into heap. There is 107 // one case where this may not suffice: if a goroutine sweeps and frees two 108 // nonadjacent one-page spans to the heap, it will allocate a new two-page 109 // span, but there can still be other one-page unswept spans which could be 110 // combined into a two-page span. 111 // 112 // It's critical to ensure that no operations proceed on unswept spans (that would corrupt 113 // mark bits in GC bitmap). During GC all mcaches are flushed into the central cache, 114 // so they are empty. When a goroutine grabs a new span into mcache, it sweeps it. 115 // When a goroutine explicitly frees an object or sets a finalizer, it ensures that 116 // the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish). 117 // The finalizer goroutine is kicked off only when all spans are swept. 118 // When the next GC starts, it sweeps all not-yet-swept spans (if any). 119 120 // GC rate. 121 // Next GC is after we've allocated an extra amount of memory proportional to 122 // the amount already in use. The proportion is controlled by GOGC environment variable 123 // (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M 124 // (this mark is tracked in next_gc variable). This keeps the GC cost in linear 125 // proportion to the allocation cost. Adjusting GOGC just changes the linear constant 126 // (and also the amount of extra memory used). 127 128 // Oblets 129 // 130 // In order to prevent long pauses while scanning large objects and to 131 // improve parallelism, the garbage collector breaks up scan jobs for 132 // objects larger than maxObletBytes into "oblets" of at most 133 // maxObletBytes. 
When scanning encounters the beginning of a large 134 // object, it scans only the first oblet and enqueues the remaining 135 // oblets as new scan jobs. 136 137 package runtime 138 139 import ( 140 "runtime/internal/atomic" 141 "runtime/internal/sys" 142 "unsafe" 143 ) 144 145 const ( 146 _DebugGC = 0 147 _ConcurrentSweep = true 148 _FinBlockSize = 4 * 1024 149 150 // sweepMinHeapDistance is a lower bound on the heap distance 151 // (in bytes) reserved for concurrent sweeping between GC 152 // cycles. This will be scaled by gcpercent/100. 153 sweepMinHeapDistance = 1024 * 1024 154 ) 155 156 // heapminimum is the minimum heap size at which to trigger GC. 157 // For small heaps, this overrides the usual GOGC*live set rule. 158 // 159 // When there is a very small live set but a lot of allocation, simply 160 // collecting when the heap reaches GOGC*live results in many GC 161 // cycles and high total per-GC overhead. This minimum amortizes this 162 // per-GC overhead while keeping the heap reasonably small. 163 // 164 // During initialization this is set to 4MB*GOGC/100. In the case of 165 // GOGC==0, this will set heapminimum to 0, resulting in constant 166 // collection even when the heap size is small, which is useful for 167 // debugging. 168 var heapminimum uint64 = defaultHeapMinimum 169 170 // defaultHeapMinimum is the value of heapminimum for GOGC==100. 171 const defaultHeapMinimum = 4 << 20 172 173 // Initialized from $GOGC. GOGC=off means no GC. 174 var gcpercent int32 175 176 func gcinit() { 177 if unsafe.Sizeof(workbuf{}) != _WorkbufSize { 178 throw("size of Workbuf is suboptimal") 179 } 180 181 // No sweep on the first cycle. 182 mheap_.sweepdone = 1 183 184 // Set a reasonable initial GC trigger. 185 memstats.triggerRatio = 7 / 8.0 186 187 // Fake a heap_marked value so it looks like a trigger at 188 // heapminimum is the appropriate growth from heap_marked. 189 // This will go into computing the initial GC goal. 190 memstats.heap_marked = uint64(float64(heapminimum) / (1 + memstats.triggerRatio)) 191 192 // Set gcpercent from the environment. This will also compute 193 // and set the GC trigger and goal. 194 _ = setGCPercent(readgogc()) 195 196 work.startSema = 1 197 work.markDoneSema = 1 198 } 199 200 func readgogc() int32 { 201 p := gogetenv("GOGC") 202 if p == "off" { 203 return -1 204 } 205 if n, ok := atoi32(p); ok { 206 return n 207 } 208 return 100 209 } 210 211 // gcenable is called after the bulk of the runtime initialization, 212 // just before we're about to start letting user code run. 213 // It kicks off the background sweeper goroutine and enables GC. 214 func gcenable() { 215 c := make(chan int, 1) 216 go bgsweep(c) 217 <-c 218 memstats.enablegc = true // now that runtime is initialized, GC is okay 219 } 220 221 //go:linkname setGCPercent runtime/debug.setGCPercent 222 func setGCPercent(in int32) (out int32) { 223 lock(&mheap_.lock) 224 out = gcpercent 225 if in < 0 { 226 in = -1 227 } 228 gcpercent = in 229 heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 230 // Update pacing in response to gcpercent change. 231 gcSetTriggerRatio(memstats.triggerRatio) 232 unlock(&mheap_.lock) 233 234 // If we just disabled GC, wait for any concurrent GC to 235 // finish so we always return with no GC running. 236 if in < 0 { 237 // Disable phase transitions. 238 lock(&work.sweepWaiters.lock) 239 if gcphase == _GCmark { 240 // GC is active. Wait until we reach sweeping. 
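// The wait just below uses the runtime's usual intrusive-list idiom: the
// goroutine links itself onto work.sweepWaiters.head through its own
// schedlink field, parks, and is injected back into the scheduler at mark
// termination (see gcMarkTermination). A minimal stand-alone sketch of
// that push, with hypothetical types standing in for g and guintptr:
//
//	type waiter struct{ next *waiter }
//
//	type waitList struct{ head *waiter }
//
//	// push links w at the front of l. In the runtime this happens while
//	// holding work.sweepWaiters.lock, and goparkunlock releases that
//	// lock only after the link has been published.
//	func (l *waitList) push(w *waiter) {
//		w.next = l.head
//		l.head = w
//	}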
241 gp := getg() 242 gp.schedlink = work.sweepWaiters.head 243 work.sweepWaiters.head.set(gp) 244 goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) 245 } else { 246 // GC isn't active. 247 unlock(&work.sweepWaiters.lock) 248 } 249 } 250 251 return out 252 } 253 254 // Garbage collector phase. 255 // Indicates to write barrier and synchronization task to perform. 256 var gcphase uint32 257 258 // The compiler knows about this variable. 259 // If you change it, you must change builtin/runtime.go, too. 260 // If you change the first four bytes, you must also change the write 261 // barrier insertion code. 262 var writeBarrier struct { 263 enabled bool // compiler emits a check of this before calling write barrier 264 pad [3]byte // compiler uses 32-bit load for "enabled" field 265 needed bool // whether we need a write barrier for current GC phase 266 cgo bool // whether we need a write barrier for a cgo check 267 alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load 268 } 269 270 // gcBlackenEnabled is 1 if mutator assists and background mark 271 // workers are allowed to blacken objects. This must only be set when 272 // gcphase == _GCmark. 273 var gcBlackenEnabled uint32 274 275 // gcBlackenPromptly indicates that optimizations that may 276 // hide work from the global work queue should be disabled. 277 // 278 // If gcBlackenPromptly is true, per-P gcWork caches should 279 // be flushed immediately and new objects should be allocated black. 280 // 281 // There is a tension between allocating objects white and 282 // allocating them black. If white and the objects die before being 283 // marked they can be collected during this GC cycle. On the other 284 // hand allocating them black will reduce _GCmarktermination latency 285 // since more work is done in the mark phase. This tension is resolved 286 // by allocating white until the mark phase is approaching its end and 287 // then allocating black for the remainder of the mark phase. 288 var gcBlackenPromptly bool 289 290 const ( 291 _GCoff = iota // GC not running; sweeping in background, write barrier disabled 292 _GCmark // GC marking roots and workbufs: allocate black, write barrier ENABLED 293 _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED 294 ) 295 296 //go:nosplit 297 func setGCPhase(x uint32) { 298 atomic.Store(&gcphase, x) 299 writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination 300 writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo 301 } 302 303 // gcMarkWorkerMode represents the mode that a concurrent mark worker 304 // should operate in. 305 // 306 // Concurrent marking happens through four different mechanisms. One 307 // is mutator assists, which happen in response to allocations and are 308 // not scheduled. The other three are variations in the per-P mark 309 // workers and are distinguished by gcMarkWorkerMode. 310 type gcMarkWorkerMode int 311 312 const ( 313 // gcMarkWorkerDedicatedMode indicates that the P of a mark 314 // worker is dedicated to running that mark worker. The mark 315 // worker should run without preemption. 316 gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota 317 318 // gcMarkWorkerFractionalMode indicates that a P is currently 319 // running the "fractional" mark worker. The fractional worker 320 // is necessary when GOMAXPROCS*gcBackgroundUtilization is not 321 // an integer. 
The fractional worker should run until it is 322 // preempted and will be scheduled to pick up the fractional 323 // part of GOMAXPROCS*gcBackgroundUtilization. 324 gcMarkWorkerFractionalMode 325 326 // gcMarkWorkerIdleMode indicates that a P is running the mark 327 // worker because it has nothing else to do. The idle worker 328 // should run until it is preempted and account its time 329 // against gcController.idleMarkTime. 330 gcMarkWorkerIdleMode 331 ) 332 333 // gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes 334 // to use in execution traces. 335 var gcMarkWorkerModeStrings = [...]string{ 336 "GC (dedicated)", 337 "GC (fractional)", 338 "GC (idle)", 339 } 340 341 // gcController implements the GC pacing controller that determines 342 // when to trigger concurrent garbage collection and how much marking 343 // work to do in mutator assists and background marking. 344 // 345 // It uses a feedback control algorithm to adjust the memstats.gc_trigger 346 // trigger based on the heap growth and GC CPU utilization each cycle. 347 // This algorithm optimizes for heap growth to match GOGC and for CPU 348 // utilization between assist and background marking to be 25% of 349 // GOMAXPROCS. The high-level design of this algorithm is documented 350 // at https://golang.org/s/go15gcpacing. 351 // 352 // All fields of gcController are used only during a single mark 353 // cycle. 354 var gcController gcControllerState 355 356 type gcControllerState struct { 357 // scanWork is the total scan work performed this cycle. This 358 // is updated atomically during the cycle. Updates occur in 359 // bounded batches, since it is both written and read 360 // throughout the cycle. At the end of the cycle, this is how 361 // much of the retained heap is scannable. 362 // 363 // Currently this is the bytes of heap scanned. For most uses, 364 // this is an opaque unit of work, but for estimation the 365 // definition is important. 366 scanWork int64 367 368 // bgScanCredit is the scan work credit accumulated by the 369 // concurrent background scan. This credit is accumulated by 370 // the background scan and stolen by mutator assists. This is 371 // updated atomically. Updates occur in bounded batches, since 372 // it is both written and read throughout the cycle. 373 bgScanCredit int64 374 375 // assistTime is the nanoseconds spent in mutator assists 376 // during this cycle. This is updated atomically. Updates 377 // occur in bounded batches, since it is both written and read 378 // throughout the cycle. 379 assistTime int64 380 381 // dedicatedMarkTime is the nanoseconds spent in dedicated 382 // mark workers during this cycle. This is updated atomically 383 // at the end of the concurrent mark phase. 384 dedicatedMarkTime int64 385 386 // fractionalMarkTime is the nanoseconds spent in the 387 // fractional mark worker during this cycle. This is updated 388 // atomically throughout the cycle and will be up-to-date if 389 // the fractional mark worker is not currently running. 390 fractionalMarkTime int64 391 392 // idleMarkTime is the nanoseconds spent in idle marking 393 // during this cycle. This is updated atomically throughout 394 // the cycle. 395 idleMarkTime int64 396 397 // markStartTime is the absolute start time in nanoseconds 398 // that assists and background mark workers started. 399 markStartTime int64 400 401 // dedicatedMarkWorkersNeeded is the number of dedicated mark 402 // workers that need to be started. 
This is computed at the 403 // beginning of each cycle and decremented atomically as 404 // dedicated mark workers get started. 405 dedicatedMarkWorkersNeeded int64 406 407 // assistWorkPerByte is the ratio of scan work to allocated 408 // bytes that should be performed by mutator assists. This is 409 // computed at the beginning of each cycle and updated every 410 // time heap_scan is updated. 411 assistWorkPerByte float64 412 413 // assistBytesPerWork is 1/assistWorkPerByte. 414 assistBytesPerWork float64 415 416 // fractionalUtilizationGoal is the fraction of wall clock 417 // time that should be spent in the fractional mark worker on 418 // each P that isn't running a dedicated worker. 419 // 420 // For example, if the utilization goal is 25% and there are 421 // no dedicated workers, this will be 0.25. If there goal is 422 // 25%, there is one dedicated worker, and GOMAXPROCS is 5, 423 // this will be 0.05 to make up the missing 5%. 424 // 425 // If this is zero, no fractional workers are needed. 426 fractionalUtilizationGoal float64 427 428 _ [sys.CacheLineSize]byte 429 } 430 431 // startCycle resets the GC controller's state and computes estimates 432 // for a new GC cycle. The caller must hold worldsema. 433 func (c *gcControllerState) startCycle() { 434 c.scanWork = 0 435 c.bgScanCredit = 0 436 c.assistTime = 0 437 c.dedicatedMarkTime = 0 438 c.fractionalMarkTime = 0 439 c.idleMarkTime = 0 440 441 // If this is the first GC cycle or we're operating on a very 442 // small heap, fake heap_marked so it looks like gc_trigger is 443 // the appropriate growth from heap_marked, even though the 444 // real heap_marked may not have a meaningful value (on the 445 // first cycle) or may be much smaller (resulting in a large 446 // error response). 447 if memstats.gc_trigger <= heapminimum { 448 memstats.heap_marked = uint64(float64(memstats.gc_trigger) / (1 + memstats.triggerRatio)) 449 } 450 451 // Re-compute the heap goal for this cycle in case something 452 // changed. This is the same calculation we use elsewhere. 453 memstats.next_gc = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 454 if gcpercent < 0 { 455 memstats.next_gc = ^uint64(0) 456 } 457 458 // Ensure that the heap goal is at least a little larger than 459 // the current live heap size. This may not be the case if GC 460 // start is delayed or if the allocation that pushed heap_live 461 // over gc_trigger is large or if the trigger is really close to 462 // GOGC. Assist is proportional to this distance, so enforce a 463 // minimum distance, even if it means going over the GOGC goal 464 // by a tiny bit. 465 if memstats.next_gc < memstats.heap_live+1024*1024 { 466 memstats.next_gc = memstats.heap_live + 1024*1024 467 } 468 469 // Compute the background mark utilization goal. In general, 470 // this may not come out exactly. We round the number of 471 // dedicated workers so that the utilization is closest to 472 // 25%. For small GOMAXPROCS, this would introduce too much 473 // error, so we add fractional workers in that case. 474 totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization 475 c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5) 476 utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1 477 const maxUtilError = 0.3 478 if utilError < -maxUtilError || utilError > maxUtilError { 479 // Rounding put us more than 30% off our goal. With 480 // gcBackgroundUtilization of 25%, this happens for 481 // GOMAXPROCS<=3 or GOMAXPROCS=6. 
Enable fractional 482 // workers to compensate. 483 if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal { 484 // Too many dedicated workers. 485 c.dedicatedMarkWorkersNeeded-- 486 } 487 c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs) 488 } else { 489 c.fractionalUtilizationGoal = 0 490 } 491 492 // Clear per-P state 493 for _, p := range allp { 494 p.gcAssistTime = 0 495 p.gcFractionalMarkTime = 0 496 } 497 498 // Compute initial values for controls that are updated 499 // throughout the cycle. 500 c.revise() 501 502 if debug.gcpacertrace > 0 { 503 print("pacer: assist ratio=", c.assistWorkPerByte, 504 " (scan ", memstats.heap_scan>>20, " MB in ", 505 work.initialHeapLive>>20, "->", 506 memstats.next_gc>>20, " MB)", 507 " workers=", c.dedicatedMarkWorkersNeeded, 508 "+", c.fractionalUtilizationGoal, "\n") 509 } 510 } 511 512 // revise updates the assist ratio during the GC cycle to account for 513 // improved estimates. This should be called either under STW or 514 // whenever memstats.heap_scan, memstats.heap_live, or 515 // memstats.next_gc is updated (with mheap_.lock held). 516 // 517 // It should only be called when gcBlackenEnabled != 0 (because this 518 // is when assists are enabled and the necessary statistics are 519 // available). 520 func (c *gcControllerState) revise() { 521 gcpercent := gcpercent 522 if gcpercent < 0 { 523 // If GC is disabled but we're running a forced GC, 524 // act like GOGC is huge for the below calculations. 525 gcpercent = 100000 526 } 527 live := atomic.Load64(&memstats.heap_live) 528 529 var heapGoal, scanWorkExpected int64 530 if live <= memstats.next_gc { 531 // We're under the soft goal. Pace GC to complete at 532 // next_gc assuming the heap is in steady-state. 533 heapGoal = int64(memstats.next_gc) 534 535 // Compute the expected scan work remaining. 536 // 537 // This is estimated based on the expected 538 // steady-state scannable heap. For example, with 539 // GOGC=100, only half of the scannable heap is 540 // expected to be live, so that's what we target. 541 // 542 // (This is a float calculation to avoid overflowing on 543 // 100*heap_scan.) 544 scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) 545 } else { 546 // We're past the soft goal. Pace GC so that in the 547 // worst case it will complete by the hard goal. 548 const maxOvershoot = 1.1 549 heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) 550 551 // Compute the upper bound on the scan work remaining. 552 scanWorkExpected = int64(memstats.heap_scan) 553 } 554 555 // Compute the remaining scan work estimate. 556 // 557 // Note that we currently count allocations during GC as both 558 // scannable heap (heap_scan) and scan work completed 559 // (scanWork), so allocation will change this difference will 560 // slowly in the soft regime and not at all in the hard 561 // regime. 562 scanWorkRemaining := scanWorkExpected - c.scanWork 563 if scanWorkRemaining < 1000 { 564 // We set a somewhat arbitrary lower bound on 565 // remaining scan work since if we aim a little high, 566 // we can miss by a little. 567 // 568 // We *do* need to enforce that this is at least 1, 569 // since marking is racy and double-scanning objects 570 // may legitimately make the remaining scan work 571 // negative, even in the hard goal regime. 572 scanWorkRemaining = 1000 573 } 574 575 // Compute the heap distance remaining. 
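// The two ratios computed just below are what drive mutator assists:
// during the mark phase, every byte a goroutine allocates charges it
// assistWorkPerByte units of scan work, which it must either perform
// itself or pay for with credit stolen from bgScanCredit. A minimal
// sketch of that conversion (hypothetical helper, simplified from
// gcAssistAlloc in mgcmark.go):
//
//	// assistDebt returns the scan work owed for allocating n bytes at
//	// the current assist ratio.
//	func assistDebt(n int64, assistWorkPerByte float64) int64 {
//		return int64(float64(n) * assistWorkPerByte)
//	}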
576 heapRemaining := heapGoal - int64(live) 577 if heapRemaining <= 0 { 578 // This shouldn't happen, but if it does, avoid 579 // dividing by zero or setting the assist negative. 580 heapRemaining = 1 581 } 582 583 // Compute the mutator assist ratio so by the time the mutator 584 // allocates the remaining heap bytes up to next_gc, it will 585 // have done (or stolen) the remaining amount of scan work. 586 c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) 587 c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) 588 } 589 590 // endCycle computes the trigger ratio for the next cycle. 591 func (c *gcControllerState) endCycle() float64 { 592 if work.userForced { 593 // Forced GC means this cycle didn't start at the 594 // trigger, so where it finished isn't good 595 // information about how to adjust the trigger. 596 // Just leave it where it is. 597 return memstats.triggerRatio 598 } 599 600 // Proportional response gain for the trigger controller. Must 601 // be in [0, 1]. Lower values smooth out transient effects but 602 // take longer to respond to phase changes. Higher values 603 // react to phase changes quickly, but are more affected by 604 // transient changes. Values near 1 may be unstable. 605 const triggerGain = 0.5 606 607 // Compute next cycle trigger ratio. First, this computes the 608 // "error" for this cycle; that is, how far off the trigger 609 // was from what it should have been, accounting for both heap 610 // growth and GC CPU utilization. We compute the actual heap 611 // growth during this cycle and scale that by how far off from 612 // the goal CPU utilization we were (to estimate the heap 613 // growth if we had the desired CPU utilization). The 614 // difference between this estimate and the GOGC-based goal 615 // heap growth is the error. 616 goalGrowthRatio := float64(gcpercent) / 100 617 actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 618 assistDuration := nanotime() - c.markStartTime 619 620 // Assume background mark hit its utilization goal. 621 utilization := gcBackgroundUtilization 622 // Add assist utilization; avoid divide by zero. 623 if assistDuration > 0 { 624 utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) 625 } 626 627 triggerError := goalGrowthRatio - memstats.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-memstats.triggerRatio) 628 629 // Finally, we adjust the trigger for next time by this error, 630 // damped by the proportional gain. 631 triggerRatio := memstats.triggerRatio + triggerGain*triggerError 632 633 if debug.gcpacertrace > 0 { 634 // Print controller state in terms of the design 635 // document. 636 H_m_prev := memstats.heap_marked 637 h_t := memstats.triggerRatio 638 H_T := memstats.gc_trigger 639 h_a := actualGrowthRatio 640 H_a := memstats.heap_live 641 h_g := goalGrowthRatio 642 H_g := int64(float64(H_m_prev) * (1 + h_g)) 643 u_a := utilization 644 u_g := gcGoalUtilization 645 W_a := c.scanWork 646 print("pacer: H_m_prev=", H_m_prev, 647 " h_t=", h_t, " H_T=", H_T, 648 " h_a=", h_a, " H_a=", H_a, 649 " h_g=", h_g, " H_g=", H_g, 650 " u_a=", u_a, " u_g=", u_g, 651 " W_a=", W_a, 652 " goalΔ=", goalGrowthRatio-h_t, 653 " actualΔ=", h_a-h_t, 654 " u_a/u_g=", u_a/u_g, 655 "\n") 656 } 657 658 return triggerRatio 659 } 660 661 // enlistWorker encourages another dedicated mark worker to start on 662 // another P if there are spare worker slots. It is used by putfull 663 // when more work is made available. 
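// To preempt a running P, enlistWorker below picks a uniformly random P
// other than its own: it draws an index in [0, GOMAXPROCS-1) and shifts
// it up by one if it lands at or above the caller's own id. A
// stand-alone sketch of that selection (hypothetical name, math/rand
// standing in for fastrandn):
//
//	// randomOther returns a uniform index in [0, n) excluding self,
//	// assuming 0 <= self < n and n > 1.
//	func randomOther(self, n int) int {
//		id := rand.Intn(n - 1)
//		if id >= self {
//			id++
//		}
//		return id
//	}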
664 // 665 //go:nowritebarrier 666 func (c *gcControllerState) enlistWorker() { 667 // If there are idle Ps, wake one so it will run an idle worker. 668 // NOTE: This is suspected of causing deadlocks. See golang.org/issue/19112. 669 // 670 // if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { 671 // wakep() 672 // return 673 // } 674 675 // There are no idle Ps. If we need more dedicated workers, 676 // try to preempt a running P so it will switch to a worker. 677 if c.dedicatedMarkWorkersNeeded <= 0 { 678 return 679 } 680 // Pick a random other P to preempt. 681 if gomaxprocs <= 1 { 682 return 683 } 684 gp := getg() 685 if gp == nil || gp.m == nil || gp.m.p == 0 { 686 return 687 } 688 myID := gp.m.p.ptr().id 689 for tries := 0; tries < 5; tries++ { 690 id := int32(fastrandn(uint32(gomaxprocs - 1))) 691 if id >= myID { 692 id++ 693 } 694 p := allp[id] 695 if p.status != _Prunning { 696 continue 697 } 698 if preemptone(p) { 699 return 700 } 701 } 702 } 703 704 // findRunnableGCWorker returns the background mark worker for _p_ if it 705 // should be run. This must only be called when gcBlackenEnabled != 0. 706 func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { 707 if gcBlackenEnabled == 0 { 708 throw("gcControllerState.findRunnable: blackening not enabled") 709 } 710 if _p_.gcBgMarkWorker == 0 { 711 // The mark worker associated with this P is blocked 712 // performing a mark transition. We can't run it 713 // because it may be on some other run or wait queue. 714 return nil 715 } 716 717 if !gcMarkWorkAvailable(_p_) { 718 // No work to be done right now. This can happen at 719 // the end of the mark phase when there are still 720 // assists tapering off. Don't bother running a worker 721 // now because it'll just return immediately. 722 return nil 723 } 724 725 decIfPositive := func(ptr *int64) bool { 726 if *ptr > 0 { 727 if atomic.Xaddint64(ptr, -1) >= 0 { 728 return true 729 } 730 // We lost a race 731 atomic.Xaddint64(ptr, +1) 732 } 733 return false 734 } 735 736 if decIfPositive(&c.dedicatedMarkWorkersNeeded) { 737 // This P is now dedicated to marking until the end of 738 // the concurrent mark phase. 739 _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode 740 } else if c.fractionalUtilizationGoal == 0 { 741 // No need for fractional workers. 742 return nil 743 } else { 744 // Is this P behind on the fractional utilization 745 // goal? 746 // 747 // This should be kept in sync with pollFractionalWorkerExit. 748 delta := nanotime() - gcController.markStartTime 749 if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { 750 // Nope. No need to run a fractional worker. 751 return nil 752 } 753 // Run a fractional worker. 754 _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode 755 } 756 757 // Run the background mark worker 758 gp := _p_.gcBgMarkWorker.ptr() 759 casgstatus(gp, _Gwaiting, _Grunnable) 760 if trace.enabled { 761 traceGoUnpark(gp, 0) 762 } 763 return gp 764 } 765 766 // pollFractionalWorkerExit returns true if a fractional mark worker 767 // should self-preempt. It assumes it is called from the fractional 768 // worker. 769 func pollFractionalWorkerExit() bool { 770 // This should be kept in sync with the fractional worker 771 // scheduler logic in findRunnableGCWorker. 
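// fractionalUtilizationGoal is a small per-P target: with the 25%
// background goal, one dedicated worker, and GOMAXPROCS=5, every P not
// running a dedicated worker owes about 5% of its wall-clock time (see
// fractionalUtilizationGoal above). The code below self-preempts once
// this worker has spent more than 1.2x that fraction of the time since
// mark started. A stand-alone sketch of the predicate (hypothetical
// helper):
//
//	// overGoal reports whether a worker that has run for selfTime out
//	// of delta nanoseconds of wall-clock time has exceeded goal by the
//	// 20% slack used here.
//	func overGoal(selfTime, delta int64, goal float64) bool {
//		if delta <= 0 {
//			return true
//		}
//		return float64(selfTime)/float64(delta) > 1.2*goal
//	}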
772 now := nanotime() 773 delta := now - gcController.markStartTime 774 if delta <= 0 { 775 return true 776 } 777 p := getg().m.p.ptr() 778 selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime) 779 // Add some slack to the utilization goal so that the 780 // fractional worker isn't behind again the instant it exits. 781 return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal 782 } 783 784 // gcSetTriggerRatio sets the trigger ratio and updates everything 785 // derived from it: the absolute trigger, the heap goal, mark pacing, 786 // and sweep pacing. 787 // 788 // This can be called any time. If GC is the in the middle of a 789 // concurrent phase, it will adjust the pacing of that phase. 790 // 791 // This depends on gcpercent, memstats.heap_marked, and 792 // memstats.heap_live. These must be up to date. 793 // 794 // mheap_.lock must be held or the world must be stopped. 795 func gcSetTriggerRatio(triggerRatio float64) { 796 // Set the trigger ratio, capped to reasonable bounds. 797 if triggerRatio < 0 { 798 // This can happen if the mutator is allocating very 799 // quickly or the GC is scanning very slowly. 800 triggerRatio = 0 801 } else if gcpercent >= 0 { 802 // Ensure there's always a little margin so that the 803 // mutator assist ratio isn't infinity. 804 maxTriggerRatio := 0.95 * float64(gcpercent) / 100 805 if triggerRatio > maxTriggerRatio { 806 triggerRatio = maxTriggerRatio 807 } 808 } 809 memstats.triggerRatio = triggerRatio 810 811 // Compute the absolute GC trigger from the trigger ratio. 812 // 813 // We trigger the next GC cycle when the allocated heap has 814 // grown by the trigger ratio over the marked heap size. 815 trigger := ^uint64(0) 816 if gcpercent >= 0 { 817 trigger = uint64(float64(memstats.heap_marked) * (1 + triggerRatio)) 818 // Don't trigger below the minimum heap size. 819 minTrigger := heapminimum 820 if !gosweepdone() { 821 // Concurrent sweep happens in the heap growth 822 // from heap_live to gc_trigger, so ensure 823 // that concurrent sweep has some heap growth 824 // in which to perform sweeping before we 825 // start the next GC cycle. 826 sweepMin := atomic.Load64(&memstats.heap_live) + sweepMinHeapDistance*uint64(gcpercent)/100 827 if sweepMin > minTrigger { 828 minTrigger = sweepMin 829 } 830 } 831 if trigger < minTrigger { 832 trigger = minTrigger 833 } 834 if int64(trigger) < 0 { 835 print("runtime: next_gc=", memstats.next_gc, " heap_marked=", memstats.heap_marked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "triggerRatio=", triggerRatio, " minTrigger=", minTrigger, "\n") 836 throw("gc_trigger underflow") 837 } 838 } 839 memstats.gc_trigger = trigger 840 841 // Compute the next GC goal, which is when the allocated heap 842 // has grown by GOGC/100 over the heap marked by the last 843 // cycle. 844 goal := ^uint64(0) 845 if gcpercent >= 0 { 846 goal = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 847 if goal < trigger { 848 // The trigger ratio is always less than GOGC/100, but 849 // other bounds on the trigger may have raised it. 850 // Push up the goal, too. 851 goal = trigger 852 } 853 } 854 memstats.next_gc = goal 855 if trace.enabled { 856 traceNextGC() 857 } 858 859 // Update mark pacing. 860 if gcphase != _GCoff { 861 gcController.revise() 862 } 863 864 // Update sweep pacing. 
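// Concretely: with GOGC=100 and heap_marked of 4 MB, the goal computed
// above is 8 MB, and with a trigger ratio of 0.875 (the initial value set
// in gcinit) the trigger is 4 MB * 1.875 = 7.5 MB, subject to the
// heapminimum and sweep-distance floors. The sweep pacing below must then
// sweep every in-use page during the heap growth from heap_live up to
// that trigger. A rough sketch of the resulting rate and of the debt an
// allocating goroutine accrues (hypothetical helpers; the real debt check
// is deductSweepCredit in mgcsweep.go):
//
//	// sweepRate returns pages to sweep per byte of heap growth.
//	func sweepRate(pagesToSweep, heapDistance int64) float64 {
//		if pagesToSweep <= 0 || heapDistance <= 0 {
//			return 0
//		}
//		return float64(pagesToSweep) / float64(heapDistance)
//	}
//
//	// sweepDebt returns how many pages should already have been swept
//	// once the heap has grown by allocated bytes past the basis.
//	func sweepDebt(allocated int64, rate float64) int64 {
//		return int64(float64(allocated) * rate)
//	}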
865 if gosweepdone() { 866 mheap_.sweepPagesPerByte = 0 867 } else { 868 // Concurrent sweep needs to sweep all of the in-use 869 // pages by the time the allocated heap reaches the GC 870 // trigger. Compute the ratio of in-use pages to sweep 871 // per byte allocated, accounting for the fact that 872 // some might already be swept. 873 heapLiveBasis := atomic.Load64(&memstats.heap_live) 874 heapDistance := int64(trigger) - int64(heapLiveBasis) 875 // Add a little margin so rounding errors and 876 // concurrent sweep are less likely to leave pages 877 // unswept when GC starts. 878 heapDistance -= 1024 * 1024 879 if heapDistance < _PageSize { 880 // Avoid setting the sweep ratio extremely high 881 heapDistance = _PageSize 882 } 883 pagesSwept := atomic.Load64(&mheap_.pagesSwept) 884 sweepDistancePages := int64(mheap_.pagesInUse) - int64(pagesSwept) 885 if sweepDistancePages <= 0 { 886 mheap_.sweepPagesPerByte = 0 887 } else { 888 mheap_.sweepPagesPerByte = float64(sweepDistancePages) / float64(heapDistance) 889 mheap_.sweepHeapLiveBasis = heapLiveBasis 890 // Write pagesSweptBasis last, since this 891 // signals concurrent sweeps to recompute 892 // their debt. 893 atomic.Store64(&mheap_.pagesSweptBasis, pagesSwept) 894 } 895 } 896 } 897 898 // gcGoalUtilization is the goal CPU utilization for 899 // marking as a fraction of GOMAXPROCS. 900 const gcGoalUtilization = 0.30 901 902 // gcBackgroundUtilization is the fixed CPU utilization for background 903 // marking. It must be <= gcGoalUtilization. The difference between 904 // gcGoalUtilization and gcBackgroundUtilization will be made up by 905 // mark assists. The scheduler will aim to use within 50% of this 906 // goal. 907 // 908 // Setting this to < gcGoalUtilization avoids saturating the trigger 909 // feedback controller when there are no assists, which allows it to 910 // better control CPU and heap growth. However, the larger the gap, 911 // the more mutator assists are expected to happen, which impact 912 // mutator latency. 913 const gcBackgroundUtilization = 0.25 914 915 // gcCreditSlack is the amount of scan work credit that can can 916 // accumulate locally before updating gcController.scanWork and, 917 // optionally, gcController.bgScanCredit. Lower values give a more 918 // accurate assist ratio and make it more likely that assists will 919 // successfully steal background credit. Higher values reduce memory 920 // contention. 921 const gcCreditSlack = 2000 922 923 // gcAssistTimeSlack is the nanoseconds of mutator assist time that 924 // can accumulate on a P before updating gcController.assistTime. 925 const gcAssistTimeSlack = 5000 926 927 // gcOverAssistWork determines how many extra units of scan work a GC 928 // assist does when an assist happens. This amortizes the cost of an 929 // assist by pre-paying for this many bytes of future allocations. 930 const gcOverAssistWork = 64 << 10 931 932 var work struct { 933 full lfstack // lock-free list of full blocks workbuf 934 empty lfstack // lock-free list of empty blocks workbuf 935 pad0 [sys.CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait 936 937 wbufSpans struct { 938 lock mutex 939 // free is a list of spans dedicated to workbufs, but 940 // that don't currently contain any workbufs. 941 free mSpanList 942 // busy is a list of all spans containing workbufs on 943 // one of the workbuf lists. 944 busy mSpanList 945 } 946 947 // Restore 64-bit alignment on 32-bit. 
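// The uint32 pad below, and the 8-byte-aligned placement of bytesMarked
// right after it, follow the usual rule for 64-bit atomics: on 32-bit
// platforms an atomically accessed uint64 must be 8-byte aligned, and
// only the first word of a struct is guaranteed to be. The same concern
// applies outside the runtime; a minimal illustration with sync/atomic
// (hypothetical type):
//
//	type stats struct {
//		// Keep the 64-bit counter first (or pad it to an 8-byte
//		// offset) so atomic.AddUint64 is safe on 386, ARM, and
//		// 32-bit MIPS.
//		bytes uint64
//		flags uint32
//	}
//
//	func bump(s *stats, n uint64) { atomic.AddUint64(&s.bytes, n) }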
948 _ uint32 949 950 // bytesMarked is the number of bytes marked this cycle. This 951 // includes bytes blackened in scanned objects, noscan objects 952 // that go straight to black, and permagrey objects scanned by 953 // markroot during the concurrent scan phase. This is updated 954 // atomically during the cycle. Updates may be batched 955 // arbitrarily, since the value is only read at the end of the 956 // cycle. 957 // 958 // Because of benign races during marking, this number may not 959 // be the exact number of marked bytes, but it should be very 960 // close. 961 // 962 // Put this field here because it needs 64-bit atomic access 963 // (and thus 8-byte alignment even on 32-bit architectures). 964 bytesMarked uint64 965 966 markrootNext uint32 // next markroot job 967 markrootJobs uint32 // number of markroot jobs 968 969 nproc uint32 970 tstart int64 971 nwait uint32 972 ndone uint32 973 alldone note 974 975 // helperDrainBlock indicates that GC mark termination helpers 976 // should pass gcDrainBlock to gcDrain to block in the 977 // getfull() barrier. Otherwise, they should pass gcDrainNoBlock. 978 // 979 // TODO: This is a temporary fallback to work around races 980 // that cause early mark termination. 981 helperDrainBlock bool 982 983 // Number of roots of various root types. Set by gcMarkRootPrepare. 984 nFlushCacheRoots int 985 nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int 986 987 // markrootDone indicates that roots have been marked at least 988 // once during the current GC cycle. This is checked by root 989 // marking operations that have to happen only during the 990 // first root marking pass, whether that's during the 991 // concurrent mark phase in current GC or mark termination in 992 // STW GC. 993 markrootDone bool 994 995 // Each type of GC state transition is protected by a lock. 996 // Since multiple threads can simultaneously detect the state 997 // transition condition, any thread that detects a transition 998 // condition must acquire the appropriate transition lock, 999 // re-check the transition condition and return if it no 1000 // longer holds or perform the transition if it does. 1001 // Likewise, any transition must invalidate the transition 1002 // condition before releasing the lock. This ensures that each 1003 // transition is performed by exactly one thread and threads 1004 // that need the transition to happen block until it has 1005 // happened. 1006 // 1007 // startSema protects the transition from "off" to mark or 1008 // mark termination. 1009 startSema uint32 1010 // markDoneSema protects transitions from mark 1 to mark 2 and 1011 // from mark 2 to mark termination. 1012 markDoneSema uint32 1013 1014 bgMarkReady note // signal background mark worker has started 1015 bgMarkDone uint32 // cas to 1 when at a background mark completion point 1016 // Background mark completion signaling 1017 1018 // mode is the concurrency mode of the current GC cycle. 1019 mode gcMode 1020 1021 // userForced indicates the current GC cycle was forced by an 1022 // explicit user call. 1023 userForced bool 1024 1025 // totaltime is the CPU nanoseconds spent in GC since the 1026 // program started if debug.gctrace > 0. 1027 totaltime int64 1028 1029 // initialHeapLive is the value of memstats.heap_live at the 1030 // beginning of this GC cycle. 1031 initialHeapLive uint64 1032 1033 // assistQueue is a queue of assists that are blocked because 1034 // there was neither enough credit to steal or enough work to 1035 // do. 
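// The transition-lock discipline described above for startSema and
// markDoneSema amounts to "lock, re-check, then perform or back off". A
// stand-alone sketch, using the runtime's semacquire/semrelease for
// concreteness (hypothetical helper; gcStart and gcMarkDone below follow
// this shape):
//
//	func transition(sema *uint32, cond func() bool, perform func()) {
//		semacquire(sema)
//		if !cond() {
//			// Another thread already performed the transition.
//			semrelease(sema)
//			return
//		}
//		perform() // must invalidate cond before releasing the lock
//		semrelease(sema)
//	}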
1036 assistQueue struct { 1037 lock mutex 1038 head, tail guintptr 1039 } 1040 1041 // sweepWaiters is a list of blocked goroutines to wake when 1042 // we transition from mark termination to sweep. 1043 sweepWaiters struct { 1044 lock mutex 1045 head guintptr 1046 } 1047 1048 // cycles is the number of completed GC cycles, where a GC 1049 // cycle is sweep termination, mark, mark termination, and 1050 // sweep. This differs from memstats.numgc, which is 1051 // incremented at mark termination. 1052 cycles uint32 1053 1054 // Timing/utilization stats for this cycle. 1055 stwprocs, maxprocs int32 1056 tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start 1057 1058 pauseNS int64 // total STW time this cycle 1059 pauseStart int64 // nanotime() of last STW 1060 1061 // debug.gctrace heap sizes for this cycle. 1062 heap0, heap1, heap2, heapGoal uint64 1063 } 1064 1065 // GC runs a garbage collection and blocks the caller until the 1066 // garbage collection is complete. It may also block the entire 1067 // program. 1068 func GC() { 1069 // We consider a cycle to be: sweep termination, mark, mark 1070 // termination, and sweep. This function shouldn't return 1071 // until a full cycle has been completed, from beginning to 1072 // end. Hence, we always want to finish up the current cycle 1073 // and start a new one. That means: 1074 // 1075 // 1. In sweep termination, mark, or mark termination of cycle 1076 // N, wait until mark termination N completes and transitions 1077 // to sweep N. 1078 // 1079 // 2. In sweep N, help with sweep N. 1080 // 1081 // At this point we can begin a full cycle N+1. 1082 // 1083 // 3. Trigger cycle N+1 by starting sweep termination N+1. 1084 // 1085 // 4. Wait for mark termination N+1 to complete. 1086 // 1087 // 5. Help with sweep N+1 until it's done. 1088 // 1089 // This all has to be written to deal with the fact that the 1090 // GC may move ahead on its own. For example, when we block 1091 // until mark termination N, we may wake up in cycle N+2. 1092 1093 gp := getg() 1094 1095 // Prevent the GC phase or cycle count from changing. 1096 lock(&work.sweepWaiters.lock) 1097 n := atomic.Load(&work.cycles) 1098 if gcphase == _GCmark { 1099 // Wait until sweep termination, mark, and mark 1100 // termination of cycle N complete. 1101 gp.schedlink = work.sweepWaiters.head 1102 work.sweepWaiters.head.set(gp) 1103 goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) 1104 } else { 1105 // We're in sweep N already. 1106 unlock(&work.sweepWaiters.lock) 1107 } 1108 1109 // We're now in sweep N or later. Trigger GC cycle N+1, which 1110 // will first finish sweep N if necessary and then enter sweep 1111 // termination N+1. 1112 gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerCycle, n: n + 1}) 1113 1114 // Wait for mark termination N+1 to complete. 1115 lock(&work.sweepWaiters.lock) 1116 if gcphase == _GCmark && atomic.Load(&work.cycles) == n+1 { 1117 gp.schedlink = work.sweepWaiters.head 1118 work.sweepWaiters.head.set(gp) 1119 goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) 1120 } else { 1121 unlock(&work.sweepWaiters.lock) 1122 } 1123 1124 // Finish sweep N+1 before returning. We do this both to 1125 // complete the cycle and because runtime.GC() is often used 1126 // as part of tests and benchmarks to get the system into a 1127 // relatively stable and isolated state. 
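// This completeness guarantee is what makes the common testing pattern
// below meaningful: runtime.GC() returns only after a full cycle, so a
// measurement can start from a freshly collected and swept heap. An
// illustrative use, assuming the standard testing package and a
// hypothetical doWork function:
//
//	func BenchmarkWork(b *testing.B) {
//		runtime.GC() // complete a full GC cycle before timing
//		b.ResetTimer()
//		for i := 0; i < b.N; i++ {
//			doWork()
//		}
//	}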
1128 for atomic.Load(&work.cycles) == n+1 && gosweepone() != ^uintptr(0) { 1129 sweep.nbgsweep++ 1130 Gosched() 1131 } 1132 1133 // Callers may assume that the heap profile reflects the 1134 // just-completed cycle when this returns (historically this 1135 // happened because this was a STW GC), but right now the 1136 // profile still reflects mark termination N, not N+1. 1137 // 1138 // As soon as all of the sweep frees from cycle N+1 are done, 1139 // we can go ahead and publish the heap profile. 1140 // 1141 // First, wait for sweeping to finish. (We know there are no 1142 // more spans on the sweep queue, but we may be concurrently 1143 // sweeping spans, so we have to wait.) 1144 for atomic.Load(&work.cycles) == n+1 && atomic.Load(&mheap_.sweepers) != 0 { 1145 Gosched() 1146 } 1147 1148 // Now we're really done with sweeping, so we can publish the 1149 // stable heap profile. Only do this if we haven't already hit 1150 // another mark termination. 1151 mp := acquirem() 1152 cycle := atomic.Load(&work.cycles) 1153 if cycle == n+1 || (gcphase == _GCmark && cycle == n+2) { 1154 mProf_PostSweep() 1155 } 1156 releasem(mp) 1157 } 1158 1159 // gcMode indicates how concurrent a GC cycle should be. 1160 type gcMode int 1161 1162 const ( 1163 gcBackgroundMode gcMode = iota // concurrent GC and sweep 1164 gcForceMode // stop-the-world GC now, concurrent sweep 1165 gcForceBlockMode // stop-the-world GC now and STW sweep (forced by user) 1166 ) 1167 1168 // A gcTrigger is a predicate for starting a GC cycle. Specifically, 1169 // it is an exit condition for the _GCoff phase. 1170 type gcTrigger struct { 1171 kind gcTriggerKind 1172 now int64 // gcTriggerTime: current time 1173 n uint32 // gcTriggerCycle: cycle number to start 1174 } 1175 1176 type gcTriggerKind int 1177 1178 const ( 1179 // gcTriggerAlways indicates that a cycle should be started 1180 // unconditionally, even if GOGC is off or we're in a cycle 1181 // right now. This cannot be consolidated with other cycles. 1182 gcTriggerAlways gcTriggerKind = iota 1183 1184 // gcTriggerHeap indicates that a cycle should be started when 1185 // the heap size reaches the trigger heap size computed by the 1186 // controller. 1187 gcTriggerHeap 1188 1189 // gcTriggerTime indicates that a cycle should be started when 1190 // it's been more than forcegcperiod nanoseconds since the 1191 // previous GC cycle. 1192 gcTriggerTime 1193 1194 // gcTriggerCycle indicates that a cycle should be started if 1195 // we have not yet started cycle number gcTrigger.n (relative 1196 // to work.cycles). 1197 gcTriggerCycle 1198 ) 1199 1200 // test returns true if the trigger condition is satisfied, meaning 1201 // that the exit condition for the _GCoff phase has been met. The exit 1202 // condition should be tested when allocating. 1203 func (t gcTrigger) test() bool { 1204 if !memstats.enablegc || panicking != 0 { 1205 return false 1206 } 1207 if t.kind == gcTriggerAlways { 1208 return true 1209 } 1210 if gcphase != _GCoff { 1211 return false 1212 } 1213 switch t.kind { 1214 case gcTriggerHeap: 1215 // Non-atomic access to heap_live for performance. If 1216 // we are going to trigger on this, this thread just 1217 // atomically wrote heap_live anyway and we'll see our 1218 // own write. 
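// The heap case below is a plain comparison against gc_trigger; the
// gcTriggerCycle case further down is subtler, because work.cycles is a
// uint32 that may wrap around, so it compares through a signed
// difference. A stand-alone sketch of that wraparound-safe test
// (hypothetical name):
//
//	// cyclePending reports whether cycle n has not yet been started,
//	// treating the counters as a wrapping uint32 sequence, exactly as
//	// the gcTriggerCycle case does.
//	func cyclePending(n, started uint32) bool {
//		return int32(n-started) > 0
//	}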
1219 return memstats.heap_live >= memstats.gc_trigger 1220 case gcTriggerTime: 1221 if gcpercent < 0 { 1222 return false 1223 } 1224 lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime)) 1225 return lastgc != 0 && t.now-lastgc > forcegcperiod 1226 case gcTriggerCycle: 1227 // t.n > work.cycles, but accounting for wraparound. 1228 return int32(t.n-work.cycles) > 0 1229 } 1230 return true 1231 } 1232 1233 // gcStart transitions the GC from _GCoff to _GCmark (if 1234 // !mode.stwMark) or _GCmarktermination (if mode.stwMark) by 1235 // performing sweep termination and GC initialization. 1236 // 1237 // This may return without performing this transition in some cases, 1238 // such as when called on a system stack or with locks held. 1239 func gcStart(mode gcMode, trigger gcTrigger) { 1240 // Since this is called from malloc and malloc is called in 1241 // the guts of a number of libraries that might be holding 1242 // locks, don't attempt to start GC in non-preemptible or 1243 // potentially unstable situations. 1244 mp := acquirem() 1245 if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" { 1246 releasem(mp) 1247 return 1248 } 1249 releasem(mp) 1250 mp = nil 1251 1252 // Pick up the remaining unswept/not being swept spans concurrently 1253 // 1254 // This shouldn't happen if we're being invoked in background 1255 // mode since proportional sweep should have just finished 1256 // sweeping everything, but rounding errors, etc, may leave a 1257 // few spans unswept. In forced mode, this is necessary since 1258 // GC can be forced at any point in the sweeping cycle. 1259 // 1260 // We check the transition condition continuously here in case 1261 // this G gets delayed in to the next GC cycle. 1262 for trigger.test() && gosweepone() != ^uintptr(0) { 1263 sweep.nbgsweep++ 1264 } 1265 1266 // Perform GC initialization and the sweep termination 1267 // transition. 1268 semacquire(&work.startSema) 1269 // Re-check transition condition under transition lock. 1270 if !trigger.test() { 1271 semrelease(&work.startSema) 1272 return 1273 } 1274 1275 // For stats, check if this GC was forced by the user. 1276 work.userForced = trigger.kind == gcTriggerAlways || trigger.kind == gcTriggerCycle 1277 1278 // In gcstoptheworld debug mode, upgrade the mode accordingly. 1279 // We do this after re-checking the transition condition so 1280 // that multiple goroutines that detect the heap trigger don't 1281 // start multiple STW GCs. 1282 if mode == gcBackgroundMode { 1283 if debug.gcstoptheworld == 1 { 1284 mode = gcForceMode 1285 } else if debug.gcstoptheworld == 2 { 1286 mode = gcForceBlockMode 1287 } 1288 } 1289 1290 // Ok, we're doing it! Stop everybody else 1291 semacquire(&worldsema) 1292 1293 if trace.enabled { 1294 traceGCStart() 1295 } 1296 1297 if mode == gcBackgroundMode { 1298 gcBgMarkStartWorkers() 1299 } 1300 1301 gcResetMarkState() 1302 1303 work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs 1304 if work.stwprocs > ncpu { 1305 // This is used to compute CPU time of the STW phases, 1306 // so it can't be more than ncpu, even if GOMAXPROCS is. 1307 work.stwprocs = ncpu 1308 } 1309 work.heap0 = atomic.Load64(&memstats.heap_live) 1310 work.pauseNS = 0 1311 work.mode = mode 1312 1313 now := nanotime() 1314 work.tSweepTerm = now 1315 work.pauseStart = now 1316 if trace.enabled { 1317 traceGCSTWStart(1) 1318 } 1319 systemstack(stopTheWorldWithSema) 1320 // Finish sweep before we start concurrent scan. 
1321 systemstack(func() { 1322 finishsweep_m() 1323 }) 1324 // clearpools before we start the GC. If we wait they memory will not be 1325 // reclaimed until the next GC cycle. 1326 clearpools() 1327 1328 work.cycles++ 1329 if mode == gcBackgroundMode { // Do as much work concurrently as possible 1330 gcController.startCycle() 1331 work.heapGoal = memstats.next_gc 1332 1333 // Enter concurrent mark phase and enable 1334 // write barriers. 1335 // 1336 // Because the world is stopped, all Ps will 1337 // observe that write barriers are enabled by 1338 // the time we start the world and begin 1339 // scanning. 1340 // 1341 // Write barriers must be enabled before assists are 1342 // enabled because they must be enabled before 1343 // any non-leaf heap objects are marked. Since 1344 // allocations are blocked until assists can 1345 // happen, we want enable assists as early as 1346 // possible. 1347 setGCPhase(_GCmark) 1348 1349 gcBgMarkPrepare() // Must happen before assist enable. 1350 gcMarkRootPrepare() 1351 1352 // Mark all active tinyalloc blocks. Since we're 1353 // allocating from these, they need to be black like 1354 // other allocations. The alternative is to blacken 1355 // the tiny block on every allocation from it, which 1356 // would slow down the tiny allocator. 1357 gcMarkTinyAllocs() 1358 1359 // At this point all Ps have enabled the write 1360 // barrier, thus maintaining the no white to 1361 // black invariant. Enable mutator assists to 1362 // put back-pressure on fast allocating 1363 // mutators. 1364 atomic.Store(&gcBlackenEnabled, 1) 1365 1366 // Assists and workers can start the moment we start 1367 // the world. 1368 gcController.markStartTime = now 1369 1370 // Concurrent mark. 1371 systemstack(func() { 1372 now = startTheWorldWithSema(trace.enabled) 1373 }) 1374 work.pauseNS += now - work.pauseStart 1375 work.tMark = now 1376 } else { 1377 if trace.enabled { 1378 // Switch to mark termination STW. 1379 traceGCSTWDone() 1380 traceGCSTWStart(0) 1381 } 1382 t := nanotime() 1383 work.tMark, work.tMarkTerm = t, t 1384 work.heapGoal = work.heap0 1385 1386 // Perform mark termination. This will restart the world. 1387 gcMarkTermination(memstats.triggerRatio) 1388 } 1389 1390 semrelease(&work.startSema) 1391 } 1392 1393 // gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2 1394 // to mark termination. 1395 // 1396 // This should be called when all mark work has been drained. In mark 1397 // 1, this includes all root marking jobs, global work buffers, and 1398 // active work buffers in assists and background workers; however, 1399 // work may still be cached in per-P work buffers. In mark 2, per-P 1400 // caches are disabled. 1401 // 1402 // The calling context must be preemptible. 1403 // 1404 // Note that it is explicitly okay to have write barriers in this 1405 // function because completion of concurrent mark is best-effort 1406 // anyway. Any work created by write barriers here will be cleaned up 1407 // by mark termination. 1408 func gcMarkDone() { 1409 top: 1410 semacquire(&work.markDoneSema) 1411 1412 // Re-check transition condition under transition lock. 1413 if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { 1414 semrelease(&work.markDoneSema) 1415 return 1416 } 1417 1418 // Disallow starting new workers so that any remaining workers 1419 // in the current mark phase will drain out. 1420 // 1421 // TODO(austin): Should dedicated workers keep an eye on this 1422 // and exit gcDrain promptly? 
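// The adjustment below disables dedicated workers without a separate
// flag: subtracting a huge constant drives dedicatedMarkWorkersNeeded so
// far negative that decIfPositive in findRunnableGCWorker can never
// succeed, and adding the same constant back restores the earlier count
// exactly. A stand-alone sketch of the idea with sync/atomic
// (hypothetical helpers):
//
//	const pauseDelta = int64(0xffffffff)
//
//	func pauseWorkers(needed *int64)  { atomic.AddInt64(needed, -pauseDelta) }
//	func resumeWorkers(needed *int64) { atomic.AddInt64(needed, pauseDelta) }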
1423 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) 1424 prevFractionalGoal := gcController.fractionalUtilizationGoal 1425 gcController.fractionalUtilizationGoal = 0 1426 1427 if !gcBlackenPromptly { 1428 // Transition from mark 1 to mark 2. 1429 // 1430 // The global work list is empty, but there can still be work 1431 // sitting in the per-P work caches. 1432 // Flush and disable work caches. 1433 1434 // Disallow caching workbufs and indicate that we're in mark 2. 1435 gcBlackenPromptly = true 1436 1437 // Prevent completion of mark 2 until we've flushed 1438 // cached workbufs. 1439 atomic.Xadd(&work.nwait, -1) 1440 1441 // GC is set up for mark 2. Let Gs blocked on the 1442 // transition lock go while we flush caches. 1443 semrelease(&work.markDoneSema) 1444 1445 systemstack(func() { 1446 // Flush all currently cached workbufs and 1447 // ensure all Ps see gcBlackenPromptly. This 1448 // also blocks until any remaining mark 1 1449 // workers have exited their loop so we can 1450 // start new mark 2 workers. 1451 forEachP(func(_p_ *p) { 1452 wbBufFlush1(_p_) 1453 _p_.gcw.dispose() 1454 }) 1455 }) 1456 1457 // Check that roots are marked. We should be able to 1458 // do this before the forEachP, but based on issue 1459 // #16083 there may be a (harmless) race where we can 1460 // enter mark 2 while some workers are still scanning 1461 // stacks. The forEachP ensures these scans are done. 1462 // 1463 // TODO(austin): Figure out the race and fix this 1464 // properly. 1465 gcMarkRootCheck() 1466 1467 // Now we can start up mark 2 workers. 1468 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) 1469 gcController.fractionalUtilizationGoal = prevFractionalGoal 1470 1471 incnwait := atomic.Xadd(&work.nwait, +1) 1472 if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { 1473 // This loop will make progress because 1474 // gcBlackenPromptly is now true, so it won't 1475 // take this same "if" branch. 1476 goto top 1477 } 1478 } else { 1479 // Transition to mark termination. 1480 now := nanotime() 1481 work.tMarkTerm = now 1482 work.pauseStart = now 1483 getg().m.preemptoff = "gcing" 1484 if trace.enabled { 1485 traceGCSTWStart(0) 1486 } 1487 systemstack(stopTheWorldWithSema) 1488 // The gcphase is _GCmark, it will transition to _GCmarktermination 1489 // below. The important thing is that the wb remains active until 1490 // all marking is complete. This includes writes made by the GC. 1491 1492 // Record that one root marking pass has completed. 1493 work.markrootDone = true 1494 1495 // Disable assists and background workers. We must do 1496 // this before waking blocked assists. 1497 atomic.Store(&gcBlackenEnabled, 0) 1498 1499 // Wake all blocked assists. These will run when we 1500 // start the world again. 1501 gcWakeAllAssists() 1502 1503 // Likewise, release the transition lock. Blocked 1504 // workers and assists will run when we start the 1505 // world again. 1506 semrelease(&work.markDoneSema) 1507 1508 // endCycle depends on all gcWork cache stats being 1509 // flushed. This is ensured by mark 2. 1510 nextTriggerRatio := gcController.endCycle() 1511 1512 // Perform mark termination. This will restart the world. 1513 gcMarkTermination(nextTriggerRatio) 1514 } 1515 } 1516 1517 func gcMarkTermination(nextTriggerRatio float64) { 1518 // World is stopped. 1519 // Start marktermination which includes enabling the write barrier. 
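// Whether the barrier is being turned on here (the stop-the-world path)
// or merely kept on (after concurrent mark), setGCPhase derives the flag
// from the phase, and both mark phases require it. A stand-alone
// restatement of that rule (hypothetical helper):
//
//	// wbNeeded mirrors the writeBarrier.needed computation in
//	// setGCPhase: the barrier is required in _GCmark and
//	// _GCmarktermination and is dropped only at _GCoff.
//	func wbNeeded(phase uint32) bool {
//		return phase == _GCmark || phase == _GCmarktermination
//	}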

func gcMarkTermination(nextTriggerRatio float64) {
	// World is stopped.
	// Start marktermination which includes enabling the write barrier.
	atomic.Store(&gcBlackenEnabled, 0)
	gcBlackenPromptly = false
	setGCPhase(_GCmarktermination)

	work.heap1 = memstats.heap_live
	startTime := nanotime()

	mp := acquirem()
	mp.preemptoff = "gcing"
	_g_ := getg()
	_g_.m.traceback = 2
	gp := _g_.m.curg
	casgstatus(gp, _Grunning, _Gwaiting)
	gp.waitreason = "garbage collection"

	// Run gc on the g0 stack. We do this so that the g stack
	// we're currently running on will no longer change. Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state). We also
	// need to switch to g0 so we can shrink the stack.
	systemstack(func() {
		gcMark(startTime)
		// Must return immediately.
		// The outer function's stack may have moved
		// during gcMark (it shrinks stacks, including the
		// outer function's stack), so we must not refer
		// to any of its variables. Return back to the
		// non-system stack to pick up the new addresses
		// before continuing.
	})

	systemstack(func() {
		work.heap2 = work.bytesMarked
		if debug.gccheckmark > 0 {
			// Run a full stop-the-world mark using checkmark bits,
			// to check that we didn't forget to mark anything during
			// the concurrent mark process.
			gcResetMarkState()
			initCheckmarks()
			gcMark(startTime)
			clearCheckmarks()
		}

		// marking is complete so we can turn the write barrier off
		setGCPhase(_GCoff)
		gcSweep(work.mode)

		if debug.gctrace > 1 {
			startTime = nanotime()
			// The g stacks have been scanned so
			// they have gcscanvalid==true and gcworkdone==true.
			// Reset these so that all stacks will be rescanned.
			gcResetMarkState()
			finishsweep_m()

			// Still in STW but gcphase is _GCoff, reset to _GCmarktermination
			// At this point all objects will be found during the gcMark which
			// does a complete STW mark and object scan.
			setGCPhase(_GCmarktermination)
			gcMark(startTime)
			setGCPhase(_GCoff) // marking is done, turn off wb.
			gcSweep(work.mode)
		}
	})

	_g_.m.traceback = 0
	casgstatus(gp, _Gwaiting, _Grunning)

	if trace.enabled {
		traceGCDone()
	}

	// all done
	mp.preemptoff = ""

	if gcphase != _GCoff {
		throw("gc done but gcphase != _GCoff")
	}

	// Update GC trigger and pacing for the next cycle.
	gcSetTriggerRatio(nextTriggerRatio)

	// Update timing memstats
	now := nanotime()
	sec, nsec, _ := time_now()
	unixNow := sec*1e9 + int64(nsec)
	work.pauseNS += now - work.pauseStart
	work.tEnd = now
	atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user
	atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us
	memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS)
	memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow)
	memstats.pause_total_ns += uint64(work.pauseNS)

	// Update work.totaltime.
	sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm)
	// We report idle marking time below, but omit it from the
	// overall utilization here since it's "free".
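	// To make the accounting below concrete (hypothetical numbers): with
	// work.stwprocs = 4 and a 0.5ms sweep-termination pause, sweepTermCpu
	// is 4 * 0.5ms = 2ms of CPU time, since every P is considered busy
	// during STW; markTermCpu is computed the same way for the final
	// pause. markCpu, in contrast, is summed from the time actually spent
	// in assists and in dedicated/fractional workers. Dividing the
	// accumulated total by the CPU time available to the process (roughly
	// GOMAXPROCS * wall time, as computed for totalCpu below) yields
	// gc_cpu_fraction.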
	markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime
	markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm)
	cycleCpu := sweepTermCpu + markCpu + markTermCpu
	work.totaltime += cycleCpu

	// Compute overall GC CPU utilization.
	totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs)
	memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu)

	// Reset sweep state.
	sweep.nbgsweep = 0
	sweep.npausesweep = 0

	if work.userForced {
		memstats.numforcedgc++
	}

	// Bump GC cycle count and wake goroutines waiting on sweep.
	lock(&work.sweepWaiters.lock)
	memstats.numgc++
	injectglist(work.sweepWaiters.head.ptr())
	work.sweepWaiters.head = 0
	unlock(&work.sweepWaiters.lock)

	// Finish the current heap profiling cycle and start a new
	// heap profiling cycle. We do this before starting the world
	// so events don't leak into the wrong cycle.
	mProf_NextCycle()

	systemstack(func() { startTheWorldWithSema(true) })

	// Flush the heap profile so we can start a new cycle next GC.
	// This is relatively expensive, so we don't do it with the
	// world stopped.
	mProf_Flush()

	// Prepare workbufs for freeing by the sweeper. We do this
	// asynchronously because it can take non-trivial time.
	prepareFreeWorkbufs()

	// Free stack spans. This must be done between GC cycles.
	systemstack(freeStackSpans)

	// Print gctrace before dropping worldsema. As soon as we drop
	// worldsema another cycle could start and smash the stats
	// we're trying to print.
	if debug.gctrace > 0 {
		util := int(memstats.gc_cpu_fraction * 100)

		var sbuf [24]byte
		printlock()
		print("gc ", memstats.numgc,
			" @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ",
			util, "%: ")
		prev := work.tSweepTerm
		for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} {
			if i != 0 {
				print("+")
			}
			print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev))))
			prev = ns
		}
		print(" ms clock, ")
		for i, ns := range []int64{sweepTermCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} {
			if i == 2 || i == 3 {
				// Separate mark time components with /.
				print("/")
			} else if i != 0 {
				print("+")
			}
			print(string(fmtNSAsMS(sbuf[:], uint64(ns))))
		}
		print(" ms cpu, ",
			work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ",
			work.heapGoal>>20, " MB goal, ",
			work.maxprocs, " P")
		if work.userForced {
			print(" (forced)")
		}
		print("\n")
		printunlock()
	}

	semrelease(&worldsema)
	// Careful: another GC cycle may start now.

	releasem(mp)
	mp = nil

	// now that gc is done, kick off finalizer thread if needed
	if !concurrentSweep {
		// give the queued finalizers, if any, a chance to run
		Gosched()
	}
}
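
// Read together with the print calls above, a gctrace line (illustrative
// values only) looks roughly like:
//
//	gc 7 @4.104s 2%: 0.12+2.3+0.19 ms clock, 0.49+0.76/2.1/4.5+0.77 ms cpu, 4->5->3 MB, 5 MB goal, 4 P
//
// where the clock times are sweep termination (STW), concurrent mark, and
// mark termination (STW); the cpu times are sweep termination, assist,
// dedicated+fractional mark, idle mark, and mark termination; and the heap
// sizes are heap0 (at cycle start), heap1 (at the start of mark
// termination), and heap2 (bytes marked), followed by the heap goal and
// the number of Ps. "(forced)" is appended when work.userForced is set.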

// gcBgMarkStartWorkers prepares background mark worker goroutines.
// These goroutines will not run until the mark phase, but they must
// be started while the work is not stopped and from a regular G
// stack. The caller must hold worldsema.
func gcBgMarkStartWorkers() {
	// Background marking is performed by per-P G's. Ensure that
	// each P has a background GC G.
	for _, p := range allp {
		if p.gcBgMarkWorker == 0 {
			go gcBgMarkWorker(p)
			notetsleepg(&work.bgMarkReady, -1)
			noteclear(&work.bgMarkReady)
		}
	}
}

// gcBgMarkPrepare sets up state for background marking.
// Mutator assists must not yet be enabled.
func gcBgMarkPrepare() {
	// Background marking will stop when the work queues are empty
	// and there are no more workers (note that, since this is
	// concurrent, this may be a transient state, but mark
	// termination will clean it up). Between background workers
	// and assists, we don't really know how many workers there
	// will be, so we pretend to have an arbitrarily large number
	// of workers, almost all of which are "waiting". While a
	// worker is working it decrements nwait. If nproc == nwait,
	// there are no workers.
	work.nproc = ^uint32(0)
	work.nwait = ^uint32(0)
}

func gcBgMarkWorker(_p_ *p) {
	gp := getg()

	type parkInfo struct {
		m      muintptr // Release this m on park.
		attach puintptr // If non-nil, attach to this p on park.
	}
	// We pass park to a gopark unlock function, so it can't be on
	// the stack (see gopark). Prevent deadlock from recursively
	// starting GC by disabling preemption.
	gp.m.preemptoff = "GC worker init"
	park := new(parkInfo)
	gp.m.preemptoff = ""

	park.m.set(acquirem())
	park.attach.set(_p_)
	// Inform gcBgMarkStartWorkers that this worker is ready.
	// After this point, the background mark worker is scheduled
	// cooperatively by gcController.findRunnable. Hence, it must
	// never be preempted, as this would put it into _Grunnable
	// and put it on a run queue. Instead, when the preempt flag
	// is set, this puts itself into _Gwaiting to be woken up by
	// gcController.findRunnable at the appropriate time.
	notewakeup(&work.bgMarkReady)

	for {
		// Go to sleep until woken by gcController.findRunnable.
		// We can't releasem yet since even the call to gopark
		// may be preempted.
		gopark(func(g *g, parkp unsafe.Pointer) bool {
			park := (*parkInfo)(parkp)

			// The worker G is no longer running, so it's
			// now safe to allow preemption.
			releasem(park.m.ptr())

			// If the worker isn't attached to its P,
			// attach now. During initialization and after
			// a phase change, the worker may have been
			// running on a different P. As soon as we
			// attach, the owner P may schedule the
			// worker, so this must be done after the G is
			// stopped.
			if park.attach != 0 {
				p := park.attach.ptr()
				park.attach.set(nil)
				// cas the worker because we may be
				// racing with a new worker starting
				// on this P.
				if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) {
					// The P got a new worker.
					// Exit this worker.
					return false
				}
			}
			return true
		}, unsafe.Pointer(park), "GC worker (idle)", traceEvGoBlock, 0)

		// Loop until the P dies and disassociates this
		// worker (the P may later be reused, in which case
		// it will get a new worker) or we failed to associate.
		if _p_.gcBgMarkWorker.ptr() != gp {
			break
		}

		// Disable preemption so we can use the gcw. If the
		// scheduler wants to preempt us, we'll stop draining,
		// dispose the gcw, and then preempt.
		park.m.set(acquirem())

		if gcBlackenEnabled == 0 {
			throw("gcBgMarkWorker: blackening not enabled")
		}

		startTime := nanotime()
		_p_.gcMarkWorkerStartTime = startTime

		decnwait := atomic.Xadd(&work.nwait, -1)
		if decnwait == work.nproc {
			println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
			throw("work.nwait was > work.nproc")
		}

		systemstack(func() {
			// Mark our goroutine preemptible so its stack
			// can be scanned. This lets two mark workers
			// scan each other (otherwise, they would
			// deadlock). We must not modify anything on
			// the G stack. However, stack shrinking is
			// disabled for mark workers, so it is safe to
			// read from the G stack.
			casgstatus(gp, _Grunning, _Gwaiting)
			switch _p_.gcMarkWorkerMode {
			default:
				throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
			case gcMarkWorkerDedicatedMode:
				gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
				if gp.preempt {
					// We were preempted. This is
					// a useful signal to kick
					// everything out of the run
					// queue so it can run
					// somewhere else.
					lock(&sched.lock)
					for {
						gp, _ := runqget(_p_)
						if gp == nil {
							break
						}
						globrunqput(gp)
					}
					unlock(&sched.lock)
				}
				// Go back to draining, this time
				// without preemption.
				gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
			case gcMarkWorkerFractionalMode:
				gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
			case gcMarkWorkerIdleMode:
				gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
			}
			casgstatus(gp, _Gwaiting, _Grunning)
		})

		// If we are nearing the end of mark, dispose
		// of the cache promptly. We must do this
		// before signaling that we're no longer
		// working so that other workers can't observe
		// no workers and no work while we have this
		// cached, and before we compute done.
		if gcBlackenPromptly {
			_p_.gcw.dispose()
		}

		// Account for time.
		duration := nanotime() - startTime
		switch _p_.gcMarkWorkerMode {
		case gcMarkWorkerDedicatedMode:
			atomic.Xaddint64(&gcController.dedicatedMarkTime, duration)
			atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
		case gcMarkWorkerFractionalMode:
			atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
			atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration)
		case gcMarkWorkerIdleMode:
			atomic.Xaddint64(&gcController.idleMarkTime, duration)
		}

		// Was this the last worker and did we run out
		// of work?
		incnwait := atomic.Xadd(&work.nwait, +1)
		if incnwait > work.nproc {
			println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode,
				"work.nwait=", incnwait, "work.nproc=", work.nproc)
			throw("work.nwait > work.nproc")
		}

		// If this worker reached a background mark completion
		// point, signal the main GC goroutine.
		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
			// Make this G preemptible and disassociate it
			// as the worker for this P so
			// findRunnableGCWorker doesn't try to
			// schedule it.
			_p_.gcBgMarkWorker.set(nil)
			releasem(park.m.ptr())

			gcMarkDone()

			// Disable preemption and prepare to reattach
			// to the P.
			//
			// We may be running on a different P at this
			// point, so we can't reattach until this G is
			// parked.
			park.m.set(acquirem())
			park.attach.set(_p_)
		}
	}
}

// gcMarkWorkAvailable returns true if executing a mark worker
// on p is potentially useful. p may be nil, in which case it only
// checks the global sources of work.
func gcMarkWorkAvailable(p *p) bool {
	if p != nil && !p.gcw.empty() {
		return true
	}
	if !work.full.empty() {
		return true // global work available
	}
	if work.markrootNext < work.markrootJobs {
		return true // root scan work available
	}
	return false
}

// gcMark runs the mark (or, for concurrent GC, mark termination).
// All gcWork caches must be empty.
// STW is in effect at this point.
//TODO go:nowritebarrier
func gcMark(start_time int64) {
	if debug.allocfreetrace > 0 {
		tracegc()
	}

	if gcphase != _GCmarktermination {
		throw("in gcMark expecting to see gcphase as _GCmarktermination")
	}
	work.tstart = start_time

	// Queue root marking jobs.
	gcMarkRootPrepare()

	work.nwait = 0
	work.ndone = 0
	work.nproc = uint32(gcprocs())

	if work.full == 0 && work.nDataRoots+work.nBSSRoots+work.nSpanRoots+work.nStackRoots == 0 {
		// There's no work on the work queue and no root jobs
		// that can produce work, so don't bother entering the
		// getfull() barrier.
		//
		// This will be the situation the vast majority of the
		// time after concurrent mark. However, we still need
		// a fallback for STW GC and because there are some
		// known races that occasionally leave work around for
		// mark termination.
		//
		// We're still hedging our bets here: if we do
		// accidentally produce some work, we'll still process
		// it, just not necessarily in parallel.
		//
		// TODO(austin): Fix the races and remove
		// work draining from mark termination so we don't
		// need the fallback path.
		work.helperDrainBlock = false
	} else {
		work.helperDrainBlock = true
	}

	if work.nproc > 1 {
		noteclear(&work.alldone)
		helpgc(int32(work.nproc))
	}

	gchelperstart()

	gcw := &getg().m.p.ptr().gcw
	if work.helperDrainBlock {
		gcDrain(gcw, gcDrainBlock)
	} else {
		gcDrain(gcw, gcDrainNoBlock)
	}
	gcw.dispose()

	if debug.gccheckmark > 0 {
		// This is expensive when there's a large number of
		// Gs, so only do it if checkmark is also enabled.
		gcMarkRootCheck()
	}
	if work.full != 0 {
		throw("work.full != 0")
	}

	if work.nproc > 1 {
		notesleep(&work.alldone)
	}

	// Record that at least one root marking pass has completed.
	work.markrootDone = true

	// Double-check that all gcWork caches are empty. This should
	// be ensured by mark 2 before we enter mark termination.
	for _, p := range allp {
		gcw := &p.gcw
		if !gcw.empty() {
			throw("P has cached GC work at end of mark termination")
		}
		if gcw.scanWork != 0 || gcw.bytesMarked != 0 {
			throw("P has unflushed stats at end of mark termination")
		}
	}

	cachestats()

	// Update the marked heap stat.
	memstats.heap_marked = work.bytesMarked

	// Update other GC heap size stats. This must happen after
	// cachestats (which flushes local statistics to these) and
	// flushallmcaches (which modifies heap_live).
	memstats.heap_live = work.bytesMarked
	memstats.heap_scan = uint64(gcController.scanWork)

	if trace.enabled {
		traceHeapAlloc()
	}
}

func gcSweep(mode gcMode) {
	if gcphase != _GCoff {
		throw("gcSweep being done but phase is not GCoff")
	}

	lock(&mheap_.lock)
	mheap_.sweepgen += 2
	mheap_.sweepdone = 0
	if mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
		// We should have drained this list during the last
		// sweep phase. We certainly need to start this phase
		// with an empty swept list.
		throw("non-empty swept list")
	}
	mheap_.pagesSwept = 0
	unlock(&mheap_.lock)

	if !_ConcurrentSweep || mode == gcForceBlockMode {
		// Special case synchronous sweep.
		// Record that no proportional sweeping has to happen.
		lock(&mheap_.lock)
		mheap_.sweepPagesPerByte = 0
		unlock(&mheap_.lock)
		// Sweep all spans eagerly.
		for sweepone() != ^uintptr(0) {
			sweep.npausesweep++
		}
		// Free workbufs eagerly.
		prepareFreeWorkbufs()
		for freeSomeWbufs(false) {
		}
		// All "free" events for this mark/sweep cycle have
		// now happened, so we can make this profile cycle
		// available immediately.
		mProf_NextCycle()
		mProf_Flush()
		return
	}

	// Background sweep.
	lock(&sweep.lock)
	if sweep.parked {
		sweep.parked = false
		ready(sweep.g, 0, true)
	}
	unlock(&sweep.lock)
}

// gcResetMarkState resets global state prior to marking (concurrent
// or STW) and resets the stack scan state of all Gs.
//
// This is safe to do without the world stopped because any Gs created
// during or after this will start out in the reset state.
func gcResetMarkState() {
	// This may be called during a concurrent phase, so make sure
	// allgs doesn't change.
	lock(&allglock)
	for _, gp := range allgs {
		gp.gcscandone = false  // set to true in gcphasework
		gp.gcscanvalid = false // stack has not been scanned
		gp.gcAssistBytes = 0
	}
	unlock(&allglock)

	work.bytesMarked = 0
	work.initialHeapLive = atomic.Load64(&memstats.heap_live)
	work.markrootDone = false
}

// Hooks for other packages

var poolcleanup func()

//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
func sync_runtime_registerPoolCleanup(f func()) {
	poolcleanup = f
}

func clearpools() {
	// clear sync.Pools
	if poolcleanup != nil {
		poolcleanup()
	}

	// Clear central sudog cache.
	// Leave per-P caches alone, they have strictly bounded size.
	// Disconnect cached list before dropping it on the floor,
	// so that a dangling ref to one entry does not pin all of them.
	lock(&sched.sudoglock)
	var sg, sgnext *sudog
	for sg = sched.sudogcache; sg != nil; sg = sgnext {
		sgnext = sg.next
		sg.next = nil
	}
	sched.sudogcache = nil
	unlock(&sched.sudoglock)
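
	// The same "disconnect, then drop" idiom is applied to the defer
	// pools below. A minimal sketch of the idea, with a hypothetical
	// node type: walking the list and clearing each link before
	// abandoning the head means a stray reference to one node keeps
	// only that node alive, not the whole chain:
	//
	//	for n := head; n != nil; {
	//		next := n.next
	//		n.next = nil // unlink so n doesn't pin its successors
	//		n = next
	//	}
	//	head = nil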

	// Clear central defer pools.
	// Leave per-P pools alone, they have strictly bounded size.
	lock(&sched.deferlock)
	for i := range sched.deferpool {
		// disconnect cached list before dropping it on the floor,
		// so that a dangling ref to one entry does not pin all of them.
		var d, dlink *_defer
		for d = sched.deferpool[i]; d != nil; d = dlink {
			dlink = d.link
			d.link = nil
		}
		sched.deferpool[i] = nil
	}
	unlock(&sched.deferlock)
}

// gchelper runs mark termination tasks on Ps other than the P
// coordinating mark termination.
//
// The caller is responsible for ensuring that this has a P to run on,
// even though it's running during STW. Because of this, it's allowed
// to have write barriers.
//
//go:yeswritebarrierrec
func gchelper() {
	_g_ := getg()
	_g_.m.traceback = 2
	gchelperstart()

	// Parallel mark over GC roots and heap
	if gcphase == _GCmarktermination {
		gcw := &_g_.m.p.ptr().gcw
		if work.helperDrainBlock {
			gcDrain(gcw, gcDrainBlock) // blocks in getfull
		} else {
			gcDrain(gcw, gcDrainNoBlock)
		}
		gcw.dispose()
	}

	nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone
	if atomic.Xadd(&work.ndone, +1) == nproc-1 {
		notewakeup(&work.alldone)
	}
	_g_.m.traceback = 0
}

func gchelperstart() {
	_g_ := getg()

	if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
		throw("gchelperstart: bad m->helpgc")
	}
	if _g_ != _g_.m.g0 {
		throw("gchelper not running on g0 stack")
	}
}

// Timing

// itoaDiv formats val/(10**dec) into buf.
func itoaDiv(buf []byte, val uint64, dec int) []byte {
	i := len(buf) - 1
	idec := i - dec
	for val >= 10 || i >= idec {
		buf[i] = byte(val%10 + '0')
		i--
		if i == idec {
			buf[i] = '.'
			i--
		}
		val /= 10
	}
	buf[i] = byte(val + '0')
	return buf[i:]
}

// fmtNSAsMS nicely formats ns nanoseconds as milliseconds.
func fmtNSAsMS(buf []byte, ns uint64) []byte {
	if ns >= 10e6 {
		// Format as whole milliseconds.
		return itoaDiv(buf, ns/1e6, 0)
	}
	// Format two digits of precision, with at most three decimal places.
	x := ns / 1e3
	if x == 0 {
		buf[0] = '0'
		return buf[:1]
	}
	dec := 3
	for x >= 100 {
		x /= 10
		dec--
	}
	return itoaDiv(buf, x, dec)
}
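
// As a quick illustration of the two helpers above (values worked out by
// hand, not asserted by any test in this file):
//
//	var buf [24]byte
//	itoaDiv(buf[:], 12345, 3)   // "12.345"  (12345 / 10**3)
//	fmtNSAsMS(buf[:], 2500000)  // "2.5"     (2.5e6 ns = 2.5 ms)
//	fmtNSAsMS(buf[:], 17000000) // "17"      (>= 10ms: whole milliseconds)
//
// fmtNSAsMS keeps roughly two digits of precision for sub-10ms values by
// trimming decimal places as the magnitude grows.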