// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector (GC).
//
// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows multiple
// GC threads to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is
// non-generational and non-compacting. Allocation is done using size segregated per P allocation
// areas to minimize fragmentation while eliminating locks in the common case.
//
// The algorithm decomposes into several steps.
// This is a high level description of the algorithm being used. For an overview of GC a good
// place to start is Richard Jones' gchandbook.org.
//
// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978),
// 966-975.
// For journal quality proofs that these steps are complete, correct, and terminate see
// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
//
// 1. GC performs sweep termination.
//
//    a. Stop the world. This causes all Ps to reach a GC safe-point.
//
//    b. Sweep any unswept spans. There will only be unswept spans if
//    this GC cycle was forced before the expected time.
//
// 2. GC performs the mark phase.
//
//    a. Prepare for the mark phase by setting gcphase to _GCmark
//    (from _GCoff), enabling the write barrier, enabling mutator
//    assists, and enqueueing root mark jobs. No objects may be
//    scanned until all Ps have enabled the write barrier, which is
//    accomplished using STW.
//
//    b. Start the world. From this point, GC work is done by mark
//    workers started by the scheduler and by assists performed as
//    part of allocation. The write barrier shades both the
//    overwritten pointer and the new pointer value for any pointer
//    writes (see mbarrier.go for details). Newly allocated objects
//    are immediately marked black.
//
//    c. GC performs root marking jobs. This includes scanning all
//    stacks, shading all globals, and shading any heap pointers in
//    off-heap runtime data structures. Scanning a stack stops a
//    goroutine, shades any pointers found on its stack, and then
//    resumes the goroutine.
//
//    d. GC drains the work queue of grey objects, scanning each grey
//    object to black and shading all pointers found in the object
//    (which in turn may add those pointers to the work queue).
//
//    e. Because GC work is spread across local caches, GC uses a
//    distributed termination algorithm to detect when there are no
//    more root marking jobs or grey objects (see gcMarkDone). At this
//    point, GC transitions to mark termination.
//
// 3. GC performs mark termination.
//
//    a. Stop the world.
//
//    b. Set gcphase to _GCmarktermination, and disable workers and
//    assists.
//
//    c. Perform housekeeping like flushing mcaches.
//
// 4. GC performs the sweep phase.
//
//    a. Prepare for the sweep phase by setting gcphase to _GCoff,
//    setting up sweep state and disabling the write barrier.
//
//    b. Start the world. From this point on, newly allocated objects
//    are white, and allocating sweeps spans before use if necessary.
//
//    c. GC does concurrent sweeping in the background and in response
//    to allocation. See description below.
//
// 5. When sufficient allocation has taken place, replay the sequence
// starting with 1 above. See discussion of GC rate below.

// Concurrent sweep.
//
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// At the end of STW mark termination all spans are marked as "needs sweeping".
//
// The background sweeper goroutine simply sweeps spans one-by-one.
//
// To avoid requesting more OS memory while there are unswept spans, when a
// goroutine needs another span, it first attempts to reclaim that much memory
// by sweeping. When a goroutine needs to allocate a new small-object span, it
// sweeps small-object spans for the same object size until it frees at least
// one object. When a goroutine needs to allocate a large-object span from the
// heap, it sweeps spans until it frees at least that many pages into the heap.
// There is one case where this may not suffice: if a goroutine sweeps and frees
// two nonadjacent one-page spans to the heap, it will allocate a new two-page
// span, but there can still be other one-page unswept spans which could be
// combined into a two-page span.
//
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).

// GC rate.
// Next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use. The proportion is controlled by the GOGC environment
// variable (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we
// get to 8M (this mark is tracked in the next_gc variable). This keeps the GC cost in
// linear proportion to the allocation cost. Adjusting GOGC just changes the linear
// constant (and also the amount of extra memory used).

// Oblets
//
// In order to prevent long pauses while scanning large objects and to
// improve parallelism, the garbage collector breaks up scan jobs for
// objects larger than maxObletBytes into "oblets" of at most
// maxObletBytes. When scanning encounters the beginning of a large
// object, it scans only the first oblet and enqueues the remaining
// oblets as new scan jobs.

package runtime

import (
	"internal/cpu"
	"runtime/internal/atomic"
	"unsafe"
)

const (
	_DebugGC         = 0
	_ConcurrentSweep = true
	_FinBlockSize    = 4 * 1024

	// debugScanConservative enables debug logging for stack
	// frames that are scanned conservatively.
	debugScanConservative = false

	// sweepMinHeapDistance is a lower bound on the heap distance
	// (in bytes) reserved for concurrent sweeping between GC
	// cycles.
	sweepMinHeapDistance = 1024 * 1024
)

// heapminimum is the minimum heap size at which to trigger GC.
// For small heaps, this overrides the usual GOGC*live set rule.
//
// When there is a very small live set but a lot of allocation, simply
// collecting when the heap reaches GOGC*live results in many GC
// cycles and high total per-GC overhead. This minimum amortizes this
// per-GC overhead while keeping the heap reasonably small.
//
// During initialization this is set to 4MB*GOGC/100. In the case of
// GOGC==0, this will set heapminimum to 0, resulting in constant
// collection even when the heap size is small, which is useful for
// debugging.
var heapminimum uint64 = defaultHeapMinimum

// defaultHeapMinimum is the value of heapminimum for GOGC==100.
const defaultHeapMinimum = 4 << 20

// Initialized from $GOGC. GOGC=off means no GC.
var gcpercent int32

func gcinit() {
	if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
		throw("size of Workbuf is suboptimal")
	}

	// No sweep on the first cycle.
	mheap_.sweepdone = 1

	// Set a reasonable initial GC trigger.
	memstats.triggerRatio = 7 / 8.0

	// Fake a heap_marked value so it looks like a trigger at
	// heapminimum is the appropriate growth from heap_marked.
	// This will go into computing the initial GC goal.
	memstats.heap_marked = uint64(float64(heapminimum) / (1 + memstats.triggerRatio))

	// Set gcpercent from the environment. This will also compute
	// and set the GC trigger and goal.
	_ = setGCPercent(readgogc())

	work.startSema = 1
	work.markDoneSema = 1
	lockInit(&work.sweepWaiters.lock, lockRankSweepWaiters)
	lockInit(&work.assistQueue.lock, lockRankAssistQueue)
	lockInit(&work.wbufSpans.lock, lockRankWbufSpans)
}

func readgogc() int32 {
	p := gogetenv("GOGC")
	if p == "off" {
		return -1
	}
	if n, ok := atoi32(p); ok {
		return n
	}
	return 100
}

// gcenable is called after the bulk of the runtime initialization,
// just before we're about to start letting user code run.
// It kicks off the background sweeper goroutine, the background
// scavenger goroutine, and enables GC.
func gcenable() {
	// Kick off sweeping and scavenging.
	c := make(chan int, 2)
	go bgsweep(c)
	go bgscavenge(c)
	<-c
	<-c
	memstats.enablegc = true // now that runtime is initialized, GC is okay
}

//go:linkname setGCPercent runtime/debug.setGCPercent
func setGCPercent(in int32) (out int32) {
	// Run on the system stack since we grab the heap lock.
	systemstack(func() {
		lock(&mheap_.lock)
		out = gcpercent
		if in < 0 {
			in = -1
		}
		gcpercent = in
		heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100
		// Update pacing in response to gcpercent change.
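		//
		// As an illustrative worked example (derived from the code
		// here, not part of the original comments): with GOGC=50 the
		// assignment above sets heapminimum to 2MB, and the goal that
		// gcSetTriggerRatio recomputes below becomes heap_marked*1.5
		// rather than heap_marked*2.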
		gcSetTriggerRatio(memstats.triggerRatio)
		unlock(&mheap_.lock)
	})

	// If we just disabled GC, wait for any concurrent GC mark to
	// finish so we always return with no GC running.
	if in < 0 {
		gcWaitOnMark(atomic.Load(&work.cycles))
	}

	return out
}

// Garbage collector phase.
// Indicates to the write barrier and synchronization code what task to perform.
var gcphase uint32

// The compiler knows about this variable.
// If you change it, you must change builtin/runtime.go, too.
// If you change the first four bytes, you must also change the write
// barrier insertion code.
var writeBarrier struct {
	enabled bool    // compiler emits a check of this before calling write barrier
	pad     [3]byte // compiler uses 32-bit load for "enabled" field
	needed  bool    // whether we need a write barrier for current GC phase
	cgo     bool    // whether we need a write barrier for a cgo check
	alignme uint64  // guarantee alignment so that compiler can use a 32 or 64-bit load
}

// gcBlackenEnabled is 1 if mutator assists and background mark
// workers are allowed to blacken objects. This must only be set when
// gcphase == _GCmark.
var gcBlackenEnabled uint32

const (
	_GCoff             = iota // GC not running; sweeping in background, write barrier disabled
	_GCmark                   // GC marking roots and workbufs: allocate black, write barrier ENABLED
	_GCmarktermination        // GC mark termination: allocate black, P's help GC, write barrier ENABLED
)

//go:nosplit
func setGCPhase(x uint32) {
	atomic.Store(&gcphase, x)
	writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination
	writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo
}

// gcMarkWorkerMode represents the mode that a concurrent mark worker
// should operate in.
//
// Concurrent marking happens through four different mechanisms. One
// is mutator assists, which happen in response to allocations and are
// not scheduled. The other three are variations in the per-P mark
// workers and are distinguished by gcMarkWorkerMode.
type gcMarkWorkerMode int

const (
	// gcMarkWorkerNotWorker indicates that the next scheduled G is not
	// starting work and the mode should be ignored.
	gcMarkWorkerNotWorker gcMarkWorkerMode = iota

	// gcMarkWorkerDedicatedMode indicates that the P of a mark
	// worker is dedicated to running that mark worker. The mark
	// worker should run without preemption.
	gcMarkWorkerDedicatedMode

	// gcMarkWorkerFractionalMode indicates that a P is currently
	// running the "fractional" mark worker. The fractional worker
	// is necessary when GOMAXPROCS*gcBackgroundUtilization is not
	// an integer. The fractional worker should run until it is
	// preempted and will be scheduled to pick up the fractional
	// part of GOMAXPROCS*gcBackgroundUtilization.
	gcMarkWorkerFractionalMode

	// gcMarkWorkerIdleMode indicates that a P is running the mark
	// worker because it has nothing else to do. The idle worker
	// should run until it is preempted and account its time
	// against gcController.idleMarkTime.
	gcMarkWorkerIdleMode
)

// gcMarkWorkerModeStrings are the string labels of gcMarkWorkerModes
// to use in execution traces.
var gcMarkWorkerModeStrings = [...]string{
	"Not worker",
	"GC (dedicated)",
	"GC (fractional)",
	"GC (idle)",
}

// gcController implements the GC pacing controller that determines
// when to trigger concurrent garbage collection and how much marking
// work to do in mutator assists and background marking.
//
// It uses a feedback control algorithm to adjust the memstats.gc_trigger
// trigger based on the heap growth and GC CPU utilization each cycle.
// This algorithm optimizes for heap growth to match GOGC and for CPU
// utilization between assist and background marking to be 25% of
// GOMAXPROCS. The high-level design of this algorithm is documented
// at https://golang.org/s/go15gcpacing.
//
// All fields of gcController are used only during a single mark
// cycle.
var gcController gcControllerState

type gcControllerState struct {
	// scanWork is the total scan work performed this cycle. This
	// is updated atomically during the cycle. Updates occur in
	// bounded batches, since it is both written and read
	// throughout the cycle. At the end of the cycle, this is how
	// much of the retained heap is scannable.
	//
	// Currently this is the bytes of heap scanned. For most uses,
	// this is an opaque unit of work, but for estimation the
	// definition is important.
	scanWork int64

	// bgScanCredit is the scan work credit accumulated by the
	// concurrent background scan. This credit is accumulated by
	// the background scan and stolen by mutator assists. This is
	// updated atomically. Updates occur in bounded batches, since
	// it is both written and read throughout the cycle.
	bgScanCredit int64

	// assistTime is the nanoseconds spent in mutator assists
	// during this cycle. This is updated atomically. Updates
	// occur in bounded batches, since it is both written and read
	// throughout the cycle.
	assistTime int64

	// dedicatedMarkTime is the nanoseconds spent in dedicated
	// mark workers during this cycle. This is updated atomically
	// at the end of the concurrent mark phase.
	dedicatedMarkTime int64

	// fractionalMarkTime is the nanoseconds spent in the
	// fractional mark worker during this cycle. This is updated
	// atomically throughout the cycle and will be up-to-date if
	// the fractional mark worker is not currently running.
	fractionalMarkTime int64

	// idleMarkTime is the nanoseconds spent in idle marking
	// during this cycle. This is updated atomically throughout
	// the cycle.
	idleMarkTime int64

	// markStartTime is the absolute start time in nanoseconds
	// that assists and background mark workers started.
	markStartTime int64

	// dedicatedMarkWorkersNeeded is the number of dedicated mark
	// workers that need to be started. This is computed at the
	// beginning of each cycle and decremented atomically as
	// dedicated mark workers get started.
	dedicatedMarkWorkersNeeded int64

	// assistWorkPerByte is the ratio of scan work to allocated
	// bytes that should be performed by mutator assists. This is
	// computed at the beginning of each cycle and updated every
	// time heap_scan is updated.
	//
	// Stored as a uint64, but it's actually a float64. Use
	// float64frombits to get the value.
	//
	// Read and written atomically.
	assistWorkPerByte uint64

	// assistBytesPerWork is 1/assistWorkPerByte.
	//
	// Stored as a uint64, but it's actually a float64. Use
	// float64frombits to get the value.
	//
	// Read and written atomically.
	//
	// Note that because this is read and written independently
	// from assistWorkPerByte, users may notice a skew between
	// the two values, and such a state should be safe.
	assistBytesPerWork uint64

	// fractionalUtilizationGoal is the fraction of wall clock
	// time that should be spent in the fractional mark worker on
	// each P that isn't running a dedicated worker.
	//
	// For example, if the utilization goal is 25% and there are
	// no dedicated workers, this will be 0.25. If the goal is
	// 25%, there is one dedicated worker, and GOMAXPROCS is 5,
	// this will be 0.05 to make up the missing 5%.
	//
	// If this is zero, no fractional workers are needed.
	fractionalUtilizationGoal float64

	_ cpu.CacheLinePad
}

// startCycle resets the GC controller's state and computes estimates
// for a new GC cycle. The caller must hold worldsema and the world
// must be stopped.
func (c *gcControllerState) startCycle() {
	c.scanWork = 0
	c.bgScanCredit = 0
	c.assistTime = 0
	c.dedicatedMarkTime = 0
	c.fractionalMarkTime = 0
	c.idleMarkTime = 0

	// Ensure that the heap goal is at least a little larger than
	// the current live heap size. This may not be the case if GC
	// start is delayed or if the allocation that pushed heap_live
	// over gc_trigger is large or if the trigger is really close to
	// GOGC. Assist is proportional to this distance, so enforce a
	// minimum distance, even if it means going over the GOGC goal
	// by a tiny bit.
	if memstats.next_gc < memstats.heap_live+1024*1024 {
		memstats.next_gc = memstats.heap_live + 1024*1024
	}

	// Compute the background mark utilization goal. In general,
	// this may not come out exactly. We round the number of
	// dedicated workers so that the utilization is closest to
	// 25%. For small GOMAXPROCS, this would introduce too much
	// error, so we add fractional workers in that case.
	totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization
	c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5)
	utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
	const maxUtilError = 0.3
	if utilError < -maxUtilError || utilError > maxUtilError {
		// Rounding put us more than 30% off our goal. With
		// gcBackgroundUtilization of 25%, this happens for
		// GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional
		// workers to compensate.
		if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal {
			// Too many dedicated workers.
			c.dedicatedMarkWorkersNeeded--
		}
		c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs)
	} else {
		c.fractionalUtilizationGoal = 0
	}

	// In STW mode, we just want dedicated workers.
	if debug.gcstoptheworld > 0 {
		c.dedicatedMarkWorkersNeeded = int64(gomaxprocs)
		c.fractionalUtilizationGoal = 0
	}

	// Clear per-P state
	for _, p := range allp {
		p.gcAssistTime = 0
		p.gcFractionalMarkTime = 0
	}

	// Compute initial values for controls that are updated
	// throughout the cycle.
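	//
	// As an illustrative sketch (see revise below for the exact
	// computation), the assist ratio comes out roughly as
	//
	//	assistWorkPerByte ≈ scanWorkRemaining / (next_gc - heap_live)
	//
	// so that a mutator allocating its way up to next_gc performs (or
	// steals) the remaining expected scan work along the way.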
	c.revise()

	if debug.gcpacertrace > 0 {
		assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte))
		print("pacer: assist ratio=", assistRatio,
			" (scan ", memstats.heap_scan>>20, " MB in ",
			work.initialHeapLive>>20, "->",
			memstats.next_gc>>20, " MB)",
			" workers=", c.dedicatedMarkWorkersNeeded,
			"+", c.fractionalUtilizationGoal, "\n")
	}
}

// revise updates the assist ratio during the GC cycle to account for
// improved estimates. This should be called whenever memstats.heap_scan,
// memstats.heap_live, or memstats.next_gc is updated. It is safe to
// call concurrently, but it may race with other calls to revise.
//
// The result of this race is that the two assist ratio values may not line
// up or may be stale. In practice this is OK because the assist ratio
// moves slowly throughout a GC cycle, and the assist ratio is a best-effort
// heuristic anyway. Furthermore, no part of the heuristic depends on
// the two assist ratio values being exact reciprocals of one another, since
// the two values are used to convert values from different sources.
//
// The worst case result of this raciness is that we may miss a larger shift
// in the ratio (say, if we decide to pace more aggressively against the
// hard heap goal) but even this "hard goal" is best-effort (see #40460).
// The dedicated GC should ensure we don't exceed the hard goal by too much
// in the rare case we do exceed it.
//
// It should only be called when gcBlackenEnabled != 0 (because this
// is when assists are enabled and the necessary statistics are
// available).
func (c *gcControllerState) revise() {
	gcpercent := gcpercent
	if gcpercent < 0 {
		// If GC is disabled but we're running a forced GC,
		// act like GOGC is huge for the below calculations.
		gcpercent = 100000
	}
	live := atomic.Load64(&memstats.heap_live)
	scan := atomic.Load64(&memstats.heap_scan)
	work := atomic.Loadint64(&c.scanWork)

	// Assume we're under the soft goal. Pace GC to complete at
	// next_gc assuming the heap is in steady-state.
	heapGoal := int64(atomic.Load64(&memstats.next_gc))

	// Compute the expected scan work remaining.
	//
	// This is estimated based on the expected
	// steady-state scannable heap. For example, with
	// GOGC=100, only half of the scannable heap is
	// expected to be live, so that's what we target.
	//
	// (This is a float calculation to avoid overflowing on
	// 100*heap_scan.)
	scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent))

	if int64(live) > heapGoal || work > scanWorkExpected {
		// We're past the soft goal, or we've already done more scan
		// work than we expected. Pace GC so that in the worst case it
		// will complete by the hard goal.
		const maxOvershoot = 1.1
		heapGoal = int64(float64(heapGoal) * maxOvershoot)

		// Compute the upper bound on the scan work remaining.
		scanWorkExpected = int64(scan)
	}

	// Compute the remaining scan work estimate.
	//
	// Note that we currently count allocations during GC as both
	// scannable heap (heap_scan) and scan work completed
	// (scanWork), so allocation will change this difference
	// slowly in the soft regime and not at all in the hard
	// regime.
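	//
	// Worked example (illustrative only, not from the original
	// comments): with GOGC=100 and heap_scan = 64MB, scanWorkExpected
	// above is 32MB in the soft regime; if 24MB of scan work has
	// already been done, roughly 8MB of scan work remains.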
	scanWorkRemaining := scanWorkExpected - work
	if scanWorkRemaining < 1000 {
		// We set a somewhat arbitrary lower bound on
		// remaining scan work since if we aim a little high,
		// we can miss by a little.
		//
		// We *do* need to enforce that this is at least 1,
		// since marking is racy and double-scanning objects
		// may legitimately make the remaining scan work
		// negative, even in the hard goal regime.
		scanWorkRemaining = 1000
	}

	// Compute the heap distance remaining.
	heapRemaining := heapGoal - int64(live)
	if heapRemaining <= 0 {
		// This shouldn't happen, but if it does, avoid
		// dividing by zero or setting the assist negative.
		heapRemaining = 1
	}

	// Compute the mutator assist ratio so that by the time the mutator
	// allocates the remaining heap bytes up to next_gc, it will
	// have done (or stolen) the remaining amount of scan work.
	// Note that the assist ratio values are updated atomically
	// but not together. This means there may be some degree of
	// skew between the two values. This is generally OK as the
	// values shift relatively slowly over the course of a GC
	// cycle.
	assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining)
	assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining)
	atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte))
	atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork))
}

// endCycle computes the trigger ratio for the next cycle.
func (c *gcControllerState) endCycle() float64 {
	if work.userForced {
		// Forced GC means this cycle didn't start at the
		// trigger, so where it finished isn't good
		// information about how to adjust the trigger.
		// Just leave it where it is.
		return memstats.triggerRatio
	}

	// Proportional response gain for the trigger controller. Must
	// be in [0, 1]. Lower values smooth out transient effects but
	// take longer to respond to phase changes. Higher values
	// react to phase changes quickly, but are more affected by
	// transient changes. Values near 1 may be unstable.
	const triggerGain = 0.5

	// Compute next cycle trigger ratio. First, this computes the
	// "error" for this cycle; that is, how far off the trigger
	// was from what it should have been, accounting for both heap
	// growth and GC CPU utilization. We compute the actual heap
	// growth during this cycle and scale that by how far off from
	// the goal CPU utilization we were (to estimate the heap
	// growth if we had the desired CPU utilization). The
	// difference between this estimate and the GOGC-based goal
	// heap growth is the error.
	goalGrowthRatio := gcEffectiveGrowthRatio()
	actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1
	assistDuration := nanotime() - c.markStartTime

	// Assume background mark hit its utilization goal.
	utilization := gcBackgroundUtilization
	// Add assist utilization; avoid divide by zero.
	if assistDuration > 0 {
		utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs))
	}

	triggerError := goalGrowthRatio - memstats.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-memstats.triggerRatio)

	// Finally, we adjust the trigger for next time by this error,
	// damped by the proportional gain.
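	//
	// Illustrative example (numbers chosen only for exposition): with
	// a goal growth ratio of 1.0 (GOGC=100), a previous trigger ratio
	// of 0.7, an actual growth ratio of 0.8, and utilization exactly
	// at gcGoalUtilization, triggerError = 1.0 - 0.7 - 1*(0.8-0.7) =
	// 0.2, so the damped update below moves the trigger ratio to
	// 0.7 + 0.5*0.2 = 0.8.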
	triggerRatio := memstats.triggerRatio + triggerGain*triggerError

	if debug.gcpacertrace > 0 {
		// Print controller state in terms of the design
		// document.
		H_m_prev := memstats.heap_marked
		h_t := memstats.triggerRatio
		H_T := memstats.gc_trigger
		h_a := actualGrowthRatio
		H_a := memstats.heap_live
		h_g := goalGrowthRatio
		H_g := int64(float64(H_m_prev) * (1 + h_g))
		u_a := utilization
		u_g := gcGoalUtilization
		W_a := c.scanWork
		print("pacer: H_m_prev=", H_m_prev,
			" h_t=", h_t, " H_T=", H_T,
			" h_a=", h_a, " H_a=", H_a,
			" h_g=", h_g, " H_g=", H_g,
			" u_a=", u_a, " u_g=", u_g,
			" W_a=", W_a,
			" goalΔ=", goalGrowthRatio-h_t,
			" actualΔ=", h_a-h_t,
			" u_a/u_g=", u_a/u_g,
			"\n")
	}

	return triggerRatio
}

// enlistWorker encourages another dedicated mark worker to start on
// another P if there are spare worker slots. It is used by putfull
// when more work is made available.
//
//go:nowritebarrier
func (c *gcControllerState) enlistWorker() {
	// If there are idle Ps, wake one so it will run an idle worker.
	// NOTE: This is suspected of causing deadlocks. See golang.org/issue/19112.
	//
	//	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
	//		wakep()
	//		return
	//	}

	// There are no idle Ps. If we need more dedicated workers,
	// try to preempt a running P so it will switch to a worker.
	if c.dedicatedMarkWorkersNeeded <= 0 {
		return
	}
	// Pick a random other P to preempt.
	if gomaxprocs <= 1 {
		return
	}
	gp := getg()
	if gp == nil || gp.m == nil || gp.m.p == 0 {
		return
	}
	myID := gp.m.p.ptr().id
	for tries := 0; tries < 5; tries++ {
		id := int32(fastrandn(uint32(gomaxprocs - 1)))
		if id >= myID {
			id++
		}
		p := allp[id]
		if p.status != _Prunning {
			continue
		}
		if preemptone(p) {
			return
		}
	}
}

// findRunnableGCWorker returns a background mark worker for _p_ if it
// should be run. This must only be called when gcBlackenEnabled != 0.
func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
	if gcBlackenEnabled == 0 {
		throw("gcControllerState.findRunnable: blackening not enabled")
	}

	if !gcMarkWorkAvailable(_p_) {
		// No work to be done right now. This can happen at
		// the end of the mark phase when there are still
		// assists tapering off. Don't bother running a worker
		// now because it'll just return immediately.
		return nil
	}

	// Grab a worker before we commit to running below.
	node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
	if node == nil {
		// There is at least one worker per P, so normally there are
		// enough workers to run on all Ps, if necessary. However, once
		// a worker enters gcMarkDone it may park without rejoining the
		// pool, thus freeing a P with no corresponding worker.
		// gcMarkDone never depends on another worker doing work, so it
		// is safe to simply do nothing here.
		//
		// If gcMarkDone bails out without completing the mark phase,
		// it will always do so with queued global work. Thus, that P
		// will be immediately eligible to re-run the worker G it was
		// just using, ensuring work can complete.
		return nil
	}

	decIfPositive := func(ptr *int64) bool {
		for {
			v := atomic.Loadint64(ptr)
			if v <= 0 {
				return false
			}

			// TODO: having atomic.Casint64 would be more pleasant.
			if atomic.Cas64((*uint64)(unsafe.Pointer(ptr)), uint64(v), uint64(v-1)) {
				return true
			}
		}
	}

	if decIfPositive(&c.dedicatedMarkWorkersNeeded) {
		// This P is now dedicated to marking until the end of
		// the concurrent mark phase.
		_p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
	} else if c.fractionalUtilizationGoal == 0 {
		// No need for fractional workers.
		gcBgMarkWorkerPool.push(&node.node)
		return nil
	} else {
		// Is this P behind on the fractional utilization
		// goal?
		//
		// This should be kept in sync with pollFractionalWorkerExit.
		delta := nanotime() - gcController.markStartTime
		if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
			// Nope. No need to run a fractional worker.
			gcBgMarkWorkerPool.push(&node.node)
			return nil
		}
		// Run a fractional worker.
		_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
	}

	// Run the background mark worker.
	gp := node.gp.ptr()
	casgstatus(gp, _Gwaiting, _Grunnable)
	if trace.enabled {
		traceGoUnpark(gp, 0)
	}
	return gp
}

// pollFractionalWorkerExit reports whether a fractional mark worker
// should self-preempt. It assumes it is called from the fractional
// worker.
func pollFractionalWorkerExit() bool {
	// This should be kept in sync with the fractional worker
	// scheduler logic in findRunnableGCWorker.
	now := nanotime()
	delta := now - gcController.markStartTime
	if delta <= 0 {
		return true
	}
	p := getg().m.p.ptr()
	selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime)
	// Add some slack to the utilization goal so that the
	// fractional worker isn't behind again the instant it exits.
	return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal
}

// gcSetTriggerRatio sets the trigger ratio and updates everything
// derived from it: the absolute trigger, the heap goal, mark pacing,
// and sweep pacing.
//
// This can be called any time. If GC is in the middle of a
// concurrent phase, it will adjust the pacing of that phase.
//
// This depends on gcpercent, memstats.heap_marked, and
// memstats.heap_live. These must be up to date.
//
// mheap_.lock must be held or the world must be stopped.
func gcSetTriggerRatio(triggerRatio float64) {
	assertWorldStoppedOrLockHeld(&mheap_.lock)

	// Compute the next GC goal, which is when the allocated heap
	// has grown by GOGC/100 over the heap marked by the last
	// cycle.
	goal := ^uint64(0)
	if gcpercent >= 0 {
		goal = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100
	}

	// Set the trigger ratio, capped to reasonable bounds.
	if gcpercent >= 0 {
		scalingFactor := float64(gcpercent) / 100
		// Ensure there's always a little margin so that the
		// mutator assist ratio isn't infinity.
		maxTriggerRatio := 0.95 * scalingFactor
		if triggerRatio > maxTriggerRatio {
			triggerRatio = maxTriggerRatio
		}

		// If we let triggerRatio go too low, then if the application
		// is allocating very rapidly we might end up in a situation
		// where we're allocating black during a nearly always-on GC.
		// The result of this is a growing heap and ultimately an
		// increase in RSS. By capping us at a point >0, we're essentially
		// saying that we're OK using more CPU during the GC to prevent
		// this growth in RSS.
		//
		// The current constant was chosen empirically: given a sufficiently
		// fast/scalable allocator with 48 Ps that could drive the trigger ratio
		// to <0.05, this constant causes applications to retain the same peak
		// RSS compared to not having this allocator.
		minTriggerRatio := 0.6 * scalingFactor
		if triggerRatio < minTriggerRatio {
			triggerRatio = minTriggerRatio
		}
	} else if triggerRatio < 0 {
		// gcpercent < 0, so just make sure we're not getting a negative
		// triggerRatio. This case isn't expected to happen in practice,
		// and doesn't really matter because if gcpercent < 0 then we won't
		// ever consume triggerRatio further on in this function, but let's
		// just be defensive here; the triggerRatio being negative is almost
		// certainly undesirable.
		triggerRatio = 0
	}
	memstats.triggerRatio = triggerRatio

	// Compute the absolute GC trigger from the trigger ratio.
	//
	// We trigger the next GC cycle when the allocated heap has
	// grown by the trigger ratio over the marked heap size.
	trigger := ^uint64(0)
	if gcpercent >= 0 {
		trigger = uint64(float64(memstats.heap_marked) * (1 + triggerRatio))
		// Don't trigger below the minimum heap size.
		minTrigger := heapminimum
		if !isSweepDone() {
			// Concurrent sweep happens in the heap growth
			// from heap_live to gc_trigger, so ensure
			// that concurrent sweep has some heap growth
			// in which to perform sweeping before we
			// start the next GC cycle.
			sweepMin := atomic.Load64(&memstats.heap_live) + sweepMinHeapDistance
			if sweepMin > minTrigger {
				minTrigger = sweepMin
			}
		}
		if trigger < minTrigger {
			trigger = minTrigger
		}
		if int64(trigger) < 0 {
			print("runtime: next_gc=", memstats.next_gc, " heap_marked=", memstats.heap_marked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, " triggerRatio=", triggerRatio, " minTrigger=", minTrigger, "\n")
			throw("gc_trigger underflow")
		}
		if trigger > goal {
			// The trigger ratio is always less than GOGC/100, but
			// other bounds on the trigger may have raised it.
			// Push up the goal, too.
			goal = trigger
		}
	}

	// Commit to the trigger and goal.
	memstats.gc_trigger = trigger
	atomic.Store64(&memstats.next_gc, goal)
	if trace.enabled {
		traceNextGC()
	}

	// Update mark pacing.
	if gcphase != _GCoff {
		gcController.revise()
	}

	// Update sweep pacing.
	if isSweepDone() {
		mheap_.sweepPagesPerByte = 0
	} else {
		// Concurrent sweep needs to sweep all of the in-use
		// pages by the time the allocated heap reaches the GC
		// trigger. Compute the ratio of in-use pages to sweep
		// per byte allocated, accounting for the fact that
		// some might already be swept.
		heapLiveBasis := atomic.Load64(&memstats.heap_live)
		heapDistance := int64(trigger) - int64(heapLiveBasis)
		// Add a little margin so rounding errors and
		// concurrent sweep are less likely to leave pages
		// unswept when GC starts.
		heapDistance -= 1024 * 1024
		if heapDistance < _PageSize {
			// Avoid setting the sweep ratio extremely high
			heapDistance = _PageSize
		}
		pagesSwept := atomic.Load64(&mheap_.pagesSwept)
		pagesInUse := atomic.Load64(&mheap_.pagesInUse)
		sweepDistancePages := int64(pagesInUse) - int64(pagesSwept)
		if sweepDistancePages <= 0 {
			mheap_.sweepPagesPerByte = 0
		} else {
			mheap_.sweepPagesPerByte = float64(sweepDistancePages) / float64(heapDistance)
			mheap_.sweepHeapLiveBasis = heapLiveBasis
			// Write pagesSweptBasis last, since this
			// signals concurrent sweeps to recompute
			// their debt.
			atomic.Store64(&mheap_.pagesSweptBasis, pagesSwept)
		}
	}

	gcPaceScavenger()
}

// gcEffectiveGrowthRatio returns the current effective heap growth
// ratio (GOGC/100) based on heap_marked from the previous GC and
// next_gc for the current GC.
//
// This may differ from gcpercent/100 because of various upper and
// lower bounds on gcpercent. For example, if the heap is smaller than
// heapminimum, this can be higher than gcpercent/100.
//
// mheap_.lock must be held or the world must be stopped.
func gcEffectiveGrowthRatio() float64 {
	assertWorldStoppedOrLockHeld(&mheap_.lock)

	egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked)
	if egogc < 0 {
		// Shouldn't happen, but just in case.
		egogc = 0
	}
	return egogc
}

// gcGoalUtilization is the goal CPU utilization for
// marking as a fraction of GOMAXPROCS.
const gcGoalUtilization = 0.30

// gcBackgroundUtilization is the fixed CPU utilization for background
// marking. It must be <= gcGoalUtilization. The difference between
// gcGoalUtilization and gcBackgroundUtilization will be made up by
// mark assists. The scheduler will aim to use within 50% of this
// goal.
//
// Setting this to < gcGoalUtilization avoids saturating the trigger
// feedback controller when there are no assists, which allows it to
// better control CPU and heap growth. However, the larger the gap,
// the more mutator assists are expected to happen, which impact
// mutator latency.
const gcBackgroundUtilization = 0.25

// gcCreditSlack is the amount of scan work credit that can
// accumulate locally before updating gcController.scanWork and,
// optionally, gcController.bgScanCredit. Lower values give a more
// accurate assist ratio and make it more likely that assists will
// successfully steal background credit. Higher values reduce memory
// contention.
const gcCreditSlack = 2000

// gcAssistTimeSlack is the nanoseconds of mutator assist time that
// can accumulate on a P before updating gcController.assistTime.
const gcAssistTimeSlack = 5000

// gcOverAssistWork determines how many extra units of scan work a GC
// assist does when an assist happens. This amortizes the cost of an
// assist by pre-paying for this many bytes of future allocations.
const gcOverAssistWork = 64 << 10

var work struct {
	full  lfstack          // lock-free list of full blocks workbuf
	empty lfstack          // lock-free list of empty blocks workbuf
	pad0  cpu.CacheLinePad // prevents false-sharing between full/empty and nproc/nwait

	wbufSpans struct {
		lock mutex
		// free is a list of spans dedicated to workbufs, but
		// that don't currently contain any workbufs.
		free mSpanList
		// busy is a list of all spans containing workbufs on
		// one of the workbuf lists.
		busy mSpanList
	}

	// Restore 64-bit alignment on 32-bit.
	_ uint32

	// bytesMarked is the number of bytes marked this cycle. This
	// includes bytes blackened in scanned objects, noscan objects
	// that go straight to black, and permagrey objects scanned by
	// markroot during the concurrent scan phase. This is updated
	// atomically during the cycle. Updates may be batched
	// arbitrarily, since the value is only read at the end of the
	// cycle.
	//
	// Because of benign races during marking, this number may not
	// be the exact number of marked bytes, but it should be very
	// close.
	//
	// Put this field here because it needs 64-bit atomic access
	// (and thus 8-byte alignment even on 32-bit architectures).
	bytesMarked uint64

	markrootNext uint32 // next markroot job
	markrootJobs uint32 // number of markroot jobs

	nproc  uint32
	tstart int64
	nwait  uint32

	// Number of roots of various root types. Set by gcMarkRootPrepare.
	nFlushCacheRoots                               int
	nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int

	// Each type of GC state transition is protected by a lock.
	// Since multiple threads can simultaneously detect the state
	// transition condition, any thread that detects a transition
	// condition must acquire the appropriate transition lock,
	// re-check the transition condition and return if it no
	// longer holds or perform the transition if it does.
	// Likewise, any transition must invalidate the transition
	// condition before releasing the lock. This ensures that each
	// transition is performed by exactly one thread and threads
	// that need the transition to happen block until it has
	// happened.
	//
	// startSema protects the transition from "off" to mark or
	// mark termination.
	startSema uint32
	// markDoneSema protects transitions from mark to mark termination.
	markDoneSema uint32

	bgMarkReady note   // signal background mark worker has started
	bgMarkDone  uint32 // cas to 1 when at a background mark completion point
	// Background mark completion signaling

	// mode is the concurrency mode of the current GC cycle.
	mode gcMode

	// userForced indicates the current GC cycle was forced by an
	// explicit user call.
	userForced bool

	// totaltime is the CPU nanoseconds spent in GC since the
	// program started if debug.gctrace > 0.
	totaltime int64

	// initialHeapLive is the value of memstats.heap_live at the
	// beginning of this GC cycle.
	initialHeapLive uint64

	// assistQueue is a queue of assists that are blocked because
	// there was neither enough credit to steal nor enough work to
	// do.
	assistQueue struct {
		lock mutex
		q    gQueue
	}

	// sweepWaiters is a list of blocked goroutines to wake when
	// we transition from mark termination to sweep.
	sweepWaiters struct {
		lock mutex
		list gList
	}

	// cycles is the number of completed GC cycles, where a GC
	// cycle is sweep termination, mark, mark termination, and
	// sweep. This differs from memstats.numgc, which is
	// incremented at mark termination.
	cycles uint32

	// Timing/utilization stats for this cycle.
	stwprocs, maxprocs                 int32
	tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start

	pauseNS    int64 // total STW time this cycle
	pauseStart int64 // nanotime() of last STW

	// debug.gctrace heap sizes for this cycle.
	heap0, heap1, heap2, heapGoal uint64
}

// GC runs a garbage collection and blocks the caller until the
// garbage collection is complete. It may also block the entire
// program.
func GC() {
	// We consider a cycle to be: sweep termination, mark, mark
	// termination, and sweep. This function shouldn't return
	// until a full cycle has been completed, from beginning to
	// end. Hence, we always want to finish up the current cycle
	// and start a new one. That means:
	//
	// 1. In sweep termination, mark, or mark termination of cycle
	// N, wait until mark termination N completes and transitions
	// to sweep N.
	//
	// 2. In sweep N, help with sweep N.
	//
	// At this point we can begin a full cycle N+1.
	//
	// 3. Trigger cycle N+1 by starting sweep termination N+1.
	//
	// 4. Wait for mark termination N+1 to complete.
	//
	// 5. Help with sweep N+1 until it's done.
	//
	// This all has to be written to deal with the fact that the
	// GC may move ahead on its own. For example, when we block
	// until mark termination N, we may wake up in cycle N+2.

	// Wait until the current sweep termination, mark, and mark
	// termination complete.
	n := atomic.Load(&work.cycles)
	gcWaitOnMark(n)

	// We're now in sweep N or later. Trigger GC cycle N+1, which
	// will first finish sweep N if necessary and then enter sweep
	// termination N+1.
	gcStart(gcTrigger{kind: gcTriggerCycle, n: n + 1})

	// Wait for mark termination N+1 to complete.
	gcWaitOnMark(n + 1)

	// Finish sweep N+1 before returning. We do this both to
	// complete the cycle and because runtime.GC() is often used
	// as part of tests and benchmarks to get the system into a
	// relatively stable and isolated state.
	for atomic.Load(&work.cycles) == n+1 && sweepone() != ^uintptr(0) {
		sweep.nbgsweep++
		Gosched()
	}

	// Callers may assume that the heap profile reflects the
	// just-completed cycle when this returns (historically this
	// happened because this was a STW GC), but right now the
	// profile still reflects mark termination N, not N+1.
	//
	// As soon as all of the sweep frees from cycle N+1 are done,
	// we can go ahead and publish the heap profile.
	//
	// First, wait for sweeping to finish. (We know there are no
	// more spans on the sweep queue, but we may be concurrently
	// sweeping spans, so we have to wait.)
	for atomic.Load(&work.cycles) == n+1 && atomic.Load(&mheap_.sweepers) != 0 {
		Gosched()
	}

	// Now we're really done with sweeping, so we can publish the
	// stable heap profile. Only do this if we haven't already hit
	// another mark termination.
	mp := acquirem()
	cycle := atomic.Load(&work.cycles)
	if cycle == n+1 || (gcphase == _GCmark && cycle == n+2) {
		mProf_PostSweep()
	}
	releasem(mp)
}

// gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has
// already completed this mark phase, it returns immediately.
func gcWaitOnMark(n uint32) {
	for {
		// Disable phase transitions.
		lock(&work.sweepWaiters.lock)
		nMarks := atomic.Load(&work.cycles)
		if gcphase != _GCmark {
			// We've already completed this cycle's mark.
			nMarks++
		}
		if nMarks > n {
			// We're done.
			unlock(&work.sweepWaiters.lock)
			return
		}

		// Wait until sweep termination, mark, and mark
		// termination of cycle N complete.
		work.sweepWaiters.list.push(getg())
		goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1)
	}
}

// gcMode indicates how concurrent a GC cycle should be.
type gcMode int

const (
	gcBackgroundMode gcMode = iota // concurrent GC and sweep
	gcForceMode                    // stop-the-world GC now, concurrent sweep
	gcForceBlockMode               // stop-the-world GC now and STW sweep (forced by user)
)

// A gcTrigger is a predicate for starting a GC cycle. Specifically,
// it is an exit condition for the _GCoff phase.
type gcTrigger struct {
	kind gcTriggerKind
	now  int64  // gcTriggerTime: current time
	n    uint32 // gcTriggerCycle: cycle number to start
}

type gcTriggerKind int

const (
	// gcTriggerHeap indicates that a cycle should be started when
	// the heap size reaches the trigger heap size computed by the
	// controller.
	gcTriggerHeap gcTriggerKind = iota

	// gcTriggerTime indicates that a cycle should be started when
	// it's been more than forcegcperiod nanoseconds since the
	// previous GC cycle.
	gcTriggerTime

	// gcTriggerCycle indicates that a cycle should be started if
	// we have not yet started cycle number gcTrigger.n (relative
	// to work.cycles).
	gcTriggerCycle
)

// test reports whether the trigger condition is satisfied, meaning
// that the exit condition for the _GCoff phase has been met. The exit
// condition should be tested when allocating.
func (t gcTrigger) test() bool {
	if !memstats.enablegc || panicking != 0 || gcphase != _GCoff {
		return false
	}
	switch t.kind {
	case gcTriggerHeap:
		// Non-atomic access to heap_live for performance. If
		// we are going to trigger on this, this thread just
		// atomically wrote heap_live anyway and we'll see our
		// own write.
		return memstats.heap_live >= memstats.gc_trigger
	case gcTriggerTime:
		if gcpercent < 0 {
			return false
		}
		lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime))
		return lastgc != 0 && t.now-lastgc > forcegcperiod
	case gcTriggerCycle:
		// t.n > work.cycles, but accounting for wraparound.
		return int32(t.n-work.cycles) > 0
	}
	return true
}

// gcStart starts the GC.
// It transitions from _GCoff to _GCmark (if
// debug.gcstoptheworld == 0) or performs all of GC (if
// debug.gcstoptheworld != 0).
//
// This may return without performing this transition in some cases,
// such as when called on a system stack or with locks held.
func gcStart(trigger gcTrigger) {
	// Since this is called from malloc and malloc is called in
	// the guts of a number of libraries that might be holding
	// locks, don't attempt to start GC in non-preemptible or
	// potentially unstable situations.
	mp := acquirem()
	if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" {
		releasem(mp)
		return
	}
	releasem(mp)
	mp = nil

	// Pick up the remaining unswept/not being swept spans concurrently
	//
	// This shouldn't happen if we're being invoked in background
	// mode since proportional sweep should have just finished
	// sweeping everything, but rounding errors, etc, may leave a
	// few spans unswept. In forced mode, this is necessary since
	// GC can be forced at any point in the sweeping cycle.
	//
	// We check the transition condition continuously here in case
	// this G gets delayed into the next GC cycle.
	for trigger.test() && sweepone() != ^uintptr(0) {
		sweep.nbgsweep++
	}

	// Perform GC initialization and the sweep termination
	// transition.
	semacquire(&work.startSema)
	// Re-check transition condition under transition lock.
	if !trigger.test() {
		semrelease(&work.startSema)
		return
	}

	// For stats, check if this GC was forced by the user.
	work.userForced = trigger.kind == gcTriggerCycle

	// In gcstoptheworld debug mode, upgrade the mode accordingly.
	// We do this after re-checking the transition condition so
	// that multiple goroutines that detect the heap trigger don't
	// start multiple STW GCs.
	mode := gcBackgroundMode
	if debug.gcstoptheworld == 1 {
		mode = gcForceMode
	} else if debug.gcstoptheworld == 2 {
		mode = gcForceBlockMode
	}

	// Ok, we're doing it! Stop everybody else
	semacquire(&gcsema)
	semacquire(&worldsema)

	if trace.enabled {
		traceGCStart()
	}

	// Check that all Ps have finished deferred mcache flushes.
	for _, p := range allp {
		if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen {
			println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
			throw("p mcache not flushed")
		}
	}

	gcBgMarkStartWorkers()

	systemstack(gcResetMarkState)

	work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs
	if work.stwprocs > ncpu {
		// This is used to compute CPU time of the STW phases,
		// so it can't be more than ncpu, even if GOMAXPROCS is.
		work.stwprocs = ncpu
	}
	work.heap0 = atomic.Load64(&memstats.heap_live)
	work.pauseNS = 0
	work.mode = mode

	now := nanotime()
	work.tSweepTerm = now
	work.pauseStart = now
	if trace.enabled {
		traceGCSTWStart(1)
	}
	systemstack(stopTheWorldWithSema)
	// Finish sweep before we start concurrent scan.
	systemstack(func() {
		finishsweep_m()
	})

	// clearpools before we start the GC. If we wait, the memory will not be
	// reclaimed until the next GC cycle.
	clearpools()

	work.cycles++

	gcController.startCycle()
	work.heapGoal = memstats.next_gc

	// In STW mode, disable scheduling of user Gs. This may also
	// disable scheduling of this goroutine, so it may block as
	// soon as we start the world again.
	if mode != gcBackgroundMode {
		schedEnableUser(false)
	}

	// Enter concurrent mark phase and enable
	// write barriers.
	//
	// Because the world is stopped, all Ps will
	// observe that write barriers are enabled by
	// the time we start the world and begin
	// scanning.
	//
	// Write barriers must be enabled before assists are
	// enabled because they must be enabled before
	// any non-leaf heap objects are marked. Since
	// allocations are blocked until assists can
	// happen, we want to enable assists as early as
	// possible.
	setGCPhase(_GCmark)

	gcBgMarkPrepare() // Must happen before assist enable.
	gcMarkRootPrepare()

	// Mark all active tinyalloc blocks. Since we're
	// allocating from these, they need to be black like
	// other allocations. The alternative is to blacken
	// the tiny block on every allocation from it, which
	// would slow down the tiny allocator.
	gcMarkTinyAllocs()

	// At this point all Ps have enabled the write
	// barrier, thus maintaining the no white to
	// black invariant. Enable mutator assists to
	// put back-pressure on fast allocating
	// mutators.
	atomic.Store(&gcBlackenEnabled, 1)

	// Assists and workers can start the moment we start
	// the world.
	gcController.markStartTime = now

	// In STW mode, we could block the instant systemstack
	// returns, so make sure we're not preemptible.
	mp = acquirem()

	// Concurrent mark.
	systemstack(func() {
		now = startTheWorldWithSema(trace.enabled)
		work.pauseNS += now - work.pauseStart
		work.tMark = now
		memstats.gcPauseDist.record(now - work.pauseStart)
	})

	// Release the world sema before Gosched() in STW mode
	// because we will need to reacquire it later but before
	// this goroutine becomes runnable again, and we could
	// self-deadlock otherwise.
	semrelease(&worldsema)
	releasem(mp)

	// Make sure we block instead of returning to user code
	// in STW mode.
	if mode != gcBackgroundMode {
		Gosched()
	}

	semrelease(&work.startSema)
}

// gcMarkDoneFlushed counts the number of P's with flushed work.
//
// Ideally this would be a captured local in gcMarkDone, but forEachP
// escapes its callback closure, so it can't capture anything.
//
// This is protected by markDoneSema.
var gcMarkDoneFlushed uint32

// gcMarkDone transitions the GC from mark to mark termination if all
// reachable objects have been marked (that is, there are no grey
// objects and can be no more in the future). Otherwise, it flushes
// all local work to the global queues where it can be discovered by
// other workers.
//
// This should be called when all local mark work has been drained and
// there are no remaining workers. Specifically, when
//
//	work.nwait == work.nproc && !gcMarkWorkAvailable(p)
//
// The calling context must be preemptible.
//
// Flushing local work is important because idle Ps may have local
// work queued.
This is the only way to make that work visible and 1488 // drive GC to completion. 1489 // 1490 // It is explicitly okay to have write barriers in this function. If 1491 // it does transition to mark termination, then all reachable objects 1492 // have been marked, so the write barrier cannot shade any more 1493 // objects. 1494 func gcMarkDone() { 1495 // Ensure only one thread is running the ragged barrier at a 1496 // time. 1497 semacquire(&work.markDoneSema) 1498 1499 top: 1500 // Re-check transition condition under transition lock. 1501 // 1502 // It's critical that this checks the global work queues are 1503 // empty before performing the ragged barrier. Otherwise, 1504 // there could be global work that a P could take after the P 1505 // has passed the ragged barrier. 1506 if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { 1507 semrelease(&work.markDoneSema) 1508 return 1509 } 1510 1511 // forEachP needs worldsema to execute, and we'll need it to 1512 // stop the world later, so acquire worldsema now. 1513 semacquire(&worldsema) 1514 1515 // Flush all local buffers and collect flushedWork flags. 1516 gcMarkDoneFlushed = 0 1517 systemstack(func() { 1518 gp := getg().m.curg 1519 // Mark the user stack as preemptible so that it may be scanned. 1520 // Otherwise, our attempt to force all P's to a safepoint could 1521 // result in a deadlock as we attempt to preempt a worker that's 1522 // trying to preempt us (e.g. for a stack scan). 1523 casgstatus(gp, _Grunning, _Gwaiting) 1524 forEachP(func(_p_ *p) { 1525 // Flush the write barrier buffer, since this may add 1526 // work to the gcWork. 1527 wbBufFlush1(_p_) 1528 1529 // Flush the gcWork, since this may create global work 1530 // and set the flushedWork flag. 1531 // 1532 // TODO(austin): Break up these workbufs to 1533 // better distribute work. 1534 _p_.gcw.dispose() 1535 // Collect the flushedWork flag. 1536 if _p_.gcw.flushedWork { 1537 atomic.Xadd(&gcMarkDoneFlushed, 1) 1538 _p_.gcw.flushedWork = false 1539 } 1540 }) 1541 casgstatus(gp, _Gwaiting, _Grunning) 1542 }) 1543 1544 if gcMarkDoneFlushed != 0 { 1545 // More grey objects were discovered since the 1546 // previous termination check, so there may be more 1547 // work to do. Keep going. It's possible the 1548 // transition condition became true again during the 1549 // ragged barrier, so re-check it. 1550 semrelease(&worldsema) 1551 goto top 1552 } 1553 1554 // There was no global work, no local work, and no Ps 1555 // communicated work since we took markDoneSema. Therefore 1556 // there are no grey objects and no more objects can be 1557 // shaded. Transition to mark termination. 1558 now := nanotime() 1559 work.tMarkTerm = now 1560 work.pauseStart = now 1561 getg().m.preemptoff = "gcing" 1562 if trace.enabled { 1563 traceGCSTWStart(0) 1564 } 1565 systemstack(stopTheWorldWithSema) 1566 // The gcphase is _GCmark, it will transition to _GCmarktermination 1567 // below. The important thing is that the wb remains active until 1568 // all marking is complete. This includes writes made by the GC. 1569 1570 // There is sometimes work left over when we enter mark termination due 1571 // to write barriers performed after the completion barrier above. 1572 // Detect this and resume concurrent mark. This is obviously 1573 // unfortunate. 1574 // 1575 // See issue #27993 for details. 1576 // 1577 // Switch to the system stack to call wbBufFlush1, though in this case 1578 // it doesn't matter because we're non-preemptible anyway. 
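// (Editorial sketch, not runtime code: the ragged barrier above and the
// leftover-work re-check below follow a simple flush-and-recheck pattern --
// ask every worker to publish its private work, count how many had any, and
// only declare termination after a pass in which nobody published anything.
// A simplified, sequential analogue:
//
//    package main
//
//    import "fmt"
//
//    func main() {
//        local := [][]int{nil, {7}, nil, nil} // worker 1 still holds an item
//        var global []int
//        for pass := 1; ; pass++ {
//            flushed := 0
//            for i := range local {
//                if len(local[i]) > 0 {
//                    global = append(global, local[i]...) // make it globally visible
//                    local[i] = nil
//                    flushed++
//                }
//            }
//            fmt.Println("pass", pass, "flushed", flushed)
//            if flushed == 0 {
//                break // nothing new appeared: safe to terminate
//            }
//            // A non-zero flush count forces another pass, just as a
//            // non-zero gcMarkDoneFlushed sends gcMarkDone back to "top".
//        }
//        fmt.Println("global queue:", global)
//    }
// )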
1579 restart := false 1580 systemstack(func() { 1581 for _, p := range allp { 1582 wbBufFlush1(p) 1583 if !p.gcw.empty() { 1584 restart = true 1585 break 1586 } 1587 } 1588 }) 1589 if restart { 1590 getg().m.preemptoff = "" 1591 systemstack(func() { 1592 now := startTheWorldWithSema(true) 1593 work.pauseNS += now - work.pauseStart 1594 memstats.gcPauseDist.record(now - work.pauseStart) 1595 }) 1596 semrelease(&worldsema) 1597 goto top 1598 } 1599 1600 // Disable assists and background workers. We must do 1601 // this before waking blocked assists. 1602 atomic.Store(&gcBlackenEnabled, 0) 1603 1604 // Wake all blocked assists. These will run when we 1605 // start the world again. 1606 gcWakeAllAssists() 1607 1608 // Likewise, release the transition lock. Blocked 1609 // workers and assists will run when we start the 1610 // world again. 1611 semrelease(&work.markDoneSema) 1612 1613 // In STW mode, re-enable user goroutines. These will be 1614 // queued to run after we start the world. 1615 schedEnableUser(true) 1616 1617 // endCycle depends on all gcWork cache stats being flushed. 1618 // The termination algorithm above ensured that up to 1619 // allocations since the ragged barrier. 1620 nextTriggerRatio := gcController.endCycle() 1621 1622 // Perform mark termination. This will restart the world. 1623 gcMarkTermination(nextTriggerRatio) 1624 } 1625 1626 // World must be stopped and mark assists and background workers must be 1627 // disabled. 1628 func gcMarkTermination(nextTriggerRatio float64) { 1629 // Start marktermination (write barrier remains enabled for now). 1630 setGCPhase(_GCmarktermination) 1631 1632 work.heap1 = memstats.heap_live 1633 startTime := nanotime() 1634 1635 mp := acquirem() 1636 mp.preemptoff = "gcing" 1637 _g_ := getg() 1638 _g_.m.traceback = 2 1639 gp := _g_.m.curg 1640 casgstatus(gp, _Grunning, _Gwaiting) 1641 gp.waitreason = waitReasonGarbageCollection 1642 1643 // Run gc on the g0 stack. We do this so that the g stack 1644 // we're currently running on will no longer change. Cuts 1645 // the root set down a bit (g0 stacks are not scanned, and 1646 // we don't need to scan gc's internal state). We also 1647 // need to switch to g0 so we can shrink the stack. 1648 systemstack(func() { 1649 gcMark(startTime) 1650 // Must return immediately. 1651 // The outer function's stack may have moved 1652 // during gcMark (it shrinks stacks, including the 1653 // outer function's stack), so we must not refer 1654 // to any of its variables. Return back to the 1655 // non-system stack to pick up the new addresses 1656 // before continuing. 1657 }) 1658 1659 systemstack(func() { 1660 work.heap2 = work.bytesMarked 1661 if debug.gccheckmark > 0 { 1662 // Run a full non-parallel, stop-the-world 1663 // mark using checkmark bits, to check that we 1664 // didn't forget to mark anything during the 1665 // concurrent mark process. 1666 startCheckmarks() 1667 gcResetMarkState() 1668 gcw := &getg().m.p.ptr().gcw 1669 gcDrain(gcw, 0) 1670 wbBufFlush1(getg().m.p.ptr()) 1671 gcw.dispose() 1672 endCheckmarks() 1673 } 1674 1675 // marking is complete so we can turn the write barrier off 1676 setGCPhase(_GCoff) 1677 gcSweep(work.mode) 1678 }) 1679 1680 _g_.m.traceback = 0 1681 casgstatus(gp, _Gwaiting, _Grunning) 1682 1683 if trace.enabled { 1684 traceGCDone() 1685 } 1686 1687 // all done 1688 mp.preemptoff = "" 1689 1690 if gcphase != _GCoff { 1691 throw("gc done but gcphase != _GCoff") 1692 } 1693 1694 // Record next_gc and heap_inuse for scavenger. 
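// (Editorial aside: the fields recorded here and in the block below also back
// the snapshot that applications read with runtime.ReadMemStats. A minimal
// sketch using only the public API; the index expression for the most recent
// pause follows the runtime.MemStats documentation:
//
//    package main
//
//    import (
//        "fmt"
//        "runtime"
//    )
//
//    func main() {
//        runtime.GC() // force a cycle so the stats below are non-trivial
//
//        var m runtime.MemStats
//        runtime.ReadMemStats(&m)
//        fmt.Println("completed GC cycles: ", m.NumGC)
//        fmt.Println("forced cycles:       ", m.NumForcedGC)
//        fmt.Println("total pause (ns):    ", m.PauseTotalNs)
//        fmt.Println("last pause (ns):     ", m.PauseNs[(m.NumGC+255)%256])
//        fmt.Println("next GC goal (bytes):", m.NextGC)
//        fmt.Println("GC CPU fraction:     ", m.GCCPUFraction)
//    }
// )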
1695 memstats.last_next_gc = memstats.next_gc 1696 memstats.last_heap_inuse = memstats.heap_inuse 1697 1698 // Update GC trigger and pacing for the next cycle. 1699 gcSetTriggerRatio(nextTriggerRatio) 1700 1701 // Update timing memstats 1702 now := nanotime() 1703 sec, nsec, _ := time_now() 1704 unixNow := sec*1e9 + int64(nsec) 1705 work.pauseNS += now - work.pauseStart 1706 work.tEnd = now 1707 memstats.gcPauseDist.record(now - work.pauseStart) 1708 atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user 1709 atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us 1710 memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) 1711 memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) 1712 memstats.pause_total_ns += uint64(work.pauseNS) 1713 1714 // Update work.totaltime. 1715 sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm) 1716 // We report idle marking time below, but omit it from the 1717 // overall utilization here since it's "free". 1718 markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime 1719 markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm) 1720 cycleCpu := sweepTermCpu + markCpu + markTermCpu 1721 work.totaltime += cycleCpu 1722 1723 // Compute overall GC CPU utilization. 1724 totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) 1725 memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) 1726 1727 // Reset sweep state. 1728 sweep.nbgsweep = 0 1729 sweep.npausesweep = 0 1730 1731 if work.userForced { 1732 memstats.numforcedgc++ 1733 } 1734 1735 // Bump GC cycle count and wake goroutines waiting on sweep. 1736 lock(&work.sweepWaiters.lock) 1737 memstats.numgc++ 1738 injectglist(&work.sweepWaiters.list) 1739 unlock(&work.sweepWaiters.lock) 1740 1741 // Finish the current heap profiling cycle and start a new 1742 // heap profiling cycle. We do this before starting the world 1743 // so events don't leak into the wrong cycle. 1744 mProf_NextCycle() 1745 1746 systemstack(func() { startTheWorldWithSema(true) }) 1747 1748 // Flush the heap profile so we can start a new cycle next GC. 1749 // This is relatively expensive, so we don't do it with the 1750 // world stopped. 1751 mProf_Flush() 1752 1753 // Prepare workbufs for freeing by the sweeper. We do this 1754 // asynchronously because it can take non-trivial time. 1755 prepareFreeWorkbufs() 1756 1757 // Free stack spans. This must be done between GC cycles. 1758 systemstack(freeStackSpans) 1759 1760 // Ensure all mcaches are flushed. Each P will flush its own 1761 // mcache before allocating, but idle Ps may not. Since this 1762 // is necessary to sweep all spans, we need to ensure all 1763 // mcaches are flushed before we start the next GC cycle. 1764 systemstack(func() { 1765 forEachP(func(_p_ *p) { 1766 _p_.mcache.prepareForSweep() 1767 }) 1768 }) 1769 1770 // Print gctrace before dropping worldsema. As soon as we drop 1771 // worldsema another cycle could start and smash the stats 1772 // we're trying to print. 
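// (Editorial aside, summarizing the print calls below and the runtime package
// documentation: running the compiled binary with GODEBUG=gctrace=1 emits one
// line per cycle of the form
//
//    gc # @#s #%: #+#+# ms clock, #+#/#/#+# ms cpu, #->#-># MB, # MB goal, # P [(forced)]
//
// The clock segment is sweep termination + concurrent mark + mark termination;
// the cpu segment is sweep termination + assist/dedicated+fractional/idle mark
// + mark termination; the three heap figures are work.heap0, work.heap1, and
// work.heap2 in MB; "(forced)" appears when the cycle was requested by the
// user. A minimal program that produces one such line:
//
//    // run the compiled binary with GODEBUG=gctrace=1
//    package main
//
//    import "runtime"
//
//    func main() {
//        runtime.GC() // forces one cycle; a "gc 1 @... (forced)" line is printed to stderr
//    }
// )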
1773 if debug.gctrace > 0 { 1774 util := int(memstats.gc_cpu_fraction * 100) 1775 1776 var sbuf [24]byte 1777 printlock() 1778 print("gc ", memstats.numgc, 1779 " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", 1780 util, "%: ") 1781 prev := work.tSweepTerm 1782 for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} { 1783 if i != 0 { 1784 print("+") 1785 } 1786 print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev)))) 1787 prev = ns 1788 } 1789 print(" ms clock, ") 1790 for i, ns := range []int64{sweepTermCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} { 1791 if i == 2 || i == 3 { 1792 // Separate mark time components with /. 1793 print("/") 1794 } else if i != 0 { 1795 print("+") 1796 } 1797 print(string(fmtNSAsMS(sbuf[:], uint64(ns)))) 1798 } 1799 print(" ms cpu, ", 1800 work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ", 1801 work.heapGoal>>20, " MB goal, ", 1802 work.maxprocs, " P") 1803 if work.userForced { 1804 print(" (forced)") 1805 } 1806 print("\n") 1807 printunlock() 1808 } 1809 1810 semrelease(&worldsema) 1811 semrelease(&gcsema) 1812 // Careful: another GC cycle may start now. 1813 1814 releasem(mp) 1815 mp = nil 1816 1817 // now that gc is done, kick off finalizer thread if needed 1818 if !concurrentSweep { 1819 // give the queued finalizers, if any, a chance to run 1820 Gosched() 1821 } 1822 } 1823 1824 // gcBgMarkStartWorkers prepares background mark worker goroutines. These 1825 // goroutines will not run until the mark phase, but they must be started while 1826 // the work is not stopped and from a regular G stack. The caller must hold 1827 // worldsema. 1828 func gcBgMarkStartWorkers() { 1829 // Background marking is performed by per-P G's. Ensure that each P has 1830 // a background GC G. 1831 // 1832 // Worker Gs don't exit if gomaxprocs is reduced. If it is raised 1833 // again, we can reuse the old workers; no need to create new workers. 1834 for gcBgMarkWorkerCount < gomaxprocs { 1835 go gcBgMarkWorker() 1836 1837 notetsleepg(&work.bgMarkReady, -1) 1838 noteclear(&work.bgMarkReady) 1839 // The worker is now guaranteed to be added to the pool before 1840 // its P's next findRunnableGCWorker. 1841 1842 gcBgMarkWorkerCount++ 1843 } 1844 } 1845 1846 // gcBgMarkPrepare sets up state for background marking. 1847 // Mutator assists must not yet be enabled. 1848 func gcBgMarkPrepare() { 1849 // Background marking will stop when the work queues are empty 1850 // and there are no more workers (note that, since this is 1851 // concurrent, this may be a transient state, but mark 1852 // termination will clean it up). Between background workers 1853 // and assists, we don't really know how many workers there 1854 // will be, so we pretend to have an arbitrarily large number 1855 // of workers, almost all of which are "waiting". While a 1856 // worker is working it decrements nwait. If nproc == nwait, 1857 // there are no workers. 1858 work.nproc = ^uint32(0) 1859 work.nwait = ^uint32(0) 1860 } 1861 1862 // gcBgMarkWorker is an entry in the gcBgMarkWorkerPool. It points to a single 1863 // gcBgMarkWorker goroutine. 1864 type gcBgMarkWorkerNode struct { 1865 // Unused workers are managed in a lock-free stack. This field must be first. 1866 node lfnode 1867 1868 // The g of this worker. 1869 gp guintptr 1870 1871 // Release this m on park. This is used to communicate with the unlock 1872 // function, which cannot access the G's stack. 
It is unused outside of 1873 // gcBgMarkWorker(). 1874 m muintptr 1875 } 1876 1877 func gcBgMarkWorker() { 1878 gp := getg() 1879 1880 // We pass node to a gopark unlock function, so it can't be on 1881 // the stack (see gopark). Prevent deadlock from recursively 1882 // starting GC by disabling preemption. 1883 gp.m.preemptoff = "GC worker init" 1884 node := new(gcBgMarkWorkerNode) 1885 gp.m.preemptoff = "" 1886 1887 node.gp.set(gp) 1888 1889 node.m.set(acquirem()) 1890 notewakeup(&work.bgMarkReady) 1891 // After this point, the background mark worker is generally scheduled 1892 // cooperatively by gcController.findRunnableGCWorker. While performing 1893 // work on the P, preemption is disabled because we are working on 1894 // P-local work buffers. When the preempt flag is set, this puts itself 1895 // into _Gwaiting to be woken up by gcController.findRunnableGCWorker 1896 // at the appropriate time. 1897 // 1898 // When preemption is enabled (e.g., while in gcMarkDone), this worker 1899 // may be preempted and schedule as a _Grunnable G from a runq. That is 1900 // fine; it will eventually gopark again for further scheduling via 1901 // findRunnableGCWorker. 1902 // 1903 // Since we disable preemption before notifying bgMarkReady, we 1904 // guarantee that this G will be in the worker pool for the next 1905 // findRunnableGCWorker. This isn't strictly necessary, but it reduces 1906 // latency between _GCmark starting and the workers starting. 1907 1908 for { 1909 // Go to sleep until woken by 1910 // gcController.findRunnableGCWorker. 1911 gopark(func(g *g, nodep unsafe.Pointer) bool { 1912 node := (*gcBgMarkWorkerNode)(nodep) 1913 1914 if mp := node.m.ptr(); mp != nil { 1915 // The worker G is no longer running; release 1916 // the M. 1917 // 1918 // N.B. it is _safe_ to release the M as soon 1919 // as we are no longer performing P-local mark 1920 // work. 1921 // 1922 // However, since we cooperatively stop work 1923 // when gp.preempt is set, if we releasem in 1924 // the loop then the following call to gopark 1925 // would immediately preempt the G. This is 1926 // also safe, but inefficient: the G must 1927 // schedule again only to enter gopark and park 1928 // again. Thus, we defer the release until 1929 // after parking the G. 1930 releasem(mp) 1931 } 1932 1933 // Release this G to the pool. 1934 gcBgMarkWorkerPool.push(&node.node) 1935 // Note that at this point, the G may immediately be 1936 // rescheduled and may be running. 1937 return true 1938 }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) 1939 1940 // Preemption must not occur here, or another G might see 1941 // p.gcMarkWorkerMode. 1942 1943 // Disable preemption so we can use the gcw. If the 1944 // scheduler wants to preempt us, we'll stop draining, 1945 // dispose the gcw, and then preempt. 1946 node.m.set(acquirem()) 1947 pp := gp.m.p.ptr() // P can't change with preemption disabled. 
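// (Editorial sketch of the nproc/nwait accounting used below and set up in
// gcBgMarkPrepare: nwait starts equal to a notionally infinite nproc, a
// running worker decrements it, and the worker whose increment restores
// nwait == nproc with no work left is the one that triggers the completion
// check. Simplified, not runtime code:
//
//    package main
//
//    import (
//        "fmt"
//        "sync"
//        "sync/atomic"
//    )
//
//    func main() {
//        const nproc = ^uint32(0) // pretend there are arbitrarily many workers
//        nwait := nproc           // nwait == nproc means "no worker running"
//        pending := int32(3)      // stand-in for the grey-object work queues
//
//        var wg sync.WaitGroup
//        for i := 0; i < 3; i++ {
//            atomic.AddUint32(&nwait, ^uint32(0)) // nwait--: a worker becomes active
//            wg.Add(1)
//            go func() {
//                defer wg.Done()
//                for atomic.AddInt32(&pending, -1) >= 0 {
//                    // ... drain one unit of mark work ...
//                }
//                // The increment that restores nwait == nproc, with no work
//                // left, plays the role of the gcMarkDone trigger below.
//                if atomic.AddUint32(&nwait, 1) == nproc && atomic.LoadInt32(&pending) <= 0 {
//                    fmt.Println("last worker out of work: completion check")
//                }
//            }()
//        }
//        wg.Wait()
//    }
// )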
1948 1949 if gcBlackenEnabled == 0 { 1950 println("worker mode", pp.gcMarkWorkerMode) 1951 throw("gcBgMarkWorker: blackening not enabled") 1952 } 1953 1954 if pp.gcMarkWorkerMode == gcMarkWorkerNotWorker { 1955 throw("gcBgMarkWorker: mode not set") 1956 } 1957 1958 startTime := nanotime() 1959 pp.gcMarkWorkerStartTime = startTime 1960 1961 decnwait := atomic.Xadd(&work.nwait, -1) 1962 if decnwait == work.nproc { 1963 println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) 1964 throw("work.nwait was > work.nproc") 1965 } 1966 1967 systemstack(func() { 1968 // Mark our goroutine preemptible so its stack 1969 // can be scanned. This lets two mark workers 1970 // scan each other (otherwise, they would 1971 // deadlock). We must not modify anything on 1972 // the G stack. However, stack shrinking is 1973 // disabled for mark workers, so it is safe to 1974 // read from the G stack. 1975 casgstatus(gp, _Grunning, _Gwaiting) 1976 switch pp.gcMarkWorkerMode { 1977 default: 1978 throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") 1979 case gcMarkWorkerDedicatedMode: 1980 gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) 1981 if gp.preempt { 1982 // We were preempted. This is 1983 // a useful signal to kick 1984 // everything out of the run 1985 // queue so it can run 1986 // somewhere else. 1987 lock(&sched.lock) 1988 for { 1989 gp, _ := runqget(pp) 1990 if gp == nil { 1991 break 1992 } 1993 globrunqput(gp) 1994 } 1995 unlock(&sched.lock) 1996 } 1997 // Go back to draining, this time 1998 // without preemption. 1999 gcDrain(&pp.gcw, gcDrainFlushBgCredit) 2000 case gcMarkWorkerFractionalMode: 2001 gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) 2002 case gcMarkWorkerIdleMode: 2003 gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) 2004 } 2005 casgstatus(gp, _Gwaiting, _Grunning) 2006 }) 2007 2008 // Account for time. 2009 duration := nanotime() - startTime 2010 switch pp.gcMarkWorkerMode { 2011 case gcMarkWorkerDedicatedMode: 2012 atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) 2013 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) 2014 case gcMarkWorkerFractionalMode: 2015 atomic.Xaddint64(&gcController.fractionalMarkTime, duration) 2016 atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) 2017 case gcMarkWorkerIdleMode: 2018 atomic.Xaddint64(&gcController.idleMarkTime, duration) 2019 } 2020 2021 // Was this the last worker and did we run out 2022 // of work? 2023 incnwait := atomic.Xadd(&work.nwait, +1) 2024 if incnwait > work.nproc { 2025 println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode, 2026 "work.nwait=", incnwait, "work.nproc=", work.nproc) 2027 throw("work.nwait > work.nproc") 2028 } 2029 2030 // We'll releasem after this point and thus this P may run 2031 // something else. We must clear the worker mode to avoid 2032 // attributing the mode to a different (non-worker) G in 2033 // traceGoStart. 2034 pp.gcMarkWorkerMode = gcMarkWorkerNotWorker 2035 2036 // If this worker reached a background mark completion 2037 // point, signal the main GC goroutine. 2038 if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { 2039 // We don't need the P-local buffers here, allow 2040 // preemption because we may schedule like a regular 2041 // goroutine in gcMarkDone (block on locks, etc). 2042 releasem(node.m.ptr()) 2043 node.m.set(nil) 2044 2045 gcMarkDone() 2046 } 2047 } 2048 } 2049 2050 // gcMarkWorkAvailable reports whether executing a mark worker 2051 // on p is potentially useful.
p may be nil, in which case it only 2052 // checks the global sources of work. 2053 func gcMarkWorkAvailable(p *p) bool { 2054 if p != nil && !p.gcw.empty() { 2055 return true 2056 } 2057 if !work.full.empty() { 2058 return true // global work available 2059 } 2060 if work.markrootNext < work.markrootJobs { 2061 return true // root scan work available 2062 } 2063 return false 2064 } 2065 2066 // gcMark runs the mark (or, for concurrent GC, mark termination) 2067 // All gcWork caches must be empty. 2068 // STW is in effect at this point. 2069 func gcMark(start_time int64) { 2070 if debug.allocfreetrace > 0 { 2071 tracegc() 2072 } 2073 2074 if gcphase != _GCmarktermination { 2075 throw("in gcMark expecting to see gcphase as _GCmarktermination") 2076 } 2077 work.tstart = start_time 2078 2079 // Check that there's no marking work remaining. 2080 if work.full != 0 || work.markrootNext < work.markrootJobs { 2081 print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") 2082 panic("non-empty mark queue after concurrent mark") 2083 } 2084 2085 if debug.gccheckmark > 0 { 2086 // This is expensive when there's a large number of 2087 // Gs, so only do it if checkmark is also enabled. 2088 gcMarkRootCheck() 2089 } 2090 if work.full != 0 { 2091 throw("work.full != 0") 2092 } 2093 2094 // Clear out buffers and double-check that all gcWork caches 2095 // are empty. This should be ensured by gcMarkDone before we 2096 // enter mark termination. 2097 // 2098 // TODO: We could clear out buffers just before mark if this 2099 // has a non-negligible impact on STW time. 2100 for _, p := range allp { 2101 // The write barrier may have buffered pointers since 2102 // the gcMarkDone barrier. However, since the barrier 2103 // ensured all reachable objects were marked, all of 2104 // these must be pointers to black objects. Hence we 2105 // can just discard the write barrier buffer. 2106 if debug.gccheckmark > 0 { 2107 // For debugging, flush the buffer and make 2108 // sure it really was all marked. 2109 wbBufFlush1(p) 2110 } else { 2111 p.wbBuf.reset() 2112 } 2113 2114 gcw := &p.gcw 2115 if !gcw.empty() { 2116 printlock() 2117 print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) 2118 if gcw.wbuf1 == nil { 2119 print(" wbuf1=<nil>") 2120 } else { 2121 print(" wbuf1.n=", gcw.wbuf1.nobj) 2122 } 2123 if gcw.wbuf2 == nil { 2124 print(" wbuf2=<nil>") 2125 } else { 2126 print(" wbuf2.n=", gcw.wbuf2.nobj) 2127 } 2128 print("\n") 2129 throw("P has cached GC work at end of mark termination") 2130 } 2131 // There may still be cached empty buffers, which we 2132 // need to flush since we're going to free them. Also, 2133 // there may be non-zero stats because we allocated 2134 // black after the gcMarkDone barrier. 2135 gcw.dispose() 2136 } 2137 2138 // Update the marked heap stat. 2139 memstats.heap_marked = work.bytesMarked 2140 2141 // Flush scanAlloc from each mcache since we're about to modify 2142 // heap_scan directly. If we were to flush this later, then scanAlloc 2143 // might have incorrect information. 2144 for _, p := range allp { 2145 c := p.mcache 2146 if c == nil { 2147 continue 2148 } 2149 memstats.heap_scan += uint64(c.scanAlloc) 2150 c.scanAlloc = 0 2151 } 2152 2153 // Update other GC heap size stats. 
This must happen after 2154 // cachestats (which flushes local statistics to these) and 2155 // flushallmcaches (which modifies heap_live). 2156 memstats.heap_live = work.bytesMarked 2157 memstats.heap_scan = uint64(gcController.scanWork) 2158 2159 if trace.enabled { 2160 traceHeapAlloc() 2161 } 2162 } 2163 2164 // gcSweep must be called on the system stack because it acquires the heap 2165 // lock. See mheap for details. 2166 // 2167 // The world must be stopped. 2168 // 2169 //go:systemstack 2170 func gcSweep(mode gcMode) { 2171 assertWorldStopped() 2172 2173 if gcphase != _GCoff { 2174 throw("gcSweep being done but phase is not GCoff") 2175 } 2176 2177 lock(&mheap_.lock) 2178 mheap_.sweepgen += 2 2179 mheap_.sweepdone = 0 2180 mheap_.pagesSwept = 0 2181 mheap_.sweepArenas = mheap_.allArenas 2182 mheap_.reclaimIndex = 0 2183 mheap_.reclaimCredit = 0 2184 unlock(&mheap_.lock) 2185 2186 sweep.centralIndex.clear() 2187 2188 if !_ConcurrentSweep || mode == gcForceBlockMode { 2189 // Special case synchronous sweep. 2190 // Record that no proportional sweeping has to happen. 2191 lock(&mheap_.lock) 2192 mheap_.sweepPagesPerByte = 0 2193 unlock(&mheap_.lock) 2194 // Sweep all spans eagerly. 2195 for sweepone() != ^uintptr(0) { 2196 sweep.npausesweep++ 2197 } 2198 // Free workbufs eagerly. 2199 prepareFreeWorkbufs() 2200 for freeSomeWbufs(false) { 2201 } 2202 // All "free" events for this mark/sweep cycle have 2203 // now happened, so we can make this profile cycle 2204 // available immediately. 2205 mProf_NextCycle() 2206 mProf_Flush() 2207 return 2208 } 2209 2210 // Background sweep. 2211 lock(&sweep.lock) 2212 if sweep.parked { 2213 sweep.parked = false 2214 ready(sweep.g, 0, true) 2215 } 2216 unlock(&sweep.lock) 2217 } 2218 2219 // gcResetMarkState resets global state prior to marking (concurrent 2220 // or STW) and resets the stack scan state of all Gs. 2221 // 2222 // This is safe to do without the world stopped because any Gs created 2223 // during or after this will start out in the reset state. 2224 // 2225 // gcResetMarkState must be called on the system stack because it acquires 2226 // the heap lock. See mheap for details. 2227 // 2228 //go:systemstack 2229 func gcResetMarkState() { 2230 // This may be called during a concurrent phase, so make sure 2231 // allgs doesn't change. 2232 lock(&allglock) 2233 for _, gp := range allgs { 2234 gp.gcscandone = false // set to true in gcphasework 2235 gp.gcAssistBytes = 0 2236 } 2237 unlock(&allglock) 2238 2239 // Clear page marks. This is just 1MB per 64GB of heap, so the 2240 // time here is pretty trivial. 2241 lock(&mheap_.lock) 2242 arenas := mheap_.allArenas 2243 unlock(&mheap_.lock) 2244 for _, ai := range arenas { 2245 ha := mheap_.arenas[ai.l1()][ai.l2()] 2246 for i := range ha.pageMarks { 2247 ha.pageMarks[i] = 0 2248 } 2249 } 2250 2251 work.bytesMarked = 0 2252 work.initialHeapLive = atomic.Load64(&memstats.heap_live) 2253 } 2254 2255 // Hooks for other packages 2256 2257 var poolcleanup func() 2258 2259 //go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup 2260 func sync_runtime_registerPoolCleanup(f func()) { 2261 poolcleanup = f 2262 } 2263 2264 func clearpools() { 2265 // clear sync.Pools 2266 if poolcleanup != nil { 2267 poolcleanup() 2268 } 2269 2270 // Clear central sudog cache. 2271 // Leave per-P caches alone, they have strictly bounded size. 2272 // Disconnect cached list before dropping it on the floor, 2273 // so that a dangling ref to one entry does not pin all of them. 
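// (Editorial aside, not runtime code: with a tracing collector, a reachable
// node of an intrusive linked list keeps everything it still points at alive,
// so breaking the links before abandoning the cache lets unreferenced entries
// be collected individually. A minimal sketch of the same walk-and-unlink
// loop used for the sudog and defer caches below:
//
//    package main
//
//    import "fmt"
//
//    type node struct {
//        next  *node
//        value int
//    }
//
//    func main() {
//        // Build a cached list 0 -> 1 -> 2 and keep an outside reference
//        // to the middle entry, as a caller with a stale pointer might.
//        var head *node
//        for i := 2; i >= 0; i-- {
//            head = &node{next: head, value: i}
//        }
//        stale := head.next // dangling ref to entry 1
//
//        // Disconnect before dropping: afterwards, `stale` pins only
//        // entry 1, not the entries that used to hang off it.
//        var next *node
//        for n := head; n != nil; n = next {
//            next = n.next
//            n.next = nil
//        }
//        head = nil
//
//        fmt.Println(stale.value, stale.next) // 1 <nil>
//    }
// )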
2274 lock(&sched.sudoglock) 2275 var sg, sgnext *sudog 2276 for sg = sched.sudogcache; sg != nil; sg = sgnext { 2277 sgnext = sg.next 2278 sg.next = nil 2279 } 2280 sched.sudogcache = nil 2281 unlock(&sched.sudoglock) 2282 2283 // Clear central defer pools. 2284 // Leave per-P pools alone, they have strictly bounded size. 2285 lock(&sched.deferlock) 2286 for i := range sched.deferpool { 2287 // disconnect cached list before dropping it on the floor, 2288 // so that a dangling ref to one entry does not pin all of them. 2289 var d, dlink *_defer 2290 for d = sched.deferpool[i]; d != nil; d = dlink { 2291 dlink = d.link 2292 d.link = nil 2293 } 2294 sched.deferpool[i] = nil 2295 } 2296 unlock(&sched.deferlock) 2297 } 2298 2299 // Timing 2300 2301 // itoaDiv formats val/(10**dec) into buf. 2302 func itoaDiv(buf []byte, val uint64, dec int) []byte { 2303 i := len(buf) - 1 2304 idec := i - dec 2305 for val >= 10 || i >= idec { 2306 buf[i] = byte(val%10 + '0') 2307 i-- 2308 if i == idec { 2309 buf[i] = '.' 2310 i-- 2311 } 2312 val /= 10 2313 } 2314 buf[i] = byte(val + '0') 2315 return buf[i:] 2316 } 2317 2318 // fmtNSAsMS nicely formats ns nanoseconds as milliseconds. 2319 func fmtNSAsMS(buf []byte, ns uint64) []byte { 2320 if ns >= 10e6 { 2321 // Format as whole milliseconds. 2322 return itoaDiv(buf, ns/1e6, 0) 2323 } 2324 // Format two digits of precision, with at most three decimal places. 2325 x := ns / 1e3 2326 if x == 0 { 2327 buf[0] = '0' 2328 return buf[:1] 2329 } 2330 dec := 3 2331 for x >= 100 { 2332 x /= 10 2333 dec-- 2334 } 2335 return itoaDiv(buf, x, dec) 2336 }
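// (Editorial worked examples for the helpers above, with the returned byte
// slices shown as the strings they produce:
//
//    var sbuf [24]byte
//    string(itoaDiv(sbuf[:], 12345, 3))    // "12.345"  (12345 / 10^3)
//    string(itoaDiv(sbuf[:], 123, 0))      // "123"
//    string(fmtNSAsMS(sbuf[:], 123456789)) // "123"     (>= 10ms: whole milliseconds)
//    string(fmtNSAsMS(sbuf[:], 2345678))   // "2.3"     (< 10ms: roughly two significant digits)
//
// These are how the gctrace line renders its "@#s" timestamp and the
// per-phase millisecond figures.)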