github.com/mdempsky/go@v0.0.0-20151201204031-5dd372bd1e70/src/runtime/mgc.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
// It has gotten completely out of control.

// Garbage collector (GC).
//
// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows multiple
// GC threads to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is
// non-generational and non-compacting. Allocation is done using size segregated per P allocation
// areas to minimize fragmentation while eliminating locks in the common case.
//
// The algorithm decomposes into several steps.
// This is a high level description of the algorithm being used. For an overview of GC a good
// place to start is Richard Jones' gchandbook.org.
//
// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978),
// 966-975.
// For journal quality proofs that these steps are complete, correct, and terminate see
// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
//
//  0. Set phase = GCscan from GCoff.
//  1. Wait for all P's to acknowledge phase change.
//     At this point all goroutines have passed through a GC safepoint and
//     know we are in the GCscan phase.
//  2. GC scans all goroutine stacks, marks and enqueues all encountered pointers
//     (marking avoids most duplicate enqueuing but races may produce benign duplication).
//     Preempted goroutines are scanned before P schedules next goroutine.
//  3. Set phase = GCmark.
//  4. Wait for all P's to acknowledge phase change.
//  5. Now write barrier marks and enqueues black, grey, or white to white pointers.
//     Malloc still allocates white (non-marked) objects.
//  6. Meanwhile GC transitively walks the heap marking reachable objects.
//  7. When GC finishes marking heap, it preempts P's one-by-one and
//     retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
//     currently scheduled on the P).
//  8. Once the GC has exhausted all available marking work it sets phase = marktermination.
//  9. Wait for all P's to acknowledge phase change.
// 10. Malloc now allocates black objects, so the number of unmarked reachable objects
//     monotonically decreases.
// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet
//     reachable objects.
// 12. When GC completes a full cycle over P's and discovers no new grey
//     objects (which means all reachable objects are marked), set phase = GCoff.
// 13. Wait for all P's to acknowledge phase change.
// 14. Now malloc allocates white (but sweeps spans before use).
//     Write barrier becomes nop.
// 15. GC does background sweeping, see description below.
// 16. When sufficient allocation has taken place, replay the sequence starting at 0 above,
//     see discussion of GC rate below.

// Changing phases.
// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
// All phase action must be benign in the presence of a change.
// Starting with GCoff:
// GCoff to GCscan
//     GCscan scans stacks and globals, greying them, and never marks an object black.
//     Once all the P's are aware of the new phase they will scan gs on preemption.
//     This means that the scanning of preempted gs can't start until all the Ps
//     have acknowledged.
//     When a stack is scanned, this phase also installs stack barriers to
//     track how much of the stack has been active.
//     This transition enables write barriers because stack barriers
//     assume that writes to higher frames will be tracked by write
//     barriers. Technically this only needs write barriers for writes
//     to stack slots, but we enable write barriers in general.
// GCscan to GCmark
//     In GCmark, work buffers are drained until there are no more
//     pointers to scan.
//     No scanning of objects (making them black) can happen until all
//     Ps have enabled the write barrier, but that already happened in
//     the transition to GCscan.
// GCmark to GCmarktermination
//     The only change here is that we start allocating black, so the Ps must acknowledge
//     the change before we begin the termination algorithm.
// GCmarktermination to GCsweep
//     Objects currently on the freelist must be marked black for this to work.
//     Are things on the free lists black or white? How does the sweep phase work?

// Concurrent sweep.
//
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// At the end of STW mark termination all spans are marked as "needs sweeping".
//
// The background sweeper goroutine simply sweeps spans one-by-one.
//
// To avoid requesting more OS memory while there are unswept spans, when a
// goroutine needs another span, it first attempts to reclaim that much memory
// by sweeping. When a goroutine needs to allocate a new small-object span, it
// sweeps small-object spans for the same object size until it frees at least
// one object. When a goroutine needs to allocate a large-object span from the heap,
// it sweeps spans until it frees at least that many pages into the heap. There is
// one case where this may not suffice: if a goroutine sweeps and frees two
// nonadjacent one-page spans to the heap, it will allocate a new two-page
// span, but there can still be other one-page unswept spans which could be
// combined into a two-page span.
//
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in the GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).

// GC rate.
// The next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use.
The proportion is controlled by GOGC environment variable 116 // (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M 117 // (this mark is tracked in next_gc variable). This keeps the GC cost in linear 118 // proportion to the allocation cost. Adjusting GOGC just changes the linear constant 119 // (and also the amount of extra memory used). 120 121 package runtime 122 123 import ( 124 "runtime/internal/atomic" 125 "runtime/internal/sys" 126 "unsafe" 127 ) 128 129 const ( 130 _DebugGC = 0 131 _ConcurrentSweep = true 132 _FinBlockSize = 4 * 1024 133 134 // sweepMinHeapDistance is a lower bound on the heap distance 135 // (in bytes) reserved for concurrent sweeping between GC 136 // cycles. This will be scaled by gcpercent/100. 137 sweepMinHeapDistance = 1024 * 1024 138 ) 139 140 // heapminimum is the minimum heap size at which to trigger GC. 141 // For small heaps, this overrides the usual GOGC*live set rule. 142 // 143 // When there is a very small live set but a lot of allocation, simply 144 // collecting when the heap reaches GOGC*live results in many GC 145 // cycles and high total per-GC overhead. This minimum amortizes this 146 // per-GC overhead while keeping the heap reasonably small. 147 // 148 // During initialization this is set to 4MB*GOGC/100. In the case of 149 // GOGC==0, this will set heapminimum to 0, resulting in constant 150 // collection even when the heap size is small, which is useful for 151 // debugging. 152 var heapminimum uint64 = defaultHeapMinimum 153 154 // defaultHeapMinimum is the value of heapminimum for GOGC==100. 155 const defaultHeapMinimum = 4 << 20 156 157 // Initialized from $GOGC. GOGC=off means no GC. 158 var gcpercent int32 159 160 func gcinit() { 161 if unsafe.Sizeof(workbuf{}) != _WorkbufSize { 162 throw("size of Workbuf is suboptimal") 163 } 164 165 _ = setGCPercent(readgogc()) 166 for datap := &firstmoduledata; datap != nil; datap = datap.next { 167 datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data) 168 datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss) 169 } 170 memstats.next_gc = heapminimum 171 work.startSema = 1 172 work.markDoneSema = 1 173 } 174 175 func readgogc() int32 { 176 p := gogetenv("GOGC") 177 if p == "" { 178 return 100 179 } 180 if p == "off" { 181 return -1 182 } 183 return int32(atoi(p)) 184 } 185 186 // gcenable is called after the bulk of the runtime initialization, 187 // just before we're about to start letting user code run. 188 // It kicks off the background sweeper goroutine and enables GC. 189 func gcenable() { 190 c := make(chan int, 1) 191 go bgsweep(c) 192 <-c 193 memstats.enablegc = true // now that runtime is initialized, GC is okay 194 } 195 196 //go:linkname setGCPercent runtime/debug.setGCPercent 197 func setGCPercent(in int32) (out int32) { 198 lock(&mheap_.lock) 199 out = gcpercent 200 if in < 0 { 201 in = -1 202 } 203 gcpercent = in 204 heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 205 unlock(&mheap_.lock) 206 return out 207 } 208 209 // Garbage collector phase. 210 // Indicates to write barrier and sychronization task to preform. 211 var gcphase uint32 212 213 // The compiler knows about this variable. 214 // If you change it, you must change the compiler too. 
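//
// As an illustrative sketch (not the actual compiled output), the check the
// compiler emits around a heap pointer write looks roughly like the
// following, assuming a barrier helper such as writebarrierptr in
// mbarrier.go:
//
//	if writeBarrier.enabled {
//		writebarrierptr(&slot, ptr)
//	} else {
//		slot = ptr
//	}
//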
215 var writeBarrier struct { 216 enabled bool // compiler emits a check of this before calling write barrier 217 needed bool // whether we need a write barrier for current GC phase 218 cgo bool // whether we need a write barrier for a cgo check 219 } 220 221 // gcBlackenEnabled is 1 if mutator assists and background mark 222 // workers are allowed to blacken objects. This must only be set when 223 // gcphase == _GCmark. 224 var gcBlackenEnabled uint32 225 226 // gcBlackenPromptly indicates that optimizations that may 227 // hide work from the global work queue should be disabled. 228 // 229 // If gcBlackenPromptly is true, per-P gcWork caches should 230 // be flushed immediately and new objects should be allocated black. 231 // 232 // There is a tension between allocating objects white and 233 // allocating them black. If white and the objects die before being 234 // marked they can be collected during this GC cycle. On the other 235 // hand allocating them black will reduce _GCmarktermination latency 236 // since more work is done in the mark phase. This tension is resolved 237 // by allocating white until the mark phase is approaching its end and 238 // then allocating black for the remainder of the mark phase. 239 var gcBlackenPromptly bool 240 241 const ( 242 _GCoff = iota // GC not running; sweeping in background, write barrier disabled 243 _GCmark // GC marking roots and workbufs, write barrier ENABLED 244 _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED 245 ) 246 247 //go:nosplit 248 func setGCPhase(x uint32) { 249 atomic.Store(&gcphase, x) 250 writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination 251 writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo 252 } 253 254 // gcMarkWorkerMode represents the mode that a concurrent mark worker 255 // should operate in. 256 // 257 // Concurrent marking happens through four different mechanisms. One 258 // is mutator assists, which happen in response to allocations and are 259 // not scheduled. The other three are variations in the per-P mark 260 // workers and are distinguished by gcMarkWorkerMode. 261 type gcMarkWorkerMode int 262 263 const ( 264 // gcMarkWorkerDedicatedMode indicates that the P of a mark 265 // worker is dedicated to running that mark worker. The mark 266 // worker should run without preemption. 267 gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota 268 269 // gcMarkWorkerFractionalMode indicates that a P is currently 270 // running the "fractional" mark worker. The fractional worker 271 // is necessary when GOMAXPROCS*gcGoalUtilization is not an 272 // integer. The fractional worker should run until it is 273 // preempted and will be scheduled to pick up the fractional 274 // part of GOMAXPROCS*gcGoalUtilization. 275 gcMarkWorkerFractionalMode 276 277 // gcMarkWorkerIdleMode indicates that a P is running the mark 278 // worker because it has nothing else to do. The idle worker 279 // should run until it is preempted and account its time 280 // against gcController.idleMarkTime. 281 gcMarkWorkerIdleMode 282 ) 283 284 // gcController implements the GC pacing controller that determines 285 // when to trigger concurrent garbage collection and how much marking 286 // work to do in mutator assists and background marking. 287 // 288 // It uses a feedback control algorithm to adjust the memstats.next_gc 289 // trigger based on the heap growth and GC CPU utilization each cycle. 
290 // This algorithm optimizes for heap growth to match GOGC and for CPU 291 // utilization between assist and background marking to be 25% of 292 // GOMAXPROCS. The high-level design of this algorithm is documented 293 // at https://golang.org/s/go15gcpacing. 294 var gcController = gcControllerState{ 295 // Initial trigger ratio guess. 296 triggerRatio: 7 / 8.0, 297 } 298 299 type gcControllerState struct { 300 // scanWork is the total scan work performed this cycle. This 301 // is updated atomically during the cycle. Updates occur in 302 // bounded batches, since it is both written and read 303 // throughout the cycle. 304 // 305 // Currently this is the bytes of heap scanned. For most uses, 306 // this is an opaque unit of work, but for estimation the 307 // definition is important. 308 scanWork int64 309 310 // bgScanCredit is the scan work credit accumulated by the 311 // concurrent background scan. This credit is accumulated by 312 // the background scan and stolen by mutator assists. This is 313 // updated atomically. Updates occur in bounded batches, since 314 // it is both written and read throughout the cycle. 315 bgScanCredit int64 316 317 // assistTime is the nanoseconds spent in mutator assists 318 // during this cycle. This is updated atomically. Updates 319 // occur in bounded batches, since it is both written and read 320 // throughout the cycle. 321 assistTime int64 322 323 // dedicatedMarkTime is the nanoseconds spent in dedicated 324 // mark workers during this cycle. This is updated atomically 325 // at the end of the concurrent mark phase. 326 dedicatedMarkTime int64 327 328 // fractionalMarkTime is the nanoseconds spent in the 329 // fractional mark worker during this cycle. This is updated 330 // atomically throughout the cycle and will be up-to-date if 331 // the fractional mark worker is not currently running. 332 fractionalMarkTime int64 333 334 // idleMarkTime is the nanoseconds spent in idle marking 335 // during this cycle. This is updated atomically throughout 336 // the cycle. 337 idleMarkTime int64 338 339 // bgMarkStartTime is the absolute start time in nanoseconds 340 // that the background mark phase started. 341 bgMarkStartTime int64 342 343 // assistTime is the absolute start time in nanoseconds that 344 // mutator assists were enabled. 345 assistStartTime int64 346 347 // heapGoal is the goal memstats.heap_live for when this cycle 348 // ends. This is computed at the beginning of each cycle. 349 heapGoal uint64 350 351 // dedicatedMarkWorkersNeeded is the number of dedicated mark 352 // workers that need to be started. This is computed at the 353 // beginning of each cycle and decremented atomically as 354 // dedicated mark workers get started. 355 dedicatedMarkWorkersNeeded int64 356 357 // assistWorkPerByte is the ratio of scan work to allocated 358 // bytes that should be performed by mutator assists. This is 359 // computed at the beginning of each cycle and updated every 360 // time heap_scan is updated. 361 assistWorkPerByte float64 362 363 // assistBytesPerWork is 1/assistWorkPerByte. 364 assistBytesPerWork float64 365 366 // fractionalUtilizationGoal is the fraction of wall clock 367 // time that should be spent in the fractional mark worker. 368 // For example, if the overall mark utilization goal is 25% 369 // and GOMAXPROCS is 6, one P will be a dedicated mark worker 370 // and this will be set to 0.5 so that 50% of the time some P 371 // is in a fractional mark worker. This is computed at the 372 // beginning of each cycle. 
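//
// As a rough sketch of the computation in startCycle:
//
//	total := GOMAXPROCS * gcGoalUtilization
//	dedicatedMarkWorkersNeeded = floor(total)
//	fractionalUtilizationGoal = total - floor(total)
//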
373 fractionalUtilizationGoal float64 374 375 // triggerRatio is the heap growth ratio at which the garbage 376 // collection cycle should start. E.g., if this is 0.6, then 377 // GC should start when the live heap has reached 1.6 times 378 // the heap size marked by the previous cycle. This is updated 379 // at the end of of each cycle. 380 triggerRatio float64 381 382 _ [sys.CacheLineSize]byte 383 384 // fractionalMarkWorkersNeeded is the number of fractional 385 // mark workers that need to be started. This is either 0 or 386 // 1. This is potentially updated atomically at every 387 // scheduling point (hence it gets its own cache line). 388 fractionalMarkWorkersNeeded int64 389 390 _ [sys.CacheLineSize]byte 391 } 392 393 // startCycle resets the GC controller's state and computes estimates 394 // for a new GC cycle. The caller must hold worldsema. 395 func (c *gcControllerState) startCycle() { 396 c.scanWork = 0 397 c.bgScanCredit = 0 398 c.assistTime = 0 399 c.dedicatedMarkTime = 0 400 c.fractionalMarkTime = 0 401 c.idleMarkTime = 0 402 403 // If this is the first GC cycle or we're operating on a very 404 // small heap, fake heap_marked so it looks like next_gc is 405 // the appropriate growth from heap_marked, even though the 406 // real heap_marked may not have a meaningful value (on the 407 // first cycle) or may be much smaller (resulting in a large 408 // error response). 409 if memstats.next_gc <= heapminimum { 410 memstats.heap_marked = uint64(float64(memstats.next_gc) / (1 + c.triggerRatio)) 411 memstats.heap_reachable = memstats.heap_marked 412 } 413 414 // Compute the heap goal for this cycle 415 c.heapGoal = memstats.heap_reachable + memstats.heap_reachable*uint64(gcpercent)/100 416 417 // Ensure that the heap goal is at least a little larger than 418 // the current live heap size. This may not be the case if GC 419 // start is delayed or if the allocation that pushed heap_live 420 // over next_gc is large or if the trigger is really close to 421 // GOGC. Assist is proportional to this distance, so enforce a 422 // minimum distance, even if it means going over the GOGC goal 423 // by a tiny bit. 424 if c.heapGoal < memstats.heap_live+1024*1024 { 425 c.heapGoal = memstats.heap_live + 1024*1024 426 } 427 428 // Compute the total mark utilization goal and divide it among 429 // dedicated and fractional workers. 430 totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization 431 c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal) 432 c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) 433 if c.fractionalUtilizationGoal > 0 { 434 c.fractionalMarkWorkersNeeded = 1 435 } else { 436 c.fractionalMarkWorkersNeeded = 0 437 } 438 439 // Clear per-P state 440 for _, p := range &allp { 441 if p == nil { 442 break 443 } 444 p.gcAssistTime = 0 445 } 446 447 // Compute initial values for controls that are updated 448 // throughout the cycle. 449 c.revise() 450 451 if debug.gcpacertrace > 0 { 452 print("pacer: assist ratio=", c.assistWorkPerByte, 453 " (scan ", memstats.heap_scan>>20, " MB in ", 454 work.initialHeapLive>>20, "->", 455 c.heapGoal>>20, " MB)", 456 " workers=", c.dedicatedMarkWorkersNeeded, 457 "+", c.fractionalMarkWorkersNeeded, "\n") 458 } 459 } 460 461 // revise updates the assist ratio during the GC cycle to account for 462 // improved estimates. This should be called either under STW or 463 // whenever memstats.heap_scan or memstats.heap_live is updated (with 464 // mheap_.lock held). 
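//
// As a rough sketch, revise recomputes
//
//	assistWorkPerByte = max(heap_scan - scanWork, 1000) / max(heapGoal - heap_live, 1)
//
// (and assistBytesPerWork as its inverse), so that if mutators perform this
// much scan work per byte allocated, the remaining scan work will be done by
// the time the remaining heap distance to the goal has been allocated.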
465 // 466 // It should only be called when gcBlackenEnabled != 0 (because this 467 // is when assists are enabled and the necessary statistics are 468 // available). 469 func (c *gcControllerState) revise() { 470 // Compute the expected scan work remaining. 471 // 472 // Note that the scannable heap size is likely to increase 473 // during the GC cycle. This is why it's important to revise 474 // the assist ratio throughout the cycle: if the scannable 475 // heap size increases, the assist ratio based on the initial 476 // scannable heap size may target too little scan work. 477 // 478 // This particular estimate is a strict upper bound on the 479 // possible remaining scan work for the current heap. 480 // You might consider dividing this by 2 (or by 481 // (100+GOGC)/100) to counter this over-estimation, but 482 // benchmarks show that this has almost no effect on mean 483 // mutator utilization, heap size, or assist time and it 484 // introduces the danger of under-estimating and letting the 485 // mutator outpace the garbage collector. 486 scanWorkExpected := int64(memstats.heap_scan) - c.scanWork 487 if scanWorkExpected < 1000 { 488 // We set a somewhat arbitrary lower bound on 489 // remaining scan work since if we aim a little high, 490 // we can miss by a little. 491 // 492 // We *do* need to enforce that this is at least 1, 493 // since marking is racy and double-scanning objects 494 // may legitimately make the expected scan work 495 // negative. 496 scanWorkExpected = 1000 497 } 498 499 // Compute the heap distance remaining. 500 heapDistance := int64(c.heapGoal) - int64(memstats.heap_live) 501 if heapDistance <= 0 { 502 // This shouldn't happen, but if it does, avoid 503 // dividing by zero or setting the assist negative. 504 heapDistance = 1 505 } 506 507 // Compute the mutator assist ratio so by the time the mutator 508 // allocates the remaining heap bytes up to next_gc, it will 509 // have done (or stolen) the remaining amount of scan work. 510 c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance) 511 c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected) 512 } 513 514 // endCycle updates the GC controller state at the end of the 515 // concurrent part of the GC cycle. 516 func (c *gcControllerState) endCycle() { 517 h_t := c.triggerRatio // For debugging 518 519 // Proportional response gain for the trigger controller. Must 520 // be in [0, 1]. Lower values smooth out transient effects but 521 // take longer to respond to phase changes. Higher values 522 // react to phase changes quickly, but are more affected by 523 // transient changes. Values near 1 may be unstable. 524 const triggerGain = 0.5 525 526 // Compute next cycle trigger ratio. First, this computes the 527 // "error" for this cycle; that is, how far off the trigger 528 // was from what it should have been, accounting for both heap 529 // growth and GC CPU utilization. We compute the actual heap 530 // growth during this cycle and scale that by how far off from 531 // the goal CPU utilization we were (to estimate the heap 532 // growth if we had the desired CPU utilization). The 533 // difference between this estimate and the GOGC-based goal 534 // heap growth is the error. 535 // 536 // TODO(austin): next_gc is based on heap_reachable, not 537 // heap_marked, which means the actual growth ratio 538 // technically isn't comparable to the trigger ratio. 
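//
// In the notation of the pacing design (https://golang.org/s/go15gcpacing),
// and as a sketch of the code below: with h_t the trigger ratio, h_a the
// actual growth ratio, h_g the goal growth ratio, and u_a/u_g the actual and
// goal CPU utilization, the controller computes
//
//	e = h_g - h_t - (u_a/u_g)*(h_a - h_t)
//	h_t' = h_t + triggerGain*e
//
// and then clamps h_t' to [0, 0.95*h_g].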
539 goalGrowthRatio := float64(gcpercent) / 100 540 actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 541 assistDuration := nanotime() - c.assistStartTime 542 543 // Assume background mark hit its utilization goal. 544 utilization := gcGoalUtilization 545 // Add assist utilization; avoid divide by zero. 546 if assistDuration > 0 { 547 utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) 548 } 549 550 triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio) 551 552 // Finally, we adjust the trigger for next time by this error, 553 // damped by the proportional gain. 554 c.triggerRatio += triggerGain * triggerError 555 if c.triggerRatio < 0 { 556 // This can happen if the mutator is allocating very 557 // quickly or the GC is scanning very slowly. 558 c.triggerRatio = 0 559 } else if c.triggerRatio > goalGrowthRatio*0.95 { 560 // Ensure there's always a little margin so that the 561 // mutator assist ratio isn't infinity. 562 c.triggerRatio = goalGrowthRatio * 0.95 563 } 564 565 if debug.gcpacertrace > 0 { 566 // Print controller state in terms of the design 567 // document. 568 H_m_prev := memstats.heap_marked 569 H_T := memstats.next_gc 570 h_a := actualGrowthRatio 571 H_a := memstats.heap_live 572 h_g := goalGrowthRatio 573 H_g := int64(float64(H_m_prev) * (1 + h_g)) 574 u_a := utilization 575 u_g := gcGoalUtilization 576 W_a := c.scanWork 577 print("pacer: H_m_prev=", H_m_prev, 578 " h_t=", h_t, " H_T=", H_T, 579 " h_a=", h_a, " H_a=", H_a, 580 " h_g=", h_g, " H_g=", H_g, 581 " u_a=", u_a, " u_g=", u_g, 582 " W_a=", W_a, 583 " goalΔ=", goalGrowthRatio-h_t, 584 " actualΔ=", h_a-h_t, 585 " u_a/u_g=", u_a/u_g, 586 "\n") 587 } 588 } 589 590 // enlistWorker encourages another dedicated mark worker to start on 591 // another P if there are spare worker slots. It is used by putfull 592 // when more work is made available. 593 // 594 //go:nowritebarrier 595 func (c *gcControllerState) enlistWorker() { 596 if c.dedicatedMarkWorkersNeeded <= 0 { 597 return 598 } 599 // Pick a random other P to preempt. 600 if gomaxprocs <= 1 { 601 return 602 } 603 gp := getg() 604 if gp == nil || gp.m == nil || gp.m.p == 0 { 605 return 606 } 607 myID := gp.m.p.ptr().id 608 for tries := 0; tries < 5; tries++ { 609 id := int32(fastrand1() % uint32(gomaxprocs-1)) 610 if id >= myID { 611 id++ 612 } 613 p := allp[id] 614 if p.status != _Prunning { 615 continue 616 } 617 if preemptone(p) { 618 return 619 } 620 } 621 } 622 623 // findRunnableGCWorker returns the background mark worker for _p_ if it 624 // should be run. This must only be called when gcBlackenEnabled != 0. 625 func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { 626 if gcBlackenEnabled == 0 { 627 throw("gcControllerState.findRunnable: blackening not enabled") 628 } 629 if _p_.gcBgMarkWorker == nil { 630 // The mark worker associated with this P is blocked 631 // performing a mark transition. We can't run it 632 // because it may be on some other run or wait queue. 633 return nil 634 } 635 636 if !gcMarkWorkAvailable(_p_) { 637 // No work to be done right now. This can happen at 638 // the end of the mark phase when there are still 639 // assists tapering off. Don't bother running a worker 640 // now because it'll just return immediately. 
641 return nil 642 } 643 644 decIfPositive := func(ptr *int64) bool { 645 if *ptr > 0 { 646 if atomic.Xaddint64(ptr, -1) >= 0 { 647 return true 648 } 649 // We lost a race 650 atomic.Xaddint64(ptr, +1) 651 } 652 return false 653 } 654 655 if decIfPositive(&c.dedicatedMarkWorkersNeeded) { 656 // This P is now dedicated to marking until the end of 657 // the concurrent mark phase. 658 _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode 659 // TODO(austin): This P isn't going to run anything 660 // else for a while, so kick everything out of its run 661 // queue. 662 } else { 663 if !decIfPositive(&c.fractionalMarkWorkersNeeded) { 664 // No more workers are need right now. 665 return nil 666 } 667 668 // This P has picked the token for the fractional worker. 669 // Is the GC currently under or at the utilization goal? 670 // If so, do more work. 671 // 672 // We used to check whether doing one time slice of work 673 // would remain under the utilization goal, but that has the 674 // effect of delaying work until the mutator has run for 675 // enough time slices to pay for the work. During those time 676 // slices, write barriers are enabled, so the mutator is running slower. 677 // Now instead we do the work whenever we're under or at the 678 // utilization work and pay for it by letting the mutator run later. 679 // This doesn't change the overall utilization averages, but it 680 // front loads the GC work so that the GC finishes earlier and 681 // write barriers can be turned off sooner, effectively giving 682 // the mutator a faster machine. 683 // 684 // The old, slower behavior can be restored by setting 685 // gcForcePreemptNS = forcePreemptNS. 686 const gcForcePreemptNS = 0 687 688 // TODO(austin): We could fast path this and basically 689 // eliminate contention on c.fractionalMarkWorkersNeeded by 690 // precomputing the minimum time at which it's worth 691 // next scheduling the fractional worker. Then Ps 692 // don't have to fight in the window where we've 693 // passed that deadline and no one has started the 694 // worker yet. 695 // 696 // TODO(austin): Shorter preemption interval for mark 697 // worker to improve fairness and give this 698 // finer-grained control over schedule? 699 now := nanotime() - gcController.bgMarkStartTime 700 then := now + gcForcePreemptNS 701 timeUsed := c.fractionalMarkTime + gcForcePreemptNS 702 if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal { 703 // Nope, we'd overshoot the utilization goal 704 atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1) 705 return nil 706 } 707 _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode 708 } 709 710 // Run the background mark worker 711 gp := _p_.gcBgMarkWorker 712 casgstatus(gp, _Gwaiting, _Grunnable) 713 if trace.enabled { 714 traceGoUnpark(gp, 0) 715 } 716 return gp 717 } 718 719 // gcGoalUtilization is the goal CPU utilization for background 720 // marking as a fraction of GOMAXPROCS. 721 const gcGoalUtilization = 0.25 722 723 // gcCreditSlack is the amount of scan work credit that can can 724 // accumulate locally before updating gcController.scanWork and, 725 // optionally, gcController.bgScanCredit. Lower values give a more 726 // accurate assist ratio and make it more likely that assists will 727 // successfully steal background credit. Higher values reduce memory 728 // contention. 729 const gcCreditSlack = 2000 730 731 // gcAssistTimeSlack is the nanoseconds of mutator assist time that 732 // can accumulate on a P before updating gcController.assistTime. 
733 const gcAssistTimeSlack = 5000 734 735 // gcOverAssistBytes determines how many extra allocation bytes of 736 // assist credit a GC assist builds up when an assist happens. This 737 // amortizes the cost of an assist by pre-paying for this many bytes 738 // of future allocations. 739 const gcOverAssistBytes = 1 << 20 740 741 var work struct { 742 full uint64 // lock-free list of full blocks workbuf 743 empty uint64 // lock-free list of empty blocks workbuf 744 pad0 [sys.CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait 745 746 markrootNext uint32 // next markroot job 747 markrootJobs uint32 // number of markroot jobs 748 749 nproc uint32 750 tstart int64 751 nwait uint32 752 ndone uint32 753 alldone note 754 755 // Number of roots of various root types. Set by gcMarkRootPrepare. 756 nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int 757 758 // finalizersDone indicates that finalizers and objects with 759 // finalizers have been scanned by markroot. During concurrent 760 // GC, this happens during the concurrent scan phase. During 761 // STW GC, this happens during mark termination. 762 finalizersDone bool 763 764 // Each type of GC state transition is protected by a lock. 765 // Since multiple threads can simultaneously detect the state 766 // transition condition, any thread that detects a transition 767 // condition must acquire the appropriate transition lock, 768 // re-check the transition condition and return if it no 769 // longer holds or perform the transition if it does. 770 // Likewise, any transition must invalidate the transition 771 // condition before releasing the lock. This ensures that each 772 // transition is performed by exactly one thread and threads 773 // that need the transition to happen block until it has 774 // happened. 775 // 776 // startSema protects the transition from "off" to mark or 777 // mark termination. 778 startSema uint32 779 // markDoneSema protects transitions from mark 1 to mark 2 and 780 // from mark 2 to mark termination. 781 markDoneSema uint32 782 783 bgMarkReady note // signal background mark worker has started 784 bgMarkDone uint32 // cas to 1 when at a background mark completion point 785 // Background mark completion signaling 786 787 // mode is the concurrency mode of the current GC cycle. 788 mode gcMode 789 790 // Copy of mheap.allspans for marker or sweeper. 791 spans []*mspan 792 793 // totaltime is the CPU nanoseconds spent in GC since the 794 // program started if debug.gctrace > 0. 795 totaltime int64 796 797 // bytesMarked is the number of bytes marked this cycle. This 798 // includes bytes blackened in scanned objects, noscan objects 799 // that go straight to black, and permagrey objects scanned by 800 // markroot during the concurrent scan phase. This is updated 801 // atomically during the cycle. Updates may be batched 802 // arbitrarily, since the value is only read at the end of the 803 // cycle. 804 // 805 // Because of benign races during marking, this number may not 806 // be the exact number of marked bytes, but it should be very 807 // close. 808 bytesMarked uint64 809 810 // initialHeapLive is the value of memstats.heap_live at the 811 // beginning of this GC cycle. 812 initialHeapLive uint64 813 814 // assistQueue is a queue of assists that are blocked because 815 // there was neither enough credit to steal or enough work to 816 // do. 817 assistQueue struct { 818 lock mutex 819 head, tail guintptr 820 } 821 822 // Timing/utilization stats for this cycle. 
823 stwprocs, maxprocs int32 824 tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start 825 826 pauseNS int64 // total STW time this cycle 827 pauseStart int64 // nanotime() of last STW 828 829 // debug.gctrace heap sizes for this cycle. 830 heap0, heap1, heap2, heapGoal uint64 831 } 832 833 // GC runs a garbage collection and blocks the caller until the 834 // garbage collection is complete. It may also block the entire 835 // program. 836 func GC() { 837 gcStart(gcForceBlockMode, false) 838 } 839 840 // gcMode indicates how concurrent a GC cycle should be. 841 type gcMode int 842 843 const ( 844 gcBackgroundMode gcMode = iota // concurrent GC and sweep 845 gcForceMode // stop-the-world GC now, concurrent sweep 846 gcForceBlockMode // stop-the-world GC now and STW sweep 847 ) 848 849 // gcShouldStart returns true if the exit condition for the _GCoff 850 // phase has been met. The exit condition should be tested when 851 // allocating. 852 // 853 // If forceTrigger is true, it ignores the current heap size, but 854 // checks all other conditions. In general this should be false. 855 func gcShouldStart(forceTrigger bool) bool { 856 return gcphase == _GCoff && (forceTrigger || memstats.heap_live >= memstats.next_gc) && memstats.enablegc && panicking == 0 && gcpercent >= 0 857 } 858 859 // gcStart transitions the GC from _GCoff to _GCmark (if mode == 860 // gcBackgroundMode) or _GCmarktermination (if mode != 861 // gcBackgroundMode) by performing sweep termination and GC 862 // initialization. 863 // 864 // This may return without performing this transition in some cases, 865 // such as when called on a system stack or with locks held. 866 func gcStart(mode gcMode, forceTrigger bool) { 867 // Since this is called from malloc and malloc is called in 868 // the guts of a number of libraries that might be holding 869 // locks, don't attempt to start GC in non-preemptible or 870 // potentially unstable situations. 871 mp := acquirem() 872 if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" { 873 releasem(mp) 874 return 875 } 876 releasem(mp) 877 mp = nil 878 879 // Pick up the remaining unswept/not being swept spans concurrently 880 // 881 // This shouldn't happen if we're being invoked in background 882 // mode since proportional sweep should have just finished 883 // sweeping everything, but rounding errors, etc, may leave a 884 // few spans unswept. In forced mode, this is necessary since 885 // GC can be forced at any point in the sweeping cycle. 886 // 887 // We check the transition condition continuously here in case 888 // this G gets delayed in to the next GC cycle. 889 for (mode != gcBackgroundMode || gcShouldStart(forceTrigger)) && gosweepone() != ^uintptr(0) { 890 sweep.nbgsweep++ 891 } 892 893 // Perform GC initialization and the sweep termination 894 // transition. 895 // 896 // If this is a forced GC, don't acquire the transition lock 897 // or re-check the transition condition because we 898 // specifically *don't* want to share the transition with 899 // another thread. 900 useStartSema := mode == gcBackgroundMode 901 if useStartSema { 902 semacquire(&work.startSema, false) 903 // Re-check transition condition under transition lock. 904 if !gcShouldStart(forceTrigger) { 905 semrelease(&work.startSema) 906 return 907 } 908 } 909 910 // In gcstoptheworld debug mode, upgrade the mode accordingly. 
911 // We do this after re-checking the transition condition so 912 // that multiple goroutines that detect the heap trigger don't 913 // start multiple STW GCs. 914 if mode == gcBackgroundMode { 915 if debug.gcstoptheworld == 1 { 916 mode = gcForceMode 917 } else if debug.gcstoptheworld == 2 { 918 mode = gcForceBlockMode 919 } 920 } 921 922 // Ok, we're doing it! Stop everybody else 923 semacquire(&worldsema, false) 924 925 if trace.enabled { 926 traceGCStart() 927 } 928 929 if mode == gcBackgroundMode { 930 gcBgMarkStartWorkers() 931 } 932 now := nanotime() 933 work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs 934 work.tSweepTerm = now 935 work.heap0 = memstats.heap_live 936 work.pauseNS = 0 937 work.mode = mode 938 939 work.pauseStart = now 940 systemstack(stopTheWorldWithSema) 941 // Finish sweep before we start concurrent scan. 942 systemstack(func() { 943 finishsweep_m(true) 944 }) 945 // clearpools before we start the GC. If we wait they memory will not be 946 // reclaimed until the next GC cycle. 947 clearpools() 948 949 gcResetMarkState() 950 951 work.finalizersDone = false 952 953 if mode == gcBackgroundMode { // Do as much work concurrently as possible 954 gcController.startCycle() 955 work.heapGoal = gcController.heapGoal 956 957 // Enter concurrent mark phase and enable 958 // write barriers. 959 // 960 // Because the world is stopped, all Ps will 961 // observe that write barriers are enabled by 962 // the time we start the world and begin 963 // scanning. 964 // 965 // It's necessary to enable write barriers 966 // during the scan phase for several reasons: 967 // 968 // They must be enabled for writes to higher 969 // stack frames before we scan stacks and 970 // install stack barriers because this is how 971 // we track writes to inactive stack frames. 972 // (Alternatively, we could not install stack 973 // barriers over frame boundaries with 974 // up-pointers). 975 // 976 // They must be enabled before assists are 977 // enabled because they must be enabled before 978 // any non-leaf heap objects are marked. Since 979 // allocations are blocked until assists can 980 // happen, we want enable assists as early as 981 // possible. 982 setGCPhase(_GCmark) 983 984 // markrootSpans uses work.spans, so make sure 985 // it is up to date. 986 gcCopySpans() 987 988 gcBgMarkPrepare() // Must happen before assist enable. 989 gcMarkRootPrepare() 990 991 // At this point all Ps have enabled the write 992 // barrier, thus maintaining the no white to 993 // black invariant. Enable mutator assists to 994 // put back-pressure on fast allocating 995 // mutators. 996 atomic.Store(&gcBlackenEnabled, 1) 997 998 // Assists and workers can start the moment we start 999 // the world. 1000 gcController.assistStartTime = now 1001 gcController.bgMarkStartTime = now 1002 1003 // Concurrent mark. 1004 systemstack(startTheWorldWithSema) 1005 now = nanotime() 1006 work.pauseNS += now - work.pauseStart 1007 work.tMark = now 1008 } else { 1009 t := nanotime() 1010 work.tMark, work.tMarkTerm = t, t 1011 work.heapGoal = work.heap0 1012 1013 // Perform mark termination. This will restart the world. 1014 gcMarkTermination() 1015 } 1016 1017 if useStartSema { 1018 semrelease(&work.startSema) 1019 } 1020 } 1021 1022 // gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2 1023 // to mark termination. 1024 // 1025 // This should be called when all mark work has been drained. 
In mark 1026 // 1, this includes all root marking jobs, global work buffers, and 1027 // active work buffers in assists and background workers; however, 1028 // work may still be cached in per-P work buffers. In mark 2, per-P 1029 // caches are disabled. 1030 // 1031 // The calling context must be preemptible. 1032 // 1033 // Note that it is explicitly okay to have write barriers in this 1034 // function because completion of concurrent mark is best-effort 1035 // anyway. Any work created by write barriers here will be cleaned up 1036 // by mark termination. 1037 func gcMarkDone() { 1038 top: 1039 semacquire(&work.markDoneSema, false) 1040 1041 // Re-check transition condition under transition lock. 1042 if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { 1043 semrelease(&work.markDoneSema) 1044 return 1045 } 1046 1047 // Disallow starting new workers so that any remaining workers 1048 // in the current mark phase will drain out. 1049 // 1050 // TODO(austin): Should dedicated workers keep an eye on this 1051 // and exit gcDrain promptly? 1052 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) 1053 atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff) 1054 1055 if !gcBlackenPromptly { 1056 // Transition from mark 1 to mark 2. 1057 // 1058 // The global work list is empty, but there can still be work 1059 // sitting in the per-P work caches and there can be more 1060 // objects reachable from global roots since they don't have write 1061 // barriers. Rescan some roots and flush work caches. 1062 1063 gcMarkRootCheck() 1064 1065 // Disallow caching workbufs and indicate that we're in mark 2. 1066 gcBlackenPromptly = true 1067 1068 // Prevent completion of mark 2 until we've flushed 1069 // cached workbufs. 1070 atomic.Xadd(&work.nwait, -1) 1071 1072 // Rescan global data and BSS. There may still work 1073 // workers running at this point, so bump "jobs" down 1074 // before "next" so they won't try running root jobs 1075 // until we set next. 1076 atomic.Store(&work.markrootJobs, uint32(fixedRootCount+work.nDataRoots+work.nBSSRoots)) 1077 atomic.Store(&work.markrootNext, fixedRootCount) 1078 1079 // GC is set up for mark 2. Let Gs blocked on the 1080 // transition lock go while we flush caches. 1081 semrelease(&work.markDoneSema) 1082 1083 systemstack(func() { 1084 // Flush all currently cached workbufs and 1085 // ensure all Ps see gcBlackenPromptly. This 1086 // also blocks until any remaining mark 1 1087 // workers have exited their loop so we can 1088 // start new mark 2 workers that will observe 1089 // the new root marking jobs. 1090 forEachP(func(_p_ *p) { 1091 _p_.gcw.dispose() 1092 }) 1093 }) 1094 1095 // Now we can start up mark 2 workers. 1096 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) 1097 atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff) 1098 1099 incnwait := atomic.Xadd(&work.nwait, +1) 1100 if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { 1101 // This loop will make progress because 1102 // gcBlackenPromptly is now true, so it won't 1103 // take this same "if" branch. 1104 goto top 1105 } 1106 } else { 1107 // Transition to mark termination. 1108 now := nanotime() 1109 work.tMarkTerm = now 1110 work.pauseStart = now 1111 getg().m.preemptoff = "gcing" 1112 systemstack(stopTheWorldWithSema) 1113 // The gcphase is _GCmark, it will transition to _GCmarktermination 1114 // below. 
The important thing is that the wb remains active until 1115 // all marking is complete. This includes writes made by the GC. 1116 1117 // markroot is done now, so record that objects with 1118 // finalizers have been scanned. 1119 work.finalizersDone = true 1120 1121 // Flush the gcWork caches. This must be done before 1122 // endCycle since endCycle depends on statistics kept 1123 // in these caches. 1124 gcFlushGCWork() 1125 1126 // Wake all blocked assists. These will run when we 1127 // start the world again. 1128 gcWakeAllAssists() 1129 1130 // Likewise, release the transition lock. Blocked 1131 // workers and assists will run when we start the 1132 // world again. 1133 semrelease(&work.markDoneSema) 1134 1135 gcController.endCycle() 1136 1137 // Perform mark termination. This will restart the world. 1138 gcMarkTermination() 1139 } 1140 } 1141 1142 func gcMarkTermination() { 1143 // World is stopped. 1144 // Start marktermination which includes enabling the write barrier. 1145 atomic.Store(&gcBlackenEnabled, 0) 1146 gcBlackenPromptly = false 1147 setGCPhase(_GCmarktermination) 1148 1149 work.heap1 = memstats.heap_live 1150 startTime := nanotime() 1151 1152 mp := acquirem() 1153 mp.preemptoff = "gcing" 1154 _g_ := getg() 1155 _g_.m.traceback = 2 1156 gp := _g_.m.curg 1157 casgstatus(gp, _Grunning, _Gwaiting) 1158 gp.waitreason = "garbage collection" 1159 1160 // Run gc on the g0 stack. We do this so that the g stack 1161 // we're currently running on will no longer change. Cuts 1162 // the root set down a bit (g0 stacks are not scanned, and 1163 // we don't need to scan gc's internal state). We also 1164 // need to switch to g0 so we can shrink the stack. 1165 systemstack(func() { 1166 gcMark(startTime) 1167 // Must return immediately. 1168 // The outer function's stack may have moved 1169 // during gcMark (it shrinks stacks, including the 1170 // outer function's stack), so we must not refer 1171 // to any of its variables. Return back to the 1172 // non-system stack to pick up the new addresses 1173 // before continuing. 1174 }) 1175 1176 systemstack(func() { 1177 work.heap2 = work.bytesMarked 1178 if debug.gccheckmark > 0 { 1179 // Run a full stop-the-world mark using checkmark bits, 1180 // to check that we didn't forget to mark anything during 1181 // the concurrent mark process. 1182 gcResetMarkState() 1183 initCheckmarks() 1184 gcMark(startTime) 1185 clearCheckmarks() 1186 } 1187 1188 // marking is complete so we can turn the write barrier off 1189 setGCPhase(_GCoff) 1190 gcSweep(work.mode) 1191 1192 if debug.gctrace > 1 { 1193 startTime = nanotime() 1194 // The g stacks have been scanned so 1195 // they have gcscanvalid==true and gcworkdone==true. 1196 // Reset these so that all stacks will be rescanned. 1197 gcResetMarkState() 1198 finishsweep_m(true) 1199 1200 // Still in STW but gcphase is _GCoff, reset to _GCmarktermination 1201 // At this point all objects will be found during the gcMark which 1202 // does a complete STW mark and object scan. 1203 setGCPhase(_GCmarktermination) 1204 gcMark(startTime) 1205 setGCPhase(_GCoff) // marking is done, turn off wb. 
1206 gcSweep(work.mode) 1207 } 1208 }) 1209 1210 _g_.m.traceback = 0 1211 casgstatus(gp, _Gwaiting, _Grunning) 1212 1213 if trace.enabled { 1214 traceGCDone() 1215 } 1216 1217 // all done 1218 mp.preemptoff = "" 1219 1220 if gcphase != _GCoff { 1221 throw("gc done but gcphase != _GCoff") 1222 } 1223 1224 // Update timing memstats 1225 now, unixNow := nanotime(), unixnanotime() 1226 work.pauseNS += now - work.pauseStart 1227 work.tEnd = now 1228 atomic.Store64(&memstats.last_gc, uint64(unixNow)) // must be Unix time to make sense to user 1229 memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) 1230 memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) 1231 memstats.pause_total_ns += uint64(work.pauseNS) 1232 1233 // Update work.totaltime. 1234 sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm) 1235 // We report idle marking time below, but omit it from the 1236 // overall utilization here since it's "free". 1237 markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime 1238 markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm) 1239 cycleCpu := sweepTermCpu + markCpu + markTermCpu 1240 work.totaltime += cycleCpu 1241 1242 // Compute overall GC CPU utilization. 1243 totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) 1244 memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) 1245 1246 memstats.numgc++ 1247 1248 systemstack(startTheWorldWithSema) 1249 1250 // Free stack spans. This must be done between GC cycles. 1251 systemstack(freeStackSpans) 1252 1253 semrelease(&worldsema) 1254 1255 releasem(mp) 1256 mp = nil 1257 1258 if debug.gctrace > 0 { 1259 util := int(memstats.gc_cpu_fraction * 100) 1260 1261 // Install WB phase is no longer used. 1262 tInstallWB := work.tMark 1263 installWBCpu := int64(0) 1264 1265 // Scan phase is no longer used. 1266 tScan := tInstallWB 1267 scanCpu := int64(0) 1268 1269 // TODO: Clean up the gctrace format. 1270 1271 var sbuf [24]byte 1272 printlock() 1273 print("gc ", memstats.numgc, 1274 " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", 1275 util, "%: ") 1276 prev := work.tSweepTerm 1277 for i, ns := range []int64{tScan, tInstallWB, work.tMark, work.tMarkTerm, work.tEnd} { 1278 if i != 0 { 1279 print("+") 1280 } 1281 print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev)))) 1282 prev = ns 1283 } 1284 print(" ms clock, ") 1285 for i, ns := range []int64{sweepTermCpu, scanCpu, installWBCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} { 1286 if i == 4 || i == 5 { 1287 // Separate mark time components with /. 1288 print("/") 1289 } else if i != 0 { 1290 print("+") 1291 } 1292 print(string(fmtNSAsMS(sbuf[:], uint64(ns)))) 1293 } 1294 print(" ms cpu, ", 1295 work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ", 1296 work.heapGoal>>20, " MB goal, ", 1297 work.maxprocs, " P") 1298 if work.mode != gcBackgroundMode { 1299 print(" (forced)") 1300 } 1301 print("\n") 1302 printunlock() 1303 } 1304 sweep.nbgsweep = 0 1305 sweep.npausesweep = 0 1306 1307 // now that gc is done, kick off finalizer thread if needed 1308 if !concurrentSweep { 1309 // give the queued finalizers, if any, a chance to run 1310 Gosched() 1311 } 1312 } 1313 1314 // gcBgMarkStartWorkers prepares background mark worker goroutines. 
1315 // These goroutines will not run until the mark phase, but they must 1316 // be started while the work is not stopped and from a regular G 1317 // stack. The caller must hold worldsema. 1318 func gcBgMarkStartWorkers() { 1319 // Background marking is performed by per-P G's. Ensure that 1320 // each P has a background GC G. 1321 for _, p := range &allp { 1322 if p == nil || p.status == _Pdead { 1323 break 1324 } 1325 if p.gcBgMarkWorker == nil { 1326 go gcBgMarkWorker(p) 1327 notetsleepg(&work.bgMarkReady, -1) 1328 noteclear(&work.bgMarkReady) 1329 } 1330 } 1331 } 1332 1333 // gcBgMarkPrepare sets up state for background marking. 1334 // Mutator assists must not yet be enabled. 1335 func gcBgMarkPrepare() { 1336 // Background marking will stop when the work queues are empty 1337 // and there are no more workers (note that, since this is 1338 // concurrent, this may be a transient state, but mark 1339 // termination will clean it up). Between background workers 1340 // and assists, we don't really know how many workers there 1341 // will be, so we pretend to have an arbitrarily large number 1342 // of workers, almost all of which are "waiting". While a 1343 // worker is working it decrements nwait. If nproc == nwait, 1344 // there are no workers. 1345 work.nproc = ^uint32(0) 1346 work.nwait = ^uint32(0) 1347 } 1348 1349 func gcBgMarkWorker(p *p) { 1350 // Register this G as the background mark worker for p. 1351 casgp := func(gpp **g, old, new *g) bool { 1352 return casp((*unsafe.Pointer)(unsafe.Pointer(gpp)), unsafe.Pointer(old), unsafe.Pointer(new)) 1353 } 1354 1355 gp := getg() 1356 mp := acquirem() 1357 owned := casgp(&p.gcBgMarkWorker, nil, gp) 1358 // After this point, the background mark worker is scheduled 1359 // cooperatively by gcController.findRunnable. Hence, it must 1360 // never be preempted, as this would put it into _Grunnable 1361 // and put it on a run queue. Instead, when the preempt flag 1362 // is set, this puts itself into _Gwaiting to be woken up by 1363 // gcController.findRunnable at the appropriate time. 1364 notewakeup(&work.bgMarkReady) 1365 if !owned { 1366 // A sleeping worker came back and reassociated with 1367 // the P. That's fine. 1368 releasem(mp) 1369 return 1370 } 1371 1372 for { 1373 // Go to sleep until woken by gcContoller.findRunnable. 1374 // We can't releasem yet since even the call to gopark 1375 // may be preempted. 1376 gopark(func(g *g, mp unsafe.Pointer) bool { 1377 releasem((*m)(mp)) 1378 return true 1379 }, unsafe.Pointer(mp), "GC worker (idle)", traceEvGoBlock, 0) 1380 1381 // Loop until the P dies and disassociates this 1382 // worker. (The P may later be reused, in which case 1383 // it will get a new worker.) 1384 if p.gcBgMarkWorker != gp { 1385 break 1386 } 1387 1388 // Disable preemption so we can use the gcw. If the 1389 // scheduler wants to preempt us, we'll stop draining, 1390 // dispose the gcw, and then preempt. 
1391 mp = acquirem() 1392 1393 if gcBlackenEnabled == 0 { 1394 throw("gcBgMarkWorker: blackening not enabled") 1395 } 1396 1397 startTime := nanotime() 1398 1399 decnwait := atomic.Xadd(&work.nwait, -1) 1400 if decnwait == work.nproc { 1401 println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) 1402 throw("work.nwait was > work.nproc") 1403 } 1404 1405 switch p.gcMarkWorkerMode { 1406 default: 1407 throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") 1408 case gcMarkWorkerDedicatedMode: 1409 gcDrain(&p.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) 1410 case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode: 1411 gcDrain(&p.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) 1412 } 1413 1414 // If we are nearing the end of mark, dispose 1415 // of the cache promptly. We must do this 1416 // before signaling that we're no longer 1417 // working so that other workers can't observe 1418 // no workers and no work while we have this 1419 // cached, and before we compute done. 1420 if gcBlackenPromptly { 1421 p.gcw.dispose() 1422 } 1423 1424 // Account for time. 1425 duration := nanotime() - startTime 1426 switch p.gcMarkWorkerMode { 1427 case gcMarkWorkerDedicatedMode: 1428 atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) 1429 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) 1430 case gcMarkWorkerFractionalMode: 1431 atomic.Xaddint64(&gcController.fractionalMarkTime, duration) 1432 atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) 1433 case gcMarkWorkerIdleMode: 1434 atomic.Xaddint64(&gcController.idleMarkTime, duration) 1435 } 1436 1437 // Was this the last worker and did we run out 1438 // of work? 1439 incnwait := atomic.Xadd(&work.nwait, +1) 1440 if incnwait > work.nproc { 1441 println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode, 1442 "work.nwait=", incnwait, "work.nproc=", work.nproc) 1443 throw("work.nwait > work.nproc") 1444 } 1445 1446 // If this worker reached a background mark completion 1447 // point, signal the main GC goroutine. 1448 if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { 1449 // Make this G preemptible and disassociate it 1450 // as the worker for this P so 1451 // findRunnableGCWorker doesn't try to 1452 // schedule it. 1453 p.gcBgMarkWorker = nil 1454 releasem(mp) 1455 1456 gcMarkDone() 1457 1458 // Disable preemption and reassociate with the P. 1459 // 1460 // We may be running on a different P at this 1461 // point, so this has to be done carefully. 1462 mp = acquirem() 1463 if !casgp(&p.gcBgMarkWorker, nil, gp) { 1464 // The P got a new worker. 1465 releasem(mp) 1466 break 1467 } 1468 } 1469 } 1470 } 1471 1472 // gcMarkWorkAvailable returns true if executing a mark worker 1473 // on p is potentially useful. p may be nil, in which case it only 1474 // checks the global sources of work. 1475 func gcMarkWorkAvailable(p *p) bool { 1476 if p != nil && !p.gcw.empty() { 1477 return true 1478 } 1479 if atomic.Load64(&work.full) != 0 { 1480 return true // global work available 1481 } 1482 if work.markrootNext < work.markrootJobs { 1483 return true // root scan work available 1484 } 1485 return false 1486 } 1487 1488 // gcFlushGCWork disposes the gcWork caches of all Ps. The world must 1489 // be stopped. 1490 //go:nowritebarrier 1491 func gcFlushGCWork() { 1492 // Gather all cached GC work. All other Ps are stopped, so 1493 // it's safe to manipulate their GC work caches. 
// gcFlushGCWork disposes the gcWork caches of all Ps. The world must
// be stopped.
//go:nowritebarrier
func gcFlushGCWork() {
	// Gather all cached GC work. All other Ps are stopped, so
	// it's safe to manipulate their GC work caches.
	for i := 0; i < int(gomaxprocs); i++ {
		allp[i].gcw.dispose()
	}
}

// gcMark runs the mark (or, for concurrent GC, mark termination).
// STW is in effect at this point.
//TODO go:nowritebarrier
func gcMark(start_time int64) {
	if debug.allocfreetrace > 0 {
		tracegc()
	}

	if gcphase != _GCmarktermination {
		throw("in gcMark expecting to see gcphase as _GCmarktermination")
	}
	work.tstart = start_time

	gcCopySpans() // TODO(rlh): should this be hoisted and done only once? Right now it is done for normal marking and also for checkmarking.

	// Make sure the per-P gcWork caches are empty. During mark
	// termination, these caches can still be used temporarily,
	// but must be disposed to the global lists immediately.
	gcFlushGCWork()

	// Queue root marking jobs.
	gcMarkRootPrepare()

	work.nwait = 0
	work.ndone = 0
	work.nproc = uint32(gcprocs())

	if trace.enabled {
		traceGCScanStart()
	}

	if work.nproc > 1 {
		noteclear(&work.alldone)
		helpgc(int32(work.nproc))
	}

	gchelperstart()

	var gcw gcWork
	gcDrain(&gcw, gcDrainBlock)
	gcw.dispose()

	gcMarkRootCheck()
	if work.full != 0 {
		throw("work.full != 0")
	}

	if work.nproc > 1 {
		notesleep(&work.alldone)
	}

	// markroot is done now, so record that objects with
	// finalizers have been scanned.
	work.finalizersDone = true

	for i := 0; i < int(gomaxprocs); i++ {
		if !allp[i].gcw.empty() {
			throw("P has cached GC work at end of mark termination")
		}
	}

	if trace.enabled {
		traceGCScanDone()
	}

	cachestats()

	// Compute the reachable heap size at the beginning of the
	// cycle. This is approximately the marked heap size at the
	// end (which we know) minus the amount of marked heap that
	// was allocated after marking began (which we don't know, but
	// is approximately the amount of heap that was allocated
	// since marking began).
	allocatedDuringCycle := memstats.heap_live - work.initialHeapLive
	if work.bytesMarked >= allocatedDuringCycle {
		memstats.heap_reachable = work.bytesMarked - allocatedDuringCycle
	} else {
		// This can happen if most of the allocation during
		// the cycle never became reachable from the heap.
		// Just set the reachable heap approximation to 0 and
		// let the heapminimum kick in below.
		memstats.heap_reachable = 0
	}

	// Trigger the next GC cycle when the allocated heap has grown
	// by triggerRatio over the reachable heap size. Assume that
	// we're in steady state, so the reachable heap size is the
	// same now as it was at the beginning of the GC cycle.
	memstats.next_gc = uint64(float64(memstats.heap_reachable) * (1 + gcController.triggerRatio))
	if memstats.next_gc < heapminimum {
		memstats.next_gc = heapminimum
	}
	if int64(memstats.next_gc) < 0 {
		print("next_gc=", memstats.next_gc, " bytesMarked=", work.bytesMarked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "\n")
		throw("next_gc underflow")
	}

	// Update other GC heap size stats.
	memstats.heap_live = work.bytesMarked
	memstats.heap_marked = work.bytesMarked
	memstats.heap_scan = uint64(gcController.scanWork)

	minNextGC := memstats.heap_live + sweepMinHeapDistance*uint64(gcpercent)/100
	if memstats.next_gc < minNextGC {
		// The allocated heap is already past the trigger.
		// This can happen if the triggerRatio is very low and
		// the reachable heap estimate is less than the live
		// heap size.
		//
		// Concurrent sweep happens in the heap growth from
		// heap_live to next_gc, so bump next_gc up to ensure
		// that concurrent sweep has some heap growth in which
		// to perform sweeping before we start the next GC
		// cycle.
		memstats.next_gc = minNextGC
	}

	if trace.enabled {
		traceHeapAlloc()
		traceNextGC()
	}
}
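// The trigger arithmetic in gcMark can be worked through by hand. For
// example, with 96 MB marked, 16 MB allocated during the cycle, and a
// trigger ratio of 7/8, the reachable estimate is 80 MB and the next cycle
// is triggered once the allocated heap reaches 150 MB. Below is a minimal
// sketch of just this computation, assuming the sweep-distance floor shown
// above is applied separately; exampleNextGC and its parameters are
// hypothetical names, not part of the runtime.
func exampleNextGC(bytesMarked, allocatedDuringCycle, heapMin uint64, triggerRatio float64) uint64 {
	var reachable uint64
	if bytesMarked >= allocatedDuringCycle {
		reachable = bytesMarked - allocatedDuringCycle
	}
	next := uint64(float64(reachable) * (1 + triggerRatio))
	if next < heapMin {
		next = heapMin
	}
	return next
}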
func gcSweep(mode gcMode) {
	if gcphase != _GCoff {
		throw("gcSweep being done but phase is not GCoff")
	}
	gcCopySpans()

	lock(&mheap_.lock)
	mheap_.sweepgen += 2
	mheap_.sweepdone = 0
	sweep.spanidx = 0
	unlock(&mheap_.lock)

	if !_ConcurrentSweep || mode == gcForceBlockMode {
		// Special case synchronous sweep.
		// Record that no proportional sweeping has to happen.
		lock(&mheap_.lock)
		mheap_.sweepPagesPerByte = 0
		mheap_.pagesSwept = 0
		unlock(&mheap_.lock)
		// Sweep all spans eagerly.
		for sweepone() != ^uintptr(0) {
			sweep.npausesweep++
		}
		// Do an additional mProf_GC, because all 'free' events are now real as well.
		mProf_GC()
		mProf_GC()
		return
	}

	// Concurrent sweep needs to sweep all of the in-use pages by
	// the time the allocated heap reaches the GC trigger. Compute
	// the ratio of in-use pages to sweep per byte allocated.
	heapDistance := int64(memstats.next_gc) - int64(memstats.heap_live)
	// Add a little margin so rounding errors and concurrent
	// sweep are less likely to leave pages unswept when GC starts.
	heapDistance -= 1024 * 1024
	if heapDistance < _PageSize {
		// Avoid setting the sweep ratio extremely high
		heapDistance = _PageSize
	}
	lock(&mheap_.lock)
	mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance)
	mheap_.pagesSwept = 0
	mheap_.spanBytesAlloc = 0
	unlock(&mheap_.lock)

	// Background sweep.
	lock(&sweep.lock)
	if sweep.parked {
		sweep.parked = false
		ready(sweep.g, 0)
	}
	unlock(&sweep.lock)
	mProf_GC()
}

func gcCopySpans() {
	// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
	// resizing/freeing allspans.
	// New spans can be created while GC progresses, but they are not garbage for
	// this round:
	// - new stack spans can be created even while the world is stopped.
	// - new malloc spans can be created during the concurrent sweep
	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
	lock(&mheap_.lock)
	// Free the old cached mark array if necessary.
	if work.spans != nil && &work.spans[0] != &h_allspans[0] {
		sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
	}
	// Cache the current array for sweeping.
	mheap_.gcspans = mheap_.allspans
	work.spans = h_allspans
	unlock(&mheap_.lock)
}
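// The proportional sweep ratio computed in gcSweep can also be worked
// through by hand: with a 5 MB gap between heap_live and next_gc (4 MB after
// the 1 MB margin) and 2048 in-use pages, the sweeper must retire roughly
// one page for every 2 KB of heap growth. Below is a minimal sketch of just
// this ratio; exampleSweepPagesPerByte is a hypothetical name, its
// parameters stand in for mheap_ and memstats fields, and the real code
// performs the update under mheap_.lock.
func exampleSweepPagesPerByte(pagesInUse uint64, nextGC, heapLive, pageSize int64) float64 {
	heapDistance := nextGC - heapLive
	heapDistance -= 1024 * 1024 // margin, as in gcSweep
	if heapDistance < pageSize {
		// Avoid an extremely high sweep ratio.
		heapDistance = pageSize
	}
	return float64(pagesInUse) / float64(heapDistance)
}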
// gcResetMarkState resets global state prior to marking (concurrent
// or STW) and resets the stack scan state of all Gs. Any Gs created
// after this will also be in the reset state.
func gcResetMarkState() {
	// This may be called during a concurrent phase, so make sure
	// allgs doesn't change.
	lock(&allglock)
	for _, gp := range allgs {
		gp.gcscandone = false  // set to true in gcphasework
		gp.gcscanvalid = false // stack has not been scanned
		gp.gcAssistBytes = 0
	}
	unlock(&allglock)

	work.bytesMarked = 0
	work.initialHeapLive = memstats.heap_live
}

// Hooks for other packages

var poolcleanup func()

//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
func sync_runtime_registerPoolCleanup(f func()) {
	poolcleanup = f
}

func clearpools() {
	// clear sync.Pools
	if poolcleanup != nil {
		poolcleanup()
	}

	// Clear central sudog cache.
	// Leave per-P caches alone, they have strictly bounded size.
	// Disconnect cached list before dropping it on the floor,
	// so that a dangling ref to one entry does not pin all of them.
	lock(&sched.sudoglock)
	var sg, sgnext *sudog
	for sg = sched.sudogcache; sg != nil; sg = sgnext {
		sgnext = sg.next
		sg.next = nil
	}
	sched.sudogcache = nil
	unlock(&sched.sudoglock)

	// Clear central defer pools.
	// Leave per-P pools alone, they have strictly bounded size.
	lock(&sched.deferlock)
	for i := range sched.deferpool {
		// disconnect cached list before dropping it on the floor,
		// so that a dangling ref to one entry does not pin all of them.
		var d, dlink *_defer
		for d = sched.deferpool[i]; d != nil; d = dlink {
			dlink = d.link
			d.link = nil
		}
		sched.deferpool[i] = nil
	}
	unlock(&sched.deferlock)
}

// Timing

//go:nowritebarrier
func gchelper() {
	_g_ := getg()
	_g_.m.traceback = 2
	gchelperstart()

	if trace.enabled {
		traceGCScanStart()
	}

	// Parallel mark over GC roots and heap
	if gcphase == _GCmarktermination {
		var gcw gcWork
		gcDrain(&gcw, gcDrainBlock) // blocks in getfull
		gcw.dispose()
	}

	if trace.enabled {
		traceGCScanDone()
	}

	nproc := work.nproc // work.nproc can change right after we increment work.ndone
	if atomic.Xadd(&work.ndone, +1) == nproc-1 {
		notewakeup(&work.alldone)
	}
	_g_.m.traceback = 0
}

func gchelperstart() {
	_g_ := getg()

	if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
		throw("gchelperstart: bad m->helpgc")
	}
	if _g_ != _g_.m.g0 {
		throw("gchelper not running on g0 stack")
	}
}

// itoaDiv formats val/(10**dec) into buf.
func itoaDiv(buf []byte, val uint64, dec int) []byte {
	i := len(buf) - 1
	idec := i - dec
	for val >= 10 || i >= idec {
		buf[i] = byte(val%10 + '0')
		i--
		if i == idec {
			buf[i] = '.'
			i--
		}
		val /= 10
	}
	buf[i] = byte(val + '0')
	return buf[i:]
}
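// itoaDiv is easiest to read alongside a concrete call: itoaDiv(buf, 12345, 3)
// writes the digits of 12345 right to left and inserts a '.' after the last
// three of them, yielding "12.345". Illustrative wrapper only; exampleItoaDiv
// is a hypothetical name and assumes buf is large enough for the result.
func exampleItoaDiv() string {
	var buf [24]byte
	return string(itoaDiv(buf[:], 12345, 3)) // "12.345"
}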
// fmtNSAsMS nicely formats ns nanoseconds as milliseconds.
func fmtNSAsMS(buf []byte, ns uint64) []byte {
	if ns >= 10e6 {
		// Format as whole milliseconds.
		return itoaDiv(buf, ns/1e6, 0)
	}
	// Format two digits of precision, with at most three decimal places.
	x := ns / 1e3
	if x == 0 {
		buf[0] = '0'
		return buf[:1]
	}
	dec := 3
	for x >= 100 {
		x /= 10
		dec--
	}
	return itoaDiv(buf, x, dec)
}
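// A couple of sample conversions through fmtNSAsMS, via an illustrative
// wrapper (exampleFmtNSAsMS is a hypothetical name, not part of the
// runtime): 2350000 ns formats as "2.3" (two digits of precision below the
// 10e6 ns cutoff) and 123456789 ns formats as "123" (whole milliseconds at
// or above the cutoff).
func exampleFmtNSAsMS(ns uint64) string {
	var buf [24]byte
	return string(fmtNSAsMS(buf[:], ns))
}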