// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
// It has gotten completely out of control.

// Garbage collector (GC).
//
// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows
// multiple GC threads to run in parallel. It is a concurrent mark and sweep that uses a write
// barrier. It is non-generational and non-compacting. Allocation is done using size segregated
// per P allocation areas to minimize fragmentation while eliminating locks in the common case.
//
// The algorithm decomposes into several steps.
// This is a high level description of the algorithm being used. For an overview of GC a good
// place to start is Richard Jones' gchandbook.org.
//
// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978),
// 966-975.
// For journal quality proofs that these steps are complete, correct, and terminate see
// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
//
//  0. Set phase = GCscan from GCoff.
//  1. Wait for all P's to acknowledge phase change.
//     At this point all goroutines have passed through a GC safepoint and
//     know we are in the GCscan phase.
//  2. GC scans all goroutine stacks, marks and enqueues all encountered pointers
//     (marking avoids most duplicate enqueuing but races may produce benign duplication).
//     Preempted goroutines are scanned before P schedules next goroutine.
//  3. Set phase = GCmark.
//  4. Wait for all P's to acknowledge phase change.
//  5. Now write barrier marks and enqueues black, grey, or white to white pointers.
//     Malloc still allocates white (non-marked) objects.
//  6. Meanwhile GC transitively walks the heap marking reachable objects.
//  7. When GC finishes marking heap, it preempts P's one-by-one and
//     retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
//     currently scheduled on the P).
//  8. Once the GC has exhausted all available marking work it sets phase = marktermination.
//  9. Wait for all P's to acknowledge phase change.
// 10. Malloc now allocates black objects, so number of unmarked reachable objects
//     monotonically decreases.
// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet
//     reachable objects.
// 12. When GC completes a full cycle over P's and discovers no new grey
//     objects (which means all reachable objects are marked), set phase = GCoff.
// 13. Wait for all P's to acknowledge phase change.
// 14. Now malloc allocates white (but sweeps spans before use).
//     Write barrier becomes nop.
// 15. GC does background sweeping, see description below.
// 16. When sufficient allocation has taken place replay the sequence starting at 0 above,
//     see discussion of GC rate below.

// Changing phases.
// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
// All phase actions must be benign in the presence of a change.
// Starting with GCoff
// GCoff to GCscan
//     GCscan scans stacks and globals, greying them, and never marks an object black.
//     Once all the P's are aware of the new phase they will scan gs on preemption.
//     This means that the scanning of preempted gs can't start until all the Ps
//     have acknowledged.
//     When a stack is scanned, this phase also installs stack barriers to
//     track how much of the stack has been active.
//     This transition enables write barriers because stack barriers
//     assume that writes to higher frames will be tracked by write
//     barriers. Technically this only needs write barriers for writes
//     to stack slots, but we enable write barriers in general.
// GCscan to GCmark
//     In GCmark, work buffers are drained until there are no more
//     pointers to scan.
//     No scanning of objects (making them black) can happen until all
//     Ps have enabled the write barrier, but that already happened in
//     the transition to GCscan.
// GCmark to GCmarktermination
//     The only change here is that we start allocating black, so the Ps must acknowledge
//     the change before we begin the termination algorithm.
// GCmarktermination to GCsweep
//     Objects currently on the freelist must be marked black for this to work.
//     Are things on the free lists black or white? How does the sweep phase work?

// Concurrent sweep.
//
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// At the end of STW mark termination all spans are marked as "needs sweeping".
//
// The background sweeper goroutine simply sweeps spans one-by-one.
//
// To avoid requesting more OS memory while there are unswept spans, when a
// goroutine needs another span, it first attempts to reclaim that much memory
// by sweeping. When a goroutine needs to allocate a new small-object span, it
// sweeps small-object spans for the same object size until it frees at least
// one object. When a goroutine needs to allocate a large-object span from the
// heap, it sweeps spans until it frees at least that many pages into the heap.
// There is one case where this may not suffice: if a goroutine sweeps and frees
// two nonadjacent one-page spans to the heap, it will allocate a new two-page
// span, but there can still be other one-page unswept spans which could be
// combined into a two-page span.
//
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in the GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).

// GC rate.
// Next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use.
The proportion is controlled by GOGC environment variable 116 // (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M 117 // (this mark is tracked in next_gc variable). This keeps the GC cost in linear 118 // proportion to the allocation cost. Adjusting GOGC just changes the linear constant 119 // (and also the amount of extra memory used). 120 121 package runtime 122 123 import "unsafe" 124 125 const ( 126 _DebugGC = 0 127 _ConcurrentSweep = true 128 _FinBlockSize = 4 * 1024 129 130 // sweepMinHeapDistance is a lower bound on the heap distance 131 // (in bytes) reserved for concurrent sweeping between GC 132 // cycles. This will be scaled by gcpercent/100. 133 sweepMinHeapDistance = 1024 * 1024 134 ) 135 136 // heapminimum is the minimum heap size at which to trigger GC. 137 // For small heaps, this overrides the usual GOGC*live set rule. 138 // 139 // When there is a very small live set but a lot of allocation, simply 140 // collecting when the heap reaches GOGC*live results in many GC 141 // cycles and high total per-GC overhead. This minimum amortizes this 142 // per-GC overhead while keeping the heap reasonably small. 143 // 144 // During initialization this is set to 4MB*GOGC/100. In the case of 145 // GOGC==0, this will set heapminimum to 0, resulting in constant 146 // collection even when the heap size is small, which is useful for 147 // debugging. 148 var heapminimum uint64 = defaultHeapMinimum 149 150 // defaultHeapMinimum is the value of heapminimum for GOGC==100. 151 const defaultHeapMinimum = 4 << 20 152 153 // Initialized from $GOGC. GOGC=off means no GC. 154 var gcpercent int32 155 156 func gcinit() { 157 if unsafe.Sizeof(workbuf{}) != _WorkbufSize { 158 throw("size of Workbuf is suboptimal") 159 } 160 161 work.markfor = parforalloc(_MaxGcproc) 162 _ = setGCPercent(readgogc()) 163 for datap := &firstmoduledata; datap != nil; datap = datap.next { 164 datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data) 165 datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss) 166 } 167 memstats.next_gc = heapminimum 168 } 169 170 func readgogc() int32 { 171 p := gogetenv("GOGC") 172 if p == "" { 173 return 100 174 } 175 if p == "off" { 176 return -1 177 } 178 return int32(atoi(p)) 179 } 180 181 // gcenable is called after the bulk of the runtime initialization, 182 // just before we're about to start letting user code run. 183 // It kicks off the background sweeper goroutine and enables GC. 184 func gcenable() { 185 c := make(chan int, 1) 186 go bgsweep(c) 187 <-c 188 memstats.enablegc = true // now that runtime is initialized, GC is okay 189 } 190 191 //go:linkname setGCPercent runtime/debug.setGCPercent 192 func setGCPercent(in int32) (out int32) { 193 lock(&mheap_.lock) 194 out = gcpercent 195 if in < 0 { 196 in = -1 197 } 198 gcpercent = in 199 heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 200 unlock(&mheap_.lock) 201 return out 202 } 203 204 // Garbage collector phase. 205 // Indicates to write barrier and sychronization task to preform. 206 var gcphase uint32 207 var writeBarrierEnabled bool // compiler emits references to this in write barriers 208 209 // gcBlackenEnabled is 1 if mutator assists and background mark 210 // workers are allowed to blacken objects. This must only be set when 211 // gcphase == _GCmark. 
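// (Added cross-reference, not part of the original comment: gc() below sets this
// with atomicstore(&gcBlackenEnabled, 1) once the mark phase and mutator assists
// are ready, and clears it again just before mark termination; both
// findRunnableGCWorker and gcBgMarkWorker throw if they run while it is 0.)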
212 var gcBlackenEnabled uint32 213 214 // gcBlackenPromptly indicates that optimizations that may 215 // hide work from the global work queue should be disabled. 216 // 217 // If gcBlackenPromptly is true, per-P gcWork caches should 218 // be flushed immediately and new objects should be allocated black. 219 // 220 // There is a tension between allocating objects white and 221 // allocating them black. If white and the objects die before being 222 // marked they can be collected during this GC cycle. On the other 223 // hand allocating them black will reduce _GCmarktermination latency 224 // since more work is done in the mark phase. This tension is resolved 225 // by allocating white until the mark phase is approaching its end and 226 // then allocating black for the remainder of the mark phase. 227 var gcBlackenPromptly bool 228 229 const ( 230 _GCoff = iota // GC not running; sweeping in background, write barrier disabled 231 _GCmark // GC marking roots and workbufs, write barrier ENABLED 232 _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED 233 ) 234 235 //go:nosplit 236 func setGCPhase(x uint32) { 237 atomicstore(&gcphase, x) 238 writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination 239 } 240 241 // gcMarkWorkerMode represents the mode that a concurrent mark worker 242 // should operate in. 243 // 244 // Concurrent marking happens through four different mechanisms. One 245 // is mutator assists, which happen in response to allocations and are 246 // not scheduled. The other three are variations in the per-P mark 247 // workers and are distinguished by gcMarkWorkerMode. 248 type gcMarkWorkerMode int 249 250 const ( 251 // gcMarkWorkerDedicatedMode indicates that the P of a mark 252 // worker is dedicated to running that mark worker. The mark 253 // worker should run without preemption until concurrent mark 254 // is done. 255 gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota 256 257 // gcMarkWorkerFractionalMode indicates that a P is currently 258 // running the "fractional" mark worker. The fractional worker 259 // is necessary when GOMAXPROCS*gcGoalUtilization is not an 260 // integer. The fractional worker should run until it is 261 // preempted and will be scheduled to pick up the fractional 262 // part of GOMAXPROCS*gcGoalUtilization. 263 gcMarkWorkerFractionalMode 264 265 // gcMarkWorkerIdleMode indicates that a P is running the mark 266 // worker because it has nothing else to do. The idle worker 267 // should run until it is preempted and account its time 268 // against gcController.idleMarkTime. 269 gcMarkWorkerIdleMode 270 ) 271 272 // gcController implements the GC pacing controller that determines 273 // when to trigger concurrent garbage collection and how much marking 274 // work to do in mutator assists and background marking. 275 // 276 // It uses a feedback control algorithm to adjust the memstats.next_gc 277 // trigger based on the heap growth and GC CPU utilization each cycle. 278 // This algorithm optimizes for heap growth to match GOGC and for CPU 279 // utilization between assist and background marking to be 25% of 280 // GOMAXPROCS. The high-level design of this algorithm is documented 281 // at https://golang.org/s/go15gcpacing. 282 var gcController = gcControllerState{ 283 // Initial trigger ratio guess. 284 triggerRatio: 7 / 8.0, 285 } 286 287 type gcControllerState struct { 288 // scanWork is the total scan work performed this cycle. This 289 // is updated atomically during the cycle. 
Updates occur in 290 // bounded batches, since it is both written and read 291 // throughout the cycle. 292 // 293 // Currently this is the bytes of heap scanned. For most uses, 294 // this is an opaque unit of work, but for estimation the 295 // definition is important. 296 scanWork int64 297 298 // bgScanCredit is the scan work credit accumulated by the 299 // concurrent background scan. This credit is accumulated by 300 // the background scan and stolen by mutator assists. This is 301 // updated atomically. Updates occur in bounded batches, since 302 // it is both written and read throughout the cycle. 303 bgScanCredit int64 304 305 // assistTime is the nanoseconds spent in mutator assists 306 // during this cycle. This is updated atomically. Updates 307 // occur in bounded batches, since it is both written and read 308 // throughout the cycle. 309 assistTime int64 310 311 // dedicatedMarkTime is the nanoseconds spent in dedicated 312 // mark workers during this cycle. This is updated atomically 313 // at the end of the concurrent mark phase. 314 dedicatedMarkTime int64 315 316 // fractionalMarkTime is the nanoseconds spent in the 317 // fractional mark worker during this cycle. This is updated 318 // atomically throughout the cycle and will be up-to-date if 319 // the fractional mark worker is not currently running. 320 fractionalMarkTime int64 321 322 // idleMarkTime is the nanoseconds spent in idle marking 323 // during this cycle. This is updated atomically throughout 324 // the cycle. 325 idleMarkTime int64 326 327 // bgMarkStartTime is the absolute start time in nanoseconds 328 // that the background mark phase started. 329 bgMarkStartTime int64 330 331 // assistTime is the absolute start time in nanoseconds that 332 // mutator assists were enabled. 333 assistStartTime int64 334 335 // heapGoal is the goal memstats.heap_live for when this cycle 336 // ends. This is computed at the beginning of each cycle. 337 heapGoal uint64 338 339 // dedicatedMarkWorkersNeeded is the number of dedicated mark 340 // workers that need to be started. This is computed at the 341 // beginning of each cycle and decremented atomically as 342 // dedicated mark workers get started. 343 dedicatedMarkWorkersNeeded int64 344 345 // assistWorkPerByte is the ratio of scan work to allocated 346 // bytes that should be performed by mutator assists. This is 347 // computed at the beginning of each cycle and updated every 348 // time heap_scan is updated. 349 assistWorkPerByte float64 350 351 // assistBytesPerWork is 1/assistWorkPerByte. 352 assistBytesPerWork float64 353 354 // fractionalUtilizationGoal is the fraction of wall clock 355 // time that should be spent in the fractional mark worker. 356 // For example, if the overall mark utilization goal is 25% 357 // and GOMAXPROCS is 6, one P will be a dedicated mark worker 358 // and this will be set to 0.5 so that 50% of the time some P 359 // is in a fractional mark worker. This is computed at the 360 // beginning of each cycle. 361 fractionalUtilizationGoal float64 362 363 // triggerRatio is the heap growth ratio at which the garbage 364 // collection cycle should start. E.g., if this is 0.6, then 365 // GC should start when the live heap has reached 1.6 times 366 // the heap size marked by the previous cycle. This is updated 367 // at the end of of each cycle. 368 triggerRatio float64 369 370 _ [_CacheLineSize]byte 371 372 // fractionalMarkWorkersNeeded is the number of fractional 373 // mark workers that need to be started. This is either 0 or 374 // 1. 
This is potentially updated atomically at every 375 // scheduling point (hence it gets its own cache line). 376 fractionalMarkWorkersNeeded int64 377 378 _ [_CacheLineSize]byte 379 } 380 381 // startCycle resets the GC controller's state and computes estimates 382 // for a new GC cycle. The caller must hold worldsema. 383 func (c *gcControllerState) startCycle() { 384 c.scanWork = 0 385 c.bgScanCredit = 0 386 c.assistTime = 0 387 c.dedicatedMarkTime = 0 388 c.fractionalMarkTime = 0 389 c.idleMarkTime = 0 390 391 // If this is the first GC cycle or we're operating on a very 392 // small heap, fake heap_marked so it looks like next_gc is 393 // the appropriate growth from heap_marked, even though the 394 // real heap_marked may not have a meaningful value (on the 395 // first cycle) or may be much smaller (resulting in a large 396 // error response). 397 if memstats.next_gc <= heapminimum { 398 memstats.heap_marked = uint64(float64(memstats.next_gc) / (1 + c.triggerRatio)) 399 memstats.heap_reachable = memstats.heap_marked 400 } 401 402 // Compute the heap goal for this cycle 403 c.heapGoal = memstats.heap_reachable + memstats.heap_reachable*uint64(gcpercent)/100 404 405 // Ensure that the heap goal is at least a little larger than 406 // the current live heap size. This may not be the case if GC 407 // start is delayed or if the allocation that pushed heap_live 408 // over next_gc is large or if the trigger is really close to 409 // GOGC. Assist is proportional to this distance, so enforce a 410 // minimum distance, even if it means going over the GOGC goal 411 // by a tiny bit. 412 if c.heapGoal < memstats.heap_live+1024*1024 { 413 c.heapGoal = memstats.heap_live + 1024*1024 414 } 415 416 // Compute the total mark utilization goal and divide it among 417 // dedicated and fractional workers. 418 totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization 419 c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal) 420 c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) 421 if c.fractionalUtilizationGoal > 0 { 422 c.fractionalMarkWorkersNeeded = 1 423 } else { 424 c.fractionalMarkWorkersNeeded = 0 425 } 426 427 // Clear per-P state 428 for _, p := range &allp { 429 if p == nil { 430 break 431 } 432 p.gcAssistTime = 0 433 } 434 435 // Compute initial values for controls that are updated 436 // throughout the cycle. 437 c.revise() 438 439 if debug.gcpacertrace > 0 { 440 print("pacer: assist ratio=", c.assistWorkPerByte, 441 " (scan ", memstats.heap_scan>>20, " MB in ", 442 work.initialHeapLive>>20, "->", 443 c.heapGoal>>20, " MB)", 444 " workers=", c.dedicatedMarkWorkersNeeded, 445 "+", c.fractionalMarkWorkersNeeded, "\n") 446 } 447 } 448 449 // revise updates the assist ratio during the GC cycle to account for 450 // improved estimates. This should be called either under STW or 451 // whenever memstats.heap_scan or memstats.heap_live is updated (with 452 // mheap_.lock held). 453 // 454 // It should only be called when gcBlackenEnabled != 0 (because this 455 // is when assists are enabled and the necessary statistics are 456 // available). 457 func (c *gcControllerState) revise() { 458 // Compute the expected scan work remaining. 459 // 460 // Note that the scannable heap size is likely to increase 461 // during the GC cycle. This is why it's important to revise 462 // the assist ratio throughout the cycle: if the scannable 463 // heap size increases, the assist ratio based on the initial 464 // scannable heap size may target too little scan work. 
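// For concreteness (illustrative numbers, not from the original comment): if
// heap_scan is 64 MB with 16 MB of scan work already done, and the heap goal
// is 4 MB away (heapGoal 104 MB vs heap_live 100 MB), then scanWorkExpected
// below is 48 MB, heapDistance is 4 MB, and assistWorkPerByte comes out to
// 48/4 = 12 bytes of scan work owed per byte allocated.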
465 // 466 // This particular estimate is a strict upper bound on the 467 // possible remaining scan work for the current heap. 468 // You might consider dividing this by 2 (or by 469 // (100+GOGC)/100) to counter this over-estimation, but 470 // benchmarks show that this has almost no effect on mean 471 // mutator utilization, heap size, or assist time and it 472 // introduces the danger of under-estimating and letting the 473 // mutator outpace the garbage collector. 474 scanWorkExpected := int64(memstats.heap_scan) - c.scanWork 475 if scanWorkExpected < 1000 { 476 // We set a somewhat arbitrary lower bound on 477 // remaining scan work since if we aim a little high, 478 // we can miss by a little. 479 // 480 // We *do* need to enforce that this is at least 1, 481 // since marking is racy and double-scanning objects 482 // may legitimately make the expected scan work 483 // negative. 484 scanWorkExpected = 1000 485 } 486 487 // Compute the heap distance remaining. 488 heapDistance := int64(c.heapGoal) - int64(memstats.heap_live) 489 if heapDistance <= 0 { 490 // This shouldn't happen, but if it does, avoid 491 // dividing by zero or setting the assist negative. 492 heapDistance = 1 493 } 494 495 // Compute the mutator assist ratio so by the time the mutator 496 // allocates the remaining heap bytes up to next_gc, it will 497 // have done (or stolen) the remaining amount of scan work. 498 c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance) 499 c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected) 500 } 501 502 // endCycle updates the GC controller state at the end of the 503 // concurrent part of the GC cycle. 504 func (c *gcControllerState) endCycle() { 505 h_t := c.triggerRatio // For debugging 506 507 // Proportional response gain for the trigger controller. Must 508 // be in [0, 1]. Lower values smooth out transient effects but 509 // take longer to respond to phase changes. Higher values 510 // react to phase changes quickly, but are more affected by 511 // transient changes. Values near 1 may be unstable. 512 const triggerGain = 0.5 513 514 // Compute next cycle trigger ratio. First, this computes the 515 // "error" for this cycle; that is, how far off the trigger 516 // was from what it should have been, accounting for both heap 517 // growth and GC CPU utilization. We compute the actual heap 518 // growth during this cycle and scale that by how far off from 519 // the goal CPU utilization we were (to estimate the heap 520 // growth if we had the desired CPU utilization). The 521 // difference between this estimate and the GOGC-based goal 522 // heap growth is the error. 523 // 524 // TODO(austin): next_gc is based on heap_reachable, not 525 // heap_marked, which means the actual growth ratio 526 // technically isn't comparable to the trigger ratio. 527 goalGrowthRatio := float64(gcpercent) / 100 528 actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 529 assistDuration := nanotime() - c.assistStartTime 530 531 // Assume background mark hit its utilization goal. 532 utilization := gcGoalUtilization 533 // Add assist utilization; avoid divide by zero. 534 if assistDuration > 0 { 535 utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) 536 } 537 538 triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio) 539 540 // Finally, we adjust the trigger for next time by this error, 541 // damped by the proportional gain. 
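// For instance (hypothetical numbers, for illustration only): with GOGC=100
// the goal growth ratio is 1.0; if the trigger ratio was 0.7, the actual
// growth ratio came out to 0.9, and utilization matched the 25% goal, then
// triggerError = 1.0 - 0.7 - 1.0*(0.9-0.7) = 0.1, and the trigger ratio
// moves up by 0.5*0.1 = 0.05 to 0.75 for the next cycle.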
542 c.triggerRatio += triggerGain * triggerError 543 if c.triggerRatio < 0 { 544 // This can happen if the mutator is allocating very 545 // quickly or the GC is scanning very slowly. 546 c.triggerRatio = 0 547 } else if c.triggerRatio > goalGrowthRatio*0.95 { 548 // Ensure there's always a little margin so that the 549 // mutator assist ratio isn't infinity. 550 c.triggerRatio = goalGrowthRatio * 0.95 551 } 552 553 if debug.gcpacertrace > 0 { 554 // Print controller state in terms of the design 555 // document. 556 H_m_prev := memstats.heap_marked 557 H_T := memstats.next_gc 558 h_a := actualGrowthRatio 559 H_a := memstats.heap_live 560 h_g := goalGrowthRatio 561 H_g := int64(float64(H_m_prev) * (1 + h_g)) 562 u_a := utilization 563 u_g := gcGoalUtilization 564 W_a := c.scanWork 565 print("pacer: H_m_prev=", H_m_prev, 566 " h_t=", h_t, " H_T=", H_T, 567 " h_a=", h_a, " H_a=", H_a, 568 " h_g=", h_g, " H_g=", H_g, 569 " u_a=", u_a, " u_g=", u_g, 570 " W_a=", W_a, 571 " goalΔ=", goalGrowthRatio-h_t, 572 " actualΔ=", h_a-h_t, 573 " u_a/u_g=", u_a/u_g, 574 "\n") 575 } 576 } 577 578 // findRunnableGCWorker returns the background mark worker for _p_ if it 579 // should be run. This must only be called when gcBlackenEnabled != 0. 580 func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { 581 if gcBlackenEnabled == 0 { 582 throw("gcControllerState.findRunnable: blackening not enabled") 583 } 584 if _p_.gcBgMarkWorker == nil { 585 throw("gcControllerState.findRunnable: no background mark worker") 586 } 587 if work.bgMark1.done != 0 && work.bgMark2.done != 0 { 588 // Background mark is done. Don't schedule background 589 // mark worker any more. (This is not just an 590 // optimization. Without this we can spin scheduling 591 // the background worker and having it return 592 // immediately with no work to do.) 593 return nil 594 } 595 596 decIfPositive := func(ptr *int64) bool { 597 if *ptr > 0 { 598 if xaddint64(ptr, -1) >= 0 { 599 return true 600 } 601 // We lost a race 602 xaddint64(ptr, +1) 603 } 604 return false 605 } 606 607 if decIfPositive(&c.dedicatedMarkWorkersNeeded) { 608 // This P is now dedicated to marking until the end of 609 // the concurrent mark phase. 610 _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode 611 // TODO(austin): This P isn't going to run anything 612 // else for a while, so kick everything out of its run 613 // queue. 614 } else { 615 if !gcMarkWorkAvailable(_p_) { 616 // No work to be done right now. This can 617 // happen at the end of the mark phase when 618 // there are still assists tapering off. Don't 619 // bother running background mark because 620 // it'll just return immediately. 621 if work.nwait == work.nproc { 622 // There are also no workers, which 623 // means we've reached a completion point. 624 // There may not be any workers to 625 // signal it, so signal it here. 626 readied := false 627 if gcBlackenPromptly { 628 if work.bgMark1.done == 0 { 629 throw("completing mark 2, but bgMark1.done == 0") 630 } 631 readied = work.bgMark2.complete() 632 } else { 633 readied = work.bgMark1.complete() 634 } 635 if readied { 636 // complete just called ready, 637 // but we're inside the 638 // scheduler. Let it know that 639 // that's okay. 640 resetspinning() 641 } 642 } 643 return nil 644 } 645 if !decIfPositive(&c.fractionalMarkWorkersNeeded) { 646 // No more workers are need right now. 647 return nil 648 } 649 650 // This P has picked the token for the fractional worker. 651 // Is the GC currently under or at the utilization goal? 
652 // If so, do more work. 653 // 654 // We used to check whether doing one time slice of work 655 // would remain under the utilization goal, but that has the 656 // effect of delaying work until the mutator has run for 657 // enough time slices to pay for the work. During those time 658 // slices, write barriers are enabled, so the mutator is running slower. 659 // Now instead we do the work whenever we're under or at the 660 // utilization work and pay for it by letting the mutator run later. 661 // This doesn't change the overall utilization averages, but it 662 // front loads the GC work so that the GC finishes earlier and 663 // write barriers can be turned off sooner, effectively giving 664 // the mutator a faster machine. 665 // 666 // The old, slower behavior can be restored by setting 667 // gcForcePreemptNS = forcePreemptNS. 668 const gcForcePreemptNS = 0 669 670 // TODO(austin): We could fast path this and basically 671 // eliminate contention on c.fractionalMarkWorkersNeeded by 672 // precomputing the minimum time at which it's worth 673 // next scheduling the fractional worker. Then Ps 674 // don't have to fight in the window where we've 675 // passed that deadline and no one has started the 676 // worker yet. 677 // 678 // TODO(austin): Shorter preemption interval for mark 679 // worker to improve fairness and give this 680 // finer-grained control over schedule? 681 now := nanotime() - gcController.bgMarkStartTime 682 then := now + gcForcePreemptNS 683 timeUsed := c.fractionalMarkTime + gcForcePreemptNS 684 if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal { 685 // Nope, we'd overshoot the utilization goal 686 xaddint64(&c.fractionalMarkWorkersNeeded, +1) 687 return nil 688 } 689 _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode 690 } 691 692 // Run the background mark worker 693 gp := _p_.gcBgMarkWorker 694 casgstatus(gp, _Gwaiting, _Grunnable) 695 if trace.enabled { 696 traceGoUnpark(gp, 0) 697 } 698 return gp 699 } 700 701 // gcGoalUtilization is the goal CPU utilization for background 702 // marking as a fraction of GOMAXPROCS. 703 const gcGoalUtilization = 0.25 704 705 // gcCreditSlack is the amount of scan work credit that can can 706 // accumulate locally before updating gcController.scanWork and, 707 // optionally, gcController.bgScanCredit. Lower values give a more 708 // accurate assist ratio and make it more likely that assists will 709 // successfully steal background credit. Higher values reduce memory 710 // contention. 711 const gcCreditSlack = 2000 712 713 // gcAssistTimeSlack is the nanoseconds of mutator assist time that 714 // can accumulate on a P before updating gcController.assistTime. 715 const gcAssistTimeSlack = 5000 716 717 // gcOverAssistBytes determines how many extra allocation bytes of 718 // assist credit a GC assist builds up when an assist happens. This 719 // amortizes the cost of an assist by pre-paying for this many bytes 720 // of future allocations. 721 const gcOverAssistBytes = 1 << 20 722 723 // Determine whether to initiate a GC. 724 // If the GC is already working no need to trigger another one. 725 // This should establish a feedback loop where if the GC does not 726 // have sufficient time to complete then more memory will be 727 // requested from the OS increasing heap size thus allow future 728 // GCs more time to complete. 729 // memstat.heap_live read has a benign race. 730 // A false negative simple does not start a GC, a false positive 731 // will start a GC needlessly. 
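// (Illustrative restatement, not in the original: with next_gc at 8 MB, an
// allocation that pushes heap_live from just under to just over 8 MB makes
// this return true, provided no background GC goroutine is already working.)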
Neither have correctness issues. 732 func shouldtriggergc() bool { 733 return memstats.heap_live >= memstats.next_gc && atomicloaduint(&bggc.working) == 0 734 } 735 736 // bgMarkSignal synchronizes the GC coordinator and background mark workers. 737 type bgMarkSignal struct { 738 // Workers race to cas to 1. Winner signals coordinator. 739 done uint32 740 // Coordinator to wake up. 741 lock mutex 742 g *g 743 wake bool 744 } 745 746 func (s *bgMarkSignal) wait() { 747 lock(&s.lock) 748 if s.wake { 749 // Wakeup already happened 750 unlock(&s.lock) 751 } else { 752 s.g = getg() 753 goparkunlock(&s.lock, "mark wait (idle)", traceEvGoBlock, 1) 754 } 755 s.wake = false 756 s.g = nil 757 } 758 759 // complete signals the completion of this phase of marking. This can 760 // be called multiple times during a cycle; only the first call has 761 // any effect. 762 // 763 // The caller should arrange to deschedule itself as soon as possible 764 // after calling complete in order to let the coordinator goroutine 765 // run. 766 func (s *bgMarkSignal) complete() bool { 767 if cas(&s.done, 0, 1) { 768 // This is the first worker to reach this completion point. 769 // Signal the main GC goroutine. 770 lock(&s.lock) 771 if s.g == nil { 772 // It hasn't parked yet. 773 s.wake = true 774 } else { 775 ready(s.g, 0) 776 } 777 unlock(&s.lock) 778 return true 779 } 780 return false 781 } 782 783 func (s *bgMarkSignal) clear() { 784 s.done = 0 785 } 786 787 var work struct { 788 full uint64 // lock-free list of full blocks workbuf 789 empty uint64 // lock-free list of empty blocks workbuf 790 pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait 791 792 markrootNext uint32 // next markroot job 793 markrootJobs uint32 // number of markroot jobs 794 795 nproc uint32 796 tstart int64 797 nwait uint32 798 ndone uint32 799 alldone note 800 markfor *parfor 801 802 // Number of roots of various root types. Set by gcMarkRootPrepare. 803 nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int 804 805 // finalizersDone indicates that finalizers and objects with 806 // finalizers have been scanned by markroot. During concurrent 807 // GC, this happens during the concurrent scan phase. During 808 // STW GC, this happens during mark termination. 809 finalizersDone bool 810 811 bgMarkReady note // signal background mark worker has started 812 bgMarkDone uint32 // cas to 1 when at a background mark completion point 813 // Background mark completion signaling 814 815 // Coordination for the 2 parts of the mark phase. 816 bgMark1 bgMarkSignal 817 bgMark2 bgMarkSignal 818 819 // Copy of mheap.allspans for marker or sweeper. 820 spans []*mspan 821 822 // totaltime is the CPU nanoseconds spent in GC since the 823 // program started if debug.gctrace > 0. 824 totaltime int64 825 826 // bytesMarked is the number of bytes marked this cycle. This 827 // includes bytes blackened in scanned objects, noscan objects 828 // that go straight to black, and permagrey objects scanned by 829 // markroot during the concurrent scan phase. This is updated 830 // atomically during the cycle. Updates may be batched 831 // arbitrarily, since the value is only read at the end of the 832 // cycle. 833 // 834 // Because of benign races during marking, this number may not 835 // be the exact number of marked bytes, but it should be very 836 // close. 837 bytesMarked uint64 838 839 // initialHeapLive is the value of memstats.heap_live at the 840 // beginning of this GC cycle. 
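// For example (illustrative numbers, added): if heap_live was 40 MB when the
// cycle began and is 44 MB at mark termination with 38 MB marked, then about
// 4 MB was allocated during the cycle and gcMark estimates heap_reachable as
// 38 - 4 = 34 MB.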
841 initialHeapLive uint64 842 } 843 844 // GC runs a garbage collection and blocks the caller until the 845 // garbage collection is complete. It may also block the entire 846 // program. 847 func GC() { 848 startGC(gcForceBlockMode, false) 849 } 850 851 // gcMode indicates how concurrent a GC cycle should be. 852 type gcMode int 853 854 const ( 855 gcBackgroundMode gcMode = iota // concurrent GC and sweep 856 gcForceMode // stop-the-world GC now, concurrent sweep 857 gcForceBlockMode // stop-the-world GC now and STW sweep 858 ) 859 860 // startGC starts a GC cycle. If mode is gcBackgroundMode, this will 861 // start GC in the background and return. Otherwise, this will block 862 // until the new GC cycle is started and finishes. If forceTrigger is 863 // true, it indicates that GC should be started regardless of the 864 // current heap size. 865 func startGC(mode gcMode, forceTrigger bool) { 866 // The gc is turned off (via enablegc) until the bootstrap has completed. 867 // Also, malloc gets called in the guts of a number of libraries that might be 868 // holding locks. To avoid deadlocks during stop-the-world, don't bother 869 // trying to run gc while holding a lock. The next mallocgc without a lock 870 // will do the gc instead. 871 mp := acquirem() 872 if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" || !memstats.enablegc || panicking != 0 || gcpercent < 0 { 873 releasem(mp) 874 return 875 } 876 releasem(mp) 877 mp = nil 878 879 if debug.gcstoptheworld == 1 { 880 mode = gcForceMode 881 } else if debug.gcstoptheworld == 2 { 882 mode = gcForceBlockMode 883 } 884 885 if mode != gcBackgroundMode { 886 // special synchronous cases 887 gc(mode) 888 return 889 } 890 891 // trigger concurrent GC 892 readied := false 893 lock(&bggc.lock) 894 // The trigger was originally checked speculatively, so 895 // recheck that this really should trigger GC. (For example, 896 // we may have gone through a whole GC cycle since the 897 // speculative check.) 898 if !(forceTrigger || shouldtriggergc()) { 899 unlock(&bggc.lock) 900 return 901 } 902 if !bggc.started { 903 bggc.working = 1 904 bggc.started = true 905 readied = true 906 go backgroundgc() 907 } else if bggc.working == 0 { 908 bggc.working = 1 909 readied = true 910 ready(bggc.g, 0) 911 } 912 unlock(&bggc.lock) 913 if readied { 914 // This G just started or ready()d the GC goroutine. 915 // Switch directly to it by yielding. 916 Gosched() 917 } 918 } 919 920 // State of the background concurrent GC goroutine. 921 var bggc struct { 922 lock mutex 923 g *g 924 working uint 925 started bool 926 } 927 928 // backgroundgc is running in a goroutine and does the concurrent GC work. 929 // bggc holds the state of the backgroundgc. 930 func backgroundgc() { 931 bggc.g = getg() 932 for { 933 gc(gcBackgroundMode) 934 lock(&bggc.lock) 935 bggc.working = 0 936 goparkunlock(&bggc.lock, "Concurrent GC wait", traceEvGoBlock, 1) 937 } 938 } 939 940 func gc(mode gcMode) { 941 // Timing/utilization tracking 942 var stwprocs, maxprocs int32 943 var tSweepTerm, tMark, tMarkTerm int64 944 945 // debug.gctrace variables 946 var heap0, heap1, heap2, heapGoal uint64 947 948 // memstats statistics 949 var now, pauseStart, pauseNS int64 950 951 // Ok, we're doing it! 
Stop everybody else 952 semacquire(&worldsema, false) 953 954 // Pick up the remaining unswept/not being swept spans concurrently 955 // 956 // This shouldn't happen if we're being invoked in background 957 // mode since proportional sweep should have just finished 958 // sweeping everything, but rounding errors, etc, may leave a 959 // few spans unswept. In forced mode, this is necessary since 960 // GC can be forced at any point in the sweeping cycle. 961 for gosweepone() != ^uintptr(0) { 962 sweep.nbgsweep++ 963 } 964 965 if trace.enabled { 966 traceGCStart() 967 } 968 969 if mode == gcBackgroundMode { 970 gcBgMarkStartWorkers() 971 } 972 now = nanotime() 973 stwprocs, maxprocs = gcprocs(), gomaxprocs 974 tSweepTerm = now 975 heap0 = memstats.heap_live 976 977 pauseStart = now 978 systemstack(stopTheWorldWithSema) 979 // Finish sweep before we start concurrent scan. 980 systemstack(func() { 981 finishsweep_m(true) 982 }) 983 // clearpools before we start the GC. If we wait they memory will not be 984 // reclaimed until the next GC cycle. 985 clearpools() 986 987 gcResetMarkState() 988 989 work.finalizersDone = false 990 991 if mode == gcBackgroundMode { // Do as much work concurrently as possible 992 gcController.startCycle() 993 heapGoal = gcController.heapGoal 994 995 systemstack(func() { 996 // Enter concurrent mark phase and enable 997 // write barriers. 998 // 999 // Because the world is stopped, all Ps will 1000 // observe that write barriers are enabled by 1001 // the time we start the world and begin 1002 // scanning. 1003 // 1004 // It's necessary to enable write barriers 1005 // during the scan phase for several reasons: 1006 // 1007 // They must be enabled for writes to higher 1008 // stack frames before we scan stacks and 1009 // install stack barriers because this is how 1010 // we track writes to inactive stack frames. 1011 // (Alternatively, we could not install stack 1012 // barriers over frame boundaries with 1013 // up-pointers). 1014 // 1015 // They must be enabled before assists are 1016 // enabled because they must be enabled before 1017 // any non-leaf heap objects are marked. Since 1018 // allocations are blocked until assists can 1019 // happen, we want enable assists as early as 1020 // possible. 1021 setGCPhase(_GCmark) 1022 1023 // markrootSpans uses work.spans, so make sure 1024 // it is up to date. 1025 gcCopySpans() 1026 1027 gcBgMarkPrepare() // Must happen before assist enable. 1028 gcMarkRootPrepare() 1029 1030 // At this point all Ps have enabled the write 1031 // barrier, thus maintaining the no white to 1032 // black invariant. Enable mutator assists to 1033 // put back-pressure on fast allocating 1034 // mutators. 1035 atomicstore(&gcBlackenEnabled, 1) 1036 1037 // Concurrent mark. 1038 startTheWorldWithSema() 1039 now = nanotime() 1040 pauseNS += now - pauseStart 1041 gcController.assistStartTime = now 1042 }) 1043 tMark = now 1044 1045 // Enable background mark workers and wait for 1046 // background mark completion. 1047 gcController.bgMarkStartTime = now 1048 work.bgMark1.clear() 1049 work.bgMark1.wait() 1050 1051 gcMarkRootCheck() 1052 1053 // The global work list is empty, but there can still be work 1054 // sitting in the per-P work caches and there can be more 1055 // objects reachable from global roots since they don't have write 1056 // barriers. Rescan some roots and flush work caches. 1057 systemstack(func() { 1058 // Disallow caching workbufs. 1059 gcBlackenPromptly = true 1060 1061 // Flush all currently cached workbufs. 
This 1062 // also forces any remaining background 1063 // workers out of their loop. 1064 forEachP(func(_p_ *p) { 1065 _p_.gcw.dispose() 1066 }) 1067 1068 // Rescan global data and BSS. Bump "jobs" 1069 // down before "next" so workers won't try 1070 // running root jobs until we set "next". 1071 atomicstore(&work.markrootJobs, uint32(fixedRootCount+work.nDataRoots+work.nBSSRoots)) 1072 atomicstore(&work.markrootNext, fixedRootCount) 1073 }) 1074 1075 // Wait for this more aggressive background mark to complete. 1076 work.bgMark2.clear() 1077 work.bgMark2.wait() 1078 1079 // Begin mark termination. 1080 now = nanotime() 1081 tMarkTerm = now 1082 pauseStart = now 1083 systemstack(stopTheWorldWithSema) 1084 // The gcphase is _GCmark, it will transition to _GCmarktermination 1085 // below. The important thing is that the wb remains active until 1086 // all marking is complete. This includes writes made by the GC. 1087 1088 // markroot is done now, so record that objects with 1089 // finalizers have been scanned. 1090 work.finalizersDone = true 1091 1092 // Flush the gcWork caches. This must be done before 1093 // endCycle since endCycle depends on statistics kept 1094 // in these caches. 1095 gcFlushGCWork() 1096 1097 gcController.endCycle() 1098 } else { 1099 t := nanotime() 1100 tMark, tMarkTerm = t, t 1101 heapGoal = heap0 1102 } 1103 1104 // World is stopped. 1105 // Start marktermination which includes enabling the write barrier. 1106 atomicstore(&gcBlackenEnabled, 0) 1107 gcBlackenPromptly = false 1108 setGCPhase(_GCmarktermination) 1109 1110 heap1 = memstats.heap_live 1111 startTime := nanotime() 1112 1113 mp := acquirem() 1114 mp.preemptoff = "gcing" 1115 _g_ := getg() 1116 _g_.m.traceback = 2 1117 gp := _g_.m.curg 1118 casgstatus(gp, _Grunning, _Gwaiting) 1119 gp.waitreason = "garbage collection" 1120 1121 // Run gc on the g0 stack. We do this so that the g stack 1122 // we're currently running on will no longer change. Cuts 1123 // the root set down a bit (g0 stacks are not scanned, and 1124 // we don't need to scan gc's internal state). We also 1125 // need to switch to g0 so we can shrink the stack. 1126 systemstack(func() { 1127 gcMark(startTime) 1128 // Must return immediately. 1129 // The outer function's stack may have moved 1130 // during gcMark (it shrinks stacks, including the 1131 // outer function's stack), so we must not refer 1132 // to any of its variables. Return back to the 1133 // non-system stack to pick up the new addresses 1134 // before continuing. 1135 }) 1136 1137 systemstack(func() { 1138 heap2 = work.bytesMarked 1139 if debug.gccheckmark > 0 { 1140 // Run a full stop-the-world mark using checkmark bits, 1141 // to check that we didn't forget to mark anything during 1142 // the concurrent mark process. 1143 gcResetMarkState() 1144 initCheckmarks() 1145 gcMark(startTime) 1146 clearCheckmarks() 1147 } 1148 1149 // marking is complete so we can turn the write barrier off 1150 setGCPhase(_GCoff) 1151 gcSweep(mode) 1152 1153 if debug.gctrace > 1 { 1154 startTime = nanotime() 1155 // The g stacks have been scanned so 1156 // they have gcscanvalid==true and gcworkdone==true. 1157 // Reset these so that all stacks will be rescanned. 1158 gcResetMarkState() 1159 finishsweep_m(true) 1160 1161 // Still in STW but gcphase is _GCoff, reset to _GCmarktermination 1162 // At this point all objects will be found during the gcMark which 1163 // does a complete STW mark and object scan. 
1164 setGCPhase(_GCmarktermination) 1165 gcMark(startTime) 1166 setGCPhase(_GCoff) // marking is done, turn off wb. 1167 gcSweep(mode) 1168 } 1169 }) 1170 1171 _g_.m.traceback = 0 1172 casgstatus(gp, _Gwaiting, _Grunning) 1173 1174 if trace.enabled { 1175 traceGCDone() 1176 } 1177 1178 // all done 1179 mp.preemptoff = "" 1180 1181 if gcphase != _GCoff { 1182 throw("gc done but gcphase != _GCoff") 1183 } 1184 1185 // Update timing memstats 1186 now, unixNow := nanotime(), unixnanotime() 1187 pauseNS += now - pauseStart 1188 atomicstore64(&memstats.last_gc, uint64(unixNow)) // must be Unix time to make sense to user 1189 memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(pauseNS) 1190 memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) 1191 memstats.pause_total_ns += uint64(pauseNS) 1192 1193 // Update work.totaltime. 1194 sweepTermCpu := int64(stwprocs) * (tMark - tSweepTerm) 1195 // We report idle marking time below, but omit it from the 1196 // overall utilization here since it's "free". 1197 markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime 1198 markTermCpu := int64(stwprocs) * (now - tMarkTerm) 1199 cycleCpu := sweepTermCpu + markCpu + markTermCpu 1200 work.totaltime += cycleCpu 1201 1202 // Compute overall GC CPU utilization. 1203 totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) 1204 memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) 1205 1206 memstats.numgc++ 1207 1208 systemstack(startTheWorldWithSema) 1209 semrelease(&worldsema) 1210 1211 releasem(mp) 1212 mp = nil 1213 1214 if debug.gctrace > 0 { 1215 tEnd := now 1216 util := int(memstats.gc_cpu_fraction * 100) 1217 1218 // Install WB phase is no longer used. 1219 tInstallWB := tMark 1220 installWBCpu := int64(0) 1221 1222 // Scan phase is no longer used. 1223 tScan := tInstallWB 1224 scanCpu := int64(0) 1225 1226 // TODO: Clean up the gctrace format. 1227 1228 var sbuf [24]byte 1229 printlock() 1230 print("gc ", memstats.numgc, 1231 " @", string(itoaDiv(sbuf[:], uint64(tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", 1232 util, "%: ") 1233 prev := tSweepTerm 1234 for i, ns := range []int64{tScan, tInstallWB, tMark, tMarkTerm, tEnd} { 1235 if i != 0 { 1236 print("+") 1237 } 1238 print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev)))) 1239 prev = ns 1240 } 1241 print(" ms clock, ") 1242 for i, ns := range []int64{sweepTermCpu, scanCpu, installWBCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} { 1243 if i == 4 || i == 5 { 1244 // Separate mark time components with /. 1245 print("/") 1246 } else if i != 0 { 1247 print("+") 1248 } 1249 print(string(fmtNSAsMS(sbuf[:], uint64(ns)))) 1250 } 1251 print(" ms cpu, ", 1252 heap0>>20, "->", heap1>>20, "->", heap2>>20, " MB, ", 1253 heapGoal>>20, " MB goal, ", 1254 maxprocs, " P") 1255 if mode != gcBackgroundMode { 1256 print(" (forced)") 1257 } 1258 print("\n") 1259 printunlock() 1260 } 1261 sweep.nbgsweep = 0 1262 sweep.npausesweep = 0 1263 1264 // now that gc is done, kick off finalizer thread if needed 1265 if !concurrentSweep { 1266 // give the queued finalizers, if any, a chance to run 1267 Gosched() 1268 } 1269 } 1270 1271 // gcBgMarkStartWorkers prepares background mark worker goroutines. 1272 // These goroutines will not run until the mark phase, but they must 1273 // be started while the work is not stopped and from a regular G 1274 // stack. 
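// (Added note, not in the original comment: the workers started here park
// themselves immediately in gcBgMarkWorker and are only made runnable again
// by gcController.findRunnableGCWorker once the mark phase begins.)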
The caller must hold worldsema. 1275 func gcBgMarkStartWorkers() { 1276 // Background marking is performed by per-P G's. Ensure that 1277 // each P has a background GC G. 1278 for _, p := range &allp { 1279 if p == nil || p.status == _Pdead { 1280 break 1281 } 1282 if p.gcBgMarkWorker == nil { 1283 go gcBgMarkWorker(p) 1284 notetsleepg(&work.bgMarkReady, -1) 1285 noteclear(&work.bgMarkReady) 1286 } 1287 } 1288 } 1289 1290 // gcBgMarkPrepare sets up state for background marking. 1291 // Mutator assists must not yet be enabled. 1292 func gcBgMarkPrepare() { 1293 // Background marking will stop when the work queues are empty 1294 // and there are no more workers (note that, since this is 1295 // concurrent, this may be a transient state, but mark 1296 // termination will clean it up). Between background workers 1297 // and assists, we don't really know how many workers there 1298 // will be, so we pretend to have an arbitrarily large number 1299 // of workers, almost all of which are "waiting". While a 1300 // worker is working it decrements nwait. If nproc == nwait, 1301 // there are no workers. 1302 work.nproc = ^uint32(0) 1303 work.nwait = ^uint32(0) 1304 1305 // Reset background mark completion points. 1306 work.bgMark1.done = 1 1307 work.bgMark2.done = 1 1308 } 1309 1310 func gcBgMarkWorker(p *p) { 1311 // Register this G as the background mark worker for p. 1312 if p.gcBgMarkWorker != nil { 1313 throw("P already has a background mark worker") 1314 } 1315 gp := getg() 1316 1317 mp := acquirem() 1318 p.gcBgMarkWorker = gp 1319 // After this point, the background mark worker is scheduled 1320 // cooperatively by gcController.findRunnable. Hence, it must 1321 // never be preempted, as this would put it into _Grunnable 1322 // and put it on a run queue. Instead, when the preempt flag 1323 // is set, this puts itself into _Gwaiting to be woken up by 1324 // gcController.findRunnable at the appropriate time. 1325 notewakeup(&work.bgMarkReady) 1326 for { 1327 // Go to sleep until woken by gcContoller.findRunnable. 1328 // We can't releasem yet since even the call to gopark 1329 // may be preempted. 1330 gopark(func(g *g, mp unsafe.Pointer) bool { 1331 releasem((*m)(mp)) 1332 return true 1333 }, unsafe.Pointer(mp), "mark worker (idle)", traceEvGoBlock, 0) 1334 1335 // Loop until the P dies and disassociates this 1336 // worker. (The P may later be reused, in which case 1337 // it will get a new worker.) 1338 if p.gcBgMarkWorker != gp { 1339 break 1340 } 1341 1342 // Disable preemption so we can use the gcw. If the 1343 // scheduler wants to preempt us, we'll stop draining, 1344 // dispose the gcw, and then preempt. 1345 mp = acquirem() 1346 1347 if gcBlackenEnabled == 0 { 1348 throw("gcBgMarkWorker: blackening not enabled") 1349 } 1350 1351 startTime := nanotime() 1352 1353 decnwait := xadd(&work.nwait, -1) 1354 if decnwait == work.nproc { 1355 println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) 1356 throw("work.nwait was > work.nproc") 1357 } 1358 1359 done := false 1360 switch p.gcMarkWorkerMode { 1361 default: 1362 throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") 1363 case gcMarkWorkerDedicatedMode: 1364 gcDrain(&p.gcw, gcDrainBlock|gcDrainFlushBgCredit) 1365 // gcDrain did the xadd(&work.nwait +1) to 1366 // match the decrement above. It only returns 1367 // at a mark completion point. 
1368 done = true 1369 if !p.gcw.empty() { 1370 throw("gcDrain returned with buffer") 1371 } 1372 case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode: 1373 gcDrain(&p.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) 1374 1375 // If we are nearing the end of mark, dispose 1376 // of the cache promptly. We must do this 1377 // before signaling that we're no longer 1378 // working so that other workers can't observe 1379 // no workers and no work while we have this 1380 // cached, and before we compute done. 1381 if gcBlackenPromptly { 1382 p.gcw.dispose() 1383 } 1384 1385 // Was this the last worker and did we run out 1386 // of work? 1387 incnwait := xadd(&work.nwait, +1) 1388 if incnwait > work.nproc { 1389 println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode, 1390 "work.nwait=", incnwait, "work.nproc=", work.nproc) 1391 throw("work.nwait > work.nproc") 1392 } 1393 done = incnwait == work.nproc && !gcMarkWorkAvailable(nil) 1394 } 1395 1396 // If this worker reached a background mark completion 1397 // point, signal the main GC goroutine. 1398 if done { 1399 if gcBlackenPromptly { 1400 if work.bgMark1.done == 0 { 1401 throw("completing mark 2, but bgMark1.done == 0") 1402 } 1403 work.bgMark2.complete() 1404 } else { 1405 work.bgMark1.complete() 1406 } 1407 } 1408 1409 duration := nanotime() - startTime 1410 switch p.gcMarkWorkerMode { 1411 case gcMarkWorkerDedicatedMode: 1412 xaddint64(&gcController.dedicatedMarkTime, duration) 1413 xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) 1414 case gcMarkWorkerFractionalMode: 1415 xaddint64(&gcController.fractionalMarkTime, duration) 1416 xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) 1417 case gcMarkWorkerIdleMode: 1418 xaddint64(&gcController.idleMarkTime, duration) 1419 } 1420 } 1421 } 1422 1423 // gcMarkWorkAvailable returns true if executing a mark worker 1424 // on p is potentially useful. p may be nil, in which case it only 1425 // checks the global sources of work. 1426 func gcMarkWorkAvailable(p *p) bool { 1427 if p != nil && !p.gcw.empty() { 1428 return true 1429 } 1430 if atomicload64(&work.full) != 0 { 1431 return true // global work available 1432 } 1433 if work.markrootNext < work.markrootJobs { 1434 return true // root scan work available 1435 } 1436 return false 1437 } 1438 1439 // gcFlushGCWork disposes the gcWork caches of all Ps. The world must 1440 // be stopped. 1441 //go:nowritebarrier 1442 func gcFlushGCWork() { 1443 // Gather all cached GC work. All other Ps are stopped, so 1444 // it's safe to manipulate their GC work caches. 1445 for i := 0; i < int(gomaxprocs); i++ { 1446 allp[i].gcw.dispose() 1447 } 1448 } 1449 1450 // gcMark runs the mark (or, for concurrent GC, mark termination) 1451 // STW is in effect at this point. 1452 //TODO go:nowritebarrier 1453 func gcMark(start_time int64) { 1454 if debug.allocfreetrace > 0 { 1455 tracegc() 1456 } 1457 1458 if gcphase != _GCmarktermination { 1459 throw("in gcMark expecting to see gcphase as _GCmarktermination") 1460 } 1461 work.tstart = start_time 1462 1463 gcCopySpans() // TODO(rlh): should this be hoisted and done only once? Right now it is done for normal marking and also for checkmarking. 1464 1465 // Make sure the per-P gcWork caches are empty. During mark 1466 // termination, these caches can still be used temporarily, 1467 // but must be disposed to the global lists immediately. 1468 gcFlushGCWork() 1469 1470 // Queue root marking jobs. 
1471 gcMarkRootPrepare() 1472 1473 work.nwait = 0 1474 work.ndone = 0 1475 work.nproc = uint32(gcprocs()) 1476 1477 if trace.enabled { 1478 traceGCScanStart() 1479 } 1480 1481 if work.nproc > 1 { 1482 noteclear(&work.alldone) 1483 helpgc(int32(work.nproc)) 1484 } 1485 1486 gchelperstart() 1487 1488 var gcw gcWork 1489 gcDrain(&gcw, gcDrainBlock) 1490 gcw.dispose() 1491 1492 gcMarkRootCheck() 1493 if work.full != 0 { 1494 throw("work.full != 0") 1495 } 1496 1497 if work.nproc > 1 { 1498 notesleep(&work.alldone) 1499 } 1500 1501 // markroot is done now, so record that objects with 1502 // finalizers have been scanned. 1503 work.finalizersDone = true 1504 1505 for i := 0; i < int(gomaxprocs); i++ { 1506 if allp[i].gcw.wbuf != 0 { 1507 throw("P has cached GC work at end of mark termination") 1508 } 1509 } 1510 1511 if trace.enabled { 1512 traceGCScanDone() 1513 } 1514 1515 // TODO(austin): This doesn't have to be done during STW, as 1516 // long as we block the next GC cycle until this is done. Move 1517 // it after we start the world, but before dropping worldsema. 1518 // (See issue #11465.) 1519 freeStackSpans() 1520 1521 cachestats() 1522 1523 // Compute the reachable heap size at the beginning of the 1524 // cycle. This is approximately the marked heap size at the 1525 // end (which we know) minus the amount of marked heap that 1526 // was allocated after marking began (which we don't know, but 1527 // is approximately the amount of heap that was allocated 1528 // since marking began). 1529 allocatedDuringCycle := memstats.heap_live - work.initialHeapLive 1530 if work.bytesMarked >= allocatedDuringCycle { 1531 memstats.heap_reachable = work.bytesMarked - allocatedDuringCycle 1532 } else { 1533 // This can happen if most of the allocation during 1534 // the cycle never became reachable from the heap. 1535 // Just set the reachable heap approximation to 0 and 1536 // let the heapminimum kick in below. 1537 memstats.heap_reachable = 0 1538 } 1539 1540 // Trigger the next GC cycle when the allocated heap has grown 1541 // by triggerRatio over the reachable heap size. Assume that 1542 // we're in steady state, so the reachable heap size is the 1543 // same now as it was at the beginning of the GC cycle. 1544 memstats.next_gc = uint64(float64(memstats.heap_reachable) * (1 + gcController.triggerRatio)) 1545 if memstats.next_gc < heapminimum { 1546 memstats.next_gc = heapminimum 1547 } 1548 if int64(memstats.next_gc) < 0 { 1549 print("next_gc=", memstats.next_gc, " bytesMarked=", work.bytesMarked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "\n") 1550 throw("next_gc underflow") 1551 } 1552 1553 // Update other GC heap size stats. 1554 memstats.heap_live = work.bytesMarked 1555 memstats.heap_marked = work.bytesMarked 1556 memstats.heap_scan = uint64(gcController.scanWork) 1557 1558 minNextGC := memstats.heap_live + sweepMinHeapDistance*uint64(gcpercent)/100 1559 if memstats.next_gc < minNextGC { 1560 // The allocated heap is already past the trigger. 1561 // This can happen if the triggerRatio is very low and 1562 // the reachable heap estimate is less than the live 1563 // heap size. 1564 // 1565 // Concurrent sweep happens in the heap growth from 1566 // heap_live to next_gc, so bump next_gc up to ensure 1567 // that concurrent sweep has some heap growth in which 1568 // to perform sweeping before we start the next GC 1569 // cycle. 
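// For example (hypothetical numbers, added for illustration): with GOGC=100,
// sweepMinHeapDistance stays at 1 MB, so if heap_live is 50 MB after mark
// termination and next_gc came out below 51 MB, it is bumped up to 51 MB here.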
		memstats.next_gc = minNextGC
	}

	if trace.enabled {
		traceHeapAlloc()
		traceNextGC()
	}
}

func gcSweep(mode gcMode) {
	if gcphase != _GCoff {
		throw("gcSweep being done but phase is not GCoff")
	}
	gcCopySpans()

	lock(&mheap_.lock)
	mheap_.sweepgen += 2
	mheap_.sweepdone = 0
	sweep.spanidx = 0
	unlock(&mheap_.lock)

	if !_ConcurrentSweep || mode == gcForceBlockMode {
		// Special case synchronous sweep.
		// Record that no proportional sweeping has to happen.
		lock(&mheap_.lock)
		mheap_.sweepPagesPerByte = 0
		mheap_.pagesSwept = 0
		unlock(&mheap_.lock)
		// Sweep all spans eagerly.
		for sweepone() != ^uintptr(0) {
			sweep.npausesweep++
		}
		// Do an additional mProf_GC, because all 'free' events are now real as well.
		mProf_GC()
		mProf_GC()
		return
	}

	// Concurrent sweep needs to sweep all of the in-use pages by
	// the time the allocated heap reaches the GC trigger. Compute
	// the ratio of in-use pages to sweep per byte allocated.
	heapDistance := int64(memstats.next_gc) - int64(memstats.heap_live)
	// Add a little margin so rounding errors and concurrent
	// sweep are less likely to leave pages unswept when GC starts.
	heapDistance -= 1024 * 1024
	if heapDistance < _PageSize {
		// Avoid setting the sweep ratio extremely high
		heapDistance = _PageSize
	}
	lock(&mheap_.lock)
	mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance)
	mheap_.pagesSwept = 0
	mheap_.spanBytesAlloc = 0
	unlock(&mheap_.lock)

	// Background sweep.
	lock(&sweep.lock)
	if sweep.parked {
		sweep.parked = false
		ready(sweep.g, 0)
	}
	unlock(&sweep.lock)
	mProf_GC()
}

func gcCopySpans() {
	// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
	// resizing/freeing allspans.
	// New spans can be created while GC progresses, but they are not garbage for
	// this round:
	// - new stack spans can be created even while the world is stopped.
	// - new malloc spans can be created during the concurrent sweep
	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
	lock(&mheap_.lock)
	// Free the old cached mark array if necessary.
	if work.spans != nil && &work.spans[0] != &h_allspans[0] {
		sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
	}
	// Cache the current array for sweeping.
	mheap_.gcspans = mheap_.allspans
	work.spans = h_allspans
	unlock(&mheap_.lock)
}

// gcResetMarkState resets global state prior to marking (concurrent
// or STW) and resets the stack scan state of all Gs. Any Gs created
// after this will also be in the reset state.
func gcResetMarkState() {
	// This may be called during a concurrent phase, so make sure
	// allgs doesn't change.
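	// Taking allglock and walking the current allgs is sufficient
	// because, as noted above, Gs created afterwards already start
	// in this reset state. Zeroing gcAssistBytes also drops any
	// assist credit or debt a G carried over from the previous cycle.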
	lock(&allglock)
	for _, gp := range allgs {
		gp.gcscandone = false  // set to true in gcphasework
		gp.gcscanvalid = false // stack has not been scanned
		gp.gcAssistBytes = 0
	}
	unlock(&allglock)

	work.bytesMarked = 0
	work.initialHeapLive = memstats.heap_live
}

// Hooks for other packages

var poolcleanup func()

//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
func sync_runtime_registerPoolCleanup(f func()) {
	poolcleanup = f
}

func clearpools() {
	// clear sync.Pools
	if poolcleanup != nil {
		poolcleanup()
	}

	// Clear central sudog cache.
	// Leave per-P caches alone, they have strictly bounded size.
	// Disconnect cached list before dropping it on the floor,
	// so that a dangling ref to one entry does not pin all of them.
	lock(&sched.sudoglock)
	var sg, sgnext *sudog
	for sg = sched.sudogcache; sg != nil; sg = sgnext {
		sgnext = sg.next
		sg.next = nil
	}
	sched.sudogcache = nil
	unlock(&sched.sudoglock)

	// Clear central defer pools.
	// Leave per-P pools alone, they have strictly bounded size.
	lock(&sched.deferlock)
	for i := range sched.deferpool {
		// disconnect cached list before dropping it on the floor,
		// so that a dangling ref to one entry does not pin all of them.
		var d, dlink *_defer
		for d = sched.deferpool[i]; d != nil; d = dlink {
			dlink = d.link
			d.link = nil
		}
		sched.deferpool[i] = nil
	}
	unlock(&sched.deferlock)

	for _, p := range &allp {
		if p == nil {
			break
		}
		// clear tinyalloc pool
		if c := p.mcache; c != nil {
			c.tiny = nil
			c.tinyoffset = 0
		}
	}
}

// Timing

//go:nowritebarrier
func gchelper() {
	_g_ := getg()
	_g_.m.traceback = 2
	gchelperstart()

	if trace.enabled {
		traceGCScanStart()
	}

	// Parallel mark over GC roots and heap
	if gcphase == _GCmarktermination {
		var gcw gcWork
		gcDrain(&gcw, gcDrainBlock) // blocks in getfull
		gcw.dispose()
	}

	if trace.enabled {
		traceGCScanDone()
	}

	nproc := work.nproc // work.nproc can change right after we increment work.ndone
	if xadd(&work.ndone, +1) == nproc-1 {
		notewakeup(&work.alldone)
	}
	_g_.m.traceback = 0
}

func gchelperstart() {
	_g_ := getg()

	if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
		throw("gchelperstart: bad m->helpgc")
	}
	if _g_ != _g_.m.g0 {
		throw("gchelper not running on g0 stack")
	}
}

// itoaDiv formats val/(10**dec) into buf.
func itoaDiv(buf []byte, val uint64, dec int) []byte {
	i := len(buf) - 1
	idec := i - dec
	for val >= 10 || i >= idec {
		buf[i] = byte(val%10 + '0')
		i--
		if i == idec {
			buf[i] = '.'
			i--
		}
		val /= 10
	}
	buf[i] = byte(val + '0')
	return buf[i:]
}

// fmtNSAsMS nicely formats ns nanoseconds as milliseconds.
func fmtNSAsMS(buf []byte, ns uint64) []byte {
	if ns >= 10e6 {
		// Format as whole milliseconds.
		return itoaDiv(buf, ns/1e6, 0)
	}
	// Format two digits of precision, with at most three decimal places.
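	// Illustrative traces of the code below: ns=1234567 reduces to
	// x=12, dec=1, which itoaDiv renders as "1.2"; ns=56789 keeps
	// x=56, dec=3 and renders as "0.056".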
	x := ns / 1e3
	if x == 0 {
		buf[0] = '0'
		return buf[:1]
	}
	dec := 3
	for x >= 100 {
		x /= 10
		dec--
	}
	return itoaDiv(buf, x, dec)
}
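The pacing arithmetic above can be replayed outside the runtime. The sketch below is hypothetical and not part of mgc.go: it only applies the next_gc formula from gcMark and the sweepPagesPerByte formula from gcSweep to example values (the triggerRatio, heap sizes, and page counts are made up).

package main

import "fmt"

func main() {
	// Hypothetical end-of-cycle numbers.
	const (
		heapReachable = 100 << 20 // estimated reachable heap (memstats.heap_reachable)
		triggerRatio  = 0.55      // example stand-in for gcController.triggerRatio
		heapLive      = 100 << 20 // memstats.heap_live after mark termination
		pagesInUse    = 20000     // mheap_.pagesInUse
		pageSize      = 8192      // _PageSize
	)

	// gcMark: trigger the next cycle once the heap has grown by
	// triggerRatio over the reachable heap.
	nextGC := uint64(float64(heapReachable) * (1 + triggerRatio))
	fmt.Printf("next_gc ~= %.1f MB\n", float64(nextGC)/(1<<20)) // ~155 MB

	// gcSweep: spread sweeping of the in-use pages over the heap
	// growth from heap_live to next_gc, minus the 1MB margin.
	heapDistance := int64(nextGC) - int64(heapLive) - 1<<20
	if heapDistance < pageSize {
		heapDistance = pageSize
	}
	sweepPagesPerByte := float64(pagesInUse) / float64(heapDistance)
	fmt.Printf("sweep %.6f pages per byte allocated\n", sweepPagesPerByte)
}

With these example numbers, every byte allocated between heap_live and the trigger obliges roughly that many pages of sweeping, so all in-use pages are swept before the next cycle starts.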