github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/runtime/mgc.go (about)

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector (GC).
//
// The GC runs concurrently with mutator threads, is type accurate (aka
// precise), and allows multiple GC threads to run in parallel. It is a
// concurrent mark-and-sweep collector that uses a write barrier. It is
// non-generational and non-compacting. Allocation is done using
// size-segregated per-P allocation areas to minimize fragmentation while
// eliminating locks in the common case.
//
// The algorithm decomposes into several steps. This is a high-level
// description of the algorithm being used. For an overview of GC, a good
// place to start is Richard Jones' gchandbook.org.
//
// The algorithm's intellectual heritage includes Dijkstra's on-the-fly
// algorithm, see
// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and
// E. F. M. Steffens. 1978. On-the-fly garbage collection: an exercise in
// cooperation. Commun. ACM 21, 11 (November 1978), 966-975.
// For journal-quality proofs that these steps are complete, correct, and
// terminate, see
// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping
// the world. Concurrency and Computation: Practice and Experience 15(3-5),
// 2003.
//
// 1. GC performs sweep termination.
//
//    a. Stop the world. This causes all Ps to reach a GC safe-point.
//
//    b. Sweep any unswept spans. There will only be unswept spans if
//    this GC cycle was forced before the expected time.
//
// 2. GC performs the mark phase.
//
//    a. Prepare for the mark phase by setting gcphase to _GCmark
//    (from _GCoff), enabling the write barrier, enabling mutator
//    assists, and enqueueing root mark jobs. No objects may be
//    scanned until all Ps have enabled the write barrier, which is
//    accomplished using STW.
//
//    b. Start the world. From this point, GC work is done by mark
//    workers started by the scheduler and by assists performed as
//    part of allocation. The write barrier shades both the
//    overwritten pointer and the new pointer value for any pointer
//    writes (see mbarrier.go for details). Newly allocated objects
//    are immediately marked black.
//
//    c. GC performs root marking jobs. This includes scanning all
//    stacks, shading all globals, and shading any heap pointers in
//    off-heap runtime data structures. Scanning a stack stops a
//    goroutine, shades any pointers found on its stack, and then
//    resumes the goroutine.
//
//    d. GC drains the work queue of grey objects, scanning each grey
//    object to black and shading all pointers found in the object
//    (which in turn may add those pointers to the work queue).
//
//    e. Because GC work is spread across local caches, GC uses a
//    distributed termination algorithm to detect when there are no
//    more root marking jobs or grey objects (see gcMarkDone). At this
//    point, GC transitions to mark termination.
//
// 3. GC performs mark termination.
//
//    a. Stop the world.
//
//    b. Set gcphase to _GCmarktermination, and disable workers and
//    assists.
//
//    c. Perform housekeeping like flushing mcaches.
//
// 4. GC performs the sweep phase.
//
//    a. Prepare for the sweep phase by setting gcphase to _GCoff,
//    setting up sweep state and disabling the write barrier.
//
//    b. Start the world. From this point on, newly allocated objects
//    are white, and allocating sweeps spans before use if necessary.
//
//    c. GC does concurrent sweeping in the background and in response
//    to allocation. See description below.
//
// 5. When sufficient allocation has taken place, replay the sequence
// starting with 1 above. See discussion of GC rate below.

// Concurrent sweep.
//
// The sweep phase proceeds concurrently with normal program execution. The
// heap is swept span-by-span both lazily (when a goroutine needs another
// span) and concurrently in a background goroutine (this helps programs that
// are not CPU bound). At the end of STW mark termination all spans are marked
// as "needs sweeping".
//
// The background sweeper goroutine simply sweeps spans one-by-one.
//
// To avoid requesting more OS memory while there are unswept spans, when a
// goroutine needs another span, it first attempts to reclaim that much memory
// by sweeping. When a goroutine needs to allocate a new small-object span, it
// sweeps small-object spans for the same object size until it frees at least
// one object. When a goroutine needs to allocate a large-object span from the
// heap, it sweeps spans until it frees at least that many pages into the
// heap. There is one case where this may not suffice: if a goroutine sweeps
// and frees two nonadjacent one-page spans to the heap, it will allocate a
// new two-page span, but there can still be other one-page unswept spans
// which could be combined into a two-page span.
//
// It is critical to ensure that no operations proceed on unswept spans (that
// would corrupt the mark bits in the GC bitmap). During GC all mcaches are
// flushed into the central cache, so they are empty. When a goroutine grabs a
// new span into mcache, it sweeps it. When a goroutine explicitly frees an
// object or sets a finalizer, it ensures that the span is swept (either by
// sweeping it, or by waiting for the concurrent sweep to finish). The
// finalizer goroutine is kicked off only when all spans are swept. When the
// next GC starts, it sweeps all not-yet-swept spans (if any).

// GC rate.
// The next GC is after we've allocated an extra amount of memory proportional
// to the amount already in use. The proportion is controlled by the GOGC
// environment variable (100 by default). If GOGC=100 and we're using 4M,
// we'll GC again when we get to 8M (this mark is tracked in the next_gc
// variable). This keeps the GC cost in linear proportion to the allocation
// cost. Adjusting GOGC just changes the linear constant (and also the amount
// of extra memory used). (A standalone sketch of this rule follows this
// comment block.)

// Oblets
//
// In order to prevent long pauses while scanning large objects and to improve
// parallelism, the garbage collector breaks up scan jobs for objects larger
// than maxObletBytes into "oblets" of at most maxObletBytes. When scanning
// encounters the beginning of a large object, it scans only the first oblet
// and enqueues the remaining oblets as new scan jobs.
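To make the "GC rate" rule above concrete, here is a minimal standalone sketch. It is not part of this file; the helper name nextGCGoal is made up for illustration. It reproduces the rule that the next collection is targeted once the heap has grown by GOGC percent over the heap marked live by the previous cycle, so GOGC=100 with 4M in use gives an 8M goal.

package main

import "fmt"

// nextGCGoal mirrors the "GC rate" rule described above: aim to collect again
// once the heap has grown by gogc percent over the previously marked (live)
// heap. A negative gogc means GC is off (GOGC=off).
func nextGCGoal(heapMarked uint64, gogc int) uint64 {
	if gogc < 0 {
		return ^uint64(0) // effectively "never"
	}
	return heapMarked + heapMarked*uint64(gogc)/100
}

func main() {
	const mib = 1 << 20
	// The example from the comment: GOGC=100 and 4M in use => collect at 8M.
	fmt.Println(nextGCGoal(4*mib, 100) / mib) // 8
	// A lower GOGC trades CPU for memory: GOGC=50 => collect at 6M.
	fmt.Println(nextGCGoal(4*mib, 50) / mib) // 6
}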
128 129 package runtime 130 131 import ( 132 "internal/cpu" 133 "runtime/internal/atomic" 134 "unsafe" 135 ) 136 137 const ( 138 _DebugGC = 0 139 _ConcurrentSweep = true 140 _FinBlockSize = 4 * 1024 141 142 // sweepMinHeapDistance is a lower bound on the heap distance 143 // (in bytes) reserved for concurrent sweeping between GC 144 // cycles. This will be scaled by gcpercent/100. 145 sweepMinHeapDistance = 1024 * 1024 146 ) 147 148 // heapminimum is the minimum heap size at which to trigger GC. 149 // For small heaps, this overrides the usual GOGC*live set rule. 150 // 151 // When there is a very small live set but a lot of allocation, simply 152 // collecting when the heap reaches GOGC*live results in many GC 153 // cycles and high total per-GC overhead. This minimum amortizes this 154 // per-GC overhead while keeping the heap reasonably small. 155 // 156 // During initialization this is set to 4MB*GOGC/100. In the case of 157 // GOGC==0, this will set heapminimum to 0, resulting in constant 158 // collection even when the heap size is small, which is useful for 159 // debugging. 160 var heapminimum uint64 = defaultHeapMinimum 161 162 // defaultHeapMinimum is the value of heapminimum for GOGC==100. 163 const defaultHeapMinimum = 4 << 20 164 165 // Initialized from $GOGC. GOGC=off means no GC. 166 var gcpercent int32 167 168 func gcinit() { 169 if unsafe.Sizeof(workbuf{}) != _WorkbufSize { 170 throw("size of Workbuf is suboptimal") 171 } 172 173 // No sweep on the first cycle. 174 mheap_.sweepdone = 1 175 176 // Set a reasonable initial GC trigger. 177 memstats.triggerRatio = 7 / 8.0 178 179 // Fake a heap_marked value so it looks like a trigger at 180 // heapminimum is the appropriate growth from heap_marked. 181 // This will go into computing the initial GC goal. 182 memstats.heap_marked = uint64(float64(heapminimum) / (1 + memstats.triggerRatio)) 183 184 // Set gcpercent from the environment. This will also compute 185 // and set the GC trigger and goal. 186 _ = setGCPercent(readgogc()) 187 188 work.startSema = 1 189 work.markDoneSema = 1 190 } 191 192 func readgogc() int32 { 193 p := gogetenv("GOGC") 194 if p == "off" { 195 return -1 196 } 197 if n, ok := atoi32(p); ok { 198 return n 199 } 200 return 100 201 } 202 203 // gcenable is called after the bulk of the runtime initialization, 204 // just before we're about to start letting user code run. 205 // It kicks off the background sweeper goroutine and enables GC. 206 func gcenable() { 207 c := make(chan int, 1) 208 go bgsweep(c) 209 <-c 210 memstats.enablegc = true // now that runtime is initialized, GC is okay 211 } 212 213 //go:linkname setGCPercent runtime/debug.setGCPercent 214 func setGCPercent(in int32) (out int32) { 215 lock(&mheap_.lock) 216 out = gcpercent 217 if in < 0 { 218 in = -1 219 } 220 gcpercent = in 221 heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 222 // Update pacing in response to gcpercent change. 223 gcSetTriggerRatio(memstats.triggerRatio) 224 unlock(&mheap_.lock) 225 226 // If we just disabled GC, wait for any concurrent GC mark to 227 // finish so we always return with no GC running. 228 if in < 0 { 229 gcWaitOnMark(atomic.Load(&work.cycles)) 230 } 231 232 return out 233 } 234 235 // Garbage collector phase. 236 // Indicates to write barrier and synchronization task to perform. 237 var gcphase uint32 238 239 // The compiler knows about this variable. 240 // If you change it, you must change builtin/runtime.go, too. 
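A side note on gcinit, readgogc, and setGCPercent above: the standalone sketch below approximates how the GOGC environment variable is interpreted and how it scales the minimum-heap trigger (the helper readGOGC and the constant defaultHeapMinimum mirror the runtime's identifiers, but this is not the runtime's code). At the application level the same knob is exposed by runtime/debug.SetGCPercent, which returns the previous setting; passing -1 disables collection, just like GOGC=off.

package main

import (
	"fmt"
	"os"
	"strconv"
)

// defaultHeapMinimum is 4MB, the minimum-heap trigger for GOGC=100.
const defaultHeapMinimum = 4 << 20

// readGOGC mimics readgogc above: "off" disables GC (-1), an integer is used
// as-is, and anything else (including an unset variable) falls back to 100.
func readGOGC() int {
	p := os.Getenv("GOGC")
	if p == "off" {
		return -1
	}
	if n, err := strconv.Atoi(p); err == nil {
		return n
	}
	return 100
}

func main() {
	gogc := readGOGC()
	// setGCPercent scales the minimum-heap trigger with GOGC; GOGC=0 gives a
	// zero minimum (constant collection, useful for debugging), and negative
	// values disable GC entirely, so the minimum is irrelevant.
	heapMinimum := uint64(0)
	if gogc > 0 {
		heapMinimum = defaultHeapMinimum * uint64(gogc) / 100
	}
	fmt.Println("GOGC:", gogc, "heap minimum (bytes):", heapMinimum)
}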
241 // If you change the first four bytes, you must also change the write 242 // barrier insertion code. 243 var writeBarrier struct { 244 enabled bool // compiler emits a check of this before calling write barrier 245 pad [3]byte // compiler uses 32-bit load for "enabled" field 246 needed bool // whether we need a write barrier for current GC phase 247 cgo bool // whether we need a write barrier for a cgo check 248 alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load 249 } 250 251 // gcBlackenEnabled is 1 if mutator assists and background mark 252 // workers are allowed to blacken objects. This must only be set when 253 // gcphase == _GCmark. 254 var gcBlackenEnabled uint32 255 256 const ( 257 _GCoff = iota // GC not running; sweeping in background, write barrier disabled 258 _GCmark // GC marking roots and workbufs: allocate black, write barrier ENABLED 259 _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED 260 ) 261 262 //go:nosplit 263 func setGCPhase(x uint32) { 264 atomic.Store(&gcphase, x) 265 writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination 266 writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo 267 } 268 269 // gcMarkWorkerMode represents the mode that a concurrent mark worker 270 // should operate in. 271 // 272 // Concurrent marking happens through four different mechanisms. One 273 // is mutator assists, which happen in response to allocations and are 274 // not scheduled. The other three are variations in the per-P mark 275 // workers and are distinguished by gcMarkWorkerMode. 276 type gcMarkWorkerMode int 277 278 const ( 279 // gcMarkWorkerDedicatedMode indicates that the P of a mark 280 // worker is dedicated to running that mark worker. The mark 281 // worker should run without preemption. 282 gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota 283 284 // gcMarkWorkerFractionalMode indicates that a P is currently 285 // running the "fractional" mark worker. The fractional worker 286 // is necessary when GOMAXPROCS*gcBackgroundUtilization is not 287 // an integer. The fractional worker should run until it is 288 // preempted and will be scheduled to pick up the fractional 289 // part of GOMAXPROCS*gcBackgroundUtilization. 290 gcMarkWorkerFractionalMode 291 292 // gcMarkWorkerIdleMode indicates that a P is running the mark 293 // worker because it has nothing else to do. The idle worker 294 // should run until it is preempted and account its time 295 // against gcController.idleMarkTime. 296 gcMarkWorkerIdleMode 297 ) 298 299 // gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes 300 // to use in execution traces. 301 var gcMarkWorkerModeStrings = [...]string{ 302 "GC (dedicated)", 303 "GC (fractional)", 304 "GC (idle)", 305 } 306 307 // gcController implements the GC pacing controller that determines 308 // when to trigger concurrent garbage collection and how much marking 309 // work to do in mutator assists and background marking. 310 // 311 // It uses a feedback control algorithm to adjust the memstats.gc_trigger 312 // trigger based on the heap growth and GC CPU utilization each cycle. 313 // This algorithm optimizes for heap growth to match GOGC and for CPU 314 // utilization between assist and background marking to be 25% of 315 // GOMAXPROCS. The high-level design of this algorithm is documented 316 // at https://golang.org/s/go15gcpacing. 317 // 318 // All fields of gcController are used only during a single mark 319 // cycle. 
320 var gcController gcControllerState 321 322 type gcControllerState struct { 323 // scanWork is the total scan work performed this cycle. This 324 // is updated atomically during the cycle. Updates occur in 325 // bounded batches, since it is both written and read 326 // throughout the cycle. At the end of the cycle, this is how 327 // much of the retained heap is scannable. 328 // 329 // Currently this is the bytes of heap scanned. For most uses, 330 // this is an opaque unit of work, but for estimation the 331 // definition is important. 332 scanWork int64 333 334 // bgScanCredit is the scan work credit accumulated by the 335 // concurrent background scan. This credit is accumulated by 336 // the background scan and stolen by mutator assists. This is 337 // updated atomically. Updates occur in bounded batches, since 338 // it is both written and read throughout the cycle. 339 bgScanCredit int64 340 341 // assistTime is the nanoseconds spent in mutator assists 342 // during this cycle. This is updated atomically. Updates 343 // occur in bounded batches, since it is both written and read 344 // throughout the cycle. 345 assistTime int64 346 347 // dedicatedMarkTime is the nanoseconds spent in dedicated 348 // mark workers during this cycle. This is updated atomically 349 // at the end of the concurrent mark phase. 350 dedicatedMarkTime int64 351 352 // fractionalMarkTime is the nanoseconds spent in the 353 // fractional mark worker during this cycle. This is updated 354 // atomically throughout the cycle and will be up-to-date if 355 // the fractional mark worker is not currently running. 356 fractionalMarkTime int64 357 358 // idleMarkTime is the nanoseconds spent in idle marking 359 // during this cycle. This is updated atomically throughout 360 // the cycle. 361 idleMarkTime int64 362 363 // markStartTime is the absolute start time in nanoseconds 364 // that assists and background mark workers started. 365 markStartTime int64 366 367 // dedicatedMarkWorkersNeeded is the number of dedicated mark 368 // workers that need to be started. This is computed at the 369 // beginning of each cycle and decremented atomically as 370 // dedicated mark workers get started. 371 dedicatedMarkWorkersNeeded int64 372 373 // assistWorkPerByte is the ratio of scan work to allocated 374 // bytes that should be performed by mutator assists. This is 375 // computed at the beginning of each cycle and updated every 376 // time heap_scan is updated. 377 assistWorkPerByte float64 378 379 // assistBytesPerWork is 1/assistWorkPerByte. 380 assistBytesPerWork float64 381 382 // fractionalUtilizationGoal is the fraction of wall clock 383 // time that should be spent in the fractional mark worker on 384 // each P that isn't running a dedicated worker. 385 // 386 // For example, if the utilization goal is 25% and there are 387 // no dedicated workers, this will be 0.25. If the goal is 388 // 25%, there is one dedicated worker, and GOMAXPROCS is 5, 389 // this will be 0.05 to make up the missing 5%. 390 // 391 // If this is zero, no fractional workers are needed. 392 fractionalUtilizationGoal float64 393 394 _ cpu.CacheLinePad 395 } 396 397 // startCycle resets the GC controller's state and computes estimates 398 // for a new GC cycle. The caller must hold worldsema. 
399 func (c *gcControllerState) startCycle() { 400 c.scanWork = 0 401 c.bgScanCredit = 0 402 c.assistTime = 0 403 c.dedicatedMarkTime = 0 404 c.fractionalMarkTime = 0 405 c.idleMarkTime = 0 406 407 // If this is the first GC cycle or we're operating on a very 408 // small heap, fake heap_marked so it looks like gc_trigger is 409 // the appropriate growth from heap_marked, even though the 410 // real heap_marked may not have a meaningful value (on the 411 // first cycle) or may be much smaller (resulting in a large 412 // error response). 413 if memstats.gc_trigger <= heapminimum { 414 memstats.heap_marked = uint64(float64(memstats.gc_trigger) / (1 + memstats.triggerRatio)) 415 } 416 417 // Re-compute the heap goal for this cycle in case something 418 // changed. This is the same calculation we use elsewhere. 419 memstats.next_gc = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 420 if gcpercent < 0 { 421 memstats.next_gc = ^uint64(0) 422 } 423 424 // Ensure that the heap goal is at least a little larger than 425 // the current live heap size. This may not be the case if GC 426 // start is delayed or if the allocation that pushed heap_live 427 // over gc_trigger is large or if the trigger is really close to 428 // GOGC. Assist is proportional to this distance, so enforce a 429 // minimum distance, even if it means going over the GOGC goal 430 // by a tiny bit. 431 if memstats.next_gc < memstats.heap_live+1024*1024 { 432 memstats.next_gc = memstats.heap_live + 1024*1024 433 } 434 435 // Compute the background mark utilization goal. In general, 436 // this may not come out exactly. We round the number of 437 // dedicated workers so that the utilization is closest to 438 // 25%. For small GOMAXPROCS, this would introduce too much 439 // error, so we add fractional workers in that case. 440 totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization 441 c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5) 442 utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1 443 const maxUtilError = 0.3 444 if utilError < -maxUtilError || utilError > maxUtilError { 445 // Rounding put us more than 30% off our goal. With 446 // gcBackgroundUtilization of 25%, this happens for 447 // GOMAXPROCS<=3 or GOMAXPROCS=6. Enable fractional 448 // workers to compensate. 449 if float64(c.dedicatedMarkWorkersNeeded) > totalUtilizationGoal { 450 // Too many dedicated workers. 451 c.dedicatedMarkWorkersNeeded-- 452 } 453 c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs) 454 } else { 455 c.fractionalUtilizationGoal = 0 456 } 457 458 // In STW mode, we just want dedicated workers. 459 if debug.gcstoptheworld > 0 { 460 c.dedicatedMarkWorkersNeeded = int64(gomaxprocs) 461 c.fractionalUtilizationGoal = 0 462 } 463 464 // Clear per-P state 465 for _, p := range allp { 466 p.gcAssistTime = 0 467 p.gcFractionalMarkTime = 0 468 } 469 470 // Compute initial values for controls that are updated 471 // throughout the cycle. 472 c.revise() 473 474 if debug.gcpacertrace > 0 { 475 print("pacer: assist ratio=", c.assistWorkPerByte, 476 " (scan ", memstats.heap_scan>>20, " MB in ", 477 work.initialHeapLive>>20, "->", 478 memstats.next_gc>>20, " MB)", 479 " workers=", c.dedicatedMarkWorkersNeeded, 480 "+", c.fractionalUtilizationGoal, "\n") 481 } 482 } 483 484 // revise updates the assist ratio during the GC cycle to account for 485 // improved estimates. 
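The rounding logic in startCycle above is easier to see with concrete numbers. The standalone sketch below reuses the gcBackgroundUtilization value of 0.25 and the 30% error threshold from this file to reproduce the dedicated/fractional worker split; with a 25% goal, fractional workers turn out to be needed for GOMAXPROCS <= 3 and GOMAXPROCS = 6, matching the comment in the code.

package main

import "fmt"

// markWorkerSplit mirrors the dedicated/fractional computation in startCycle
// above, assuming gcBackgroundUtilization = 0.25. Standalone sketch only.
func markWorkerSplit(gomaxprocs int) (dedicated int64, fractionalGoal float64) {
	const gcBackgroundUtilization = 0.25
	totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization
	dedicated = int64(totalUtilizationGoal + 0.5) // round to nearest
	utilError := float64(dedicated)/totalUtilizationGoal - 1
	const maxUtilError = 0.3
	if utilError < -maxUtilError || utilError > maxUtilError {
		// Rounding landed more than 30% off the goal; enable a fractional
		// worker to make up (or give back) the difference.
		if float64(dedicated) > totalUtilizationGoal {
			dedicated--
		}
		fractionalGoal = (totalUtilizationGoal - float64(dedicated)) / float64(gomaxprocs)
	}
	return
}

func main() {
	for _, procs := range []int{1, 2, 3, 4, 6, 8} {
		d, f := markWorkerSplit(procs)
		fmt.Printf("GOMAXPROCS=%d: dedicated=%d fractional=%.4f\n", procs, d, f)
	}
}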
This should be called either under STW or 486 // whenever memstats.heap_scan, memstats.heap_live, or 487 // memstats.next_gc is updated (with mheap_.lock held). 488 // 489 // It should only be called when gcBlackenEnabled != 0 (because this 490 // is when assists are enabled and the necessary statistics are 491 // available). 492 func (c *gcControllerState) revise() { 493 gcpercent := gcpercent 494 if gcpercent < 0 { 495 // If GC is disabled but we're running a forced GC, 496 // act like GOGC is huge for the below calculations. 497 gcpercent = 100000 498 } 499 live := atomic.Load64(&memstats.heap_live) 500 501 var heapGoal, scanWorkExpected int64 502 if live <= memstats.next_gc { 503 // We're under the soft goal. Pace GC to complete at 504 // next_gc assuming the heap is in steady-state. 505 heapGoal = int64(memstats.next_gc) 506 507 // Compute the expected scan work remaining. 508 // 509 // This is estimated based on the expected 510 // steady-state scannable heap. For example, with 511 // GOGC=100, only half of the scannable heap is 512 // expected to be live, so that's what we target. 513 // 514 // (This is a float calculation to avoid overflowing on 515 // 100*heap_scan.) 516 scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) 517 } else { 518 // We're past the soft goal. Pace GC so that in the 519 // worst case it will complete by the hard goal. 520 const maxOvershoot = 1.1 521 heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) 522 523 // Compute the upper bound on the scan work remaining. 524 scanWorkExpected = int64(memstats.heap_scan) 525 } 526 527 // Compute the remaining scan work estimate. 528 // 529 // Note that we currently count allocations during GC as both 530 // scannable heap (heap_scan) and scan work completed 531 // (scanWork), so allocation will change this difference will 532 // slowly in the soft regime and not at all in the hard 533 // regime. 534 scanWorkRemaining := scanWorkExpected - c.scanWork 535 if scanWorkRemaining < 1000 { 536 // We set a somewhat arbitrary lower bound on 537 // remaining scan work since if we aim a little high, 538 // we can miss by a little. 539 // 540 // We *do* need to enforce that this is at least 1, 541 // since marking is racy and double-scanning objects 542 // may legitimately make the remaining scan work 543 // negative, even in the hard goal regime. 544 scanWorkRemaining = 1000 545 } 546 547 // Compute the heap distance remaining. 548 heapRemaining := heapGoal - int64(live) 549 if heapRemaining <= 0 { 550 // This shouldn't happen, but if it does, avoid 551 // dividing by zero or setting the assist negative. 552 heapRemaining = 1 553 } 554 555 // Compute the mutator assist ratio so by the time the mutator 556 // allocates the remaining heap bytes up to next_gc, it will 557 // have done (or stolen) the remaining amount of scan work. 558 c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) 559 c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) 560 } 561 562 // endCycle computes the trigger ratio for the next cycle. 563 func (c *gcControllerState) endCycle() float64 { 564 if work.userForced { 565 // Forced GC means this cycle didn't start at the 566 // trigger, so where it finished isn't good 567 // information about how to adjust the trigger. 568 // Just leave it where it is. 569 return memstats.triggerRatio 570 } 571 572 // Proportional response gain for the trigger controller. Must 573 // be in [0, 1]. 
Lower values smooth out transient effects but 574 // take longer to respond to phase changes. Higher values 575 // react to phase changes quickly, but are more affected by 576 // transient changes. Values near 1 may be unstable. 577 const triggerGain = 0.5 578 579 // Compute next cycle trigger ratio. First, this computes the 580 // "error" for this cycle; that is, how far off the trigger 581 // was from what it should have been, accounting for both heap 582 // growth and GC CPU utilization. We compute the actual heap 583 // growth during this cycle and scale that by how far off from 584 // the goal CPU utilization we were (to estimate the heap 585 // growth if we had the desired CPU utilization). The 586 // difference between this estimate and the GOGC-based goal 587 // heap growth is the error. 588 goalGrowthRatio := float64(gcpercent) / 100 589 actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 590 assistDuration := nanotime() - c.markStartTime 591 592 // Assume background mark hit its utilization goal. 593 utilization := gcBackgroundUtilization 594 // Add assist utilization; avoid divide by zero. 595 if assistDuration > 0 { 596 utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) 597 } 598 599 triggerError := goalGrowthRatio - memstats.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-memstats.triggerRatio) 600 601 // Finally, we adjust the trigger for next time by this error, 602 // damped by the proportional gain. 603 triggerRatio := memstats.triggerRatio + triggerGain*triggerError 604 605 if debug.gcpacertrace > 0 { 606 // Print controller state in terms of the design 607 // document. 608 H_m_prev := memstats.heap_marked 609 h_t := memstats.triggerRatio 610 H_T := memstats.gc_trigger 611 h_a := actualGrowthRatio 612 H_a := memstats.heap_live 613 h_g := goalGrowthRatio 614 H_g := int64(float64(H_m_prev) * (1 + h_g)) 615 u_a := utilization 616 u_g := gcGoalUtilization 617 W_a := c.scanWork 618 print("pacer: H_m_prev=", H_m_prev, 619 " h_t=", h_t, " H_T=", H_T, 620 " h_a=", h_a, " H_a=", H_a, 621 " h_g=", h_g, " H_g=", H_g, 622 " u_a=", u_a, " u_g=", u_g, 623 " W_a=", W_a, 624 " goalΔ=", goalGrowthRatio-h_t, 625 " actualΔ=", h_a-h_t, 626 " u_a/u_g=", u_a/u_g, 627 "\n") 628 } 629 630 return triggerRatio 631 } 632 633 // enlistWorker encourages another dedicated mark worker to start on 634 // another P if there are spare worker slots. It is used by putfull 635 // when more work is made available. 636 // 637 //go:nowritebarrier 638 func (c *gcControllerState) enlistWorker() { 639 // If there are idle Ps, wake one so it will run an idle worker. 640 // NOTE: This is suspected of causing deadlocks. See golang.org/issue/19112. 641 // 642 // if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { 643 // wakep() 644 // return 645 // } 646 647 // There are no idle Ps. If we need more dedicated workers, 648 // try to preempt a running P so it will switch to a worker. 649 if c.dedicatedMarkWorkersNeeded <= 0 { 650 return 651 } 652 // Pick a random other P to preempt. 
653 if gomaxprocs <= 1 { 654 return 655 } 656 gp := getg() 657 if gp == nil || gp.m == nil || gp.m.p == 0 { 658 return 659 } 660 myID := gp.m.p.ptr().id 661 for tries := 0; tries < 5; tries++ { 662 id := int32(fastrandn(uint32(gomaxprocs - 1))) 663 if id >= myID { 664 id++ 665 } 666 p := allp[id] 667 if p.status != _Prunning { 668 continue 669 } 670 if preemptone(p) { 671 return 672 } 673 } 674 } 675 676 // findRunnableGCWorker returns the background mark worker for _p_ if it 677 // should be run. This must only be called when gcBlackenEnabled != 0. 678 func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { 679 if gcBlackenEnabled == 0 { 680 throw("gcControllerState.findRunnable: blackening not enabled") 681 } 682 if _p_.gcBgMarkWorker == 0 { 683 // The mark worker associated with this P is blocked 684 // performing a mark transition. We can't run it 685 // because it may be on some other run or wait queue. 686 return nil 687 } 688 689 if !gcMarkWorkAvailable(_p_) { 690 // No work to be done right now. This can happen at 691 // the end of the mark phase when there are still 692 // assists tapering off. Don't bother running a worker 693 // now because it'll just return immediately. 694 return nil 695 } 696 697 decIfPositive := func(ptr *int64) bool { 698 if *ptr > 0 { 699 if atomic.Xaddint64(ptr, -1) >= 0 { 700 return true 701 } 702 // We lost a race 703 atomic.Xaddint64(ptr, +1) 704 } 705 return false 706 } 707 708 if decIfPositive(&c.dedicatedMarkWorkersNeeded) { 709 // This P is now dedicated to marking until the end of 710 // the concurrent mark phase. 711 _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode 712 } else if c.fractionalUtilizationGoal == 0 { 713 // No need for fractional workers. 714 return nil 715 } else { 716 // Is this P behind on the fractional utilization 717 // goal? 718 // 719 // This should be kept in sync with pollFractionalWorkerExit. 720 delta := nanotime() - gcController.markStartTime 721 if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { 722 // Nope. No need to run a fractional worker. 723 return nil 724 } 725 // Run a fractional worker. 726 _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode 727 } 728 729 // Run the background mark worker 730 gp := _p_.gcBgMarkWorker.ptr() 731 casgstatus(gp, _Gwaiting, _Grunnable) 732 if trace.enabled { 733 traceGoUnpark(gp, 0) 734 } 735 return gp 736 } 737 738 // pollFractionalWorkerExit reports whether a fractional mark worker 739 // should self-preempt. It assumes it is called from the fractional 740 // worker. 741 func pollFractionalWorkerExit() bool { 742 // This should be kept in sync with the fractional worker 743 // scheduler logic in findRunnableGCWorker. 744 now := nanotime() 745 delta := now - gcController.markStartTime 746 if delta <= 0 { 747 return true 748 } 749 p := getg().m.p.ptr() 750 selfTime := p.gcFractionalMarkTime + (now - p.gcMarkWorkerStartTime) 751 // Add some slack to the utilization goal so that the 752 // fractional worker isn't behind again the instant it exits. 753 return float64(selfTime)/float64(delta) > 1.2*gcController.fractionalUtilizationGoal 754 } 755 756 // gcSetTriggerRatio sets the trigger ratio and updates everything 757 // derived from it: the absolute trigger, the heap goal, mark pacing, 758 // and sweep pacing. 759 // 760 // This can be called any time. If GC is the in the middle of a 761 // concurrent phase, it will adjust the pacing of that phase. 
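Returning to endCycle above: the proportional controller it implements is compact enough to simulate on its own. The standalone sketch below uses the gcGoalUtilization (0.30) and triggerGain (0.5) constants from this file and the design-document names h_t, h_g, h_a, u_a; it is an illustration of the feedback step, not the runtime's code path.

package main

import "fmt"

// nextTriggerRatio mirrors the feedback step in endCycle above:
//
//	e    = h_g - h_t - (u_a/u_g)*(h_a - h_t)
//	h_t' = h_t + triggerGain*e
//
// where h_t is the current trigger ratio, h_g the goal heap growth (GOGC/100),
// h_a the actual heap growth this cycle, u_a the actual mark CPU utilization,
// and u_g the goal utilization.
func nextTriggerRatio(hT, hG, hA, uA float64) float64 {
	const (
		gcGoalUtilization = 0.30
		triggerGain       = 0.5
	)
	e := hG - hT - uA/gcGoalUtilization*(hA-hT)
	return hT + triggerGain*e
}

func main() {
	// Steady state: growth matched GOGC=100 and utilization matched the goal,
	// so the trigger ratio does not move.
	fmt.Printf("%.3f\n", nextTriggerRatio(0.875, 1.0, 1.0, 0.30)) // 0.875
	// The heap grew 1.3x over the marked heap: trigger earlier next cycle.
	fmt.Printf("%.3f\n", nextTriggerRatio(0.875, 1.0, 1.3, 0.30)) // 0.725
}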
762 // 763 // This depends on gcpercent, memstats.heap_marked, and 764 // memstats.heap_live. These must be up to date. 765 // 766 // mheap_.lock must be held or the world must be stopped. 767 func gcSetTriggerRatio(triggerRatio float64) { 768 // Set the trigger ratio, capped to reasonable bounds. 769 if triggerRatio < 0 { 770 // This can happen if the mutator is allocating very 771 // quickly or the GC is scanning very slowly. 772 triggerRatio = 0 773 } else if gcpercent >= 0 { 774 // Ensure there's always a little margin so that the 775 // mutator assist ratio isn't infinity. 776 maxTriggerRatio := 0.95 * float64(gcpercent) / 100 777 if triggerRatio > maxTriggerRatio { 778 triggerRatio = maxTriggerRatio 779 } 780 } 781 memstats.triggerRatio = triggerRatio 782 783 // Compute the absolute GC trigger from the trigger ratio. 784 // 785 // We trigger the next GC cycle when the allocated heap has 786 // grown by the trigger ratio over the marked heap size. 787 trigger := ^uint64(0) 788 if gcpercent >= 0 { 789 trigger = uint64(float64(memstats.heap_marked) * (1 + triggerRatio)) 790 // Don't trigger below the minimum heap size. 791 minTrigger := heapminimum 792 if !isSweepDone() { 793 // Concurrent sweep happens in the heap growth 794 // from heap_live to gc_trigger, so ensure 795 // that concurrent sweep has some heap growth 796 // in which to perform sweeping before we 797 // start the next GC cycle. 798 sweepMin := atomic.Load64(&memstats.heap_live) + sweepMinHeapDistance*uint64(gcpercent)/100 799 if sweepMin > minTrigger { 800 minTrigger = sweepMin 801 } 802 } 803 if trigger < minTrigger { 804 trigger = minTrigger 805 } 806 if int64(trigger) < 0 { 807 print("runtime: next_gc=", memstats.next_gc, " heap_marked=", memstats.heap_marked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "triggerRatio=", triggerRatio, " minTrigger=", minTrigger, "\n") 808 throw("gc_trigger underflow") 809 } 810 } 811 memstats.gc_trigger = trigger 812 813 // Compute the next GC goal, which is when the allocated heap 814 // has grown by GOGC/100 over the heap marked by the last 815 // cycle. 816 goal := ^uint64(0) 817 if gcpercent >= 0 { 818 goal = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 819 if goal < trigger { 820 // The trigger ratio is always less than GOGC/100, but 821 // other bounds on the trigger may have raised it. 822 // Push up the goal, too. 823 goal = trigger 824 } 825 } 826 memstats.next_gc = goal 827 if trace.enabled { 828 traceNextGC() 829 } 830 831 // Update mark pacing. 832 if gcphase != _GCoff { 833 gcController.revise() 834 } 835 836 // Update sweep pacing. 837 if isSweepDone() { 838 mheap_.sweepPagesPerByte = 0 839 } else { 840 // Concurrent sweep needs to sweep all of the in-use 841 // pages by the time the allocated heap reaches the GC 842 // trigger. Compute the ratio of in-use pages to sweep 843 // per byte allocated, accounting for the fact that 844 // some might already be swept. 845 heapLiveBasis := atomic.Load64(&memstats.heap_live) 846 heapDistance := int64(trigger) - int64(heapLiveBasis) 847 // Add a little margin so rounding errors and 848 // concurrent sweep are less likely to leave pages 849 // unswept when GC starts. 
850 heapDistance -= 1024 * 1024 851 if heapDistance < _PageSize { 852 // Avoid setting the sweep ratio extremely high 853 heapDistance = _PageSize 854 } 855 pagesSwept := atomic.Load64(&mheap_.pagesSwept) 856 sweepDistancePages := int64(mheap_.pagesInUse) - int64(pagesSwept) 857 if sweepDistancePages <= 0 { 858 mheap_.sweepPagesPerByte = 0 859 } else { 860 mheap_.sweepPagesPerByte = float64(sweepDistancePages) / float64(heapDistance) 861 mheap_.sweepHeapLiveBasis = heapLiveBasis 862 // Write pagesSweptBasis last, since this 863 // signals concurrent sweeps to recompute 864 // their debt. 865 atomic.Store64(&mheap_.pagesSweptBasis, pagesSwept) 866 } 867 } 868 } 869 870 // gcGoalUtilization is the goal CPU utilization for 871 // marking as a fraction of GOMAXPROCS. 872 const gcGoalUtilization = 0.30 873 874 // gcBackgroundUtilization is the fixed CPU utilization for background 875 // marking. It must be <= gcGoalUtilization. The difference between 876 // gcGoalUtilization and gcBackgroundUtilization will be made up by 877 // mark assists. The scheduler will aim to use within 50% of this 878 // goal. 879 // 880 // Setting this to < gcGoalUtilization avoids saturating the trigger 881 // feedback controller when there are no assists, which allows it to 882 // better control CPU and heap growth. However, the larger the gap, 883 // the more mutator assists are expected to happen, which impact 884 // mutator latency. 885 const gcBackgroundUtilization = 0.25 886 887 // gcCreditSlack is the amount of scan work credit that can 888 // accumulate locally before updating gcController.scanWork and, 889 // optionally, gcController.bgScanCredit. Lower values give a more 890 // accurate assist ratio and make it more likely that assists will 891 // successfully steal background credit. Higher values reduce memory 892 // contention. 893 const gcCreditSlack = 2000 894 895 // gcAssistTimeSlack is the nanoseconds of mutator assist time that 896 // can accumulate on a P before updating gcController.assistTime. 897 const gcAssistTimeSlack = 5000 898 899 // gcOverAssistWork determines how many extra units of scan work a GC 900 // assist does when an assist happens. This amortizes the cost of an 901 // assist by pre-paying for this many bytes of future allocations. 902 const gcOverAssistWork = 64 << 10 903 904 var work struct { 905 full lfstack // lock-free list of full blocks workbuf 906 empty lfstack // lock-free list of empty blocks workbuf 907 pad0 cpu.CacheLinePad // prevents false-sharing between full/empty and nproc/nwait 908 909 wbufSpans struct { 910 lock mutex 911 // free is a list of spans dedicated to workbufs, but 912 // that don't currently contain any workbufs. 913 free mSpanList 914 // busy is a list of all spans containing workbufs on 915 // one of the workbuf lists. 916 busy mSpanList 917 } 918 919 // Restore 64-bit alignment on 32-bit. 920 _ uint32 921 922 // bytesMarked is the number of bytes marked this cycle. This 923 // includes bytes blackened in scanned objects, noscan objects 924 // that go straight to black, and permagrey objects scanned by 925 // markroot during the concurrent scan phase. This is updated 926 // atomically during the cycle. Updates may be batched 927 // arbitrarily, since the value is only read at the end of the 928 // cycle. 929 // 930 // Because of benign races during marking, this number may not 931 // be the exact number of marked bytes, but it should be very 932 // close. 
933 // 934 // Put this field here because it needs 64-bit atomic access 935 // (and thus 8-byte alignment even on 32-bit architectures). 936 bytesMarked uint64 937 938 markrootNext uint32 // next markroot job 939 markrootJobs uint32 // number of markroot jobs 940 941 nproc uint32 942 tstart int64 943 nwait uint32 944 ndone uint32 945 946 // Number of roots of various root types. Set by gcMarkRootPrepare. 947 nFlushCacheRoots int 948 nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int 949 950 // Each type of GC state transition is protected by a lock. 951 // Since multiple threads can simultaneously detect the state 952 // transition condition, any thread that detects a transition 953 // condition must acquire the appropriate transition lock, 954 // re-check the transition condition and return if it no 955 // longer holds or perform the transition if it does. 956 // Likewise, any transition must invalidate the transition 957 // condition before releasing the lock. This ensures that each 958 // transition is performed by exactly one thread and threads 959 // that need the transition to happen block until it has 960 // happened. 961 // 962 // startSema protects the transition from "off" to mark or 963 // mark termination. 964 startSema uint32 965 // markDoneSema protects transitions from mark to mark termination. 966 markDoneSema uint32 967 968 bgMarkReady note // signal background mark worker has started 969 bgMarkDone uint32 // cas to 1 when at a background mark completion point 970 // Background mark completion signaling 971 972 // mode is the concurrency mode of the current GC cycle. 973 mode gcMode 974 975 // userForced indicates the current GC cycle was forced by an 976 // explicit user call. 977 userForced bool 978 979 // totaltime is the CPU nanoseconds spent in GC since the 980 // program started if debug.gctrace > 0. 981 totaltime int64 982 983 // initialHeapLive is the value of memstats.heap_live at the 984 // beginning of this GC cycle. 985 initialHeapLive uint64 986 987 // assistQueue is a queue of assists that are blocked because 988 // there was neither enough credit to steal or enough work to 989 // do. 990 assistQueue struct { 991 lock mutex 992 q gQueue 993 } 994 995 // sweepWaiters is a list of blocked goroutines to wake when 996 // we transition from mark termination to sweep. 997 sweepWaiters struct { 998 lock mutex 999 list gList 1000 } 1001 1002 // cycles is the number of completed GC cycles, where a GC 1003 // cycle is sweep termination, mark, mark termination, and 1004 // sweep. This differs from memstats.numgc, which is 1005 // incremented at mark termination. 1006 cycles uint32 1007 1008 // Timing/utilization stats for this cycle. 1009 stwprocs, maxprocs int32 1010 tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start 1011 1012 pauseNS int64 // total STW time this cycle 1013 pauseStart int64 // nanotime() of last STW 1014 1015 // debug.gctrace heap sizes for this cycle. 1016 heap0, heap1, heap2, heapGoal uint64 1017 } 1018 1019 // GC runs a garbage collection and blocks the caller until the 1020 // garbage collection is complete. It may also block the entire 1021 // program. 1022 func GC() { 1023 // We consider a cycle to be: sweep termination, mark, mark 1024 // termination, and sweep. This function shouldn't return 1025 // until a full cycle has been completed, from beginning to 1026 // end. Hence, we always want to finish up the current cycle 1027 // and start a new one. That means: 1028 // 1029 // 1. 
In sweep termination, mark, or mark termination of cycle 1030 // N, wait until mark termination N completes and transitions 1031 // to sweep N. 1032 // 1033 // 2. In sweep N, help with sweep N. 1034 // 1035 // At this point we can begin a full cycle N+1. 1036 // 1037 // 3. Trigger cycle N+1 by starting sweep termination N+1. 1038 // 1039 // 4. Wait for mark termination N+1 to complete. 1040 // 1041 // 5. Help with sweep N+1 until it's done. 1042 // 1043 // This all has to be written to deal with the fact that the 1044 // GC may move ahead on its own. For example, when we block 1045 // until mark termination N, we may wake up in cycle N+2. 1046 1047 // Wait until the current sweep termination, mark, and mark 1048 // termination complete. 1049 n := atomic.Load(&work.cycles) 1050 gcWaitOnMark(n) 1051 1052 // We're now in sweep N or later. Trigger GC cycle N+1, which 1053 // will first finish sweep N if necessary and then enter sweep 1054 // termination N+1. 1055 gcStart(gcTrigger{kind: gcTriggerCycle, n: n + 1}) 1056 1057 // Wait for mark termination N+1 to complete. 1058 gcWaitOnMark(n + 1) 1059 1060 // Finish sweep N+1 before returning. We do this both to 1061 // complete the cycle and because runtime.GC() is often used 1062 // as part of tests and benchmarks to get the system into a 1063 // relatively stable and isolated state. 1064 for atomic.Load(&work.cycles) == n+1 && sweepone() != ^uintptr(0) { 1065 sweep.nbgsweep++ 1066 Gosched() 1067 } 1068 1069 // Callers may assume that the heap profile reflects the 1070 // just-completed cycle when this returns (historically this 1071 // happened because this was a STW GC), but right now the 1072 // profile still reflects mark termination N, not N+1. 1073 // 1074 // As soon as all of the sweep frees from cycle N+1 are done, 1075 // we can go ahead and publish the heap profile. 1076 // 1077 // First, wait for sweeping to finish. (We know there are no 1078 // more spans on the sweep queue, but we may be concurrently 1079 // sweeping spans, so we have to wait.) 1080 for atomic.Load(&work.cycles) == n+1 && atomic.Load(&mheap_.sweepers) != 0 { 1081 Gosched() 1082 } 1083 1084 // Now we're really done with sweeping, so we can publish the 1085 // stable heap profile. Only do this if we haven't already hit 1086 // another mark termination. 1087 mp := acquirem() 1088 cycle := atomic.Load(&work.cycles) 1089 if cycle == n+1 || (gcphase == _GCmark && cycle == n+2) { 1090 mProf_PostSweep() 1091 } 1092 releasem(mp) 1093 } 1094 1095 // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has 1096 // already completed this mark phase, it returns immediately. 1097 func gcWaitOnMark(n uint32) { 1098 for { 1099 // Disable phase transitions. 1100 lock(&work.sweepWaiters.lock) 1101 nMarks := atomic.Load(&work.cycles) 1102 if gcphase != _GCmark { 1103 // We've already completed this cycle's mark. 1104 nMarks++ 1105 } 1106 if nMarks > n { 1107 // We're done. 1108 unlock(&work.sweepWaiters.lock) 1109 return 1110 } 1111 1112 // Wait until sweep termination, mark, and mark 1113 // termination of cycle N complete. 1114 work.sweepWaiters.list.push(getg()) 1115 goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1) 1116 } 1117 } 1118 1119 // gcMode indicates how concurrent a GC cycle should be. 
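Because runtime.GC above is specified in terms of whole cycles, a short usage sketch may help. It relies only on the public API (runtime.GC, runtime.ReadMemStats) and simply observes that the blocking call completes at least one full cycle; the allocation loop is purely illustrative.

package main

import (
	"fmt"
	"runtime"
)

var sink [][]byte // keep some allocations reachable so they count as live heap

func main() {
	var before, after runtime.MemStats
	runtime.ReadMemStats(&before)

	// Create some live data and some garbage for the next cycle to look at.
	for i := 0; i < 1000; i++ {
		sink = append(sink, make([]byte, 1<<10))
	}

	// runtime.GC blocks until a full cycle (sweep termination, mark, mark
	// termination, and sweep) has completed, as the comment above describes.
	runtime.GC()

	runtime.ReadMemStats(&after)
	fmt.Println("GC cycles completed:", after.NumGC-before.NumGC) // at least 1
	fmt.Println("next GC goal (bytes):", after.NextGC)
}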
1120 type gcMode int 1121 1122 const ( 1123 gcBackgroundMode gcMode = iota // concurrent GC and sweep 1124 gcForceMode // stop-the-world GC now, concurrent sweep 1125 gcForceBlockMode // stop-the-world GC now and STW sweep (forced by user) 1126 ) 1127 1128 // A gcTrigger is a predicate for starting a GC cycle. Specifically, 1129 // it is an exit condition for the _GCoff phase. 1130 type gcTrigger struct { 1131 kind gcTriggerKind 1132 now int64 // gcTriggerTime: current time 1133 n uint32 // gcTriggerCycle: cycle number to start 1134 } 1135 1136 type gcTriggerKind int 1137 1138 const ( 1139 // gcTriggerAlways indicates that a cycle should be started 1140 // unconditionally, even if GOGC is off or we're in a cycle 1141 // right now. This cannot be consolidated with other cycles. 1142 gcTriggerAlways gcTriggerKind = iota 1143 1144 // gcTriggerHeap indicates that a cycle should be started when 1145 // the heap size reaches the trigger heap size computed by the 1146 // controller. 1147 gcTriggerHeap 1148 1149 // gcTriggerTime indicates that a cycle should be started when 1150 // it's been more than forcegcperiod nanoseconds since the 1151 // previous GC cycle. 1152 gcTriggerTime 1153 1154 // gcTriggerCycle indicates that a cycle should be started if 1155 // we have not yet started cycle number gcTrigger.n (relative 1156 // to work.cycles). 1157 gcTriggerCycle 1158 ) 1159 1160 // test reports whether the trigger condition is satisfied, meaning 1161 // that the exit condition for the _GCoff phase has been met. The exit 1162 // condition should be tested when allocating. 1163 func (t gcTrigger) test() bool { 1164 if !memstats.enablegc || panicking != 0 { 1165 return false 1166 } 1167 if t.kind == gcTriggerAlways { 1168 return true 1169 } 1170 if gcphase != _GCoff { 1171 return false 1172 } 1173 switch t.kind { 1174 case gcTriggerHeap: 1175 // Non-atomic access to heap_live for performance. If 1176 // we are going to trigger on this, this thread just 1177 // atomically wrote heap_live anyway and we'll see our 1178 // own write. 1179 return memstats.heap_live >= memstats.gc_trigger 1180 case gcTriggerTime: 1181 if gcpercent < 0 { 1182 return false 1183 } 1184 lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime)) 1185 return lastgc != 0 && t.now-lastgc > forcegcperiod 1186 case gcTriggerCycle: 1187 // t.n > work.cycles, but accounting for wraparound. 1188 return int32(t.n-work.cycles) > 0 1189 } 1190 return true 1191 } 1192 1193 // gcStart starts the GC. It transitions from _GCoff to _GCmark (if 1194 // debug.gcstoptheworld == 0) or performs all of GC (if 1195 // debug.gcstoptheworld != 0). 1196 // 1197 // This may return without performing this transition in some cases, 1198 // such as when called on a system stack or with locks held. 1199 func gcStart(trigger gcTrigger) { 1200 // Since this is called from malloc and malloc is called in 1201 // the guts of a number of libraries that might be holding 1202 // locks, don't attempt to start GC in non-preemptible or 1203 // potentially unstable situations. 1204 mp := acquirem() 1205 if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" { 1206 releasem(mp) 1207 return 1208 } 1209 releasem(mp) 1210 mp = nil 1211 1212 // Pick up the remaining unswept/not being swept spans concurrently 1213 // 1214 // This shouldn't happen if we're being invoked in background 1215 // mode since proportional sweep should have just finished 1216 // sweeping everything, but rounding errors, etc, may leave a 1217 // few spans unswept. 
In forced mode, this is necessary since 1218 // GC can be forced at any point in the sweeping cycle. 1219 // 1220 // We check the transition condition continuously here in case 1221 // this G gets delayed in to the next GC cycle. 1222 for trigger.test() && sweepone() != ^uintptr(0) { 1223 sweep.nbgsweep++ 1224 } 1225 1226 // Perform GC initialization and the sweep termination 1227 // transition. 1228 semacquire(&work.startSema) 1229 // Re-check transition condition under transition lock. 1230 if !trigger.test() { 1231 semrelease(&work.startSema) 1232 return 1233 } 1234 1235 // For stats, check if this GC was forced by the user. 1236 work.userForced = trigger.kind == gcTriggerAlways || trigger.kind == gcTriggerCycle 1237 1238 // In gcstoptheworld debug mode, upgrade the mode accordingly. 1239 // We do this after re-checking the transition condition so 1240 // that multiple goroutines that detect the heap trigger don't 1241 // start multiple STW GCs. 1242 mode := gcBackgroundMode 1243 if debug.gcstoptheworld == 1 { 1244 mode = gcForceMode 1245 } else if debug.gcstoptheworld == 2 { 1246 mode = gcForceBlockMode 1247 } 1248 1249 // Ok, we're doing it! Stop everybody else 1250 semacquire(&worldsema) 1251 1252 if trace.enabled { 1253 traceGCStart() 1254 } 1255 1256 // Check that all Ps have finished deferred mcache flushes. 1257 for _, p := range allp { 1258 if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen { 1259 println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen) 1260 throw("p mcache not flushed") 1261 } 1262 } 1263 1264 gcBgMarkStartWorkers() 1265 1266 gcResetMarkState() 1267 1268 work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs 1269 if work.stwprocs > ncpu { 1270 // This is used to compute CPU time of the STW phases, 1271 // so it can't be more than ncpu, even if GOMAXPROCS is. 1272 work.stwprocs = ncpu 1273 } 1274 work.heap0 = atomic.Load64(&memstats.heap_live) 1275 work.pauseNS = 0 1276 work.mode = mode 1277 1278 now := nanotime() 1279 work.tSweepTerm = now 1280 work.pauseStart = now 1281 if trace.enabled { 1282 traceGCSTWStart(1) 1283 } 1284 systemstack(stopTheWorldWithSema) 1285 // Finish sweep before we start concurrent scan. 1286 systemstack(func() { 1287 finishsweep_m() 1288 }) 1289 // clearpools before we start the GC. If we wait they memory will not be 1290 // reclaimed until the next GC cycle. 1291 clearpools() 1292 1293 work.cycles++ 1294 1295 gcController.startCycle() 1296 work.heapGoal = memstats.next_gc 1297 1298 // In STW mode, disable scheduling of user Gs. This may also 1299 // disable scheduling of this goroutine, so it may block as 1300 // soon as we start the world again. 1301 if mode != gcBackgroundMode { 1302 schedEnableUser(false) 1303 } 1304 1305 // Enter concurrent mark phase and enable 1306 // write barriers. 1307 // 1308 // Because the world is stopped, all Ps will 1309 // observe that write barriers are enabled by 1310 // the time we start the world and begin 1311 // scanning. 1312 // 1313 // Write barriers must be enabled before assists are 1314 // enabled because they must be enabled before 1315 // any non-leaf heap objects are marked. Since 1316 // allocations are blocked until assists can 1317 // happen, we want enable assists as early as 1318 // possible. 1319 setGCPhase(_GCmark) 1320 1321 gcBgMarkPrepare() // Must happen before assist enable. 1322 gcMarkRootPrepare() 1323 1324 // Mark all active tinyalloc blocks. Since we're 1325 // allocating from these, they need to be black like 1326 // other allocations. 
The alternative is to blacken 1327 // the tiny block on every allocation from it, which 1328 // would slow down the tiny allocator. 1329 gcMarkTinyAllocs() 1330 1331 // At this point all Ps have enabled the write 1332 // barrier, thus maintaining the no white to 1333 // black invariant. Enable mutator assists to 1334 // put back-pressure on fast allocating 1335 // mutators. 1336 atomic.Store(&gcBlackenEnabled, 1) 1337 1338 // Assists and workers can start the moment we start 1339 // the world. 1340 gcController.markStartTime = now 1341 1342 // Concurrent mark. 1343 systemstack(func() { 1344 now = startTheWorldWithSema(trace.enabled) 1345 work.pauseNS += now - work.pauseStart 1346 work.tMark = now 1347 }) 1348 // In STW mode, we could block the instant systemstack 1349 // returns, so don't do anything important here. Make sure we 1350 // block rather than returning to user code. 1351 if mode != gcBackgroundMode { 1352 Gosched() 1353 } 1354 1355 semrelease(&work.startSema) 1356 } 1357 1358 // gcMarkDoneFlushed counts the number of P's with flushed work. 1359 // 1360 // Ideally this would be a captured local in gcMarkDone, but forEachP 1361 // escapes its callback closure, so it can't capture anything. 1362 // 1363 // This is protected by markDoneSema. 1364 var gcMarkDoneFlushed uint32 1365 1366 // debugCachedWork enables extra checks for debugging premature mark 1367 // termination. 1368 // 1369 // For debugging issue #27993. 1370 const debugCachedWork = false 1371 1372 // gcWorkPauseGen is for debugging the mark completion algorithm. 1373 // gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. 1374 // Only used if debugCachedWork is true. 1375 // 1376 // For debugging issue #27993. 1377 var gcWorkPauseGen uint32 = 1 1378 1379 // gcMarkDone transitions the GC from mark to mark termination if all 1380 // reachable objects have been marked (that is, there are no grey 1381 // objects and can be no more in the future). Otherwise, it flushes 1382 // all local work to the global queues where it can be discovered by 1383 // other workers. 1384 // 1385 // This should be called when all local mark work has been drained and 1386 // there are no remaining workers. Specifically, when 1387 // 1388 // work.nwait == work.nproc && !gcMarkWorkAvailable(p) 1389 // 1390 // The calling context must be preemptible. 1391 // 1392 // Flushing local work is important because idle Ps may have local 1393 // work queued. This is the only way to make that work visible and 1394 // drive GC to completion. 1395 // 1396 // It is explicitly okay to have write barriers in this function. If 1397 // it does transition to mark termination, then all reachable objects 1398 // have been marked, so the write barrier cannot shade any more 1399 // objects. 1400 func gcMarkDone() { 1401 // Ensure only one thread is running the ragged barrier at a 1402 // time. 1403 semacquire(&work.markDoneSema) 1404 1405 top: 1406 // Re-check transition condition under transition lock. 1407 // 1408 // It's critical that this checks the global work queues are 1409 // empty before performing the ragged barrier. Otherwise, 1410 // there could be global work that a P could take after the P 1411 // has passed the ragged barrier. 1412 if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { 1413 semrelease(&work.markDoneSema) 1414 return 1415 } 1416 1417 // Flush all local buffers and collect flushedWork flags. 
1418 gcMarkDoneFlushed = 0 1419 systemstack(func() { 1420 gp := getg().m.curg 1421 // Mark the user stack as preemptible so that it may be scanned. 1422 // Otherwise, our attempt to force all P's to a safepoint could 1423 // result in a deadlock as we attempt to preempt a worker that's 1424 // trying to preempt us (e.g. for a stack scan). 1425 casgstatus(gp, _Grunning, _Gwaiting) 1426 forEachP(func(_p_ *p) { 1427 // Flush the write barrier buffer, since this may add 1428 // work to the gcWork. 1429 wbBufFlush1(_p_) 1430 // For debugging, shrink the write barrier 1431 // buffer so it flushes immediately. 1432 // wbBuf.reset will keep it at this size as 1433 // long as throwOnGCWork is set. 1434 if debugCachedWork { 1435 b := &_p_.wbBuf 1436 b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) 1437 b.debugGen = gcWorkPauseGen 1438 } 1439 // Flush the gcWork, since this may create global work 1440 // and set the flushedWork flag. 1441 // 1442 // TODO(austin): Break up these workbufs to 1443 // better distribute work. 1444 _p_.gcw.dispose() 1445 // Collect the flushedWork flag. 1446 if _p_.gcw.flushedWork { 1447 atomic.Xadd(&gcMarkDoneFlushed, 1) 1448 _p_.gcw.flushedWork = false 1449 } else if debugCachedWork { 1450 // For debugging, freeze the gcWork 1451 // until we know whether we've reached 1452 // completion or not. If we think 1453 // we've reached completion, but 1454 // there's a paused gcWork, then 1455 // that's a bug. 1456 _p_.gcw.pauseGen = gcWorkPauseGen 1457 // Capture the G's stack. 1458 for i := range _p_.gcw.pauseStack { 1459 _p_.gcw.pauseStack[i] = 0 1460 } 1461 callers(1, _p_.gcw.pauseStack[:]) 1462 } 1463 }) 1464 casgstatus(gp, _Gwaiting, _Grunning) 1465 }) 1466 1467 if gcMarkDoneFlushed != 0 { 1468 if debugCachedWork { 1469 // Release paused gcWorks. 1470 atomic.Xadd(&gcWorkPauseGen, 1) 1471 } 1472 // More grey objects were discovered since the 1473 // previous termination check, so there may be more 1474 // work to do. Keep going. It's possible the 1475 // transition condition became true again during the 1476 // ragged barrier, so re-check it. 1477 goto top 1478 } 1479 1480 if debugCachedWork { 1481 throwOnGCWork = true 1482 // Release paused gcWorks. If there are any, they 1483 // should now observe throwOnGCWork and panic. 1484 atomic.Xadd(&gcWorkPauseGen, 1) 1485 } 1486 1487 // There was no global work, no local work, and no Ps 1488 // communicated work since we took markDoneSema. Therefore 1489 // there are no grey objects and no more objects can be 1490 // shaded. Transition to mark termination. 1491 now := nanotime() 1492 work.tMarkTerm = now 1493 work.pauseStart = now 1494 getg().m.preemptoff = "gcing" 1495 if trace.enabled { 1496 traceGCSTWStart(0) 1497 } 1498 systemstack(stopTheWorldWithSema) 1499 // The gcphase is _GCmark, it will transition to _GCmarktermination 1500 // below. The important thing is that the wb remains active until 1501 // all marking is complete. This includes writes made by the GC. 1502 1503 if debugCachedWork { 1504 // For debugging, double check that no work was added after we 1505 // went around above and disable write barrier buffering. 
1506 for _, p := range allp { 1507 gcw := &p.gcw 1508 if !gcw.empty() { 1509 printlock() 1510 print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) 1511 if gcw.wbuf1 == nil { 1512 print(" wbuf1=<nil>") 1513 } else { 1514 print(" wbuf1.n=", gcw.wbuf1.nobj) 1515 } 1516 if gcw.wbuf2 == nil { 1517 print(" wbuf2=<nil>") 1518 } else { 1519 print(" wbuf2.n=", gcw.wbuf2.nobj) 1520 } 1521 print("\n") 1522 if gcw.pauseGen == gcw.putGen { 1523 println("runtime: checkPut already failed at this generation") 1524 } 1525 throw("throwOnGCWork") 1526 } 1527 } 1528 } else { 1529 // For unknown reasons (see issue #27993), there is 1530 // sometimes work left over when we enter mark 1531 // termination. Detect this and resume concurrent 1532 // mark. This is obviously unfortunate. 1533 // 1534 // Switch to the system stack to call wbBufFlush1, 1535 // though in this case it doesn't matter because we're 1536 // non-preemptible anyway. 1537 restart := false 1538 systemstack(func() { 1539 for _, p := range allp { 1540 wbBufFlush1(p) 1541 if !p.gcw.empty() { 1542 restart = true 1543 break 1544 } 1545 } 1546 }) 1547 if restart { 1548 getg().m.preemptoff = "" 1549 systemstack(func() { 1550 now := startTheWorldWithSema(true) 1551 work.pauseNS += now - work.pauseStart 1552 }) 1553 goto top 1554 } 1555 } 1556 1557 // Disable assists and background workers. We must do 1558 // this before waking blocked assists. 1559 atomic.Store(&gcBlackenEnabled, 0) 1560 1561 // Wake all blocked assists. These will run when we 1562 // start the world again. 1563 gcWakeAllAssists() 1564 1565 // Likewise, release the transition lock. Blocked 1566 // workers and assists will run when we start the 1567 // world again. 1568 semrelease(&work.markDoneSema) 1569 1570 // In STW mode, re-enable user goroutines. These will be 1571 // queued to run after we start the world. 1572 schedEnableUser(true) 1573 1574 // endCycle depends on all gcWork cache stats being flushed. 1575 // The termination algorithm above ensured that up to 1576 // allocations since the ragged barrier. 1577 nextTriggerRatio := gcController.endCycle() 1578 1579 // Perform mark termination. This will restart the world. 1580 gcMarkTermination(nextTriggerRatio) 1581 } 1582 1583 func gcMarkTermination(nextTriggerRatio float64) { 1584 // World is stopped. 1585 // Start marktermination which includes enabling the write barrier. 1586 atomic.Store(&gcBlackenEnabled, 0) 1587 setGCPhase(_GCmarktermination) 1588 1589 work.heap1 = memstats.heap_live 1590 startTime := nanotime() 1591 1592 mp := acquirem() 1593 mp.preemptoff = "gcing" 1594 _g_ := getg() 1595 _g_.m.traceback = 2 1596 gp := _g_.m.curg 1597 casgstatus(gp, _Grunning, _Gwaiting) 1598 gp.waitreason = waitReasonGarbageCollection 1599 1600 // Run gc on the g0 stack. We do this so that the g stack 1601 // we're currently running on will no longer change. Cuts 1602 // the root set down a bit (g0 stacks are not scanned, and 1603 // we don't need to scan gc's internal state). We also 1604 // need to switch to g0 so we can shrink the stack. 1605 systemstack(func() { 1606 gcMark(startTime) 1607 // Must return immediately. 1608 // The outer function's stack may have moved 1609 // during gcMark (it shrinks stacks, including the 1610 // outer function's stack), so we must not refer 1611 // to any of its variables. Return back to the 1612 // non-system stack to pick up the new addresses 1613 // before continuing. 
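// (Concretely: while this closure runs on the system stack, pointers it
// holds into the outer frame are not updated if gcMark moves that stack,
// so they could refer to the old copy. Doing the remaining termination
// work in the separate systemstack call below lets the user goroutine
// re-enter with its frame at the new address.)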
1614 }) 1615 1616 systemstack(func() { 1617 work.heap2 = work.bytesMarked 1618 if debug.gccheckmark > 0 { 1619 // Run a full non-parallel, stop-the-world 1620 // mark using checkmark bits, to check that we 1621 // didn't forget to mark anything during the 1622 // concurrent mark process. 1623 gcResetMarkState() 1624 initCheckmarks() 1625 gcw := &getg().m.p.ptr().gcw 1626 gcDrain(gcw, 0) 1627 wbBufFlush1(getg().m.p.ptr()) 1628 gcw.dispose() 1629 clearCheckmarks() 1630 } 1631 1632 // marking is complete so we can turn the write barrier off 1633 setGCPhase(_GCoff) 1634 gcSweep(work.mode) 1635 }) 1636 1637 _g_.m.traceback = 0 1638 casgstatus(gp, _Gwaiting, _Grunning) 1639 1640 if trace.enabled { 1641 traceGCDone() 1642 } 1643 1644 // all done 1645 mp.preemptoff = "" 1646 1647 if gcphase != _GCoff { 1648 throw("gc done but gcphase != _GCoff") 1649 } 1650 1651 // Update GC trigger and pacing for the next cycle. 1652 gcSetTriggerRatio(nextTriggerRatio) 1653 1654 // Update timing memstats 1655 now := nanotime() 1656 sec, nsec, _ := time_now() 1657 unixNow := sec*1e9 + int64(nsec) 1658 work.pauseNS += now - work.pauseStart 1659 work.tEnd = now 1660 atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user 1661 atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us 1662 memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) 1663 memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) 1664 memstats.pause_total_ns += uint64(work.pauseNS) 1665 1666 // Update work.totaltime. 1667 sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm) 1668 // We report idle marking time below, but omit it from the 1669 // overall utilization here since it's "free". 1670 markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime 1671 markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm) 1672 cycleCpu := sweepTermCpu + markCpu + markTermCpu 1673 work.totaltime += cycleCpu 1674 1675 // Compute overall GC CPU utilization. 1676 totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) 1677 memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) 1678 1679 // Reset sweep state. 1680 sweep.nbgsweep = 0 1681 sweep.npausesweep = 0 1682 1683 if work.userForced { 1684 memstats.numforcedgc++ 1685 } 1686 1687 // Bump GC cycle count and wake goroutines waiting on sweep. 1688 lock(&work.sweepWaiters.lock) 1689 memstats.numgc++ 1690 injectglist(&work.sweepWaiters.list) 1691 unlock(&work.sweepWaiters.lock) 1692 1693 // Finish the current heap profiling cycle and start a new 1694 // heap profiling cycle. We do this before starting the world 1695 // so events don't leak into the wrong cycle. 1696 mProf_NextCycle() 1697 1698 systemstack(func() { startTheWorldWithSema(true) }) 1699 1700 // Flush the heap profile so we can start a new cycle next GC. 1701 // This is relatively expensive, so we don't do it with the 1702 // world stopped. 1703 mProf_Flush() 1704 1705 // Prepare workbufs for freeing by the sweeper. We do this 1706 // asynchronously because it can take non-trivial time. 1707 prepareFreeWorkbufs() 1708 1709 // Free stack spans. This must be done between GC cycles. 1710 systemstack(freeStackSpans) 1711 1712 // Ensure all mcaches are flushed. Each P will flush its own 1713 // mcache before allocating, but idle Ps may not. 
Since this 1714 // is necessary to sweep all spans, we need to ensure all 1715 // mcaches are flushed before we start the next GC cycle. 1716 systemstack(func() { 1717 forEachP(func(_p_ *p) { 1718 _p_.mcache.prepareForSweep() 1719 }) 1720 }) 1721 1722 // Print gctrace before dropping worldsema. As soon as we drop 1723 // worldsema another cycle could start and smash the stats 1724 // we're trying to print. 1725 if debug.gctrace > 0 { 1726 util := int(memstats.gc_cpu_fraction * 100) 1727 1728 var sbuf [24]byte 1729 printlock() 1730 print("gc ", memstats.numgc, 1731 " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", 1732 util, "%: ") 1733 prev := work.tSweepTerm 1734 for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} { 1735 if i != 0 { 1736 print("+") 1737 } 1738 print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev)))) 1739 prev = ns 1740 } 1741 print(" ms clock, ") 1742 for i, ns := range []int64{sweepTermCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} { 1743 if i == 2 || i == 3 { 1744 // Separate mark time components with /. 1745 print("/") 1746 } else if i != 0 { 1747 print("+") 1748 } 1749 print(string(fmtNSAsMS(sbuf[:], uint64(ns)))) 1750 } 1751 print(" ms cpu, ", 1752 work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ", 1753 work.heapGoal>>20, " MB goal, ", 1754 work.maxprocs, " P") 1755 if work.userForced { 1756 print(" (forced)") 1757 } 1758 print("\n") 1759 printunlock() 1760 } 1761 1762 semrelease(&worldsema) 1763 // Careful: another GC cycle may start now. 1764 1765 releasem(mp) 1766 mp = nil 1767 1768 // now that gc is done, kick off finalizer thread if needed 1769 if !concurrentSweep { 1770 // give the queued finalizers, if any, a chance to run 1771 Gosched() 1772 } 1773 } 1774 1775 // gcBgMarkStartWorkers prepares background mark worker goroutines. 1776 // These goroutines will not run until the mark phase, but they must 1777 // be started while the work is not stopped and from a regular G 1778 // stack. The caller must hold worldsema. 1779 func gcBgMarkStartWorkers() { 1780 // Background marking is performed by per-P G's. Ensure that 1781 // each P has a background GC G. 1782 for _, p := range allp { 1783 if p.gcBgMarkWorker == 0 { 1784 go gcBgMarkWorker(p) 1785 notetsleepg(&work.bgMarkReady, -1) 1786 noteclear(&work.bgMarkReady) 1787 } 1788 } 1789 } 1790 1791 // gcBgMarkPrepare sets up state for background marking. 1792 // Mutator assists must not yet be enabled. 1793 func gcBgMarkPrepare() { 1794 // Background marking will stop when the work queues are empty 1795 // and there are no more workers (note that, since this is 1796 // concurrent, this may be a transient state, but mark 1797 // termination will clean it up). Between background workers 1798 // and assists, we don't really know how many workers there 1799 // will be, so we pretend to have an arbitrarily large number 1800 // of workers, almost all of which are "waiting". While a 1801 // worker is working it decrements nwait. If nproc == nwait, 1802 // there are no workers. 1803 work.nproc = ^uint32(0) 1804 work.nwait = ^uint32(0) 1805 } 1806 1807 func gcBgMarkWorker(_p_ *p) { 1808 gp := getg() 1809 1810 type parkInfo struct { 1811 m muintptr // Release this m on park. 1812 attach puintptr // If non-nil, attach to this p on park. 1813 } 1814 // We pass park to a gopark unlock function, so it can't be on 1815 // the stack (see gopark). 
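// (gopark invokes the unlock function on the system stack after this
// goroutine has been descheduled, and that function must not touch the
// goroutine's own stack, which may move in the meantime; hence park is
// allocated with new rather than declared as a stack local.)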
Prevent deadlock from recursively 1816 // starting GC by disabling preemption. 1817 gp.m.preemptoff = "GC worker init" 1818 park := new(parkInfo) 1819 gp.m.preemptoff = "" 1820 1821 park.m.set(acquirem()) 1822 park.attach.set(_p_) 1823 // Inform gcBgMarkStartWorkers that this worker is ready. 1824 // After this point, the background mark worker is scheduled 1825 // cooperatively by gcController.findRunnable. Hence, it must 1826 // never be preempted, as this would put it into _Grunnable 1827 // and put it on a run queue. Instead, when the preempt flag 1828 // is set, this puts itself into _Gwaiting to be woken up by 1829 // gcController.findRunnable at the appropriate time. 1830 notewakeup(&work.bgMarkReady) 1831 1832 for { 1833 // Go to sleep until woken by gcController.findRunnable. 1834 // We can't releasem yet since even the call to gopark 1835 // may be preempted. 1836 gopark(func(g *g, parkp unsafe.Pointer) bool { 1837 park := (*parkInfo)(parkp) 1838 1839 // The worker G is no longer running, so it's 1840 // now safe to allow preemption. 1841 releasem(park.m.ptr()) 1842 1843 // If the worker isn't attached to its P, 1844 // attach now. During initialization and after 1845 // a phase change, the worker may have been 1846 // running on a different P. As soon as we 1847 // attach, the owner P may schedule the 1848 // worker, so this must be done after the G is 1849 // stopped. 1850 if park.attach != 0 { 1851 p := park.attach.ptr() 1852 park.attach.set(nil) 1853 // cas the worker because we may be 1854 // racing with a new worker starting 1855 // on this P. 1856 if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) { 1857 // The P got a new worker. 1858 // Exit this worker. 1859 return false 1860 } 1861 } 1862 return true 1863 }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) 1864 1865 // Loop until the P dies and disassociates this 1866 // worker (the P may later be reused, in which case 1867 // it will get a new worker) or we failed to associate. 1868 if _p_.gcBgMarkWorker.ptr() != gp { 1869 break 1870 } 1871 1872 // Disable preemption so we can use the gcw. If the 1873 // scheduler wants to preempt us, we'll stop draining, 1874 // dispose the gcw, and then preempt. 1875 park.m.set(acquirem()) 1876 1877 if gcBlackenEnabled == 0 { 1878 throw("gcBgMarkWorker: blackening not enabled") 1879 } 1880 1881 startTime := nanotime() 1882 _p_.gcMarkWorkerStartTime = startTime 1883 1884 decnwait := atomic.Xadd(&work.nwait, -1) 1885 if decnwait == work.nproc { 1886 println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) 1887 throw("work.nwait was > work.nproc") 1888 } 1889 1890 systemstack(func() { 1891 // Mark our goroutine preemptible so its stack 1892 // can be scanned. This lets two mark workers 1893 // scan each other (otherwise, they would 1894 // deadlock). We must not modify anything on 1895 // the G stack. However, stack shrinking is 1896 // disabled for mark workers, so it is safe to 1897 // read from the G stack. 1898 casgstatus(gp, _Grunning, _Gwaiting) 1899 switch _p_.gcMarkWorkerMode { 1900 default: 1901 throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") 1902 case gcMarkWorkerDedicatedMode: 1903 gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) 1904 if gp.preempt { 1905 // We were preempted. This is 1906 // a useful signal to kick 1907 // everything out of the run 1908 // queue so it can run 1909 // somewhere else. 
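// Move every goroutine from this P's local run queue to the global
// queue so other Ps can pick them up; this P is about to keep draining
// mark work without honoring further preemption requests (note that the
// second gcDrain below omits gcDrainUntilPreempt).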
1910 lock(&sched.lock) 1911 for { 1912 gp, _ := runqget(_p_) 1913 if gp == nil { 1914 break 1915 } 1916 globrunqput(gp) 1917 } 1918 unlock(&sched.lock) 1919 } 1920 // Go back to draining, this time 1921 // without preemption. 1922 gcDrain(&_p_.gcw, gcDrainFlushBgCredit) 1923 case gcMarkWorkerFractionalMode: 1924 gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) 1925 case gcMarkWorkerIdleMode: 1926 gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) 1927 } 1928 casgstatus(gp, _Gwaiting, _Grunning) 1929 }) 1930 1931 // Account for time. 1932 duration := nanotime() - startTime 1933 switch _p_.gcMarkWorkerMode { 1934 case gcMarkWorkerDedicatedMode: 1935 atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) 1936 atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) 1937 case gcMarkWorkerFractionalMode: 1938 atomic.Xaddint64(&gcController.fractionalMarkTime, duration) 1939 atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) 1940 case gcMarkWorkerIdleMode: 1941 atomic.Xaddint64(&gcController.idleMarkTime, duration) 1942 } 1943 1944 // Was this the last worker and did we run out 1945 // of work? 1946 incnwait := atomic.Xadd(&work.nwait, +1) 1947 if incnwait > work.nproc { 1948 println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode, 1949 "work.nwait=", incnwait, "work.nproc=", work.nproc) 1950 throw("work.nwait > work.nproc") 1951 } 1952 1953 // If this worker reached a background mark completion 1954 // point, signal the main GC goroutine. 1955 if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { 1956 // Make this G preemptible and disassociate it 1957 // as the worker for this P so 1958 // findRunnableGCWorker doesn't try to 1959 // schedule it. 1960 _p_.gcBgMarkWorker.set(nil) 1961 releasem(park.m.ptr()) 1962 1963 gcMarkDone() 1964 1965 // Disable preemption and prepare to reattach 1966 // to the P. 1967 // 1968 // We may be running on a different P at this 1969 // point, so we can't reattach until this G is 1970 // parked. 1971 park.m.set(acquirem()) 1972 park.attach.set(_p_) 1973 } 1974 } 1975 } 1976 1977 // gcMarkWorkAvailable reports whether executing a mark worker 1978 // on p is potentially useful. p may be nil, in which case it only 1979 // checks the global sources of work. 1980 func gcMarkWorkAvailable(p *p) bool { 1981 if p != nil && !p.gcw.empty() { 1982 return true 1983 } 1984 if !work.full.empty() { 1985 return true // global work available 1986 } 1987 if work.markrootNext < work.markrootJobs { 1988 return true // root scan work available 1989 } 1990 return false 1991 } 1992 1993 // gcMark runs the mark (or, for concurrent GC, mark termination) 1994 // All gcWork caches must be empty. 1995 // STW is in effect at this point. 1996 //TODO go:nowritebarrier 1997 func gcMark(start_time int64) { 1998 if debug.allocfreetrace > 0 { 1999 tracegc() 2000 } 2001 2002 if gcphase != _GCmarktermination { 2003 throw("in gcMark expecting to see gcphase as _GCmarktermination") 2004 } 2005 work.tstart = start_time 2006 2007 // Check that there's no marking work remaining. 
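// "Remaining work" here means either a non-empty global list of full
// work buffers (work.full) or root-marking jobs that were never claimed
// (work.markrootNext < work.markrootJobs); this mirrors the global half
// of gcMarkWorkAvailable. Either one means concurrent mark was
// terminated too early.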
2008 if work.full != 0 || work.markrootNext < work.markrootJobs { 2009 print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") 2010 panic("non-empty mark queue after concurrent mark") 2011 } 2012 2013 if debug.gccheckmark > 0 { 2014 // This is expensive when there's a large number of 2015 // Gs, so only do it if checkmark is also enabled. 2016 gcMarkRootCheck() 2017 } 2018 if work.full != 0 { 2019 throw("work.full != 0") 2020 } 2021 2022 // Clear out buffers and double-check that all gcWork caches 2023 // are empty. This should be ensured by gcMarkDone before we 2024 // enter mark termination. 2025 // 2026 // TODO: We could clear out buffers just before mark if this 2027 // has a non-negligible impact on STW time. 2028 for _, p := range allp { 2029 // The write barrier may have buffered pointers since 2030 // the gcMarkDone barrier. However, since the barrier 2031 // ensured all reachable objects were marked, all of 2032 // these must be pointers to black objects. Hence we 2033 // can just discard the write barrier buffer. 2034 if debug.gccheckmark > 0 || throwOnGCWork { 2035 // For debugging, flush the buffer and make 2036 // sure it really was all marked. 2037 wbBufFlush1(p) 2038 } else { 2039 p.wbBuf.reset() 2040 } 2041 2042 gcw := &p.gcw 2043 if !gcw.empty() { 2044 printlock() 2045 print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) 2046 if gcw.wbuf1 == nil { 2047 print(" wbuf1=<nil>") 2048 } else { 2049 print(" wbuf1.n=", gcw.wbuf1.nobj) 2050 } 2051 if gcw.wbuf2 == nil { 2052 print(" wbuf2=<nil>") 2053 } else { 2054 print(" wbuf2.n=", gcw.wbuf2.nobj) 2055 } 2056 print("\n") 2057 throw("P has cached GC work at end of mark termination") 2058 } 2059 // There may still be cached empty buffers, which we 2060 // need to flush since we're going to free them. Also, 2061 // there may be non-zero stats because we allocated 2062 // black after the gcMarkDone barrier. 2063 gcw.dispose() 2064 } 2065 2066 throwOnGCWork = false 2067 2068 cachestats() 2069 2070 // Update the marked heap stat. 2071 memstats.heap_marked = work.bytesMarked 2072 2073 // Update other GC heap size stats. This must happen after 2074 // cachestats (which flushes local statistics to these) and 2075 // flushallmcaches (which modifies heap_live). 2076 memstats.heap_live = work.bytesMarked 2077 memstats.heap_scan = uint64(gcController.scanWork) 2078 2079 if trace.enabled { 2080 traceHeapAlloc() 2081 } 2082 } 2083 2084 func gcSweep(mode gcMode) { 2085 if gcphase != _GCoff { 2086 throw("gcSweep being done but phase is not GCoff") 2087 } 2088 2089 lock(&mheap_.lock) 2090 mheap_.sweepgen += 2 2091 mheap_.sweepdone = 0 2092 if mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 { 2093 // We should have drained this list during the last 2094 // sweep phase. We certainly need to start this phase 2095 // with an empty swept list. 2096 throw("non-empty swept list") 2097 } 2098 mheap_.pagesSwept = 0 2099 mheap_.sweepArenas = mheap_.allArenas 2100 mheap_.reclaimIndex = 0 2101 mheap_.reclaimCredit = 0 2102 unlock(&mheap_.lock) 2103 2104 if !_ConcurrentSweep || mode == gcForceBlockMode { 2105 // Special case synchronous sweep. 2106 // Record that no proportional sweeping has to happen. 2107 lock(&mheap_.lock) 2108 mheap_.sweepPagesPerByte = 0 2109 unlock(&mheap_.lock) 2110 // Sweep all spans eagerly. 
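// sweepone sweeps one unswept span and reports the number of pages it
// returned to the heap, or ^uintptr(0) once there is nothing left to
// sweep, so this loop blocks until the entire heap has been swept.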
2111 for sweepone() != ^uintptr(0) { 2112 sweep.npausesweep++ 2113 } 2114 // Free workbufs eagerly. 2115 prepareFreeWorkbufs() 2116 for freeSomeWbufs(false) { 2117 } 2118 // All "free" events for this mark/sweep cycle have 2119 // now happened, so we can make this profile cycle 2120 // available immediately. 2121 mProf_NextCycle() 2122 mProf_Flush() 2123 return 2124 } 2125 2126 // Background sweep. 2127 lock(&sweep.lock) 2128 if sweep.parked { 2129 sweep.parked = false 2130 ready(sweep.g, 0, true) 2131 } 2132 unlock(&sweep.lock) 2133 } 2134 2135 // gcResetMarkState resets global state prior to marking (concurrent 2136 // or STW) and resets the stack scan state of all Gs. 2137 // 2138 // This is safe to do without the world stopped because any Gs created 2139 // during or after this will start out in the reset state. 2140 func gcResetMarkState() { 2141 // This may be called during a concurrent phase, so make sure 2142 // allgs doesn't change. 2143 lock(&allglock) 2144 for _, gp := range allgs { 2145 gp.gcscandone = false // set to true in gcphasework 2146 gp.gcscanvalid = false // stack has not been scanned 2147 gp.gcAssistBytes = 0 2148 } 2149 unlock(&allglock) 2150 2151 // Clear page marks. This is just 1MB per 64GB of heap, so the 2152 // time here is pretty trivial. 2153 lock(&mheap_.lock) 2154 arenas := mheap_.allArenas 2155 unlock(&mheap_.lock) 2156 for _, ai := range arenas { 2157 ha := mheap_.arenas[ai.l1()][ai.l2()] 2158 for i := range ha.pageMarks { 2159 ha.pageMarks[i] = 0 2160 } 2161 } 2162 2163 work.bytesMarked = 0 2164 work.initialHeapLive = atomic.Load64(&memstats.heap_live) 2165 } 2166 2167 // Hooks for other packages 2168 2169 var poolcleanup func() 2170 2171 //go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup 2172 func sync_runtime_registerPoolCleanup(f func()) { 2173 poolcleanup = f 2174 } 2175 2176 func clearpools() { 2177 // clear sync.Pools 2178 if poolcleanup != nil { 2179 poolcleanup() 2180 } 2181 2182 // Clear central sudog cache. 2183 // Leave per-P caches alone, they have strictly bounded size. 2184 // Disconnect cached list before dropping it on the floor, 2185 // so that a dangling ref to one entry does not pin all of them. 2186 lock(&sched.sudoglock) 2187 var sg, sgnext *sudog 2188 for sg = sched.sudogcache; sg != nil; sg = sgnext { 2189 sgnext = sg.next 2190 sg.next = nil 2191 } 2192 sched.sudogcache = nil 2193 unlock(&sched.sudoglock) 2194 2195 // Clear central defer pools. 2196 // Leave per-P pools alone, they have strictly bounded size. 2197 lock(&sched.deferlock) 2198 for i := range sched.deferpool { 2199 // disconnect cached list before dropping it on the floor, 2200 // so that a dangling ref to one entry does not pin all of them. 2201 var d, dlink *_defer 2202 for d = sched.deferpool[i]; d != nil; d = dlink { 2203 dlink = d.link 2204 d.link = nil 2205 } 2206 sched.deferpool[i] = nil 2207 } 2208 unlock(&sched.deferlock) 2209 } 2210 2211 // Timing 2212 2213 // itoaDiv formats val/(10**dec) into buf. 2214 func itoaDiv(buf []byte, val uint64, dec int) []byte { 2215 i := len(buf) - 1 2216 idec := i - dec 2217 for val >= 10 || i >= idec { 2218 buf[i] = byte(val%10 + '0') 2219 i-- 2220 if i == idec { 2221 buf[i] = '.' 2222 i-- 2223 } 2224 val /= 10 2225 } 2226 buf[i] = byte(val + '0') 2227 return buf[i:] 2228 } 2229 2230 // fmtNSAsMS nicely formats ns nanoseconds as milliseconds. 2231 func fmtNSAsMS(buf []byte, ns uint64) []byte { 2232 if ns >= 10e6 { 2233 // Format as whole milliseconds. 
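// 10e6ns is 10ms; at that scale whole milliseconds are precise enough,
// so no decimal places are printed (dec = 0 in the itoaDiv call below).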
2234 return itoaDiv(buf, ns/1e6, 0) 2235 } 2236 // Format two digits of precision, with at most three decimal places. 2237 x := ns / 1e3 2238 if x == 0 { 2239 buf[0] = '0' 2240 return buf[:1] 2241 } 2242 dec := 3 2243 for x >= 100 { 2244 x /= 10 2245 dec-- 2246 } 2247 return itoaDiv(buf, x, dec) 2248 }
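// Worked examples for the two helpers above (illustrative only, not part
// of the runtime; buf stands for a scratch byte array such as the
// sbuf [24]byte used by the gctrace printing in gcMarkTermination):
//
//	itoaDiv(buf[:], 12345, 3)   -> "12.345"  (12345 / 10**3)
//	fmtNSAsMS(buf[:], 1234567)  -> "1.2"     (1,234,567ns, two digits of precision)
//	fmtNSAsMS(buf[:], 12345678) -> "12"      (>= 10ms, whole milliseconds)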