github.com/aloncn/graphics-go@v0.0.1/src/runtime/mgcmark.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: marking and scanning

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	fixedRootFinalizers = iota
	fixedRootFlushCaches
	fixedRootCount

	// rootBlockBytes is the number of bytes to scan per data or
	// BSS root.
	rootBlockBytes = 256 << 10

	// rootBlockSpans is the number of spans to scan per span
	// root.
	rootBlockSpans = 8 * 1024 // 64MB worth of spans
)

// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
// The caller must have called gcCopySpans().
//
//go:nowritebarrier
func gcMarkRootPrepare() {
	// Compute how many data and BSS root blocks there are.
	nBlocks := func(bytes uintptr) int {
		return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
	}

	work.nDataRoots = 0
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		nDataRoots := nBlocks(datap.edata - datap.data)
		if nDataRoots > work.nDataRoots {
			work.nDataRoots = nDataRoots
		}
	}

	work.nBSSRoots = 0
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		nBSSRoots := nBlocks(datap.ebss - datap.bss)
		if nBSSRoots > work.nBSSRoots {
			work.nBSSRoots = nBSSRoots
		}
	}

	// Compute number of span roots.
	work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans

	// Snapshot of allglen. During concurrent scan, we just need
	// to be consistent about how many markroot jobs we create and
	// how many Gs we check. Gs may be created after this point,
	// but it's okay that we ignore them because they begin life
	// without any roots, so there's nothing to scan, and any
	// roots they create during the concurrent phase will be
	// scanned during mark termination. During mark termination,
	// allglen isn't changing, so we'll scan all Gs.
	work.nStackRoots = int(atomic.Loaduintptr(&allglen))

	work.markrootNext = 0
	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
}

// gcMarkRootCheck checks that all roots have been scanned. It is
// purely for debugging.
func gcMarkRootCheck() {
	if work.markrootNext < work.markrootJobs {
		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
		throw("left over markroot jobs")
	}

	lock(&allglock)
	// Check that gc work is done.
	for i := 0; i < work.nStackRoots; i++ {
		gp := allgs[i]
		if !gp.gcscandone {
			throw("scan missed a g")
		}
	}
	unlock(&allglock)
}

// ptrmask for an allocation containing a single pointer.
var oneptrmask = [...]uint8{1}

// markroot scans the i'th root.
//
// Preemption must be disabled (because this uses a gcWork).
//
//go:nowritebarrier
func markroot(i uint32) {
	// TODO: Consider using getg().m.p.ptr().gcw.
	var gcw gcWork

	baseData := uint32(fixedRootCount)
	baseBSS := baseData + uint32(work.nDataRoots)
	baseSpans := baseBSS + uint32(work.nBSSRoots)
	baseStacks := baseSpans + uint32(work.nSpanRoots)

	// Note: if you add a case here, please also update heapdump.go:dumproots.
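	// Editor's illustration (not part of the original source): with
	// fixedRootCount = 2, nDataRoots = 3, nBSSRoots = 2, and
	// nSpanRoots = 4, the bases work out to baseData = 2, baseBSS = 5,
	// baseSpans = 7, and baseStacks = 11. Job i = 6 then falls in the
	// BSS range and scans BSS shard i-baseBSS = 1, while jobs >= 11
	// scan goroutine stacks.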
	switch {
	case baseData <= i && i < baseBSS:
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, &gcw, int(i-baseData))
		}

	case baseBSS <= i && i < baseSpans:
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, &gcw, int(i-baseBSS))
		}

	case i == fixedRootFinalizers:
		for fb := allfin; fb != nil; fb = fb.alllink {
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
		}

	case i == fixedRootFlushCaches:
		if gcphase == _GCmarktermination { // Do not flush mcaches during concurrent phase.
			flushallmcaches()
		}

	case baseSpans <= i && i < baseStacks:
		// mark MSpan.specials
		markrootSpans(&gcw, int(i-baseSpans))

	default:
		// the rest is scanning goroutine stacks
		if uintptr(i-baseStacks) >= allglen {
			throw("markroot: bad index")
		}
		gp := allgs[i-baseStacks]

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// Shrink the stack if not much of it is being used, but not during the scan phase.
		if gcphase == _GCmarktermination {
			// Shrink during STW GCmarktermination phase thus avoiding
			// complications introduced by shrinking during
			// non-STW phases.
			shrinkstack(gp)
		}

		if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC {
			// GC background workers may be
			// non-preemptible, so we may deadlock if we
			// try to scan them during a concurrent phase.
			// They also have tiny stacks, so just ignore
			// them until mark termination.
			gp.gcscandone = true
			break
		}

		// scang must be done on the system stack in case
		// we're trying to scan our own stack.
		systemstack(func() {
			// If this is a self-scan, put the user G in
			// _Gwaiting to prevent self-deadlock. It may
			// already be in _Gwaiting if this is mark
			// termination.
			userG := getg().m.curg
			selfScan := gp == userG && readgstatus(userG) == _Grunning
			if selfScan {
				casgstatus(userG, _Grunning, _Gwaiting)
				userG.waitreason = "garbage collection scan"
			}

			// TODO: scang blocks until gp's stack has
			// been scanned, which may take a while for
			// running goroutines. Consider doing this in
			// two phases where the first is non-blocking:
			// we scan the stacks we can and ask running
			// goroutines to scan themselves; and the
			// second blocks.
			scang(gp)

			if selfScan {
				casgstatus(userG, _Gwaiting, _Grunning)
			}
		})
	}

	gcw.dispose()
}

// markrootBlock scans the shard'th shard of the block of memory [b0,
// b0+n0), with the given pointer mask.
//
//go:nowritebarrier
func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
	if rootBlockBytes%(8*sys.PtrSize) != 0 {
		// This is necessary to pick byte offsets in ptrmask0.
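		// (Illustrative arithmetic, editor's note: with
		// rootBlockBytes = 256<<10 and 8-byte pointers, each shard
		// covers 256KiB/8 = 32768 words, so the ptrmask offset below
		// advances by 32768/8 = 4096 mask bytes per shard.)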
		throw("rootBlockBytes must be a multiple of 8*ptrSize")
	}

	b := b0 + uintptr(shard)*rootBlockBytes
	if b >= b0+n0 {
		return
	}
	ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
	n := uintptr(rootBlockBytes)
	if b+n > b0+n0 {
		n = b0 + n0 - b
	}

	// Scan this shard.
	scanblock(b, n, ptrmask, gcw)
}

// markrootSpans marks roots for one shard of work.spans.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
	// Objects with finalizers have two GC-related invariants:
	//
	// 1) Everything reachable from the object must be marked.
	// This ensures that when we pass the object to its finalizer,
	// everything the finalizer can reach will be retained.
	//
	// 2) Finalizer specials (which are not in the garbage
	// collected heap) are roots. In practice, this means the fn
	// field must be scanned.
	//
	// TODO(austin): There are several ideas for making this more
	// efficient in issue #11485.

	// We process objects with finalizers only during the first
	// markroot pass. In concurrent GC, this happens during
	// concurrent scan and we depend on addfinalizer to ensure the
	// above invariants for objects that get finalizers after
	// concurrent scan. In STW GC, this will happen during mark
	// termination.
	if work.finalizersDone {
		return
	}

	sg := mheap_.sweepgen
	startSpan := shard * rootBlockSpans
	endSpan := (shard + 1) * rootBlockSpans
	if endSpan > len(work.spans) {
		endSpan = len(work.spans)
	}
	// Note that work.spans may not include spans that were
	// allocated between entering the scan phase and now. This is
	// okay because any objects with finalizers in those spans
	// must have been allocated and given finalizers after we
	// entered the scan phase, so addfinalizer will have ensured
	// the above invariants for them.
	for _, s := range work.spans[startSpan:endSpan] {
		if s.state != mSpanInUse {
			continue
		}
		if !useCheckmark && s.sweepgen != sg {
			// sweepgen was updated (+2) during non-checkmark GC pass
			print("sweep ", s.sweepgen, " ", sg, "\n")
			throw("gc: unswept span")
		}

		// Speculatively check if there are any specials
		// without acquiring the span lock. This may race with
		// adding the first special to a span, but in that
		// case addfinalizer will observe that the GC is
		// active (which is globally synchronized) and ensure
		// the above invariants. We may also ensure the
		// invariants, but it's okay to scan an object twice.
		if s.specials == nil {
			continue
		}

		// Lock the specials to prevent a special from being
		// removed from the list while we're traversing it.
		lock(&s.speciallock)

		for sp := s.specials; sp != nil; sp = sp.next {
			if sp.kind != _KindSpecialFinalizer {
				continue
			}
			// don't mark finalized object, but scan it so we
			// retain everything it points to.
			spf := (*specialfinalizer)(unsafe.Pointer(sp))
			// A finalizer can be set for an inner byte of an object, find object beginning.
			p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize

			// Mark everything that can be reached from
			// the object (but *not* the object itself or
			// we'll never collect it).
			scanobject(p, gcw)

			// The special itself is a root.
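			// (Descriptive note: scanning the single pointer-sized
			// word at &spf.fn with oneptrmask greys the funcval it
			// points to, keeping the finalizer function alive.)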
			scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
		}

		unlock(&s.speciallock)
	}
}

// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
//go:nowritebarrier
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

	// Compute the amount of scan work we need to do to make the
	// balance positive. We over-assist to build up credit for
	// future allocations and amortize the cost of assisting.
	debtBytes := -gp.gcAssistBytes + gcOverAssistBytes
	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))

retry:
	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			return
		}
	}

	// Perform assist work
	completed := false
	systemstack(func() {
		if atomic.Load(&gcBlackenEnabled) == 0 {
			// The gcBlackenEnabled check in malloc races with the
			// store that clears it but an atomic check in every malloc
			// would be a performance hit.
			// Instead we recheck it here on the non-preemptible system
			// stack to determine if we should perform an assist.

			// GC is done, so ignore any remaining debt.
			gp.gcAssistBytes = 0
			return
		}
		// Track time spent in this assist. Since we're on the
		// system stack, this is non-preemptible, so we can
		// just measure start and end time.
		startTime := nanotime()

		decnwait := atomic.Xadd(&work.nwait, -1)
		if decnwait == work.nproc {
			println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
			throw("nwait > work.nprocs")
		}

		// drain own cached work first in the hopes that it
		// will be more cache friendly.
		gcw := &getg().m.p.ptr().gcw
		workDone := gcDrainN(gcw, scanWork)
		// If we are near the end of the mark phase
		// dispose of the gcw.
		if gcBlackenPromptly {
			gcw.dispose()
		}

		// Record that we did this much scan work.
		//
		// Back out the number of bytes of assist credit that
		// this scan work counts for. The "1+" is a poor man's
		// round-up, to ensure this adds credit even if
		// assistBytesPerWork is very low.
		gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))

		// If this is the last worker and we ran out of work,
		// signal a completion point.
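		// (Descriptive note: work.nwait was decremented above when this
		// assist started draining; the increment below restores it. If
		// the incremented value equals work.nproc, no worker is
		// currently draining, and if no mark work remains either, this
		// assist has reached a background completion point.)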
		incnwait := atomic.Xadd(&work.nwait, +1)
		if incnwait > work.nproc {
			println("runtime: work.nwait=", incnwait,
				"work.nproc=", work.nproc,
				"gcBlackenPromptly=", gcBlackenPromptly)
			throw("work.nwait > work.nproc")
		}

		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
			// This has reached a background completion
			// point.
			completed = true
		}
		duration := nanotime() - startTime
		_p_ := gp.m.p.ptr()
		_p_.gcAssistTime += duration
		if _p_.gcAssistTime > gcAssistTimeSlack {
			atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
			_p_.gcAssistTime = 0
		}
	})

	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable to steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		lock(&work.assistQueue.lock)

		// If the GC cycle is over, just return. This is the
		// likely path if we completed above. We do this
		// under the lock to prevent a GC cycle from ending
		// between this check and queuing the assist.
		if atomic.Load(&gcBlackenEnabled) == 0 {
			unlock(&work.assistQueue.lock)
			return
		}

		oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail
		if oldHead == 0 {
			work.assistQueue.head.set(gp)
		} else {
			oldTail.ptr().schedlink.set(gp)
		}
		work.assistQueue.tail.set(gp)
		gp.schedlink.set(nil)
		// Recheck for background credit now that this G is in
		// the queue, but can still back out. This avoids a
		// race in case background marking has flushed more
		// credit since we checked above.
		if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
			work.assistQueue.head = oldHead
			work.assistQueue.tail = oldTail
			if oldTail != 0 {
				oldTail.ptr().schedlink.set(nil)
			}
			unlock(&work.assistQueue.lock)
			goto retry
		}
		// Park for real.
		goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlock, 2)

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
}

// gcWakeAllAssists wakes all currently blocked assists. This is used
// at the end of a GC cycle. gcBlackenEnabled must be false to prevent
// new assists from going to sleep after this point.
func gcWakeAllAssists() {
	lock(&work.assistQueue.lock)
	injectglist(work.assistQueue.head.ptr())
	work.assistQueue.head.set(nil)
	work.assistQueue.tail.set(nil)
	unlock(&work.assistQueue.lock)
}

// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
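//
// (Illustrative note, editor's addition: scan work is measured in
// work units while assist debt is measured in bytes; the conversion
// below uses gcController.assistBytesPerWork, so flushing, say, 1000
// units of scan work can retire up to 1000*assistBytesPerWork bytes of
// queued assist debt before any remainder is converted back and added
// to bgScanCredit.)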
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.head == 0 {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)

	lock(&work.assistQueue.lock)
	gp := work.assistQueue.head.ptr()
	for gp != nil && scanBytes > 0 {
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			xgp := gp
			gp = gp.schedlink.ptr()
			ready(xgp, 0)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			xgp := gp
			gp = gp.schedlink.ptr()
			if gp == nil {
				// gp is the only assist in the queue.
				gp = xgp
			} else {
				xgp.schedlink = 0
				work.assistQueue.tail.ptr().schedlink.set(xgp)
				work.assistQueue.tail.set(xgp)
			}
			break
		}
	}
	work.assistQueue.head.set(gp)
	if gp == nil {
		work.assistQueue.tail.set(nil)
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}

//go:nowritebarrier
func scanstack(gp *g) {
	if gp.gcscanvalid {
		if gcphase == _GCmarktermination {
			gcRemoveStackBarriers(gp)
		}
		return
	}

	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		return
	case _Grunning:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("scanstack: goroutine not stopped")
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	if gp == getg() {
		throw("can't scan our own stack")
	}
	mp := gp.m
	if mp != nil && mp.helpgc != 0 {
		throw("can't scan gchelper stack")
	}

	var sp, barrierOffset, nextBarrier uintptr
	if gp.syscallsp != 0 {
		sp = gp.syscallsp
	} else {
		sp = gp.sched.sp
	}
	switch gcphase {
	case _GCmark:
		// Install stack barriers during stack scan.
		barrierOffset = uintptr(firstStackBarrierOffset)
		nextBarrier = sp + barrierOffset

		if debug.gcstackbarrieroff > 0 {
			nextBarrier = ^uintptr(0)
		}

		if gp.stkbarPos != 0 || len(gp.stkbar) != 0 {
			// If this happens, it's probably because we
			// scanned a stack twice in the same phase.
			print("stkbarPos=", gp.stkbarPos, " len(stkbar)=", len(gp.stkbar), " goid=", gp.goid, " gcphase=", gcphase, "\n")
			throw("g already has stack barriers")
		}

		gcLockStackBarriers(gp)

	case _GCmarktermination:
		if int(gp.stkbarPos) == len(gp.stkbar) {
			// gp hit all of the stack barriers (or there
			// were none). Re-scan the whole stack.
			nextBarrier = ^uintptr(0)
		} else {
			// Only re-scan up to the lowest un-hit
			// barrier. Any frames above this have not
			// executed since the concurrent scan of gp and
			// any writes through up-pointers to above
			// this barrier had write barriers.
			nextBarrier = gp.stkbar[gp.stkbarPos].savedLRPtr
			if debugStackBarrier {
				print("rescan below ", hex(nextBarrier), " in [", hex(sp), ",", hex(gp.stack.hi), ") goid=", gp.goid, "\n")
			}
		}

		gcRemoveStackBarriers(gp)

	default:
		throw("scanstack in wrong phase")
	}

	var cache pcvalueCache
	gcw := &getg().m.p.ptr().gcw
	n := 0
	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
		scanframeworker(frame, &cache, gcw)

		if frame.fp > nextBarrier {
			// We skip installing a barrier on bottom-most
			// frame because on LR machines this LR is not
			// on the stack.
			if gcphase == _GCmark && n != 0 {
				if gcInstallStackBarrier(gp, frame) {
					barrierOffset *= 2
					nextBarrier = sp + barrierOffset
				}
			} else if gcphase == _GCmarktermination {
				// We just scanned a frame containing
				// a return to a stack barrier. Since
				// this frame never returned, we can
				// stop scanning.
				return false
			}
		}
		n++

		return true
	}
	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
	tracebackdefers(gp, scanframe, nil)
	if gcphase == _GCmarktermination {
		gcw.dispose()
	}
	if gcphase == _GCmark {
		gcUnlockStackBarriers(gp)
	}
	gp.gcscanvalid = true
}

// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {

	f := frame.fn
	targetpc := frame.continpc
	if targetpc == 0 {
		// Frame is dead.
		return
	}
	if _DebugGC > 1 {
		print("scanframe ", funcname(f), "\n")
	}
	if targetpc != f.entry {
		targetpc--
	}
	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, cache)
	if pcdata == -1 {
		// We do not have a valid pcdata value but there might be a
		// stackmap for this function. It is likely that we are looking
		// at the function prologue, assume so and hope for the best.
		pcdata = 0
	}

	// Scan local variables if stack frame has been allocated.
	size := frame.varp - frame.sp
	var minsize uintptr
	switch sys.TheChar {
	case '7':
		minsize = sys.SpAlign
	default:
		minsize = sys.MinFrameSize
	}
	if size > minsize {
		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
		if stkmap == nil || stkmap.n <= 0 {
			print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
			throw("missing stackmap")
		}

		// Locals bitmap information, scan just the pointers in locals.
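		// (Descriptive note: the locals bitmap bv has one bit per
		// pointer-sized word; the scanned region is the bv.n words
		// ending at frame.varp, i.e. [frame.varp-size, frame.varp).)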
		if pcdata < 0 || pcdata >= stkmap.n {
			// don't know where we are
			print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
			throw("scanframe: bad symbol table")
		}
		bv := stackmapdata(stkmap, pcdata)
		size = uintptr(bv.n) * sys.PtrSize
		scanblock(frame.varp-size, size, bv.bytedata, gcw)
	}

	// Scan arguments.
	if frame.arglen > 0 {
		var bv bitvector
		if frame.argmap != nil {
			bv = *frame.argmap
		} else {
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			if stkmap == nil || stkmap.n <= 0 {
				print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
				throw("missing stackmap")
			}
			if pcdata < 0 || pcdata >= stkmap.n {
				// don't know where we are
				print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
				throw("scanframe: bad symbol table")
			}
			bv = stackmapdata(stkmap, pcdata)
		}
		scanblock(frame.argp, uintptr(bv.n)*sys.PtrSize, bv.bytedata, gcw)
	}
}

type gcDrainFlags int

const (
	gcDrainUntilPreempt gcDrainFlags = 1 << iota
	gcDrainNoBlock
	gcDrainFlushBgCredit

	// gcDrainBlock means neither gcDrainUntilPreempt nor
	// gcDrainNoBlock. It is the default, but callers should use
	// the constant for documentation purposes.
	gcDrainBlock gcDrainFlags = 0
)

// gcDrain scans roots and objects in work buffers, blackening grey
// objects until all roots and work buffers have been drained.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set. This implies gcDrainNoBlock.
//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg()
	preemptible := flags&gcDrainUntilPreempt != 0
	blocking := flags&(gcDrainUntilPreempt|gcDrainNoBlock) == 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		for blocking || !gp.preempt {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			// TODO: Pass in gcw.
			markroot(job)
		}
	}

	initScanWork := gcw.scanWork

	// Drain heap marking jobs.
	for !(preemptible && gp.preempt) {
		// Try to keep work available on the global queue. We used to
		// check if there were waiting workers, but it's better to
		// just keep work available than to make workers wait. In the
		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
		// balances.
		if work.full == 0 {
			gcw.balance()
		}

		var b uintptr
		if blocking {
			b = gcw.get()
		} else {
			b = gcw.tryGet()
		}
		if b == 0 {
			// work barrier reached or tryGet failed.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
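		// (Descriptive note: flushing is batched in gcCreditSlack-sized
		// chunks to avoid hitting the shared atomic counters after
		// every scanned object.)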
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			gcw.scanWork = 0
		}
	}

	// In blocking mode, write barriers are not allowed after this
	// point because we must preserve the condition that the work
	// buffers are empty.

	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}

// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least scanWork units of work,
// but may perform more because scanning is always done in whole
// object increments. It returns the amount of scan work performed.
//go:nowritebarrier
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// See gcDrain comment.
		if work.full == 0 {
			gcw.balance()
		}

		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		//  }
		//
		b := gcw.tryGet()
		if b == 0 {
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}

// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used

	for i := uintptr(0); i < n; {
		// Find bits for the next word.
		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
		if bits == 0 {
			i += sys.PtrSize * 8
			continue
		}
		for j := 0; j < 8 && i < n; j++ {
			if bits&1 != 0 {
				// Same work as in scanobject; see comments there.
				obj := *(*uintptr)(unsafe.Pointer(b + i))
				if obj != 0 && arena_start <= obj && obj < arena_used {
					if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
						greyobject(obj, b, i, hbits, span, gcw)
					}
				}
			}
			bits >>= 1
			i += sys.PtrSize
		}
	}
}

// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object; scanobject consults
// the GC bitmap for the pointer mask and the spans for the size of the
// object (it ignores n).
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
	// Note that arena_used may change concurrently during
	// scanobject and hence scanobject may encounter a pointer to
	// a newly allocated heap object that is *not* in
	// [start,used). It will not mark this object; however, we
	// know that it was just installed by a mutator, which means
	// that mutator will execute a write barrier and take care of
	// marking it. This is even more pronounced on relaxed memory
	// architectures since we access arena_used without barriers
	// or synchronization, but the same logic applies.
	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used

	// Find bits of the beginning of the object.
	// b must point to the beginning of a heap object, so
	// we can get its bits and span directly.
	hbits := heapBitsForAddr(b)
	s := spanOfUnchecked(b)
	n := s.elemsize
	if n == 0 {
		throw("scanobject n == 0")
	}

	var i uintptr
	for i = 0; i < n; i += sys.PtrSize {
		// Find bits for this word.
		if i != 0 {
			// Avoid needless hbits.next() on last iteration.
			hbits = hbits.next()
		}
		// During checkmarking, 1-word objects store the checkmark
		// in the type bit for the one word. The only one-word objects
		// are pointers, or else they'd be merged with other non-pointer
		// data into larger allocations.
		bits := hbits.bits()
		if i >= 2*sys.PtrSize && bits&bitMarked == 0 {
			break // no more pointers in this object
		}
		if bits&bitPointer == 0 {
			continue // not a pointer
		}

		// Work here is duplicated in scanblock and above.
		// If you make changes here, make changes there too.
		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// At this point we have extracted the next potential pointer.
		// Check if it points into heap and not back at the current object.
		if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
			// Mark the object.
			if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
				greyobject(obj, b, i, hbits, span, gcw)
			}
		}
	}
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += int64(i)
}

// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
	if obj, hbits, span := heapBitsForObject(b, 0, 0); obj != 0 {
		gcw := &getg().m.p.ptr().gcw
		greyobject(obj, 0, 0, hbits, span, gcw)
		if gcphase == _GCmarktermination || gcBlackenPromptly {
			// Ps aren't allowed to cache work during mark
			// termination.
			gcw.dispose()
		}
	}
}

// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
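	// (Illustrative note, editor's addition: with 8-byte pointers,
	// sys.PtrSize-1 is 0x7, so the check below rejects any address
	// whose low three bits are set.)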
	if obj&(sys.PtrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}

	if useCheckmark {
		if !hbits.isMarked() {
			printlock()
			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")

			// Dump the source (base) object
			gcDumpObject("base", base, off)

			// Dump the object
			gcDumpObject("obj", obj, ^uintptr(0))

			throw("checkmark found unmarked object")
		}
		if hbits.isCheckmarked(span.elemsize) {
			return
		}
		hbits.setCheckmarked(span.elemsize)
		if !hbits.isCheckmarked(span.elemsize) {
			throw("setCheckmarked and isCheckmarked disagree")
		}
	} else {
		// If marked we have nothing to do.
		if hbits.isMarked() {
			return
		}
		hbits.setMarked()

		// If this is a noscan object, fast-track it to black
		// instead of greying it.
		if !hbits.hasPointers(span.elemsize) {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH

	gcw.put(obj)
}

// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	if obj < mheap_.arena_start || obj >= mheap_.arena_used {
		print(label, "=", hex(obj), " is not in the Go heap\n")
		return
	}
	k := obj >> _PageShift
	x := k
	x -= mheap_.arena_start >> _PageShift
	s := h_spans[x]
	print(label, "=", hex(obj), " k=", hex(k))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
	skipped := false
	for i := uintptr(0); i < s.elemsize; i += sys.PtrSize {
		// For big objects, just print the beginning (because
		// that usually hints at the object's type) and the
		// fields around off.
		if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
			skipped = true
			continue
		}
		if skipped {
			print(" ...\n")
			skipped = false
		}
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)))))
		if i == off {
			print(" <==")
		}
		print("\n")
	}
	if skipped {
		print(" ...\n")
	}
}

// If gcBlackenPromptly is true we are in the second mark phase, so we allocate black.
//go:nowritebarrier
func gcmarknewobject_m(obj, size uintptr) {
	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}
	heapBitsForAddr(obj).setMarked()
	atomic.Xadd64(&work.bytesMarked, int64(size))
}

// Checkmarking

// To help debug the concurrent GC we remark with the world
// stopped ensuring that any object encountered has its normal
// mark bit set. To do this we use an orthogonal bit
// pattern to indicate the object is marked. The following pattern
// uses the upper two bits in the object's boundary nibble.
// 01: scalar not marked
// 10: pointer not marked
// 11: pointer marked
// 00: scalar marked
// XORing with 01 will flip the pattern from marked to unmarked and vice versa.
// The higher bit is 1 for pointers and 0 for scalars, whether the object
// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that
// there are no more pointers in the object. This information is held
// in the second nibble.

// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false

//go:nowritebarrier
func initCheckmarks() {
	useCheckmark = true
	for _, s := range work.spans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
		}
	}
}

func clearCheckmarks() {
	useCheckmark = false
	for _, s := range work.spans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
		}
	}
}
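
// The sketch below is an editor's illustration, not part of the
// original runtime source and never called by it: it walks through the
// two-bit checkmark encoding documented above and shows that XORing
// with 01 toggles the marked state while leaving the pointer/scalar
// bit untouched.
func checkmarkEncodingExample() {
	const (
		scalarNotMarked  = 0x1 // 01
		pointerNotMarked = 0x2 // 10
		pointerMarked    = 0x3 // 11
		scalarMarked     = 0x0 // 00
	)
	// XOR with 01 flips marked <-> not marked.
	_ = scalarNotMarked ^ 0x1  // 00: scalar marked
	_ = pointerNotMarked ^ 0x1 // 11: pointer marked
	// The high bit (pointer vs. scalar) is unchanged by the flip.
	_ = (pointerMarked ^ 0x1) & 0x2 // still set: a pointer
	_ = (scalarMarked ^ 0x1) & 0x2  // still zero: a scalar
}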