// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: marking and scanning

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	fixedRootFinalizers = iota
	fixedRootFreeGStacks
	fixedRootCount

	// rootBlockBytes is the number of bytes to scan per data or
	// BSS root.
	rootBlockBytes = 256 << 10

	// maxObletBytes is the maximum bytes of an object to scan at
	// once. Larger objects will be split up into "oblets" of at
	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
	// scan preemption at ~100 µs.
	//
	// This must be > _MaxSmallSize so that the object base is the
	// span base.
	maxObletBytes = 128 << 10

	// drainCheckThreshold specifies how many units of work to do
	// between self-preemption checks in gcDrain. Assuming a scan
	// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
	// overhead in the scan loop (the scheduler check may perform
	// a syscall, so its overhead is nontrivial). Higher values
	// make the system less responsive to incoming work.
	drainCheckThreshold = 100000

	// pagesPerSpanRoot indicates how many pages to scan from a span root
	// at a time. Used by special root marking.
	//
	// Higher values improve throughput by increasing locality, but
	// increase the minimum latency of a marking operation.
	//
	// Must be a multiple of the pageInUse bitmap element size and
	// must also evenly divide pagesPerArena.
	pagesPerSpanRoot = 512
)

// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
// The world must be stopped.
func gcMarkRootPrepare() {
	assertWorldStopped()

	// Compute how many data and BSS root blocks there are.
	nBlocks := func(bytes uintptr) int {
		return int(divRoundUp(bytes, rootBlockBytes))
	}

	work.nDataRoots = 0
	work.nBSSRoots = 0

	// Scan globals.
	for _, datap := range activeModules() {
		nDataRoots := nBlocks(datap.edata - datap.data)
		if nDataRoots > work.nDataRoots {
			work.nDataRoots = nDataRoots
		}
	}

	for _, datap := range activeModules() {
		nBSSRoots := nBlocks(datap.ebss - datap.bss)
		if nBSSRoots > work.nBSSRoots {
			work.nBSSRoots = nBSSRoots
		}
	}

	// Scan span roots for finalizer specials.
	//
	// We depend on addfinalizer to mark objects that get
	// finalizers after root marking.
	//
	// We're going to scan the whole heap (that was available at the time the
	// mark phase started, i.e. markArenas) for in-use spans which have specials.
	//
	// Break up the work into arenas, and further into chunks.
	//
	// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
	// is append-only.
	mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
	work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)

	// Scan stacks.
	//
	// Gs may be created after this point, but it's okay that we
	// ignore them because they begin life without any roots, so
	// there's nothing to scan, and any roots they create during
	// the concurrent phase will be caught by the write barrier.
	work.nStackRoots = int(atomic.Loaduintptr(&allglen))

	work.markrootNext = 0
	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)

	// Calculate base indexes of each root type
	work.baseData = uint32(fixedRootCount)
	work.baseBSS = work.baseData + uint32(work.nDataRoots)
	work.baseSpans = work.baseBSS + uint32(work.nBSSRoots)
	work.baseStacks = work.baseSpans + uint32(work.nSpanRoots)
	work.baseEnd = work.baseStacks + uint32(work.nStackRoots)
}

// gcMarkRootCheck checks that all roots have been scanned. It is
// purely for debugging.
func gcMarkRootCheck() {
	if work.markrootNext < work.markrootJobs {
		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
		throw("left over markroot jobs")
	}

	// Check that stacks have been scanned.
	//
	// We only check the first nStackRoots Gs that we should have scanned.
	// Since we don't care about newer Gs (see comment in
	// gcMarkRootPrepare), no locking is required.
	i := 0
	forEachGRace(func(gp *g) {
		if i >= work.nStackRoots {
			return
		}

		if !gp.gcscandone {
			println("gp", gp, "goid", gp.goid,
				"status", readgstatus(gp),
				"gcscandone", gp.gcscandone)
			throw("scan missed a g")
		}

		i++
	})
}

// ptrmask for an allocation containing a single pointer.
var oneptrmask = [...]uint8{1}

// markroot scans the i'th root.
//
// Preemption must be disabled (because this uses a gcWork).
//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
func markroot(gcw *gcWork, i uint32) {
	// Note: if you add a case here, please also update heapdump.go:dumproots.
	switch {
	case work.baseData <= i && i < work.baseBSS:
		for _, datap := range activeModules() {
			markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
		}

	case work.baseBSS <= i && i < work.baseSpans:
		for _, datap := range activeModules() {
			markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
		}

	case i == fixedRootFinalizers:
		for fb := allfin; fb != nil; fb = fb.alllink {
			cnt := uintptr(atomic.Load(&fb.cnt))
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
		}

	case i == fixedRootFreeGStacks:
		// Switch to the system stack so we can call
		// stackfree.
		systemstack(markrootFreeGStacks)

	case work.baseSpans <= i && i < work.baseStacks:
		// mark mspan.specials
		markrootSpans(gcw, int(i-work.baseSpans))

	default:
		// the rest is scanning goroutine stacks
		var gp *g
		if work.baseStacks <= i && i < work.baseEnd {
			// N.B. Atomic read of allglen in gcMarkRootPrepare
			// acts as a barrier to ensure that allgs must be large
			// enough to contain all relevant Gs.
			gp = allgs[i-work.baseStacks]
		} else {
			throw("markroot: bad index")
		}

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// scanstack must be done on the system stack in case
		// we're trying to scan our own stack.
		systemstack(func() {
			// If this is a self-scan, put the user G in
			// _Gwaiting to prevent self-deadlock. It may
			// already be in _Gwaiting if this is a mark
			// worker or we're in mark termination.
			userG := getg().m.curg
			selfScan := gp == userG && readgstatus(userG) == _Grunning
			if selfScan {
				casgstatus(userG, _Grunning, _Gwaiting)
				userG.waitreason = waitReasonGarbageCollectionScan
			}

			// TODO: suspendG blocks (and spins) until gp
			// stops, which may take a while for
			// running goroutines. Consider doing this in
			// two phases where the first is non-blocking:
			// we scan the stacks we can and ask running
			// goroutines to scan themselves; and the
			// second blocks.
			stopped := suspendG(gp)
			if stopped.dead {
				gp.gcscandone = true
				return
			}
			if gp.gcscandone {
				throw("g already scanned")
			}
			scanstack(gp, gcw)
			gp.gcscandone = true
			resumeG(stopped)

			if selfScan {
				casgstatus(userG, _Gwaiting, _Grunning)
			}
		})
	}
}

// markrootBlock scans the shard'th shard of the block of memory [b0,
// b0+n0), with the given pointer mask.
//
//go:nowritebarrier
func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
	if rootBlockBytes%(8*sys.PtrSize) != 0 {
		// This is necessary to pick byte offsets in ptrmask0.
		throw("rootBlockBytes must be a multiple of 8*ptrSize")
	}

	// Note that if b0 is toward the end of the address space,
	// then b0 + rootBlockBytes might wrap around.
	// These tests are written to avoid any possible overflow.
	off := uintptr(shard) * rootBlockBytes
	if off >= n0 {
		return
	}
	b := b0 + off
	ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
	n := uintptr(rootBlockBytes)
	if off+n > n0 {
		n = n0 - off
	}

	// Scan this shard.
	scanblock(b, n, ptrmask, gcw, nil)
}

// markrootFreeGStacks frees stacks of dead Gs.
//
// This does not free stacks of dead Gs cached on Ps, but having a few
// cached stacks around isn't a problem.
func markrootFreeGStacks() {
	// Take list of dead Gs with stacks.
	lock(&sched.gFree.lock)
	list := sched.gFree.stack
	sched.gFree.stack = gList{}
	unlock(&sched.gFree.lock)
	if list.empty() {
		return
	}

	// Free stacks.
	q := gQueue{list.head, list.head}
	for gp := list.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		// Manipulate the queue directly since the Gs are
		// already all linked the right way.
		q.tail.set(gp)
	}

	// Put Gs back on the free list.
	lock(&sched.gFree.lock)
	sched.gFree.noStack.pushAll(q)
	unlock(&sched.gFree.lock)
}

// markrootSpans marks roots for one shard of markArenas.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
	// Objects with finalizers have two GC-related invariants:
	//
	// 1) Everything reachable from the object must be marked.
	// This ensures that when we pass the object to its finalizer,
	// everything the finalizer can reach will be retained.
	//
	// 2) Finalizer specials (which are not in the garbage
	// collected heap) are roots. In practice, this means the fn
	// field must be scanned.
	sg := mheap_.sweepgen

	// Find the arena and page index into that arena for this shard.
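	// Note (added for clarity): each arena contributes
	// pagesPerArena/pagesPerSpanRoot shards, so dividing the shard index
	// by that count selects the arena, and the remainder (scaled by
	// pagesPerSpanRoot below) selects the starting page within it.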
	ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
	ha := mheap_.arenas[ai.l1()][ai.l2()]
	arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)

	// Construct slice of bitmap which we'll iterate over.
	specialsbits := ha.pageSpecials[arenaPage/8:]
	specialsbits = specialsbits[:pagesPerSpanRoot/8]
	for i := range specialsbits {
		// Find set bits, which correspond to spans with specials.
		specials := atomic.Load8(&specialsbits[i])
		if specials == 0 {
			continue
		}
		for j := uint(0); j < 8; j++ {
			if specials&(1<<j) == 0 {
				continue
			}
			// Find the span for this bit.
			//
			// This value is guaranteed to be non-nil because having
			// specials implies that the span is in-use, and since we're
			// currently marking we can be sure that we don't have to worry
			// about the span being freed and re-used.
			s := ha.spans[arenaPage+uint(i)*8+j]

			// The state must be mSpanInUse if the specials bit is set, so
			// sanity check that.
			if state := s.state.get(); state != mSpanInUse {
				print("s.state = ", state, "\n")
				throw("non in-use span found with specials bit set")
			}
			// Check that this span was swept (it may be cached or uncached).
			if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
				// sweepgen was updated (+2) during non-checkmark GC pass
				print("sweep ", s.sweepgen, " ", sg, "\n")
				throw("gc: unswept span")
			}

			// Lock the specials to prevent a special from being
			// removed from the list while we're traversing it.
			lock(&s.speciallock)
			for sp := s.specials; sp != nil; sp = sp.next {
				if sp.kind != _KindSpecialFinalizer {
					continue
				}
				// don't mark finalized object, but scan it so we
				// retain everything it points to.
				spf := (*specialfinalizer)(unsafe.Pointer(sp))
				// A finalizer can be set for an inner byte of an object, find object beginning.
				p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize

				// Mark everything that can be reached from
				// the object (but *not* the object itself or
				// we'll never collect it).
				scanobject(p, gcw)

				// The special itself is a root.
				scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
			}
			unlock(&s.speciallock)
		}
	}
}

// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

	traced := false
retry:
	// Compute the amount of scan work we need to do to make the
	// balance positive. When the required amount of work is low,
	// we over-assist to build up credit for future allocations
	// and amortize the cost of assisting.
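	// Note (added for clarity): assistWorkPerByte and assistBytesPerWork
	// are float64 values stored as raw bits so they can be loaded
	// atomically; the pacer may update them concurrently with this assist.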
	assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
	assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
	debtBytes := -gp.gcAssistBytes
	scanWork := int64(assistWorkPerByte * float64(debtBytes))
	if scanWork < gcOverAssistWork {
		scanWork = gcOverAssistWork
		debtBytes = int64(assistBytesPerWork * float64(scanWork))
	}

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			if traced {
				traceGCMarkAssistDone()
			}
			return
		}
	}

	if trace.enabled && !traced {
		traced = true
		traceGCMarkAssistStart()
	}

	// Perform assist work
	systemstack(func() {
		gcAssistAlloc1(gp, scanWork)
		// The user stack may have moved, so this can't touch
		// anything on it until it returns from systemstack.
	})

	completed := gp.param != nil
	gp.param = nil
	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable to steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		if !gcParkAssist() {
			goto retry
		}

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
	if traced {
		traceGCMarkAssistDone()
	}
}

// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
// stack. This is a separate function to make it easier to see that
// we're not capturing anything from the user stack, since the user
// stack may move while we're in this function.
//
// gcAssistAlloc1 indicates whether this assist completed the mark
// phase by setting gp.param to non-nil. This can't be communicated on
// the stack since it may move.
//
//go:systemstack
func gcAssistAlloc1(gp *g, scanWork int64) {
	// Clear the flag indicating that this assist completed the
	// mark phase.
	gp.param = nil

	if atomic.Load(&gcBlackenEnabled) == 0 {
		// The gcBlackenEnabled check in malloc races with the
		// store that clears it but an atomic check in every malloc
		// would be a performance hit.
		// Instead we recheck it here on the non-preemptible system
		// stack to determine if we should perform an assist.

		// GC is done, so ignore any remaining debt.
		gp.gcAssistBytes = 0
		return
	}
	// Track time spent in this assist. Since we're on the
	// system stack, this is non-preemptible, so we can
	// just measure start and end time.
	startTime := nanotime()

	decnwait := atomic.Xadd(&work.nwait, -1)
	if decnwait == work.nproc {
		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
		throw("nwait > work.nprocs")
	}

	// gcDrainN requires the caller to be preemptible.
	casgstatus(gp, _Grunning, _Gwaiting)
	gp.waitreason = waitReasonGCAssistMarking

	// drain own cached work first in the hopes that it
	// will be more cache friendly.
	gcw := &getg().m.p.ptr().gcw
	workDone := gcDrainN(gcw, scanWork)

	casgstatus(gp, _Gwaiting, _Grunning)

	// Record that we did this much scan work.
	//
	// Back out the number of bytes of assist credit that
	// this scan work counts for. The "1+" is a poor man's
	// round-up, to ensure this adds credit even if
	// assistBytesPerWork is very low.
	assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
	gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone))

	// If this is the last worker and we ran out of work,
	// signal a completion point.
	incnwait := atomic.Xadd(&work.nwait, +1)
	if incnwait > work.nproc {
		println("runtime: work.nwait=", incnwait,
			"work.nproc=", work.nproc)
		throw("work.nwait > work.nproc")
	}

	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
		// This has reached a background completion point. Set
		// gp.param to a non-nil value to indicate this. It
		// doesn't matter what we set it to (it just has to be
		// a valid pointer).
		gp.param = unsafe.Pointer(gp)
	}
	duration := nanotime() - startTime
	_p_ := gp.m.p.ptr()
	_p_.gcAssistTime += duration
	if _p_.gcAssistTime > gcAssistTimeSlack {
		atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
		_p_.gcAssistTime = 0
	}
}

// gcWakeAllAssists wakes all currently blocked assists. This is used
// at the end of a GC cycle. gcBlackenEnabled must be false to prevent
// new assists from going to sleep after this point.
func gcWakeAllAssists() {
	lock(&work.assistQueue.lock)
	list := work.assistQueue.q.popList()
	injectglist(&list)
	unlock(&work.assistQueue.lock)
}

// gcParkAssist puts the current goroutine on the assist queue and parks.
//
// gcParkAssist reports whether the assist is now satisfied. If it
// returns false, the caller must retry the assist.
//
//go:nowritebarrier
func gcParkAssist() bool {
	lock(&work.assistQueue.lock)
	// If the GC cycle finished while we were getting the lock,
	// exit the assist. The cycle can't finish while we hold the
	// lock.
	if atomic.Load(&gcBlackenEnabled) == 0 {
		unlock(&work.assistQueue.lock)
		return true
	}

	gp := getg()
	oldList := work.assistQueue.q
	work.assistQueue.q.pushBack(gp)

	// Recheck for background credit now that this G is in
	// the queue, but can still back out. This avoids a
	// race in case background marking has flushed more
	// credit since we checked above.
	if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
		work.assistQueue.q = oldList
		if oldList.tail != 0 {
			oldList.tail.ptr().schedlink.set(nil)
		}
		unlock(&work.assistQueue.lock)
		return false
	}
	// Park.
	goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
	return true
}

// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.q.empty() {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
	scanBytes := int64(float64(scanWork) * assistBytesPerWork)

	lock(&work.assistQueue.lock)
	for !work.assistQueue.q.empty() && scanBytes > 0 {
		gp := work.assistQueue.q.pop()
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			// It's important that we *not* put gp in
			// runnext. Otherwise, it's possible for user
			// code to exploit the GC worker's high
			// scheduler priority to get itself always run
			// before other goroutines and always in the
			// fresh quantum started by GC.
			ready(gp, 0, false)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			work.assistQueue.q.pushBack(gp)
			break
		}
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
		scanWork = int64(float64(scanBytes) * assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}

// scanstack scans gp's stack, greying all pointers found on the stack.
//
// scanstack will also shrink the stack if it is safe to do so. If it
// is not, it schedules a stack shrink for the next synchronous safe
// point.
//
// scanstack is marked go:systemstack because it must not be preempted
// while using a workbuf.
//
//go:nowritebarrier
//go:systemstack
func scanstack(gp *g, gcw *gcWork) {
	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		return
	case _Grunning:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("scanstack: goroutine not stopped")
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	if gp == getg() {
		throw("can't scan our own stack")
	}

	if isShrinkStackSafe(gp) {
		// Shrink the stack if not much of it is being used.
		shrinkstack(gp)
	} else {
		// Otherwise, shrink the stack at the next sync safe point.
		gp.preemptShrink = true
	}

	var state stackScanState
	state.stack = gp.stack

	if stackTraceDebug {
		println("stack trace goroutine", gp.goid)
	}

	if debugScanConservative && gp.asyncSafePoint {
		print("scanning async preempted goroutine ", gp.goid, " stack [", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
	}

	// Scan the saved context register. This is effectively a live
	// register that gets moved back and forth between the
	// register and sched.ctxt without a write barrier.
	if gp.sched.ctxt != nil {
		scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	}

	// Scan the stack. Accumulate a list of stack objects.
	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
		scanframeworker(frame, &state, gcw)
		return true
	}
	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)

	// Find additional pointers that point into the stack from the heap.
	// Currently this includes defers and panics. See also function copystack.

	// Find and trace all defer arguments.
	tracebackdefers(gp, scanframe, nil)

	// Find and trace other pointers in defer records.
	for d := gp._defer; d != nil; d = d.link {
		if d.fn != nil {
			// tracebackdefers above does not scan the func value, which could
			// be a stack allocated closure. See issue 30453.
			scanblock(uintptr(unsafe.Pointer(&d.fn)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
		if d.link != nil {
			// The link field of a stack-allocated defer record might point
			// to a heap-allocated defer record. Keep that heap record live.
			scanblock(uintptr(unsafe.Pointer(&d.link)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
		// Retain defer records themselves.
		// Defer records might not be reachable from the G through regular heap
		// tracing because the defer linked list might weave between the stack and the heap.
		if d.heap {
			scanblock(uintptr(unsafe.Pointer(&d)), sys.PtrSize, &oneptrmask[0], gcw, &state)
		}
	}
	if gp._panic != nil {
		// Panics are always stack allocated.
		state.putPtr(uintptr(unsafe.Pointer(gp._panic)), false)
	}

	// Find and scan all reachable stack objects.
	//
	// The state's pointer queue prioritizes precise pointers over
	// conservative pointers so that we'll prefer scanning stack
	// objects precisely.
	state.buildIndex()
	for {
		p, conservative := state.getPtr()
		if p == 0 {
			break
		}
		obj := state.findObject(p)
		if obj == nil {
			continue
		}
		r := obj.r
		if r == nil {
			// We've already scanned this object.
			continue
		}
		obj.setRecord(nil) // Don't scan it again.
		if stackTraceDebug {
			printlock()
			print(" live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of size", obj.size)
			if conservative {
				print(" (conservative)")
			}
			println()
			printunlock()
		}
		gcdata := r.gcdata
		var s *mspan
		if r.useGCProg() {
			// This path is pretty unlikely, an object large enough
			// to have a GC program allocated on the stack.
			// We need some space to unpack the program into a straight
			// bitmask, which we allocate/free here.
			// TODO: it would be nice if there were a way to run a GC
			// program without having to store all its bits. We'd have
			// to change from a Lempel-Ziv style program to something else.
			// Or we can forbid putting objects on stacks if they require
			// a gc program (see issue 27447).
			s = materializeGCProg(r.ptrdata(), gcdata)
			gcdata = (*byte)(unsafe.Pointer(s.startAddr))
		}

		b := state.stack.lo + uintptr(obj.off)
		if conservative {
			scanConservative(b, r.ptrdata(), gcdata, gcw, &state)
		} else {
			scanblock(b, r.ptrdata(), gcdata, gcw, &state)
		}

		if s != nil {
			dematerializeGCProg(s)
		}
	}

	// Deallocate object buffers.
	// (Pointer buffers were all deallocated in the loop above.)
	for state.head != nil {
		x := state.head
		state.head = x.next
		if stackTraceDebug {
			for i := 0; i < x.nobj; i++ {
				obj := &x.obj[i]
				if obj.r == nil { // reachable
					continue
				}
				println(" dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of size", obj.r.size)
				// Note: not necessarily really dead - only reachable-from-ptr dead.
			}
		}
		x.nobj = 0
		putempty((*workbuf)(unsafe.Pointer(x)))
	}
	if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
		throw("remaining pointer buffers")
	}
}

// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
	if _DebugGC > 1 && frame.continpc != 0 {
		print("scanframe ", funcname(frame.fn), "\n")
	}

	isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
	isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV2
	if state.conservative || isAsyncPreempt || isDebugCall {
		if debugScanConservative {
			println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
		}

		// Conservatively scan the frame. Unlike the precise
		// case, this includes the outgoing argument space
		// since we may have stopped while this function was
		// setting up a call.
		//
		// TODO: We could narrow this down if the compiler
		// produced a single map per function of stack slots
		// and registers that ever contain a pointer.
		if frame.varp != 0 {
			size := frame.varp - frame.sp
			if size > 0 {
				scanConservative(frame.sp, size, nil, gcw, state)
			}
		}

		// Scan arguments to this frame.
		if frame.arglen != 0 {
			// TODO: We could pass the entry argument map
			// to narrow this down further.
			scanConservative(frame.argp, frame.arglen, nil, gcw, state)
		}

		if isAsyncPreempt || isDebugCall {
			// This function's frame contained the
			// registers for the asynchronously stopped
			// parent frame. Scan the parent
			// conservatively.
			state.conservative = true
		} else {
			// We only wanted to scan those two frames
			// conservatively. Clear the flag for future
			// frames.
			state.conservative = false
		}
		return
	}

	locals, args, objs := getStackMap(frame, &state.cache, false)

	// Scan local variables if stack frame has been allocated.
	if locals.n > 0 {
		size := uintptr(locals.n) * sys.PtrSize
		scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
	}

	// Scan arguments.
	if args.n > 0 {
		scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
	}

	// Add all stack objects to the stack object list.
	if frame.varp != 0 {
		// varp is 0 for defers, where there are no locals.
		// In that case, there can't be a pointer to its args, either.
		// (And all args would be scanned above anyway.)
		for i, obj := range objs {
			off := obj.off
			base := frame.varp // locals base pointer
			if off >= 0 {
				base = frame.argp // arguments and return values base pointer
			}
			ptr := base + uintptr(off)
			if ptr < frame.sp {
				// object hasn't been allocated in the frame yet.
				continue
			}
			if stackTraceDebug {
				println("stkobj at", hex(ptr), "of size", obj.size)
			}
			state.addObject(ptr, &objs[i])
		}
	}
}

type gcDrainFlags int

const (
	gcDrainUntilPreempt gcDrainFlags = 1 << iota
	gcDrainFlushBgCredit
	gcDrainIdle
	gcDrainFractional
)

// gcDrain scans roots and objects in work buffers, blackening grey
// objects until it is unable to get more work. It may return before
// GC is done; it's the caller's responsibility to balance work from
// other Ps.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set.
//
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do.
//
// If flags&gcDrainFractional != 0, gcDrain self-preempts when
// pollFractionalWorkerExit() returns true. This implies
// gcDrainNoBlock.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
// gcDrain will always return if there is a pending STW.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg().m.curg
	preemptible := flags&gcDrainUntilPreempt != 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0
	idle := flags&gcDrainIdle != 0

	initScanWork := gcw.scanWork

	// checkWork is the scan work before performing the next
	// self-preempt check.
	checkWork := int64(1<<63 - 1)
	var check func() bool
	if flags&(gcDrainIdle|gcDrainFractional) != 0 {
		checkWork = initScanWork + drainCheckThreshold
		if idle {
			check = pollWork
		} else if flags&gcDrainFractional != 0 {
			check = pollFractionalWorkerExit
		}
	}

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		// Stop if we're preemptible or if someone wants to STW.
		for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			markroot(gcw, job)
			if check != nil && check() {
				goto done
			}
		}
	}

	// Drain heap marking jobs.
	// Stop if we're preemptible or if someone wants to STW.
	for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
		// Try to keep work available on the global queue. We used to
		// check if there were waiting workers, but it's better to
		// just keep work available than to make workers wait. In the
		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
		// balances.
		if work.full == 0 {
			gcw.balance()
		}

		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
			if b == 0 {
				// Flush the write barrier
				// buffer; this may create
				// more work.
				wbBufFlush(nil, 0)
				b = gcw.tryGet()
			}
		}
		if b == 0 {
			// Unable to get work.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			checkWork -= gcw.scanWork
			gcw.scanWork = 0

			if checkWork <= 0 {
				checkWork += drainCheckThreshold
				if check != nil && check() {
					break
				}
			}
		}
	}

done:
	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}

// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least n units of work, but
// may perform more because scanning is always done in whole object
// increments. It returns the amount of scan work performed.
//
// The caller goroutine must be in a preemptible state (e.g.,
// _Gwaiting) to prevent deadlocks during stack scanning. As a
// consequence, this must be called on the system stack.
//
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// See gcDrain comment.
		if work.full == 0 {
			gcw.balance()
		}

		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		// }
		//
		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
			if b == 0 {
				// Flush the write barrier buffer;
				// this may create more work.
				wbBufFlush(nil, 0)
				b = gcw.tryGet()
			}
		}

		if b == 0 {
			// Try to do a root job.
			//
			// TODO: Assists should get credit for this
			// work.
			if work.markrootNext < work.markrootJobs {
				job := atomic.Xadd(&work.markrootNext, +1) - 1
				if job < work.markrootJobs {
					markroot(gcw, job)
					continue
				}
			}
			// No heap or root jobs.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}

// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	for i := uintptr(0); i < n; {
		// Find bits for the next word.
		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
		if bits == 0 {
			i += sys.PtrSize * 8
			continue
		}
		for j := 0; j < 8 && i < n; j++ {
			if bits&1 != 0 {
				// Same work as in scanobject; see comments there.
				p := *(*uintptr)(unsafe.Pointer(b + i))
				if p != 0 {
					if obj, span, objIndex := findObject(p, b, i); obj != 0 {
						greyobject(obj, b, i, span, gcw, objIndex)
					} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
						stk.putPtr(p, false)
					}
				}
			}
			bits >>= 1
			i += sys.PtrSize
		}
	}
}

// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object or an oblet.
// scanobject consults the GC bitmap for the pointer mask and the
// spans for the size of the object.
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
	// Find the bits for b and the size of the object at b.
	//
	// b is either the beginning of an object, in which case this
	// is the size of the object to scan, or it points to an
	// oblet, in which case we compute the size to scan below.
	hbits := heapBitsForAddr(b)
	s := spanOfUnchecked(b)
	n := s.elemsize
	if n == 0 {
		throw("scanobject n == 0")
	}

	if n > maxObletBytes {
		// Large object. Break into oblets for better
		// parallelism and lower latency.
		if b == s.base() {
			// It's possible this is a noscan object (not
			// from greyobject, but from other code
			// paths), in which case we must *not* enqueue
			// oblets since their bitmaps will be
			// uninitialized.
			if s.spanclass.noscan() {
				// Bypass the whole scan.
				gcw.bytesMarked += uint64(n)
				return
			}

			// Enqueue the other oblets to scan later.
			// Some oblets may be in b's scalar tail, but
			// these will be marked as "no more pointers",
			// so we'll drop out immediately when we go to
			// scan those.
			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
				if !gcw.putFast(oblet) {
					gcw.put(oblet)
				}
			}
		}

		// Compute the size of the oblet. Since this object
		// must be a large object, s.base() is the beginning
		// of the object.
		n = s.base() + s.elemsize - b
		if n > maxObletBytes {
			n = maxObletBytes
		}
	}

	var i uintptr
	for i = 0; i < n; i, hbits = i+sys.PtrSize, hbits.next() {
		// Load bits once. See CL 22712 and issue 16973 for discussion.
		bits := hbits.bits()
		if bits&bitScan == 0 {
			break // no more pointers in this object
		}
		if bits&bitPointer == 0 {
			continue // not a pointer
		}

		// Work here is duplicated in scanblock and above.
		// If you make changes here, make changes there too.
		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// At this point we have extracted the next potential pointer.
		// Quickly filter out nil and pointers back to the current object.
		if obj != 0 && obj-b >= n {
			// Test if obj points into the Go heap and, if so,
			// mark the object.
			//
			// Note that it's possible for findObject to
			// fail if obj points to a just-allocated heap
			// object because of a race with growing the
			// heap. In this case, we know the object was
			// just allocated and hence will be marked by
			// allocation itself.
			if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
				greyobject(obj, b, i, span, gcw, objIndex)
			}
		}
	}
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += int64(i)
}

// scanConservative scans block [b, b+n) conservatively, treating any
// pointer-like value in the block as a pointer.
//
// If ptrmask != nil, only words that are marked in ptrmask are
// considered as potential pointers.
//
// If state != nil, it's assumed that [b, b+n) is a block in the stack
// and may contain pointers to stack objects.
func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackScanState) {
	if debugScanConservative {
		printlock()
		print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
		hexdumpWords(b, b+n, func(p uintptr) byte {
			if ptrmask != nil {
				word := (p - b) / sys.PtrSize
				bits := *addb(ptrmask, word/8)
				if (bits>>(word%8))&1 == 0 {
					return '$'
				}
			}

			val := *(*uintptr)(unsafe.Pointer(p))
			if state != nil && state.stack.lo <= val && val < state.stack.hi {
				return '@'
			}

			span := spanOfHeap(val)
			if span == nil {
				return ' '
			}
			idx := span.objIndex(val)
			if span.isFree(idx) {
				return ' '
			}
			return '*'
		})
		printunlock()
	}

	for i := uintptr(0); i < n; i += sys.PtrSize {
		if ptrmask != nil {
			word := i / sys.PtrSize
			bits := *addb(ptrmask, word/8)
			if bits == 0 {
				// Skip 8 words (the loop increment will do the 8th)
				//
				// This must be the first time we've
				// seen this word of ptrmask, so i
				// must be 8-word-aligned, but check
				// our reasoning just in case.
				if i%(sys.PtrSize*8) != 0 {
					throw("misaligned mask")
				}
				i += sys.PtrSize*8 - sys.PtrSize
				continue
			}
			if (bits>>(word%8))&1 == 0 {
				continue
			}
		}

		val := *(*uintptr)(unsafe.Pointer(b + i))

		// Check if val points into the stack.
		if state != nil && state.stack.lo <= val && val < state.stack.hi {
			// val may point to a stack object. This
			// object may be dead from last cycle and
			// hence may contain pointers to unallocated
			// objects, but unlike heap objects we can't
			// tell if it's already dead. Hence, if all
			// pointers to this object are from
			// conservative scanning, we have to scan it
			// defensively, too.
			state.putPtr(val, true)
			continue
		}

		// Check if val points to a heap span.
		span := spanOfHeap(val)
		if span == nil {
			continue
		}

		// Check if val points to an allocated object.
		idx := span.objIndex(val)
		if span.isFree(idx) {
			continue
		}

		// val points to an allocated object. Mark it.
		obj := span.base() + idx*span.elemsize
		greyobject(obj, b, i, span, gcw, idx)
	}
}

// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
	if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
		gcw := &getg().m.p.ptr().gcw
		greyobject(obj, 0, 0, span, gcw, objIndex)
	}
}

// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//
// See also wbBufFlush1, which partially duplicates this logic.
//
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
	if obj&(sys.PtrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}
	mbits := span.markBitsForIndex(objIndex)

	if useCheckmark {
		if setCheckmark(obj, base, off, mbits) {
			// Already marked.
			return
		}
	} else {
		if debug.gccheckmark > 0 && span.isFree(objIndex) {
			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
			gcDumpObject("base", base, off)
			gcDumpObject("obj", obj, ^uintptr(0))
			getg().m.traceback = 2
			throw("marking free object")
		}

		// If marked we have nothing to do.
		if mbits.isMarked() {
			return
		}
		mbits.setMarked()

		// Mark span.
		arena, pageIdx, pageMask := pageIndexOf(span.base())
		if arena.pageMarks[pageIdx]&pageMask == 0 {
			atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
		}

		// If this is a noscan object, fast-track it to black
		// instead of greying it.
		if span.spanclass.noscan() {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH
	if !gcw.putFast(obj) {
		gcw.put(obj)
	}
}

// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	s := spanOf(obj)
	print(label, "=", hex(obj))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
	if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
		print(mSpanStateNames[state], "\n")
	} else {
		print("unknown(", state, ")\n")
	}

	skipped := false
	size := s.elemsize
	if s.state.get() == mSpanManual && size == 0 {
		// We're printing something from a stack frame. We
		// don't know how big it is, so just show up to and
		// including off.
		size = off + sys.PtrSize
	}
	for i := uintptr(0); i < size; i += sys.PtrSize {
		// For big objects, just print the beginning (because
		// that usually hints at the object's type) and the
		// fields around off.
		if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
			skipped = true
			continue
		}
		if skipped {
			print(" ...\n")
			skipped = false
		}
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
		if i == off {
			print(" <==")
		}
		print("\n")
	}
	if skipped {
		print(" ...\n")
	}
}

// gcmarknewobject marks a newly allocated object black. obj must
// not contain any non-nil pointers.
//
// This is nosplit so it can manipulate a gcWork without preemption.
//
//go:nowritebarrier
//go:nosplit
func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
	if useCheckmark { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}

	// Mark object.
	objIndex := span.objIndex(obj)
	span.markBitsForIndex(objIndex).setMarked()

	// Mark span.
	arena, pageIdx, pageMask := pageIndexOf(span.base())
	if arena.pageMarks[pageIdx]&pageMask == 0 {
		atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
	}

	gcw := &getg().m.p.ptr().gcw
	gcw.bytesMarked += uint64(size)
	gcw.scanWork += int64(scanSize)
}

// gcMarkTinyAllocs greys all active tiny alloc blocks.
//
// The world must be stopped.
func gcMarkTinyAllocs() {
	assertWorldStopped()

	for _, p := range allp {
		c := p.mcache
		if c == nil || c.tiny == 0 {
			continue
		}
		_, span, objIndex := findObject(c.tiny, 0, 0)
		gcw := &p.gcw
		greyobject(c.tiny, 0, 0, span, gcw, objIndex)
	}
}