// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
// The heap bitmap comprises 1 bit for each pointer-sized word in the heap,
// recording whether a pointer is stored in that word or not. This bitmap
// is stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at "start",
// then ha.bitmap[0] holds the 64 bits for the 64 words "start"
// through start+63*ptrSize, ha.bitmap[1] holds the entries for
// start+64*ptrSize through start+127*ptrSize, and so on.
// Bits correspond to words in little-endian order. ha.bitmap[0]&1 represents
// the word at "start", ha.bitmap[0]>>1&1 represents the word at start+8, etc.
// (For 32-bit platforms, s/64/32/.)
//
// We also keep a noMorePtrs bitmap which allows us to stop scanning
// the heap bitmap early in certain situations. If ha.noMorePtrs[i]>>j&1
// is 1, then the object containing the last word described by ha.bitmap[8*i+j]
// has no more pointers beyond those described by ha.bitmap[8*i+j].
// If ha.noMorePtrs[i]>>j&1 is set, the entries in ha.bitmap[8*i+j+1] and
// beyond must all be zero until the start of the next object.
//
// The bitmap for noscan spans is set to all zero at span allocation time.
//
// The bitmap for unallocated objects in scannable spans is not maintained
// (can be junk).
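//
// For example, to find the bit describing the pointer-sized word at an
// address p, one would compute (an illustrative sketch of the arithmetic
// above, mirroring heapBitsForAddr below; not a helper defined in this file):
//
//	ai := arenaIndex(p)
//	ha := mheap_.arenas[ai.l1()][ai.l2()]
//	word := p / goarch.PtrSize % heapArenaWords
//	isPointer := ha.bitmap[word/ptrBits]>>(word%ptrBits)&1 != 0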

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// addb returns the byte pointer p+n.
//
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nowritebarrier
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.byte&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	bytep, mask := s.allocBits.bitp(allocBitIndex)
	return markBits{bytep, mask, allocBitIndex}
}

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.TrailingZeros64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.TrailingZeros64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < s.freeIndexForScan {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}

// divideByElemSize returns n/s.elemsize.
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
func (s *mspan) divideByElemSize(n uintptr) uintptr {
	const doubleCheck = false

	// See explanation in mksizeclasses.go's computeDivMagic.
	q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)

	if doubleCheck && q != n/s.elemsize {
		println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
		throw("bad magic division")
	}
	return q
}

func (s *mspan) objIndex(p uintptr) uintptr {
	return s.divideByElemSize(p - s.base())
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{&s.gcmarkBits.x, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}
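
// The mark bit helpers above are typically used like this (an illustrative
// sketch, not code taken from the garbage collector itself): given an
// address p known to point into an in-use span, fetch and set its mark bit.
//
//	mbits := markBitsForAddr(p)
//	if !mbits.isMarked() {
//		mbits.setMarked()
//	}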

// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))

// badPointer throws a "found bad pointer in Go heap" panic.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	if s != nil {
		state := s.state.get()
		if state != mSpanInUse {
			print(" to unallocated span")
		} else {
			print(" to unused region of span")
		}
		print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
	}
	print("\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}

// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
			// Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
			// as they are the only platforms where the compiler's clobberdead mode
			// is implemented. On these platforms clobberdeadPtr cannot be a valid address.
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}

	objIndex = s.objIndex(p)
	base = s.base() + objIndex*s.elemsize
	return
}
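
// A typical findObject caller looks like this (an illustrative sketch;
// getgcmask at the end of this file uses the same pattern): resolve an
// arbitrary address to the heap object containing it, if any.
//
//	if base, span, objIndex := findObject(p, 0, 0); base != 0 {
//		// p points into the heap object [base, base+span.elemsize),
//		// which is object number objIndex within its span.
//	}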

// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
//
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
	// Conversion to a pointer is ok as long as findObject above does not call badPointer.
	// Since we're already promised that p doesn't point into the heap, just disallow heap
	// pointers and the special clobbered pointer.
	return spanOf(p) == nil && p != clobberdeadPtr
}

const ptrBits = 8 * goarch.PtrSize

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	// heapBits will report on pointers in the range [addr,addr+size).
	// The low bit of mask contains the pointerness of the word at addr
	// (assuming valid>0).
	addr, size uintptr

	// The next few pointer bits representing words starting at addr.
	// Those bits already returned by next() are zeroed.
	mask uintptr
	// Number of bits in mask that are valid. mask is always less than 1<<valid.
	valid uintptr
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must ensure [addr,addr+size) is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func heapBitsForAddr(addr, size uintptr) heapBits {
	// Find arena
	ai := arenaIndex(addr)
	ha := mheap_.arenas[ai.l1()][ai.l2()]

	// Word index in arena.
	word := addr / goarch.PtrSize % heapArenaWords

	// Word index and bit offset in bitmap array.
	idx := word / ptrBits
	off := word % ptrBits

	// Grab relevant bits of bitmap.
	mask := ha.bitmap[idx] >> off
	valid := ptrBits - off

	// Process depending on where the object ends.
	nptr := size / goarch.PtrSize
	if nptr < valid {
		// Bits for this object end before the end of this bitmap word.
		// Squash bits for the following objects.
		mask &= 1<<(nptr&(ptrBits-1)) - 1
		valid = nptr
	} else if nptr == valid {
		// Bits for this object end at exactly the end of this bitmap word.
		// All good.
	} else {
		// Bits for this object extend into the next bitmap word. See if there
		// may be any pointers recorded there.
		if uintptr(ha.noMorePtrs[idx/8])>>(idx%8)&1 != 0 {
			// No more pointers in this object after this bitmap word.
			// Update size so we know not to look there.
			size = valid * goarch.PtrSize
		}
	}

	return heapBits{addr: addr, size: size, mask: mask, valid: valid}
}

// Returns the (absolute) address of the next known pointer and
// a heapBits iterator representing any remaining pointers.
// If there are no more pointers, returns address 0.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func (h heapBits) next() (heapBits, uintptr) {
	for {
		if h.mask != 0 {
			var i int
			if goarch.PtrSize == 8 {
				i = sys.TrailingZeros64(uint64(h.mask))
			} else {
				i = sys.TrailingZeros32(uint32(h.mask))
			}
			h.mask ^= uintptr(1) << (i & (ptrBits - 1))
			return h, h.addr + uintptr(i)*goarch.PtrSize
		}

		// Skip words that we've already processed.
		h.addr += h.valid * goarch.PtrSize
		h.size -= h.valid * goarch.PtrSize
		if h.size == 0 {
			return h, 0 // no more pointers
		}

		// Grab more bits and try again.
		h = heapBitsForAddr(h.addr, h.size)
	}
}
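
// An illustrative use of the iterator (a sketch; the bulk barriers below and
// getgcmask at the end of this file follow the same shape): visit every
// recorded pointer slot of an object at address b with size n.
//
//	h := heapBitsForAddr(b, n)
//	for {
//		var addr uintptr
//		if h, addr = h.next(); addr == 0 {
//			break
//		}
//		// addr is the address of a word that holds a pointer.
//	}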

// nextFast is like next, but can return 0 even when there are more pointers
// to be found. Callers should call next if nextFast returns 0 as its second
// return value.
//
//	if h, addr = h.nextFast(); addr == 0 {
//		if h, addr = h.next(); addr == 0 {
//			... no more pointers ...
//		}
//	}
//	... process pointer at addr ...
//
// nextFast is designed to be inlineable.
//
//go:nosplit
func (h heapBits) nextFast() (heapBits, uintptr) {
	// TESTQ/JEQ
	if h.mask == 0 {
		return h, 0
	}
	// BSFQ
	var i int
	if goarch.PtrSize == 8 {
		i = sys.TrailingZeros64(uint64(h.mask))
	} else {
		i = sys.TrailingZeros32(uint32(h.mask))
	}
	// BTCQ
	h.mask ^= uintptr(1) << (i & (ptrBits - 1))
	// LEAQ (XX)(XX*8)
	return h, h.addr + uintptr(i)*goarch.PtrSize
}

// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// The pointer bitmap is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.PtrBytes.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if s := spanOf(dst); s == nil {
		// If dst is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for _, datap := range activeModules() {
			if datap.data <= dst && dst < datap.edata {
				bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for _, datap := range activeModules() {
			if datap.bss <= dst && dst < datap.ebss {
				bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	} else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
		// dst was heap memory at some point, but isn't now.
		// It can't be a global. It must be either our stack,
		// or in the case of direct channel sends, it could be
		// another stack. Either way, no need for barriers.
		// This will also catch if dst is in a freed span,
		// though that should never happen.
		return
	}

	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst, size)
	if src == 0 {
		for {
			var addr uintptr
			if h, addr = h.next(); addr == 0 {
				break
			}
			dstx := (*uintptr)(unsafe.Pointer(addr))
			p := buf.get1()
			p[0] = *dstx
		}
	} else {
		for {
			var addr uintptr
			if h, addr = h.next(); addr == 0 {
				break
			}
			dstx := (*uintptr)(unsafe.Pointer(addr))
			srcx := (*uintptr)(unsafe.Pointer(src + (addr - dst)))
			p := buf.get2()
			p[0] = *dstx
			p[1] = *srcx
		}
	}
}
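
// An illustrative caller pattern (a sketch of what the typed-copy helpers in
// mbarrier.go do, not a copy of them): issue the pre-write barriers for the
// pointer-holding prefix of typ, then perform the move.
//
//	if writeBarrier.needed && typ.PtrBytes != 0 {
//		bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.PtrBytes)
//	}
//	memmove(dst, src, typ.Size_)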

// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst, size)
	for {
		var addr uintptr
		if h, addr = h.next(); addr == 0 {
			break
		}
		srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
		p := buf.get1()
		p[0] = *srcx
	}
}

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / goarch.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * goarch.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				p := buf.get1()
				p[0] = *dstx
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				p := buf.get2()
				p[0] = *dstx
				p[1] = *srcx
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.Size_ != size {
		println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.Kind_&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.GCData
	buf := &getg().m.p.ptr().wbBuf
	var bits uint32
	for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
		if i&(goarch.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			p := buf.get2()
			p[0] = *dstx
			p[1] = *srcx
		}
	}
}
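
// As noted above, the only caller is the channel send path. Roughly (an
// illustrative sketch of a direct send to a receiver's stack in chan.go;
// details may differ), the call site looks like:
//
//	dst := sg.elem
//	typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_)
//	memmove(dst, src, t.Size_)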

// initHeapBits initializes the heap bitmap for a span.
// If this is a span of single pointer allocations, it initializes all
// words to pointer. If forceClear is true, clears all bits.
func (s *mspan) initHeapBits(forceClear bool) {
	if forceClear || s.spanclass.noscan() {
		// Set all the pointer bits to zero. We do this once
		// when the span is allocated so we don't have to do it
		// for each object allocation.
		base := s.base()
		size := s.npages * pageSize
		h := writeHeapBitsForAddr(base)
		h.flush(base, size)
		return
	}
	isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
	if !isPtrs {
		return // nothing to do
	}
	h := writeHeapBitsForAddr(s.base())
	size := s.npages * pageSize
	nptrs := size / goarch.PtrSize
	for i := uintptr(0); i < nptrs; i += ptrBits {
		h = h.write(^uintptr(0), ptrBits)
	}
	h.flush(s.base(), size)
}

// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
func (s *mspan) countAlloc() int {
	count := 0
	bytes := divRoundUp(s.nelems, 8)
	// Iterate over each 8-byte chunk and count allocations
	// with an intrinsic. Note that newMarkBits guarantees that
	// gcmarkBits will be 8-byte aligned, so we don't have to
	// worry about edge cases; irrelevant bits will simply be zero.
	for i := uintptr(0); i < bytes; i += 8 {
		// Extract 64 bits from the byte pointer and get an OnesCount.
		// Note that the unsafe cast here doesn't preserve endianness,
		// but that's OK. We only care about how many bits are 1, not
		// about the order we discover them in.
		mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
		count += sys.OnesCount64(mrkBits)
	}
	return count
}

type writeHeapBits struct {
	addr  uintptr // address that the low bit of mask represents the pointer state of.
	mask  uintptr // some pointer bits starting at the address addr.
	valid uintptr // number of bits in buf that are valid (including low)
	low   uintptr // number of low-order bits to not overwrite
}

func writeHeapBitsForAddr(addr uintptr) (h writeHeapBits) {
	// We start writing bits maybe in the middle of a heap bitmap word.
	// Remember how many bits into the word we started, so we can be sure
	// not to overwrite the previous bits.
	h.low = addr / goarch.PtrSize % ptrBits

	// round down to heap word that starts the bitmap word.
	h.addr = addr - h.low*goarch.PtrSize

	// We don't have any bits yet.
	h.mask = 0
	h.valid = h.low

	return
}

// write appends the pointerness of the next valid pointer slots
// using the low valid bits of bits. 1=pointer, 0=scalar.
func (h writeHeapBits) write(bits, valid uintptr) writeHeapBits {
	if h.valid+valid <= ptrBits {
		// Fast path - just accumulate the bits.
		h.mask |= bits << h.valid
		h.valid += valid
		return h
	}
	// Too many bits to fit in this word. Write the current word
	// out and move on to the next word.

	data := h.mask | bits<<h.valid       // mask for this word
	h.mask = bits >> (ptrBits - h.valid) // leftover for next word
	h.valid += valid - ptrBits           // have h.valid+valid bits, writing ptrBits of them

	// Flush mask to the memory bitmap.
	// TODO: figure out how to cache arena lookup.
	ai := arenaIndex(h.addr)
	ha := mheap_.arenas[ai.l1()][ai.l2()]
	idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
	m := uintptr(1)<<h.low - 1
	ha.bitmap[idx] = ha.bitmap[idx]&m | data
	// Note: no synchronization required for this write because
	// the allocator has exclusive access to the page, and the bitmap
	// entries are all for a single page. Also, visibility of these
	// writes is guaranteed by the publication barrier in mallocgc.

	// Clear noMorePtrs bit, since we're going to be writing bits
	// into the following word.
	ha.noMorePtrs[idx/8] &^= uint8(1) << (idx % 8)
	// Note: same as above

	// Move to next word of bitmap.
	h.addr += ptrBits * goarch.PtrSize
	h.low = 0
	return h
}

// Add padding of size bytes.
func (h writeHeapBits) pad(size uintptr) writeHeapBits {
	if size == 0 {
		return h
	}
	words := size / goarch.PtrSize
	for words > ptrBits {
		h = h.write(0, ptrBits)
		words -= ptrBits
	}
	return h.write(0, words)
}

// Flush the bits that have been written, and add zeros as needed
// to cover the full object [addr, addr+size).
func (h writeHeapBits) flush(addr, size uintptr) {
	// zeros counts the number of bits needed to represent the object minus the
	// number of bits we've already written. This is the number of 0 bits
	// that need to be added.
	zeros := (addr+size-h.addr)/goarch.PtrSize - h.valid

	// Add zero bits up to the bitmap word boundary
	if zeros > 0 {
		z := ptrBits - h.valid
		if z > zeros {
			z = zeros
		}
		h.valid += z
		zeros -= z
	}

	// Find word in bitmap that we're going to write.
	ai := arenaIndex(h.addr)
	ha := mheap_.arenas[ai.l1()][ai.l2()]
	idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords

	// Write remaining bits.
	if h.valid != h.low {
		m := uintptr(1)<<h.low - 1      // don't clear existing bits below "low"
		m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
		ha.bitmap[idx] = ha.bitmap[idx]&m | h.mask
	}
	if zeros == 0 {
		return
	}

	// Record in the noMorePtrs map that there won't be any more 1 bits,
	// so readers can stop early.
	ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)

	// Advance to next bitmap word.
	h.addr += ptrBits * goarch.PtrSize

	// Continue on writing zeros for the rest of the object.
	// For standard use of the ptr bits this is not required, as
	// the bits are read from the beginning of the object. Some uses,
	// like noscan spans, oblets, bulk write barriers, and cgocheck, might
	// start mid-object, so these writes are still required.
	for {
		// Write zero bits.
		ai := arenaIndex(h.addr)
		ha := mheap_.arenas[ai.l1()][ai.l2()]
		idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
		if zeros < ptrBits {
			ha.bitmap[idx] &^= uintptr(1)<<zeros - 1
			break
		} else if zeros == ptrBits {
			ha.bitmap[idx] = 0
			break
		} else {
			ha.bitmap[idx] = 0
			zeros -= ptrBits
		}
		ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)
		h.addr += ptrBits * goarch.PtrSize
	}
}
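
// The writer above is used in a simple three-step protocol (an illustrative
// sketch mirroring initHeapBits above and heapBitsSetType below): create a
// writer at the object's address x, append pointer/scalar bits, then flush to
// cover the whole allocation of size bytes. Here m and nptrs stand in for a
// 1-bit mask and its length.
//
//	h := writeHeapBitsForAddr(x)
//	h = h.write(m, nptrs) // low nptrs bits of m: 1=pointer, 0=scalar
//	h.flush(x, size)      // zero-fill the remainder of [x, x+size)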

// Read the bytes starting at the aligned pointer p into a uintptr.
// Read is little-endian.
func readUintptr(p *byte) uintptr {
	x := *(*uintptr)(unsafe.Pointer(p))
	if goarch.BigEndian {
		if goarch.PtrSize == 8 {
			return uintptr(sys.Bswap64(uint64(x)))
		}
		return uintptr(sys.Bswap32(uint32(x)))
	}
	return x
}

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.Size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
//
// There can only be one allocation from a given span active at a time,
// and the bitmap for a span always falls on word boundaries,
// so there are no write-write races for access to the heap bitmap.
// Hence, heapBitsSetType can access the bitmap without atomics.
//
// There can be read-write races between heapBitsSetType and things
// that read the heap bitmap like scanobject. However, since
// heapBitsSetType is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// bits that belong to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
	const doubleCheck = false // slow but helpful; enable to test modifications to this code

	if doubleCheck && dataSize%typ.Size_ != 0 {
		throw("heapBitsSetType: dataSize not a multiple of typ.Size")
	}

	if goarch.PtrSize == 8 && size == goarch.PtrSize {
		// It's one word and it has pointers, it must be a pointer.
		// Since all allocated one-word objects are pointers
		// (non-pointers are aggregated into tinySize allocations),
		// (*mspan).initHeapBits sets the pointer bits for us.
		// Nothing to do here.
		if doubleCheck {
			h, addr := heapBitsForAddr(x, size).next()
			if addr != x {
				throw("heapBitsSetType: pointer bit missing")
			}
			_, addr = h.next()
			if addr != 0 {
				throw("heapBitsSetType: second pointer bit found")
			}
		}
		return
	}

	h := writeHeapBitsForAddr(x)

	// Handle GC program.
	if typ.Kind_&kindGCProg != 0 {
		// Expand the gc program into the storage we're going to use for the actual object.
		obj := (*uint8)(unsafe.Pointer(x))
		n := runGCProg(addb(typ.GCData, 4), obj)
		// Use the expanded program to set the heap bits.
		for i := uintptr(0); true; i += typ.Size_ {
			// Copy expanded program to heap bitmap.
			p := obj
			j := n
			for j > 8 {
				h = h.write(uintptr(*p), 8)
				p = add1(p)
				j -= 8
			}
			h = h.write(uintptr(*p), j)

			if i+typ.Size_ == dataSize {
				break // no padding after last element
			}

			// Pad with zeros to the start of the next element.
			h = h.pad(typ.Size_ - n*goarch.PtrSize)
		}

		h.flush(x, size)

		// Erase the expanded GC program.
		memclrNoHeapPointers(unsafe.Pointer(obj), (n+7)/8)
		return
	}

	// Note about sizes:
	//
	// typ.Size is the number of words in the object,
	// and typ.PtrBytes is the number of words in the prefix
	// of the object that contains pointers. That is, the final
	// typ.Size - typ.PtrBytes words contain no pointers.
	// This allows optimization of a common pattern where
	// an object has a small header followed by a large scalar
	// buffer. If we know the pointers are over, we don't have
	// to scan the buffer's heap bitmap at all.
	// The 1-bit ptrmasks are sized to contain only bits for
	// the typ.PtrBytes prefix, zero padded out to a full byte
	// of bitmap. If there is more room in the allocated object,
	// that space is pointerless. The noMorePtrs bitmap will prevent
	// scanning large pointerless tails of an object.
	//
	// Replicated copies are not as nice: if there is an array of
	// objects with scalar tails, all but the last tail does have to
	// be initialized, because there is no way to say "skip forward".

	ptrs := typ.PtrBytes / goarch.PtrSize
	if typ.Size_ == dataSize { // Single element
		if ptrs <= ptrBits { // Single small element
			m := readUintptr(typ.GCData)
			h = h.write(m, ptrs)
		} else { // Single large element
			p := typ.GCData
			for {
				h = h.write(readUintptr(p), ptrBits)
				p = addb(p, ptrBits/8)
				ptrs -= ptrBits
				if ptrs <= ptrBits {
					break
				}
			}
			m := readUintptr(p)
			h = h.write(m, ptrs)
		}
	} else { // Repeated element
		words := typ.Size_ / goarch.PtrSize // total words, including scalar tail
		if words <= ptrBits { // Repeated small element
			n := dataSize / typ.Size_
			m := readUintptr(typ.GCData)
			// Make larger unit to repeat
			for words <= ptrBits/2 {
				if n&1 != 0 {
					h = h.write(m, words)
				}
				n /= 2
				m |= m << words
				ptrs += words
				words *= 2
				if n == 1 {
					break
				}
			}
			for n > 1 {
				h = h.write(m, words)
				n--
			}
			h = h.write(m, ptrs)
		} else { // Repeated large element
			for i := uintptr(0); true; i += typ.Size_ {
				p := typ.GCData
				j := ptrs
				for j > ptrBits {
					h = h.write(readUintptr(p), ptrBits)
					p = addb(p, ptrBits/8)
					j -= ptrBits
				}
				m := readUintptr(p)
				h = h.write(m, j)
				if i+typ.Size_ == dataSize {
					break // don't need the trailing nonptr bits on the last element.
				}
				// Pad with zeros to the start of the next element.
				h = h.pad(typ.Size_ - typ.PtrBytes)
			}
		}
	}
	h.flush(x, size)

	if doubleCheck {
		h := heapBitsForAddr(x, size)
		for i := uintptr(0); i < size; i += goarch.PtrSize {
			// Compute the pointer bit we want at offset i.
			want := false
			if i < dataSize {
				off := i % typ.Size_
				if off < typ.PtrBytes {
					j := off / goarch.PtrSize
					want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
				}
			}
			if want {
				var addr uintptr
				h, addr = h.next()
				if addr != x+i {
					throw("heapBitsSetType: pointer entry not correct")
				}
			}
		}
		if _, addr := h.next(); addr != 0 {
			throw("heapBitsSetType: extra pointer")
		}
	}
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/goarch.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/goarch.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, &x[0])
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
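//
// For example (a hand-assembled illustration, not a program emitted by the
// compiler), a 16-word region that alternates pointer, scalar, pointer, ...
// could be encoded as:
//
//	0x02 0x01  // emit 2 bits from the next byte: 1 (pointer), 0 (scalar)
//	0x82 0x07  // repeat the previous 2 bits 7 times (c=7 as a varint)
//	0x00       // stop
//
// Running this program through runGCProg below writes the bytes 0x55 0x55
// and returns 16.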

// runGCProg returns the number of 1-bit entries written to memory.
func runGCProg(prog, dst *byte) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over.
				break Run
			}
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = goarch.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			src = subtract1(src)
			for npattern < n {
				pattern <<= 8
				pattern |= uintptr(*src)
				src = subtract1(src)
				npattern += 8
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= goarch.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				for nbits >= 8 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
					nbits -= 8
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		// Leading src fragment.
		src = subtractb(src, (off+7)/8)
		if frag := off & 7; frag != 0 {
			bits |= uintptr(*src) >> (8 - frag) << nbits
			src = add1(src)
			nbits += frag
			c -= frag
		}
		// Main loop: load one byte, write another.
		// The bits are rotating through the bit buffer.
		for i := c / 8; i > 0; i-- {
			bits |= uintptr(*src) << nbits
			src = add1(src)
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
		// Final src fragment.
		if c %= 8; c > 0 {
			bits |= (uintptr(*src) & (1<<c - 1)) << nbits
			nbits += c
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
	nbits += -nbits & 7
	for ; nbits > 0; nbits -= 8 {
		*dst = uint8(bits)
		dst = add1(dst)
		bits >>= 8
	}
	return totalBits
}

// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
	// Each word of ptrdata needs one bit in the bitmap.
	bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
	// Compute the number of pages needed for bitmapBytes.
	pages := divRoundUp(bitmapBytes, pageSize)
	s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
	runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
	return s
}

func dematerializeGCProg(s *mspan) {
	mheap_.freeManual(s, spanAllocPtrScalarBits)
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
	return getgcmask(x)
}
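
// For instance (an illustrative sketch, not a real test), on a 64-bit
// platform one would expect something like:
//
//	type T struct {
//		p *int
//		x uintptr
//		q *int
//	}
//	reflect_gcbits(new(T)) // []byte{1, 0, 1}
//
// because getgcmask reports one entry per pointer-sized word and trims the
// mask after the last pointer.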

// Returns GC type info for the pointer stored in ep for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
func getgcmask(ep any) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type
	// data or bss
	for _, datap := range activeModules() {
		// data
		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
			bitmap := datap.gcdatamask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - datap.data) / goarch.PtrSize
				mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}

		// bss
		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
			bitmap := datap.gcbssmask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - datap.bss) / goarch.PtrSize
				mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}
	}

	// heap
	if base, s, _ := findObject(uintptr(p), 0, 0); base != 0 {
		if s.spanclass.noscan() {
			return nil
		}
		n := s.elemsize
		hbits := heapBitsForAddr(base, n)
		mask = make([]byte, n/goarch.PtrSize)
		for {
			var addr uintptr
			if hbits, addr = hbits.next(); addr == 0 {
				break
			}
			mask[(addr-base)/goarch.PtrSize] = 1
		}
		// Callers expect this mask to end at the last pointer.
		for len(mask) > 0 && mask[len(mask)-1] == 0 {
			mask = mask[:len(mask)-1]
		}
		return
	}

	// stack
	if gp := getg(); gp.m.curg.stack.lo <= uintptr(p) && uintptr(p) < gp.m.curg.stack.hi {
		found := false
		var u unwinder
		for u.initAt(gp.m.curg.sched.pc, gp.m.curg.sched.sp, 0, gp.m.curg, 0); u.valid(); u.next() {
			if u.frame.sp <= uintptr(p) && uintptr(p) < u.frame.varp {
				found = true
				break
			}
		}
		if found {
			locals, _, _ := u.frame.getStackMap(nil, false)
			if locals.n == 0 {
				return
			}
			size := uintptr(locals.n) * goarch.PtrSize
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - u.frame.varp + size) / goarch.PtrSize
				mask[i/goarch.PtrSize] = locals.ptrbit(off)
			}
		}
		return
	}

	// otherwise, not something the GC knows about.
	// possibly read-only data, like malloc(0).
	// must not have pointers
	return
}