github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/runtime/mbitmap.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Garbage collector: type and heap bitmaps. 6 // 7 // Stack, data, and bss bitmaps 8 // 9 // Stack frames and global variables in the data and bss sections are described 10 // by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer 11 // to be visited during GC. The bits in each byte are consumed starting with 12 // the low bit: 1<<0, 1<<1, and so on. 13 // 14 // Heap bitmap 15 // 16 // The allocated heap comes from a subset of the memory in the range [start, used), 17 // where start == mheap_.arena_start and used == mheap_.arena_used. 18 // The heap bitmap comprises 2 bits for each pointer-sized word in that range, 19 // stored in bytes indexed backward in memory from start. 20 // That is, the byte at address start-1 holds the 2-bit entries for the four words 21 // start through start+3*ptrSize, the byte at start-2 holds the entries for 22 // start+4*ptrSize through start+7*ptrSize, and so on. 23 // 24 // In each 2-bit entry, the lower bit holds the same information as in the 1-bit 25 // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. 26 // The meaning of the high bit depends on the position of the word being described 27 // in its allocated object. In all words *except* the second word, the 28 // high bit indicates that the object is still being described. In 29 // these words, if a bit pair with a high bit 0 is encountered, the 30 // low bit can also be assumed to be 0, and the object description is 31 // over. This 00 is called the ``dead'' encoding: it signals that the 32 // rest of the words in the object are uninteresting to the garbage 33 // collector. 34 // 35 // In the second word, the high bit is the GC ``checkmarked'' bit (see below). 
36 // 37 // The 2-bit entries are split when written into the byte, so that the top half 38 // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) 39 // bits. 40 // This form allows a copy from the 1-bit to the 4-bit form to keep the 41 // pointer bits contiguous, instead of having to space them out. 42 // 43 // The code makes use of the fact that the zero value for a heap bitmap 44 // has no live pointer bit set and is (depending on position), not used, 45 // not checkmarked, and is the dead encoding. 46 // These properties must be preserved when modifying the encoding. 47 // 48 // Checkmarks 49 // 50 // In a concurrent garbage collector, one worries about failing to mark 51 // a live object due to mutations without write barriers or bugs in the 52 // collector implementation. As a sanity check, the GC has a 'checkmark' 53 // mode that retraverses the object graph with the world stopped, to make 54 // sure that everything that should be marked is marked. 55 // In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry 56 // for the second word of the object holds the checkmark bit. 57 // When not in checkmark mode, this bit is set to 1. 58 // 59 // The smallest possible allocation is 8 bytes. On a 32-bit machine, that 60 // means every allocated object has two words, so there is room for the 61 // checkmark bit. On a 64-bit machine, however, the 8-byte allocation is 62 // just one word, so the second bit pair is not available for encoding the 63 // checkmark. However, because non-pointer allocations are combined 64 // into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation 65 // must be a pointer, so the type bit in the first word is not actually needed. 66 // It is still used in general, except in checkmark the type bit is repurposed 67 // as the checkmark bit and then reinitialized (to 1) as the type bit when 68 // finished. 
//

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Bit layout of a single heap bitmap byte. The low four bits are the
// pointer bits and the high four bits are the mark/scan bits of the four
// words described by the byte; entry i uses bitPointer<<i and bitMarked<<i.
const (
	bitPointer = 1 << 0
	bitMarked  = 1 << 4 // TODO: Rename bitScan.

	heapBitsShift   = 1                     // shift offset between successive bitPointer or bitMarked entries
	heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte

	// all mark/pointer bits in a byte
	bitMarkedAll  = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)

// addb returns the byte pointer p+n.
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
// subtractb is typically used when traversing the pointer tables referred to by hbits
// which are arranged in reverse order.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
// subtract1 is typically used when traversing the pointer tables referred to by hbits
// which are arranged in reverse order.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// mapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try to access the bitmap immediately
// after observing the change to arena_used.
//
//go:nowritebarrier
func (h *mheap) mapBits(arena_used uintptr) {
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	const bitmapChunk = 8192

	// n is the number of bitmap bytes needed to describe
	// [arena_start, arena_used), rounded up to a whole number of
	// bitmap chunks and then to a whole number of physical pages.
	n := (arena_used - mheap_.arena_start) / heapBitmapScale
	n = round(n, bitmapChunk)
	n = round(n, physPageSize)
	if h.bitmap_mapped >= n {
		// Enough bitmap is already mapped.
		return
	}

	// The bitmap is indexed backward from h.bitmap, so the newly needed
	// region is the n-h.bitmap_mapped bytes starting at h.bitmap-n.
	sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
	h.bitmap_mapped = n
}

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
167 type heapBits struct { 168 bitp *uint8 169 shift uint32 170 } 171 172 // markBits provides access to the mark bit for an object in the heap. 173 // bytep points to the byte holding the mark bit. 174 // mask is a byte with a single bit set that can be &ed with *bytep 175 // to see if the bit has been set. 176 // *m.byte&m.mask != 0 indicates the mark bit is set. 177 // index can be used along with span information to generate 178 // the address of the object in the heap. 179 // We maintain one set of mark bits for allocation and one for 180 // marking purposes. 181 type markBits struct { 182 bytep *uint8 183 mask uint8 184 index uintptr 185 } 186 187 //go:nosplit 188 func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { 189 whichByte := allocBitIndex / 8 190 whichBit := allocBitIndex % 8 191 bytePtr := addb(s.allocBits, whichByte) 192 return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex} 193 } 194 195 // refillaCache takes 8 bytes s.allocBits starting at whichByte 196 // and negates them so that ctz (count trailing zeros) instructions 197 // can be used. It then places these 8 bytes into the cached 64 bit 198 // s.allocCache. 199 func (s *mspan) refillAllocCache(whichByte uintptr) { 200 bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte))) 201 aCache := uint64(0) 202 aCache |= uint64(bytes[0]) 203 aCache |= uint64(bytes[1]) << (1 * 8) 204 aCache |= uint64(bytes[2]) << (2 * 8) 205 aCache |= uint64(bytes[3]) << (3 * 8) 206 aCache |= uint64(bytes[4]) << (4 * 8) 207 aCache |= uint64(bytes[5]) << (5 * 8) 208 aCache |= uint64(bytes[6]) << (6 * 8) 209 aCache |= uint64(bytes[7]) << (7 * 8) 210 s.allocCache = ^aCache 211 } 212 213 // nextFreeIndex returns the index of the next free object in s at 214 // or after s.freeindex. 215 // There are hardware instructions that can be used to make this 216 // faster if profiling warrants it. 
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		// Span is exhausted; report nelems without touching any state.
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	// s.allocCache holds inverted alloc bits (see refillAllocCache), so
	// the lowest set bit is the nearest free slot. A result of 64 means
	// no bit is set in the cached window.
	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		// The free bit found lies past the last object of the span.
		s.freeindex = snelems
		return snelems
	}

	// Consume the bit just found together with everything below it.
	s.allocCache >>= (bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the bit for object index in s.allocBits is clear.
func (s *mspan) isFree(index uintptr) bool {
	whichByte := index / 8
	whichBit := index % 8
	byteVal := *addb(s.allocBits, whichByte)
	return byteVal&uint8(1<<whichBit) == 0
}

// objIndex returns the index within span s of the object containing
// address p, computed from p's byte offset from s.base().
func (s *mspan) objIndex(p uintptr) uintptr {
	byteOffset := p - s.base()
	if byteOffset == 0 {
		return 0
	}
	if s.baseMask != 0 {
		// s.baseMask is non-zero, so elemsize is a power of two; shift by s.divShift
		return byteOffset >> s.divShift
	}
	// Otherwise divide by the element size using multiplication and shifts.
	return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
}

// markBitsForAddr returns the markBits for the object containing p,
// looking up the span with spanOf.
func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

// markBitsForIndex returns the markBits that address bit objIndex of
// s.gcmarkBits.
func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	whichByte := objIndex / 8
	bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
	bytePtr := addb(s.gcmarkBits, whichByte)
	return markBits{bytePtr, bitMask, objIndex}
}

// markBitsForBase returns the markBits for the first object in s:
// bit 0 of the first byte of s.gcmarkBits.
func (s *mspan) markBitsForBase() markBits {
	return markBits{s.gcmarkBits, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically. Some compilers
// are not able to inline atomic.Or8 function so if it appears as a hot spot consider
// inlining it manually.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
319 func (m markBits) setMarkedNonAtomic() { 320 *m.bytep |= m.mask 321 } 322 323 // clearMarked clears the marked bit in the markbits, atomically. 324 func (m markBits) clearMarked() { 325 // Might be racing with other updates, so use atomic update always. 326 // We used to be clever here and use a non-atomic update in certain 327 // cases, but it's not worth the risk. 328 atomic.And8(m.bytep, ^m.mask) 329 } 330 331 // clearMarkedNonAtomic clears the marked bit non-atomically. 332 func (m markBits) clearMarkedNonAtomic() { 333 *m.bytep ^= m.mask 334 } 335 336 // markBitsForSpan returns the markBits for the span base address base. 337 func markBitsForSpan(base uintptr) (mbits markBits) { 338 if base < mheap_.arena_start || base >= mheap_.arena_used { 339 throw("heapBitsForSpan: base out of range") 340 } 341 mbits = markBitsForAddr(base) 342 if mbits.mask != 1 { 343 throw("markBitsForSpan: unaligned start") 344 } 345 return mbits 346 } 347 348 // advance advances the markBits to the next object in the span. 349 func (m *markBits) advance() { 350 if m.mask == 1<<7 { 351 m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1)) 352 m.mask = 1 353 } else { 354 m.mask = m.mask << 1 355 } 356 m.index++ 357 } 358 359 // heapBitsForAddr returns the heapBits for the address addr. 360 // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). 361 // 362 // nosplit because it is used during write barriers and must not be preempted. 363 //go:nosplit 364 func heapBitsForAddr(addr uintptr) heapBits { 365 // 2 bits per work, 4 pairs per byte, and a mask is hard coded. 366 off := (addr - mheap_.arena_start) / sys.PtrSize 367 return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} 368 } 369 370 // heapBitsForSpan returns the heapBits for the span base address base. 
371 func heapBitsForSpan(base uintptr) (hbits heapBits) { 372 if base < mheap_.arena_start || base >= mheap_.arena_used { 373 throw("heapBitsForSpan: base out of range") 374 } 375 return heapBitsForAddr(base) 376 } 377 378 // heapBitsForObject returns the base address for the heap object 379 // containing the address p, the heapBits for base, 380 // the object's span, and of the index of the object in s. 381 // If p does not point into a heap object, 382 // return base == 0 383 // otherwise return the base of the object. 384 // 385 // refBase and refOff optionally give the base address of the object 386 // in which the pointer p was found and the byte offset at which it 387 // was found. These are used for error reporting. 388 func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) { 389 arenaStart := mheap_.arena_start 390 if p < arenaStart || p >= mheap_.arena_used { 391 return 392 } 393 off := p - arenaStart 394 idx := off >> _PageShift 395 // p points into the heap, but possibly to the middle of an object. 396 // Consult the span table to find the block beginning. 397 s = h_spans[idx] 398 if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { 399 if s == nil || s.state == _MSpanStack { 400 // If s is nil, the virtual address has never been part of the heap. 401 // This pointer may be to some mmap'd region, so we allow it. 402 // Pointers into stacks are also ok, the runtime manages these explicitly. 403 return 404 } 405 406 // The following ensures that we are rigorous about what data 407 // structures hold valid pointers. 408 if debug.invalidptr != 0 { 409 // Typically this indicates an incorrect use 410 // of unsafe or cgo to store a bad pointer in 411 // the Go heap. It may also indicate a runtime 412 // bug. 413 // 414 // TODO(austin): We could be more aggressive 415 // and detect pointers to unallocated objects 416 // in allocated spans. 
417 printlock() 418 print("runtime: pointer ", hex(p)) 419 if s.state != mSpanInUse { 420 print(" to unallocated span") 421 } else { 422 print(" to unused region of span") 423 } 424 print("idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") 425 if refBase != 0 { 426 print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") 427 gcDumpObject("object", refBase, refOff) 428 } 429 throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)") 430 } 431 return 432 } 433 // If this span holds object of a power of 2 size, just mask off the bits to 434 // the interior of the object. Otherwise use the size to get the base. 435 if s.baseMask != 0 { 436 // optimize for power of 2 sized objects. 437 base = s.base() 438 base = base + (p-base)&s.baseMask 439 objIndex = (base - s.base()) >> s.divShift 440 // base = p & s.baseMask is faster for small spans, 441 // but doesn't work for large spans. 442 // Overall, it's faster to use the more general computation above. 443 } else { 444 base = s.base() 445 if p-base >= s.elemsize { 446 // n := (p - base) / s.elemsize, using division by multiplication 447 objIndex = uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2) 448 base += objIndex * s.elemsize 449 } 450 } 451 // Now that we know the actual base, compute heapBits to return to caller. 452 hbits = heapBitsForAddr(base) 453 return 454 } 455 456 // prefetch the bits. 457 func (h heapBits) prefetch() { 458 prefetchnta(uintptr(unsafe.Pointer((h.bitp)))) 459 } 460 461 // next returns the heapBits describing the next pointer-sized word in memory. 462 // That is, if h describes address p, h.next() describes p+ptrSize. 463 // Note that next does not modify h. The caller must record the result. 464 // 465 // nosplit because it is used during write barriers and must not be preempted. 
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		// Still within the same bitmap byte: move to the next entry.
		return heapBits{h.bitp, h.shift + heapBitsShift}
	}
	// Last entry of this byte: the bitmap is laid out backward in
	// memory, so the following entry is in the preceding byte.
	return heapBits{subtract1(h.bitp), 0}
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
func (h heapBits) forward(n uintptr) heapBits {
	// Count n from the first entry of the current byte, then split the
	// total into whole bytes to step back and an entry within the byte.
	n += uintptr(h.shift) / heapBitsShift
	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
}

// bits returns the heap bits for the current word.
// The caller can test morePointers and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
	// The (shift & 31) eliminates a test and conditional branch
	// from the generated code.
	return uint32(*h.bitp) >> (h.shift & 31)
}

// morePointers reports whether the object is still being described at
// this word, that is, whether the high bit of this word's entry is set.
// If it returns false, this word and all remaining words in this object
// are scalars.
// h must not describe the second word of the object.
func (h heapBits) morePointers() bool {
	return h.bits()&bitMarked != 0
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return h.bits()&bitPointer != 0
}

// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is for efficiency.
// h must describe the initial word of the object.
511 func (h heapBits) hasPointers(size uintptr) bool { 512 if size == sys.PtrSize { // 1-word objects are always pointers 513 return true 514 } 515 return (*h.bitp>>h.shift)&bitMarked != 0 516 } 517 518 // isCheckmarked reports whether the heap bits have the checkmarked bit set. 519 // It must be told how large the object at h is, because the encoding of the 520 // checkmark bit varies by size. 521 // h must describe the initial word of the object. 522 func (h heapBits) isCheckmarked(size uintptr) bool { 523 if size == sys.PtrSize { 524 return (*h.bitp>>h.shift)&bitPointer != 0 525 } 526 // All multiword objects are 2-word aligned, 527 // so we know that the initial word's 2-bit pair 528 // and the second word's 2-bit pair are in the 529 // same heap bitmap byte, *h.bitp. 530 return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0 531 } 532 533 // setCheckmarked sets the checkmarked bit. 534 // It must be told how large the object at h is, because the encoding of the 535 // checkmark bit varies by size. 536 // h must describe the initial word of the object. 537 func (h heapBits) setCheckmarked(size uintptr) { 538 if size == sys.PtrSize { 539 atomic.Or8(h.bitp, bitPointer<<h.shift) 540 return 541 } 542 atomic.Or8(h.bitp, bitMarked<<(heapBitsShift+h.shift)) 543 } 544 545 // heapBitsBulkBarrier executes writebarrierptr_nostore 546 // for every pointer slot in the memory range [p, p+size), 547 // using the heap, data, or BSS bitmap to locate those pointer slots. 548 // This executes the write barriers necessary after a memmove. 549 // Both p and size must be pointer-aligned. 550 // The range [p, p+size) must lie within a single object. 551 // 552 // Callers should call heapBitsBulkBarrier immediately after 553 // calling memmove(p, src, size). This function is marked nosplit 554 // to avoid being preempted; the GC must not stop the goroutine 555 // between the memmove and the execution of the barriers. 
//
// The heap bitmap is not maintained for allocations containing
// no pointers at all; any caller of heapBitsBulkBarrier must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.kind&kindNoPointers.
//
//go:nosplit
func heapBitsBulkBarrier(p, size uintptr) {
	if (p|size)&(sys.PtrSize-1) != 0 {
		throw("heapBitsBulkBarrier: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if !inheap(p) {
		// If p is on the stack and in a higher frame than the
		// caller, we either need to execute write barriers on
		// it (which is what happens for normal stack writes
		// through pointers to higher frames), or we need to
		// force the mark termination stack scan to scan the
		// frame containing p.
		//
		// Executing write barriers on p is complicated in the
		// general case because we either need to unwind the
		// stack to get the stack map, or we need the type's
		// bitmap, which may be a GC program.
		//
		// Hence, we opt for forcing the re-scan to scan the
		// frame containing p, which we can do by simply
		// unwinding the stack barriers between the current SP
		// and p's frame.
		gp := getg().m.curg
		if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
			// Run on the system stack to give it more
			// stack space.
			systemstack(func() {
				gcUnwindBarriers(gp, p)
			})
			return
		}

		// If p is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			if datap.data <= p && p < datap.edata {
				bulkBarrierBitmap(p, size, p-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			if datap.bss <= p && p < datap.ebss {
				bulkBarrierBitmap(p, size, p-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		// p is neither in the heap, on the current goroutine's stack,
		// nor in any module's data or BSS range: nothing to do.
		return
	}

	// p is in the heap: walk the heap bitmap one word at a time and
	// run the barrier on every slot marked as a pointer.
	h := heapBitsForAddr(p)
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if h.isPointer() {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		h = h.next()
	}
}

// bulkBarrierBitmap executes write barriers for [p, p+size) using a
// 1-bit pointer bitmap. p is assumed to start maskOffset bytes into
// the data covered by the bitmap in bits.
//
// This is used by heapBitsBulkBarrier for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(p, size, maskOffset uintptr, bits *uint8) {
	// Locate the bitmap byte and the bit within it that describe p.
	word := maskOffset / sys.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	for i := uintptr(0); i < size; i += sys.PtrSize {
		if mask == 0 {
			// mask was shifted out of the byte: advance to the next
			// bitmap byte.
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				// mask stays 0, so the next iteration advances
				// to the following bitmap byte again.
				i += 7 * sys.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the type bitmap to locate those pointer slots.
// The type typ must correspond exactly to [p, p+size).
// This executes the write barriers necessary after a copy.
// Both p and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right after memmove,
// and the GC must not complete between those two.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.gcdata
	var bits uint32
	// Only the first typ.ptrdata bytes are examined.
	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
		if i&(sys.PtrSize*8-1) == 0 {
			// Every 8 words, load the next byte of the 1-bit mask.
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
	}
}

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
// It clears all checkmark bits.
// If this is a span of pointer-sized objects, it initializes all
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
	size, n, total := s.layout()

	// Init the markbit structures
	s.freeindex = 0
	s.allocCache = ^uint64(0) // all 1s indicating all free.
	s.nelems = n
	s.allocBits = nil
	s.gcmarkBits = nil
	s.gcmarkBits = newMarkBits(s.nelems)
	s.allocBits = newAllocBits(s.nelems)

	// Clear bits corresponding to objects.
	if total%heapBitmapScale != 0 {
		throw("initSpan: unaligned length")
	}
	nbyte := total / heapBitmapScale
	if sys.PtrSize == 8 && size == sys.PtrSize {
		// Span of one-word objects on a 64-bit system: set every
		// pointer and mark/scan bit. The bitmap runs backward in
		// memory, so h.bitp is the highest-addressed byte of the
		// span's bitmap and the fill proceeds upward to it.
		end := h.bitp
		bitp := subtractb(end, nbyte-1)
		for {
			*bitp = bitPointerAll | bitMarkedAll
			if bitp == end {
				break
			}
			bitp = add1(bitp)
		}
		return
	}
	// All other spans: zero the nbyte bitmap bytes ending at h.bitp.
	memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}

// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
	// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
	if sys.PtrSize == 8 && size == sys.PtrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must clear type bit (checkmark bit) of every word.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		// Each bitmap byte covers four one-word objects.
		for i := uintptr(0); i < n; i += 4 {
			*bitp &^= bitPointerAll
			bitp = subtract1(bitp)
		}
		return
	}
	// Larger objects: clear the high bit of each object's second word,
	// stepping from one object's first word to the next.
	for i := uintptr(0); i < n; i++ {
		*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
		h = h.forward(size / sys.PtrSize)
	}
}

// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
770 func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { 771 // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. 772 if sys.PtrSize == 8 && size == sys.PtrSize { 773 // Checkmark bit is type bit, bottom bit of every 2-bit entry. 774 // Only possible on 64-bit system, since minimum size is 8. 775 // Must clear type bit (checkmark bit) of every word. 776 // The type bit is the lower of every two-bit pair. 777 bitp := h.bitp 778 for i := uintptr(0); i < n; i += 4 { 779 *bitp |= bitPointerAll 780 bitp = subtract1(bitp) 781 } 782 } 783 } 784 785 // oneBitCount is indexed by byte and produces the 786 // number of 1 bits in that byte. For example 128 has 1 bit set 787 // and oneBitCount[128] will holds 1. 788 var oneBitCount = [256]uint8{ 789 0, 1, 1, 2, 1, 2, 2, 3, 790 1, 2, 2, 3, 2, 3, 3, 4, 791 1, 2, 2, 3, 2, 3, 3, 4, 792 2, 3, 3, 4, 3, 4, 4, 5, 793 1, 2, 2, 3, 2, 3, 3, 4, 794 2, 3, 3, 4, 3, 4, 4, 5, 795 2, 3, 3, 4, 3, 4, 4, 5, 796 3, 4, 4, 5, 4, 5, 5, 6, 797 1, 2, 2, 3, 2, 3, 3, 4, 798 2, 3, 3, 4, 3, 4, 4, 5, 799 2, 3, 3, 4, 3, 4, 4, 5, 800 3, 4, 4, 5, 4, 5, 5, 6, 801 2, 3, 3, 4, 3, 4, 4, 5, 802 3, 4, 4, 5, 4, 5, 5, 6, 803 3, 4, 4, 5, 4, 5, 5, 6, 804 4, 5, 5, 6, 5, 6, 6, 7, 805 1, 2, 2, 3, 2, 3, 3, 4, 806 2, 3, 3, 4, 3, 4, 4, 5, 807 2, 3, 3, 4, 3, 4, 4, 5, 808 3, 4, 4, 5, 4, 5, 5, 6, 809 2, 3, 3, 4, 3, 4, 4, 5, 810 3, 4, 4, 5, 4, 5, 5, 6, 811 3, 4, 4, 5, 4, 5, 5, 6, 812 4, 5, 5, 6, 5, 6, 6, 7, 813 2, 3, 3, 4, 3, 4, 4, 5, 814 3, 4, 4, 5, 4, 5, 5, 6, 815 3, 4, 4, 5, 4, 5, 5, 6, 816 4, 5, 5, 6, 5, 6, 6, 7, 817 3, 4, 4, 5, 4, 5, 5, 6, 818 4, 5, 5, 6, 5, 6, 6, 7, 819 4, 5, 5, 6, 5, 6, 6, 7, 820 5, 6, 6, 7, 6, 7, 7, 8} 821 822 // countFree runs through the mark bits in a span and counts the number of free objects 823 // in the span. 824 // TODO:(rlh) Use popcount intrinsic. 
825 func (s *mspan) countFree() int { 826 count := 0 827 maxIndex := s.nelems / 8 828 for i := uintptr(0); i < maxIndex; i++ { 829 mrkBits := *addb(s.gcmarkBits, i) 830 count += int(oneBitCount[mrkBits]) 831 } 832 if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 { 833 mrkBits := *addb(s.gcmarkBits, maxIndex) 834 mask := uint8((1 << bitsInLastByte) - 1) 835 bits := mrkBits & mask 836 count += int(oneBitCount[bits]) 837 } 838 return int(s.nelems) - count 839 } 840 841 // heapBitsSetType records that the new allocation [x, x+size) 842 // holds in [x, x+dataSize) one or more values of type typ. 843 // (The number of values is given by dataSize / typ.size.) 844 // If dataSize < size, the fragment [x+dataSize, x+size) is 845 // recorded as non-pointer data. 846 // It is known that the type has pointers somewhere; 847 // malloc does not call heapBitsSetType when there are no pointers, 848 // because all free objects are marked as noscan during 849 // heapBitsSweepSpan. 850 // 851 // There can only be one allocation from a given span active at a time, 852 // and the bitmap for a span always falls on byte boundaries, 853 // so there are no write-write races for access to the heap bitmap. 854 // Hence, heapBitsSetType can access the bitmap without atomics. 855 // 856 // There can be read-write races between heapBitsSetType and things 857 // that read the heap bitmap like scanobject. However, since 858 // heapBitsSetType is only used for objects that have not yet been 859 // made reachable, readers will ignore bits being modified by this 860 // function. This does mean this function cannot transiently modify 861 // bits that belong to neighboring objects. Also, on weakly-ordered 862 // machines, callers must execute a store/store (publication) barrier 863 // between calling this function and making the object reachable. 864 // 865 // TODO: This still has atomic accesses left over from when it could 866 // race with GC accessing mark bits in the bitmap. Remove these. 
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
	const doubleCheck = false // slow but helpful; enable to test modifications to this code

	// size is always dataSize rounded up to the next malloc size class,
	// except in the case of allocating a defer block, in which case
	// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
	// arbitrarily larger.
	//
	// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
	// assume that dataSize == size without checking it explicitly.

	if sys.PtrSize == 8 && size == sys.PtrSize {
		// It's one word and it has pointers, it must be a pointer.
		// In general we'd need an atomic update here if the
		// concurrent GC were marking objects in this span,
		// because each bitmap byte describes 3 other objects
		// in addition to the one being allocated.
		// However, since all allocated one-word objects are pointers
		// (non-pointers are aggregated into tinySize allocations),
		// initSpan sets the pointer bits for us. Nothing to do here.
		if doubleCheck {
			h := heapBitsForAddr(x)
			if !h.isPointer() {
				throw("heapBitsSetType: pointer bit missing")
			}
			if !h.morePointers() {
				throw("heapBitsSetType: scan bit missing")
			}
		}
		return
	}

	h := heapBitsForAddr(x)
	ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)

	// Heap bitmap bits for 2-word object are only 4 bits,
	// so also shared with objects next to it; use atomic updates.
	// This is called out as a special case primarily for 32-bit systems,
	// so that on 32-bit systems the code below can assume all objects
	// are 4-word aligned (because they're all 16-byte aligned).
	if size == 2*sys.PtrSize {
		if typ.size == sys.PtrSize {
			// We're allocating a block big enough to hold two pointers.
			// On 64-bit, that means the actual object must be two pointers,
			// or else we'd have used the one-pointer-sized block.
			// On 32-bit, however, this is the 8-byte block, the smallest one.
			// So it could be that we're allocating one pointer and this was
			// just the smallest block available. Distinguish by checking dataSize.
			// (In general the number of instances of typ being allocated is
			// dataSize/typ.size.)
			if sys.PtrSize == 4 && dataSize == sys.PtrSize {
				// 1 pointer object. On 32-bit machines clear the bit for the
				// unused second word.
				if gcphase == _GCoff {
					*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
					*h.bitp |= (bitPointer | bitMarked) << h.shift
				} else {
					atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
					atomic.Or8(h.bitp, (bitPointer|bitMarked)<<h.shift)
				}
			} else {
				// 2-element slice of pointer.
				if gcphase == _GCoff {
					*h.bitp |= (bitPointer | bitMarked | bitPointer<<heapBitsShift) << h.shift
				} else {
					atomic.Or8(h.bitp, (bitPointer|bitMarked|bitPointer<<heapBitsShift)<<h.shift)
				}
			}
			return
		}
		// Otherwise typ.size must be 2*sys.PtrSize,
		// and typ.kind&kindGCProg == 0.
		if doubleCheck {
			if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
				throw("heapBitsSetType")
			}
		}
		// Only the low two ptrmask bits matter: one per word of the object.
		b := uint32(*ptrmask)
		hb := (b & 3) | bitMarked
		if gcphase == _GCoff {
			// bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
			// 110011 is shifted h.shift and complemented.
			// This clears out the bits that are about to be
			// ored into *h.bitp in the next instructions.
			*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
			*h.bitp |= uint8(hb << h.shift)
		} else {
			// TODO:(rlh) since the GC is not concurrently setting the
			// mark bits in the heap map anymore and malloc
			// owns the span we are allocating in why does this have
			// to be atomic?

			atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
			atomic.Or8(h.bitp, uint8(hb<<h.shift))
		}
		return
	}

	// Copy from 1-bit ptrmask into 2-bit bitmap.
	// The basic approach is to use a single uintptr as a bit buffer,
	// alternating between reloading the buffer and writing bitmap bytes.
	// In general, one load can supply two bitmap byte writes.
	// This is a lot of lines of code, but it compiles into relatively few
	// machine instructions.

	var (
		// Ptrmask input.
		p     *byte   // last ptrmask byte read
		b     uintptr // ptrmask bits already loaded
		nb    uintptr // number of bits in b at next read
		endp  *byte   // final ptrmask byte to read (then repeat)
		endnb uintptr // number of valid bits in *endp
		pbits uintptr // alternate source of bits

		// Heap bitmap output.
		w     uintptr // words processed
		nw    uintptr // number of words to process
		hbitp *byte   // next heap bitmap byte to write
		hb    uintptr // bits being prepared for *hbitp
	)

	hbitp = h.bitp

	// Handle GC program. Delayed until this part of the code
	// so that we can use the same double-checking mechanism
	// as the 1-bit case. Nothing above could have encountered
	// GC programs: the cases were all too small.
	if typ.kind&kindGCProg != 0 {
		// The program itself starts 4 bytes into typ.gcdata.
		heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
		if doubleCheck {
			// Double-check the heap bits written by GC program
			// by running the GC program to create a 1-bit pointer mask
			// and then jumping to the double-check code below.
			// This doesn't catch bugs shared between the 1-bit and 4-bit
			// GC program execution, but it does catch mistakes specific
			// to just one of those and bugs in heapBitsSetTypeGCProg's
			// implementation of arrays.
			//
			// The lock taken here is released at the end of Phase 4,
			// which recognizes this path by ptrmask == debugPtrmask.data.
			lock(&debugPtrmask.lock)
			if debugPtrmask.data == nil {
				debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
			}
			ptrmask = debugPtrmask.data
			runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
			goto Phase4
		}
		return
	}

	// Note about sizes:
	//
	// typ.size is the size of the object in bytes,
	// and typ.ptrdata is the size in bytes of the prefix
	// of the object that contains pointers. That is, the final
	// typ.size - typ.ptrdata bytes contain no pointers.
	// (Both are divided by sys.PtrSize below to obtain word counts.)
	// This allows optimization of a common pattern where
	// an object has a small header followed by a large scalar
	// buffer. If we know the pointers are over, we don't have
	// to scan the buffer's heap bitmap at all.
	// The 1-bit ptrmasks are sized to contain only bits for
	// the typ.ptrdata prefix, zero padded out to a full byte
	// of bitmap. This code sets nw (below) so that heap bitmap
	// bits are only written for the typ.ptrdata prefix; if there is
	// more room in the allocated object, the next heap bitmap
	// entry is a 00, indicating that there are no more pointers
	// to scan. So only the ptrmask for the ptrdata bytes is needed.
	//
	// Replicated copies are not as nice: if there is an array of
	// objects with scalar tails, all but the last tail does have to
	// be initialized, because there is no way to say "skip forward".
	// However, because of the possibility of a repeated type with
	// size not a multiple of 4 pointers (one heap bitmap byte),
	// the code already must handle the last ptrmask byte specially
	// by treating it as containing only the bits for endnb pointers,
	// where endnb <= 4. We represent large scalar tails that must
	// be expanded in the replication by setting endnb larger than 4.
	// This will have the effect of reading many bits out of b,
	// but once the real bits are shifted out, b will supply as many
	// zero bits as we try to read, which is exactly what we need.

	p = ptrmask
	if typ.size < dataSize {
		// Filling in bits for an array of typ.
		// Set up for repetition of ptrmask during main loop.
		// Note that ptrmask describes only a prefix of typ:
		// the first typ.ptrdata bytes.
		const maxBits = sys.PtrSize*8 - 7
		if typ.ptrdata/sys.PtrSize <= maxBits {
			// Entire ptrmask fits in uintptr with room for a byte fragment.
			// Load into pbits and never read from ptrmask again.
			// This is especially important when the ptrmask has
			// fewer than 8 bits in it; otherwise the reload in the middle
			// of the Phase 2 loop would itself need to loop to gather
			// at least 8 bits.

			// Accumulate ptrmask into b.
			// ptrmask is sized to describe only typ.ptrdata, but we record
			// it as describing typ.size bytes, since all the high bits are zero.
			nb = typ.ptrdata / sys.PtrSize
			for i := uintptr(0); i < nb; i += 8 {
				b |= uintptr(*p) << i
				p = add1(p)
			}
			nb = typ.size / sys.PtrSize

			// Replicate ptrmask to fill entire pbits uintptr.
			// Doubling and truncating is fewer steps than
			// iterating by nb each time. (nb could be 1.)
			// Since we loaded typ.ptrdata/sys.PtrSize bits
			// but are pretending to have typ.size/sys.PtrSize,
			// there might be no replication necessary/possible.
			pbits = b
			endnb = nb
			if nb+nb <= maxBits {
				for endnb <= sys.PtrSize*8 {
					pbits |= pbits << endnb
					endnb += endnb
				}
				// Truncate to a multiple of original ptrmask.
				endnb = maxBits / nb * nb
				pbits &= 1<<endnb - 1
				b = pbits
				nb = endnb
			}

			// Clear p and endp as sentinel for using pbits.
			// Checked during Phase 2 loop.
			p = nil
			endp = nil
		} else {
			// Ptrmask is larger. Read it multiple times.
			n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
			endp = addb(ptrmask, n)
			endnb = typ.size/sys.PtrSize - n*8
		}
	}
	if p != nil {
		// Reading directly from ptrmask: prime the bit buffer
		// with its first byte.
		b = uintptr(*p)
		p = add1(p)
		nb = 8
	}

	if typ.size == dataSize {
		// Single entry: can stop once we reach the non-pointer data.
		nw = typ.ptrdata / sys.PtrSize
	} else {
		// Repeated instances of typ in an array.
		// Have to process first N-1 entries in full, but can stop
		// once we reach the non-pointer data in the final entry.
		nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
	}
	if nw == 0 {
		// No pointers! Caller was supposed to check.
		println("runtime: invalid type ", typ.string())
		throw("heapBitsSetType: called with non-pointer type")
		return
	}
	if nw < 2 {
		// Must write at least 2 words, because the "no scan"
		// encoding doesn't take effect until the third word.
		nw = 2
	}

	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
	// The leading byte is special because it contains the bits for word 1,
	// which does not have the marked bits set.
	// The leading half-byte is special because it's a half a byte and must be
	// manipulated atomically.
	switch {
	default:
		throw("heapBitsSetType: unexpected shift")

	case h.shift == 0:
		// Ptrmask and heap bitmap are aligned.
		// Handle first byte of bitmap specially.
		//
		// The first byte we write out covers the first four
		// words of the object. The scan/dead bit on the first
		// word must be set to scan since there are pointers
		// somewhere in the object. The scan/dead bit on the
		// second word is the checkmark, so we don't set it.
		// In all following words, we set the scan/dead
		// appropriately to indicate that the object continues
		// to the next 2-bit entry in the bitmap.
		//
		// TODO: It doesn't matter if we set the checkmark, so
		// maybe this case isn't needed any more.
		hb = b & bitPointerAll
		hb |= bitMarked | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
		if w += 4; w >= nw {
			goto Phase3
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
		nb -= 4

	case sys.PtrSize == 8 && h.shift == 2:
		// Ptrmask and heap bitmap are misaligned.
		// The bits for the first two words are in a byte shared with another object
		// and must be updated atomically.
		// NOTE(rsc): The atomic here may not be necessary.
		// We took care of 1-word and 2-word objects above,
		// so this is at least a 6-word object, so our start bits
		// are shared only with the type bits of another object,
		// not with its mark bit. Since there is only one allocation
		// from a given span at a time, we should be able to set
		// these bits non-atomically. Not worth the risk right now.
		hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
		// This is not noscan, so set the scan bit in the
		// first word.
		hb |= bitMarked << (2 * heapBitsShift)
		b >>= 2
		nb -= 2
		// Note: no bitMarked for second word because that's
		// the checkmark.
		if gcphase == _GCoff {
			*hbitp &^= uint8((bitPointer | bitMarked | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
			*hbitp |= uint8(hb)
		} else {
			atomic.And8(hbitp, ^(uint8(bitPointer|bitMarked|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
			atomic.Or8(hbitp, uint8(hb))
		}
		hbitp = subtract1(hbitp)
		if w += 2; w >= nw {
			// We know that there is more data, because we handled 2-word objects above.
			// This must be at least a 6-word object. If we're out of pointer words,
			// mark no scan in next bitmap byte and finish.
			hb = 0
			w += 4
			goto Phase3
		}
	}

	// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
	// The loop computes the bits for that last write but does not execute the write;
	// it leaves the bits in hb for processing by phase 3.
	// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
	// use in the first half of the loop right now, and then we only adjust nb explicitly
	// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
	nb -= 4
	for {
		// Emit bitmap byte.
		// b has at least nb+4 bits, with one exception:
		// if w+4 >= nw, then b has only nw-w bits,
		// but we'll stop at the break and then truncate
		// appropriately in Phase 3.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4

		// Load more bits. b has nb right now.
		if p != endp {
			// Fast path: keep reading from ptrmask.
			// nb unmodified: we just loaded 8 bits,
			// and the next iteration will consume 8 bits,
			// leaving us with the same nb the next time we're here.
			if nb < 8 {
				b |= uintptr(*p) << nb
				p = add1(p)
			} else {
				// Reduce the number of bits in b.
				// This is important if we skipped
				// over a scalar tail, since nb could
				// be larger than the bit width of b.
				nb -= 8
			}
		} else if p == nil {
			// Almost as fast path: track bit count and refill from pbits.
			// For short repetitions.
			if nb < 8 {
				b |= pbits << nb
				nb += endnb
			}
			nb -= 8 // for next iteration
		} else {
			// Slow path: reached end of ptrmask.
			// Process final partial byte and rewind to start.
			b |= uintptr(*p) << nb
			nb += endnb
			if nb < 8 {
				b |= uintptr(*ptrmask) << nb
				p = add1(ptrmask)
			} else {
				nb -= 8
				p = ptrmask
			}
		}

		// Emit bitmap byte.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
	}

Phase3:
	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
	if w > nw {
		// Counting the 4 entries in hb not yet written to memory,
		// there are more entries than possible pointer slots.
		// Discard the excess entries (can't be more than 3).
		mask := uintptr(1)<<(4-(w-nw)) - 1
		hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
	}

	// Change nw from counting possibly-pointer words to total words in allocation.
	nw = size / sys.PtrSize

	// Write whole bitmap bytes.
	// The first is hb, the rest are zero.
	if w <= nw {
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		hb = 0 // for possible final half-byte below
		for w += 4; w <= nw; w += 4 {
			*hbitp = 0
			hbitp = subtract1(hbitp)
		}
	}

	// Write final partial bitmap byte if any.
	// We know w > nw, or else we'd still be in the loop above.
	// It can be bigger only due to the 4 entries in hb that it counts.
	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
	// and can discard the 4 sitting in hb.
	// But if w == nw+2, we need to write first two in hb.
	// The byte is shared with the next object so we may need an atomic.
	if w == nw+2 {
		if gcphase == _GCoff {
			*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
		} else {
			atomic.And8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
			atomic.Or8(hbitp, uint8(hb))
		}
	}

Phase4:
	// Phase 4: all done, but perhaps double check.
	if doubleCheck {
		// The position check is skipped for GC programs, which jump
		// straight here without advancing hbitp or w.
		end := heapBitsForAddr(x + size)
		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
			h0 := heapBitsForAddr(x)
			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
			print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
			throw("bad heapBitsSetType")
		}

		// Double-check that bits to be written were written correctly.
		// Does not check that other bits were not written, unfortunately.
		h := heapBitsForAddr(x)
		nptr := typ.ptrdata / sys.PtrSize
		ndata := typ.size / sys.PtrSize
		count := dataSize / typ.size
		totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
		for i := uintptr(0); i < size/sys.PtrSize; i++ {
			j := i % ndata // word index within the current element
			var have, want uint8
			have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
			if i >= totalptr {
				want = 0 // deadmarker
				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
					want = bitMarked
				}
			} else {
				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
					want |= bitPointer
				}
				if i != 1 {
					want |= bitMarked
				} else {
					// The high bit of the second word is the
					// checkmark, so it is excluded from the comparison.
					have &^= bitMarked
				}
			}
			if have != want {
				println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				h0 := heapBitsForAddr(x)
				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
				print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
				println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
				if typ.kind&kindGCProg != 0 {
					println("GC program:")
					dumpGCProg(addb(typ.gcdata, 4))
				}
				throw("bad heapBitsSetType")
			}
			h = h.next()
		}
		// Release the lock taken on the GC-program double-check path.
		if ptrmask == debugPtrmask.data {
			unlock(&debugPtrmask.lock)
		}
	}
}

// heapBitsSetTypeNoScan marks x as noscan by setting the first word
// of x in the heap bitmap to scalar/dead.
func heapBitsSetTypeNoScan(x uintptr) {
	h := heapBitsForAddr(uintptr(x))
	// Clear both the pointer bit and the high (scan) bit of x's first entry.
	*h.bitp &^= (bitPointer | bitMarked) << h.shift
}

// debugPtrmask is a scratch 1-bit pointer mask used by the doubleCheck
// path of heapBitsSetType when verifying GC-program output.
// data is allocated lazily on first use; lock is held while data is in use.
var debugPtrmask struct {
	lock mutex
	data *byte
}

// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes (a prefix of).
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects and need not be accessed with atomic instructions.
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
	if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
		// Alignment will be wrong.
		throw("heapBitsSetTypeGCProg: small allocation")
	}
	var totalBits uintptr
	if elemSize == dataSize {
		// Single element: run the program as is and check that it
		// produced exactly progSize/sys.PtrSize entries.
		totalBits = runGCProg(prog, nil, h.bitp, 2)
		if totalBits*sys.PtrSize != progSize {
			println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
			throw("heapBitsSetTypeGCProg: unexpected bit count")
		}
	} else {
		count := dataSize / elemSize

		// Piece together program trailer to run after prog that does:
		//	literal(0)
		//	repeat(1, elemSize-progSize-1) // zeros to fill element size
		//	repeat(elemSize, count-1) // repeat that element for count
		// This zero-pads the data remaining in the first element and then
		// repeats that first element to fill the array.
		var trailer [40]byte // 3 varints (max 10 each) + some bytes
		i := 0
		if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
			// literal(0)
			trailer[i] = 0x01 // emit 1 literal bit...
			i++
			trailer[i] = 0 // ...taken from this byte: a zero bit
			i++
			if n > 1 {
				// repeat(1, n-1)
				trailer[i] = 0x81 // repeat the previous 1 bit; count varint follows
				i++
				n--
				for ; n >= 0x80; n >>= 7 {
					trailer[i] = byte(n | 0x80)
					i++
				}
				trailer[i] = byte(n)
				i++
			}
		}
		// repeat(elemSize/ptrSize, count-1)
		trailer[i] = 0x80 // repeat with both n and c encoded as varints
		i++
		n := elemSize / sys.PtrSize
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		n = count - 1
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		trailer[i] = 0 // stop
		i++

		runGCProg(prog, &trailer[0], h.bitp, 2)

		// Even though we filled in the full array just now,
		// record that we only filled in up to the ptrdata of the
		// last element. This will cause the code below to
		// memclr the dead section of the final array element,
		// so that scanobject can stop early in the final element.
		totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
	}
	// The bitmap runs backward from h.bitp, so both end pointers lie
	// below it. Zero the bitmap bytes between the end of the described
	// pointer data and the end of the allocation.
	endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
	endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
	memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/sys.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	// One bit per pointer-sized word, rounded up to whole bytes,
	// plus one extra byte for the overflow sentinel.
	n := (size/sys.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, nil, &x[0], 1)
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint

// runGCProg executes the GC program prog, and then trailer if non-nil,
// writing to dst with entries of the given size.
// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
// starting at dst (because the heap bitmap does). In this case, the caller guarantees
// that only whole bytes in dst need to be written.
//
// runGCProg returns the number of 1- or 2-bit entries written to memory.
func runGCProg(prog, trailer, dst *byte, size int) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			if size == 1 {
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			} else {
				// Eight pointer bits become two heap bitmap bytes
				// of four entries each.
				v := bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
				v = bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
			}
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over; continue in trailer if present.
				if trailer != nil {
					//println("trailer")
					p = trailer
					trailer = nil
					continue
				}
				//println("done")
				break Run
			}
			//println("lit", n, dst)
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				if size == 1 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
				} else {
					v := bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
					v = bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
				}
			}
			// Leftover literal bits (fewer than 8) stay in the bit buffer.
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = sys.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			// Already-written output lies behind dst: at lower
			// addresses for size == 1, at higher addresses for the
			// backward-growing heap bitmap.
			if size == 1 {
				src = subtract1(src)
				for npattern < n {
					pattern <<= 8
					pattern |= uintptr(*src)
					src = subtract1(src)
					npattern += 8
				}
			} else {
				src = add1(src)
				for npattern < n {
					pattern <<= 4
					pattern |= uintptr(*src) & 0xf
					src = add1(src)
					npattern += 4
				}
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= sys.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				if size == 1 {
					for nbits >= 8 {
						*dst = uint8(bits)
						dst = add1(dst)
						bits >>= 8
						nbits -= 8
					}
				} else {
					for nbits >= 4 {
						*dst = uint8(bits&0xf | bitMarkedAll)
						dst = subtract1(dst)
						bits >>= 4
						nbits -= 4
					}
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		if size == 1 {
			// Leading src fragment.
			src = subtractb(src, (off+7)/8)
			if frag := off & 7; frag != 0 {
				bits |= uintptr(*src) >> (8 - frag) << nbits
				src = add1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 8; i > 0; i-- {
				bits |= uintptr(*src) << nbits
				src = add1(src)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			// Final src fragment.
			if c %= 8; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		} else {
			// Leading src fragment.
			src = addb(src, (off+3)/4)
			if frag := off & 3; frag != 0 {
				bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
				src = subtract1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 4; i > 0; i-- {
				bits |= (uintptr(*src) & 0xf) << nbits
				src = subtract1(src)
				*dst = uint8(bits&0xf | bitMarkedAll)
				dst = subtract1(dst)
				bits >>= 4
			}
			// Final src fragment.
			if c %= 4; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	var totalBits uintptr
	if size == 1 {
		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
		nbits += -nbits & 7 // round nbits up to a multiple of 8
		for ; nbits > 0; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
	} else {
		totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
		nbits += -nbits & 3 // round nbits up to a multiple of 4
		for ; nbits > 0; nbits -= 4 {
			v := bits&0xf | bitMarkedAll
			*dst = uint8(v)
			dst = subtract1(dst)
			bits >>= 4
		}
	}
	return totalBits
}

// dumpGCProg prints a listing of the GC program starting at p,
// one instruction per line. Each line is prefixed with the running
// count of bits described by the preceding instructions.
func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			// Literal: x bits follow, packed into (x+7)/8 bytes.
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			// Repeat: the bit count is in the low 7 bits of x,
			// or in a following varint when those bits are zero.
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			// The repeat count is always a varint.
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

// getgcmaskcb is a stack-frame callback. ctxt points to a stkframe whose
// sp field holds the address being looked up. When that address falls in
// [frame.sp, frame.varp), the callback copies frame into *ctxt and
// returns false; otherwise it returns true.
// NOTE(review): presumably false stops the traceback and true continues
// it; the caller is not visible here, so confirm.
func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
	target := (*stkframe)(ctxt)
	if frame.sp <= target.sp && target.sp < frame.varp {
		*target = *frame
		return false
	}
	return true
}

// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
1848 //go:linkname reflect_gcbits reflect.gcbits 1849 func reflect_gcbits(x interface{}) []byte { 1850 ret := getgcmask(x) 1851 typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem 1852 nptr := typ.ptrdata / sys.PtrSize 1853 for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 { 1854 ret = ret[:len(ret)-1] 1855 } 1856 return ret 1857 } 1858 1859 // Returns GC type info for object p for testing. 1860 func getgcmask(ep interface{}) (mask []byte) { 1861 e := *efaceOf(&ep) 1862 p := e.data 1863 t := e._type 1864 // data or bss 1865 for datap := &firstmoduledata; datap != nil; datap = datap.next { 1866 // data 1867 if datap.data <= uintptr(p) && uintptr(p) < datap.edata { 1868 bitmap := datap.gcdatamask.bytedata 1869 n := (*ptrtype)(unsafe.Pointer(t)).elem.size 1870 mask = make([]byte, n/sys.PtrSize) 1871 for i := uintptr(0); i < n; i += sys.PtrSize { 1872 off := (uintptr(p) + i - datap.data) / sys.PtrSize 1873 mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 1874 } 1875 return 1876 } 1877 1878 // bss 1879 if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss { 1880 bitmap := datap.gcbssmask.bytedata 1881 n := (*ptrtype)(unsafe.Pointer(t)).elem.size 1882 mask = make([]byte, n/sys.PtrSize) 1883 for i := uintptr(0); i < n; i += sys.PtrSize { 1884 off := (uintptr(p) + i - datap.bss) / sys.PtrSize 1885 mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 1886 } 1887 return 1888 } 1889 } 1890 1891 // heap 1892 var n uintptr 1893 var base uintptr 1894 if mlookup(uintptr(p), &base, &n, nil) != 0 { 1895 mask = make([]byte, n/sys.PtrSize) 1896 for i := uintptr(0); i < n; i += sys.PtrSize { 1897 hbits := heapBitsForAddr(base + i) 1898 if hbits.isPointer() { 1899 mask[i/sys.PtrSize] = 1 1900 } 1901 if i != 1*sys.PtrSize && !hbits.morePointers() { 1902 mask = mask[:i/sys.PtrSize] 1903 break 1904 } 1905 } 1906 return 1907 } 1908 1909 // stack 1910 if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi { 1911 var frame 
stkframe 1912 frame.sp = uintptr(p) 1913 _g_ := getg() 1914 gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0) 1915 if frame.fn != nil { 1916 f := frame.fn 1917 targetpc := frame.continpc 1918 if targetpc == 0 { 1919 return 1920 } 1921 // SPARC64's PC holds the address of the *current* instruction. 1922 if targetpc != f.entry && sys.GoarchSparc64 == 0 { 1923 targetpc-- 1924 } 1925 pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil) 1926 if pcdata == -1 { 1927 return 1928 } 1929 stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) 1930 if stkmap == nil || stkmap.n <= 0 { 1931 return 1932 } 1933 bv := stackmapdata(stkmap, pcdata) 1934 size := uintptr(bv.n) * sys.PtrSize 1935 n := (*ptrtype)(unsafe.Pointer(t)).elem.size 1936 mask = make([]byte, n/sys.PtrSize) 1937 for i := uintptr(0); i < n; i += sys.PtrSize { 1938 bitmap := bv.bytedata 1939 off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize 1940 mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1 1941 } 1942 } 1943 return 1944 } 1945 1946 // otherwise, not something the GC knows about. 1947 // possibly read-only data, like malloc(0). 1948 // must not have pointers 1949 return 1950 }