github.com/zxy12/golang_with_comment@v0.0.0-20190701084843-0e6b2aff5ef3/runtime/mbitmap.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Garbage collector: type and heap bitmaps. 6 // 7 // Stack, data, and bss bitmaps 8 // 9 // Stack frames and global variables in the data and bss sections are described 10 // by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer 11 // to be visited during GC. The bits in each byte are consumed starting with 12 // the low bit: 1<<0, 1<<1, and so on. 13 // 14 // Heap bitmap 15 // 16 // The allocated heap comes from a subset of the memory in the range [start, used), 17 // where start == mheap_.arena_start and used == mheap_.arena_used. 18 // The heap bitmap comprises 2 bits for each pointer-sized word in that range, 19 // stored in bytes indexed backward in memory from start. 20 // That is, the byte at address start-1 holds the 2-bit entries for the four words 21 // start through start+3*ptrSize, the byte at start-2 holds the entries for 22 // start+4*ptrSize through start+7*ptrSize, and so on. 23 // 24 // In each 2-bit entry, the lower bit holds the same information as in the 1-bit 25 // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. 26 // The meaning of the high bit depends on the position of the word being described 27 // in its allocated object. In all words *except* the second word, the 28 // high bit indicates that the object is still being described. In 29 // these words, if a bit pair with a high bit 0 is encountered, the 30 // low bit can also be assumed to be 0, and the object description is 31 // over. This 00 is called the ``dead'' encoding: it signals that the 32 // rest of the words in the object are uninteresting to the garbage 33 // collector. 34 // 35 // In the second word, the high bit is the GC ``checkmarked'' bit (see below). 36 // 37 // The 2-bit entries are split when written into the byte, so that the top half 38 // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) 39 // bits. 40 // This form allows a copy from the 1-bit to the 4-bit form to keep the 41 // pointer bits contiguous, instead of having to space them out. 42 // 43 // The code makes use of the fact that the zero value for a heap bitmap 44 // has no live pointer bit set and is (depending on position), not used, 45 // not checkmarked, and is the dead encoding. 46 // These properties must be preserved when modifying the encoding. 47 // 48 // The bitmap for noscan spans is not maintained. Code must ensure 49 // that an object is scannable before consulting its bitmap by 50 // checking either the noscan bit in the span or by consulting its 51 // type's information. 52 // 53 // Checkmarks 54 // 55 // In a concurrent garbage collector, one worries about failing to mark 56 // a live object due to mutations without write barriers or bugs in the 57 // collector implementation. As a sanity check, the GC has a 'checkmark' 58 // mode that retraverses the object graph with the world stopped, to make 59 // sure that everything that should be marked is marked. 60 // In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry 61 // for the second word of the object holds the checkmark bit. 62 // When not in checkmark mode, this bit is set to 1. 63 // 64 // The smallest possible allocation is 8 bytes. 
On a 32-bit machine, that 65 // means every allocated object has two words, so there is room for the 66 // checkmark bit. On a 64-bit machine, however, the 8-byte allocation is 67 // just one word, so the second bit pair is not available for encoding the 68 // checkmark. However, because non-pointer allocations are combined 69 // into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation 70 // must be a pointer, so the type bit in the first word is not actually needed. 71 // It is still used in general, except in checkmark the type bit is repurposed 72 // as the checkmark bit and then reinitialized (to 1) as the type bit when 73 // finished. 74 // 75 76 package runtime 77 78 import ( 79 "runtime/internal/atomic" 80 "runtime/internal/sys" 81 "unsafe" 82 ) 83 84 const ( 85 bitPointer = 1 << 0 86 bitScan = 1 << 4 87 88 heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries 89 heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte 90 91 // all scan/pointer bits in a byte 92 bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) 93 bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift) 94 ) 95 96 // addb returns the byte pointer p+n. 97 //go:nowritebarrier 98 //go:nosplit 99 func addb(p *byte, n uintptr) *byte { 100 // Note: wrote out full expression instead of calling add(p, n) 101 // to reduce the number of temporaries generated by the 102 // compiler for this trivial expression during inlining. 103 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) 104 } 105 106 // subtractb returns the byte pointer p-n. 107 // subtractb is typically used when traversing the pointer tables referred to by hbits 108 // which are arranged in reverse order. 109 //go:nowritebarrier 110 //go:nosplit 111 func subtractb(p *byte, n uintptr) *byte { 112 // Note: wrote out full expression instead of calling add(p, -n) 113 // to reduce the number of temporaries generated by the 114 // compiler for this trivial expression during inlining. 115 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n)) 116 } 117 118 // add1 returns the byte pointer p+1. 119 //go:nowritebarrier 120 //go:nosplit 121 func add1(p *byte) *byte { 122 // Note: wrote out full expression instead of calling addb(p, 1) 123 // to reduce the number of temporaries generated by the 124 // compiler for this trivial expression during inlining. 125 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1)) 126 } 127 128 // subtract1 returns the byte pointer p-1. 129 // subtract1 is typically used when traversing the pointer tables referred to by hbits 130 // which are arranged in reverse order. 131 //go:nowritebarrier 132 // 133 // nosplit because it is used during write barriers and must not be preempted. 134 //go:nosplit 135 func subtract1(p *byte) *byte { 136 // Note: wrote out full expression instead of calling subtractb(p, 1) 137 // to reduce the number of temporaries generated by the 138 // compiler for this trivial expression during inlining. 139 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) 140 } 141 142 // mapBits maps any additional bitmap memory needed for the new arena memory. 143 // 144 // Don't call this directly. Call mheap.setArenaUsed. 145 // 146 //go:nowritebarrier 147 func (h *mheap) mapBits(arena_used uintptr) { 148 // Caller has added extra mappings to the arena. 
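// For a rough sense of scale (illustrative, assuming a 64-bit system, so sys.PtrSize == 8):
// heapBitmapScale = 8 * (8/2) = 32, i.e. one heap bitmap byte describes 32 bytes
// (4 words) of arena. Growing arena_used by 64 MB therefore needs
// (64<<20)/32 = 2 MB of bitmap, and the computation below rounds that up to
// bitmapChunk (8192 bytes) and then to physPageSize before comparing it with
// bitmap_mapped.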
149 // Add extra mappings of bitmap words as needed. 150 // We allocate extra bitmap pieces in chunks of bitmapChunk. 151 const bitmapChunk = 8192 152 //println("mapBits - start:", "arena_used=", arena_used, "mheap_.arena_start=", mheap_.arena_start) 153 //println("heapBitmapScale=", heapBitmapScale, "sys.PtrSize=", sys.PtrSize) 154 n := (arena_used - mheap_.arena_start) / heapBitmapScale 155 156 //s := uintptr(1) 157 //println("round 1=", round((s), bitmapChunk)) 158 //println("round 1=", round((s), physPageSize)) 159 n = round(n, bitmapChunk) 160 n = round(n, physPageSize) 161 //println("h.bitmap_mapped=", h.bitmap_mapped, "n = ", n) 162 163 // bitmap_mapped records how much of the bitmap's reserved virtual memory has actually been mapped. 164 // n-bitmap_mapped 165 // | | 166 // |-------spans-------| ------- bitmap ------|====|====| ----------arena------------| 167 // bitmap-n 168 169 if h.bitmap_mapped >= n { 170 return 171 } 172 173 sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) 174 h.bitmap_mapped = n 175 } 176 177 // heapBits provides access to the bitmap bits for a single heap word. 178 // The methods on heapBits take value receivers so that the compiler 179 // can more easily inline calls to those methods and registerize the 180 // struct fields independently. 181 type heapBits struct { 182 bitp *uint8 183 shift uint32 184 } 185 186 // markBits provides access to the mark bit for an object in the heap. 187 // bytep points to the byte holding the mark bit. 188 // mask is a byte with a single bit set that can be &ed with *bytep 189 // to see if the bit has been set. 190 // *m.bytep&m.mask != 0 indicates the mark bit is set. 191 // index can be used along with span information to generate 192 // the address of the object in the heap. 193 // We maintain one set of mark bits for allocation and one for 194 // marking purposes. 195 type markBits struct { 196 bytep *uint8 197 mask uint8 198 index uintptr 199 } 200 201 //go:nosplit 202 func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { 203 bytep, mask := s.allocBits.bitp(allocBitIndex) 204 return markBits{bytep, mask, allocBitIndex} 205 } 206 207 // refillAllocCache takes 8 bytes of s.allocBits starting at whichByte 208 // and negates them so that ctz (count trailing zeros) instructions 209 // can be used. It then places these 8 bytes into the cached 64 bit 210 // s.allocCache. 211 func (s *mspan) refillAllocCache(whichByte uintptr) { 212 bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte))) 213 aCache := uint64(0) 214 aCache |= uint64(bytes[0]) 215 aCache |= uint64(bytes[1]) << (1 * 8) 216 aCache |= uint64(bytes[2]) << (2 * 8) 217 aCache |= uint64(bytes[3]) << (3 * 8) 218 aCache |= uint64(bytes[4]) << (4 * 8) 219 aCache |= uint64(bytes[5]) << (5 * 8) 220 aCache |= uint64(bytes[6]) << (6 * 8) 221 aCache |= uint64(bytes[7]) << (7 * 8) 222 s.allocCache = ^aCache 223 } 224 225 // nextFreeIndex returns the index of the next free object in s at 226 // or after s.freeindex. 227 // There are hardware instructions that can be used to make this 228 // faster if profiling warrants it. 229 func (s *mspan) nextFreeIndex() uintptr { 230 sfreeindex := s.freeindex 231 snelems := s.nelems 232 if sfreeindex == snelems { 233 return sfreeindex 234 } 235 if sfreeindex > snelems { 236 throw("s.freeindex > s.nelems") 237 } 238 239 aCache := s.allocCache 240 241 bitIndex := sys.Ctz64(aCache) 242 for bitIndex == 64 { 243 // Move index to start of next cached bits.
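// A small worked example of the allocCache trick (illustrative values only):
// if the alloc-bits byte for this group is 0b00000111 (objects 0-2 already
// allocated, low bit = lowest index) and the following 7 bytes are zero,
// refillAllocCache stores the negation, so allocCache ends in ...11111000 and
// sys.Ctz64(allocCache) == 3: index 3 is the next free object. Ctz64 == 64
// means the cached bits are exhausted; the statement below then rounds
// sfreeindex up to the next multiple of 64 before refilling the cache.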
244 sfreeindex = (sfreeindex + 64) &^ (64 - 1) 245 if sfreeindex >= snelems { 246 s.freeindex = snelems 247 return snelems 248 } 249 whichByte := sfreeindex / 8 250 // Refill s.allocCache with the next 64 alloc bits. 251 s.refillAllocCache(whichByte) 252 aCache = s.allocCache 253 bitIndex = sys.Ctz64(aCache) 254 // nothing available in cached bits 255 // grab the next 8 bytes and try again. 256 } 257 result := sfreeindex + uintptr(bitIndex) 258 if result >= snelems { 259 s.freeindex = snelems 260 return snelems 261 } 262 263 s.allocCache >>= uint(bitIndex + 1) 264 sfreeindex = result + 1 265 266 if sfreeindex%64 == 0 && sfreeindex != snelems { 267 // We just incremented s.freeindex so it isn't 0. 268 // As each 1 in s.allocCache was encountered and used for allocation 269 // it was shifted away. At this point s.allocCache contains all 0s. 270 // Refill s.allocCache so that it corresponds 271 // to the bits at s.allocBits starting at s.freeindex. 272 whichByte := sfreeindex / 8 273 s.refillAllocCache(whichByte) 274 } 275 s.freeindex = sfreeindex 276 return result 277 } 278 279 // isFree returns whether the index'th object in s is unallocated. 280 func (s *mspan) isFree(index uintptr) bool { 281 if index < s.freeindex { 282 return false 283 } 284 bytep, mask := s.allocBits.bitp(index) 285 return *bytep&mask == 0 286 } 287 288 func (s *mspan) objIndex(p uintptr) uintptr { 289 byteOffset := p - s.base() 290 if byteOffset == 0 { 291 return 0 292 } 293 if s.baseMask != 0 { 294 // s.baseMask is 0, elemsize is a power of two, so shift by s.divShift 295 return byteOffset >> s.divShift 296 } 297 return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2) 298 } 299 300 func markBitsForAddr(p uintptr) markBits { 301 s := spanOf(p) 302 objIndex := s.objIndex(p) 303 return s.markBitsForIndex(objIndex) 304 } 305 306 func (s *mspan) markBitsForIndex(objIndex uintptr) markBits { 307 bytep, mask := s.gcmarkBits.bitp(objIndex) 308 return markBits{bytep, mask, objIndex} 309 } 310 311 func (s *mspan) markBitsForBase() markBits { 312 return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0} 313 } 314 315 // isMarked reports whether mark bit m is set. 316 func (m markBits) isMarked() bool { 317 return *m.bytep&m.mask != 0 318 } 319 320 // setMarked sets the marked bit in the markbits, atomically. Some compilers 321 // are not able to inline atomic.Or8 function so if it appears as a hot spot consider 322 // inlining it manually. 323 func (m markBits) setMarked() { 324 // Might be racing with other updates, so use atomic update always. 325 // We used to be clever here and use a non-atomic update in certain 326 // cases, but it's not worth the risk. 327 atomic.Or8(m.bytep, m.mask) 328 } 329 330 // setMarkedNonAtomic sets the marked bit in the markbits, non-atomically. 331 func (m markBits) setMarkedNonAtomic() { 332 *m.bytep |= m.mask 333 } 334 335 // clearMarked clears the marked bit in the markbits, atomically. 336 func (m markBits) clearMarked() { 337 // Might be racing with other updates, so use atomic update always. 338 // We used to be clever here and use a non-atomic update in certain 339 // cases, but it's not worth the risk. 340 atomic.And8(m.bytep, ^m.mask) 341 } 342 343 // markBitsForSpan returns the markBits for the span base address base. 
344 func markBitsForSpan(base uintptr) (mbits markBits) { 345 if base < mheap_.arena_start || base >= mheap_.arena_used { 346 throw("markBitsForSpan: base out of range") 347 } 348 mbits = markBitsForAddr(base) 349 if mbits.mask != 1 { 350 throw("markBitsForSpan: unaligned start") 351 } 352 return mbits 353 } 354 355 // advance advances the markBits to the next object in the span. 356 func (m *markBits) advance() { 357 if m.mask == 1<<7 { 358 m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1)) 359 m.mask = 1 360 } else { 361 m.mask = m.mask << 1 362 } 363 m.index++ 364 } 365 366 // heapBitsForAddr returns the heapBits for the address addr. 367 // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). 368 // 369 // nosplit because it is used during write barriers and must not be preempted. 370 //go:nosplit 371 func heapBitsForAddr(addr uintptr) heapBits { 372 // 2 bits per word, 4 pairs per byte, and the mask is hard-coded. 373 off := (addr - mheap_.arena_start) / sys.PtrSize 374 return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} 375 } 376 377 // heapBitsForSpan returns the heapBits for the span base address base. 378 func heapBitsForSpan(base uintptr) (hbits heapBits) { 379 if base < mheap_.arena_start || base >= mheap_.arena_used { 380 print("runtime: base ", hex(base), " not in range [", hex(mheap_.arena_start), ",", hex(mheap_.arena_used), ")\n") 381 throw("heapBitsForSpan: base out of range") 382 } 383 return heapBitsForAddr(base) 384 } 385 386 // heapBitsForObject returns the base address for the heap object 387 // containing the address p, the heapBits for base, 388 // the object's span, and the index of the object in s. 389 // If p does not point into a heap object, 390 // it returns base == 0; 391 // otherwise it returns the base of the object. 392 // 393 // refBase and refOff optionally give the base address of the object 394 // in which the pointer p was found and the byte offset at which it 395 // was found. These are used for error reporting. 396 func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) { 397 arenaStart := mheap_.arena_start 398 if p < arenaStart || p >= mheap_.arena_used { 399 return 400 } 401 off := p - arenaStart 402 idx := off >> _PageShift 403 // p points into the heap, but possibly to the middle of an object. 404 // Consult the span table to find the block beginning. 405 s = mheap_.spans[idx] 406 if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { 407 if s == nil || s.state == _MSpanManual { 408 // If s is nil, the virtual address has never been part of the heap. 409 // This pointer may be to some mmap'd region, so we allow it. 410 // Pointers into stacks are also ok, the runtime manages these explicitly. 411 return 412 } 413 414 // The following ensures that we are rigorous about what data 415 // structures hold valid pointers. 416 if debug.invalidptr != 0 { 417 // Typically this indicates an incorrect use 418 // of unsafe or cgo to store a bad pointer in 419 // the Go heap. It may also indicate a runtime 420 // bug. 421 // 422 // TODO(austin): We could be more aggressive 423 // and detect pointers to unallocated objects 424 // in allocated spans.
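// A typical way to reach this branch (hypothetical sketch, not runtime code):
// smuggling a non-pointer value into a pointer-typed slot with unsafe, e.g.
//	*(*unsafe.Pointer)(unsafe.Pointer(&obj.ptrField)) = unsafe.Pointer(uintptr(1))
// If the GC later scans that slot, p lands outside any in-use span and the
// diagnostics below fire. Running with GODEBUG=invalidptr=0 disables this throw.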
425 printlock() 426 print("runtime: pointer ", hex(p)) 427 if s.state != mSpanInUse { 428 print(" to unallocated span") 429 } else { 430 print(" to unused region of span") 431 } 432 print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") 433 if refBase != 0 { 434 print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") 435 gcDumpObject("object", refBase, refOff) 436 } 437 getg().m.traceback = 2 438 throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)") 439 } 440 return 441 } 442 // If this span holds objects of a power-of-2 size, just mask off the bits to 443 // the interior of the object. Otherwise use the size to get the base. 444 if s.baseMask != 0 { 445 // optimize for power of 2 sized objects. 446 base = s.base() 447 base = base + (p-base)&uintptr(s.baseMask) 448 objIndex = (base - s.base()) >> s.divShift 449 // base = p & s.baseMask is faster for small spans, 450 // but doesn't work for large spans. 451 // Overall, it's faster to use the more general computation above. 452 } else { 453 base = s.base() 454 if p-base >= s.elemsize { 455 // n := (p - base) / s.elemsize, using division by multiplication 456 objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2 457 base += objIndex * s.elemsize 458 } 459 } 460 // Now that we know the actual base, compute heapBits to return to caller. 461 hbits = heapBitsForAddr(base) 462 return 463 } 464 465 // prefetch the bits. 466 func (h heapBits) prefetch() { 467 prefetchnta(uintptr(unsafe.Pointer((h.bitp)))) 468 } 469 470 // next returns the heapBits describing the next pointer-sized word in memory. 471 // That is, if h describes address p, h.next() describes p+ptrSize. 472 // Note that next does not modify h. The caller must record the result. 473 // 474 // nosplit because it is used during write barriers and must not be preempted. 475 //go:nosplit 476 func (h heapBits) next() heapBits { 477 if h.shift < 3*heapBitsShift { 478 return heapBits{h.bitp, h.shift + heapBitsShift} 479 } 480 return heapBits{subtract1(h.bitp), 0} 481 } 482 483 // forward returns the heapBits describing n pointer-sized words ahead of h in memory. 484 // That is, if h describes address p, h.forward(n) describes p+n*ptrSize. 485 // h.forward(1) is equivalent to h.next(), just slower. 486 // Note that forward does not modify h. The caller must record the result. 487 488 func (h heapBits) forward(n uintptr) heapBits { 489 n += uintptr(h.shift) / heapBitsShift 490 return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} 491 } 492 493 // bits returns the heap bits for the current word. The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. 494 // The result includes in its higher bits the bits for subsequent words 495 // described by the same bitmap byte. 496 func (h heapBits) bits() uint32 { 497 // The (shift & 31) eliminates a test and conditional branch 498 // from the generated code. 499 return uint32(*h.bitp) >> (h.shift & 31) 500 } 501 502 // morePointers reports whether this word and all remaining words in this object 503 // may still contain pointers to be scanned; if it reports false, this word and the rest of the object are scalars. 504 // h must not describe the second word of the object. 505 func (h heapBits) morePointers() bool { 506 return h.bits()&bitScan != 0 507 } 508 509 // isPointer reports whether the heap bits describe a pointer word. 510 // 511 // nosplit because it is used during write barriers and must not be preempted.
512 //go:nosplit 513 func (h heapBits) isPointer() bool { 514 return h.bits()&bitPointer != 0 515 } 516 517 // isCheckmarked reports whether the heap bits have the checkmarked bit set. 518 // It must be told how large the object at h is, because the encoding of the 519 // checkmark bit varies by size. 520 // h must describe the initial word of the object. 521 func (h heapBits) isCheckmarked(size uintptr) bool { 522 if size == sys.PtrSize { 523 return (*h.bitp>>h.shift)&bitPointer != 0 524 } 525 // All multiword objects are 2-word aligned, 526 // so we know that the initial word's 2-bit pair 527 // and the second word's 2-bit pair are in the 528 // same heap bitmap byte, *h.bitp. 529 return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0 530 } 531 532 // setCheckmarked sets the checkmarked bit. 533 // It must be told how large the object at h is, because the encoding of the 534 // checkmark bit varies by size. 535 // h must describe the initial word of the object. 536 func (h heapBits) setCheckmarked(size uintptr) { 537 if size == sys.PtrSize { 538 atomic.Or8(h.bitp, bitPointer<<h.shift) 539 return 540 } 541 atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift)) 542 } 543 544 // bulkBarrierPreWrite executes writebarrierptr_prewrite1 545 // for every pointer slot in the memory range [src, src+size), 546 // using pointer/scalar information from [dst, dst+size). 547 // This executes the write barriers necessary before a memmove. 548 // src, dst, and size must be pointer-aligned. 549 // The range [dst, dst+size) must lie within a single object. 550 // 551 // As a special case, src == 0 indicates that this is being used for a 552 // memclr. bulkBarrierPreWrite will pass 0 for the src of each write 553 // barrier. 554 // 555 // Callers should call bulkBarrierPreWrite immediately before 556 // calling memmove(dst, src, size). This function is marked nosplit 557 // to avoid being preempted; the GC must not stop the goroutine 558 // between the memmove and the execution of the barriers. 559 // The caller is also responsible for cgo pointer checks if this 560 // may be writing Go pointers into non-Go memory. 561 // 562 // The pointer bitmap is not maintained for allocations containing 563 // no pointers at all; any caller of bulkBarrierPreWrite must first 564 // make sure the underlying allocation contains pointers, usually 565 // by checking typ.kind&kindNoPointers. 566 // 567 //go:nosplit 568 func bulkBarrierPreWrite(dst, src, size uintptr) { 569 if (dst|src|size)&(sys.PtrSize-1) != 0 { 570 throw("bulkBarrierPreWrite: unaligned arguments") 571 } 572 if !writeBarrier.needed { 573 return 574 } 575 if !inheap(dst) { 576 gp := getg().m.curg 577 if gp != nil && gp.stack.lo <= dst && dst < gp.stack.hi { 578 // Destination is our own stack. No need for barriers. 579 return 580 } 581 582 // If dst is a global, use the data or BSS bitmaps to 583 // execute write barriers. 
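// For example (illustrative numbers, 64-bit): if dst is 40 bytes past
// datap.data, then maskOffset = 40 and the word index is 40/8 = 5;
// bulkBarrierBitmap reads byte 5/8 = 0 of gcdatamask and tests bit
// 1<<(5%8) = 0x20. Each set bit marks a word that holds a pointer and gets a
// writebarrierptr_prewrite1 call before the memmove.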
584 for _, datap := range activeModules() { 585 if datap.data <= dst && dst < datap.edata { 586 bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata) 587 return 588 } 589 } 590 for _, datap := range activeModules() { 591 if datap.bss <= dst && dst < datap.ebss { 592 bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata) 593 return 594 } 595 } 596 return 597 } 598 599 h := heapBitsForAddr(dst) 600 if src == 0 { 601 for i := uintptr(0); i < size; i += sys.PtrSize { 602 if h.isPointer() { 603 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 604 writebarrierptr_prewrite1(dstx, 0) 605 } 606 h = h.next() 607 } 608 } else { 609 for i := uintptr(0); i < size; i += sys.PtrSize { 610 if h.isPointer() { 611 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 612 srcx := (*uintptr)(unsafe.Pointer(src + i)) 613 writebarrierptr_prewrite1(dstx, *srcx) 614 } 615 h = h.next() 616 } 617 } 618 } 619 620 // bulkBarrierBitmap executes write barriers for copying from [src, 621 // src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is 622 // assumed to start maskOffset bytes into the data covered by the 623 // bitmap in bits (which may not be a multiple of 8). 624 // 625 // This is used by bulkBarrierPreWrite for writes to data and BSS. 626 // 627 //go:nosplit 628 func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { 629 word := maskOffset / sys.PtrSize 630 bits = addb(bits, word/8) 631 mask := uint8(1) << (word % 8) 632 633 for i := uintptr(0); i < size; i += sys.PtrSize { 634 if mask == 0 { 635 bits = addb(bits, 1) 636 if *bits == 0 { 637 // Skip 8 words. 638 i += 7 * sys.PtrSize 639 continue 640 } 641 mask = 1 642 } 643 if *bits&mask != 0 { 644 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 645 if src == 0 { 646 writebarrierptr_prewrite1(dstx, 0) 647 } else { 648 srcx := (*uintptr)(unsafe.Pointer(src + i)) 649 writebarrierptr_prewrite1(dstx, *srcx) 650 } 651 } 652 mask <<= 1 653 } 654 } 655 656 // typeBitsBulkBarrier executes writebarrierptr_prewrite for every 657 // pointer that would be copied from [src, src+size) to [dst, 658 // dst+size) by a memmove using the type bitmap to locate those 659 // pointer slots. 660 // 661 // The type typ must correspond exactly to [src, src+size) and [dst, dst+size). 662 // dst, src, and size must be pointer-aligned. 663 // The type typ must have a plain bitmap, not a GC program. 664 // The only use of this function is in channel sends, and the 665 // 64 kB channel element limit takes care of this for us. 666 // 667 // Must not be preempted because it typically runs right before memmove, 668 // and the GC must observe them as an atomic action. 
669 // 670 //go:nosplit 671 func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { 672 if typ == nil { 673 throw("runtime: typeBitsBulkBarrier without type") 674 } 675 if typ.size != size { 676 println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size) 677 throw("runtime: invalid typeBitsBulkBarrier") 678 } 679 if typ.kind&kindGCProg != 0 { 680 println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog") 681 throw("runtime: invalid typeBitsBulkBarrier") 682 } 683 if !writeBarrier.needed { 684 return 685 } 686 ptrmask := typ.gcdata 687 var bits uint32 688 for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize { 689 if i&(sys.PtrSize*8-1) == 0 { 690 bits = uint32(*ptrmask) 691 ptrmask = addb(ptrmask, 1) 692 } else { 693 bits = bits >> 1 694 } 695 if bits&1 != 0 { 696 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 697 srcx := (*uintptr)(unsafe.Pointer(src + i)) 698 writebarrierptr_prewrite(dstx, *srcx) 699 } 700 } 701 } 702 703 // The methods operating on spans all require that h has been returned 704 // by heapBitsForSpan and that size, n, total are the span layout description 705 // returned by the mspan's layout method. 706 // If total > size*n, it means that there is extra leftover memory in the span, 707 // usually due to rounding. 708 // 709 // TODO(rsc): Perhaps introduce a different heapBitsSpan type. 710 711 // initSpan initializes the heap bitmap for a span. 712 // It clears all checkmark bits. 713 // If this is a span of pointer-sized objects, it initializes all 714 // words to pointer/scan. 715 // Otherwise, it initializes all words to scalar/dead. 716 func (h heapBits) initSpan(s *mspan) { 717 size, n, total := s.layout() 718 719 // Init the markbit structures 720 s.freeindex = 0 721 s.allocCache = ^uint64(0) // all 1s indicating all free. 722 s.nelems = n 723 s.allocBits = nil 724 s.gcmarkBits = nil 725 s.gcmarkBits = newMarkBits(s.nelems) 726 s.allocBits = newAllocBits(s.nelems) 727 728 // Clear bits corresponding to objects. 729 if total%heapBitmapScale != 0 { 730 throw("initSpan: unaligned length") 731 } 732 nbyte := total / heapBitmapScale 733 if sys.PtrSize == 8 && size == sys.PtrSize { 734 end := h.bitp 735 bitp := subtractb(end, nbyte-1) 736 for { 737 *bitp = bitPointerAll | bitScanAll 738 if bitp == end { 739 break 740 } 741 bitp = add1(bitp) 742 } 743 return 744 } 745 memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) 746 } 747 748 // initCheckmarkSpan initializes a span for being checkmarked. 749 // It clears the checkmark bits, which are set to 1 in normal operation. 750 func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { 751 // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. 752 if sys.PtrSize == 8 && size == sys.PtrSize { 753 // Checkmark bit is type bit, bottom bit of every 2-bit entry. 754 // Only possible on 64-bit system, since minimum size is 8. 755 // Must clear type bit (checkmark bit) of every word. 756 // The type bit is the lower of every two-bit pair. 757 bitp := h.bitp 758 for i := uintptr(0); i < n; i += 4 { 759 *bitp &^= bitPointerAll 760 bitp = subtract1(bitp) 761 } 762 return 763 } 764 for i := uintptr(0); i < n; i++ { 765 *h.bitp &^= bitScan << (heapBitsShift + h.shift) 766 h = h.forward(size / sys.PtrSize) 767 } 768 } 769 770 // clearCheckmarkSpan undoes all the checkmarking in a span. 771 // The actual checkmark bits are ignored, so the only work to do 772 // is to fix the pointer bits. 
(Pointer bits are ignored by scanobject 773 // but consulted by typedmemmove.) 774 func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { 775 // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. 776 if sys.PtrSize == 8 && size == sys.PtrSize { 777 // Checkmark bit is type bit, bottom bit of every 2-bit entry. 778 // Only possible on 64-bit system, since minimum size is 8. 779 // Must clear type bit (checkmark bit) of every word. 780 // The type bit is the lower of every two-bit pair. 781 bitp := h.bitp 782 for i := uintptr(0); i < n; i += 4 { 783 *bitp |= bitPointerAll 784 bitp = subtract1(bitp) 785 } 786 } 787 } 788 789 // oneBitCount is indexed by byte and produces the 790 // number of 1 bits in that byte. For example 128 has 1 bit set 791 // and oneBitCount[128] will holds 1. 792 var oneBitCount = [256]uint8{ 793 0, 1, 1, 2, 1, 2, 2, 3, 794 1, 2, 2, 3, 2, 3, 3, 4, 795 1, 2, 2, 3, 2, 3, 3, 4, 796 2, 3, 3, 4, 3, 4, 4, 5, 797 1, 2, 2, 3, 2, 3, 3, 4, 798 2, 3, 3, 4, 3, 4, 4, 5, 799 2, 3, 3, 4, 3, 4, 4, 5, 800 3, 4, 4, 5, 4, 5, 5, 6, 801 1, 2, 2, 3, 2, 3, 3, 4, 802 2, 3, 3, 4, 3, 4, 4, 5, 803 2, 3, 3, 4, 3, 4, 4, 5, 804 3, 4, 4, 5, 4, 5, 5, 6, 805 2, 3, 3, 4, 3, 4, 4, 5, 806 3, 4, 4, 5, 4, 5, 5, 6, 807 3, 4, 4, 5, 4, 5, 5, 6, 808 4, 5, 5, 6, 5, 6, 6, 7, 809 1, 2, 2, 3, 2, 3, 3, 4, 810 2, 3, 3, 4, 3, 4, 4, 5, 811 2, 3, 3, 4, 3, 4, 4, 5, 812 3, 4, 4, 5, 4, 5, 5, 6, 813 2, 3, 3, 4, 3, 4, 4, 5, 814 3, 4, 4, 5, 4, 5, 5, 6, 815 3, 4, 4, 5, 4, 5, 5, 6, 816 4, 5, 5, 6, 5, 6, 6, 7, 817 2, 3, 3, 4, 3, 4, 4, 5, 818 3, 4, 4, 5, 4, 5, 5, 6, 819 3, 4, 4, 5, 4, 5, 5, 6, 820 4, 5, 5, 6, 5, 6, 6, 7, 821 3, 4, 4, 5, 4, 5, 5, 6, 822 4, 5, 5, 6, 5, 6, 6, 7, 823 4, 5, 5, 6, 5, 6, 6, 7, 824 5, 6, 6, 7, 6, 7, 7, 8} 825 826 // countAlloc returns the number of objects allocated in span s by 827 // scanning the allocation bitmap. 828 // TODO:(rlh) Use popcount intrinsic. 829 func (s *mspan) countAlloc() int { 830 count := 0 831 maxIndex := s.nelems / 8 832 for i := uintptr(0); i < maxIndex; i++ { 833 mrkBits := *s.gcmarkBits.bytep(i) 834 count += int(oneBitCount[mrkBits]) 835 } 836 if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 { 837 mrkBits := *s.gcmarkBits.bytep(maxIndex) 838 mask := uint8((1 << bitsInLastByte) - 1) 839 bits := mrkBits & mask 840 count += int(oneBitCount[bits]) 841 } 842 return count 843 } 844 845 // heapBitsSetType records that the new allocation [x, x+size) 846 // holds in [x, x+dataSize) one or more values of type typ. 847 // (The number of values is given by dataSize / typ.size.) 848 // If dataSize < size, the fragment [x+dataSize, x+size) is 849 // recorded as non-pointer data. 850 // It is known that the type has pointers somewhere; 851 // malloc does not call heapBitsSetType when there are no pointers, 852 // because all free objects are marked as noscan during 853 // heapBitsSweepSpan. 854 // 855 // There can only be one allocation from a given span active at a time, 856 // and the bitmap for a span always falls on byte boundaries, 857 // so there are no write-write races for access to the heap bitmap. 858 // Hence, heapBitsSetType can access the bitmap without atomics. 859 // 860 // There can be read-write races between heapBitsSetType and things 861 // that read the heap bitmap like scanobject. However, since 862 // heapBitsSetType is only used for objects that have not yet been 863 // made reachable, readers will ignore bits being modified by this 864 // function. 
This does mean this function cannot transiently modify 865 // bits that belong to neighboring objects. Also, on weakly-ordered 866 // machines, callers must execute a store/store (publication) barrier 867 // between calling this function and making the object reachable. 868 func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { 869 const doubleCheck = false // slow but helpful; enable to test modifications to this code 870 871 // dataSize is always size rounded up to the next malloc size class, 872 // except in the case of allocating a defer block, in which case 873 // size is sizeof(_defer{}) (at least 6 words) and dataSize may be 874 // arbitrarily larger. 875 // 876 // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore 877 // assume that dataSize == size without checking it explicitly. 878 879 if sys.PtrSize == 8 && size == sys.PtrSize { 880 // It's one word and it has pointers, it must be a pointer. 881 // Since all allocated one-word objects are pointers 882 // (non-pointers are aggregated into tinySize allocations), 883 // initSpan sets the pointer bits for us. Nothing to do here. 884 if doubleCheck { 885 h := heapBitsForAddr(x) 886 if !h.isPointer() { 887 throw("heapBitsSetType: pointer bit missing") 888 } 889 if !h.morePointers() { 890 throw("heapBitsSetType: scan bit missing") 891 } 892 } 893 return 894 } 895 896 h := heapBitsForAddr(x) 897 ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below) 898 899 // Heap bitmap bits for 2-word object are only 4 bits, 900 // so also shared with objects next to it. 901 // This is called out as a special case primarily for 32-bit systems, 902 // so that on 32-bit systems the code below can assume all objects 903 // are 4-word aligned (because they're all 16-byte aligned). 904 if size == 2*sys.PtrSize { 905 if typ.size == sys.PtrSize { 906 // We're allocating a block big enough to hold two pointers. 907 // On 64-bit, that means the actual object must be two pointers, 908 // or else we'd have used the one-pointer-sized block. 909 // On 32-bit, however, this is the 8-byte block, the smallest one. 910 // So it could be that we're allocating one pointer and this was 911 // just the smallest block available. Distinguish by checking dataSize. 912 // (In general the number of instances of typ being allocated is 913 // dataSize/typ.size.) 914 if sys.PtrSize == 4 && dataSize == sys.PtrSize { 915 // 1 pointer object. On 32-bit machines clear the bit for the 916 // unused second word. 917 *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift 918 *h.bitp |= (bitPointer | bitScan) << h.shift 919 } else { 920 // 2-element slice of pointer. 921 *h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift 922 } 923 return 924 } 925 // Otherwise typ.size must be 2*sys.PtrSize, 926 // and typ.kind&kindGCProg == 0. 927 if doubleCheck { 928 if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 { 929 print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n") 930 throw("heapBitsSetType") 931 } 932 } 933 b := uint32(*ptrmask) 934 hb := (b & 3) | bitScan 935 // bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1. 936 // 110011 is shifted h.shift and complemented. 937 // This clears out the bits that are about to be 938 // ored into *h.hbitp in the next instructions. 
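// Concretely (illustrative): for a two-pointer slot whose ptrmask byte ends in 11,
// b&3 == 3 and hb == bitPointer | bitPointer<<heapBitsShift | bitScan == 0x13.
// The &^= below first clears the two pointer bits and two scan bits at h.shift
// (mask 0x33 << h.shift), then the |= deposits hb there, leaving the bits of
// the neighboring object in the same bitmap byte untouched.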
939 *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift 940 *h.bitp |= uint8(hb << h.shift) 941 return 942 } 943 944 // Copy from 1-bit ptrmask into 2-bit bitmap. 945 // The basic approach is to use a single uintptr as a bit buffer, 946 // alternating between reloading the buffer and writing bitmap bytes. 947 // In general, one load can supply two bitmap byte writes. 948 // This is a lot of lines of code, but it compiles into relatively few 949 // machine instructions. 950 951 var ( 952 // Ptrmask input. 953 p *byte // last ptrmask byte read 954 b uintptr // ptrmask bits already loaded 955 nb uintptr // number of bits in b at next read 956 endp *byte // final ptrmask byte to read (then repeat) 957 endnb uintptr // number of valid bits in *endp 958 pbits uintptr // alternate source of bits 959 960 // Heap bitmap output. 961 w uintptr // words processed 962 nw uintptr // number of words to process 963 hbitp *byte // next heap bitmap byte to write 964 hb uintptr // bits being prepared for *hbitp 965 ) 966 967 hbitp = h.bitp 968 969 // Handle GC program. Delayed until this part of the code 970 // so that we can use the same double-checking mechanism 971 // as the 1-bit case. Nothing above could have encountered 972 // GC programs: the cases were all too small. 973 if typ.kind&kindGCProg != 0 { 974 heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4)) 975 if doubleCheck { 976 // Double-check the heap bits written by GC program 977 // by running the GC program to create a 1-bit pointer mask 978 // and then jumping to the double-check code below. 979 // This doesn't catch bugs shared between the 1-bit and 4-bit 980 // GC program execution, but it does catch mistakes specific 981 // to just one of those and bugs in heapBitsSetTypeGCProg's 982 // implementation of arrays. 983 lock(&debugPtrmask.lock) 984 if debugPtrmask.data == nil { 985 debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys)) 986 } 987 ptrmask = debugPtrmask.data 988 runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) 989 goto Phase4 990 } 991 return 992 } 993 994 // Note about sizes: 995 // 996 // typ.size is the number of words in the object, 997 // and typ.ptrdata is the number of words in the prefix 998 // of the object that contains pointers. That is, the final 999 // typ.size - typ.ptrdata words contain no pointers. 1000 // This allows optimization of a common pattern where 1001 // an object has a small header followed by a large scalar 1002 // buffer. If we know the pointers are over, we don't have 1003 // to scan the buffer's heap bitmap at all. 1004 // The 1-bit ptrmasks are sized to contain only bits for 1005 // the typ.ptrdata prefix, zero padded out to a full byte 1006 // of bitmap. This code sets nw (below) so that heap bitmap 1007 // bits are only written for the typ.ptrdata prefix; if there is 1008 // more room in the allocated object, the next heap bitmap 1009 // entry is a 00, indicating that there are no more pointers 1010 // to scan. So only the ptrmask for the ptrdata bytes is needed. 1011 // 1012 // Replicated copies are not as nice: if there is an array of 1013 // objects with scalar tails, all but the last tail does have to 1014 // be initialized, because there is no way to say "skip forward". 
1015 // However, because of the possibility of a repeated type with 1016 // size not a multiple of 4 pointers (one heap bitmap byte), 1017 // the code already must handle the last ptrmask byte specially 1018 // by treating it as containing only the bits for endnb pointers, 1019 // where endnb <= 4. We represent large scalar tails that must 1020 // be expanded in the replication by setting endnb larger than 4. 1021 // This will have the effect of reading many bits out of b, 1022 // but once the real bits are shifted out, b will supply as many 1023 // zero bits as we try to read, which is exactly what we need. 1024 1025 p = ptrmask 1026 if typ.size < dataSize { 1027 // Filling in bits for an array of typ. 1028 // Set up for repetition of ptrmask during main loop. 1029 // Note that ptrmask describes only a prefix of 1030 const maxBits = sys.PtrSize*8 - 7 1031 if typ.ptrdata/sys.PtrSize <= maxBits { 1032 // Entire ptrmask fits in uintptr with room for a byte fragment. 1033 // Load into pbits and never read from ptrmask again. 1034 // This is especially important when the ptrmask has 1035 // fewer than 8 bits in it; otherwise the reload in the middle 1036 // of the Phase 2 loop would itself need to loop to gather 1037 // at least 8 bits. 1038 1039 // Accumulate ptrmask into b. 1040 // ptrmask is sized to describe only typ.ptrdata, but we record 1041 // it as describing typ.size bytes, since all the high bits are zero. 1042 nb = typ.ptrdata / sys.PtrSize 1043 for i := uintptr(0); i < nb; i += 8 { 1044 b |= uintptr(*p) << i 1045 p = add1(p) 1046 } 1047 nb = typ.size / sys.PtrSize 1048 1049 // Replicate ptrmask to fill entire pbits uintptr. 1050 // Doubling and truncating is fewer steps than 1051 // iterating by nb each time. (nb could be 1.) 1052 // Since we loaded typ.ptrdata/sys.PtrSize bits 1053 // but are pretending to have typ.size/sys.PtrSize, 1054 // there might be no replication necessary/possible. 1055 pbits = b 1056 endnb = nb 1057 if nb+nb <= maxBits { 1058 for endnb <= sys.PtrSize*8 { 1059 pbits |= pbits << endnb 1060 endnb += endnb 1061 } 1062 // Truncate to a multiple of original ptrmask. 1063 // Because nb+nb <= maxBits, nb fits in a byte. 1064 // Byte division is cheaper than uintptr division. 1065 endnb = uintptr(maxBits/byte(nb)) * nb 1066 pbits &= 1<<endnb - 1 1067 b = pbits 1068 nb = endnb 1069 } 1070 1071 // Clear p and endp as sentinel for using pbits. 1072 // Checked during Phase 2 loop. 1073 p = nil 1074 endp = nil 1075 } else { 1076 // Ptrmask is larger. Read it multiple times. 1077 n := (typ.ptrdata/sys.PtrSize+7)/8 - 1 1078 endp = addb(ptrmask, n) 1079 endnb = typ.size/sys.PtrSize - n*8 1080 } 1081 } 1082 if p != nil { 1083 b = uintptr(*p) 1084 p = add1(p) 1085 nb = 8 1086 } 1087 1088 if typ.size == dataSize { 1089 // Single entry: can stop once we reach the non-pointer data. 1090 nw = typ.ptrdata / sys.PtrSize 1091 } else { 1092 // Repeated instances of typ in an array. 1093 // Have to process first N-1 entries in full, but can stop 1094 // once we reach the non-pointer data in the final entry. 1095 nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize 1096 } 1097 if nw == 0 { 1098 // No pointers! Caller was supposed to check. 1099 println("runtime: invalid type ", typ.string()) 1100 throw("heapBitsSetType: called with non-pointer type") 1101 return 1102 } 1103 if nw < 2 { 1104 // Must write at least 2 words, because the "no scan" 1105 // encoding doesn't take effect until the third word. 
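// Example (illustrative, 64-bit): a 3-word struct whose only pointer is its
// first word has typ.ptrdata/sys.PtrSize == 1, so nw would be 1. Bumping nw
// to 2 also writes the entry for the second word; the dead (00) encoding only
// takes effect from the third word on, because the second word's high bit is
// the checkmark bit rather than a "continue scanning" bit.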
1106 nw = 2 1107 } 1108 1109 // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4). 1110 // The leading byte is special because it contains the bits for word 1, 1111 // which does not have the scan bit set. 1112 // The leading half-byte is special because it's a half a byte, 1113 // so we have to be careful with the bits already there. 1114 switch { 1115 default: 1116 throw("heapBitsSetType: unexpected shift") 1117 1118 case h.shift == 0: 1119 // Ptrmask and heap bitmap are aligned. 1120 // Handle first byte of bitmap specially. 1121 // 1122 // The first byte we write out covers the first four 1123 // words of the object. The scan/dead bit on the first 1124 // word must be set to scan since there are pointers 1125 // somewhere in the object. The scan/dead bit on the 1126 // second word is the checkmark, so we don't set it. 1127 // In all following words, we set the scan/dead 1128 // appropriately to indicate that the object contains 1129 // to the next 2-bit entry in the bitmap. 1130 // 1131 // TODO: It doesn't matter if we set the checkmark, so 1132 // maybe this case isn't needed any more. 1133 hb = b & bitPointerAll 1134 hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) 1135 if w += 4; w >= nw { 1136 goto Phase3 1137 } 1138 *hbitp = uint8(hb) 1139 hbitp = subtract1(hbitp) 1140 b >>= 4 1141 nb -= 4 1142 1143 case sys.PtrSize == 8 && h.shift == 2: 1144 // Ptrmask and heap bitmap are misaligned. 1145 // The bits for the first two words are in a byte shared 1146 // with another object, so we must be careful with the bits 1147 // already there. 1148 // We took care of 1-word and 2-word objects above, 1149 // so this is at least a 6-word object. 1150 hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift) 1151 // This is not noscan, so set the scan bit in the 1152 // first word. 1153 hb |= bitScan << (2 * heapBitsShift) 1154 b >>= 2 1155 nb -= 2 1156 // Note: no bitScan for second word because that's 1157 // the checkmark. 1158 *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) 1159 *hbitp |= uint8(hb) 1160 hbitp = subtract1(hbitp) 1161 if w += 2; w >= nw { 1162 // We know that there is more data, because we handled 2-word objects above. 1163 // This must be at least a 6-word object. If we're out of pointer words, 1164 // mark no scan in next bitmap byte and finish. 1165 hb = 0 1166 w += 4 1167 goto Phase3 1168 } 1169 } 1170 1171 // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap. 1172 // The loop computes the bits for that last write but does not execute the write; 1173 // it leaves the bits in hb for processing by phase 3. 1174 // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to 1175 // use in the first half of the loop right now, and then we only adjust nb explicitly 1176 // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop. 1177 nb -= 4 1178 for { 1179 // Emit bitmap byte. 1180 // b has at least nb+4 bits, with one exception: 1181 // if w+4 >= nw, then b has only nw-w bits, 1182 // but we'll stop at the break and then truncate 1183 // appropriately in Phase 3. 1184 hb = b & bitPointerAll 1185 hb |= bitScanAll 1186 if w += 4; w >= nw { 1187 break 1188 } 1189 *hbitp = uint8(hb) 1190 hbitp = subtract1(hbitp) 1191 b >>= 4 1192 1193 // Load more bits. b has nb right now. 1194 if p != endp { 1195 // Fast path: keep reading from ptrmask. 
1196 // nb unmodified: we just loaded 8 bits, 1197 // and the next iteration will consume 8 bits, 1198 // leaving us with the same nb the next time we're here. 1199 if nb < 8 { 1200 b |= uintptr(*p) << nb 1201 p = add1(p) 1202 } else { 1203 // Reduce the number of bits in b. 1204 // This is important if we skipped 1205 // over a scalar tail, since nb could 1206 // be larger than the bit width of b. 1207 nb -= 8 1208 } 1209 } else if p == nil { 1210 // Almost as fast path: track bit count and refill from pbits. 1211 // For short repetitions. 1212 if nb < 8 { 1213 b |= pbits << nb 1214 nb += endnb 1215 } 1216 nb -= 8 // for next iteration 1217 } else { 1218 // Slow path: reached end of ptrmask. 1219 // Process final partial byte and rewind to start. 1220 b |= uintptr(*p) << nb 1221 nb += endnb 1222 if nb < 8 { 1223 b |= uintptr(*ptrmask) << nb 1224 p = add1(ptrmask) 1225 } else { 1226 nb -= 8 1227 p = ptrmask 1228 } 1229 } 1230 1231 // Emit bitmap byte. 1232 hb = b & bitPointerAll 1233 hb |= bitScanAll 1234 if w += 4; w >= nw { 1235 break 1236 } 1237 *hbitp = uint8(hb) 1238 hbitp = subtract1(hbitp) 1239 b >>= 4 1240 } 1241 1242 Phase3: 1243 // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries. 1244 if w > nw { 1245 // Counting the 4 entries in hb not yet written to memory, 1246 // there are more entries than possible pointer slots. 1247 // Discard the excess entries (can't be more than 3). 1248 mask := uintptr(1)<<(4-(w-nw)) - 1 1249 hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits 1250 } 1251 1252 // Change nw from counting possibly-pointer words to total words in allocation. 1253 nw = size / sys.PtrSize 1254 1255 // Write whole bitmap bytes. 1256 // The first is hb, the rest are zero. 1257 if w <= nw { 1258 *hbitp = uint8(hb) 1259 hbitp = subtract1(hbitp) 1260 hb = 0 // for possible final half-byte below 1261 for w += 4; w <= nw; w += 4 { 1262 *hbitp = 0 1263 hbitp = subtract1(hbitp) 1264 } 1265 } 1266 1267 // Write final partial bitmap byte if any. 1268 // We know w > nw, or else we'd still be in the loop above. 1269 // It can be bigger only due to the 4 entries in hb that it counts. 1270 // If w == nw+4 then there's nothing left to do: we wrote all nw entries 1271 // and can discard the 4 sitting in hb. 1272 // But if w == nw+2, we need to write first two in hb. 1273 // The byte is shared with the next object, so be careful with 1274 // existing bits. 1275 if w == nw+2 { 1276 *hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb) 1277 } 1278 1279 Phase4: 1280 // Phase 4: all done, but perhaps double check. 1281 if doubleCheck { 1282 end := heapBitsForAddr(x + size) 1283 if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { 1284 println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) 1285 print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") 1286 print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") 1287 h0 := heapBitsForAddr(x) 1288 print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") 1289 print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n") 1290 throw("bad heapBitsSetType") 1291 } 1292 1293 // Double-check that bits to be written were written correctly. 1294 // Does not check that other bits were not written, unfortunately. 
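// Worked example (illustrative, 64-bit): typ = struct{ p *T; a, b uintptr }
// (size 24, ptrdata 8) stored as a two-element array in a 48-byte slot
// (dataSize = size = 48) gives nptr = 1, ndata = 3, count = 2, totalptr = 4.
// The loop below then wants pointer+scan for words 0 and 3, scan only for
// word 2, nothing for word 1 (its scan bit is the checkmark and is masked out
// of "have"), and the dead encoding for words 4 and 5.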
1295 h := heapBitsForAddr(x) 1296 nptr := typ.ptrdata / sys.PtrSize 1297 ndata := typ.size / sys.PtrSize 1298 count := dataSize / typ.size 1299 totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize 1300 for i := uintptr(0); i < size/sys.PtrSize; i++ { 1301 j := i % ndata 1302 var have, want uint8 1303 have = (*h.bitp >> h.shift) & (bitPointer | bitScan) 1304 if i >= totalptr { 1305 want = 0 // deadmarker 1306 if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 { 1307 want = bitScan 1308 } 1309 } else { 1310 if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { 1311 want |= bitPointer 1312 } 1313 if i != 1 { 1314 want |= bitScan 1315 } else { 1316 have &^= bitScan 1317 } 1318 } 1319 if have != want { 1320 println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) 1321 print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") 1322 print("kindGCProg=", typ.kind&kindGCProg != 0, "\n") 1323 print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") 1324 h0 := heapBitsForAddr(x) 1325 print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") 1326 print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") 1327 print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") 1328 println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want) 1329 if typ.kind&kindGCProg != 0 { 1330 println("GC program:") 1331 dumpGCProg(addb(typ.gcdata, 4)) 1332 } 1333 throw("bad heapBitsSetType") 1334 } 1335 h = h.next() 1336 } 1337 if ptrmask == debugPtrmask.data { 1338 unlock(&debugPtrmask.lock) 1339 } 1340 } 1341 } 1342 1343 var debugPtrmask struct { 1344 lock mutex 1345 data *byte 1346 } 1347 1348 // heapBitsSetTypeGCProg implements heapBitsSetType using a GC program. 1349 // progSize is the size of the memory described by the program. 1350 // elemSize is the size of the element that the GC program describes (a prefix of). 1351 // dataSize is the total size of the intended data, a multiple of elemSize. 1352 // allocSize is the total size of the allocated memory. 1353 // 1354 // GC programs are only used for large allocations. 1355 // heapBitsSetType requires that allocSize is a multiple of 4 words, 1356 // so that the relevant bitmap bytes are not shared with surrounding 1357 // objects. 1358 func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) { 1359 if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 { 1360 // Alignment will be wrong. 1361 throw("heapBitsSetTypeGCProg: small allocation") 1362 } 1363 var totalBits uintptr 1364 if elemSize == dataSize { 1365 totalBits = runGCProg(prog, nil, h.bitp, 2) 1366 if totalBits*sys.PtrSize != progSize { 1367 println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize) 1368 throw("heapBitsSetTypeGCProg: unexpected bit count") 1369 } 1370 } else { 1371 count := dataSize / elemSize 1372 1373 // Piece together program trailer to run after prog that does: 1374 // literal(0) 1375 // repeat(1, elemSize-progSize-1) // zeros to fill element size 1376 // repeat(elemSize, count-1) // repeat that element for count 1377 // This zero-pads the data remaining in the first element and then 1378 // repeats that first element to fill the array. 
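// For instance (illustrative numbers, 64-bit): with elemSize = 48,
// progSize = 32 and count = 3, the element is 6 words of which the program
// describes 4, so the bytes assembled below are
//	01 00  81 01  80 06 02  00
// i.e. literal(0), repeat(1,1) to pad the element out to 6 words, then
// repeat(6,2) to replicate the whole element twice more, then stop.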
1379 var trailer [40]byte // 3 varints (max 10 each) + some bytes 1380 i := 0 1381 if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 { 1382 // literal(0) 1383 trailer[i] = 0x01 1384 i++ 1385 trailer[i] = 0 1386 i++ 1387 if n > 1 { 1388 // repeat(1, n-1) 1389 trailer[i] = 0x81 1390 i++ 1391 n-- 1392 for ; n >= 0x80; n >>= 7 { 1393 trailer[i] = byte(n | 0x80) 1394 i++ 1395 } 1396 trailer[i] = byte(n) 1397 i++ 1398 } 1399 } 1400 // repeat(elemSize/ptrSize, count-1) 1401 trailer[i] = 0x80 1402 i++ 1403 n := elemSize / sys.PtrSize 1404 for ; n >= 0x80; n >>= 7 { 1405 trailer[i] = byte(n | 0x80) 1406 i++ 1407 } 1408 trailer[i] = byte(n) 1409 i++ 1410 n = count - 1 1411 for ; n >= 0x80; n >>= 7 { 1412 trailer[i] = byte(n | 0x80) 1413 i++ 1414 } 1415 trailer[i] = byte(n) 1416 i++ 1417 trailer[i] = 0 1418 i++ 1419 1420 runGCProg(prog, &trailer[0], h.bitp, 2) 1421 1422 // Even though we filled in the full array just now, 1423 // record that we only filled in up to the ptrdata of the 1424 // last element. This will cause the code below to 1425 // memclr the dead section of the final array element, 1426 // so that scanobject can stop early in the final element. 1427 totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize 1428 } 1429 endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) 1430 endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) 1431 memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) 1432 } 1433 1434 // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. 1435 // size the size of the region described by prog, in bytes. 1436 // The resulting bitvector will have no more than size/sys.PtrSize bits. 1437 func progToPointerMask(prog *byte, size uintptr) bitvector { 1438 n := (size/sys.PtrSize + 7) / 8 1439 x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] 1440 x[len(x)-1] = 0xa1 // overflow check sentinel 1441 n = runGCProg(prog, nil, &x[0], 1) 1442 if x[len(x)-1] != 0xa1 { 1443 throw("progToPointerMask: overflow") 1444 } 1445 return bitvector{int32(n), &x[0]} 1446 } 1447 1448 // Packed GC pointer bitmaps, aka GC programs. 1449 // 1450 // For large types containing arrays, the type information has a 1451 // natural repetition that can be encoded to save space in the 1452 // binary and in the memory representation of the type information. 1453 // 1454 // The encoding is a simple Lempel-Ziv style bytecode machine 1455 // with the following instructions: 1456 // 1457 // 00000000: stop 1458 // 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes 1459 // 10000000 n c: repeat the previous n bits c times; n, c are varints 1460 // 1nnnnnnn c: repeat the previous n bits c times; c is a varint 1461 1462 // runGCProg executes the GC program prog, and then trailer if non-nil, 1463 // writing to dst with entries of the given size. 1464 // If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst. 1465 // If size == 2, dst is the 2-bit heap bitmap, and writes move backward 1466 // starting at dst (because the heap bitmap does). In this case, the caller guarantees 1467 // that only whole bytes in dst need to be written. 1468 // 1469 // runGCProg returns the number of 1- or 2-bit entries written to memory. 1470 func runGCProg(prog, trailer, dst *byte, size int) uintptr { 1471 dstStart := dst 1472 1473 // Bits waiting to be written to memory. 1474 var bits uintptr 1475 var nbits uintptr 1476 1477 p := prog 1478 Run: 1479 for { 1480 // Flush accumulated full bytes. 
        // The rest of the loop assumes that nbits <= 7.
        for ; nbits >= 8; nbits -= 8 {
            if size == 1 {
                *dst = uint8(bits)
                dst = add1(dst)
                bits >>= 8
            } else {
                v := bits&bitPointerAll | bitScanAll
                *dst = uint8(v)
                dst = subtract1(dst)
                bits >>= 4
                v = bits&bitPointerAll | bitScanAll
                *dst = uint8(v)
                dst = subtract1(dst)
                bits >>= 4
            }
        }

        // Process one instruction.
        inst := uintptr(*p)
        p = add1(p)
        n := inst & 0x7F
        if inst&0x80 == 0 {
            // Literal bits; n == 0 means end of program.
            if n == 0 {
                // Program is over; continue in trailer if present.
                if trailer != nil {
                    //println("trailer")
                    p = trailer
                    trailer = nil
                    continue
                }
                //println("done")
                break Run
            }
            //println("lit", n, dst)
            nbyte := n / 8
            for i := uintptr(0); i < nbyte; i++ {
                bits |= uintptr(*p) << nbits
                p = add1(p)
                if size == 1 {
                    *dst = uint8(bits)
                    dst = add1(dst)
                    bits >>= 8
                } else {
                    v := bits&0xf | bitScanAll
                    *dst = uint8(v)
                    dst = subtract1(dst)
                    bits >>= 4
                    v = bits&0xf | bitScanAll
                    *dst = uint8(v)
                    dst = subtract1(dst)
                    bits >>= 4
                }
            }
            if n %= 8; n > 0 {
                bits |= uintptr(*p) << nbits
                p = add1(p)
                nbits += n
            }
            continue Run
        }

        // Repeat. If n == 0, it is encoded in a varint in the next bytes.
        if n == 0 {
            for off := uint(0); ; off += 7 {
                x := uintptr(*p)
                p = add1(p)
                n |= (x & 0x7F) << off
                if x&0x80 == 0 {
                    break
                }
            }
        }

        // Count is encoded in a varint in the next bytes.
        c := uintptr(0)
        for off := uint(0); ; off += 7 {
            x := uintptr(*p)
            p = add1(p)
            c |= (x & 0x7F) << off
            if x&0x80 == 0 {
                break
            }
        }
        c *= n // now total number of bits to copy

        // If the number of bits being repeated is small, load them
        // into a register and use that register for the entire loop
        // instead of repeatedly reading from memory.
        // Handling fewer than 8 bits here makes the general loop simpler.
        // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
        // the pattern to a bit buffer holding at most 7 bits (a partial byte)
        // it will not overflow.
        src := dst
        const maxBits = sys.PtrSize*8 - 7
        if n <= maxBits {
            // Start with bits in output buffer.
            pattern := bits
            npattern := nbits

            // If we need more bits, fetch them from memory.
            if size == 1 {
                src = subtract1(src)
                for npattern < n {
                    pattern <<= 8
                    pattern |= uintptr(*src)
                    src = subtract1(src)
                    npattern += 8
                }
            } else {
                src = add1(src)
                for npattern < n {
                    pattern <<= 4
                    pattern |= uintptr(*src) & 0xf
                    src = add1(src)
                    npattern += 4
                }
            }

            // We started with the whole bit output buffer,
            // and then we loaded bits from whole bytes.
            // Either way, we might now have too many instead of too few.
            // Discard the extra.
            if npattern > n {
                pattern >>= npattern - n
                npattern = n
            }

            // Replicate pattern to at most maxBits.
            if npattern == 1 {
                // One bit being repeated.
                // If the bit is 1, make the pattern all 1s.
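                // (pattern then holds maxBits copies of that bit and
                // npattern == maxBits, so the flush loop below can write
                // whole bytes at a time.)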
                // If the bit is 0, the pattern is already all 0s,
                // but we can claim that the number of bits
                // in the word is equal to the number we need (c),
                // because right shift of bits will zero fill.
                if pattern == 1 {
                    pattern = 1<<maxBits - 1
                    npattern = maxBits
                } else {
                    npattern = c
                }
            } else {
                b := pattern
                nb := npattern
                if nb+nb <= maxBits {
                    // Double pattern until the whole uintptr is filled.
                    for nb <= sys.PtrSize*8 {
                        b |= b << nb
                        nb += nb
                    }
                    // Trim away incomplete copy of original pattern in high bits.
                    // TODO(rsc): Replace with table lookup or loop on systems without divide?
                    nb = maxBits / npattern * npattern
                    b &= 1<<nb - 1
                    pattern = b
                    npattern = nb
                }
            }

            // Add pattern to bit buffer and flush bit buffer, c/npattern times.
            // Since pattern contains >8 bits, there will be full bytes to flush
            // on each iteration.
            for ; c >= npattern; c -= npattern {
                bits |= pattern << nbits
                nbits += npattern
                if size == 1 {
                    for nbits >= 8 {
                        *dst = uint8(bits)
                        dst = add1(dst)
                        bits >>= 8
                        nbits -= 8
                    }
                } else {
                    for nbits >= 4 {
                        *dst = uint8(bits&0xf | bitScanAll)
                        dst = subtract1(dst)
                        bits >>= 4
                        nbits -= 4
                    }
                }
            }

            // Add final fragment to bit buffer.
            if c > 0 {
                pattern &= 1<<c - 1
                bits |= pattern << nbits
                nbits += c
            }
            continue Run
        }

        // Repeat; n too large to fit in a register.
        // Since nbits <= 7, we know the first few bytes of repeated data
        // are already written to memory.
        off := n - nbits // n > nbits because n > maxBits and nbits <= 7
        if size == 1 {
            // Leading src fragment.
            src = subtractb(src, (off+7)/8)
            if frag := off & 7; frag != 0 {
                bits |= uintptr(*src) >> (8 - frag) << nbits
                src = add1(src)
                nbits += frag
                c -= frag
            }
            // Main loop: load one byte, write another.
            // The bits are rotating through the bit buffer.
            for i := c / 8; i > 0; i-- {
                bits |= uintptr(*src) << nbits
                src = add1(src)
                *dst = uint8(bits)
                dst = add1(dst)
                bits >>= 8
            }
            // Final src fragment.
            if c %= 8; c > 0 {
                bits |= (uintptr(*src) & (1<<c - 1)) << nbits
                nbits += c
            }
        } else {
            // Leading src fragment.
            src = addb(src, (off+3)/4)
            if frag := off & 3; frag != 0 {
                bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
                src = subtract1(src)
                nbits += frag
                c -= frag
            }
            // Main loop: load one byte, write another.
            // The bits are rotating through the bit buffer.
            for i := c / 4; i > 0; i-- {
                bits |= (uintptr(*src) & 0xf) << nbits
                src = subtract1(src)
                *dst = uint8(bits&0xf | bitScanAll)
                dst = subtract1(dst)
                bits >>= 4
            }
            // Final src fragment.
            if c %= 4; c > 0 {
                bits |= (uintptr(*src) & (1<<c - 1)) << nbits
                nbits += c
            }
        }
    }

    // Write any final bits out, using full-byte writes, even for the final byte.
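    // totalBits is computed before nbits is rounded up, so the padding entries
    // written here (zero pointer bits, plus scan bits in the 2-bit form) are
    // not counted in the value returned to the caller.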
    var totalBits uintptr
    if size == 1 {
        totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
        nbits += -nbits & 7
        for ; nbits > 0; nbits -= 8 {
            *dst = uint8(bits)
            dst = add1(dst)
            bits >>= 8
        }
    } else {
        totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
        nbits += -nbits & 3
        for ; nbits > 0; nbits -= 4 {
            v := bits&0xf | bitScanAll
            *dst = uint8(v)
            dst = subtract1(dst)
            bits >>= 4
        }
    }
    return totalBits
}

func dumpGCProg(p *byte) {
    nptr := 0
    for {
        x := *p
        p = add1(p)
        if x == 0 {
            print("\t", nptr, " end\n")
            break
        }
        if x&0x80 == 0 {
            print("\t", nptr, " lit ", x, ":")
            n := int(x+7) / 8
            for i := 0; i < n; i++ {
                print(" ", hex(*p))
                p = add1(p)
            }
            print("\n")
            nptr += int(x)
        } else {
            nbit := int(x &^ 0x80)
            if nbit == 0 {
                for nb := uint(0); ; nb += 7 {
                    x := *p
                    p = add1(p)
                    nbit |= int(x&0x7f) << nb
                    if x&0x80 == 0 {
                        break
                    }
                }
            }
            count := 0
            for nb := uint(0); ; nb += 7 {
                x := *p
                p = add1(p)
                count |= int(x&0x7f) << nb
                if x&0x80 == 0 {
                    break
                }
            }
            print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
            nptr += nbit * count
        }
    }
}

// Testing.

func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
    target := (*stkframe)(ctxt)
    if frame.sp <= target.sp && target.sp < frame.varp {
        *target = *frame
        return false
    }
    return true
}

// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x interface{}) []byte {
    ret := getgcmask(x)
    typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
    nptr := typ.ptrdata / sys.PtrSize
    for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
        ret = ret[:len(ret)-1]
    }
    return ret
}

// Returns GC type info for object p for testing.
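// The result has one byte per pointer-sized word of the referenced object,
// 1 where the word holds a pointer and 0 otherwise; for heap objects the
// mask may be truncated at the first word the heap bitmap marks as dead.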
func getgcmask(ep interface{}) (mask []byte) {
    e := *efaceOf(&ep)
    p := e.data
    t := e._type
    // data or bss
    for _, datap := range activeModules() {
        // data
        if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
            bitmap := datap.gcdatamask.bytedata
            n := (*ptrtype)(unsafe.Pointer(t)).elem.size
            mask = make([]byte, n/sys.PtrSize)
            for i := uintptr(0); i < n; i += sys.PtrSize {
                off := (uintptr(p) + i - datap.data) / sys.PtrSize
                mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
            }
            return
        }

        // bss
        if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
            bitmap := datap.gcbssmask.bytedata
            n := (*ptrtype)(unsafe.Pointer(t)).elem.size
            mask = make([]byte, n/sys.PtrSize)
            for i := uintptr(0); i < n; i += sys.PtrSize {
                off := (uintptr(p) + i - datap.bss) / sys.PtrSize
                mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
            }
            return
        }
    }

    // heap
    var n uintptr
    var base uintptr
    if mlookup(uintptr(p), &base, &n, nil) != 0 {
        mask = make([]byte, n/sys.PtrSize)
        for i := uintptr(0); i < n; i += sys.PtrSize {
            hbits := heapBitsForAddr(base + i)
            if hbits.isPointer() {
                mask[i/sys.PtrSize] = 1
            }
            if i != 1*sys.PtrSize && !hbits.morePointers() {
                mask = mask[:i/sys.PtrSize]
                break
            }
        }
        return
    }

    // stack
    if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
        var frame stkframe
        frame.sp = uintptr(p)
        _g_ := getg()
        gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
        if frame.fn.valid() {
            f := frame.fn
            targetpc := frame.continpc
            if targetpc == 0 {
                return
            }
            if targetpc != f.entry {
                targetpc--
            }
            pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
            if pcdata == -1 {
                return
            }
            stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
            if stkmap == nil || stkmap.n <= 0 {
                return
            }
            bv := stackmapdata(stkmap, pcdata)
            size := uintptr(bv.n) * sys.PtrSize
            n := (*ptrtype)(unsafe.Pointer(t)).elem.size
            mask = make([]byte, n/sys.PtrSize)
            for i := uintptr(0); i < n; i += sys.PtrSize {
                bitmap := bv.bytedata
                off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
                mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
            }
        }
        return
    }

    // otherwise, not something the GC knows about.
    // possibly read-only data, like malloc(0).
    // must not have pointers
    return
}
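
// The sketch below is illustrative only and is not part of the runtime: it
// decodes the 1-bit form of the GC program format documented above runGCProg
// using ordinary Go. The name exampleDecodeGCProg is invented for this sketch.
// It assumes a well-formed program (every repeat is preceded by at least n
// emitted bits) and expands the output to one byte per bit, rather than the
// packed 1-bit or 2-bit forms that runGCProg writes, so the repeat
// instruction becomes a simple slice append.
//
// For example, the program {0x01, 0x01, 0x81, 0x03, 0x00} emits the literal
// bit 1 and then repeats that 1 bit three more times, decoding to [1 1 1 1].
func exampleDecodeGCProg(prog []byte) (bits []byte) {
    p := 0
    varint := func() int {
        v, shift := 0, uint(0)
        for {
            b := prog[p]
            p++
            v |= int(b&0x7f) << shift
            if b&0x80 == 0 {
                return v
            }
            shift += 7
        }
    }
    for {
        inst := prog[p]
        p++
        n := int(inst & 0x7f)
        if inst&0x80 == 0 {
            if n == 0 {
                return bits // 00000000: stop
            }
            // 0nnnnnnn: emit n literal bits from the next (n+7)/8 bytes,
            // consuming each byte starting with its low bit.
            for i := 0; i < n; i++ {
                bits = append(bits, (prog[p+i/8]>>uint(i%8))&1)
            }
            p += (n + 7) / 8
            continue
        }
        if n == 0 {
            n = varint() // 10000000 n c: repeat length is a varint
        }
        c := varint() // repeat count is always a varint
        // Repeat the previous n bits c more times.
        pattern := append([]byte(nil), bits[len(bits)-n:]...)
        for ; c > 0; c-- {
            bits = append(bits, pattern...)
        }
    }
}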