github.com/guyezi/gofrontend@v0.0.0-20200228202240-7a62a49e62c0/libgo/go/runtime/mbitmap.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Garbage collector: type and heap bitmaps. 6 // 7 // Stack, data, and bss bitmaps 8 // 9 // Stack frames and global variables in the data and bss sections are described 10 // by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer 11 // to be visited during GC. The bits in each byte are consumed starting with 12 // the low bit: 1<<0, 1<<1, and so on. 13 // 14 // Heap bitmap 15 // 16 // The heap bitmap comprises 2 bits for each pointer-sized word in the heap, 17 // stored in the heapArena metadata backing each heap arena. 18 // That is, if ha is the heapArena for the arena starting a start, 19 // then ha.bitmap[0] holds the 2-bit entries for the four words start 20 // through start+3*ptrSize, ha.bitmap[1] holds the entries for 21 // start+4*ptrSize through start+7*ptrSize, and so on. 22 // 23 // In each 2-bit entry, the lower bit holds the same information as in the 1-bit 24 // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. 25 // The meaning of the high bit depends on the position of the word being described 26 // in its allocated object. In all words *except* the second word, the 27 // high bit indicates that the object is still being described. In 28 // these words, if a bit pair with a high bit 0 is encountered, the 29 // low bit can also be assumed to be 0, and the object description is 30 // over. This 00 is called the ``dead'' encoding: it signals that the 31 // rest of the words in the object are uninteresting to the garbage 32 // collector. 33 // 34 // In the second word, the high bit is the GC ``checkmarked'' bit (see below). 35 // 36 // The 2-bit entries are split when written into the byte, so that the top half 37 // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) 38 // bits. 39 // This form allows a copy from the 1-bit to the 4-bit form to keep the 40 // pointer bits contiguous, instead of having to space them out. 41 // 42 // The code makes use of the fact that the zero value for a heap bitmap 43 // has no live pointer bit set and is (depending on position), not used, 44 // not checkmarked, and is the dead encoding. 45 // These properties must be preserved when modifying the encoding. 46 // 47 // The bitmap for noscan spans is not maintained. Code must ensure 48 // that an object is scannable before consulting its bitmap by 49 // checking either the noscan bit in the span or by consulting its 50 // type's information. 51 // 52 // Checkmarks 53 // 54 // In a concurrent garbage collector, one worries about failing to mark 55 // a live object due to mutations without write barriers or bugs in the 56 // collector implementation. As a sanity check, the GC has a 'checkmark' 57 // mode that retraverses the object graph with the world stopped, to make 58 // sure that everything that should be marked is marked. 59 // In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry 60 // for the second word of the object holds the checkmark bit. 61 // When not in checkmark mode, this bit is set to 1. 62 // 63 // The smallest possible allocation is 8 bytes. On a 32-bit machine, that 64 // means every allocated object has two words, so there is room for the 65 // checkmark bit. On a 64-bit machine, however, the 8-byte allocation is 66 // just one word, so the second bit pair is not available for encoding the 67 // checkmark. However, because non-pointer allocations are combined 68 // into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation 69 // must be a pointer, so the type bit in the first word is not actually needed. 70 // It is still used in general, except in checkmark the type bit is repurposed 71 // as the checkmark bit and then reinitialized (to 1) as the type bit when 72 // finished. 73 // 74 75 package runtime 76 77 import ( 78 "runtime/internal/atomic" 79 "runtime/internal/sys" 80 "unsafe" 81 ) 82 83 const ( 84 bitPointer = 1 << 0 85 bitScan = 1 << 4 86 87 heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries 88 wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte 89 90 // all scan/pointer bits in a byte 91 bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) 92 bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift) 93 ) 94 95 // addb returns the byte pointer p+n. 96 //go:nowritebarrier 97 //go:nosplit 98 func addb(p *byte, n uintptr) *byte { 99 // Note: wrote out full expression instead of calling add(p, n) 100 // to reduce the number of temporaries generated by the 101 // compiler for this trivial expression during inlining. 102 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) 103 } 104 105 // subtractb returns the byte pointer p-n. 106 //go:nowritebarrier 107 //go:nosplit 108 func subtractb(p *byte, n uintptr) *byte { 109 // Note: wrote out full expression instead of calling add(p, -n) 110 // to reduce the number of temporaries generated by the 111 // compiler for this trivial expression during inlining. 112 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n)) 113 } 114 115 // add1 returns the byte pointer p+1. 116 //go:nowritebarrier 117 //go:nosplit 118 func add1(p *byte) *byte { 119 // Note: wrote out full expression instead of calling addb(p, 1) 120 // to reduce the number of temporaries generated by the 121 // compiler for this trivial expression during inlining. 122 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1)) 123 } 124 125 // subtract1 returns the byte pointer p-1. 126 //go:nowritebarrier 127 // 128 // nosplit because it is used during write barriers and must not be preempted. 129 //go:nosplit 130 func subtract1(p *byte) *byte { 131 // Note: wrote out full expression instead of calling subtractb(p, 1) 132 // to reduce the number of temporaries generated by the 133 // compiler for this trivial expression during inlining. 134 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) 135 } 136 137 // heapBits provides access to the bitmap bits for a single heap word. 138 // The methods on heapBits take value receivers so that the compiler 139 // can more easily inline calls to those methods and registerize the 140 // struct fields independently. 141 type heapBits struct { 142 bitp *uint8 143 shift uint32 144 arena uint32 // Index of heap arena containing bitp 145 last *uint8 // Last byte arena's bitmap 146 } 147 148 // Make the compiler check that heapBits.arena is large enough to hold 149 // the maximum arena frame number. 150 var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1} 151 152 // markBits provides access to the mark bit for an object in the heap. 153 // bytep points to the byte holding the mark bit. 154 // mask is a byte with a single bit set that can be &ed with *bytep 155 // to see if the bit has been set. 156 // *m.byte&m.mask != 0 indicates the mark bit is set. 157 // index can be used along with span information to generate 158 // the address of the object in the heap. 159 // We maintain one set of mark bits for allocation and one for 160 // marking purposes. 161 type markBits struct { 162 bytep *uint8 163 mask uint8 164 index uintptr 165 } 166 167 //go:nosplit 168 func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { 169 bytep, mask := s.allocBits.bitp(allocBitIndex) 170 return markBits{bytep, mask, allocBitIndex} 171 } 172 173 // refillAllocCache takes 8 bytes s.allocBits starting at whichByte 174 // and negates them so that ctz (count trailing zeros) instructions 175 // can be used. It then places these 8 bytes into the cached 64 bit 176 // s.allocCache. 177 func (s *mspan) refillAllocCache(whichByte uintptr) { 178 bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte))) 179 aCache := uint64(0) 180 aCache |= uint64(bytes[0]) 181 aCache |= uint64(bytes[1]) << (1 * 8) 182 aCache |= uint64(bytes[2]) << (2 * 8) 183 aCache |= uint64(bytes[3]) << (3 * 8) 184 aCache |= uint64(bytes[4]) << (4 * 8) 185 aCache |= uint64(bytes[5]) << (5 * 8) 186 aCache |= uint64(bytes[6]) << (6 * 8) 187 aCache |= uint64(bytes[7]) << (7 * 8) 188 s.allocCache = ^aCache 189 } 190 191 // nextFreeIndex returns the index of the next free object in s at 192 // or after s.freeindex. 193 // There are hardware instructions that can be used to make this 194 // faster if profiling warrants it. 195 func (s *mspan) nextFreeIndex() uintptr { 196 sfreeindex := s.freeindex 197 snelems := s.nelems 198 if sfreeindex == snelems { 199 return sfreeindex 200 } 201 if sfreeindex > snelems { 202 throw("s.freeindex > s.nelems") 203 } 204 205 aCache := s.allocCache 206 207 bitIndex := sys.Ctz64(aCache) 208 for bitIndex == 64 { 209 // Move index to start of next cached bits. 210 sfreeindex = (sfreeindex + 64) &^ (64 - 1) 211 if sfreeindex >= snelems { 212 s.freeindex = snelems 213 return snelems 214 } 215 whichByte := sfreeindex / 8 216 // Refill s.allocCache with the next 64 alloc bits. 217 s.refillAllocCache(whichByte) 218 aCache = s.allocCache 219 bitIndex = sys.Ctz64(aCache) 220 // nothing available in cached bits 221 // grab the next 8 bytes and try again. 222 } 223 result := sfreeindex + uintptr(bitIndex) 224 if result >= snelems { 225 s.freeindex = snelems 226 return snelems 227 } 228 229 s.allocCache >>= uint(bitIndex + 1) 230 sfreeindex = result + 1 231 232 if sfreeindex%64 == 0 && sfreeindex != snelems { 233 // We just incremented s.freeindex so it isn't 0. 234 // As each 1 in s.allocCache was encountered and used for allocation 235 // it was shifted away. At this point s.allocCache contains all 0s. 236 // Refill s.allocCache so that it corresponds 237 // to the bits at s.allocBits starting at s.freeindex. 238 whichByte := sfreeindex / 8 239 s.refillAllocCache(whichByte) 240 } 241 s.freeindex = sfreeindex 242 return result 243 } 244 245 // isFree reports whether the index'th object in s is unallocated. 246 // 247 // The caller must ensure s.state is mSpanInUse, and there must have 248 // been no preemption points since ensuring this (which could allow a 249 // GC transition, which would allow the state to change). 250 func (s *mspan) isFree(index uintptr) bool { 251 if index < s.freeindex { 252 return false 253 } 254 bytep, mask := s.allocBits.bitp(index) 255 return *bytep&mask == 0 256 } 257 258 func (s *mspan) objIndex(p uintptr) uintptr { 259 byteOffset := p - s.base() 260 if byteOffset == 0 { 261 return 0 262 } 263 if s.baseMask != 0 { 264 // s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift 265 return byteOffset >> s.divShift 266 } 267 return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2) 268 } 269 270 func markBitsForAddr(p uintptr) markBits { 271 s := spanOf(p) 272 objIndex := s.objIndex(p) 273 return s.markBitsForIndex(objIndex) 274 } 275 276 func (s *mspan) markBitsForIndex(objIndex uintptr) markBits { 277 bytep, mask := s.gcmarkBits.bitp(objIndex) 278 return markBits{bytep, mask, objIndex} 279 } 280 281 func (s *mspan) markBitsForBase() markBits { 282 return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0} 283 } 284 285 // isMarked reports whether mark bit m is set. 286 func (m markBits) isMarked() bool { 287 return *m.bytep&m.mask != 0 288 } 289 290 // setMarked sets the marked bit in the markbits, atomically. 291 func (m markBits) setMarked() { 292 // Might be racing with other updates, so use atomic update always. 293 // We used to be clever here and use a non-atomic update in certain 294 // cases, but it's not worth the risk. 295 atomic.Or8(m.bytep, m.mask) 296 } 297 298 // setMarkedNonAtomic sets the marked bit in the markbits, non-atomically. 299 func (m markBits) setMarkedNonAtomic() { 300 *m.bytep |= m.mask 301 } 302 303 // clearMarked clears the marked bit in the markbits, atomically. 304 func (m markBits) clearMarked() { 305 // Might be racing with other updates, so use atomic update always. 306 // We used to be clever here and use a non-atomic update in certain 307 // cases, but it's not worth the risk. 308 atomic.And8(m.bytep, ^m.mask) 309 } 310 311 // markBitsForSpan returns the markBits for the span base address base. 312 func markBitsForSpan(base uintptr) (mbits markBits) { 313 mbits = markBitsForAddr(base) 314 if mbits.mask != 1 { 315 throw("markBitsForSpan: unaligned start") 316 } 317 return mbits 318 } 319 320 // advance advances the markBits to the next object in the span. 321 func (m *markBits) advance() { 322 if m.mask == 1<<7 { 323 m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1)) 324 m.mask = 1 325 } else { 326 m.mask = m.mask << 1 327 } 328 m.index++ 329 } 330 331 // heapBitsForAddr returns the heapBits for the address addr. 332 // The caller must ensure addr is in an allocated span. 333 // In particular, be careful not to point past the end of an object. 334 // 335 // nosplit because it is used during write barriers and must not be preempted. 336 //go:nosplit 337 func heapBitsForAddr(addr uintptr) (h heapBits) { 338 // 2 bits per word, 4 pairs per byte, and a mask is hard coded. 339 arena := arenaIndex(addr) 340 ha := mheap_.arenas[arena.l1()][arena.l2()] 341 // The compiler uses a load for nil checking ha, but in this 342 // case we'll almost never hit that cache line again, so it 343 // makes more sense to do a value check. 344 if ha == nil { 345 // addr is not in the heap. Return nil heapBits, which 346 // we expect to crash in the caller. 347 return 348 } 349 h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes] 350 h.shift = uint32((addr / sys.PtrSize) & 3) 351 h.arena = uint32(arena) 352 h.last = &ha.bitmap[len(ha.bitmap)-1] 353 return 354 } 355 356 // badPointer throws bad pointer in heap panic. 357 func badPointer(s *mspan, p, refBase, refOff uintptr) { 358 // Typically this indicates an incorrect use 359 // of unsafe or cgo to store a bad pointer in 360 // the Go heap. It may also indicate a runtime 361 // bug. 362 // 363 // TODO(austin): We could be more aggressive 364 // and detect pointers to unallocated objects 365 // in allocated spans. 366 printlock() 367 print("runtime: pointer ", hex(p)) 368 state := s.state.get() 369 if state != mSpanInUse { 370 print(" to unallocated span") 371 } else { 372 print(" to unused region of span") 373 } 374 print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state, "\n") 375 if refBase != 0 { 376 print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") 377 gcDumpObject("object", refBase, refOff) 378 } 379 getg().m.traceback = 2 380 throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)") 381 } 382 383 // findObject returns the base address for the heap object containing 384 // the address p, the object's span, and the index of the object in s. 385 // If p does not point into a heap object, it returns base == 0. 386 // 387 // If p points is an invalid heap pointer and debug.invalidptr != 0, 388 // findObject panics. 389 // 390 // For gccgo, the forStack parameter is true if the value came from the stack. 391 // The stack is collected conservatively and may contain invalid pointers. 392 // 393 // refBase and refOff optionally give the base address of the object 394 // in which the pointer p was found and the byte offset at which it 395 // was found. These are used for error reporting. 396 // 397 // It is nosplit so it is safe for p to be a pointer to the current goroutine's stack. 398 // Since p is a uintptr, it would not be adjusted if the stack were to move. 399 //go:nosplit 400 func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *mspan, objIndex uintptr) { 401 s = spanOf(p) 402 // If s is nil, the virtual address has never been part of the heap. 403 // This pointer may be to some mmap'd region, so we allow it. 404 if s == nil { 405 return 406 } 407 // If p is a bad pointer, it may not be in s's bounds. 408 // 409 // Check s.state to synchronize with span initialization 410 // before checking other fields. See also spanOfHeap. 411 if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit { 412 // Pointers into stacks are also ok, the runtime manages these explicitly. 413 if state == mSpanManual || forStack { 414 return 415 } 416 // The following ensures that we are rigorous about what data 417 // structures hold valid pointers. 418 if debug.invalidptr != 0 { 419 badPointer(s, p, refBase, refOff) 420 } 421 return 422 } 423 424 if forStack { 425 // A span can be entered in mheap_.spans, and be set 426 // to mSpanInUse, before it is fully initialized. 427 // All we need in practice is allocBits and gcmarkBits, 428 // so make sure they are set. 429 if s.allocBits == nil || s.gcmarkBits == nil { 430 return 431 } 432 } 433 434 // If this span holds object of a power of 2 size, just mask off the bits to 435 // the interior of the object. Otherwise use the size to get the base. 436 if s.baseMask != 0 { 437 // optimize for power of 2 sized objects. 438 base = s.base() 439 base = base + (p-base)&uintptr(s.baseMask) 440 objIndex = (base - s.base()) >> s.divShift 441 // base = p & s.baseMask is faster for small spans, 442 // but doesn't work for large spans. 443 // Overall, it's faster to use the more general computation above. 444 } else { 445 base = s.base() 446 if p-base >= s.elemsize { 447 // n := (p - base) / s.elemsize, using division by multiplication 448 objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2 449 base += objIndex * s.elemsize 450 } 451 } 452 return 453 } 454 455 // next returns the heapBits describing the next pointer-sized word in memory. 456 // That is, if h describes address p, h.next() describes p+ptrSize. 457 // Note that next does not modify h. The caller must record the result. 458 // 459 // nosplit because it is used during write barriers and must not be preempted. 460 //go:nosplit 461 func (h heapBits) next() heapBits { 462 if h.shift < 3*heapBitsShift { 463 h.shift += heapBitsShift 464 } else if h.bitp != h.last { 465 h.bitp, h.shift = add1(h.bitp), 0 466 } else { 467 // Move to the next arena. 468 return h.nextArena() 469 } 470 return h 471 } 472 473 // nextArena advances h to the beginning of the next heap arena. 474 // 475 // This is a slow-path helper to next. gc's inliner knows that 476 // heapBits.next can be inlined even though it calls this. This is 477 // marked noinline so it doesn't get inlined into next and cause next 478 // to be too big to inline. 479 // 480 //go:nosplit 481 //go:noinline 482 func (h heapBits) nextArena() heapBits { 483 h.arena++ 484 ai := arenaIdx(h.arena) 485 l2 := mheap_.arenas[ai.l1()] 486 if l2 == nil { 487 // We just passed the end of the object, which 488 // was also the end of the heap. Poison h. It 489 // should never be dereferenced at this point. 490 return heapBits{} 491 } 492 ha := l2[ai.l2()] 493 if ha == nil { 494 return heapBits{} 495 } 496 h.bitp, h.shift = &ha.bitmap[0], 0 497 h.last = &ha.bitmap[len(ha.bitmap)-1] 498 return h 499 } 500 501 // forward returns the heapBits describing n pointer-sized words ahead of h in memory. 502 // That is, if h describes address p, h.forward(n) describes p+n*ptrSize. 503 // h.forward(1) is equivalent to h.next(), just slower. 504 // Note that forward does not modify h. The caller must record the result. 505 // bits returns the heap bits for the current word. 506 //go:nosplit 507 func (h heapBits) forward(n uintptr) heapBits { 508 n += uintptr(h.shift) / heapBitsShift 509 nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4 510 h.shift = uint32(n%4) * heapBitsShift 511 if nbitp <= uintptr(unsafe.Pointer(h.last)) { 512 h.bitp = (*uint8)(unsafe.Pointer(nbitp)) 513 return h 514 } 515 516 // We're in a new heap arena. 517 past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1) 518 h.arena += 1 + uint32(past/heapArenaBitmapBytes) 519 ai := arenaIdx(h.arena) 520 if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil { 521 a := l2[ai.l2()] 522 h.bitp = &a.bitmap[past%heapArenaBitmapBytes] 523 h.last = &a.bitmap[len(a.bitmap)-1] 524 } else { 525 h.bitp, h.last = nil, nil 526 } 527 return h 528 } 529 530 // forwardOrBoundary is like forward, but stops at boundaries between 531 // contiguous sections of the bitmap. It returns the number of words 532 // advanced over, which will be <= n. 533 func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) { 534 maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp))) 535 if n > maxn { 536 n = maxn 537 } 538 return h.forward(n), n 539 } 540 541 // The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. 542 // The result includes in its higher bits the bits for subsequent words 543 // described by the same bitmap byte. 544 // 545 // nosplit because it is used during write barriers and must not be preempted. 546 //go:nosplit 547 func (h heapBits) bits() uint32 { 548 // The (shift & 31) eliminates a test and conditional branch 549 // from the generated code. 550 return uint32(*h.bitp) >> (h.shift & 31) 551 } 552 553 // morePointers reports whether this word and all remaining words in this object 554 // are scalars. 555 // h must not describe the second word of the object. 556 func (h heapBits) morePointers() bool { 557 return h.bits()&bitScan != 0 558 } 559 560 // isPointer reports whether the heap bits describe a pointer word. 561 // 562 // nosplit because it is used during write barriers and must not be preempted. 563 //go:nosplit 564 func (h heapBits) isPointer() bool { 565 return h.bits()&bitPointer != 0 566 } 567 568 // isCheckmarked reports whether the heap bits have the checkmarked bit set. 569 // It must be told how large the object at h is, because the encoding of the 570 // checkmark bit varies by size. 571 // h must describe the initial word of the object. 572 func (h heapBits) isCheckmarked(size uintptr) bool { 573 if size == sys.PtrSize { 574 return (*h.bitp>>h.shift)&bitPointer != 0 575 } 576 // All multiword objects are 2-word aligned, 577 // so we know that the initial word's 2-bit pair 578 // and the second word's 2-bit pair are in the 579 // same heap bitmap byte, *h.bitp. 580 return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0 581 } 582 583 // setCheckmarked sets the checkmarked bit. 584 // It must be told how large the object at h is, because the encoding of the 585 // checkmark bit varies by size. 586 // h must describe the initial word of the object. 587 func (h heapBits) setCheckmarked(size uintptr) { 588 if size == sys.PtrSize { 589 atomic.Or8(h.bitp, bitPointer<<h.shift) 590 return 591 } 592 atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift)) 593 } 594 595 // bulkBarrierPreWrite executes a write barrier 596 // for every pointer slot in the memory range [src, src+size), 597 // using pointer/scalar information from [dst, dst+size). 598 // This executes the write barriers necessary before a memmove. 599 // src, dst, and size must be pointer-aligned. 600 // The range [dst, dst+size) must lie within a single object. 601 // It does not perform the actual writes. 602 // 603 // As a special case, src == 0 indicates that this is being used for a 604 // memclr. bulkBarrierPreWrite will pass 0 for the src of each write 605 // barrier. 606 // 607 // Callers should call bulkBarrierPreWrite immediately before 608 // calling memmove(dst, src, size). This function is marked nosplit 609 // to avoid being preempted; the GC must not stop the goroutine 610 // between the memmove and the execution of the barriers. 611 // The caller is also responsible for cgo pointer checks if this 612 // may be writing Go pointers into non-Go memory. 613 // 614 // The pointer bitmap is not maintained for allocations containing 615 // no pointers at all; any caller of bulkBarrierPreWrite must first 616 // make sure the underlying allocation contains pointers, usually 617 // by checking typ.ptrdata. 618 // 619 // Callers must perform cgo checks if writeBarrier.cgo. 620 // 621 //go:nosplit 622 func bulkBarrierPreWrite(dst, src, size uintptr) { 623 if (dst|src|size)&(sys.PtrSize-1) != 0 { 624 throw("bulkBarrierPreWrite: unaligned arguments") 625 } 626 if !writeBarrier.needed { 627 return 628 } 629 if s := spanOf(dst); s == nil { 630 // If dst is a global, use the data or BSS bitmaps to 631 // execute write barriers. 632 lo := 0 633 hi := len(gcRootsIndex) 634 for lo < hi { 635 m := lo + (hi-lo)/2 636 pr := gcRootsIndex[m] 637 addr := uintptr(pr.decl) 638 if addr <= dst && dst < addr+pr.size { 639 if dst < addr+pr.ptrdata { 640 bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata) 641 } 642 return 643 } 644 if dst < addr { 645 hi = m 646 } else { 647 lo = m + 1 648 } 649 } 650 return 651 } else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst { 652 // dst was heap memory at some point, but isn't now. 653 // It can't be a global. It must be either our stack, 654 // or in the case of direct channel sends, it could be 655 // another stack. Either way, no need for barriers. 656 // This will also catch if dst is in a freed span, 657 // though that should never have. 658 return 659 } 660 661 buf := &getg().m.p.ptr().wbBuf 662 h := heapBitsForAddr(dst) 663 if src == 0 { 664 for i := uintptr(0); i < size; i += sys.PtrSize { 665 if h.isPointer() { 666 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 667 if !buf.putFast(*dstx, 0) { 668 wbBufFlush(nil, 0) 669 } 670 } 671 h = h.next() 672 } 673 } else { 674 for i := uintptr(0); i < size; i += sys.PtrSize { 675 if h.isPointer() { 676 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 677 srcx := (*uintptr)(unsafe.Pointer(src + i)) 678 if !buf.putFast(*dstx, *srcx) { 679 wbBufFlush(nil, 0) 680 } 681 } 682 h = h.next() 683 } 684 } 685 } 686 687 // bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but 688 // does not execute write barriers for [dst, dst+size). 689 // 690 // In addition to the requirements of bulkBarrierPreWrite 691 // callers need to ensure [dst, dst+size) is zeroed. 692 // 693 // This is used for special cases where e.g. dst was just 694 // created and zeroed with malloc. 695 //go:nosplit 696 func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) { 697 if (dst|src|size)&(sys.PtrSize-1) != 0 { 698 throw("bulkBarrierPreWrite: unaligned arguments") 699 } 700 if !writeBarrier.needed { 701 return 702 } 703 buf := &getg().m.p.ptr().wbBuf 704 h := heapBitsForAddr(dst) 705 for i := uintptr(0); i < size; i += sys.PtrSize { 706 if h.isPointer() { 707 srcx := (*uintptr)(unsafe.Pointer(src + i)) 708 if !buf.putFast(0, *srcx) { 709 wbBufFlush(nil, 0) 710 } 711 } 712 h = h.next() 713 } 714 } 715 716 // bulkBarrierBitmap executes write barriers for copying from [src, 717 // src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is 718 // assumed to start maskOffset bytes into the data covered by the 719 // bitmap in bits (which may not be a multiple of 8). 720 // 721 // This is used by bulkBarrierPreWrite for writes to data and BSS. 722 // 723 //go:nosplit 724 func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { 725 word := maskOffset / sys.PtrSize 726 bits = addb(bits, word/8) 727 mask := uint8(1) << (word % 8) 728 729 buf := &getg().m.p.ptr().wbBuf 730 for i := uintptr(0); i < size; i += sys.PtrSize { 731 if mask == 0 { 732 bits = addb(bits, 1) 733 if *bits == 0 { 734 // Skip 8 words. 735 i += 7 * sys.PtrSize 736 continue 737 } 738 mask = 1 739 } 740 if *bits&mask != 0 { 741 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 742 if src == 0 { 743 if !buf.putFast(*dstx, 0) { 744 wbBufFlush(nil, 0) 745 } 746 } else { 747 srcx := (*uintptr)(unsafe.Pointer(src + i)) 748 if !buf.putFast(*dstx, *srcx) { 749 wbBufFlush(nil, 0) 750 } 751 } 752 } 753 mask <<= 1 754 } 755 } 756 757 // typeBitsBulkBarrier executes a write barrier for every 758 // pointer that would be copied from [src, src+size) to [dst, 759 // dst+size) by a memmove using the type bitmap to locate those 760 // pointer slots. 761 // 762 // The type typ must correspond exactly to [src, src+size) and [dst, dst+size). 763 // dst, src, and size must be pointer-aligned. 764 // The type typ must have a plain bitmap, not a GC program. 765 // The only use of this function is in channel sends, and the 766 // 64 kB channel element limit takes care of this for us. 767 // 768 // Must not be preempted because it typically runs right before memmove, 769 // and the GC must observe them as an atomic action. 770 // 771 // Callers must perform cgo checks if writeBarrier.cgo. 772 // 773 //go:nosplit 774 func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { 775 if typ == nil { 776 throw("runtime: typeBitsBulkBarrier without type") 777 } 778 if typ.size != size { 779 println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size) 780 throw("runtime: invalid typeBitsBulkBarrier") 781 } 782 if typ.kind&kindGCProg != 0 { 783 println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog") 784 throw("runtime: invalid typeBitsBulkBarrier") 785 } 786 if !writeBarrier.needed { 787 return 788 } 789 ptrmask := typ.gcdata 790 buf := &getg().m.p.ptr().wbBuf 791 var bits uint32 792 for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize { 793 if i&(sys.PtrSize*8-1) == 0 { 794 bits = uint32(*ptrmask) 795 ptrmask = addb(ptrmask, 1) 796 } else { 797 bits = bits >> 1 798 } 799 if bits&1 != 0 { 800 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 801 srcx := (*uintptr)(unsafe.Pointer(src + i)) 802 if !buf.putFast(*dstx, *srcx) { 803 wbBufFlush(nil, 0) 804 } 805 } 806 } 807 } 808 809 // The methods operating on spans all require that h has been returned 810 // by heapBitsForSpan and that size, n, total are the span layout description 811 // returned by the mspan's layout method. 812 // If total > size*n, it means that there is extra leftover memory in the span, 813 // usually due to rounding. 814 // 815 // TODO(rsc): Perhaps introduce a different heapBitsSpan type. 816 817 // initSpan initializes the heap bitmap for a span. 818 // It clears all checkmark bits. 819 // If this is a span of pointer-sized objects, it initializes all 820 // words to pointer/scan. 821 // Otherwise, it initializes all words to scalar/dead. 822 func (h heapBits) initSpan(s *mspan) { 823 // Clear bits corresponding to objects. 824 nw := (s.npages << _PageShift) / sys.PtrSize 825 if nw%wordsPerBitmapByte != 0 { 826 throw("initSpan: unaligned length") 827 } 828 if h.shift != 0 { 829 throw("initSpan: unaligned base") 830 } 831 isPtrs := sys.PtrSize == 8 && s.elemsize == sys.PtrSize 832 for nw > 0 { 833 hNext, anw := h.forwardOrBoundary(nw) 834 nbyte := anw / wordsPerBitmapByte 835 if isPtrs { 836 bitp := h.bitp 837 for i := uintptr(0); i < nbyte; i++ { 838 *bitp = bitPointerAll | bitScanAll 839 bitp = add1(bitp) 840 } 841 } else { 842 memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte) 843 } 844 h = hNext 845 nw -= anw 846 } 847 } 848 849 // initCheckmarkSpan initializes a span for being checkmarked. 850 // It clears the checkmark bits, which are set to 1 in normal operation. 851 func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { 852 // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. 853 if sys.PtrSize == 8 && size == sys.PtrSize { 854 // Checkmark bit is type bit, bottom bit of every 2-bit entry. 855 // Only possible on 64-bit system, since minimum size is 8. 856 // Must clear type bit (checkmark bit) of every word. 857 // The type bit is the lower of every two-bit pair. 858 for i := uintptr(0); i < n; i += wordsPerBitmapByte { 859 *h.bitp &^= bitPointerAll 860 h = h.forward(wordsPerBitmapByte) 861 } 862 return 863 } 864 for i := uintptr(0); i < n; i++ { 865 *h.bitp &^= bitScan << (heapBitsShift + h.shift) 866 h = h.forward(size / sys.PtrSize) 867 } 868 } 869 870 // clearCheckmarkSpan undoes all the checkmarking in a span. 871 // The actual checkmark bits are ignored, so the only work to do 872 // is to fix the pointer bits. (Pointer bits are ignored by scanobject 873 // but consulted by typedmemmove.) 874 func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { 875 // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. 876 if sys.PtrSize == 8 && size == sys.PtrSize { 877 // Checkmark bit is type bit, bottom bit of every 2-bit entry. 878 // Only possible on 64-bit system, since minimum size is 8. 879 // Must clear type bit (checkmark bit) of every word. 880 // The type bit is the lower of every two-bit pair. 881 for i := uintptr(0); i < n; i += wordsPerBitmapByte { 882 *h.bitp |= bitPointerAll 883 h = h.forward(wordsPerBitmapByte) 884 } 885 } 886 } 887 888 // oneBitCount is indexed by byte and produces the 889 // number of 1 bits in that byte. For example 128 has 1 bit set 890 // and oneBitCount[128] will holds 1. 891 var oneBitCount = [256]uint8{ 892 0, 1, 1, 2, 1, 2, 2, 3, 893 1, 2, 2, 3, 2, 3, 3, 4, 894 1, 2, 2, 3, 2, 3, 3, 4, 895 2, 3, 3, 4, 3, 4, 4, 5, 896 1, 2, 2, 3, 2, 3, 3, 4, 897 2, 3, 3, 4, 3, 4, 4, 5, 898 2, 3, 3, 4, 3, 4, 4, 5, 899 3, 4, 4, 5, 4, 5, 5, 6, 900 1, 2, 2, 3, 2, 3, 3, 4, 901 2, 3, 3, 4, 3, 4, 4, 5, 902 2, 3, 3, 4, 3, 4, 4, 5, 903 3, 4, 4, 5, 4, 5, 5, 6, 904 2, 3, 3, 4, 3, 4, 4, 5, 905 3, 4, 4, 5, 4, 5, 5, 6, 906 3, 4, 4, 5, 4, 5, 5, 6, 907 4, 5, 5, 6, 5, 6, 6, 7, 908 1, 2, 2, 3, 2, 3, 3, 4, 909 2, 3, 3, 4, 3, 4, 4, 5, 910 2, 3, 3, 4, 3, 4, 4, 5, 911 3, 4, 4, 5, 4, 5, 5, 6, 912 2, 3, 3, 4, 3, 4, 4, 5, 913 3, 4, 4, 5, 4, 5, 5, 6, 914 3, 4, 4, 5, 4, 5, 5, 6, 915 4, 5, 5, 6, 5, 6, 6, 7, 916 2, 3, 3, 4, 3, 4, 4, 5, 917 3, 4, 4, 5, 4, 5, 5, 6, 918 3, 4, 4, 5, 4, 5, 5, 6, 919 4, 5, 5, 6, 5, 6, 6, 7, 920 3, 4, 4, 5, 4, 5, 5, 6, 921 4, 5, 5, 6, 5, 6, 6, 7, 922 4, 5, 5, 6, 5, 6, 6, 7, 923 5, 6, 6, 7, 6, 7, 7, 8} 924 925 // countAlloc returns the number of objects allocated in span s by 926 // scanning the allocation bitmap. 927 // TODO:(rlh) Use popcount intrinsic. 928 func (s *mspan) countAlloc() int { 929 count := 0 930 maxIndex := s.nelems / 8 931 for i := uintptr(0); i < maxIndex; i++ { 932 mrkBits := *s.gcmarkBits.bytep(i) 933 count += int(oneBitCount[mrkBits]) 934 } 935 if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 { 936 mrkBits := *s.gcmarkBits.bytep(maxIndex) 937 mask := uint8((1 << bitsInLastByte) - 1) 938 bits := mrkBits & mask 939 count += int(oneBitCount[bits]) 940 } 941 return count 942 } 943 944 // heapBitsSetType records that the new allocation [x, x+size) 945 // holds in [x, x+dataSize) one or more values of type typ. 946 // (The number of values is given by dataSize / typ.size.) 947 // If dataSize < size, the fragment [x+dataSize, x+size) is 948 // recorded as non-pointer data. 949 // It is known that the type has pointers somewhere; 950 // malloc does not call heapBitsSetType when there are no pointers, 951 // because all free objects are marked as noscan during 952 // heapBitsSweepSpan. 953 // 954 // There can only be one allocation from a given span active at a time, 955 // and the bitmap for a span always falls on byte boundaries, 956 // so there are no write-write races for access to the heap bitmap. 957 // Hence, heapBitsSetType can access the bitmap without atomics. 958 // 959 // There can be read-write races between heapBitsSetType and things 960 // that read the heap bitmap like scanobject. However, since 961 // heapBitsSetType is only used for objects that have not yet been 962 // made reachable, readers will ignore bits being modified by this 963 // function. This does mean this function cannot transiently modify 964 // bits that belong to neighboring objects. Also, on weakly-ordered 965 // machines, callers must execute a store/store (publication) barrier 966 // between calling this function and making the object reachable. 967 func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { 968 const doubleCheck = false // slow but helpful; enable to test modifications to this code 969 970 // dataSize is always size rounded up to the next malloc size class, 971 // except in the case of allocating a defer block, in which case 972 // size is sizeof(_defer{}) (at least 6 words) and dataSize may be 973 // arbitrarily larger. 974 // 975 // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore 976 // assume that dataSize == size without checking it explicitly. 977 978 if sys.PtrSize == 8 && size == sys.PtrSize { 979 // It's one word and it has pointers, it must be a pointer. 980 // Since all allocated one-word objects are pointers 981 // (non-pointers are aggregated into tinySize allocations), 982 // initSpan sets the pointer bits for us. Nothing to do here. 983 if doubleCheck { 984 h := heapBitsForAddr(x) 985 if !h.isPointer() { 986 throw("heapBitsSetType: pointer bit missing") 987 } 988 if !h.morePointers() { 989 throw("heapBitsSetType: scan bit missing") 990 } 991 } 992 return 993 } 994 995 h := heapBitsForAddr(x) 996 ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below) 997 998 // Heap bitmap bits for 2-word object are only 4 bits, 999 // so also shared with objects next to it. 1000 // This is called out as a special case primarily for 32-bit systems, 1001 // so that on 32-bit systems the code below can assume all objects 1002 // are 4-word aligned (because they're all 16-byte aligned). 1003 if size == 2*sys.PtrSize { 1004 if typ.size == sys.PtrSize { 1005 // We're allocating a block big enough to hold two pointers. 1006 // On 64-bit, that means the actual object must be two pointers, 1007 // or else we'd have used the one-pointer-sized block. 1008 // On 32-bit, however, this is the 8-byte block, the smallest one. 1009 // So it could be that we're allocating one pointer and this was 1010 // just the smallest block available. Distinguish by checking dataSize. 1011 // (In general the number of instances of typ being allocated is 1012 // dataSize/typ.size.) 1013 if sys.PtrSize == 4 && dataSize == sys.PtrSize { 1014 // 1 pointer object. On 32-bit machines clear the bit for the 1015 // unused second word. 1016 *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift 1017 *h.bitp |= (bitPointer | bitScan) << h.shift 1018 } else { 1019 // 2-element slice of pointer. 1020 *h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift 1021 } 1022 return 1023 } 1024 // Otherwise typ.size must be 2*sys.PtrSize, 1025 // and typ.kind&kindGCProg == 0. 1026 if doubleCheck { 1027 if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 { 1028 print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n") 1029 throw("heapBitsSetType") 1030 } 1031 } 1032 b := uint32(*ptrmask) 1033 hb := (b & 3) | bitScan 1034 // bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1. 1035 // 110011 is shifted h.shift and complemented. 1036 // This clears out the bits that are about to be 1037 // ored into *h.hbitp in the next instructions. 1038 *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift 1039 *h.bitp |= uint8(hb << h.shift) 1040 return 1041 } 1042 1043 // Copy from 1-bit ptrmask into 2-bit bitmap. 1044 // The basic approach is to use a single uintptr as a bit buffer, 1045 // alternating between reloading the buffer and writing bitmap bytes. 1046 // In general, one load can supply two bitmap byte writes. 1047 // This is a lot of lines of code, but it compiles into relatively few 1048 // machine instructions. 1049 1050 outOfPlace := false 1051 if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrand()%2 == 0) { 1052 // This object spans heap arenas, so the bitmap may be 1053 // discontiguous. Unroll it into the object instead 1054 // and then copy it out. 1055 // 1056 // In doubleCheck mode, we randomly do this anyway to 1057 // stress test the bitmap copying path. 1058 outOfPlace = true 1059 h.bitp = (*uint8)(unsafe.Pointer(x)) 1060 h.last = nil 1061 } 1062 1063 var ( 1064 // Ptrmask input. 1065 p *byte // last ptrmask byte read 1066 b uintptr // ptrmask bits already loaded 1067 nb uintptr // number of bits in b at next read 1068 endp *byte // final ptrmask byte to read (then repeat) 1069 endnb uintptr // number of valid bits in *endp 1070 pbits uintptr // alternate source of bits 1071 1072 // Heap bitmap output. 1073 w uintptr // words processed 1074 nw uintptr // number of words to process 1075 hbitp *byte // next heap bitmap byte to write 1076 hb uintptr // bits being prepared for *hbitp 1077 ) 1078 1079 hbitp = h.bitp 1080 1081 // Handle GC program. Delayed until this part of the code 1082 // so that we can use the same double-checking mechanism 1083 // as the 1-bit case. Nothing above could have encountered 1084 // GC programs: the cases were all too small. 1085 if typ.kind&kindGCProg != 0 { 1086 heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4)) 1087 if doubleCheck { 1088 // Double-check the heap bits written by GC program 1089 // by running the GC program to create a 1-bit pointer mask 1090 // and then jumping to the double-check code below. 1091 // This doesn't catch bugs shared between the 1-bit and 4-bit 1092 // GC program execution, but it does catch mistakes specific 1093 // to just one of those and bugs in heapBitsSetTypeGCProg's 1094 // implementation of arrays. 1095 lock(&debugPtrmask.lock) 1096 if debugPtrmask.data == nil { 1097 debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys)) 1098 } 1099 ptrmask = debugPtrmask.data 1100 runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) 1101 } 1102 goto Phase4 1103 } 1104 1105 // Note about sizes: 1106 // 1107 // typ.size is the number of words in the object, 1108 // and typ.ptrdata is the number of words in the prefix 1109 // of the object that contains pointers. That is, the final 1110 // typ.size - typ.ptrdata words contain no pointers. 1111 // This allows optimization of a common pattern where 1112 // an object has a small header followed by a large scalar 1113 // buffer. If we know the pointers are over, we don't have 1114 // to scan the buffer's heap bitmap at all. 1115 // The 1-bit ptrmasks are sized to contain only bits for 1116 // the typ.ptrdata prefix, zero padded out to a full byte 1117 // of bitmap. This code sets nw (below) so that heap bitmap 1118 // bits are only written for the typ.ptrdata prefix; if there is 1119 // more room in the allocated object, the next heap bitmap 1120 // entry is a 00, indicating that there are no more pointers 1121 // to scan. So only the ptrmask for the ptrdata bytes is needed. 1122 // 1123 // Replicated copies are not as nice: if there is an array of 1124 // objects with scalar tails, all but the last tail does have to 1125 // be initialized, because there is no way to say "skip forward". 1126 // However, because of the possibility of a repeated type with 1127 // size not a multiple of 4 pointers (one heap bitmap byte), 1128 // the code already must handle the last ptrmask byte specially 1129 // by treating it as containing only the bits for endnb pointers, 1130 // where endnb <= 4. We represent large scalar tails that must 1131 // be expanded in the replication by setting endnb larger than 4. 1132 // This will have the effect of reading many bits out of b, 1133 // but once the real bits are shifted out, b will supply as many 1134 // zero bits as we try to read, which is exactly what we need. 1135 1136 p = ptrmask 1137 if typ.size < dataSize { 1138 // Filling in bits for an array of typ. 1139 // Set up for repetition of ptrmask during main loop. 1140 // Note that ptrmask describes only a prefix of 1141 const maxBits = sys.PtrSize*8 - 7 1142 if typ.ptrdata/sys.PtrSize <= maxBits { 1143 // Entire ptrmask fits in uintptr with room for a byte fragment. 1144 // Load into pbits and never read from ptrmask again. 1145 // This is especially important when the ptrmask has 1146 // fewer than 8 bits in it; otherwise the reload in the middle 1147 // of the Phase 2 loop would itself need to loop to gather 1148 // at least 8 bits. 1149 1150 // Accumulate ptrmask into b. 1151 // ptrmask is sized to describe only typ.ptrdata, but we record 1152 // it as describing typ.size bytes, since all the high bits are zero. 1153 nb = typ.ptrdata / sys.PtrSize 1154 for i := uintptr(0); i < nb; i += 8 { 1155 b |= uintptr(*p) << i 1156 p = add1(p) 1157 } 1158 nb = typ.size / sys.PtrSize 1159 1160 // Replicate ptrmask to fill entire pbits uintptr. 1161 // Doubling and truncating is fewer steps than 1162 // iterating by nb each time. (nb could be 1.) 1163 // Since we loaded typ.ptrdata/sys.PtrSize bits 1164 // but are pretending to have typ.size/sys.PtrSize, 1165 // there might be no replication necessary/possible. 1166 pbits = b 1167 endnb = nb 1168 if nb+nb <= maxBits { 1169 for endnb <= sys.PtrSize*8 { 1170 pbits |= pbits << endnb 1171 endnb += endnb 1172 } 1173 // Truncate to a multiple of original ptrmask. 1174 // Because nb+nb <= maxBits, nb fits in a byte. 1175 // Byte division is cheaper than uintptr division. 1176 endnb = uintptr(maxBits/byte(nb)) * nb 1177 pbits &= 1<<endnb - 1 1178 b = pbits 1179 nb = endnb 1180 } 1181 1182 // Clear p and endp as sentinel for using pbits. 1183 // Checked during Phase 2 loop. 1184 p = nil 1185 endp = nil 1186 } else { 1187 // Ptrmask is larger. Read it multiple times. 1188 n := (typ.ptrdata/sys.PtrSize+7)/8 - 1 1189 endp = addb(ptrmask, n) 1190 endnb = typ.size/sys.PtrSize - n*8 1191 } 1192 } 1193 if p != nil { 1194 b = uintptr(*p) 1195 p = add1(p) 1196 nb = 8 1197 } 1198 1199 if typ.size == dataSize { 1200 // Single entry: can stop once we reach the non-pointer data. 1201 nw = typ.ptrdata / sys.PtrSize 1202 } else { 1203 // Repeated instances of typ in an array. 1204 // Have to process first N-1 entries in full, but can stop 1205 // once we reach the non-pointer data in the final entry. 1206 nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize 1207 } 1208 if nw == 0 { 1209 // No pointers! Caller was supposed to check. 1210 println("runtime: invalid type ", typ.string()) 1211 throw("heapBitsSetType: called with non-pointer type") 1212 return 1213 } 1214 if nw < 2 { 1215 // Must write at least 2 words, because the "no scan" 1216 // encoding doesn't take effect until the third word. 1217 nw = 2 1218 } 1219 1220 // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2). 1221 // The leading byte is special because it contains the bits for word 1, 1222 // which does not have the scan bit set. 1223 // The leading half-byte is special because it's a half a byte, 1224 // so we have to be careful with the bits already there. 1225 switch { 1226 default: 1227 throw("heapBitsSetType: unexpected shift") 1228 1229 case h.shift == 0: 1230 // Ptrmask and heap bitmap are aligned. 1231 // Handle first byte of bitmap specially. 1232 // 1233 // The first byte we write out covers the first four 1234 // words of the object. The scan/dead bit on the first 1235 // word must be set to scan since there are pointers 1236 // somewhere in the object. The scan/dead bit on the 1237 // second word is the checkmark, so we don't set it. 1238 // In all following words, we set the scan/dead 1239 // appropriately to indicate that the object contains 1240 // to the next 2-bit entry in the bitmap. 1241 // 1242 // TODO: It doesn't matter if we set the checkmark, so 1243 // maybe this case isn't needed any more. 1244 hb = b & bitPointerAll 1245 hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) 1246 if w += 4; w >= nw { 1247 goto Phase3 1248 } 1249 *hbitp = uint8(hb) 1250 hbitp = add1(hbitp) 1251 b >>= 4 1252 nb -= 4 1253 1254 case sys.PtrSize == 8 && h.shift == 2: 1255 // Ptrmask and heap bitmap are misaligned. 1256 // The bits for the first two words are in a byte shared 1257 // with another object, so we must be careful with the bits 1258 // already there. 1259 // We took care of 1-word and 2-word objects above, 1260 // so this is at least a 6-word object. 1261 hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift) 1262 // This is not noscan, so set the scan bit in the 1263 // first word. 1264 hb |= bitScan << (2 * heapBitsShift) 1265 b >>= 2 1266 nb -= 2 1267 // Note: no bitScan for second word because that's 1268 // the checkmark. 1269 *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) 1270 *hbitp |= uint8(hb) 1271 hbitp = add1(hbitp) 1272 if w += 2; w >= nw { 1273 // We know that there is more data, because we handled 2-word objects above. 1274 // This must be at least a 6-word object. If we're out of pointer words, 1275 // mark no scan in next bitmap byte and finish. 1276 hb = 0 1277 w += 4 1278 goto Phase3 1279 } 1280 } 1281 1282 // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap. 1283 // The loop computes the bits for that last write but does not execute the write; 1284 // it leaves the bits in hb for processing by phase 3. 1285 // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to 1286 // use in the first half of the loop right now, and then we only adjust nb explicitly 1287 // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop. 1288 nb -= 4 1289 for { 1290 // Emit bitmap byte. 1291 // b has at least nb+4 bits, with one exception: 1292 // if w+4 >= nw, then b has only nw-w bits, 1293 // but we'll stop at the break and then truncate 1294 // appropriately in Phase 3. 1295 hb = b & bitPointerAll 1296 hb |= bitScanAll 1297 if w += 4; w >= nw { 1298 break 1299 } 1300 *hbitp = uint8(hb) 1301 hbitp = add1(hbitp) 1302 b >>= 4 1303 1304 // Load more bits. b has nb right now. 1305 if p != endp { 1306 // Fast path: keep reading from ptrmask. 1307 // nb unmodified: we just loaded 8 bits, 1308 // and the next iteration will consume 8 bits, 1309 // leaving us with the same nb the next time we're here. 1310 if nb < 8 { 1311 b |= uintptr(*p) << nb 1312 p = add1(p) 1313 } else { 1314 // Reduce the number of bits in b. 1315 // This is important if we skipped 1316 // over a scalar tail, since nb could 1317 // be larger than the bit width of b. 1318 nb -= 8 1319 } 1320 } else if p == nil { 1321 // Almost as fast path: track bit count and refill from pbits. 1322 // For short repetitions. 1323 if nb < 8 { 1324 b |= pbits << nb 1325 nb += endnb 1326 } 1327 nb -= 8 // for next iteration 1328 } else { 1329 // Slow path: reached end of ptrmask. 1330 // Process final partial byte and rewind to start. 1331 b |= uintptr(*p) << nb 1332 nb += endnb 1333 if nb < 8 { 1334 b |= uintptr(*ptrmask) << nb 1335 p = add1(ptrmask) 1336 } else { 1337 nb -= 8 1338 p = ptrmask 1339 } 1340 } 1341 1342 // Emit bitmap byte. 1343 hb = b & bitPointerAll 1344 hb |= bitScanAll 1345 if w += 4; w >= nw { 1346 break 1347 } 1348 *hbitp = uint8(hb) 1349 hbitp = add1(hbitp) 1350 b >>= 4 1351 } 1352 1353 Phase3: 1354 // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries. 1355 if w > nw { 1356 // Counting the 4 entries in hb not yet written to memory, 1357 // there are more entries than possible pointer slots. 1358 // Discard the excess entries (can't be more than 3). 1359 mask := uintptr(1)<<(4-(w-nw)) - 1 1360 hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits 1361 } 1362 1363 // Change nw from counting possibly-pointer words to total words in allocation. 1364 nw = size / sys.PtrSize 1365 1366 // Write whole bitmap bytes. 1367 // The first is hb, the rest are zero. 1368 if w <= nw { 1369 *hbitp = uint8(hb) 1370 hbitp = add1(hbitp) 1371 hb = 0 // for possible final half-byte below 1372 for w += 4; w <= nw; w += 4 { 1373 *hbitp = 0 1374 hbitp = add1(hbitp) 1375 } 1376 } 1377 1378 // Write final partial bitmap byte if any. 1379 // We know w > nw, or else we'd still be in the loop above. 1380 // It can be bigger only due to the 4 entries in hb that it counts. 1381 // If w == nw+4 then there's nothing left to do: we wrote all nw entries 1382 // and can discard the 4 sitting in hb. 1383 // But if w == nw+2, we need to write first two in hb. 1384 // The byte is shared with the next object, so be careful with 1385 // existing bits. 1386 if w == nw+2 { 1387 *hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb) 1388 } 1389 1390 Phase4: 1391 // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary. 1392 if outOfPlace { 1393 // TODO: We could probably make this faster by 1394 // handling [x+dataSize, x+size) specially. 1395 h := heapBitsForAddr(x) 1396 // cnw is the number of heap words, or bit pairs 1397 // remaining (like nw above). 1398 cnw := size / sys.PtrSize 1399 src := (*uint8)(unsafe.Pointer(x)) 1400 // We know the first and last byte of the bitmap are 1401 // not the same, but it's still possible for small 1402 // objects span arenas, so it may share bitmap bytes 1403 // with neighboring objects. 1404 // 1405 // Handle the first byte specially if it's shared. See 1406 // Phase 1 for why this is the only special case we need. 1407 if doubleCheck { 1408 if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) { 1409 print("x=", x, " size=", size, " cnw=", h.shift, "\n") 1410 throw("bad start shift") 1411 } 1412 } 1413 if sys.PtrSize == 8 && h.shift == 2 { 1414 *h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src 1415 h = h.next().next() 1416 cnw -= 2 1417 src = addb(src, 1) 1418 } 1419 // We're now byte aligned. Copy out to per-arena 1420 // bitmaps until the last byte (which may again be 1421 // partial). 1422 for cnw >= 4 { 1423 // This loop processes four words at a time, 1424 // so round cnw down accordingly. 1425 hNext, words := h.forwardOrBoundary(cnw / 4 * 4) 1426 1427 // n is the number of bitmap bytes to copy. 1428 n := words / 4 1429 memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n) 1430 cnw -= words 1431 h = hNext 1432 src = addb(src, n) 1433 } 1434 if doubleCheck && h.shift != 0 { 1435 print("cnw=", cnw, " h.shift=", h.shift, "\n") 1436 throw("bad shift after block copy") 1437 } 1438 // Handle the last byte if it's shared. 1439 if cnw == 2 { 1440 *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src 1441 src = addb(src, 1) 1442 h = h.next().next() 1443 } 1444 if doubleCheck { 1445 if uintptr(unsafe.Pointer(src)) > x+size { 1446 throw("copy exceeded object size") 1447 } 1448 if !(cnw == 0 || cnw == 2) { 1449 print("x=", x, " size=", size, " cnw=", cnw, "\n") 1450 throw("bad number of remaining words") 1451 } 1452 // Set up hbitp so doubleCheck code below can check it. 1453 hbitp = h.bitp 1454 } 1455 // Zero the object where we wrote the bitmap. 1456 memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x) 1457 } 1458 1459 // Double check the whole bitmap. 1460 if doubleCheck { 1461 // x+size may not point to the heap, so back up one 1462 // word and then call next(). 1463 end := heapBitsForAddr(x + size - sys.PtrSize).next() 1464 endAI := arenaIdx(end.arena) 1465 if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { 1466 // The unrolling code above walks hbitp just 1467 // past the bitmap without moving to the next 1468 // arena. Synthesize this for end.bitp. 1469 end.arena-- 1470 endAI = arenaIdx(end.arena) 1471 end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) 1472 end.last = nil 1473 } 1474 if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { 1475 println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) 1476 print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") 1477 print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") 1478 h0 := heapBitsForAddr(x) 1479 print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") 1480 print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n") 1481 throw("bad heapBitsSetType") 1482 } 1483 1484 // Double-check that bits to be written were written correctly. 1485 // Does not check that other bits were not written, unfortunately. 1486 h := heapBitsForAddr(x) 1487 nptr := typ.ptrdata / sys.PtrSize 1488 ndata := typ.size / sys.PtrSize 1489 count := dataSize / typ.size 1490 totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize 1491 for i := uintptr(0); i < size/sys.PtrSize; i++ { 1492 j := i % ndata 1493 var have, want uint8 1494 have = (*h.bitp >> h.shift) & (bitPointer | bitScan) 1495 if i >= totalptr { 1496 want = 0 // deadmarker 1497 if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 { 1498 want = bitScan 1499 } 1500 } else { 1501 if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { 1502 want |= bitPointer 1503 } 1504 if i != 1 { 1505 want |= bitScan 1506 } else { 1507 have &^= bitScan 1508 } 1509 } 1510 if have != want { 1511 println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) 1512 print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") 1513 print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n") 1514 print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") 1515 h0 := heapBitsForAddr(x) 1516 print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") 1517 print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") 1518 print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") 1519 println("at word", i, "offset", i*sys.PtrSize, "have", hex(have), "want", hex(want)) 1520 if typ.kind&kindGCProg != 0 { 1521 println("GC program:") 1522 dumpGCProg(addb(typ.gcdata, 4)) 1523 } 1524 throw("bad heapBitsSetType") 1525 } 1526 h = h.next() 1527 } 1528 if ptrmask == debugPtrmask.data { 1529 unlock(&debugPtrmask.lock) 1530 } 1531 } 1532 } 1533 1534 var debugPtrmask struct { 1535 lock mutex 1536 data *byte 1537 } 1538 1539 // heapBitsSetTypeGCProg implements heapBitsSetType using a GC program. 1540 // progSize is the size of the memory described by the program. 1541 // elemSize is the size of the element that the GC program describes (a prefix of). 1542 // dataSize is the total size of the intended data, a multiple of elemSize. 1543 // allocSize is the total size of the allocated memory. 1544 // 1545 // GC programs are only used for large allocations. 1546 // heapBitsSetType requires that allocSize is a multiple of 4 words, 1547 // so that the relevant bitmap bytes are not shared with surrounding 1548 // objects. 1549 func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) { 1550 if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 { 1551 // Alignment will be wrong. 1552 throw("heapBitsSetTypeGCProg: small allocation") 1553 } 1554 var totalBits uintptr 1555 if elemSize == dataSize { 1556 totalBits = runGCProg(prog, nil, h.bitp, 2) 1557 if totalBits*sys.PtrSize != progSize { 1558 println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize) 1559 throw("heapBitsSetTypeGCProg: unexpected bit count") 1560 } 1561 } else { 1562 count := dataSize / elemSize 1563 1564 // Piece together program trailer to run after prog that does: 1565 // literal(0) 1566 // repeat(1, elemSize-progSize-1) // zeros to fill element size 1567 // repeat(elemSize, count-1) // repeat that element for count 1568 // This zero-pads the data remaining in the first element and then 1569 // repeats that first element to fill the array. 1570 var trailer [40]byte // 3 varints (max 10 each) + some bytes 1571 i := 0 1572 if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 { 1573 // literal(0) 1574 trailer[i] = 0x01 1575 i++ 1576 trailer[i] = 0 1577 i++ 1578 if n > 1 { 1579 // repeat(1, n-1) 1580 trailer[i] = 0x81 1581 i++ 1582 n-- 1583 for ; n >= 0x80; n >>= 7 { 1584 trailer[i] = byte(n | 0x80) 1585 i++ 1586 } 1587 trailer[i] = byte(n) 1588 i++ 1589 } 1590 } 1591 // repeat(elemSize/ptrSize, count-1) 1592 trailer[i] = 0x80 1593 i++ 1594 n := elemSize / sys.PtrSize 1595 for ; n >= 0x80; n >>= 7 { 1596 trailer[i] = byte(n | 0x80) 1597 i++ 1598 } 1599 trailer[i] = byte(n) 1600 i++ 1601 n = count - 1 1602 for ; n >= 0x80; n >>= 7 { 1603 trailer[i] = byte(n | 0x80) 1604 i++ 1605 } 1606 trailer[i] = byte(n) 1607 i++ 1608 trailer[i] = 0 1609 i++ 1610 1611 runGCProg(prog, &trailer[0], h.bitp, 2) 1612 1613 // Even though we filled in the full array just now, 1614 // record that we only filled in up to the ptrdata of the 1615 // last element. This will cause the code below to 1616 // memclr the dead section of the final array element, 1617 // so that scanobject can stop early in the final element. 1618 totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize 1619 } 1620 endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4)) 1621 endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte)) 1622 memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg)) 1623 } 1624 1625 // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. 1626 // size the size of the region described by prog, in bytes. 1627 // The resulting bitvector will have no more than size/sys.PtrSize bits. 1628 func progToPointerMask(prog *byte, size uintptr) bitvector { 1629 n := (size/sys.PtrSize + 7) / 8 1630 x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] 1631 x[len(x)-1] = 0xa1 // overflow check sentinel 1632 n = runGCProg(prog, nil, &x[0], 1) 1633 if x[len(x)-1] != 0xa1 { 1634 throw("progToPointerMask: overflow") 1635 } 1636 return bitvector{int32(n), &x[0]} 1637 } 1638 1639 // Packed GC pointer bitmaps, aka GC programs. 1640 // 1641 // For large types containing arrays, the type information has a 1642 // natural repetition that can be encoded to save space in the 1643 // binary and in the memory representation of the type information. 1644 // 1645 // The encoding is a simple Lempel-Ziv style bytecode machine 1646 // with the following instructions: 1647 // 1648 // 00000000: stop 1649 // 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes 1650 // 10000000 n c: repeat the previous n bits c times; n, c are varints 1651 // 1nnnnnnn c: repeat the previous n bits c times; c is a varint 1652 1653 // runGCProg executes the GC program prog, and then trailer if non-nil, 1654 // writing to dst with entries of the given size. 1655 // If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst. 1656 // If size == 2, dst is the 2-bit heap bitmap, and writes move backward 1657 // starting at dst (because the heap bitmap does). In this case, the caller guarantees 1658 // that only whole bytes in dst need to be written. 1659 // 1660 // runGCProg returns the number of 1- or 2-bit entries written to memory. 1661 func runGCProg(prog, trailer, dst *byte, size int) uintptr { 1662 dstStart := dst 1663 1664 // Bits waiting to be written to memory. 1665 var bits uintptr 1666 var nbits uintptr 1667 1668 p := prog 1669 Run: 1670 for { 1671 // Flush accumulated full bytes. 1672 // The rest of the loop assumes that nbits <= 7. 1673 for ; nbits >= 8; nbits -= 8 { 1674 if size == 1 { 1675 *dst = uint8(bits) 1676 dst = add1(dst) 1677 bits >>= 8 1678 } else { 1679 v := bits&bitPointerAll | bitScanAll 1680 *dst = uint8(v) 1681 dst = add1(dst) 1682 bits >>= 4 1683 v = bits&bitPointerAll | bitScanAll 1684 *dst = uint8(v) 1685 dst = add1(dst) 1686 bits >>= 4 1687 } 1688 } 1689 1690 // Process one instruction. 1691 inst := uintptr(*p) 1692 p = add1(p) 1693 n := inst & 0x7F 1694 if inst&0x80 == 0 { 1695 // Literal bits; n == 0 means end of program. 1696 if n == 0 { 1697 // Program is over; continue in trailer if present. 1698 if trailer != nil { 1699 p = trailer 1700 trailer = nil 1701 continue 1702 } 1703 break Run 1704 } 1705 nbyte := n / 8 1706 for i := uintptr(0); i < nbyte; i++ { 1707 bits |= uintptr(*p) << nbits 1708 p = add1(p) 1709 if size == 1 { 1710 *dst = uint8(bits) 1711 dst = add1(dst) 1712 bits >>= 8 1713 } else { 1714 v := bits&0xf | bitScanAll 1715 *dst = uint8(v) 1716 dst = add1(dst) 1717 bits >>= 4 1718 v = bits&0xf | bitScanAll 1719 *dst = uint8(v) 1720 dst = add1(dst) 1721 bits >>= 4 1722 } 1723 } 1724 if n %= 8; n > 0 { 1725 bits |= uintptr(*p) << nbits 1726 p = add1(p) 1727 nbits += n 1728 } 1729 continue Run 1730 } 1731 1732 // Repeat. If n == 0, it is encoded in a varint in the next bytes. 1733 if n == 0 { 1734 for off := uint(0); ; off += 7 { 1735 x := uintptr(*p) 1736 p = add1(p) 1737 n |= (x & 0x7F) << off 1738 if x&0x80 == 0 { 1739 break 1740 } 1741 } 1742 } 1743 1744 // Count is encoded in a varint in the next bytes. 1745 c := uintptr(0) 1746 for off := uint(0); ; off += 7 { 1747 x := uintptr(*p) 1748 p = add1(p) 1749 c |= (x & 0x7F) << off 1750 if x&0x80 == 0 { 1751 break 1752 } 1753 } 1754 c *= n // now total number of bits to copy 1755 1756 // If the number of bits being repeated is small, load them 1757 // into a register and use that register for the entire loop 1758 // instead of repeatedly reading from memory. 1759 // Handling fewer than 8 bits here makes the general loop simpler. 1760 // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add 1761 // the pattern to a bit buffer holding at most 7 bits (a partial byte) 1762 // it will not overflow. 1763 src := dst 1764 const maxBits = sys.PtrSize*8 - 7 1765 if n <= maxBits { 1766 // Start with bits in output buffer. 1767 pattern := bits 1768 npattern := nbits 1769 1770 // If we need more bits, fetch them from memory. 1771 if size == 1 { 1772 src = subtract1(src) 1773 for npattern < n { 1774 pattern <<= 8 1775 pattern |= uintptr(*src) 1776 src = subtract1(src) 1777 npattern += 8 1778 } 1779 } else { 1780 src = subtract1(src) 1781 for npattern < n { 1782 pattern <<= 4 1783 pattern |= uintptr(*src) & 0xf 1784 src = subtract1(src) 1785 npattern += 4 1786 } 1787 } 1788 1789 // We started with the whole bit output buffer, 1790 // and then we loaded bits from whole bytes. 1791 // Either way, we might now have too many instead of too few. 1792 // Discard the extra. 1793 if npattern > n { 1794 pattern >>= npattern - n 1795 npattern = n 1796 } 1797 1798 // Replicate pattern to at most maxBits. 1799 if npattern == 1 { 1800 // One bit being repeated. 1801 // If the bit is 1, make the pattern all 1s. 1802 // If the bit is 0, the pattern is already all 0s, 1803 // but we can claim that the number of bits 1804 // in the word is equal to the number we need (c), 1805 // because right shift of bits will zero fill. 1806 if pattern == 1 { 1807 pattern = 1<<maxBits - 1 1808 npattern = maxBits 1809 } else { 1810 npattern = c 1811 } 1812 } else { 1813 b := pattern 1814 nb := npattern 1815 if nb+nb <= maxBits { 1816 // Double pattern until the whole uintptr is filled. 1817 for nb <= sys.PtrSize*8 { 1818 b |= b << nb 1819 nb += nb 1820 } 1821 // Trim away incomplete copy of original pattern in high bits. 1822 // TODO(rsc): Replace with table lookup or loop on systems without divide? 1823 nb = maxBits / npattern * npattern 1824 b &= 1<<nb - 1 1825 pattern = b 1826 npattern = nb 1827 } 1828 } 1829 1830 // Add pattern to bit buffer and flush bit buffer, c/npattern times. 1831 // Since pattern contains >8 bits, there will be full bytes to flush 1832 // on each iteration. 1833 for ; c >= npattern; c -= npattern { 1834 bits |= pattern << nbits 1835 nbits += npattern 1836 if size == 1 { 1837 for nbits >= 8 { 1838 *dst = uint8(bits) 1839 dst = add1(dst) 1840 bits >>= 8 1841 nbits -= 8 1842 } 1843 } else { 1844 for nbits >= 4 { 1845 *dst = uint8(bits&0xf | bitScanAll) 1846 dst = add1(dst) 1847 bits >>= 4 1848 nbits -= 4 1849 } 1850 } 1851 } 1852 1853 // Add final fragment to bit buffer. 1854 if c > 0 { 1855 pattern &= 1<<c - 1 1856 bits |= pattern << nbits 1857 nbits += c 1858 } 1859 continue Run 1860 } 1861 1862 // Repeat; n too large to fit in a register. 1863 // Since nbits <= 7, we know the first few bytes of repeated data 1864 // are already written to memory. 1865 off := n - nbits // n > nbits because n > maxBits and nbits <= 7 1866 if size == 1 { 1867 // Leading src fragment. 1868 src = subtractb(src, (off+7)/8) 1869 if frag := off & 7; frag != 0 { 1870 bits |= uintptr(*src) >> (8 - frag) << nbits 1871 src = add1(src) 1872 nbits += frag 1873 c -= frag 1874 } 1875 // Main loop: load one byte, write another. 1876 // The bits are rotating through the bit buffer. 1877 for i := c / 8; i > 0; i-- { 1878 bits |= uintptr(*src) << nbits 1879 src = add1(src) 1880 *dst = uint8(bits) 1881 dst = add1(dst) 1882 bits >>= 8 1883 } 1884 // Final src fragment. 1885 if c %= 8; c > 0 { 1886 bits |= (uintptr(*src) & (1<<c - 1)) << nbits 1887 nbits += c 1888 } 1889 } else { 1890 // Leading src fragment. 1891 src = subtractb(src, (off+3)/4) 1892 if frag := off & 3; frag != 0 { 1893 bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits 1894 src = add1(src) 1895 nbits += frag 1896 c -= frag 1897 } 1898 // Main loop: load one byte, write another. 1899 // The bits are rotating through the bit buffer. 1900 for i := c / 4; i > 0; i-- { 1901 bits |= (uintptr(*src) & 0xf) << nbits 1902 src = add1(src) 1903 *dst = uint8(bits&0xf | bitScanAll) 1904 dst = add1(dst) 1905 bits >>= 4 1906 } 1907 // Final src fragment. 1908 if c %= 4; c > 0 { 1909 bits |= (uintptr(*src) & (1<<c - 1)) << nbits 1910 nbits += c 1911 } 1912 } 1913 } 1914 1915 // Write any final bits out, using full-byte writes, even for the final byte. 1916 var totalBits uintptr 1917 if size == 1 { 1918 totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits 1919 nbits += -nbits & 7 1920 for ; nbits > 0; nbits -= 8 { 1921 *dst = uint8(bits) 1922 dst = add1(dst) 1923 bits >>= 8 1924 } 1925 } else { 1926 totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits 1927 nbits += -nbits & 3 1928 for ; nbits > 0; nbits -= 4 { 1929 v := bits&0xf | bitScanAll 1930 *dst = uint8(v) 1931 dst = add1(dst) 1932 bits >>= 4 1933 } 1934 } 1935 return totalBits 1936 } 1937 1938 // materializeGCProg allocates space for the (1-bit) pointer bitmask 1939 // for an object of size ptrdata. Then it fills that space with the 1940 // pointer bitmask specified by the program prog. 1941 // The bitmask starts at s.startAddr. 1942 // The result must be deallocated with dematerializeGCProg. 1943 func materializeGCProg(ptrdata uintptr, prog *byte) *mspan { 1944 s := mheap_.allocManual((ptrdata/(8*sys.PtrSize)+pageSize-1)/pageSize, &memstats.gc_sys) 1945 runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1) 1946 return s 1947 } 1948 func dematerializeGCProg(s *mspan) { 1949 mheap_.freeManual(s, &memstats.gc_sys) 1950 } 1951 1952 func dumpGCProg(p *byte) { 1953 nptr := 0 1954 for { 1955 x := *p 1956 p = add1(p) 1957 if x == 0 { 1958 print("\t", nptr, " end\n") 1959 break 1960 } 1961 if x&0x80 == 0 { 1962 print("\t", nptr, " lit ", x, ":") 1963 n := int(x+7) / 8 1964 for i := 0; i < n; i++ { 1965 print(" ", hex(*p)) 1966 p = add1(p) 1967 } 1968 print("\n") 1969 nptr += int(x) 1970 } else { 1971 nbit := int(x &^ 0x80) 1972 if nbit == 0 { 1973 for nb := uint(0); ; nb += 7 { 1974 x := *p 1975 p = add1(p) 1976 nbit |= int(x&0x7f) << nb 1977 if x&0x80 == 0 { 1978 break 1979 } 1980 } 1981 } 1982 count := 0 1983 for nb := uint(0); ; nb += 7 { 1984 x := *p 1985 p = add1(p) 1986 count |= int(x&0x7f) << nb 1987 if x&0x80 == 0 { 1988 break 1989 } 1990 } 1991 print("\t", nptr, " repeat ", nbit, " × ", count, "\n") 1992 nptr += nbit * count 1993 } 1994 } 1995 } 1996 1997 // Testing. 1998 1999 // gcbits returns the GC type info for x, for testing. 2000 // The result is the bitmap entries (0 or 1), one entry per byte. 2001 //go:linkname reflect_gcbits reflect.gcbits 2002 func reflect_gcbits(x interface{}) []byte { 2003 ret := getgcmask(x) 2004 typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem 2005 nptr := typ.ptrdata / sys.PtrSize 2006 for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 { 2007 ret = ret[:len(ret)-1] 2008 } 2009 return ret 2010 } 2011 2012 // Returns GC type info for the pointer stored in ep for testing. 2013 // If ep points to the stack, only static live information will be returned 2014 // (i.e. not for objects which are only dynamically live stack objects). 2015 func getgcmask(ep interface{}) (mask []byte) { 2016 e := *efaceOf(&ep) 2017 p := e.data 2018 t := e._type 2019 // data or bss 2020 roots := gcRoots 2021 for roots != nil { 2022 for i := 0; i < roots.count; i++ { 2023 pr := roots.roots[i] 2024 addr := uintptr(pr.decl) 2025 if addr <= uintptr(p) && uintptr(p) < addr+pr.size { 2026 n := (*ptrtype)(unsafe.Pointer(t)).elem.size 2027 mask = make([]byte, n/sys.PtrSize) 2028 copy(mask, (*[1 << 29]uint8)(unsafe.Pointer(pr.gcdata))[:pr.ptrdata]) 2029 } 2030 return 2031 } 2032 roots = roots.next 2033 } 2034 2035 // heap 2036 if base, s, _ := findObject(uintptr(p), 0, 0, false); base != 0 { 2037 hbits := heapBitsForAddr(base) 2038 n := s.elemsize 2039 mask = make([]byte, n/sys.PtrSize) 2040 for i := uintptr(0); i < n; i += sys.PtrSize { 2041 if hbits.isPointer() { 2042 mask[i/sys.PtrSize] = 1 2043 } 2044 if i != 1*sys.PtrSize && !hbits.morePointers() { 2045 mask = mask[:i/sys.PtrSize] 2046 break 2047 } 2048 hbits = hbits.next() 2049 } 2050 return 2051 } 2052 2053 // otherwise, not something the GC knows about. 2054 // possibly read-only data, like malloc(0). 2055 // must not have pointers 2056 // For gccgo, may live on the stack, which is collected conservatively. 2057 return 2058 }