github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/runtime/mbitmap.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are described
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// to be visited during GC. The bits in each byte are consumed starting with
// the low bit: 1<<0, 1<<1, and so on.
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 2-bit entries for the four words
// start through start+3*ptrSize, the byte at start-2 holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered,
// the low bit can also be assumed to be 0, and the object description is over.
// This 00 is called the ``dead'' encoding: it signals that the rest of the words
// in the object are uninteresting to the garbage collector.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position) not marked,
// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// for the second word of the object holds the checkmark bit.
// When not in checkmark mode, this bit is set to 1.
//
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// means every allocated object has two words, so there is room for the
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// just one word, so the second bit pair is not available for encoding the
// checkmark.
// However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except in checkmark mode the type bit is repurposed
// as the checkmark bit and then reinitialized (to 1) as the type bit when
// finished.

package runtime

import "unsafe"

const (
	bitPointer = 1 << 0
	bitMarked  = 1 << 4

	heapBitsShift   = 1                 // shift offset between successive bitPointer or bitMarked entries
	heapBitmapScale = ptrSize * (8 / 2) // number of data bytes described by one heap bitmap byte

	// all mark/pointer bits in a byte
	bitMarkedAll  = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)

// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//go:nowritebarrier
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// mHeap_MapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try to access the bitmap immediately
// after observing the change to arena_used.
//
//go:nowritebarrier
func mHeap_MapBits(h *mheap, arena_used uintptr) {
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	const bitmapChunk = 8192

	n := (arena_used - mheap_.arena_start) / heapBitmapScale
	n = round(n, bitmapChunk)
	n = round(n, _PhysPageSize)
	if h.bitmap_mapped >= n {
		return
	}

	sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
	h.bitmap_mapped = n
}
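
// Illustrative sketch (not used by the runtime): given the arena start
// address and an address inside the arena, compute which bitmap byte and
// which 2-bit entry within it describe that word. This mirrors the
// arithmetic in heapBitsForAddr below; the function name is hypothetical.
func exampleBitmapLocation(arenaStart, addr uintptr) (bytep uintptr, shift uint32) {
	off := (addr - arenaStart) / ptrSize // word index into the arena
	bytep = arenaStart - off/4 - 1       // bitmap bytes grow downward from arenaStart
	shift = uint32(off&3) * heapBitsShift
	return
}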

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
	off := (addr - mheap_.arena_start) / ptrSize
	return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(off & 3)}
}

// heapBitsForSpan returns the heapBits for the span base address base.
func heapBitsForSpan(base uintptr) (hbits heapBits) {
	if base < mheap_.arena_start || base >= mheap_.arena_used {
		throw("heapBitsForSpan: base out of range")
	}
	hbits = heapBitsForAddr(base)
	if hbits.shift != 0 {
		throw("heapBitsForSpan: unaligned start")
	}
	return hbits
}

// heapBitsForObject returns the base address for the heap object
// containing the address p, along with the heapBits for base.
// If p does not point into a heap object, it returns base == 0;
// otherwise it returns the base of the object.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan) {
	arenaStart := mheap_.arena_start
	if p < arenaStart || p >= mheap_.arena_used {
		return
	}
	off := p - arenaStart
	idx := off >> _PageShift
	// p points into the heap, but possibly to the middle of an object.
	// Consult the span table to find the block beginning.
	k := p >> _PageShift
	s = h_spans[idx]
	if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
		if s == nil || s.state == _MSpanStack {
			// If s is nil, the virtual address has never been part of the heap.
			// This pointer may be to some mmap'd region, so we allow it.
			// Pointers into stacks are also ok, the runtime manages these explicitly.
			return
		}

		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			// Typically this indicates an incorrect use
			// of unsafe or cgo to store a bad pointer in
			// the Go heap. It may also indicate a runtime
			// bug.
			//
			// TODO(austin): We could be more aggressive
			// and detect pointers to unallocated objects
			// in allocated spans.
			printlock()
			print("runtime: pointer ", hex(p))
			if s.state != mSpanInUse {
				print(" to unallocated span")
			} else {
				print(" to unused region of span")
			}
			print(" idx=", hex(idx), " span.start=", hex(s.start<<_PageShift), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
			if refBase != 0 {
				print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
				gcDumpObject("object", refBase, refOff)
			}
			throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
		}
		return
	}
	// If this span holds objects of a power-of-2 size, just mask off the bits to
	// the interior of the object. Otherwise use the size to get the base.
	if s.baseMask != 0 {
		// optimize for power-of-2-sized objects.
		base = s.base()
		base = base + (p-base)&s.baseMask
		// base = p & s.baseMask is faster for small spans,
		// but doesn't work for large spans.
		// Overall, it's faster to use the more general computation above.
	} else {
		base = s.base()
		if p-base >= s.elemsize {
			// n := (p - base) / s.elemsize, using division by multiplication
			n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
			base += n * s.elemsize
		}
	}
	// Now that we know the actual base, compute heapBits to return to caller.
	hbits = heapBitsForAddr(base)
	return
}
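
// Illustrative sketch (hypothetical helper, not part of the runtime):
// what the divMul/divShift computation above is equivalent to, written
// with a plain division. Given a span base, its element size, and an
// interior pointer p, it returns the base of the object containing p.
func exampleObjectBase(spanBase, elemsize, p uintptr) uintptr {
	n := (p - spanBase) / elemsize // object index within the span
	return spanBase + n*elemsize
}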

// prefetch the bits.
func (h heapBits) prefetch() {
	prefetchnta(uintptr(unsafe.Pointer(h.bitp)))
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		return heapBits{h.bitp, h.shift + heapBitsShift}
	}
	return heapBits{subtract1(h.bitp), 0}
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
}

// bits returns the heap bits for the current word.
// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
	return uint32(*h.bitp) >> h.shift
}

// isMarked reports whether the heap bits have the marked bit set.
// h must describe the initial word of the object.
func (h heapBits) isMarked() bool {
	return *h.bitp&(bitMarked<<h.shift) != 0
}

// setMarked sets the marked bit in the heap bits, atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarked() {
	// Each byte of GC bitmap holds info for four words.
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomicor8(h.bitp, bitMarked<<h.shift)
}

// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() {
	*h.bitp |= bitMarked << h.shift
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return (*h.bitp>>h.shift)&bitPointer != 0
}

// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is, so that it does not read too
// far into the bitmap.
// h must describe the initial word of the object.
func (h heapBits) hasPointers(size uintptr) bool {
	if size == ptrSize { // 1-word objects are always pointers
		return true
	}
	// Otherwise, at least a 2-word object, and at least 2-word aligned,
	// so h.shift is either 0 or 2, so we know we can get the bits for the
	// first two words out of *h.bitp.
	// If either of the first two words is a pointer, not pointer free.
	b := uint32(*h.bitp >> h.shift)
	if b&(bitPointer|bitPointer<<heapBitsShift) != 0 {
		return true
	}
	if size == 2*ptrSize {
		return false
	}
	// At least a 4-word object. Check scan bit (aka marked bit) in third word.
	if h.shift == 0 {
		return b&(bitMarked<<(2*heapBitsShift)) != 0
	}
	return uint32(*subtract1(h.bitp))&bitMarked != 0
}
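
// Illustrative sketch (not called anywhere): walk an object's bitmap
// entries the way scanobject does, stopping early at the dead encoding.
// It returns how many bytes would actually be scanned. Assumes base is a
// valid heap object base and size its allocated size.
func exampleScanWords(base, size uintptr) uintptr {
	h := heapBitsForAddr(base)
	for i := uintptr(0); i < size; i += ptrSize {
		// In the third and later words, a clear high (marked) bit is the
		// dead encoding: no more pointers in this object.
		if i >= 2*ptrSize && h.bits()&bitMarked == 0 {
			return i
		}
		h = h.next()
	}
	return size
}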

// isCheckmarked reports whether the heap bits have the checkmarked bit set.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
	if size == ptrSize {
		return (*h.bitp>>h.shift)&bitPointer != 0
	}
	// All multiword objects are 2-word aligned,
	// so we know that the initial word's 2-bit pair
	// and the second word's 2-bit pair are in the
	// same heap bitmap byte, *h.bitp.
	return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
}

// setCheckmarked sets the checkmarked bit.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) setCheckmarked(size uintptr) {
	if size == ptrSize {
		atomicor8(h.bitp, bitPointer<<h.shift)
		return
	}
	atomicor8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
}

// heapBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the heap bitmap to locate those pointer slots.
// This executes the write barriers necessary after a memmove.
// Both p and size must be pointer-aligned.
// The range [p, p+size) must lie within a single allocation.
//
// Callers should call heapBitsBulkBarrier immediately after
// calling memmove(p, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
//
// The heap bitmap is not maintained for allocations containing
// no pointers at all; any caller of heapBitsBulkBarrier must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.kind&kindNoPointers.
//
//go:nosplit
func heapBitsBulkBarrier(p, size uintptr) {
	if (p|size)&(ptrSize-1) != 0 {
		throw("heapBitsBulkBarrier: unaligned arguments")
	}
	if !writeBarrierEnabled {
		return
	}
	if !inheap(p) {
		// If p is on the stack and in a higher frame than the
		// caller, we either need to execute write barriers on
		// it (which is what happens for normal stack writes
		// through pointers to higher frames), or we need to
		// force the mark termination stack scan to scan the
		// frame containing p.
		//
		// Executing write barriers on p is complicated in the
		// general case because we either need to unwind the
		// stack to get the stack map, or we need the type's
		// bitmap, which may be a GC program.
		//
		// Hence, we opt for forcing the re-scan to scan the
		// frame containing p, which we can do by simply
		// unwinding the stack barriers between the current SP
		// and p's frame.
		gp := getg().m.curg
		if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
			// Run on the system stack to give it more
			// stack space.
			systemstack(func() {
				gcUnwindBarriers(gp, p)
			})
		}
		return
	}

	h := heapBitsForAddr(p)
	for i := uintptr(0); i < size; i += ptrSize {
		if h.isPointer() {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		h = h.next()
	}
}

// typeBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the type bitmap to locate those pointer slots.
// The type typ must correspond exactly to [p, p+size).
// This executes the write barriers necessary after a copy.
// Both p and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right after memmove,
// and the GC must not complete between those two.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", *typ._string, " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", *typ._string, " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrierEnabled {
		return
	}
	ptrmask := typ.gcdata
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += ptrSize {
		if i&(ptrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
	}
}
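
// Illustrative sketch (hypothetical helper): decode a 1-bit pointer mask
// the same way the typeBitsBulkBarrier loop above does, collecting the
// byte offsets of the pointer slots instead of executing barriers. Bits
// are consumed low to high within each mask byte, one bit per word.
func examplePtrmaskOffsets(ptrmask *byte, ptrdata uintptr) []uintptr {
	var offs []uintptr
	var bits uint32
	for i := uintptr(0); i < ptrdata; i += ptrSize {
		if i&(ptrSize*8-1) == 0 {
			bits = uint32(*ptrmask) // next 8 words' worth of mask bits
			ptrmask = addb(ptrmask, 1)
		} else {
			bits >>= 1
		}
		if bits&1 != 0 {
			offs = append(offs, i)
		}
	}
	return offs
}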

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
func (h heapBits) initSpan(size, n, total uintptr) {
	if total%heapBitmapScale != 0 {
		throw("initSpan: unaligned length")
	}
	nbyte := total / heapBitmapScale
	if ptrSize == 8 && size == ptrSize {
		end := h.bitp
		bitp := subtractb(end, nbyte-1)
		for {
			*bitp = bitPointerAll
			if bitp == end {
				break
			}
			bitp = add1(bitp)
		}
		return
	}
	memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}

// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
	// The ptrSize == 8 test is a compile-time constant, so on 32-bit systems
	// the compiler eliminates this code entirely.
	if ptrSize == 8 && size == ptrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must clear type bit (checkmark bit) of every word.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp &^= bitPointerAll
			bitp = subtract1(bitp)
		}
		return
	}
	for i := uintptr(0); i < n; i++ {
		*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
		h = h.forward(size / ptrSize)
	}
}

// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
	// The ptrSize == 8 test is a compile-time constant, so on 32-bit systems
	// the compiler eliminates this code entirely.
	if ptrSize == 8 && size == ptrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must set the type bit (checkmark bit) of every word back to 1.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp |= bitPointerAll
			bitp = subtract1(bitp)
		}
	}
}
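
// Illustrative sketch: the bitmap footprint of a span. Each heap bitmap
// byte describes heapBitmapScale (= ptrSize*4) bytes of heap, so on a
// 64-bit system an 8 KB span is described by 8192/32 = 256 bitmap bytes.
// The function name is hypothetical; initSpan above does the real work.
func exampleSpanBitmapBytes(total uintptr) uintptr {
	return total / heapBitmapScale
}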

// heapBitsSweepSpan coordinates the sweeping of a span by reading
// and updating the corresponding heap bitmap entries.
// For each free object in the span, heapBitsSweepSpan sets the type
// bits for the first two words (or one for single-word objects) to typeDead
// and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list.
// For non-free objects, heapBitsSweepSpan turns off the marked bit.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
	h := heapBitsForSpan(base)
	switch {
	default:
		throw("heapBitsSweepSpan")
	case ptrSize == 8 && size == ptrSize:
		// Consider mark bits in all four 2-bit entries of each bitmap byte.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			x := uint32(*bitp)
			// Note that unlike the other size cases, we leave the pointer bits set here.
			// These are initialized during initSpan when the span is created and left
			// in place the whole time the span is used for pointer-sized objects.
			// That lets heapBitsSetType avoid an atomic update to set the pointer bit
			// during allocation.
			if x&bitMarked != 0 {
				x &^= bitMarked
			} else {
				f(base + i*ptrSize)
			}
			if x&(bitMarked<<heapBitsShift) != 0 {
				x &^= bitMarked << heapBitsShift
			} else {
				f(base + (i+1)*ptrSize)
			}
			if x&(bitMarked<<(2*heapBitsShift)) != 0 {
				x &^= bitMarked << (2 * heapBitsShift)
			} else {
				f(base + (i+2)*ptrSize)
			}
			if x&(bitMarked<<(3*heapBitsShift)) != 0 {
				x &^= bitMarked << (3 * heapBitsShift)
			} else {
				f(base + (i+3)*ptrSize)
			}
			*bitp = uint8(x)
			bitp = subtract1(bitp)
		}

	case size%(4*ptrSize) == 0:
		// Mark bit is in first word of each object.
		// Each object starts at bit 0 of a heap bitmap byte.
		bitp := h.bitp
		step := size / heapBitmapScale
		for i := uintptr(0); i < n; i++ {
			x := uint32(*bitp)
			if x&bitMarked != 0 {
				x &^= bitMarked
			} else {
				x = 0
				f(base + i*size)
			}
			*bitp = uint8(x)
			bitp = subtractb(bitp, step)
		}

	case size%(4*ptrSize) == 2*ptrSize:
		// Mark bit is in first word of each object,
		// but every other object starts halfway through a heap bitmap byte.
		// Unroll loop 2x to handle alternating shift count and step size.
		bitp := h.bitp
		step := size / heapBitmapScale
		var i uintptr
		for i = uintptr(0); i < n; i += 2 {
			x := uint32(*bitp)
			if x&bitMarked != 0 {
				x &^= bitMarked
			} else {
				x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
				f(base + i*size)
				if size > 2*ptrSize {
					x = 0
				}
			}
			*bitp = uint8(x)
			if i+1 >= n {
				break
			}
			bitp = subtractb(bitp, step)
			x = uint32(*bitp)
			if x&(bitMarked<<(2*heapBitsShift)) != 0 {
				x &^= bitMarked << (2 * heapBitsShift)
			} else {
				x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
				f(base + (i+1)*size)
				if size > 2*ptrSize {
					*subtract1(bitp) = 0
				}
			}
			*bitp = uint8(x)
			bitp = subtractb(bitp, step+1)
		}
	}
}
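
// Illustrative sketch (hypothetical use, not how mSpan_Sweep actually
// builds its free list): collect the base addresses of free objects in a
// span by passing a closure to heapBitsSweepSpan. Note that the call also
// clears mark bits and marks free objects dead, as described above.
func exampleCollectFree(base, size, n uintptr) []uintptr {
	var free []uintptr
	heapBitsSweepSpan(base, size, n, func(p uintptr) {
		free = append(free, p)
	})
	return free
}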

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
// There can only be one allocation from a given span active at a time,
// so this code is not racing with other instances of itself,
// and we don't allocate from a span until it has been swept,
// so this code is not racing with heapBitsSweepSpan.
// It is, however, racing with the concurrent GC mark phase,
// which can be setting the mark bit in the leading 2-bit entry
// of an allocated block. The block we are modifying is not quite
// allocated yet, so the GC marker is not racing with updates to x's bits,
// but if the start or end of x shares a bitmap byte with an adjacent
// object, the GC marker is racing with updates to those objects' mark bits.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
	const doubleCheck = false // slow but helpful; enable to test modifications to this code

	// dataSize is always size rounded up to the next malloc size class,
	// except in the case of allocating a defer block, in which case
	// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
	// arbitrarily larger.
	//
	// The checks for size == ptrSize and size == 2*ptrSize can therefore
	// assume that dataSize == size without checking it explicitly.

	if ptrSize == 8 && size == ptrSize {
		// It's one word and it has pointers, it must be a pointer.
		// In general we'd need an atomic update here if the
		// concurrent GC were marking objects in this span,
		// because each bitmap byte describes 3 other objects
		// in addition to the one being allocated.
		// However, since all allocated one-word objects are pointers
		// (non-pointers are aggregated into tinySize allocations),
		// initSpan sets the pointer bits for us. Nothing to do here.
		if doubleCheck {
			h := heapBitsForAddr(x)
			if !h.isPointer() {
				throw("heapBitsSetType: pointer bit missing")
			}
		}
		return
	}

	h := heapBitsForAddr(x)
	ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)

	// Heap bitmap bits for 2-word object are only 4 bits,
	// so also shared with objects next to it; use atomic updates.
	// This is called out as a special case primarily for 32-bit systems,
	// so that on 32-bit systems the code below can assume all objects
	// are 4-word aligned (because they're all 16-byte aligned).
	if size == 2*ptrSize {
		if typ.size == ptrSize {
			// We're allocating a block big enough to hold two pointers.
			// On 64-bit, that means the actual object must be two pointers,
			// or else we'd have used the one-pointer-sized block.
			// On 32-bit, however, this is the 8-byte block, the smallest one.
			// So it could be that we're allocating one pointer and this was
			// just the smallest block available. Distinguish by checking dataSize.
			// (In general the number of instances of typ being allocated is
			// dataSize/typ.size.)
			if ptrSize == 4 && dataSize == ptrSize {
				// 1 pointer.
				if gcphase == _GCoff {
					*h.bitp |= bitPointer << h.shift
				} else {
					atomicor8(h.bitp, bitPointer<<h.shift)
				}
			} else {
				// 2-element slice of pointer.
				if gcphase == _GCoff {
					*h.bitp |= (bitPointer | bitPointer<<heapBitsShift) << h.shift
				} else {
					atomicor8(h.bitp, (bitPointer|bitPointer<<heapBitsShift)<<h.shift)
				}
			}
			return
		}
		// Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
		if doubleCheck {
			if typ.size != 2*ptrSize || typ.kind&kindGCProg != 0 {
				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
				throw("heapBitsSetType")
			}
		}
		b := uint32(*ptrmask)
		hb := b & 3
		if gcphase == _GCoff {
			*h.bitp |= uint8(hb << h.shift)
		} else {
			atomicor8(h.bitp, uint8(hb<<h.shift))
		}
		return
	}
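
	// Worked example (illustrative, not from the original source): on a
	// 64-bit system, allocating struct{ p, q *int } takes the 2-word case
	// above with size = typ.size = 16. The low two ptrmask bits are 1,1,
	// so hb = 3 = bitPointer|bitPointer<<heapBitsShift: pointer bits for
	// words 0 and 1, with no mark bits, or'd into the shared bitmap byte.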

	// Copy from 1-bit ptrmask into 2-bit bitmap.
	// The basic approach is to use a single uintptr as a bit buffer,
	// alternating between reloading the buffer and writing bitmap bytes.
	// In general, one load can supply two bitmap byte writes.
	// This is a lot of lines of code, but it compiles into relatively few
	// machine instructions.

	var (
		// Ptrmask input.
		p     *byte   // last ptrmask byte read
		b     uintptr // ptrmask bits already loaded
		nb    uintptr // number of bits in b at next read
		endp  *byte   // final ptrmask byte to read (then repeat)
		endnb uintptr // number of valid bits in *endp
		pbits uintptr // alternate source of bits

		// Heap bitmap output.
		w     uintptr // words processed
		nw    uintptr // number of words to process
		hbitp *byte   // next heap bitmap byte to write
		hb    uintptr // bits being prepared for *hbitp
	)

	hbitp = h.bitp

	// Handle GC program. Delayed until this part of the code
	// so that we can use the same double-checking mechanism
	// as the 1-bit case. Nothing above could have encountered
	// GC programs: the cases were all too small.
	if typ.kind&kindGCProg != 0 {
		heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
		if doubleCheck {
			// Double-check the heap bits written by GC program
			// by running the GC program to create a 1-bit pointer mask
			// and then jumping to the double-check code below.
			// This doesn't catch bugs shared between the 1-bit and 4-bit
			// GC program execution, but it does catch mistakes specific
			// to just one of those and bugs in heapBitsSetTypeGCProg's
			// implementation of arrays.
			lock(&debugPtrmask.lock)
			if debugPtrmask.data == nil {
				debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
			}
			ptrmask = debugPtrmask.data
			runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
			goto Phase4
		}
		return
	}

	// Note about sizes:
	//
	// typ.size is the size of the object in bytes,
	// and typ.ptrdata is the size in bytes of the prefix
	// of the object that contains pointers. That is, the final
	// typ.size - typ.ptrdata bytes contain no pointers.
	// This allows optimization of a common pattern where
	// an object has a small header followed by a large scalar
	// buffer. If we know the pointers are over, we don't have
	// to scan the buffer's heap bitmap at all.
	// The 1-bit ptrmasks are sized to contain only bits for
	// the typ.ptrdata prefix, zero padded out to a full byte
	// of bitmap. This code sets nw (below) so that heap bitmap
	// bits are only written for the typ.ptrdata prefix; if there is
	// more room in the allocated object, the next heap bitmap
	// entry is a 00, indicating that there are no more pointers
	// to scan. So only the ptrmask for the ptrdata bytes is needed.
	//
	// Replicated copies are not as nice: if there is an array of
	// objects with scalar tails, all but the last tail does have to
	// be initialized, because there is no way to say "skip forward".
	// However, because of the possibility of a repeated type with
	// size not a multiple of 4 pointers (one heap bitmap byte),
	// the code already must handle the last ptrmask byte specially
	// by treating it as containing only the bits for endnb pointers,
	// where endnb <= 4. We represent large scalar tails that must
	// be expanded in the replication by setting endnb larger than 4.
	// This will have the effect of reading many bits out of b,
	// but once the real bits are shifted out, b will supply as many
	// zero bits as we try to read, which is exactly what we need.
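
	// Worked example (illustrative): consider an array of
	// struct{ p *int; x, y uintptr } elements on a 64-bit system, so
	// typ.size = 24 and typ.ptrdata = 8. The ptrmask is the single bit 1,
	// and replication must supply the pattern 1,0,0 (pointer, scalar,
	// scalar) for each element, with the zero bits for x and y coming
	// from the zero-fill behavior of the bit buffer described above.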

	p = ptrmask
	if typ.size < dataSize {
		// Filling in bits for an array of typ.
		// Set up for repetition of ptrmask during main loop.
		// Note that ptrmask describes only a prefix of each element:
		// the typ.ptrdata words that can hold pointers. The code below
		// arranges for the trailing scalar words to read as zeros.
		const maxBits = ptrSize*8 - 7
		if typ.ptrdata/ptrSize <= maxBits {
			// Entire ptrmask fits in uintptr with room for a byte fragment.
			// Load into pbits and never read from ptrmask again.
			// This is especially important when the ptrmask has
			// fewer than 8 bits in it; otherwise the reload in the middle
			// of the Phase 2 loop would itself need to loop to gather
			// at least 8 bits.

			// Accumulate ptrmask into b.
			// ptrmask is sized to describe only typ.ptrdata, but we record
			// it as describing typ.size bytes, since all the high bits are zero.
			nb = typ.ptrdata / ptrSize
			for i := uintptr(0); i < nb; i += 8 {
				b |= uintptr(*p) << i
				p = add1(p)
			}
			nb = typ.size / ptrSize

			// Replicate ptrmask to fill entire pbits uintptr.
			// Doubling and truncating is fewer steps than
			// iterating by nb each time. (nb could be 1.)
			// Since we loaded typ.ptrdata/ptrSize bits
			// but are pretending to have typ.size/ptrSize,
			// there might be no replication necessary/possible.
			pbits = b
			endnb = nb
			if nb+nb <= maxBits {
				for endnb <= ptrSize*8 {
					pbits |= pbits << endnb
					endnb += endnb
				}
				// Truncate to a multiple of original ptrmask.
				endnb = maxBits / nb * nb
				pbits &= 1<<endnb - 1
				b = pbits
				nb = endnb
			}

			// Clear p and endp as sentinel for using pbits.
			// Checked during Phase 2 loop.
			p = nil
			endp = nil
		} else {
			// Ptrmask is larger. Read it multiple times.
			n := (typ.ptrdata/ptrSize+7)/8 - 1
			endp = addb(ptrmask, n)
			endnb = typ.size/ptrSize - n*8
		}
	}
	if p != nil {
		b = uintptr(*p)
		p = add1(p)
		nb = 8
	}

	if typ.size == dataSize {
		// Single entry: can stop once we reach the non-pointer data.
		nw = typ.ptrdata / ptrSize
	} else {
		// Repeated instances of typ in an array.
		// Have to process first N-1 entries in full, but can stop
		// once we reach the non-pointer data in the final entry.
		nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / ptrSize
	}
	if nw == 0 {
		// No pointers! Caller was supposed to check.
		println("runtime: invalid type ", *typ._string)
		throw("heapBitsSetType: called with non-pointer type")
		return
	}
	if nw < 2 {
		// Must write at least 2 words, because the "no scan"
		// encoding doesn't take effect until the third word.
		nw = 2
	}

	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
	// The leading byte is special because it contains the bits for words 0 and 1,
	// which do not have the marked bits set.
	// The leading half-byte is special because it's half a byte and must be
	// manipulated atomically.
	switch {
	default:
		throw("heapBitsSetType: unexpected shift")

	case h.shift == 0:
		// Ptrmask and heap bitmap are aligned.
		// Handle first byte of bitmap specially.
		// The first byte we write out contains the first two words of the object.
		// In those words, the mark bits are mark and checkmark, respectively,
		// and must not be set. In all following words, we want to set the mark bit
		// as a signal that the object continues to the next 2-bit entry in the bitmap.
		hb = b & bitPointerAll
		hb |= bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
		if w += 4; w >= nw {
			goto Phase3
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
		nb -= 4

	case ptrSize == 8 && h.shift == 2:
		// Ptrmask and heap bitmap are misaligned.
		// The bits for the first two words are in a byte shared with another object
		// and must be updated atomically.
		// NOTE(rsc): The atomic here may not be necessary.
		// We took care of 1-word and 2-word objects above,
		// so this is at least a 6-word object, so our start bits
		// are shared only with the type bits of another object,
		// not with its mark bit. Since there is only one allocation
		// from a given span at a time, we should be able to set
		// these bits non-atomically. Not worth the risk right now.
		hb = (b & 3) << (2 * heapBitsShift)
		b >>= 2
		nb -= 2
		// Note: no bitMarked in hb because the first two words don't get markers from us.
		if gcphase == _GCoff {
			*hbitp |= uint8(hb)
		} else {
			atomicor8(hbitp, uint8(hb))
		}
		hbitp = subtract1(hbitp)
		if w += 2; w >= nw {
			// We know that there is more data, because we handled 2-word objects above.
			// This must be at least a 6-word object. If we're out of pointer words,
			// mark no scan in next bitmap byte and finish.
			hb = 0
			w += 4
			goto Phase3
		}
	}

	// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
	// The loop computes the bits for that last write but does not execute the write;
	// it leaves the bits in hb for processing by phase 3.
	// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
	// use in the first half of the loop right now, and then we only adjust nb explicitly
	// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
	nb -= 4
	for {
		// Emit bitmap byte.
		// b has at least nb+4 bits, with one exception:
		// if w+4 >= nw, then b has only nw-w bits,
		// but we'll stop at the break and then truncate
		// appropriately in Phase 3.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4

		// Load more bits. b has nb right now.
		if p != endp {
			// Fast path: keep reading from ptrmask.
			// nb unmodified: we just loaded 8 bits,
			// and the next iteration will consume 8 bits,
			// leaving us with the same nb the next time we're here.
			if nb < 8 {
				b |= uintptr(*p) << nb
				p = add1(p)
			} else {
				// Reduce the number of bits in b.
				// This is important if we skipped
				// over a scalar tail, since nb could
				// be larger than the bit width of b.
				nb -= 8
			}
		} else if p == nil {
			// Almost as fast path: track bit count and refill from pbits.
			// For short repetitions.
			if nb < 8 {
				b |= pbits << nb
				nb += endnb
			}
			nb -= 8 // for next iteration
		} else {
			// Slow path: reached end of ptrmask.
			// Process final partial byte and rewind to start.
			b |= uintptr(*p) << nb
			nb += endnb
			if nb < 8 {
				b |= uintptr(*ptrmask) << nb
				p = add1(ptrmask)
			} else {
				nb -= 8
				p = ptrmask
			}
		}

		// Emit bitmap byte.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
	}

Phase3:
	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
	if w > nw {
		// Counting the 4 entries in hb not yet written to memory,
		// there are more entries than possible pointer slots.
		// Discard the excess entries (can't be more than 3).
		mask := uintptr(1)<<(4-(w-nw)) - 1
		hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
	}

	// Change nw from counting possibly-pointer words to total words in allocation.
	nw = size / ptrSize

	// Write whole bitmap bytes.
	// The first is hb, the rest are zero.
	if w <= nw {
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		hb = 0 // for possible final half-byte below
		for w += 4; w <= nw; w += 4 {
			*hbitp = 0
			hbitp = subtract1(hbitp)
		}
	}

	// Write final partial bitmap byte if any.
	// We know w > nw, or else we'd still be in the loop above.
	// It can be bigger only due to the 4 entries in hb that it counts.
	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
	// and can discard the 4 sitting in hb.
	// But if w == nw+2, we need to write the first two entries in hb.
	// The byte is shared with the next object, so we may need an atomic.
	if w == nw+2 {
		if gcphase == _GCoff {
			*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
		} else {
			atomicand8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
			atomicor8(hbitp, uint8(hb))
		}
	}
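
	// Worked example (illustrative): suppose nw = 6 and the Phase 2 loop
	// exited with w = 8. Then w-nw = 2 of the four entries in hb are
	// excess, so mask = 1<<(4-2)-1 = 3 and hb &= 0x33, keeping the two
	// valid pointer bits and their mark bits while zeroing the rest.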

Phase4:
	// Phase 4: all done, but perhaps double check.
	if doubleCheck {
		end := heapBitsForAddr(x + size)
		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
			println("ended at wrong bitmap byte for", *typ._string, "x", dataSize/typ.size)
			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
			h0 := heapBitsForAddr(x)
			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
			print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
			throw("bad heapBitsSetType")
		}

		// Double-check that bits to be written were written correctly.
		// Does not check that other bits were not written, unfortunately.
		h := heapBitsForAddr(x)
		nptr := typ.ptrdata / ptrSize
		ndata := typ.size / ptrSize
		count := dataSize / typ.size
		totalptr := ((count-1)*typ.size + typ.ptrdata) / ptrSize
		for i := uintptr(0); i < size/ptrSize; i++ {
			j := i % ndata
			var have, want uint8
			have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
			if i >= totalptr {
				want = 0 // dead encoding
				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
					want = bitMarked
				}
			} else {
				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
					want |= bitPointer
				}
				if i >= 2 {
					want |= bitMarked
				} else {
					have &^= bitMarked
				}
			}
			if have != want {
				println("mismatch writing bits for", *typ._string, "x", dataSize/typ.size)
				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				h0 := heapBitsForAddr(x)
				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
				print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
				println("at word", i, "offset", i*ptrSize, "have", have, "want", want)
				if typ.kind&kindGCProg != 0 {
					println("GC program:")
					dumpGCProg(addb(typ.gcdata, 4))
				}
				throw("bad heapBitsSetType")
			}
			h = h.next()
		}
		if ptrmask == debugPtrmask.data {
			unlock(&debugPtrmask.lock)
		}
	}
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes (a prefix of).
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects and need not be accessed with atomic instructions.
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
	if ptrSize == 8 && allocSize%(4*ptrSize) != 0 {
		// Alignment will be wrong.
		throw("heapBitsSetTypeGCProg: small allocation")
	}
	var totalBits uintptr
	if elemSize == dataSize {
		totalBits = runGCProg(prog, nil, h.bitp, 2)
		if totalBits*ptrSize != progSize {
			println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
			throw("heapBitsSetTypeGCProg: unexpected bit count")
		}
	} else {
		count := dataSize / elemSize

		// Piece together program trailer to run after prog that does:
		//	literal(0)
		//	repeat(1, elemSize-progSize-1) // zeros to fill element size
		//	repeat(elemSize, count-1) // repeat that element for count
		// This zero-pads the data remaining in the first element and then
		// repeats that first element to fill the array.
		var trailer [40]byte // 3 varints (max 10 each) + some bytes
		i := 0
		if n := elemSize/ptrSize - progSize/ptrSize; n > 0 {
			// literal(0)
			trailer[i] = 0x01
			i++
			trailer[i] = 0
			i++
			if n > 1 {
				// repeat(1, n-1)
				trailer[i] = 0x81
				i++
				n--
				for ; n >= 0x80; n >>= 7 {
					trailer[i] = byte(n | 0x80)
					i++
				}
				trailer[i] = byte(n)
				i++
			}
		}
		// repeat(elemSize/ptrSize, count-1)
		trailer[i] = 0x80
		i++
		n := elemSize / ptrSize
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		n = count - 1
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		trailer[i] = 0
		i++

		runGCProg(prog, &trailer[0], h.bitp, 2)

		// Even though we filled in the full array just now,
		// record that we only filled in up to the ptrdata of the
		// last element. This will cause the code below to
		// memclr the dead section of the final array element,
		// so that scanobject can stop early in the final element.
		totalBits = (elemSize*(count-1) + progSize) / ptrSize
	}
	endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
	endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
	memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/ptrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/ptrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, nil, &x[0], 1)
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
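
// Illustrative sketch: a hand-assembled GC program in the encoding
// described above (hypothetical, for exposition only). It describes ten
// 3-word elements whose first word is a pointer: emit the three bits
// 1,0,0, then repeat them nine more times, then stop.
var exampleGCProg = []byte{
	0x03, 0x01, // emit 3 bits: 1 0 0 (pointer, scalar, scalar)
	0x83, 0x09, // repeat the previous 3 bits 9 times
	0x00, // stop
}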

// runGCProg executes the GC program prog, and then trailer if non-nil,
// writing to dst with entries of the given size.
// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
// starting at dst (because the heap bitmap does). In this case, the caller guarantees
// that only whole bytes in dst need to be written.
//
// runGCProg returns the number of 1- or 2-bit entries written to memory.
func runGCProg(prog, trailer, dst *byte, size int) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			if size == 1 {
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			} else {
				v := bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
				v = bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
			}
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over; continue in trailer if present.
				if trailer != nil {
					//println("trailer")
					p = trailer
					trailer = nil
					continue
				}
				//println("done")
				break Run
			}
			//println("lit", n, dst)
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				if size == 1 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
				} else {
					v := bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
					v = bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
				}
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is ptrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = ptrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			if size == 1 {
				src = subtract1(src)
				for npattern < n {
					pattern <<= 8
					pattern |= uintptr(*src)
					src = subtract1(src)
					npattern += 8
				}
			} else {
				src = add1(src)
				for npattern < n {
					pattern <<= 4
					pattern |= uintptr(*src) & 0xf
					src = add1(src)
					npattern += 4
				}
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= ptrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				if size == 1 {
					for nbits >= 8 {
						*dst = uint8(bits)
						dst = add1(dst)
						bits >>= 8
						nbits -= 8
					}
				} else {
					for nbits >= 4 {
						*dst = uint8(bits&0xf | bitMarkedAll)
						dst = subtract1(dst)
						bits >>= 4
						nbits -= 4
					}
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		if size == 1 {
			// Leading src fragment.
			src = subtractb(src, (off+7)/8)
			if frag := off & 7; frag != 0 {
				bits |= uintptr(*src) >> (8 - frag) << nbits
				src = add1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 8; i > 0; i-- {
				bits |= uintptr(*src) << nbits
				src = add1(src)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			// Final src fragment.
			if c %= 8; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		} else {
			// Leading src fragment.
			src = addb(src, (off+3)/4)
			if frag := off & 3; frag != 0 {
				bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
				src = subtract1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 4; i > 0; i-- {
				bits |= (uintptr(*src) & 0xf) << nbits
				src = subtract1(src)
				*dst = uint8(bits&0xf | bitMarkedAll)
				dst = subtract1(dst)
				bits >>= 4
			}
			// Final src fragment.
			if c %= 4; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	var totalBits uintptr
	if size == 1 {
		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
		nbits += -nbits & 7
		for ; nbits > 0; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
	} else {
		totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
		nbits += -nbits & 3
		for ; nbits > 0; nbits -= 4 {
			v := bits&0xf | bitMarkedAll
			*dst = uint8(v)
			dst = subtract1(dst)
			bits >>= 4
		}
		// Clear the mark bits in the first two entries.
		// They are the actual mark and checkmark bits,
		// not non-dead markers. Setting the marker on every entry
		// written simplified the code above; clear these two
		// as a special case at the end.
		*dstStart &^= bitMarked | bitMarked<<heapBitsShift
	}
	return totalBits
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
	target := (*stkframe)(ctxt)
	if frame.sp <= target.sp && target.sp < frame.varp {
		*target = *frame
		return false
	}
	return true
}

// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x interface{}) []byte {
	ret := getgcmask(x)
	typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
	nptr := typ.ptrdata / ptrSize
	for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
		ret = ret[:len(ret)-1]
	}
	return ret
}
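
// Illustrative sketch (hypothetical, for exposition): decode the
// exampleGCProg defined earlier into a 1-bit pointer mask using
// progToPointerMask. The program describes 10 elements of 3 words each,
// so the region size is 30*ptrSize bytes and the result has 30 bits.
func exampleDecodeGCProg() bitvector {
	return progToPointerMask(&exampleGCProg[0], 30*ptrSize)
}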

// getgcmask returns the GC type info for object p, for testing.
func getgcmask(ep interface{}) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type
	// data or bss
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		// data
		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
			bitmap := datap.gcdatamask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/ptrSize)
			for i := uintptr(0); i < n; i += ptrSize {
				off := (uintptr(p) + i - datap.data) / ptrSize
				mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}

		// bss
		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
			bitmap := datap.gcbssmask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/ptrSize)
			for i := uintptr(0); i < n; i += ptrSize {
				off := (uintptr(p) + i - datap.bss) / ptrSize
				mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}
	}

	// heap
	var n uintptr
	var base uintptr
	if mlookup(uintptr(p), &base, &n, nil) != 0 {
		mask = make([]byte, n/ptrSize)
		for i := uintptr(0); i < n; i += ptrSize {
			hbits := heapBitsForAddr(base + i)
			if hbits.isPointer() {
				mask[i/ptrSize] = 1
			}
			if i >= 2*ptrSize && !hbits.isMarked() {
				mask = mask[:i/ptrSize]
				break
			}
		}
		return
	}

	// stack
	if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
		var frame stkframe
		frame.sp = uintptr(p)
		_g_ := getg()
		gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
		if frame.fn != nil {
			f := frame.fn
			targetpc := frame.continpc
			if targetpc == 0 {
				return
			}
			if targetpc != f.entry {
				targetpc--
			}
			pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
			if pcdata == -1 {
				return
			}
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
			if stkmap == nil || stkmap.n <= 0 {
				return
			}
			bv := stackmapdata(stkmap, pcdata)
			size := uintptr(bv.n) * ptrSize
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/ptrSize)
			for i := uintptr(0); i < n; i += ptrSize {
				bitmap := bv.bytedata
				off := (uintptr(p) + i - frame.varp + size) / ptrSize
				mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
		}
		return
	}

	// otherwise, not something the GC knows about.
	// possibly read-only data, like malloc(0).
	// must not have pointers
	return
}