github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/mbitmap.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// addb returns the byte pointer p+n.
//
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nowritebarrier
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	bytep, mask := s.allocBits.bitp(allocBitIndex)
	return markBits{bytep, mask, allocBitIndex}
}
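
// Illustrative sketch, not part of the original source: this mirrors the
// byte/mask decomposition that the gcBits.bitp helper (defined elsewhere in
// the runtime) is assumed to perform for allocBitsForIndex above. Bit i of
// a bitmap lives in byte i/8, at bit position i%8 within that byte.
func exampleBitpDecomposition(base *uint8, i uintptr) (bytep *uint8, mask uint8) {
	bytep = addb(base, i/8)    // byte holding bit i
	mask = uint8(1) << (i % 8) // single-bit mask within that byte
	return bytep, mask
}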

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uint16) {
	bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(uintptr(whichByte))))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uint16 {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.TrailingZeros64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.TrailingZeros64(aCache)
		// Nothing was available in the cached bits;
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uint16(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < uintptr(s.freeIndexForScan) {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}
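
// Illustrative sketch, not part of the original source: the core of the
// allocCache trick above. Alloc bits use 1 for "allocated", so negating a
// little-endian packed 64-bit window turns "find the next free object" into
// "count trailing zeros", which is a single instruction on most targets.
func exampleNextFreeInWindow(allocBytes [8]uint8) (bit int, ok bool) {
	var cache uint64
	for i, b := range allocBytes {
		cache |= uint64(b) << (8 * i) // pack little-endian, as refillAllocCache does
	}
	cache = ^cache                   // now 1 bits mark *free* slots
	bit = sys.TrailingZeros64(cache) // index of first free slot, or 64 if none
	return bit, bit != 64
}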

// divideByElemSize returns n/s.elemsize.
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
//
// nosplit, because it is called by objIndex, which is nosplit
//
//go:nosplit
func (s *mspan) divideByElemSize(n uintptr) uintptr {
	const doubleCheck = false

	// See explanation in mksizeclasses.go's computeDivMagic.
	q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)

	if doubleCheck && q != n/s.elemsize {
		println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
		throw("bad magic division")
	}
	return q
}

// nosplit, because it is called by other nosplit code like findObject
//
//go:nosplit
func (s *mspan) objIndex(p uintptr) uintptr {
	return s.divideByElemSize(p - s.base())
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{&s.gcmarkBits.x, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}
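
// Illustrative sketch, not part of the original source: walking a mark
// bitmap one object at a time with the same byte-pointer/mask stepping rule
// as markBits.advance above. Counts how many of the first n bits starting
// at p are set.
func exampleCountSetBits(p *uint8, n uintptr) int {
	bytep, mask := p, uint8(1)
	count := 0
	for i := uintptr(0); i < n; i++ {
		if *bytep&mask != 0 {
			count++
		}
		if mask == 1<<7 {
			bytep = add1(bytep) // move to the next byte of the bitmap
			mask = 1
		} else {
			mask <<= 1
		}
	}
	return count
}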

// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))

// badPointer throws bad pointer in heap panic.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	if s != nil {
		state := s.state.get()
		if state != mSpanInUse {
			print(" to unallocated span")
		} else {
			print(" to unused region of span")
		}
		print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
	}
	print("\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}

// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
			// Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
			// as they are the only platforms where the compiler's clobberdead mode
			// is implemented. On these platforms clobberdeadPtr cannot be a valid address.
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}

	objIndex = s.objIndex(p)
	base = s.base() + objIndex*s.elemsize
	return
}

// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
//
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
	// Conversion to a pointer is ok as long as findObject above does not call badPointer.
	// Since we're already promised that p doesn't point into the heap, just disallow heap
	// pointers and the special clobbered pointer.
	return spanOf(p) == nil && p != clobberdeadPtr
}

const ptrBits = 8 * goarch.PtrSize
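
// Illustrative sketch, not part of the original source: the address
// arithmetic findObject performs once it has a valid, in-use span. The
// interior pointer is reduced to an object index with the magic divide,
// then scaled back up to the object's base address.
func exampleObjectBase(s *mspan, p uintptr) uintptr {
	objIndex := s.objIndex(p) // (p - s.base()) / s.elemsize, via divMul
	return s.base() + objIndex*s.elemsize
}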

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / goarch.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * goarch.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				p := buf.get1()
				p[0] = *dstx
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				p := buf.get2()
				p[0] = *dstx
				p[1] = *srcx
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe the barrier and the memmove as an atomic action.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.Size_ != size {
		println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.Kind_&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.enabled {
		return
	}
	ptrmask := typ.GCData
	buf := &getg().m.p.ptr().wbBuf
	var bits uint32
	for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
		if i&(goarch.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			p := buf.get2()
			p[0] = *dstx
			p[1] = *srcx
		}
	}
}
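
// Illustrative sketch, not part of the original source: the mask-walking
// pattern shared by bulkBarrierBitmap and typeBitsBulkBarrier above, reduced
// to counting pointer slots. One mask bit covers one pointer-sized word, and
// an all-zero mask byte lets the loop skip 8 pointer-free words at once.
func exampleCountPtrSlots(mask *uint8, size uintptr) int {
	bits := mask
	m := uint8(1)
	n := 0
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if m == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				i += 7 * goarch.PtrSize // skip 8 pointer-free words
				continue
			}
			m = 1
		}
		if *bits&m != 0 {
			n++ // a real barrier would enqueue *dst (and *src) here
		}
		m <<= 1
	}
	return n
}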

// countAlloc returns the number of objects allocated in span s by
// scanning the mark bitmap.
func (s *mspan) countAlloc() int {
	count := 0
	bytes := divRoundUp(uintptr(s.nelems), 8)
	// Iterate over each 8-byte chunk and count allocations
	// with an intrinsic. Note that newMarkBits guarantees that
	// gcmarkBits will be 8-byte aligned, so we don't have to
	// worry about edge cases; irrelevant bits will simply be zero.
	for i := uintptr(0); i < bytes; i += 8 {
		// Extract 64 bits from the byte pointer and get an OnesCount.
		// Note that the unsafe cast here doesn't preserve endianness,
		// but that's OK. We only care about how many bits are 1, not
		// about the order we discover them in.
		mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
		count += sys.OnesCount64(mrkBits)
	}
	return count
}

// readUintptr reads the bytes starting at the aligned pointer p into a uintptr.
// The read is little-endian.
func readUintptr(p *byte) uintptr {
	x := *(*uintptr)(unsafe.Pointer(p))
	if goarch.BigEndian {
		if goarch.PtrSize == 8 {
			return uintptr(sys.Bswap64(uint64(x)))
		}
		return uintptr(sys.Bswap32(uint32(x)))
	}
	return x
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/goarch.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/goarch.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, &x[0])
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
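
// Illustrative sketch, not part of the original source: hand-assembling a
// tiny GC program in the encoding described above. For an array of c
// pointer words (c < 128, so the count fits in a single varint byte), emit
// one literal 1 bit and then repeat it c-1 more times.
func exampleGCProgPtrArray(c byte) []byte {
	if c < 1 || c >= 128 {
		return nil // this sketch only handles single-byte varint counts
	}
	return []byte{
		0x01,        // 0nnnnnnn: emit 1 literal bit...
		0x01,        // ...from this byte: the bit is 1 (a pointer word)
		0x81, c - 1, // 1nnnnnnn c: repeat the previous 1 bit c-1 more times
		0x00,        // stop
	}
}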

// runGCProg returns the number of 1-bit entries written to memory.
func runGCProg(prog, dst *byte) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over.
				break Run
			}
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = goarch.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			src = subtract1(src)
			for npattern < n {
				pattern <<= 8
				pattern |= uintptr(*src)
				src = subtract1(src)
				npattern += 8
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= goarch.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				for nbits >= 8 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
					nbits -= 8
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		// Leading src fragment.
		src = subtractb(src, (off+7)/8)
		if frag := off & 7; frag != 0 {
			bits |= uintptr(*src) >> (8 - frag) << nbits
			src = add1(src)
			nbits += frag
			c -= frag
		}
		// Main loop: load one byte, write another.
		// The bits are rotating through the bit buffer.
		for i := c / 8; i > 0; i-- {
			bits |= uintptr(*src) << nbits
			src = add1(src)
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
		// Final src fragment.
		if c %= 8; c > 0 {
			bits |= (uintptr(*src) & (1<<c - 1)) << nbits
			nbits += c
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
	nbits += -nbits & 7
	for ; nbits > 0; nbits -= 8 {
		*dst = uint8(bits)
		dst = add1(dst)
		bits >>= 8
	}
	return totalBits
}
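
// Illustrative sketch, not part of the original source: driving runGCProg
// by hand on a stack buffer, the way progToPointerMask does (minus the
// overflow sentinel). The program emits one 1 bit and repeats it 9 more
// times, so we expect 10 bits total: dst[0] == 0xFF and dst[1] == 0x03.
func exampleRunGCProg() {
	prog := [...]byte{0x01, 0x01, 0x81, 0x09, 0x00}
	var dst [8]byte
	n := runGCProg(&prog[0], &dst[0])
	println("bits:", n, "byte0:", dst[0], "byte1:", dst[1]) // 10, 255, 3
}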

// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
	// Each word of ptrdata needs one bit in the bitmap.
	bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
	// Compute the number of pages needed for bitmapBytes.
	pages := divRoundUp(bitmapBytes, pageSize)
	s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
	runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
	return s
}

func dematerializeGCProg(s *mspan) {
	mheap_.freeManual(s, spanAllocPtrScalarBits)
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per
// pointer-sized word, each entry stored in its own byte.
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
	return getgcmask(x)
}
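
// Illustrative sketch, not part of the original source: the sizing
// arithmetic materializeGCProg uses, with a worked example. Each
// pointer-sized word of ptrdata needs one mask bit, so on a 64-bit system
// a 1 MiB object needs (1<<20)/(8*8) = 16 KiB of bitmap, which is 2 pages
// at the runtime's 8 KiB page size.
func exampleGCProgBitmapPages(ptrdata uintptr) (bitmapBytes, pages uintptr) {
	bitmapBytes = divRoundUp(ptrdata, 8*goarch.PtrSize) // one bit per word
	pages = divRoundUp(bitmapBytes, pageSize)           // manual span size
	return
}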