github.com/c0deoo1/golang1.5@v0.0.0-20220525150107-c87c805d4593/src/runtime/malloc.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Memory allocator, based on tcmalloc.
     6  // http://goog-perftools.sourceforge.net/doc/tcmalloc.html
     7  
     8  // The main allocator works in runs of pages.
     9  // Small allocation sizes (up to and including 32 kB) are
    10  // rounded to one of about 70 size classes, each of which
    11  // has its own free list of objects of exactly that size.
    12  // Any free page of memory can be split into a set of objects
    13  // of one size class, which are then managed using free list
    14  // allocators.
    15  //
    16  // The allocator's data structures are:
    17  //
    18  //	FixAlloc: a free-list allocator for fixed-size objects,
    19  //		used to manage storage used by the allocator.
    20  //	MHeap: the malloc heap, managed at page (8192-byte) granularity.
    21  //	MSpan: a run of pages managed by the MHeap.
    22  //	MCentral: a shared free list for a given size class.
    23  //	MCache: a per-thread (in Go, per-P) cache for small objects.
    24  //	MStats: allocation statistics.
    25  //
    26  // Allocating a small object proceeds up a hierarchy of caches:
    27  //
    28  //	1. Round the size up to one of the small size classes
    29  //	   and look in the corresponding MCache free list.
    30  //	   If the list is not empty, allocate an object from it.
    31  //	   This can all be done without acquiring a lock.
    32  //
    33  //	2. If the MCache free list is empty, replenish it by
    34  //	   taking a bunch of objects from the MCentral free list.
    35  //	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
    36  //
    37  //	3. If the MCentral free list is empty, replenish it by
    38  //	   allocating a run of pages from the MHeap and then
    39  //	   chopping that memory into objects of the given size.
    40  //	   Allocating many objects amortizes the cost of locking
    41  //	   the heap.
    42  //
    43  //	4. If the MHeap is empty or has no page runs large enough,
    44  //	   allocate a new group of pages (at least 1MB) from the
    45  //	   operating system.  Allocating a large run of pages
    46  //	   amortizes the cost of talking to the operating system.
    47  //
    48  // Freeing a small object proceeds up the same hierarchy:
    49  //
    50  //	1. Look up the size class for the object and add it to
    51  //	   the MCache free list.
    52  //
    53  //	2. If the MCache free list is too long or the MCache has
    54  //	   too much memory, return some to the MCentral free lists.
    55  //
    56  //	3. If all the objects in a given span have returned to
    57  //	   the MCentral list, return that span to the page heap.
    58  //
    59  //	4. If the heap has too much memory, return some to the
    60  //	   operating system.
    61  //
    62  //	TODO(rsc): Step 4 is not implemented.
    63  //
    64  // Allocating and freeing a large object uses the page heap
    65  // directly, bypassing the MCache and MCentral free lists.
    66  //
    67  // The small objects on the MCache and MCentral free lists
    68  // may or may not be zeroed.  They are zeroed if and only if
    69  // the second word of the object is zero.  A span in the
    70  // page heap is zeroed unless s->needzero is set. When a span
    71  // is allocated to break into small objects, it is zeroed if needed
    72  // and s->needzero is set. There are two main benefits to delaying the
    73  // zeroing this way:
    74  //
    75  //	1. stack frames allocated from the small object lists
    76  //	   or the page heap can avoid zeroing altogether.
    77  //	2. the cost of zeroing when reusing a small object is
    78  //	   charged to the mutator, not the garbage collector.
    79  //
    80  // This code was originally written in C with an eye toward translating it to Go;
    81  // that is why many functions still have the form Type_Method(Type *t, ...).
    82  
    83  package runtime
    84  
    85  import "unsafe"
    86  
    87  const (
    88  	debugMalloc = false
    89  
    90  	flagNoScan = _FlagNoScan
    91  	flagNoZero = _FlagNoZero
    92  
    93  	maxTinySize   = _TinySize
    94  	tinySizeClass = _TinySizeClass
    95  	maxSmallSize  = _MaxSmallSize
    96  
    97  	pageShift = _PageShift
    98  	pageSize  = _PageSize
    99  	pageMask  = _PageMask
   100  
   101  	mSpanInUse = _MSpanInUse
   102  
   103  	concurrentSweep = _ConcurrentSweep
   104  )
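
// Editor's sketch (not part of the original source): the size-to-class
// rounding that mallocgc performs below for small objects, pulled out into a
// hypothetical helper.  size_to_class8, size_to_class128 and class_to_size
// are the real runtime tables built by initSizes in msize.go.
//
//	func roundToSizeClass(size uintptr) uintptr {
//		var sizeclass int8
//		if size <= 1024-8 {
//			// classes below 1 kB are indexed in 8-byte steps
//			sizeclass = size_to_class8[(size+7)>>3]
//		} else {
//			// classes from 1 kB up to 32 kB are indexed in 128-byte steps
//			sizeclass = size_to_class128[(size-1024+127)>>7]
//		}
//		return uintptr(class_to_size[sizeclass]) // size the object actually occupies
//	}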
   105  
   106  const (
   107  	_PageShift = 13
   108  	_PageSize  = 1 << _PageShift
   109  	_PageMask  = _PageSize - 1
   110  )
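
// Note that the runtime's page size here is 8192 bytes (1<<13), independent of
// the operating system's page size (commonly 4096 bytes).  Because of that
// mismatch, mallocinit below reserves _PageSize extra bytes and rounds the
// returned pointer up to a _PageSize boundary.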
   111  
   112  const (
   113  	// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
   114  	_64bit = 1 << (^uintptr(0) >> 63) / 2
   115  
   116  	// Computed constant.  The definition of MaxSmallSize and the
   117  	// algorithm in msize.go produces some number of different allocation
   118  	// size classes.  NumSizeClasses is that number.  It's needed here
   119  	// because there are static arrays of this length; when msize runs its
   120  	// size choosing algorithm it double-checks that NumSizeClasses agrees.
   121  	_NumSizeClasses = 67
   122  
   123  	// Tunable constants.
   124  	_MaxSmallSize = 32 << 10
   125  
   126  	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
   127  	_TinySize      = 16
   128  	_TinySizeClass = 2
   129  
   130  	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
   131  	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
   132  	_HeapAllocChunk = 1 << 20                // Chunk size for heap growth
   133  
   134  	// Per-P, per order stack segment cache size.
   135  	_StackCacheSize = 32 * 1024
   136  
   137  	// Number of orders that get caching.  Order 0 is FixedStack
   138  	// and each successive order is twice as large.
   139  	// We want to cache 2KB, 4KB, 8KB, and 16KB stacks.  Larger stacks
   140  	// will be allocated directly.
   141  	// Since FixedStack is different on different systems, we
   142  	// must vary NumStackOrders to keep the same maximum cached size.
   143  	//   OS               | FixedStack | NumStackOrders
   144  	//   -----------------+------------+---------------
   145  	//   linux/darwin/bsd | 2KB        | 4
   146  	//   windows/32       | 4KB        | 3
   147  	//   windows/64       | 8KB        | 2
   148  	//   plan9            | 4KB        | 3
   149  	_NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9
   150  
   151  	// Number of bits in page to span calculations (8 kB pages).
   152  	// On Windows 64-bit we limit the arena to 32GB or 35 bits.
   153  	// Windows counts memory used by page table into committed memory
   154  	// of the process, so we can't reserve too much memory.
   155  	// See https://golang.org/issue/5402 and https://golang.org/issue/5236.
   156  	// On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
   157  	// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
   158  	// On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
   159  	// but as most devices have less than 4GB of physical memory anyway, we
   160  	// try to be conservative here, and only ask for a 2GB heap.
   161  	_MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows)*(1-goos_darwin*goarch_arm64))*39 + goos_darwin*goarch_arm64*31 + (1-_64bit)*32
   162  	_MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift
   163  
   164  	_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
   165  
   166  	// Max number of threads to run garbage collection.
   167  	// 2, 3, and 4 are all plausible maximums depending
   168  	// on the hardware details of the machine.  The garbage
   169  	// collector scales well to 32 cpus.
   170  	_MaxGcproc = 32
   171  )
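
// Editor's sketch (illustration only): on a typical 64-bit, non-Windows,
// non-darwin/arm64 system the constants above give _MHeapMap_TotalBits = 39,
// and mallocinit derives its reservation sizes from that as follows.
//
//	arenaSize := uintptr(1) << 39                 // 512 GB heap arena
//	bitmapSize := arenaSize / (ptrSize * 8 / 4)   // 4 bitmap bits per 8-byte word: 32 GB
//	spansSize := arenaSize / _PageSize * ptrSize  // one *mspan pointer per 8 kB page: 512 MB
//
// which is where the "544 GB" figure in mallocinit's comment comes from
// (512 GB of arena plus the 32 GB bitmap; the 512 MB spans array disappears
// into the rounding).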
   172  
   173  // Page number (address>>pageShift)
   174  type pageID uintptr
   175  
   176  const _MaxArena32 = 2 << 30
   177  
   178  // OS-defined helpers:
   179  //
   180  // sysAlloc obtains a large chunk of zeroed memory from the
   181  // operating system, typically on the order of a hundred kilobytes
   182  // or a megabyte.
   183  // NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
   184  // may use larger alignment, so the caller must be careful to realign the
   185  // memory obtained by sysAlloc.
   186  //
   187  // SysUnused notifies the operating system that the contents
   188  // of the memory region are no longer needed and can be reused
   189  // for other purposes.
   190  // SysUsed notifies the operating system that the contents
   191  // of the memory region are needed again.
   192  //
   193  // SysFree returns memory to the OS unconditionally; this is only used if
   194  // an out-of-memory error has been detected midway through
   195  // an allocation.  It is okay if SysFree is a no-op.
   196  //
   197  // SysReserve reserves address space without allocating memory.
   198  // If the pointer passed to it is non-nil, the caller wants the
   199  // reservation there, but SysReserve can still choose another
   200  // location if that one is unavailable.  On some systems and in some
   201  // cases SysReserve will simply check that the address space is
   202  // available and not actually reserve it.  If SysReserve returns
   203  // non-nil, it sets *reserved to true if the address space is
   204  // reserved, false if it has merely been checked.
   205  // NOTE: SysReserve returns OS-aligned memory, but the heap allocator
   206  // may use larger alignment, so the caller must be careful to realign the
   207  // memory obtained by sysReserve.
   208  //
   209  // SysMap maps previously reserved address space for use.
   210  // The reserved argument is true if the address space was really
   211  // reserved, not merely checked.
   212  //
   213  // SysFault marks an (already sysAlloc'd) region to fault
   214  // if accessed.  Used only for debugging the runtime.
   215  
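// Editor's sketch (illustration only) of the reserve-then-map lifecycle these
// helpers provide, using the same signatures they have elsewhere in this file:
//
//	var reserved bool
//	v := sysReserve(nil, 1<<20, &reserved) // claim address space; no backing memory yet
//	if v == nil {
//		throw("out of address space")
//	}
//	sysMap(v, 1<<20, reserved, &memstats.heap_sys) // commit the pages for actual use
//	// on a failure path, sysFree(v, 1<<20, &memstats.heap_sys) hands it all back
//
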
   216  func mallocinit() {
   217  	initSizes()
   218  
   219  	if class_to_size[_TinySizeClass] != _TinySize {
   220  		throw("bad TinySizeClass")
   221  	}
   222  
   223  	var p, bitmapSize, spansSize, pSize, limit uintptr
   224  	var reserved bool
   225  
   226  	// limit = runtime.memlimit();
   227  	// See https://golang.org/issue/5049
   228  	// TODO(rsc): Fix after 1.1.
   229  	limit = 0
   230  
   231  	// Set up the allocation arena, a contiguous area of memory where
   232  	// allocated data will be found.  The arena begins with a bitmap large
   233  	// enough to hold 4 bits per allocated word.
   234  	if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
   235  		// On a 64-bit machine, allocate from a single contiguous reservation.
   236  		// 512 GB (MaxMem) should be big enough for now.
   237  		//
   238  		// The code will work with the reservation at any address, but ask
   239  		// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
   240  		// Allocating a 512 GB region takes away 39 bits, and the amd64
   241  		// doesn't let us choose the top 17 bits, so that leaves the 9 bits
   242  		// in the middle of 0x00c0 for us to choose.  Choosing 0x00c0 means
   243  		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
   244  		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
   245  		// UTF-8 sequences, and they are otherwise as far away from
   246  		// ff (likely a common byte) as possible.  If that fails, we try other 0xXXc0
   247  		// addresses.  An earlier attempt to use 0x11f8 caused out of memory errors
   248  		// on OS X during thread allocations.  0x00c0 causes conflicts with
   249  		// AddressSanitizer which reserves all memory up to 0x0100.
   250  		// These choices are both for debuggability and to reduce the
   251  		// odds of a conservative garbage collector (as is still used in gccgo)
   252  		// not collecting memory because some non-pointer block of memory
   253  		// had a bit pattern that matched a memory address.
   254  		//
   255  		// Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
   256  		// but it hardly matters: e0 00 is not valid UTF-8 either.
   257  		//
   258  		// If this fails we fall back to the 32 bit memory mechanism
   259  		//
   260  		// However, on arm64, we ignore all this advice above and slam the
   261  		// allocation at 0x40 << 32 because when using 4k pages with 3-level
   262  		// translation buffers, the user address space is limited to 39 bits.
   263  		// On darwin/arm64, the address space is even smaller.
   264  		arenaSize := round(_MaxMem, _PageSize)
   265  		bitmapSize = arenaSize / (ptrSize * 8 / 4)
   266  		spansSize = arenaSize / _PageSize * ptrSize
   267  		spansSize = round(spansSize, _PageSize)
   268  		for i := 0; i <= 0x7f; i++ {
   269  			switch {
   270  			case GOARCH == "arm64" && GOOS == "darwin":
   271  				p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
   272  			case GOARCH == "arm64":
   273  				p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
   274  			default:
   275  				p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
   276  			}
   277  			pSize = bitmapSize + spansSize + arenaSize + _PageSize
   278  			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
   279  			if p != 0 {
   280  				break
   281  			}
   282  		}
   283  	}
   284  
   285  	if p == 0 {
   286  		// On a 32-bit machine, we can't typically get away
   287  		// with a giant virtual address space reservation.
   288  		// Instead we map the memory information bitmap
   289  		// immediately after the data segment, large enough
   290  		// to handle another 2GB of mappings (a 256 MB bitmap),
   291  		// along with a reservation for an initial arena.
   292  		// When that gets used up, we'll start asking the kernel
   293  		// for any memory anywhere and hope it's in the 2GB
   294  		// following the bitmap (presumably the executable begins
   295  		// near the bottom of memory, so we'll have to use up
   296  		// most of memory before the kernel resorts to giving out
   297  		// memory before the beginning of the text segment).
   298  		//
   299  		// Alternatively we could reserve 512 MB bitmap, enough
   300  		// for 4GB of mappings, and then accept any memory the
   301  		// kernel threw at us, but normally that's a waste of 512 MB
   302  		// of address space, which is probably too much in a 32-bit world.
   303  
   304  		// If we fail to allocate, try again with a smaller arena.
   305  		// This is necessary on Android L where we share a process
   306  		// with ART, which reserves virtual memory aggressively.
   307  		arenaSizes := []uintptr{
   308  			512 << 20,
   309  			256 << 20,
   310  			128 << 20,
   311  		}
   312  
   313  		for _, arenaSize := range arenaSizes {
   314  			bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
   315  			spansSize = _MaxArena32 / _PageSize * ptrSize
   316  			if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
   317  				bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
   318  				arenaSize = bitmapSize * 8
   319  				spansSize = arenaSize / _PageSize * ptrSize
   320  			}
   321  			spansSize = round(spansSize, _PageSize)
   322  
   323  			// SysReserve treats the address we ask for, end, as a hint,
   324  			// not as an absolute requirement.  If we ask for the end
   325  			// of the data segment but the operating system requires
   326  			// a little more space before we can start allocating, it will
   327  			// give out a slightly higher pointer.  Except QEMU, which
   328  			// is buggy, as usual: it won't adjust the pointer upward.
   329  			// So adjust it upward a little bit ourselves: 1/4 MB to get
   330  			// away from the running binary image and then round up
   331  			// to a MB boundary.
   332  			p = round(firstmoduledata.end+(1<<18), 1<<20)
   333  			pSize = bitmapSize + spansSize + arenaSize + _PageSize
   334  			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
   335  			if p != 0 {
   336  				break
   337  			}
   338  		}
   339  		if p == 0 {
   340  			throw("runtime: cannot reserve arena virtual address space")
   341  		}
   342  	}
   343  
   344  	// PageSize can be larger than OS definition of page size,
   345  	// so SysReserve can give us a PageSize-unaligned pointer.
   346  	// To overcome this we ask for PageSize more and round up the pointer.
   347  	p1 := round(p, _PageSize)
   348  
   349  	mheap_.spans = (**mspan)(unsafe.Pointer(p1))
   350  	mheap_.bitmap = p1 + spansSize
   351  	mheap_.arena_start = p1 + (spansSize + bitmapSize)
   352  	mheap_.arena_used = mheap_.arena_start
   353  	mheap_.arena_end = p + pSize
   354  	mheap_.arena_reserved = reserved
   355  
   356  	if mheap_.arena_start&(_PageSize-1) != 0 {
   357  		println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
   358  		throw("misrounded allocation in mallocinit")
   359  	}
   360  
   361  	// Initialize the rest of the allocator.
   362  	mHeap_Init(&mheap_, spansSize)
   363  	_g_ := getg()
   364  	_g_.m.mcache = allocmcache()
   365  }
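
// After mallocinit returns, the reservation is laid out, from low to high
// addresses, as the spans array, then the heap bitmap, then the arena itself:
//
//	p1            p1+spansSize   p1+spansSize+bitmapSize              arena_end
//	| spans       | bitmap       | arena (arena_start, grows via arena_used) |
//
// where p1 is the reservation rounded up to a _PageSize boundary; arena_used
// advances through the arena as mHeap_SysAlloc hands out memory below.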
   366  
   367  // sysReserveHigh reserves space somewhere high in the address space.
   368  // sysReserve doesn't actually reserve the full amount requested on
   369  // 64-bit systems, because of problems with ulimit. Instead it checks
   370  // that it can get the first 64 kB and assumes it can grab the rest as
   371  // needed. This doesn't work well with the "let the kernel pick an address"
   372  // mode, so don't do that. Pick a high address instead.
   373  func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
   374  	if ptrSize == 4 {
   375  		return sysReserve(nil, n, reserved)
   376  	}
   377  
   378  	for i := 0; i <= 0x7f; i++ {
   379  		p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
   380  		*reserved = false
   381  		p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
   382  		if p != 0 {
   383  			return unsafe.Pointer(p)
   384  		}
   385  	}
   386  
   387  	return sysReserve(nil, n, reserved)
   388  }
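
// The candidate hints are therefore 0x00c000000000, 0x01c000000000, ...,
// 0x7fc000000000: the same 0xXXc0 << 32 pattern that mallocinit prefers for
// the heap arena, keeping these reservations well away from the low addresses
// where the binary and its data normally live.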
   389  
   390  func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
   391  	if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
   392  		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
   393  		// Reserve some more space.
   394  		p_size := round(n+_PageSize, 256<<20)
   395  		new_end := h.arena_end + p_size
   396  		if new_end <= h.arena_start+_MaxArena32 {
   397  			// TODO: It would be bad if part of the arena
   398  			// is reserved and part is not.
   399  			var reserved bool
   400  			p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
   401  			if p == h.arena_end {
   402  				h.arena_end = new_end
   403  				h.arena_reserved = reserved
   404  			} else if p+p_size <= h.arena_start+_MaxArena32 {
   405  				// Keep everything page-aligned.
   406  				// Our pages are bigger than hardware pages.
   407  				h.arena_end = p + p_size
   408  				used := p + (-uintptr(p) & (_PageSize - 1))
   409  				mHeap_MapBits(h, used)
   410  				mHeap_MapSpans(h, used)
   411  				h.arena_used = used
   412  				h.arena_reserved = reserved
   413  			} else {
   414  				var stat uint64
   415  				sysFree((unsafe.Pointer)(p), p_size, &stat)
   416  			}
   417  		}
   418  	}
   419  
   420  	if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
   421  		// Keep taking from our reservation.
   422  		p := h.arena_used
   423  		sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
   424  		mHeap_MapBits(h, p+n)
   425  		mHeap_MapSpans(h, p+n)
   426  		h.arena_used = p + n
   427  		if raceenabled {
   428  			racemapshadow((unsafe.Pointer)(p), n)
   429  		}
   430  
   431  		if uintptr(p)&(_PageSize-1) != 0 {
   432  			throw("misrounded allocation in MHeap_SysAlloc")
   433  		}
   434  		return (unsafe.Pointer)(p)
   435  	}
   436  
   437  	// If using 64-bit, our reservation is all we have.
   438  	if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
   439  		return nil
   440  	}
   441  
   442  	// On 32-bit, once the reservation is gone we can
   443  	// try to get memory at a location chosen by the OS
   444  	// and hope that it is in the range we allocated bitmap for.
   445  	p_size := round(n, _PageSize) + _PageSize
   446  	p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
   447  	if p == 0 {
   448  		return nil
   449  	}
   450  
   451  	if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
   452  		print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
   453  		sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
   454  		return nil
   455  	}
   456  
   457  	p_end := p + p_size
   458  	p += -p & (_PageSize - 1)
   459  	if uintptr(p)+n > uintptr(h.arena_used) {
   460  		mHeap_MapBits(h, p+n)
   461  		mHeap_MapSpans(h, p+n)
   462  		h.arena_used = p + n
   463  		if p_end > h.arena_end {
   464  			h.arena_end = p_end
   465  		}
   466  		if raceenabled {
   467  			racemapshadow((unsafe.Pointer)(p), n)
   468  		}
   469  	}
   470  
   471  	if uintptr(p)&(_PageSize-1) != 0 {
   472  		throw("misrounded allocation in MHeap_SysAlloc")
   473  	}
   474  	return (unsafe.Pointer)(p)
   475  }
   476  
   477  // base address for all 0-byte allocations
   478  var zerobase uintptr
   479  
   480  const (
   481  	// flags to malloc
   482  	_FlagNoScan = 1 << 0 // GC doesn't have to scan object
   483  	_FlagNoZero = 1 << 1 // don't zero memory
   484  )
   485  
   486  // Allocate an object of size bytes.
   487  // Small objects are allocated from the per-P cache's free lists.
   488  // Large objects (> 32 kB) are allocated straight from the heap.
   489  func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
   490  	if gcphase == _GCmarktermination {
   491  		throw("mallocgc called with gcphase == _GCmarktermination")
   492  	}
   493  
   494  	if size == 0 {
   495  		return unsafe.Pointer(&zerobase)
   496  	}
   497  
   498  	if flags&flagNoScan == 0 && typ == nil {
   499  		throw("malloc missing type")
   500  	}
   501  
   502  	if debug.sbrk != 0 {
   503  		align := uintptr(16)
   504  		if typ != nil {
   505  			align = uintptr(typ.align)
   506  		}
   507  		return persistentalloc(size, align, &memstats.other_sys)
   508  	}
   509  
   510  	// Set mp.mallocing to keep from being preempted by GC.
   511  	mp := acquirem()
   512  	if mp.mallocing != 0 {
   513  		throw("malloc deadlock")
   514  	}
   515  	if mp.gsignal == getg() {
   516  		throw("malloc during signal")
   517  	}
   518  	mp.mallocing = 1
   519  
   520  	shouldhelpgc := false
   521  	dataSize := size
   522  	c := gomcache()
   523  	var s *mspan
   524  	var x unsafe.Pointer
   525  	if size <= maxSmallSize {
   526  		if flags&flagNoScan != 0 && size < maxTinySize {
   527  			// Tiny allocator.
   528  			//
   529  			// Tiny allocator combines several tiny allocation requests
   530  			// into a single memory block. The resulting memory block
   531  			// is freed when all subobjects are unreachable. The subobjects
   532  			// must be FlagNoScan (have no pointers); this ensures that
   533  			// the amount of potentially wasted memory is bounded.
   534  			//
   535  			// The size of the memory block used for combining (maxTinySize) is tunable.
   536  			// The current setting is 16 bytes, which gives at most 2x worst-case wastage:
   537  			// a single live subobject (at least 8 bytes on its own) can pin a 16-byte block.
   538  			// 8 bytes would result in no wastage at all, but provides fewer
   539  			// opportunities for combining.
   540  			// 32 bytes provides more opportunities for combining,
   541  			// but can lead to 4x worst-case wastage.
   542  			// The best-case saving is 8x regardless of block size.
   543  			//
   544  			// Objects obtained from the tiny allocator must not be freed explicitly.
   545  			// So when an object will be freed explicitly, we ensure that
   546  			// its size >= maxTinySize.
   547  			//
   548  			// SetFinalizer has a special case for objects potentially coming
   549  			// from the tiny allocator; in that case it allows setting finalizers
   550  			// for an inner byte of a memory block.
   551  			//
   552  			// The main targets of the tiny allocator are small strings and
   553  			// standalone escaping variables. On a json benchmark
   554  			// the allocator reduces the number of allocations by ~12% and
   555  			// reduces the heap size by ~20%.
   556  			off := c.tinyoffset
   557  			// Align tiny pointer for required (conservative) alignment.
   558  			if size&7 == 0 {
   559  				off = round(off, 8)
   560  			} else if size&3 == 0 {
   561  				off = round(off, 4)
   562  			} else if size&1 == 0 {
   563  				off = round(off, 2)
   564  			}
   565  			if off+size <= maxTinySize && c.tiny != nil {
   566  				// The object fits into existing tiny block.
   567  				x = add(c.tiny, off)
   568  				c.tinyoffset = off + size
   569  				c.local_tinyallocs++
   570  				mp.mallocing = 0
   571  				releasem(mp)
   572  				return x
   573  			}
   574  			// Allocate a new maxTinySize block.
   575  			s = c.alloc[tinySizeClass]
   576  			v := s.freelist
   577  			if v.ptr() == nil {
   578  				systemstack(func() {
   579  					mCache_Refill(c, tinySizeClass)
   580  				})
   581  				shouldhelpgc = true
   582  				s = c.alloc[tinySizeClass]
   583  				v = s.freelist
   584  			}
   585  			s.freelist = v.ptr().next
   586  			s.ref++
   587  			// prefetchnta offers best performance, see change list message.
   588  			prefetchnta(uintptr(v.ptr().next))
   589  			x = unsafe.Pointer(v)
   590  			(*[2]uint64)(x)[0] = 0
   591  			(*[2]uint64)(x)[1] = 0
   592  			// See if we need to replace the existing tiny block with the new one
   593  			// based on amount of remaining free space.
   594  			if size < c.tinyoffset {
   595  				c.tiny = x
   596  				c.tinyoffset = size
   597  			}
   598  			size = maxTinySize
   599  		} else {
   600  			var sizeclass int8
   601  			if size <= 1024-8 {
   602  				sizeclass = size_to_class8[(size+7)>>3]
   603  			} else {
   604  				sizeclass = size_to_class128[(size-1024+127)>>7]
   605  			}
   606  			size = uintptr(class_to_size[sizeclass])
   607  			s = c.alloc[sizeclass]
   608  			v := s.freelist
   609  			if v.ptr() == nil {
   610  				systemstack(func() {
   611  					mCache_Refill(c, int32(sizeclass))
   612  				})
   613  				shouldhelpgc = true
   614  				s = c.alloc[sizeclass]
   615  				v = s.freelist
   616  			}
   617  			s.freelist = v.ptr().next
   618  			s.ref++
   619  			// prefetchnta offers best performance, see change list message.
   620  			prefetchnta(uintptr(v.ptr().next))
   621  			x = unsafe.Pointer(v)
   622  			if flags&flagNoZero == 0 {
   623  				v.ptr().next = 0
   624  				if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
   625  					memclr(unsafe.Pointer(v), size)
   626  				}
   627  			}
   628  		}
   629  		c.local_cachealloc += size
   630  	} else {
   631  		var s *mspan
   632  		shouldhelpgc = true
   633  		systemstack(func() {
   634  			s = largeAlloc(size, uint32(flags))
   635  		})
   636  		x = unsafe.Pointer(uintptr(s.start << pageShift))
   637  		size = uintptr(s.elemsize)
   638  	}
   639  
   640  	if flags&flagNoScan != 0 {
   641  		// All objects are pre-marked as noscan. Nothing to do.
   642  	} else {
   643  		// If allocating a defer+arg block, now that we've picked a malloc size
   644  		// large enough to hold everything, cut the "asked for" size down to
   645  		// just the defer header, so that the GC bitmap will record the arg block
   646  		// as containing nothing at all (as if it were unused space at the end of
   647  		// a malloc block caused by size rounding).
   648  		// The defer arg areas are scanned as part of scanstack.
   649  		if typ == deferType {
   650  			dataSize = unsafe.Sizeof(_defer{})
   651  		}
   652  		heapBitsSetType(uintptr(x), size, dataSize, typ)
   653  		if dataSize > typ.size {
   654  			// Array allocation. If there are any
   655  			// pointers, GC has to scan to the last
   656  			// element.
   657  			if typ.ptrdata != 0 {
   658  				c.local_scan += dataSize - typ.size + typ.ptrdata
   659  			}
   660  		} else {
   661  			c.local_scan += typ.ptrdata
   662  		}
   663  
   664  		// Ensure that the stores above that initialize x to
   665  		// type-safe memory and set the heap bits occur before
   666  		// the caller can make x observable to the garbage
   667  		// collector. Otherwise, on weakly ordered machines,
   668  		// the garbage collector could follow a pointer to x,
   669  		// but see uninitialized memory or stale heap bits.
   670  		publicationBarrier()
   671  	}
   672  
   673  	// _GCmarktermination allocates black
   674  	// All slots hold nil so no scanning is needed.
   675  	// This may be racing with GC so do it atomically if there can be
   676  	// a race marking the bit.
   677  	if gcphase == _GCmarktermination || gcBlackenPromptly {
   678  		systemstack(func() {
   679  			gcmarknewobject_m(uintptr(x), size)
   680  		})
   681  	}
   682  
   683  	if raceenabled {
   684  		racemalloc(x, size)
   685  	}
   686  
   687  	mp.mallocing = 0
   688  	releasem(mp)
   689  
   690  	if debug.allocfreetrace != 0 {
   691  		tracealloc(x, size, typ)
   692  	}
   693  
   694  	if rate := MemProfileRate; rate > 0 {
   695  		if size < uintptr(rate) && int32(size) < c.next_sample {
   696  			c.next_sample -= int32(size)
   697  		} else {
   698  			mp := acquirem()
   699  			profilealloc(mp, x, size)
   700  			releasem(mp)
   701  		}
   702  	}
   703  
   704  	if shouldhelpgc && shouldtriggergc() {
   705  		startGC(gcBackgroundMode, false)
   706  	} else if gcBlackenEnabled != 0 {
   707  		// Assist garbage collector. We delay this until the
   708  		// epilogue so that it doesn't interfere with the
   709  		// inner working of malloc such as mcache refills that
   710  		// might happen while doing the gcAssistAlloc.
   711  		gcAssistAlloc(size, shouldhelpgc)
   712  	} else if shouldhelpgc && bggc.working != 0 {
   713  		// The GC is starting up or shutting down, so we can't
   714  		// assist, but we also can't allocate unabated. Slow
   715  		// down this G's allocation and help the GC stay
   716  		// scheduled by yielding.
   717  		//
   718  		// TODO: This is a workaround. Either help the GC make
   719  		// the transition or block.
   720  		gp := getg()
   721  		if gp != gp.m.g0 && gp.m.locks == 0 && gp.m.preemptoff == "" {
   722  			Gosched()
   723  		}
   724  	}
   725  
   726  	return x
   727  }
   728  
   729  func largeAlloc(size uintptr, flag uint32) *mspan {
   730  	// print("largeAlloc size=", size, "\n")
   731  
   732  	if size+_PageSize < size {
   733  		throw("out of memory")
   734  	}
   735  	npages := size >> _PageShift
   736  	if size&_PageMask != 0 {
   737  		npages++
   738  	}
   739  
   740  	// Deduct credit for this span allocation and sweep if
   741  	// necessary. mHeap_Alloc will also sweep npages, so this only
   742  	// pays the debt down to npages pages.
   743  	deductSweepCredit(npages*_PageSize, npages)
   744  
   745  	s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
   746  	if s == nil {
   747  		throw("out of memory")
   748  	}
   749  	s.limit = uintptr(s.start)<<_PageShift + size
   750  	heapBitsForSpan(s.base()).initSpan(s.layout())
   751  	return s
   752  }
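
// For example, with 8 kB pages a 40960-byte request is exactly 5 pages
// (40960>>13 == 5 with no remainder), while a 40000-byte request gives
// 40000>>13 == 4 plus one extra page for the 7232-byte remainder, so 5 pages
// as well.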
   753  
   754  // implementation of new builtin
   755  func newobject(typ *_type) unsafe.Pointer {
   756  	flags := uint32(0)
   757  	if typ.kind&kindNoPointers != 0 {
   758  		flags |= flagNoScan
   759  	}
   760  	return mallocgc(uintptr(typ.size), typ, flags)
   761  }
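
// For example, a statement such as
//
//	p := new([4]int64)
//
// compiles into a call to newobject with the *_type describing [4]int64;
// because that type contains no pointers, kindNoPointers is set and the
// allocation takes the flagNoScan path in mallocgc above.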
   762  
   763  //go:linkname reflect_unsafe_New reflect.unsafe_New
   764  func reflect_unsafe_New(typ *_type) unsafe.Pointer {
   765  	return newobject(typ)
   766  }
   767  
   768  // implementation of make builtin for slices
   769  func newarray(typ *_type, n uintptr) unsafe.Pointer {
   770  	flags := uint32(0)
   771  	if typ.kind&kindNoPointers != 0 {
   772  		flags |= flagNoScan
   773  	}
   774  	if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
   775  		panic("runtime: allocation size out of range")
   776  	}
   777  	return mallocgc(uintptr(typ.size)*n, typ, flags)
   778  }
   779  
   780  //go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
   781  func reflect_unsafe_NewArray(typ *_type, n uintptr) unsafe.Pointer {
   782  	return newarray(typ, n)
   783  }
   784  
   785  // rawmem returns a chunk of pointerless memory.  It is
   786  // not zeroed.
   787  func rawmem(size uintptr) unsafe.Pointer {
   788  	return mallocgc(size, nil, flagNoScan|flagNoZero)
   789  }
   790  
   791  func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
   792  	c := mp.mcache
   793  	rate := MemProfileRate
   794  	if size < uintptr(rate) {
   795  		// pick next profile time
   796  		// If you change this, also change allocmcache.
   797  		if rate > 0x3fffffff { // make 2*rate not overflow
   798  			rate = 0x3fffffff
   799  		}
   800  		next := int32(fastrand1()) % (2 * int32(rate))
   801  		// Subtract the "remainder" of the current allocation.
   802  		// Otherwise objects that are close in size to sampling rate
   803  		// will be under-sampled, because we consistently discard this remainder.
   804  		next -= (int32(size) - c.next_sample)
   805  		if next < 0 {
   806  			next = 0
   807  		}
   808  		c.next_sample = next
   809  	}
   810  
   811  	mProf_Malloc(x, size)
   812  }
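
// Together with the fast-path check in mallocgc, this implements randomized
// sampling of the heap profile: next_sample counts down by the size of each
// small allocation, and once it is exhausted a profile record is taken here
// and a fresh threshold is drawn uniformly from [0, 2*rate), so on average
// roughly one sample is recorded per MemProfileRate bytes allocated
// (allocations of MemProfileRate bytes or more are always sampled).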
   813  
   814  type persistentAlloc struct {
   815  	base unsafe.Pointer
   816  	off  uintptr
   817  }
   818  
   819  var globalAlloc struct {
   820  	mutex
   821  	persistentAlloc
   822  }
   823  
   824  // Wrapper around sysAlloc that can allocate small chunks.
   825  // There is no associated free operation.
   826  // Intended for things like function/type/debug-related persistent data.
   827  // If align is 0, uses default align (currently 8).
   828  func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
   829  	var p unsafe.Pointer
   830  	systemstack(func() {
   831  		p = persistentalloc1(size, align, sysStat)
   832  	})
   833  	return p
   834  }
   835  
   836  // Must run on system stack because stack growth can (re)invoke it.
   837  // See issue 9174.
   838  //go:systemstack
   839  func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
   840  	const (
   841  		chunk    = 256 << 10
   842  		maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
   843  	)
   844  
   845  	if size == 0 {
   846  		throw("persistentalloc: size == 0")
   847  	}
   848  	if align != 0 {
   849  		if align&(align-1) != 0 {
   850  			throw("persistentalloc: align is not a power of 2")
   851  		}
   852  		if align > _PageSize {
   853  			throw("persistentalloc: align is too large")
   854  		}
   855  	} else {
   856  		align = 8
   857  	}
   858  
   859  	if size >= maxBlock {
   860  		return sysAlloc(size, sysStat)
   861  	}
   862  
   863  	mp := acquirem()
   864  	var persistent *persistentAlloc
   865  	if mp != nil && mp.p != 0 {
   866  		persistent = &mp.p.ptr().palloc
   867  	} else {
   868  		lock(&globalAlloc.mutex)
   869  		persistent = &globalAlloc.persistentAlloc
   870  	}
   871  	persistent.off = round(persistent.off, align)
   872  	if persistent.off+size > chunk || persistent.base == nil {
   873  		persistent.base = sysAlloc(chunk, &memstats.other_sys)
   874  		if persistent.base == nil {
   875  			if persistent == &globalAlloc.persistentAlloc {
   876  				unlock(&globalAlloc.mutex)
   877  			}
   878  			throw("runtime: cannot allocate memory")
   879  		}
   880  		persistent.off = 0
   881  	}
   882  	p := add(persistent.base, persistent.off)
   883  	persistent.off += size
   884  	releasem(mp)
   885  	if persistent == &globalAlloc.persistentAlloc {
   886  		unlock(&globalAlloc.mutex)
   887  	}
   888  
   889  	if sysStat != &memstats.other_sys {
   890  		mSysStatInc(sysStat, size)
   891  		mSysStatDec(&memstats.other_sys, size)
   892  	}
   893  	return p
   894  }
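
// Editor's sketch (illustration only): a typical caller that needs long-lived,
// never-freed storage does something like
//
//	p := (*[1 << 10]byte)(persistentalloc(1<<10, 0, &memstats.other_sys))
//
// Requests of 64 kB or more go straight to sysAlloc; smaller ones are carved
// out of the current 256 kB chunk, per P when one is available and otherwise
// from globalAlloc under its mutex, with the default alignment of 8 applied
// when align is 0.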