github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/runtime/malloc2.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import "unsafe"

// Memory allocator, based on tcmalloc.
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html

// The main allocator works in runs of pages.
// Small allocation sizes (up to and including 32 kB) are
// rounded to one of about 70 size classes (see _NumSizeClasses
// below), each of which has its own free list of objects of
// exactly that size.
// Any free page of memory can be split into a set of objects
// of one size class, which are then managed using free list
// allocators.
//
// The allocator's data structures are:
//
//	FixAlloc: a free-list allocator for fixed-size objects,
//		used to manage storage used by the allocator.
//	MHeap: the malloc heap, managed at page (8192-byte) granularity.
//	MSpan: a run of pages managed by the MHeap.
//	MCentral: a shared free list for a given size class.
//	MCache: a per-thread (in Go, per-P) cache for small objects.
//	MStats: allocation statistics.
//
// Allocating a small object proceeds up a hierarchy of caches
// (a sketch of the fast path follows this comment):
//
//	1. Round the size up to one of the small size classes
//	   and look in the corresponding MCache free list.
//	   If the list is not empty, allocate an object from it.
//	   This can all be done without acquiring a lock.
//
//	2. If the MCache free list is empty, replenish it by
//	   taking a bunch of objects from the MCentral free list.
//	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
//
//	3. If the MCentral free list is empty, replenish it by
//	   allocating a run of pages from the MHeap and then
//	   chopping that memory into objects of the given size.
//	   Allocating many objects amortizes the cost of locking
//	   the heap.
//
//	4. If the MHeap is empty or has no page runs large enough,
//	   allocate a new group of pages (at least 1MB) from the
//	   operating system.  Allocating a large run of pages
//	   amortizes the cost of talking to the operating system.
//
// Freeing a small object proceeds up the same hierarchy:
//
//	1. Look up the size class for the object and add it to
//	   the MCache free list.
//
//	2. If the MCache free list is too long or the MCache has
//	   too much memory, return some to the MCentral free lists.
//
//	3. If all the objects in a given span have returned to
//	   the MCentral list, return that span to the page heap.
//
//	4. If the heap has too much memory, return some to the
//	   operating system.
//
//	TODO(rsc): Step 4 is not implemented.
//
// Allocating and freeing a large object uses the page heap
// directly, bypassing the MCache and MCentral free lists.
//
// The small objects on the MCache and MCentral free lists
// may or may not be zeroed.  They are zeroed if and only if
// the second word of the object is zero.  A span in the
// page heap is zeroed unless s.needzero is set. When a span
// is allocated to break into small objects, it is zeroed if needed
// and s.needzero is set. There are two main benefits to delaying the
// zeroing this way:
//
//	1. stack frames allocated from the small object lists
//	   or the page heap can avoid zeroing altogether.
//	2. the cost of zeroing when reusing a small object is
//	   charged to the mutator, not the garbage collector.
//
// This code was originally written in C with an eye toward translating
// it to Go; some comments still use the C naming convention
// Type_Method(Type *t, ...).

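// exampleAllocSmall is an illustrative sketch of the lock-free fast
// path in step 1 above, not the runtime's actual allocation code (see
// mallocgc in malloc.go). The name is hypothetical; the sketch ignores
// the tiny allocator, zeroing, GC bookkeeping, and refilling from the
// MCentral when the cached span is exhausted.
func exampleAllocSmall(c *mcache, sizeclass uint8) gclinkptr {
	s := c.alloc[sizeclass] // per-P span for this size class; no lock needed
	v := s.freelist
	if v == 0 {
		// Step 2 would refill s from the MCentral for this size
		// class (and step 3 would grow the MCentral from the
		// MHeap); both are omitted from this sketch.
		return 0
	}
	s.freelist = v.ptr().next // pop the first free object
	s.ref++                   // one more object handed out from this span
	return v
}
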
const (
	_PageShift = 13
	_PageSize  = 1 << _PageShift
	_PageMask  = _PageSize - 1
)

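// examplePageRound is a minimal illustration of how the page constants
// above are used: rounding a byte count up to a whole number of pages.
// The helper name is hypothetical; similar arithmetic appears wherever
// the heap converts byte sizes to page counts.
func examplePageRound(n uintptr) uintptr {
	return (n + _PageMask) &^ _PageMask
}
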
const (
	// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
	_64bit = 1 << (^uintptr(0) >> 63) / 2

	// Computed constant.  The definition of MaxSmallSize and the
	// algorithm in msize.go produce some number of different allocation
	// size classes.  NumSizeClasses is that number.  It's needed here
	// because there are static arrays of this length; when msize runs its
	// size choosing algorithm it double-checks that NumSizeClasses agrees.
	_NumSizeClasses = 67

	// Tunable constants.
	_MaxSmallSize = 32 << 10

	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
	_TinySize      = 16
	_TinySizeClass = 2

	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
	_HeapAllocChunk = 1 << 20                // Chunk size for heap growth

	// Per-P, per order stack segment cache size.
	_StackCacheSize = 32 * 1024

	// Number of orders that get caching.  Order 0 is FixedStack
	// and each successive order is twice as large.
	_NumStackOrders = 3

	// Number of bits in page to span calculations (8k pages).
	// On Windows 64-bit we limit the arena to 32GB or 35 bits.
	// Windows counts memory used by page table into committed memory
	// of the process, so we can't reserve too much memory.
	// See http://golang.org/issue/5402 and http://golang.org/issue/5236.
	// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
	// On 32-bit, we don't bother limiting anything, so we use the full
	// 32-bit address space.
	_MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows))*37 + (1-_64bit)*32
	_MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift

	_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)

	// Max number of threads to run garbage collection.
	// 2, 3, and 4 are all plausible maximums depending
	// on the hardware details of the machine.  The garbage
	// collector scales well to 32 cpus.
	_MaxGcproc = 32
)

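// exampleArenaFigures illustrates the arithmetic behind the limits
// above (hypothetical helper, not part of the runtime): on a typical
// 64-bit non-Windows system _MHeapMap_TotalBits is 37, so _MaxMem caps
// the arena at 128 GB, and the page-to-span lookup table needs
// 1<<(37-13) = 16M entries, one per 8 kB page.
func exampleArenaFigures() (maxMem uintptr, spanMapEntries uintptr) {
	return _MaxMem, uintptr(1) << _MHeapMap_Bits
}
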
// A generic linked list of blocks.  (Typically the block is bigger
// than unsafe.Sizeof(mlink{}).)
// Since assignments to mlink.next will result in a write barrier being performed
// this cannot be used by some of the internal GC structures. For example when
// the sweeper is placing an unmarked object on the free list it does not want the
// write barrier to be called since that could result in the object being reachable.
type mlink struct {
	next *mlink
}

// A gclink is a node in a linked list of blocks, like mlink,
// but it is opaque to the garbage collector.
// The GC does not trace the pointers during collection,
// and the compiler does not emit write barriers for assignments
// of gclinkptr values. Code should store references to gclinks
// as gclinkptr, not as *gclink.
type gclink struct {
	next gclinkptr
}

// A gclinkptr is a pointer to a gclink, but it is opaque
// to the garbage collector.
type gclinkptr uintptr

// ptr returns the *gclink form of p.
// The result should be used for accessing fields, not stored
// in other data structures.
func (p gclinkptr) ptr() *gclink {
	return (*gclink)(unsafe.Pointer(p))
}

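// exampleGCLinkPush sketches why gclinkptr exists: sweeper-style code
// can thread a freed block onto a free list with a plain store, and
// because gclinkptr is a uintptr the compiler emits no write barrier,
// so the GC never observes the object as reachable. The helper name is
// hypothetical; the real usage is in the sweep code in mgc0.go.
func exampleGCLinkPush(head, p gclinkptr) gclinkptr {
	p.ptr().next = head // plain store of a gclinkptr; no write barrier
	return p
}
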
   171  // sysAlloc obtains a large chunk of zeroed memory from the
   172  // operating system, typically on the order of a hundred kilobytes
   173  // or a megabyte.
   174  // NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
   175  // may use larger alignment, so the caller must be careful to realign the
   176  // memory obtained by sysAlloc.
   177  //
   178  // SysUnused notifies the operating system that the contents
   179  // of the memory region are no longer needed and can be reused
   180  // for other purposes.
   181  // SysUsed notifies the operating system that the contents
   182  // of the memory region are needed again.
   183  //
   184  // SysFree returns it unconditionally; this is only used if
   185  // an out-of-memory error has been detected midway through
   186  // an allocation.  It is okay if SysFree is a no-op.
   187  //
   188  // SysReserve reserves address space without allocating memory.
   189  // If the pointer passed to it is non-nil, the caller wants the
   190  // reservation there, but SysReserve can still choose another
   191  // location if that one is unavailable.  On some systems and in some
   192  // cases SysReserve will simply check that the address space is
   193  // available and not actually reserve it.  If SysReserve returns
   194  // non-nil, it sets *reserved to true if the address space is
   195  // reserved, false if it has merely been checked.
   196  // NOTE: SysReserve returns OS-aligned memory, but the heap allocator
   197  // may use larger alignment, so the caller must be careful to realign the
   198  // memory obtained by sysAlloc.
   199  //
   200  // SysMap maps previously reserved address space for use.
   201  // The reserved argument is true if the address space was really
   202  // reserved, not merely checked.
   203  //
   204  // SysFault marks a (already sysAlloc'd) region to fault
   205  // if accessed.  Used only for debugging the runtime.
   206  
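// The reserve-then-map pattern described above, as a hedged sketch.
// The sysReserve/sysMap signatures shown match the mem_*.go files at
// this vintage, but treat them as assumptions; the 1 MB size is
// arbitrary:
//
//	var reserved bool
//	v := sysReserve(nil, 1<<20, &reserved)         // claim address space
//	// ... later, when the memory is actually needed:
//	sysMap(v, 1<<20, reserved, &memstats.heap_sys) // commit it for use
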
// FixAlloc is a simple free-list allocator for fixed size objects.
// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
// MCache and MSpan objects.
//
// Memory returned by FixAlloc_Alloc is not zeroed.
// The caller is responsible for locking around FixAlloc calls.
// Callers can keep state in the object but the first word is
// smashed by freeing and reallocating.
type fixalloc struct {
	size   uintptr
	first  unsafe.Pointer // func(unsafe.Pointer, unsafe.Pointer); f(arg, p) is called the first time p is returned
	arg    unsafe.Pointer
	list   *mlink
	chunk  *byte
	nchunk uint32
	inuse  uintptr // in-use bytes now
	stat   *uint64
}

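// A hedged sketch of typical FixAlloc use, following the pattern in
// mheap.go. The fixAlloc_Init/fixAlloc_Alloc helper names are from
// mfixalloc.go at this vintage; treat the exact signatures as
// assumptions:
//
//	fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), recordspan,
//		unsafe.Pointer(h), &memstats.mspan_sys)
//	...
//	lock(&h.lock)                               // caller provides locking
//	s := (*mspan)(fixAlloc_Alloc(&h.spanalloc)) // memory is not zeroed
//	unlock(&h.lock)
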
// Statistics.
// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and still in use
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Protected by mheap.lock.
	heap_alloc    uint64 // bytes allocated and still in use
	heap_sys      uint64 // bytes obtained from system
	heap_idle     uint64 // bytes in idle spans
	heap_inuse    uint64 // bytes in non-idle spans
	heap_released uint64 // bytes released to the OS
	heap_objects  uint64 // total number of allocated objects

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	stacks_inuse uint64 // this number is included in heap_inuse above
	stacks_sys   uint64 // always 0 in mstats
	mspan_inuse  uint64 // mspan structures
	mspan_sys    uint64
	mcache_inuse uint64 // mcache structures
	mcache_sys   uint64
	buckhash_sys uint64 // profiling bucket hash table
	gc_sys       uint64
	other_sys    uint64

	// Statistics about garbage collector.
	// Protected by mheap or stopping the world during GC.
	next_gc        uint64 // next GC (in heap_alloc time)
	last_gc        uint64 // last GC (in absolute time)
	pause_total_ns uint64
	pause_ns       [256]uint64 // circular buffer of recent GC pause lengths
	pause_end      [256]uint64 // circular buffer of recent GC end times (nanoseconds since 1970)
	numgc          uint32
	enablegc       bool
	debuggc        bool

	// Statistics about allocation size classes.
	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to Go directly
}

var memstats mstats

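// The sys field's comment says it should be approximately the sum of
// the xxx_sys fields. A hedged illustration of that invariant
// (hypothetical helper; the runtime does not perform this check):
func exampleSysSum(m *mstats) uint64 {
	return m.heap_sys + m.stacks_sys + m.mspan_sys + m.mcache_sys +
		m.buckhash_sys + m.gc_sys + m.other_sys
}
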
// Size classes.  Computed and initialized by InitSizes.
//
// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
//	1 <= sizeclass < NumSizeClasses, for n.
//	Size class 0 is reserved to mean "not small".
//
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
//	making new objects in class i

var class_to_size [_NumSizeClasses]int32
var class_to_allocnpages [_NumSizeClasses]int32
var size_to_class8 [1024/8 + 1]int8
var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8

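// exampleSizeToClass sketches how the two tables above are consulted:
// sizes up to 1024 bytes are looked up in 8-byte steps, larger small
// sizes in 128-byte steps (cf. the size-class code in msize.go; this
// copy is illustrative and the name is hypothetical). The caller is
// assumed to guarantee 0 <= size <= _MaxSmallSize.
func exampleSizeToClass(size int32) int32 {
	if size > 1024-8 {
		return int32(size_to_class128[(size-1024+127)>>7])
	}
	return int32(size_to_class8[(size+7)>>3])
}
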
type mcachelist struct {
	list  *mlink
	nlist uint32
}

type stackfreelist struct {
	list gclinkptr // linked list of free stacks
	size uintptr   // total size of stacks in list
}

// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
type mcache struct {
	// The following members are accessed on every malloc,
	// so they are grouped here for better caching.
	next_sample      int32  // trigger heap sample after allocating this many bytes
	local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
	// Allocator cache for tiny objects w/o pointers.
	// See "Tiny allocator" comment in malloc.go and the worked
	// example after this type.
	tiny             *byte
	tinysize         uintptr
	local_tinyallocs uintptr // number of tiny allocs not counted in other stats

	// The rest is not accessed on every malloc.
	alloc [_NumSizeClasses]*mspan // spans to allocate from

	stackcache [_NumStackOrders]stackfreelist

	sudogcache *sudog

	// Local allocator stats, flushed during GC.
	local_nlookup    uintptr                  // number of pointer lookups
	local_largefree  uintptr                  // bytes freed for large objects (>maxsmallsize)
	local_nlargefree uintptr                  // number of frees for large objects (>maxsmallsize)
	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
}

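// A worked example of the tiny allocator referenced above: with
// _TinySize = 16, a pointer-free 8-byte allocation followed by a
// 4-byte one can be packed into a single 16-byte block, so only the
// block itself is a real allocation; the packed requests are counted
// in local_tinyallocs. The exact packing and alignment rules live in
// malloc.go; this note is illustrative only.
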
const (
	_KindSpecialFinalizer = 1
	_KindSpecialProfile   = 2
	// Note: The finalizer special must be first because if we're freeing
	// an object, a finalizer special will cause the freeing operation
	// to abort, and we want to keep the other special records around
	// if that happens.
)

type special struct {
	next   *special // linked list in span
	offset uint16   // span offset of object
	kind   byte     // kind of special
}

// The described object has a finalizer set for it.
type specialfinalizer struct {
	special special
	fn      *funcval
	nret    uintptr
	fint    *_type
	ot      *ptrtype
}

// The described object is being heap profiled.
type specialprofile struct {
	special special
	b       *bucket
}

// An MSpan is a run of pages.
const (
	_MSpanInUse = iota // allocated for garbage collected heap
	_MSpanStack        // allocated for use by stack allocator
	_MSpanFree
	_MSpanListHead
	_MSpanDead
)

type mspan struct {
	next     *mspan    // in a span linked list
	prev     *mspan    // in a span linked list
	start    pageID    // starting page number
	npages   uintptr   // number of pages in span
	freelist gclinkptr // list of free objects
	// sweep generation (see the sketch after this type):
	// if sweepgen == h.sweepgen - 2, the span needs sweeping
	// if sweepgen == h.sweepgen - 1, the span is currently being swept
	// if sweepgen == h.sweepgen, the span is swept and ready to use
	// h.sweepgen is incremented by 2 after every GC
	sweepgen    uint32
	ref         uint16   // capacity - number of objects in freelist
	sizeclass   uint8    // size class
	incache     bool     // being used by an mcache
	state       uint8    // _MSpanInUse etc.
	needzero    uint8    // needs to be zeroed before allocation
	elemsize    uintptr  // computed from sizeclass or from npages
	unusedsince int64    // first time spotted by GC in _MSpanFree state
	npreleased  uintptr  // number of pages released to the OS
	limit       uintptr  // end of data in span
	speciallock mutex    // guards specials list
	specials    *special // linked list of special records sorted by offset
}

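// exampleSpanSweepState sketches the sweepgen protocol documented in
// mspan above (hypothetical helper; the real sweeper in mgc0.go also
// claims spans atomically before sweeping them).
func exampleSpanSweepState(h *mheap, s *mspan) string {
	switch s.sweepgen {
	case h.sweepgen - 2:
		return "needs sweeping"
	case h.sweepgen - 1:
		return "being swept"
	case h.sweepgen:
		return "swept and ready to use"
	}
	return "invalid sweepgen"
}
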
// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
// MCentral's span lists.  We use empty MSpan structures as list heads.

// Central list of free objects of a given size.
type mcentral struct {
	lock      mutex
	sizeclass int32
	nonempty  mspan // list of spans with a free object
	empty     mspan // list of spans with no free objects (or cached in an mcache)
}

// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
// but all the other global data is here too.
type mheap struct {
	lock      mutex
	free      [_MaxMHeapList]mspan // free lists of given length
	freelarge mspan                // free lists length >= _MaxMHeapList
	busy      [_MaxMHeapList]mspan // busy lists of large objects of given length
	busylarge mspan                // busy lists of large objects length >= _MaxMHeapList
	allspans  **mspan              // all spans out there
	gcspans   **mspan              // copy of allspans referenced by GC marker or sweeper
	nspan     uint32
	sweepgen  uint32 // sweep generation, see comment in mspan
	sweepdone uint32 // all spans are swept

	// span lookup
	spans        **mspan
	spans_mapped uintptr

	// range of addresses we might see in the heap
	bitmap         uintptr
	bitmap_mapped  uintptr
	arena_start    uintptr
	arena_used     uintptr
	arena_end      uintptr
	arena_reserved bool

	// write barrier shadow data+heap.
	// 64-bit systems only, enabled by GODEBUG=wbshadow=1.
	shadow_enabled  bool    // shadow should be updated and checked
	shadow_reserved bool    // shadow memory is reserved
	shadow_heap     uintptr // heap-addr + shadow_heap = shadow heap addr
	shadow_data     uintptr // data-addr + shadow_data = shadow data addr
	data_start      uintptr // start of shadowed data addresses
	data_end        uintptr // end of shadowed data addresses

	// central free lists for small size classes.
	// the padding makes sure that the MCentrals are
	// spaced CacheLineSize bytes apart, so that each MCentral.lock
	// gets its own cache line.
	central [_NumSizeClasses]struct {
		mcentral mcentral
		pad      [_CacheLineSize]byte
	}

	spanalloc             fixalloc // allocator for span*
	cachealloc            fixalloc // allocator for mcache*
	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
	specialprofilealloc   fixalloc // allocator for specialprofile*
	speciallock           mutex    // lock for special record allocators

	// Malloc stats.
	largefree  uint64                  // bytes freed for large objects (>maxsmallsize)
	nlargefree uint64                  // number of frees for large objects (>maxsmallsize)
	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
}

var mheap_ mheap

const (
	// flags to malloc
	_FlagNoScan = 1 << 0 // GC doesn't have to scan object
	_FlagNoZero = 1 << 1 // don't zero memory
)

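// A hedged sketch of how the malloc flags compose at a call site
// (mallocgc and its flagNoScan/flagNoZero aliases live in malloc.go;
// treat the exact signature as an assumption). _FlagNoScan suits
// pointer-free data, _FlagNoZero memory the caller will fully
// overwrite:
//
//	p := mallocgc(size, nil, flagNoScan|flagNoZero)
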
// NOTE: Layout known to queuefinalizer.
type finalizer struct {
	fn   *funcval       // function to call
	arg  unsafe.Pointer // ptr to object
	nret uintptr        // bytes of return values from fn
	fint *_type         // type of first argument of fn
	ot   *ptrtype       // type of ptr to object
}

type finblock struct {
	alllink *finblock
	next    *finblock
	cnt     int32
	_       int32
	fin     [(_FinBlockSize - 2*ptrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
}

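// Worked example of the fin array length above, assuming
// _FinBlockSize = 10240 and ptrSize = 8 (their values elsewhere in the
// runtime at this vintage; treat both as assumptions): the header
// takes 2*8 + 2*4 = 24 bytes, a finalizer is five 8-byte words =
// 40 bytes, so each block holds (10240-24)/40 = 255 finalizers.
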
// Information from the compiler about the layout of stack frames.
type bitvector struct {
	n        int32 // # of bits
	bytedata *uint8
}

type stackmap struct {
	n        int32   // number of bitmaps
	nbit     int32   // number of bits in each bitmap
	bytedata [0]byte // bitmaps, each starting on a 32-bit boundary
}

// The function that returns the pointer map data for a given stackmap
// index (the index is encoded in PCDATA_StackMapIndex) is defined in
// mgc0.go.