github.com/euank/go@v0.0.0-20160829210321-495514729181/src/runtime/msize.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Malloc small size classes.
     6  //
     7  // See malloc.go for overview.
     8  //
     9  // The size classes are chosen so that rounding an allocation
    10  // request up to the next size class wastes at most 12.5% (1.125x).
    11  //
    12  // Each size class has its own page count that gets allocated
    13  // and chopped up when new objects of the size class are needed.
    14  // That page count is chosen so that chopping up the run of
    15  // pages into objects of the given size wastes at most 12.5% (1.125x)
    16  // of the memory. It is not necessary that the cutoff here be
    17  // the same as above.
    18  //
    19  // The two sources of waste multiply, so the worst possible case
    20  // for the above constraints would be that allocations of some
    21  // size might have a 26.6% (1.266x) overhead.
    22  // In practice, only one of the wastes comes into play for a
    23  // given size (sizes < 512 waste mainly on the round-up,
    24  // sizes > 512 waste mainly on the page chopping).
    25  //
    26  // TODO(rsc): Compute max waste for any given size.
    27  
    28  package runtime
    29  
// Size classes. Computed and initialized by initSizes.
//
// sizeToClass(0 <= n <= _MaxSmallSize) returns the size class,
//	1 <= sizeclass < _NumSizeClasses, for n.
//	Size class 0 is reserved to mean "not small".
    35  //
    36  // class_to_size[i] = largest size in class i
    37  // class_to_allocnpages[i] = number of pages to allocate when
    38  //	making new objects in class i
    39  
// The sizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping
// sizes >= 1024 and <= _MaxSmallSize to their class.
// All objects are 8-aligned, so the first array is indexed by
// the size divided by 8 (rounded up).  Objects >= 1024 bytes
// are 128-aligned, so the second array is indexed by the
// size divided by 128 (rounded up).  The arrays are filled in
// by initSizes.
    48  
    49  const (
    50  	smallSizeDiv = 8
    51  	smallSizeMax = 1024
    52  	largeSizeDiv = 128
    53  )
    54  
    55  var class_to_size [_NumSizeClasses]uint32
    56  var class_to_allocnpages [_NumSizeClasses]uint32
    57  var class_to_divmagic [_NumSizeClasses]divMagic
    58  
    59  var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8
    60  var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8
    61  
    62  func sizeToClass(size uint32) uint32 {
    63  	if size > _MaxSmallSize {
    64  		throw("invalid size")
    65  	}
    66  	if size > smallSizeMax-8 {
    67  		return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv])
    68  	}
    69  	return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv])
    70  }
    71  
    72  func initSizes() {
    73  	// Initialize the runtime·class_to_size table (and choose class sizes in the process).
    74  	class_to_size[0] = 0
    75  	sizeclass := 1 // 0 means no class
    76  	align := 8
    77  	for size := align; size <= _MaxSmallSize; size += align {
    78  		if size&(size-1) == 0 { // bump alignment once in a while
    79  			if size >= 2048 {
    80  				align = 256
    81  			} else if size >= 128 {
    82  				align = size / 8
    83  			} else if size >= 16 {
    84  				align = 16 // required for x86 SSE instructions, if we want to use them
    85  			}
    86  		}
    87  		if align&(align-1) != 0 {
    88  			throw("incorrect alignment")
    89  		}
    90  
    91  		// Make the allocnpages big enough that
    92  		// the leftover is less than 1/8 of the total,
    93  		// so wasted space is at most 12.5%.
    94  		allocsize := _PageSize
    95  		for allocsize%size > allocsize/8 {
    96  			allocsize += _PageSize
    97  		}
    98  		npages := allocsize >> _PageShift
    99  
   100  		// If the previous sizeclass chose the same
   101  		// allocation size and fit the same number of
   102  		// objects into the page, we might as well
   103  		// use just this size instead of having two
   104  		// different sizes.
   105  		if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
   106  			class_to_size[sizeclass-1] = uint32(size)
   107  			continue
   108  		}
   109  
   110  		class_to_allocnpages[sizeclass] = uint32(npages)
   111  		class_to_size[sizeclass] = uint32(size)
   112  		sizeclass++
   113  	}
   114  	if sizeclass != _NumSizeClasses {
   115  		print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
   116  		throw("bad NumSizeClasses")
   117  	}
   118  
   119  	// Increase object sizes if we can fit the same number of larger objects
   120  	// into the same number of pages. For example, we choose size 8448 above
   121  	// with 6 objects in 7 pages. But we can well use object size 9472,
   122  	// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
   123  	// We need to preserve at least largeSizeDiv alignment otherwise
   124  	// sizeToClass won't work.
   125  	for i := 1; i < _NumSizeClasses; i++ {
   126  		npages := class_to_allocnpages[i]
   127  		psize := npages * _PageSize
   128  		size := class_to_size[i]
   129  		new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1)
   130  		if new_size > size {
   131  			class_to_size[i] = new_size
   132  		}
   133  	}
   134  
   135  	// Check maxObjsPerSpan => number of objects invariant.
   136  	for i, size := range class_to_size {
   137  		if i != 0 && class_to_size[i-1] >= size {
   138  			throw("non-monotonic size classes")
   139  		}
   140  
   141  		if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
   142  			throw("span contains too many objects")
   143  		}
   144  		if size == 0 && i != 0 {
   145  			throw("size is 0 but class is not 0")
   146  		}
   147  	}
   148  	// Initialize the size_to_class tables.
   149  	nextsize := 0
   150  	for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
   151  		for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
   152  			size_to_class8[nextsize/8] = uint8(sizeclass)
   153  		}
   154  		if nextsize >= 1024 {
   155  			for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
   156  				size_to_class128[(nextsize-1024)/128] = uint8(sizeclass)
   157  			}
   158  		}
   159  	}
   160  
   161  	// Double-check SizeToClass.
   162  	if false {
   163  		for n := uint32(0); n < _MaxSmallSize; n++ {
   164  			sizeclass := sizeToClass(n)
   165  			if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
   166  				print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
   167  				print("incorrect SizeToClass\n")
   168  				goto dump
   169  			}
   170  			if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
   171  				print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
   172  				print("SizeToClass too big\n")
   173  				goto dump
   174  			}
   175  		}
   176  	}
   177  
   178  	testdefersizes()
   179  
   180  	// Copy out for statistics table.
   181  	for i := 0; i < len(class_to_size); i++ {
   182  		memstats.by_size[i].size = uint32(class_to_size[i])
   183  	}
   184  
   185  	for i := 1; i < len(class_to_size); i++ {
   186  		class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i]))
   187  	}
   188  
   189  	return
   190  
   191  dump:
   192  	if true {
   193  		print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
   194  		print("runtime·class_to_size:")
   195  		for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
   196  			print(" ", class_to_size[sizeclass], "")
   197  		}
   198  		print("\n\n")
   199  		print("runtime: size_to_class8:")
   200  		for i := 0; i < len(size_to_class8); i++ {
   201  			print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
   202  		}
   203  		print("\n")
   204  		print("runtime: size_to_class128:")
   205  		for i := 0; i < len(size_to_class128); i++ {
   206  			print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
   207  		}
   208  		print("\n")
   209  	}
   210  	throw("InitSizes failed")
   211  }
   212  
   213  // Returns size of the memory block that mallocgc will allocate if you ask for the size.
   214  func roundupsize(size uintptr) uintptr {
   215  	if size < _MaxSmallSize {
   216  		if size <= smallSizeMax-8 {
   217  			return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]])
   218  		} else {
   219  			return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]])
   220  		}
   221  	}
   222  	if size+_PageSize < size {
   223  		return size
   224  	}
   225  	return round(size, _PageSize)
   226  }
   227  
   228  // divMagic holds magic constants to implement division
   229  // by a particular constant as a shift, multiply, and shift.
   230  // That is, given
//	m = computeDivMagic(d)
   232  // then
   233  //	n/d == ((n>>m.shift) * m.mul) >> m.shift2
   234  //
   235  // The magic computation picks m such that
   236  //	d = d₁*d₂
   237  //	d₂= 2^m.shift
   238  //	m.mul = ⌈2^m.shift2 / d₁⌉
   239  //
   240  // The magic computation here is tailored for malloc block sizes
   241  // and does not handle arbitrary d correctly. Malloc block sizes d are
   242  // always even, so the first shift implements the factors of 2 in d
   243  // and then the mul and second shift implement the odd factor
   244  // that remains. Because the first shift divides n by at least 2 (actually 8)
   245  // before the multiply gets involved, the huge corner cases that
   246  // require additional adjustment are impossible, so the usual
   247  // fixup is not needed.
   248  //
   249  // For more details see Hacker's Delight, Chapter 10, and
   250  // http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
   251  // http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
   252  type divMagic struct {
   253  	shift    uint8
   254  	mul      uint32
   255  	shift2   uint8
   256  	baseMask uintptr
   257  }
   258  
   259  func computeDivMagic(d uint32) divMagic {
   260  	var m divMagic
   261  
   262  	// If the size is a power of two, heapBitsForObject can divide even faster by masking.
   263  	// Compute this mask.
   264  	if d&(d-1) == 0 {
   265  		// It is a power of 2 (assuming dinptr != 1)
   266  		m.baseMask = ^(uintptr(d) - 1)
   267  	} else {
   268  		m.baseMask = 0
   269  	}
   270  
   271  	// Compute pre-shift by factoring power of 2 out of d.
   272  	for d&1 == 0 {
   273  		m.shift++
   274  		d >>= 1
   275  	}
   276  
   277  	// Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int.
   278  	// This is always a good enough approximation.
   279  	// We could use smaller k for some divisors but there's no point.
   280  	k := uint8(63)
   281  	d64 := uint64(d)
   282  	for ((1<<k)+d64-1)/d64 >= 1<<32 {
   283  		k--
   284  	}
   285  	m.mul = uint32(((1 << k) + d64 - 1) / d64) //  ⌈2^k / d⌉
   286  	m.shift2 = k
   287  
   288  	return m
   289  }