// Source: github.com/euank/go@v0.0.0-20160829210321-495514729181/src/runtime/msize.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc small size classes.
//
// See malloc.go for overview.
//
// The size classes are chosen so that rounding an allocation
// request up to the next size class wastes at most 12.5% (1.125x).
//
// Each size class has its own page count that gets allocated
// and chopped up when new objects of the size class are needed.
// That page count is chosen so that chopping up the run of
// pages into objects of the given size wastes at most 12.5% (1.125x)
// of the memory. It is not necessary that the cutoff here be
// the same as above.
//
// The two sources of waste multiply, so the worst possible case
// for the above constraints would be that allocations of some
// size might have a 26.6% (1.266x) overhead.
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
//
// TODO(rsc): Compute max waste for any given size.

package runtime

// Size classes. Computed and initialized by initSizes.
//
// sizeToClass(0 <= n <= MaxSmallSize) returns the size class,
//	1 <= sizeclass < NumSizeClasses, for n.
//	Size class 0 is reserved to mean "not small".
//
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
//	making new objects in class i

// The sizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping
// sizes >= 1024 and <= MaxSmallSize to their class.
// All objects are 8-aligned, so the first array is indexed by
// the size divided by 8 (rounded up).
Objects >= 1024 bytes 45 // are 128-aligned, so the second array is indexed by the 46 // size divided by 128 (rounded up). The arrays are filled in 47 // by InitSizes. 48 49 const ( 50 smallSizeDiv = 8 51 smallSizeMax = 1024 52 largeSizeDiv = 128 53 ) 54 55 var class_to_size [_NumSizeClasses]uint32 56 var class_to_allocnpages [_NumSizeClasses]uint32 57 var class_to_divmagic [_NumSizeClasses]divMagic 58 59 var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8 60 var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8 61 62 func sizeToClass(size uint32) uint32 { 63 if size > _MaxSmallSize { 64 throw("invalid size") 65 } 66 if size > smallSizeMax-8 { 67 return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]) 68 } 69 return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]) 70 } 71 72 func initSizes() { 73 // Initialize the runtime·class_to_size table (and choose class sizes in the process). 74 class_to_size[0] = 0 75 sizeclass := 1 // 0 means no class 76 align := 8 77 for size := align; size <= _MaxSmallSize; size += align { 78 if size&(size-1) == 0 { // bump alignment once in a while 79 if size >= 2048 { 80 align = 256 81 } else if size >= 128 { 82 align = size / 8 83 } else if size >= 16 { 84 align = 16 // required for x86 SSE instructions, if we want to use them 85 } 86 } 87 if align&(align-1) != 0 { 88 throw("incorrect alignment") 89 } 90 91 // Make the allocnpages big enough that 92 // the leftover is less than 1/8 of the total, 93 // so wasted space is at most 12.5%. 94 allocsize := _PageSize 95 for allocsize%size > allocsize/8 { 96 allocsize += _PageSize 97 } 98 npages := allocsize >> _PageShift 99 100 // If the previous sizeclass chose the same 101 // allocation size and fit the same number of 102 // objects into the page, we might as well 103 // use just this size instead of having two 104 // different sizes. 
105 if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { 106 class_to_size[sizeclass-1] = uint32(size) 107 continue 108 } 109 110 class_to_allocnpages[sizeclass] = uint32(npages) 111 class_to_size[sizeclass] = uint32(size) 112 sizeclass++ 113 } 114 if sizeclass != _NumSizeClasses { 115 print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") 116 throw("bad NumSizeClasses") 117 } 118 119 // Increase object sizes if we can fit the same number of larger objects 120 // into the same number of pages. For example, we choose size 8448 above 121 // with 6 objects in 7 pages. But we can well use object size 9472, 122 // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). 123 // We need to preserve at least largeSizeDiv alignment otherwise 124 // sizeToClass won't work. 125 for i := 1; i < _NumSizeClasses; i++ { 126 npages := class_to_allocnpages[i] 127 psize := npages * _PageSize 128 size := class_to_size[i] 129 new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1) 130 if new_size > size { 131 class_to_size[i] = new_size 132 } 133 } 134 135 // Check maxObjsPerSpan => number of objects invariant. 136 for i, size := range class_to_size { 137 if i != 0 && class_to_size[i-1] >= size { 138 throw("non-monotonic size classes") 139 } 140 141 if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan { 142 throw("span contains too many objects") 143 } 144 if size == 0 && i != 0 { 145 throw("size is 0 but class is not 0") 146 } 147 } 148 // Initialize the size_to_class tables. 
149 nextsize := 0 150 for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { 151 for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { 152 size_to_class8[nextsize/8] = uint8(sizeclass) 153 } 154 if nextsize >= 1024 { 155 for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { 156 size_to_class128[(nextsize-1024)/128] = uint8(sizeclass) 157 } 158 } 159 } 160 161 // Double-check SizeToClass. 162 if false { 163 for n := uint32(0); n < _MaxSmallSize; n++ { 164 sizeclass := sizeToClass(n) 165 if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { 166 print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") 167 print("incorrect SizeToClass\n") 168 goto dump 169 } 170 if sizeclass > 1 && class_to_size[sizeclass-1] >= n { 171 print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") 172 print("SizeToClass too big\n") 173 goto dump 174 } 175 } 176 } 177 178 testdefersizes() 179 180 // Copy out for statistics table. 
181 for i := 0; i < len(class_to_size); i++ { 182 memstats.by_size[i].size = uint32(class_to_size[i]) 183 } 184 185 for i := 1; i < len(class_to_size); i++ { 186 class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) 187 } 188 189 return 190 191 dump: 192 if true { 193 print("runtime: NumSizeClasses=", _NumSizeClasses, "\n") 194 print("runtime·class_to_size:") 195 for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ { 196 print(" ", class_to_size[sizeclass], "") 197 } 198 print("\n\n") 199 print("runtime: size_to_class8:") 200 for i := 0; i < len(size_to_class8); i++ { 201 print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n") 202 } 203 print("\n") 204 print("runtime: size_to_class128:") 205 for i := 0; i < len(size_to_class128); i++ { 206 print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n") 207 } 208 print("\n") 209 } 210 throw("InitSizes failed") 211 } 212 213 // Returns size of the memory block that mallocgc will allocate if you ask for the size. 214 func roundupsize(size uintptr) uintptr { 215 if size < _MaxSmallSize { 216 if size <= smallSizeMax-8 { 217 return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]]) 218 } else { 219 return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]]) 220 } 221 } 222 if size+_PageSize < size { 223 return size 224 } 225 return round(size, _PageSize) 226 } 227 228 // divMagic holds magic constants to implement division 229 // by a particular constant as a shift, multiply, and shift. 230 // That is, given 231 // m = computeMagic(d) 232 // then 233 // n/d == ((n>>m.shift) * m.mul) >> m.shift2 234 // 235 // The magic computation picks m such that 236 // d = d₁*d₂ 237 // d₂= 2^m.shift 238 // m.mul = ⌈2^m.shift2 / d₁⌉ 239 // 240 // The magic computation here is tailored for malloc block sizes 241 // and does not handle arbitrary d correctly. 
// Malloc block sizes d are
// always even, so the first shift implements the factors of 2 in d
// and then the mul and second shift implement the odd factor
// that remains. Because the first shift divides n by at least 2 (actually 8)
// before the multiply gets involved, the huge corner cases that
// require additional adjustment are impossible, so the usual
// fixup is not needed.
//
// For more details see Hacker's Delight, Chapter 10, and
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
type divMagic struct {
	shift    uint8   // number of factors of two stripped from the divisor
	mul      uint32  // ⌈2^shift2 / odd part of the divisor⌉
	shift2   uint8   // exponent used when computing mul
	baseMask uintptr // ^(d-1) when the divisor d is a power of two, otherwise 0
}

// computeDivMagic derives the shift/multiply/shift constants for
// dividing by d, plus a mask usable in place of the division when d
// is a power of two. d is expected to be a non-zero malloc block size.
func computeDivMagic(d uint32) divMagic {
	var magic divMagic

	// For a power-of-two size the caller can mask instead of dividing
	// (the original comment names heapBitsForObject as that caller).
	// The test below also accepts d == 1. For every other size the
	// mask keeps its zero value.
	if d&(d-1) == 0 {
		magic.baseMask = ^(uintptr(d) - 1)
	}

	// Strip the factors of two from d; their count is the pre-shift.
	odd := d
	for ; odd&1 == 0; odd >>= 1 {
		magic.shift++
	}

	// Find the largest exponent for which ⌈2^exp / odd⌉ still fits in
	// 32 bits. This is always a good enough approximation; a smaller
	// exponent would work for some divisors but there's no point.
	divisor := uint64(odd)
	exp := uint8(63)
	quot := (uint64(1)<<exp + divisor - 1) / divisor
	for quot >= 1<<32 {
		exp--
		quot = (uint64(1)<<exp + divisor - 1) / divisor
	}

	magic.mul = uint32(quot) // ⌈2^exp / odd⌉
	magic.shift2 = exp
	return magic
}