github.com/brownsys/tracing-framework-go@v0.0.0-20161210174012-0542a62412fe/go/darwin_amd64/src/runtime/malloc.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory allocator, based on tcmalloc.
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html

// The main allocator works in runs of pages.
// Small allocation sizes (up to and including 32 kB) are
// rounded to one of about 70 size classes, each of which
// has its own free list of objects of exactly that size.
// Any free page of memory can be split into a set of objects
// of one size class, which are then managed using free list
// allocators.
//
// The allocator's data structures are:
//
//	FixAlloc: a free-list allocator for fixed-size objects,
//		used to manage storage used by the allocator.
//	MHeap: the malloc heap, managed at page (8192-byte) granularity.
//	MSpan: a run of pages managed by the MHeap.
//	MCentral: a shared free list for a given size class.
//	MCache: a per-thread (in Go, per-P) cache for small objects.
//	MStats: allocation statistics.
//
// Allocating a small object proceeds up a hierarchy of caches:
//
//	1. Round the size up to one of the small size classes
//	   and look in the corresponding MCache free list.
//	   If the list is not empty, allocate an object from it.
//	   This can all be done without acquiring a lock.
//
//	2. If the MCache free list is empty, replenish it by
//	   taking a bunch of objects from the MCentral free list.
//	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
//
//	3. If the MCentral free list is empty, replenish it by
//	   allocating a run of pages from the MHeap and then
//	   chopping that memory into objects of the given size.
//	   Allocating many objects amortizes the cost of locking
//	   the heap.
//
//	4. If the MHeap is empty or has no page runs large enough,
//	   allocate a new group of pages (at least 1MB) from the
//	   operating system. Allocating a large run of pages
//	   amortizes the cost of talking to the operating system.
//
// Freeing a small object proceeds up the same hierarchy:
//
//	1. Look up the size class for the object and add it to
//	   the MCache free list.
//
//	2. If the MCache free list is too long or the MCache has
//	   too much memory, return some to the MCentral free lists.
//
//	3. If all the objects in a given span have returned to
//	   the MCentral list, return that span to the page heap.
//
//	4. If the heap has too much memory, return some to the
//	   operating system.
//
//	TODO(rsc): Step 4 is not implemented.
//
// Allocating and freeing a large object uses the page heap
// directly, bypassing the MCache and MCentral free lists.
//
// The small objects on the MCache and MCentral free lists
// may or may not be zeroed. They are zeroed if and only if
// the second word of the object is zero. A span in the
// page heap is zeroed unless s->needzero is set. When a span
// is allocated to break into small objects, it is zeroed if needed
// and s->needzero is set. There are two main benefits to delaying the
// zeroing this way:
//
//	1. Stack frames allocated from the small object lists
//	   or the page heap can avoid zeroing altogether.
//	2. The cost of zeroing when reusing a small object is
//	   charged to the mutator, not the garbage collector.
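
// The fast path of the hierarchy above, as it appears in mallocgc below,
// is roughly the following (illustrative sketch only; error handling and
// GC bookkeeping are omitted):
//
//	c := gomcache()              // per-P cache; no locking needed
//	span := c.alloc[sizeclass]   // cached span for this size class
//	v := nextFreeFast(span)      // step 1: bump within the cached span
//	if v == 0 {
//		// steps 2-4: refill from MCentral, MHeap, or the OS
//		v, span, _ = c.nextFree(sizeclass)
//	}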

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

const (
	debugMalloc = false

	maxTinySize   = _TinySize
	tinySizeClass = _TinySizeClass
	maxSmallSize  = _MaxSmallSize

	pageShift = _PageShift
	pageSize  = _PageSize
	pageMask  = _PageMask
	// By construction, single page spans of the smallest object class
	// have the most objects per span.
	maxObjsPerSpan = pageSize / 8

	mSpanInUse = _MSpanInUse

	concurrentSweep = _ConcurrentSweep
)

const (
	_PageShift = 13
	_PageSize  = 1 << _PageShift
	_PageMask  = _PageSize - 1
)

const (
	// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
	_64bit = 1 << (^uintptr(0) >> 63) / 2

	// Computed constant. The definition of MaxSmallSize and the
	// algorithm in msize.go produce some number of different allocation
	// size classes. NumSizeClasses is that number. It's needed here
	// because there are static arrays of this length; when msize runs its
	// size choosing algorithm it double-checks that NumSizeClasses agrees.
	_NumSizeClasses = 67

	// Tunable constants.
	_MaxSmallSize = 32 << 10

	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
	_TinySize      = 16
	_TinySizeClass = 2

	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
	_HeapAllocChunk = 1 << 20                // Chunk size for heap growth

	// Per-P, per order stack segment cache size.
	_StackCacheSize = 32 * 1024

	// Number of orders that get caching. Order 0 is FixedStack
	// and each successive order is twice as large.
	// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
	// will be allocated directly.
	// Since FixedStack is different on different systems, we
	// must vary NumStackOrders to keep the same maximum cached size.
	//	OS               | FixedStack | NumStackOrders
	//	-----------------+------------+---------------
	//	linux/darwin/bsd | 2KB        | 4
	//	windows/32       | 4KB        | 3
	//	windows/64       | 8KB        | 2
	//	plan9            | 4KB        | 3
	_NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9

	// Number of bits in page to span calculations (8k pages).
	// On Windows 64-bit we limit the arena to 32GB or 35 bits.
	// Windows counts memory used by page table into committed memory
	// of the process, so we can't reserve too much memory.
	// See https://golang.org/issue/5402 and https://golang.org/issue/5236.
	// On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
	// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
	// On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
	// but as most devices have less than 4GB of physical memory anyway, we
	// try to be conservative here, and only ask for a 2GB heap.
	_MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*32
	_MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift

	_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)

	// Max number of threads to run garbage collection.
	// 2, 3, and 4 are all plausible maximums depending
	// on the hardware details of the machine. The garbage
	// collector scales well to 32 cpus.
	_MaxGcproc = 32
)
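
// Worked out for the GOOS/GOARCH combinations named above (these values
// follow directly from the constants and are given here only as a reader aid):
//
//	64-bit, not Windows, not darwin/arm64:  _MHeapMap_TotalBits = 39  =>  _MaxMem ~ 512 GB
//	64-bit Windows:                         _MHeapMap_TotalBits = 35  =>  _MaxMem ~ 32 GB
//	darwin/arm64:                           _MHeapMap_TotalBits = 31  =>  _MaxMem ~ 2 GB
//	any 32-bit platform:                    _MHeapMap_TotalBits = 32  =>  _MaxMem ~ 4 GB
//
// Likewise, _PageShift = 13 gives 8192-byte pages, so
// maxObjsPerSpan = 8192/8 = 1024.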

const _MaxArena32 = 1<<32 - 1

// OS-defined helpers:
//
// sysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
// or a megabyte.
// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
// may use larger alignment, so the caller must be careful to realign the
// memory obtained by sysAlloc.
//
// SysUnused notifies the operating system that the contents
// of the memory region are no longer needed and can be reused
// for other purposes.
// SysUsed notifies the operating system that the contents
// of the memory region are needed again.
//
// SysFree returns the memory region to the operating system
// unconditionally; this is only used if an out-of-memory error
// has been detected midway through an allocation.
// It is okay if SysFree is a no-op.
//
// SysReserve reserves address space without allocating memory.
// If the pointer passed to it is non-nil, the caller wants the
// reservation there, but SysReserve can still choose another
// location if that one is unavailable. On some systems and in some
// cases SysReserve will simply check that the address space is
// available and not actually reserve it. If SysReserve returns
// non-nil, it sets *reserved to true if the address space is
// reserved, false if it has merely been checked.
// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
// may use larger alignment, so the caller must be careful to realign the
// memory obtained by sysAlloc.
//
// SysMap maps previously reserved address space for use.
// The reserved argument is true if the address space was really
// reserved, not merely checked.
//
// SysFault marks an (already sysAlloc'd) region to fault
// if accessed. Used only for debugging the runtime.

func mallocinit() {
	initSizes()

	if class_to_size[_TinySizeClass] != _TinySize {
		throw("bad TinySizeClass")
	}

	var p, bitmapSize, spansSize, pSize, limit uintptr
	var reserved bool

	// limit = runtime.memlimit();
	// See https://golang.org/issue/5049
	// TODO(rsc): Fix after 1.1.
	limit = 0

	// Set up the allocation arena, a contiguous area of memory where
	// allocated data will be found. The arena begins with a bitmap large
	// enough to hold 2 bits per allocated word.
	if sys.PtrSize == 8 && (limit == 0 || limit > 1<<30) {
		// On a 64-bit machine, allocate from a single contiguous reservation.
		// 512 GB (MaxMem) should be big enough for now.
		//
		// The code will work with the reservation at any address, but ask
		// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
		// Allocating a 512 GB region takes away 39 bits, and the amd64
		// doesn't let us choose the top 17 bits, so that leaves the 9 bits
		// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
		// UTF-8 sequences, and they are otherwise as far away from
		// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
		// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
		// on OS X during thread allocations. 0x00c0 causes conflicts with
		// AddressSanitizer which reserves all memory up to 0x0100.
		// These choices are both for debuggability and to reduce the
		// odds of a conservative garbage collector (as is still used in gccgo)
		// not collecting memory because some non-pointer block of memory
		// had a bit pattern that matched a memory address.
		//
		// Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
		// but it hardly matters: e0 00 is not valid UTF-8 either.
		//
		// If this fails we fall back to the 32 bit memory mechanism.
		//
		// However, on arm64, we ignore all this advice above and slam the
		// allocation at 0x40 << 32 because when using 4k pages with 3-level
		// translation buffers, the user address space is limited to 39 bits.
		// On darwin/arm64, the address space is even smaller.
		arenaSize := round(_MaxMem, _PageSize)
		bitmapSize = arenaSize / (sys.PtrSize * 8 / 2)
		spansSize = arenaSize / _PageSize * sys.PtrSize
		spansSize = round(spansSize, _PageSize)
		for i := 0; i <= 0x7f; i++ {
			switch {
			case GOARCH == "arm64" && GOOS == "darwin":
				p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
			case GOARCH == "arm64":
				p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
			default:
				p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
			}
			pSize = bitmapSize + spansSize + arenaSize + _PageSize
			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
			if p != 0 {
				break
			}
		}
	}

	if p == 0 {
		// On a 32-bit machine, we can't typically get away
		// with a giant virtual address space reservation.
		// Instead we map the memory information bitmap
		// immediately after the data segment, large enough
		// to handle the entire 4GB address space (256 MB),
		// along with a reservation for an initial arena.
		// When that gets used up, we'll start asking the kernel
		// for any memory anywhere.

		// If we fail to allocate, try again with a smaller arena.
		// This is necessary on Android L where we share a process
		// with ART, which reserves virtual memory aggressively.
		// In the worst case, fall back to a 0-sized initial arena,
		// in the hope that subsequent reservations will succeed.
		arenaSizes := []uintptr{
			512 << 20,
			256 << 20,
			128 << 20,
			0,
		}

		for _, arenaSize := range arenaSizes {
			bitmapSize = (_MaxArena32 + 1) / (sys.PtrSize * 8 / 2)
			spansSize = (_MaxArena32 + 1) / _PageSize * sys.PtrSize
			if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
				bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
				arenaSize = bitmapSize * 8
				spansSize = arenaSize / _PageSize * sys.PtrSize
			}
			spansSize = round(spansSize, _PageSize)

			// SysReserve treats the address we ask for, end, as a hint,
			// not as an absolute requirement. If we ask for the end
			// of the data segment but the operating system requires
			// a little more space before we can start allocating, it will
			// give out a slightly higher pointer. Except QEMU, which
			// is buggy, as usual: it won't adjust the pointer upward.
			// So adjust it upward a little bit ourselves: 1/4 MB to get
			// away from the running binary image and then round up
			// to a MB boundary.
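			// (1<<18 is the 1/4 MB step mentioned above; 1<<20 is the
			// 1 MB rounding boundary.)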
			p = round(firstmoduledata.end+(1<<18), 1<<20)
			pSize = bitmapSize + spansSize + arenaSize + _PageSize
			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
			if p != 0 {
				break
			}
		}
		if p == 0 {
			throw("runtime: cannot reserve arena virtual address space")
		}
	}

	// PageSize can be larger than OS definition of page size,
	// so SysReserve can give us a PageSize-unaligned pointer.
	// To overcome this we ask for PageSize more and round up the pointer.
	p1 := round(p, _PageSize)

	mheap_.spans = (**mspan)(unsafe.Pointer(p1))
	mheap_.bitmap = p1 + spansSize + bitmapSize
	if sys.PtrSize == 4 {
		// Set arena_start such that we can accept memory
		// reservations located anywhere in the 4GB virtual space.
		mheap_.arena_start = 0
	} else {
		mheap_.arena_start = p1 + (spansSize + bitmapSize)
	}
	mheap_.arena_end = p + pSize
	mheap_.arena_used = p1 + (spansSize + bitmapSize)
	mheap_.arena_reserved = reserved

	if mheap_.arena_start&(_PageSize-1) != 0 {
		println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
		throw("misrounded allocation in mallocinit")
	}

	// Initialize the rest of the allocator.
	mheap_.init(spansSize)
	_g_ := getg()
	_g_.m.mcache = allocmcache()
}

// sysAlloc allocates the next n bytes from the heap arena. The
// returned pointer is always _PageSize aligned and between
// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
// There is no corresponding free function.
func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
	if n > h.arena_end-h.arena_used {
		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
		// Reserve some more space.
		p_size := round(n+_PageSize, 256<<20)
		new_end := h.arena_end + p_size // Careful: can overflow
		if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxArena32 {
			// TODO: It would be bad if part of the arena
			// is reserved and part is not.
			var reserved bool
			p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
			if p == 0 {
				return nil
			}
			if p == h.arena_end {
				h.arena_end = new_end
				h.arena_reserved = reserved
			} else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxArena32 {
				// Keep everything page-aligned.
				// Our pages are bigger than hardware pages.
				h.arena_end = p + p_size
				used := p + (-p & (_PageSize - 1))
				h.mapBits(used)
				h.mapSpans(used)
				h.arena_used = used
				h.arena_reserved = reserved
			} else {
				// We haven't added this allocation to
				// the stats, so subtract it from a
				// fake stat (but avoid underflow).
				stat := uint64(p_size)
				sysFree(unsafe.Pointer(p), p_size, &stat)
			}
		}
	}

	if n <= h.arena_end-h.arena_used {
		// Keep taking from our reservation.
		p := h.arena_used
		sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys)
		h.mapBits(p + n)
		h.mapSpans(p + n)
		h.arena_used = p + n
		if raceenabled {
			racemapshadow(unsafe.Pointer(p), n)
		}

		if p&(_PageSize-1) != 0 {
			throw("misrounded allocation in MHeap_SysAlloc")
		}
		return unsafe.Pointer(p)
	}

	// If using 64-bit, our reservation is all we have.
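	// (With the contiguous 64-bit reservation set up in mallocinit,
	// arena_end-arena_start is on the order of _MaxMem, which is far
	// larger than _MaxArena32, so the test below effectively selects
	// the 64-bit case; the 32-bit fallback continues further down.)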
	if h.arena_end-h.arena_start > _MaxArena32 {
		return nil
	}

	// On 32-bit, once the reservation is gone we can
	// try to get memory at a location chosen by the OS.
	p_size := round(n, _PageSize) + _PageSize
	p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
	if p == 0 {
		return nil
	}

	if p < h.arena_start || p+p_size-h.arena_start > _MaxArena32 {
		top := ^uintptr(0)
		if top-h.arena_start-1 > _MaxArena32 {
			top = h.arena_start + _MaxArena32 + 1
		}
		print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n")
		sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
		return nil
	}

	p_end := p + p_size
	p += -p & (_PageSize - 1)
	if p+n > h.arena_used {
		h.mapBits(p + n)
		h.mapSpans(p + n)
		h.arena_used = p + n
		if p_end > h.arena_end {
			h.arena_end = p_end
		}
		if raceenabled {
			racemapshadow(unsafe.Pointer(p), n)
		}
	}

	if p&(_PageSize-1) != 0 {
		throw("misrounded allocation in MHeap_SysAlloc")
	}
	return unsafe.Pointer(p)
}

// base address for all 0-byte allocations
var zerobase uintptr

// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
	theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
	if theBit < 64 {
		result := s.freeindex + uintptr(theBit)
		if result < s.nelems {
			freeidx := result + 1
			if freeidx%64 == 0 && freeidx != s.nelems {
				return 0
			}
			s.allocCache >>= (theBit + 1)
			s.freeindex = freeidx
			v := gclinkptr(result*s.elemsize + s.base())
			s.allocCount++
			return v
		}
	}
	return 0
}

// nextFree returns the next free object from the cached span if one is available.
// Otherwise it refills the cache with a span with an available object and
// returns that object along with a flag indicating that this was a heavy
// weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
	s = c.alloc[sizeclass]
	shouldhelpgc = false
	freeIndex := s.nextFreeIndex()
	if freeIndex == s.nelems {
		// The span is full.
		if uintptr(s.allocCount) != s.nelems {
			println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
			throw("s.allocCount != s.nelems && freeIndex == s.nelems")
		}
		systemstack(func() {
			c.refill(int32(sizeclass))
		})
		shouldhelpgc = true
		s = c.alloc[sizeclass]

		freeIndex = s.nextFreeIndex()
	}

	if freeIndex >= s.nelems {
		throw("freeIndex is not valid")
	}

	v = gclinkptr(freeIndex*s.elemsize + s.base())
	s.allocCount++
	if uintptr(s.allocCount) > s.nelems {
		println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
		throw("s.allocCount > s.nelems")
	}
	return
}
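
// As an illustration of the allocCache fast path in nextFreeFast above
// (an explanatory sketch, not runtime code): allocCache is a 64-bit
// cache of free-slot bits starting at freeindex. If, say,
//
//	s.freeindex  = 128
//	s.allocCache = 0xf8 (binary 11111000)
//
// then sys.Ctz64 returns 3, the object handed out is the one at index
// 128+3 = 131, freeindex advances to 132, and the cache is shifted right
// by 4 so its low bit stays aligned with the new freeindex.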

// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if gcphase == _GCmarktermination {
		throw("mallocgc called with gcphase == _GCmarktermination")
	}

	if size == 0 {
		return unsafe.Pointer(&zerobase)
	}

	if debug.sbrk != 0 {
		align := uintptr(16)
		if typ != nil {
			align = uintptr(typ.align)
		}
		return persistentalloc(size, align, &memstats.other_sys)
	}

	// assistG is the G to charge for this allocation, or nil if
	// GC is not currently active.
	var assistG *g
	if gcBlackenEnabled != 0 {
		// Charge the current user G for this allocation.
		assistG = getg()
		if assistG.m.curg != nil {
			assistG = assistG.m.curg
		}
		// Charge the allocation against the G. We'll account
		// for internal fragmentation at the end of mallocgc.
		assistG.gcAssistBytes -= int64(size)

		if assistG.gcAssistBytes < 0 {
			// This G is in debt. Assist the GC to correct
			// this before allocating. This must happen
			// before disabling preemption.
			gcAssistAlloc(assistG)
		}
	}

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	mp.mallocing = 1

	shouldhelpgc := false
	dataSize := size
	c := gomcache()
	var x unsafe.Pointer
	noscan := typ == nil || typ.kind&kindNoPointers != 0
	if size <= maxSmallSize {
		if noscan && size < maxTinySize {
			// Tiny allocator.
			//
			// Tiny allocator combines several tiny allocation requests
			// into a single memory block. The resulting memory block
			// is freed when all subobjects are unreachable. The subobjects
			// must be noscan (have no pointers); this ensures that
			// the amount of potentially wasted memory is bounded.
			//
			// Size of the memory block used for combining (maxTinySize) is tunable.
			// Current setting is 16 bytes, which relates to 2x worst case memory
			// wastage (when all but one of the subobjects are unreachable).
			// 8 bytes would result in no wastage at all, but provides fewer
			// opportunities for combining.
			// 32 bytes provides more opportunities for combining,
			// but can lead to 4x worst case wastage.
			// The best case saving is 8x regardless of block size.
			//
			// Objects obtained from tiny allocator must not be freed explicitly.
			// So when an object will be freed explicitly, we ensure that
			// its size >= maxTinySize.
			//
			// SetFinalizer has a special case for objects potentially coming
			// from tiny allocator; in such a case it allows setting finalizers
			// for an inner byte of a memory block.
			//
			// The main targets of tiny allocator are small strings and
			// standalone escaping variables. On a json benchmark
			// the allocator reduces the number of allocations by ~12% and
			// reduces heap size by ~20%.
			off := c.tinyoffset
			// Align tiny pointer for required (conservative) alignment.
			if size&7 == 0 {
				off = round(off, 8)
			} else if size&3 == 0 {
				off = round(off, 4)
			} else if size&1 == 0 {
				off = round(off, 2)
			}
			if off+size <= maxTinySize && c.tiny != 0 {
				// The object fits into existing tiny block.
				x = unsafe.Pointer(c.tiny + off)
				c.tinyoffset = off + size
				c.local_tinyallocs++
				mp.mallocing = 0
				releasem(mp)
				return x
			}
			// Allocate a new maxTinySize block.
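			// (For example, assuming a fresh mcache: three consecutive
			// 5-byte noscan allocations all pack into one 16-byte tiny
			// block at offsets 0, 5, and 10, leaving tinyoffset at 15;
			// a fourth 5-byte request no longer fits and reaches this
			// point to grab a fresh block.)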
			span := c.alloc[tinySizeClass]
			v := nextFreeFast(span)
			if v == 0 {
				v, _, shouldhelpgc = c.nextFree(tinySizeClass)
			}
			x = unsafe.Pointer(v)
			(*[2]uint64)(x)[0] = 0
			(*[2]uint64)(x)[1] = 0
			// See if we need to replace the existing tiny block with the new one
			// based on amount of remaining free space.
			if size < c.tinyoffset || c.tiny == 0 {
				c.tiny = uintptr(x)
				c.tinyoffset = size
			}
			size = maxTinySize
		} else {
			var sizeclass int8
			if size <= 1024-8 {
				sizeclass = size_to_class8[(size+7)>>3]
			} else {
				sizeclass = size_to_class128[(size-1024+127)>>7]
			}
			size = uintptr(class_to_size[sizeclass])
			span := c.alloc[sizeclass]
			v := nextFreeFast(span)
			if v == 0 {
				v, span, shouldhelpgc = c.nextFree(sizeclass)
			}
			x = unsafe.Pointer(v)
			if needzero && span.needzero != 0 {
				memclr(unsafe.Pointer(v), size)
			}
		}
	} else {
		var s *mspan
		shouldhelpgc = true
		systemstack(func() {
			s = largeAlloc(size, needzero)
		})
		s.freeindex = 1
		s.allocCount = 1
		x = unsafe.Pointer(s.base())
		size = s.elemsize
	}

	var scanSize uintptr
	if noscan {
		heapBitsSetTypeNoScan(uintptr(x))
	} else {
		// If allocating a defer+arg block, now that we've picked a malloc size
		// large enough to hold everything, cut the "asked for" size down to
		// just the defer header, so that the GC bitmap will record the arg block
		// as containing nothing at all (as if it were unused space at the end of
		// a malloc block caused by size rounding).
		// The defer arg areas are scanned as part of scanstack.
		if typ == deferType {
			dataSize = unsafe.Sizeof(_defer{})
		}
		heapBitsSetType(uintptr(x), size, dataSize, typ)
		if dataSize > typ.size {
			// Array allocation. If there are any
			// pointers, GC has to scan to the last
			// element.
			if typ.ptrdata != 0 {
				scanSize = dataSize - typ.size + typ.ptrdata
			}
		} else {
			scanSize = typ.ptrdata
		}
		c.local_scan += scanSize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	// Allocate black during GC.
	// All slots hold nil so no scanning is needed.
	// This may be racing with GC so do it atomically if there can be
	// a race marking the bit.
	if gcphase != _GCoff {
		gcmarknewobject(uintptr(x), size, scanSize)
	}

	if raceenabled {
		racemalloc(x, size)
	}

	if msanenabled {
		msanmalloc(x, size)
	}

	mp.mallocing = 0
	releasem(mp)

	if debug.allocfreetrace != 0 {
		tracealloc(x, size, typ)
	}

	if rate := MemProfileRate; rate > 0 {
		if size < uintptr(rate) && int32(size) < c.next_sample {
			c.next_sample -= int32(size)
		} else {
			mp := acquirem()
			profilealloc(mp, x, size)
			releasem(mp)
		}
	}

	if assistG != nil {
		// Account for internal fragmentation in the assist
		// debt now that we know it.
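		// (size has been rounded up to the size class or span size that
		// was actually used, while dataSize still holds the requested
		// size, except for defer+arg blocks where it was cut down above,
		// so size-dataSize is the internal fragmentation.)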
		assistG.gcAssistBytes -= int64(size - dataSize)
	}

	if shouldhelpgc && gcShouldStart(false) {
		gcStart(gcBackgroundMode, false)
	}

	return x
}

func largeAlloc(size uintptr, needzero bool) *mspan {
	// print("largeAlloc size=", size, "\n")

	if size+_PageSize < size {
		throw("out of memory")
	}
	npages := size >> _PageShift
	if size&_PageMask != 0 {
		npages++
	}

	// Deduct credit for this span allocation and sweep if
	// necessary. mHeap_Alloc will also sweep npages, so this only
	// pays the debt down to npage pages.
	deductSweepCredit(npages*_PageSize, npages)

	s := mheap_.alloc(npages, 0, true, needzero)
	if s == nil {
		throw("out of memory")
	}
	s.limit = s.base() + size
	heapBitsForSpan(s.base()).initSpan(s)
	return s
}

// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
	return mallocgc(typ.size, typ, true)
}

//go:linkname reflect_unsafe_New reflect.unsafe_New
func reflect_unsafe_New(typ *_type) unsafe.Pointer {
	return newobject(typ)
}

// newarray allocates an array of n elements of type typ.
func newarray(typ *_type, n int) unsafe.Pointer {
	if n < 0 || uintptr(n) > maxSliceCap(typ.size) {
		panic(plainError("runtime: allocation size out of range"))
	}
	return mallocgc(typ.size*uintptr(n), typ, true)
}

//go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer {
	return newarray(typ, n)
}

func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
	mp.mcache.next_sample = nextSample()
	mProf_Malloc(x, size)
}

// nextSample returns the next sampling point for heap profiling.
// It produces a random variable with a geometric distribution and
// mean MemProfileRate. This is done by generating a uniformly
// distributed random number and applying the cumulative distribution
// function for an exponential.
func nextSample() int32 {
	if GOOS == "plan9" {
		// Plan 9 doesn't support floating point in note handler.
		if g := getg(); g == g.m.gsignal {
			return nextSampleNoFP()
		}
	}

	period := MemProfileRate

	// make nextSample not overflow. Maximum possible step is
	// -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period.
	switch {
	case period > 0x7000000:
		period = 0x7000000
	case period == 0:
		return 0
	}

	// Let m be the sample rate;
	// the probability density function is m*exp(-mx), so the CDF is
	// p = 1 - exp(-mx), so
	// q = 1 - p == exp(-mx)
	// log_e(q) = -mx
	// -log_e(q)/m = x
	// x = -log_e(q) * period
	// x = log_2(q) * (-log_e(2)) * period    ; Using log_2 for efficiency
	const randomBitCount = 26
	q := fastrand1()%(1<<randomBitCount) + 1
	qlog := fastlog2(float64(q)) - randomBitCount
	if qlog > 0 {
		qlog = 0
	}
	const minusLog2 = -0.6931471805599453 // -ln(2)
	return int32(qlog*(minusLog2*float64(period))) + 1
}
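
// A worked instance of the arithmetic in nextSample (reader note, not
// runtime code): q/2^randomBitCount plays the role of a uniform u in
// (0, 1], and the return value is essentially -ln(u)*period + 1. For
// u = 0.5 that is about 0.693*period bytes until the next sample, and
// since -ln(u) averages to 1 over uniform u, the mean spacing between
// samples is period, i.e. MemProfileRate bytes, as the doc comment says.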

// nextSampleNoFP is similar to nextSample, but uses older,
// simpler code to avoid floating point.
func nextSampleNoFP() int32 {
	// Set first allocation sample size.
	rate := MemProfileRate
	if rate > 0x3fffffff { // make 2*rate not overflow
		rate = 0x3fffffff
	}
	if rate != 0 {
		return int32(int(fastrand1()) % (2 * rate))
	}
	return 0
}

type persistentAlloc struct {
	base unsafe.Pointer
	off  uintptr
}

var globalAlloc struct {
	mutex
	persistentAlloc
}

// Wrapper around sysAlloc that can allocate small chunks.
// There is no associated free operation.
// Intended for things like function/type/debug-related persistent data.
// If align is 0, uses default align (currently 8).
func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
	var p unsafe.Pointer
	systemstack(func() {
		p = persistentalloc1(size, align, sysStat)
	})
	return p
}

// Must run on system stack because stack growth can (re)invoke it.
// See issue 9174.
//go:systemstack
func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
	const (
		chunk    = 256 << 10
		maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
	)

	if size == 0 {
		throw("persistentalloc: size == 0")
	}
	if align != 0 {
		if align&(align-1) != 0 {
			throw("persistentalloc: align is not a power of 2")
		}
		if align > _PageSize {
			throw("persistentalloc: align is too large")
		}
	} else {
		align = 8
	}

	if size >= maxBlock {
		return sysAlloc(size, sysStat)
	}

	mp := acquirem()
	var persistent *persistentAlloc
	if mp != nil && mp.p != 0 {
		persistent = &mp.p.ptr().palloc
	} else {
		lock(&globalAlloc.mutex)
		persistent = &globalAlloc.persistentAlloc
	}
	persistent.off = round(persistent.off, align)
	if persistent.off+size > chunk || persistent.base == nil {
		persistent.base = sysAlloc(chunk, &memstats.other_sys)
		if persistent.base == nil {
			if persistent == &globalAlloc.persistentAlloc {
				unlock(&globalAlloc.mutex)
			}
			throw("runtime: cannot allocate memory")
		}
		persistent.off = 0
	}
	p := add(persistent.base, persistent.off)
	persistent.off += size
	releasem(mp)
	if persistent == &globalAlloc.persistentAlloc {
		unlock(&globalAlloc.mutex)
	}

	if sysStat != &memstats.other_sys {
		mSysStatInc(sysStat, size)
		mSysStatDec(&memstats.other_sys, size)
	}
	return p
}
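
// As a reader note on the bump allocation in persistentalloc1 above
// (illustrative only): with the default align of 8, three successive
// requests of 24, 10, and 40 bytes served from a fresh 256 KB chunk
// land at offsets 0, 24, and 40; the 10-byte block ends at offset 34,
// so the next offset is rounded up from 34 to 40 before the 40-byte
// block is handed out.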