github.com/emc-advanced-dev/unik@v0.0.0-20190717152701-a58d3e8e33b7/containers/compilers/rump/go/gopatches/runtime/malloc.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory allocator, based on tcmalloc.
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html

// The main allocator works in runs of pages.
// Small allocation sizes (up to and including 32 kB) are
// rounded to one of about 100 size classes, each of which
// has its own free list of objects of exactly that size.
// Any free page of memory can be split into a set of objects
// of one size class, which are then managed using free list
// allocators.
//
// The allocator's data structures are:
//
//	FixAlloc: a free-list allocator for fixed-size objects,
//		used to manage storage used by the allocator.
//	MHeap: the malloc heap, managed at page (4096-byte) granularity.
//	MSpan: a run of pages managed by the MHeap.
//	MCentral: a shared free list for a given size class.
//	MCache: a per-thread (in Go, per-P) cache for small objects.
//	MStats: allocation statistics.
//
// Allocating a small object proceeds up a hierarchy of caches:
//
//	1. Round the size up to one of the small size classes
//	   and look in the corresponding MCache free list.
//	   If the list is not empty, allocate an object from it.
//	   This can all be done without acquiring a lock.
//
//	2. If the MCache free list is empty, replenish it by
//	   taking a bunch of objects from the MCentral free list.
//	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
//
//	3. If the MCentral free list is empty, replenish it by
//	   allocating a run of pages from the MHeap and then
//	   chopping that memory into objects of the given size.
//	   Allocating many objects amortizes the cost of locking
//	   the heap.
//
//	4. If the MHeap is empty or has no page runs large enough,
//	   allocate a new group of pages (at least 1MB) from the
//	   operating system. Allocating a large run of pages
//	   amortizes the cost of talking to the operating system.
//
// Freeing a small object proceeds up the same hierarchy:
//
//	1. Look up the size class for the object and add it to
//	   the MCache free list.
//
//	2. If the MCache free list is too long or the MCache has
//	   too much memory, return some to the MCentral free lists.
//
//	3. If all the objects in a given span have returned to
//	   the MCentral list, return that span to the page heap.
//
//	4. If the heap has too much memory, return some to the
//	   operating system.
//
//	TODO(rsc): Step 4 is not implemented.
//
// Allocating and freeing a large object uses the page heap
// directly, bypassing the MCache and MCentral free lists.
//
// The small objects on the MCache and MCentral free lists
// may or may not be zeroed. They are zeroed if and only if
// the second word of the object is zero. A span in the
// page heap is zeroed unless s->needzero is set. When a span
// is allocated to break into small objects, it is zeroed if needed
// and s->needzero is set. There are two main benefits to delaying the
// zeroing this way:
//
//	1. stack frames allocated from the small object lists
//	   or the page heap can avoid zeroing altogether.
//	2. the cost of zeroing when reusing a small object is
//	   charged to the mutator, not the garbage collector.
//
// This code was written with an eye toward translating to Go
// in the future. Methods have the form Type_Method(Type *t, ...).

package runtime

import "unsafe"

const (
	debugMalloc = false

	flagNoScan = _FlagNoScan
	flagNoZero = _FlagNoZero

	maxTinySize   = _TinySize
	tinySizeClass = _TinySizeClass
	maxSmallSize  = _MaxSmallSize

	pageShift = _PageShift
	pageSize  = _PageSize
	pageMask  = _PageMask

	mSpanInUse = _MSpanInUse

	concurrentSweep = _ConcurrentSweep
)

const (
	_PageShift = 13
	_PageSize  = 1 << _PageShift
	_PageMask  = _PageSize - 1
)

const (
	// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
	_64bit = 1 << (^uintptr(0) >> 63) / 2

	// Computed constant. The definition of MaxSmallSize and the
	// algorithm in msize.go produces some number of different allocation
	// size classes. NumSizeClasses is that number. It's needed here
	// because there are static arrays of this length; when msize runs its
	// size choosing algorithm it double-checks that NumSizeClasses agrees.
	_NumSizeClasses = 67

	// Tunable constants.
	_MaxSmallSize = 32 << 10

	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
	_TinySize      = 16
	_TinySizeClass = 2

	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
	_HeapAllocChunk = 1 << 20                // Chunk size for heap growth

	// Per-P, per order stack segment cache size.
	_StackCacheSize = 32 * 1024

	// Number of orders that get caching. Order 0 is FixedStack
	// and each successive order is twice as large.
	// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
	// will be allocated directly.
	// Since FixedStack is different on different systems, we
	// must vary NumStackOrders to keep the same maximum cached size.
	//   OS               | FixedStack | NumStackOrders
	//   -----------------+------------+---------------
	//   linux/darwin/bsd | 2KB        | 4
	//   windows/32       | 4KB        | 3
	//   windows/64       | 8KB        | 2
	//   plan9            | 4KB        | 3
	_NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9

	// Number of bits in page to span calculations (4k pages).
	// On Windows 64-bit we limit the arena to 32GB or 35 bits.
	// Windows counts memory used by page table into committed memory
	// of the process, so we can't reserve too much memory.
	// See https://golang.org/issue/5402 and https://golang.org/issue/5236.
	// On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
	// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
	// On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
	// but as most devices have less than 4GB of physical memory anyway, we
	// try to be conservative here, and only ask for a 2GB heap.

	// XXX: use goos_netbsd really as goos_rumprun
	_MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (1-goos_rumprun)*(_64bit*(1-goos_windows)*(1-goos_darwin*goarch_arm64))*39 + goos_darwin*goarch_arm64*31 + (1-_64bit)*32*(1-goos_rumprun) + 25*goos_rumprun

	_MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift

	_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)

	// Max number of threads to run garbage collection.
	// 2, 3, and 4 are all plausible maximums depending
	// on the hardware details of the machine. The garbage
	// collector scales well to 32 cpus.
	_MaxGcproc = 32
)
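
// Worked example of the _MHeapMap_TotalBits expression above (explanatory
// note, assuming the goos_rumprun/goos_netbsd build constants introduced by
// the accompanying patches are 1 on a 64-bit rumprun target and 0 elsewhere):
// every term except the final 25*goos_rumprun vanishes, so the arena is
// limited to 2^25 bytes (32 MB) and _MaxMem = 1<<25 - 1. With _PageShift = 13
// (8 KB pages) that gives _MHeapMap_Bits = 25 - 13 = 12, i.e. 4096 pages of
// page-to-span metadata. On an ordinary 64-bit non-Windows build the second
// term applies instead and the arena is 2^39 bytes (512 GB), as in the
// upstream runtime.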

// Page number (address>>pageShift)
type pageID uintptr

const _MaxArena32 = 2 << 30

// OS-defined helpers:
//
// sysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
// or a megabyte.
// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
// may use larger alignment, so the caller must be careful to realign the
// memory obtained by sysAlloc.
//
// SysUnused notifies the operating system that the contents
// of the memory region are no longer needed and can be reused
// for other purposes.
// SysUsed notifies the operating system that the contents
// of the memory region are needed again.
//
// SysFree returns it unconditionally; this is only used if
// an out-of-memory error has been detected midway through
// an allocation. It is okay if SysFree is a no-op.
//
// SysReserve reserves address space without allocating memory.
// If the pointer passed to it is non-nil, the caller wants the
// reservation there, but SysReserve can still choose another
// location if that one is unavailable. On some systems and in some
// cases SysReserve will simply check that the address space is
// available and not actually reserve it. If SysReserve returns
// non-nil, it sets *reserved to true if the address space is
// reserved, false if it has merely been checked.
// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
// may use larger alignment, so the caller must be careful to realign the
// memory obtained by sysAlloc.
//
// SysMap maps previously reserved address space for use.
// The reserved argument is true if the address space was really
// reserved, not merely checked.
//
// SysFault marks an (already sysAlloc'd) region to fault
// if accessed. Used only for debugging the runtime.
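
// mallocinit sets up the size classes and the heap. It reserves a single
// contiguous region and lays it out as three consecutive pieces: the spans
// array, the heap bitmap, and the arena itself. After the reservation is
// rounded up to _PageSize (p1 below), spans live at p1, the bitmap at
// p1+spansSize, and the arena starts at p1+spansSize+bitmapSize, with
// arena_used growing from arena_start as memory is handed out.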
func mallocinit() {
	initSizes()

	if class_to_size[_TinySizeClass] != _TinySize {
		throw("bad TinySizeClass")
	}

	var p, bitmapSize, spansSize, pSize, limit uintptr
	var reserved bool

	// limit = runtime.memlimit();
	// See https://golang.org/issue/5049
	// TODO(rsc): Fix after 1.1.
	limit = 0

	// Set up the allocation arena, a contiguous area of memory where
	// allocated data will be found. The arena begins with a bitmap large
	// enough to hold 4 bits per allocated word.
	if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
		// On a 64-bit machine, allocate from a single contiguous reservation.
		// 512 GB (MaxMem) should be big enough for now.
		//
		// The code will work with the reservation at any address, but ask
		// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
		// Allocating a 512 GB region takes away 39 bits, and the amd64
		// doesn't let us choose the top 17 bits, so that leaves the 9 bits
		// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
		// UTF-8 sequences, and they are otherwise as far away from
		// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
		// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
		// on OS X during thread allocations. 0x00c0 causes conflicts with
		// AddressSanitizer which reserves all memory up to 0x0100.
		// These choices are both for debuggability and to reduce the
		// odds of a conservative garbage collector (as is still used in gccgo)
		// not collecting memory because some non-pointer block of memory
		// had a bit pattern that matched a memory address.
		//
		// Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
		// but it hardly matters: e0 00 is not valid UTF-8 either.
		//
		// If this fails we fall back to the 32 bit memory mechanism
		//
		// However, on arm64, we ignore all this advice above and slam the
		// allocation at 0x40 << 32 because when using 4k pages with 3-level
		// translation buffers, the user address space is limited to 39 bits
		// On darwin/arm64, the address space is even smaller.
		arenaSize := round(_MaxMem, _PageSize)
		bitmapSize = arenaSize / (ptrSize * 8 / 4)
		spansSize = arenaSize / _PageSize * ptrSize
		spansSize = round(spansSize, _PageSize)
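		// Worked numbers (explanatory note, assuming the rumprun 25-bit arena
		// defined above and ptrSize == 8): arenaSize rounds up to 2^25 = 32 MB,
		// bitmapSize = arenaSize/16 = 2 MB (4 bits per 8-byte word), and
		// spansSize = 32 MB / 8 KB pages * 8 bytes = 32 KB, so pSize asks the
		// OS for roughly 34 MB plus one page. With the upstream 39-bit arena
		// the same formulas give the 512 GB + 32 GB + 512 MB reservation
		// described in the comment above.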
		for i := 0; i <= 0x7f; i++ {
			switch {
			case GOARCH == "arm64" && GOOS == "darwin":
				p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
			case GOARCH == "arm64":
				p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
			default:
				p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
			}
			pSize = bitmapSize + spansSize + arenaSize + _PageSize
			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
			if p != 0 {
				break
			}
		}
	}

	if p == 0 {
		// On a 32-bit machine, we can't typically get away
		// with a giant virtual address space reservation.
		// Instead we map the memory information bitmap
		// immediately after the data segment, large enough
		// to handle another 2GB of mappings (256 MB),
		// along with a reservation for an initial arena.
		// When that gets used up, we'll start asking the kernel
		// for any memory anywhere and hope it's in the 2GB
		// following the bitmap (presumably the executable begins
		// near the bottom of memory, so we'll have to use up
		// most of memory before the kernel resorts to giving out
		// memory before the beginning of the text segment).
		//
		// Alternatively we could reserve 512 MB bitmap, enough
		// for 4GB of mappings, and then accept any memory the
		// kernel threw at us, but normally that's a waste of 512 MB
		// of address space, which is probably too much in a 32-bit world.

		// If we fail to allocate, try again with a smaller arena.
		// This is necessary on Android L where we share a process
		// with ART, which reserves virtual memory aggressively.
		arenaSizes := []uintptr{
			512 << 20,
			256 << 20,
			128 << 20,
		}

		for _, arenaSize := range arenaSizes {
			bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
			spansSize = _MaxArena32 / _PageSize * ptrSize
			if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
				bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
				arenaSize = bitmapSize * 8
				spansSize = arenaSize / _PageSize * ptrSize
			}
			spansSize = round(spansSize, _PageSize)

			// SysReserve treats the address we ask for, end, as a hint,
			// not as an absolute requirement. If we ask for the end
			// of the data segment but the operating system requires
			// a little more space before we can start allocating, it will
			// give out a slightly higher pointer. Except QEMU, which
			// is buggy, as usual: it won't adjust the pointer upward.
			// So adjust it upward a little bit ourselves: 1/4 MB to get
			// away from the running binary image and then round up
			// to a MB boundary.
			p = round(firstmoduledata.end+(1<<18), 1<<20)
			pSize = bitmapSize + spansSize + arenaSize + _PageSize
			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
			if p != 0 {
				break
			}
		}
		if p == 0 {
			throw("runtime: cannot reserve arena virtual address space")
		}
	}

	// PageSize can be larger than OS definition of page size,
	// so SysReserve can give us a PageSize-unaligned pointer.
	// To overcome this we ask for PageSize more and round up the pointer.
	p1 := round(p, _PageSize)

	mheap_.spans = (**mspan)(unsafe.Pointer(p1))
	mheap_.bitmap = p1 + spansSize
	mheap_.arena_start = p1 + (spansSize + bitmapSize)
	mheap_.arena_used = mheap_.arena_start
	mheap_.arena_end = p + pSize
	mheap_.arena_reserved = reserved

	if mheap_.arena_start&(_PageSize-1) != 0 {
		println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
		throw("misrounded allocation in mallocinit")
	}

	// Initialize the rest of the allocator.
	mHeap_Init(&mheap_, spansSize)
	_g_ := getg()
	_g_.m.mcache = allocmcache()
}

// sysReserveHigh reserves space somewhere high in the address space.
// sysReserve doesn't actually reserve the full amount requested on
// 64-bit systems, because of problems with ulimit. Instead it checks
// that it can get the first 64 kB and assumes it can grab the rest as
// needed. This doesn't work well with the "let the kernel pick an address"
// mode, so don't do that. Pick a high address instead.
func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
	if ptrSize == 4 {
		return sysReserve(nil, n, reserved)
	}

	for i := 0; i <= 0x7f; i++ {
		p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
		*reserved = false
		p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
		if p != 0 {
			return unsafe.Pointer(p)
		}
	}

	return sysReserve(nil, n, reserved)
}
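
// mHeap_SysAlloc hands n bytes of the arena to the heap. While the
// reservation made in mallocinit still has room, it simply maps the next n
// bytes with sysMap, extends the bitmap and spans mappings, and advances
// arena_used. In 32-bit mode it may first grow the reservation itself in
// 256 MB steps, and as a last resort it accepts whatever address sysAlloc
// returns, provided it still falls inside arena_start+_MaxArena32.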
func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
	if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
		// Reserve some more space.
		p_size := round(n+_PageSize, 256<<20)
		new_end := h.arena_end + p_size // Careful: can overflow
		if h.arena_end <= new_end && new_end <= h.arena_start+_MaxArena32 {
			// TODO: It would be bad if part of the arena
			// is reserved and part is not.
			var reserved bool
			p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
			if p == h.arena_end {
				h.arena_end = new_end
				h.arena_reserved = reserved
			} else if h.arena_start <= p && p+p_size <= h.arena_start+_MaxArena32 {
				// Keep everything page-aligned.
				// Our pages are bigger than hardware pages.
				h.arena_end = p + p_size
				used := p + (-uintptr(p) & (_PageSize - 1))
				mHeap_MapBits(h, used)
				mHeap_MapSpans(h, used)
				h.arena_used = used
				h.arena_reserved = reserved
			} else {
				stat := uint64(p_size)
				sysFree((unsafe.Pointer)(p), p_size, &stat)
			}
		}
	}

	if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
		// Keep taking from our reservation.
		p := h.arena_used
		sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
		mHeap_MapBits(h, p+n)
		mHeap_MapSpans(h, p+n)
		h.arena_used = p + n
		if raceenabled {
			racemapshadow((unsafe.Pointer)(p), n)
		}

		if uintptr(p)&(_PageSize-1) != 0 {
			throw("misrounded allocation in MHeap_SysAlloc")
		}
		return (unsafe.Pointer)(p)
	}

	// If using 64-bit, our reservation is all we have.
	if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
		return nil
	}

	// On 32-bit, once the reservation is gone we can
	// try to get memory at a location chosen by the OS
	// and hope that it is in the range we allocated bitmap for.
	p_size := round(n, _PageSize) + _PageSize
	p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
	if p == 0 {
		return nil
	}

	if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
		print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
		sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
		return nil
	}

	p_end := p + p_size
	p += -p & (_PageSize - 1)
	if uintptr(p)+n > uintptr(h.arena_used) {
		mHeap_MapBits(h, p+n)
		mHeap_MapSpans(h, p+n)
		h.arena_used = p + n
		if p_end > h.arena_end {
			h.arena_end = p_end
		}
		if raceenabled {
			racemapshadow((unsafe.Pointer)(p), n)
		}
	}

	if uintptr(p)&(_PageSize-1) != 0 {
		throw("misrounded allocation in MHeap_SysAlloc")
	}
	return (unsafe.Pointer)(p)
}

// base address for all 0-byte allocations
var zerobase uintptr

const (
	// flags to malloc
	_FlagNoScan = 1 << 0 // GC doesn't have to scan object
	_FlagNoZero = 1 << 1 // don't zero memory
)

// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
	if gcphase == _GCmarktermination {
		throw("mallocgc called with gcphase == _GCmarktermination")
	}

	if size == 0 {
		return unsafe.Pointer(&zerobase)
	}

	if flags&flagNoScan == 0 && typ == nil {
		throw("malloc missing type")
	}

	if debug.sbrk != 0 {
		align := uintptr(16)
		if typ != nil {
			align = uintptr(typ.align)
		}
		return persistentalloc(size, align, &memstats.other_sys)
	}

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	mp.mallocing = 1

	shouldhelpgc := false
	dataSize := size
	c := gomcache()
	var s *mspan
	var x unsafe.Pointer
	if size <= maxSmallSize {
		if flags&flagNoScan != 0 && size < maxTinySize {
			// Tiny allocator.
			//
			// Tiny allocator combines several tiny allocation requests
			// into a single memory block. The resulting memory block
			// is freed when all subobjects are unreachable. The subobjects
			// must be FlagNoScan (don't have pointers), this ensures that
			// the amount of potentially wasted memory is bounded.
			//
			// Size of the memory block used for combining (maxTinySize) is tunable.
			// Current setting is 16 bytes, which relates to 2x worst case memory
			// wastage (when all but one subobjects are unreachable).
			// 8 bytes would result in no wastage at all, but provides less
			// opportunities for combining.
			// 32 bytes provides more opportunities for combining,
			// but can lead to 4x worst case wastage.
			// The best case winning is 8x regardless of block size.
			//
			// Objects obtained from tiny allocator must not be freed explicitly.
			// So when an object will be freed explicitly, we ensure that
			// its size >= maxTinySize.
			//
			// SetFinalizer has a special case for objects potentially coming
			// from tiny allocator, in such case it allows to set finalizers
			// for an inner byte of a memory block.
			//
			// The main targets of tiny allocator are small strings and
			// standalone escaping variables. On a json benchmark
			// the allocator reduces number of allocations by ~12% and
			// reduces heap size by ~20%.
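			//
			// Worked example of the bump allocation below (illustrative
			// numbers only): if c.tinyoffset is 4 and an 8-byte noscan object
			// is requested, size&7 == 0 so off is rounded up to 8;
			// off+size == 16 still fits in maxTinySize, so the object is
			// carved out of the existing block at offset 8 and c.tinyoffset
			// becomes 16. A subsequent 1-byte request would no longer fit and
			// would start a fresh 16-byte block.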
			off := c.tinyoffset
			// Align tiny pointer for required (conservative) alignment.
			if size&7 == 0 {
				off = round(off, 8)
			} else if size&3 == 0 {
				off = round(off, 4)
			} else if size&1 == 0 {
				off = round(off, 2)
			}
			if off+size <= maxTinySize && c.tiny != nil {
				// The object fits into existing tiny block.
				x = add(c.tiny, off)
				c.tinyoffset = off + size
				c.local_tinyallocs++
				mp.mallocing = 0
				releasem(mp)
				return x
			}
			// Allocate a new maxTinySize block.
			s = c.alloc[tinySizeClass]
			v := s.freelist
			if v.ptr() == nil {
				systemstack(func() {
					mCache_Refill(c, tinySizeClass)
				})
				shouldhelpgc = true
				s = c.alloc[tinySizeClass]
				v = s.freelist
			}
			s.freelist = v.ptr().next
			s.ref++
			// prefetchnta offers best performance, see change list message.
			prefetchnta(uintptr(v.ptr().next))
			x = unsafe.Pointer(v)
			(*[2]uint64)(x)[0] = 0
			(*[2]uint64)(x)[1] = 0
			// See if we need to replace the existing tiny block with the new one
			// based on amount of remaining free space.
			if size < c.tinyoffset {
				c.tiny = x
				c.tinyoffset = size
			}
			size = maxTinySize
		} else {
			var sizeclass int8
			if size <= 1024-8 {
				sizeclass = size_to_class8[(size+7)>>3]
			} else {
				sizeclass = size_to_class128[(size-1024+127)>>7]
			}
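			// Lookup example (illustrative; the concrete class sizes come
			// from the tables built in msize.go): a 24-byte request takes the
			// first branch and indexes size_to_class8[(24+7)>>3] =
			// size_to_class8[3], while a 2000-byte request indexes
			// size_to_class128[(2000-1024+127)>>7] = size_to_class128[8].
			// The request is then rounded up to that class's size just below.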
			size = uintptr(class_to_size[sizeclass])
			s = c.alloc[sizeclass]
			v := s.freelist
			if v.ptr() == nil {
				systemstack(func() {
					mCache_Refill(c, int32(sizeclass))
				})
				shouldhelpgc = true
				s = c.alloc[sizeclass]
				v = s.freelist
			}
			s.freelist = v.ptr().next
			s.ref++
			// prefetchnta offers best performance, see change list message.
			prefetchnta(uintptr(v.ptr().next))
			x = unsafe.Pointer(v)
			if flags&flagNoZero == 0 {
				v.ptr().next = 0
				if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
					memclr(unsafe.Pointer(v), size)
				}
			}
		}
		c.local_cachealloc += size
	} else {
		var s *mspan
		shouldhelpgc = true
		systemstack(func() {
			s = largeAlloc(size, uint32(flags))
		})
		x = unsafe.Pointer(uintptr(s.start << pageShift))
		size = uintptr(s.elemsize)
	}

	if flags&flagNoScan != 0 {
		// All objects are pre-marked as noscan. Nothing to do.
	} else {
		// If allocating a defer+arg block, now that we've picked a malloc size
		// large enough to hold everything, cut the "asked for" size down to
		// just the defer header, so that the GC bitmap will record the arg block
		// as containing nothing at all (as if it were unused space at the end of
		// a malloc block caused by size rounding).
		// The defer arg areas are scanned as part of scanstack.
		if typ == deferType {
			dataSize = unsafe.Sizeof(_defer{})
		}
		heapBitsSetType(uintptr(x), size, dataSize, typ)
		if dataSize > typ.size {
			// Array allocation. If there are any
			// pointers, GC has to scan to the last
			// element.
			if typ.ptrdata != 0 {
				c.local_scan += dataSize - typ.size + typ.ptrdata
			}
		} else {
			c.local_scan += typ.ptrdata
		}

		// Ensure that the stores above that initialize x to
		// type-safe memory and set the heap bits occur before
		// the caller can make x observable to the garbage
		// collector. Otherwise, on weakly ordered machines,
		// the garbage collector could follow a pointer to x,
		// but see uninitialized memory or stale heap bits.
		publicationBarrier()
	}

	// GCmarkterminate allocates black
	// All slots hold nil so no scanning is needed.
	// This may be racing with GC so do it atomically if there can be
	// a race marking the bit.
	if gcphase == _GCmarktermination || gcBlackenPromptly {
		systemstack(func() {
			gcmarknewobject_m(uintptr(x), size)
		})
	}

	if raceenabled {
		racemalloc(x, size)
	}

	mp.mallocing = 0
	releasem(mp)

	if debug.allocfreetrace != 0 {
		tracealloc(x, size, typ)
	}

	if rate := MemProfileRate; rate > 0 {
		if size < uintptr(rate) && int32(size) < c.next_sample {
			c.next_sample -= int32(size)
		} else {
			mp := acquirem()
			profilealloc(mp, x, size)
			releasem(mp)
		}
	}

	if shouldhelpgc && shouldtriggergc() {
		startGC(gcBackgroundMode, false)
	} else if gcBlackenEnabled != 0 {
		// Assist garbage collector. We delay this until the
		// epilogue so that it doesn't interfere with the
		// inner working of malloc such as mcache refills that
		// might happen while doing the gcAssistAlloc.
		gcAssistAlloc(size, shouldhelpgc)
	} else if shouldhelpgc && bggc.working != 0 {
		// The GC is starting up or shutting down, so we can't
		// assist, but we also can't allocate unabated. Slow
		// down this G's allocation and help the GC stay
		// scheduled by yielding.
		//
		// TODO: This is a workaround. Either help the GC make
		// the transition or block.
		gp := getg()
		if gp != gp.m.g0 && gp.m.locks == 0 && gp.m.preemptoff == "" {
			Gosched()
		}
	}

	return x
}
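
// largeAlloc allocates a span for an object bigger than maxSmallSize straight
// from the heap. The span length is the request rounded up to whole pages:
// with the 8 KB pages defined above, a 40 KB request gives
// npages = 40960>>13 = 5 with no remainder, while 41 KB leaves a remainder
// and rounds up to 6 pages (illustrative arithmetic, not taken from a real
// workload).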
func largeAlloc(size uintptr, flag uint32) *mspan {
	// print("largeAlloc size=", size, "\n")

	if size+_PageSize < size {
		throw("out of memory")
	}
	npages := size >> _PageShift
	if size&_PageMask != 0 {
		npages++
	}

	// Deduct credit for this span allocation and sweep if
	// necessary. mHeap_Alloc will also sweep npages, so this only
	// pays the debt down to npage pages.
	deductSweepCredit(npages*_PageSize, npages)

	s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
	if s == nil {
		throw("out of memory")
	}
	s.limit = uintptr(s.start)<<_PageShift + size
	heapBitsForSpan(s.base()).initSpan(s.layout())
	return s
}

// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
	flags := uint32(0)
	if typ.kind&kindNoPointers != 0 {
		flags |= flagNoScan
	}
	return mallocgc(uintptr(typ.size), typ, flags)
}

//go:linkname reflect_unsafe_New reflect.unsafe_New
func reflect_unsafe_New(typ *_type) unsafe.Pointer {
	return newobject(typ)
}

// implementation of make builtin for slices
func newarray(typ *_type, n uintptr) unsafe.Pointer {
	flags := uint32(0)
	if typ.kind&kindNoPointers != 0 {
		flags |= flagNoScan
	}
	if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
		panic("runtime: allocation size out of range")
	}
	return mallocgc(uintptr(typ.size)*n, typ, flags)
}

//go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
func reflect_unsafe_NewArray(typ *_type, n uintptr) unsafe.Pointer {
	return newarray(typ, n)
}

// rawmem returns a chunk of pointerless memory. It is
// not zeroed.
func rawmem(size uintptr) unsafe.Pointer {
	return mallocgc(size, nil, flagNoScan|flagNoZero)
}

func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
	c := mp.mcache
	rate := MemProfileRate
	if size < uintptr(rate) {
		// pick next profile time
		// If you change this, also change allocmcache.
		if rate > 0x3fffffff { // make 2*rate not overflow
			rate = 0x3fffffff
		}
		next := int32(fastrand1()) % (2 * int32(rate))
		// Subtract the "remainder" of the current allocation.
		// Otherwise objects that are close in size to sampling rate
		// will be under-sampled, because we consistently discard this remainder.
		next -= (int32(size) - c.next_sample)
		if next < 0 {
			next = 0
		}
		c.next_sample = next
	}

	mProf_Malloc(x, size)
}

type persistentAlloc struct {
	base unsafe.Pointer
	off  uintptr
}

var globalAlloc struct {
	mutex
	persistentAlloc
}

// Wrapper around sysAlloc that can allocate small chunks.
// There is no associated free operation.
// Intended for things like function/type/debug-related persistent data.
// If align is 0, uses default align (currently 8).
func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
	var p unsafe.Pointer
	systemstack(func() {
		p = persistentalloc1(size, align, sysStat)
	})
	return p
}
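
// Worked example of the bump allocation in persistentalloc1 below
// (illustrative values only): persistentalloc(40, 0, sysStat) defaults align
// to 8; if the current chunk's off is 100, it is rounded up to 104, the
// caller gets base+104, and off advances to 144. Requests of maxBlock
// (64 KB) or more bypass the chunk and go straight to sysAlloc.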

// Must run on system stack because stack growth can (re)invoke it.
// See issue 9174.
//go:systemstack
func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
	const (
		chunk    = 256 << 10
		maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
	)

	if size == 0 {
		throw("persistentalloc: size == 0")
	}
	if align != 0 {
		if align&(align-1) != 0 {
			throw("persistentalloc: align is not a power of 2")
		}
		if align > _PageSize {
			throw("persistentalloc: align is too large")
		}
	} else {
		align = 8
	}

	if size >= maxBlock {
		return sysAlloc(size, sysStat)
	}

	mp := acquirem()
	var persistent *persistentAlloc
	if mp != nil && mp.p != 0 {
		persistent = &mp.p.ptr().palloc
	} else {
		lock(&globalAlloc.mutex)
		persistent = &globalAlloc.persistentAlloc
	}
	persistent.off = round(persistent.off, align)
	if persistent.off+size > chunk || persistent.base == nil {
		persistent.base = sysAlloc(chunk, &memstats.other_sys)
		if persistent.base == nil {
			if persistent == &globalAlloc.persistentAlloc {
				unlock(&globalAlloc.mutex)
			}
			throw("runtime: cannot allocate memory")
		}
		persistent.off = 0
	}
	p := add(persistent.base, persistent.off)
	persistent.off += size
	releasem(mp)
	if persistent == &globalAlloc.persistentAlloc {
		unlock(&globalAlloc.mutex)
	}

	if sysStat != &memstats.other_sys {
		mSysStatInc(sysStat, size)
		mSysStatDec(&memstats.other_sys, size)
	}
	return p
}