github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/malloc.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Memory allocator. 6 // 7 // This was originally based on tcmalloc, but has diverged quite a bit. 8 // http://goog-perftools.sourceforge.net/doc/tcmalloc.html 9 10 // The main allocator works in runs of pages. 11 // Small allocation sizes (up to and including 32 kB) are 12 // rounded to one of about 70 size classes, each of which 13 // has its own free set of objects of exactly that size. 14 // Any free page of memory can be split into a set of objects 15 // of one size class, which are then managed using a free bitmap. 16 // 17 // The allocator's data structures are: 18 // 19 // fixalloc: a free-list allocator for fixed-size off-heap objects, 20 // used to manage storage used by the allocator. 21 // mheap: the malloc heap, managed at page (8192-byte) granularity. 22 // mspan: a run of in-use pages managed by the mheap. 23 // mcentral: collects all spans of a given size class. 24 // mcache: a per-P cache of mspans with free space. 25 // mstats: allocation statistics. 26 // 27 // Allocating a small object proceeds up a hierarchy of caches: 28 // 29 // 1. Round the size up to one of the small size classes 30 // and look in the corresponding mspan in this P's mcache. 31 // Scan the mspan's free bitmap to find a free slot. 32 // If there is a free slot, allocate it. 33 // This can all be done without acquiring a lock. 34 // 35 // 2. If the mspan has no free slots, obtain a new mspan 36 // from the mcentral's list of mspans of the required size 37 // class that have free space. 38 // Obtaining a whole span amortizes the cost of locking 39 // the mcentral. 40 // 41 // 3. If the mcentral's mspan list is empty, obtain a run 42 // of pages from the mheap to use for the mspan. 43 // 44 // 4. If the mheap is empty or has no page runs large enough, 45 // allocate a new group of pages (at least 1MB) from the 46 // operating system. Allocating a large run of pages 47 // amortizes the cost of talking to the operating system. 48 // 49 // Sweeping an mspan and freeing objects on it proceeds up a similar 50 // hierarchy: 51 // 52 // 1. If the mspan is being swept in response to allocation, it 53 // is returned to the mcache to satisfy the allocation. 54 // 55 // 2. Otherwise, if the mspan still has allocated objects in it, 56 // it is placed on the mcentral free list for the mspan's size 57 // class. 58 // 59 // 3. Otherwise, if all objects in the mspan are free, the mspan's 60 // pages are returned to the mheap and the mspan is now dead. 61 // 62 // Allocating and freeing a large object uses the mheap 63 // directly, bypassing the mcache and mcentral. 64 // 65 // Free object slots in an mspan are zeroed only if mspan.needzero is 66 // false. If needzero is true, objects are zeroed as they are 67 // allocated. There are various benefits to delaying zeroing this way: 68 // 69 // 1. Stack frame allocation can avoid zeroing altogether. 70 // 71 // 2. It exhibits better temporal locality, since the program is 72 // probably about to write to the memory. 73 // 74 // 3. We don't zero pages that never get reused. 75 76 // Virtual memory layout 77 // 78 // The heap consists of a set of arenas, which are 64MB on 64-bit and 79 // 4MB on 32-bit (heapArenaBytes). Each arena's start address is also 80 // aligned to the arena size. 
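//
// For illustration (a rough sketch, not the exact runtime code): because
// each arena is aligned to its own size, the arena frame for an address
// is just a divide,
//
//	frame := (addr + arenaBaseOffset) / heapArenaBytes
//
// using the constants defined below; the real lookup is arenaIndex,
// which sysAlloc uses later in this file.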
81 // 82 // Each arena has an associated heapArena object that stores the 83 // metadata for that arena: the heap bitmap for all words in the arena 84 // and the span map for all pages in the arena. heapArena objects are 85 // themselves allocated off-heap. 86 // 87 // Since arenas are aligned, the address space can be viewed as a 88 // series of arena frames. The arena map (mheap_.arenas) maps from 89 // arena frame number to *heapArena, or nil for parts of the address 90 // space not backed by the Go heap. The arena map is structured as a 91 // two-level array consisting of a "L1" arena map and many "L2" arena 92 // maps; however, since arenas are large, on many architectures, the 93 // arena map consists of a single, large L2 map. 94 // 95 // The arena map covers the entire possible address space, allowing 96 // the Go heap to use any part of the address space. The allocator 97 // attempts to keep arenas contiguous so that large spans (and hence 98 // large objects) can cross arenas. 99 100 package runtime 101 102 import ( 103 "github.com/x04/go/src/runtime/internal/atomic" 104 "github.com/x04/go/src/runtime/internal/math" 105 "github.com/x04/go/src/runtime/internal/sys" 106 "github.com/x04/go/src/unsafe" 107 ) 108 109 const ( 110 debugMalloc = false 111 112 maxTinySize = _TinySize 113 tinySizeClass = _TinySizeClass 114 maxSmallSize = _MaxSmallSize 115 116 pageShift = _PageShift 117 pageSize = _PageSize 118 pageMask = _PageMask 119 // By construction, single page spans of the smallest object class 120 // have the most objects per span. 121 maxObjsPerSpan = pageSize / 8 122 123 concurrentSweep = _ConcurrentSweep 124 125 _PageSize = 1 << _PageShift 126 _PageMask = _PageSize - 1 127 128 // _64bit = 1 on 64-bit systems, 0 on 32-bit systems 129 _64bit = 1 << (^uintptr(0) >> 63) / 2 130 131 // Tiny allocator parameters, see "Tiny allocator" comment in malloc.go. 132 _TinySize = 16 133 _TinySizeClass = int8(2) 134 135 _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc 136 137 // Per-P, per order stack segment cache size. 138 _StackCacheSize = 32 * 1024 139 140 // Number of orders that get caching. Order 0 is FixedStack 141 // and each successive order is twice as large. 142 // We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks 143 // will be allocated directly. 144 // Since FixedStack is different on different systems, we 145 // must vary NumStackOrders to keep the same maximum cached size. 146 // OS | FixedStack | NumStackOrders 147 // -----------------+------------+--------------- 148 // linux/darwin/bsd | 2KB | 4 149 // windows/32 | 4KB | 3 150 // windows/64 | 8KB | 2 151 // plan9 | 4KB | 3 152 _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 153 154 // heapAddrBits is the number of bits in a heap address. On 155 // amd64, addresses are sign-extended beyond heapAddrBits. On 156 // other arches, they are zero-extended. 157 // 158 // On most 64-bit platforms, we limit this to 48 bits based on a 159 // combination of hardware and OS limitations. 160 // 161 // amd64 hardware limits addresses to 48 bits, sign-extended 162 // to 64 bits. Addresses where the top 16 bits are not either 163 // all 0 or all 1 are "non-canonical" and invalid. Because of 164 // these "negative" addresses, we offset addresses by 1<<47 165 // (arenaBaseOffset) on amd64 before computing indexes into 166 // the heap arenas index. 
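//
// For illustration, uintptr arithmetic wraps mod 2^64, so this offset
// folds the two canonical halves of the amd64 address space into one
// contiguous range of indexes:
//
//	0xffff800000000000 + 1<<47 = 0x0000000000000000
//	0xffffffffffffffff + 1<<47 = 0x00007fffffffffff
//	0x0000000000000000 + 1<<47 = 0x0000800000000000
//	0x00007fffffffffff + 1<<47 = 0x0000ffffffffffff
//
// so every canonical address lands in [0, 1<<48) before indexing.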
In 2017, amd64 hardware added 167 // support for 57 bit addresses; however, currently only Linux 168 // supports this extension and the kernel will never choose an 169 // address above 1<<47 unless mmap is called with a hint 170 // address above 1<<47 (which we never do). 171 // 172 // arm64 hardware (as of ARMv8) limits user addresses to 48 173 // bits, in the range [0, 1<<48). 174 // 175 // ppc64, mips64, and s390x support arbitrary 64 bit addresses 176 // in hardware. On Linux, Go leans on stricter OS limits. Based 177 // on Linux's processor.h, the user address space is limited as 178 // follows on 64-bit architectures: 179 // 180 // Architecture Name Maximum Value (exclusive) 181 // --------------------------------------------------------------------- 182 // amd64 TASK_SIZE_MAX 0x007ffffffff000 (47 bit addresses) 183 // arm64 TASK_SIZE_64 0x01000000000000 (48 bit addresses) 184 // ppc64{,le} TASK_SIZE_USER64 0x00400000000000 (46 bit addresses) 185 // mips64{,le} TASK_SIZE64 0x00010000000000 (40 bit addresses) 186 // s390x TASK_SIZE 1<<64 (64 bit addresses) 187 // 188 // These limits may increase over time, but are currently at 189 // most 48 bits except on s390x. On all architectures, Linux 190 // starts placing mmap'd regions at addresses that are 191 // significantly below 48 bits, so even if it's possible to 192 // exceed Go's 48 bit limit, it's extremely unlikely in 193 // practice. 194 // 195 // On 32-bit platforms, we accept the full 32-bit address 196 // space because doing so is cheap. 197 // mips32 only has access to the low 2GB of virtual memory, so 198 // we further limit it to 31 bits. 199 // 200 // On darwin/arm64, although 64-bit pointers are presumably 201 // available, pointers are truncated to 33 bits. Furthermore, 202 // only the top 4 GiB of the address space are actually available 203 // to the application, but we allow the whole 33 bits anyway for 204 // simplicity. 205 // TODO(mknyszek): Consider limiting it to 32 bits and using 206 // arenaBaseOffset to offset into the top 4 GiB. 207 // 208 // WebAssembly currently has a limit of 4GB linear memory. 209 heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosDarwin*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*sys.GoosDarwin*sys.GoarchArm64 210 211 // maxAlloc is the maximum size of an allocation. On 64-bit, 212 // it's theoretically possible to allocate 1<<heapAddrBits bytes. On 213 // 32-bit, however, this is one less than 1<<32 because the 214 // number of bytes in the address space doesn't actually fit 215 // in a uintptr. 216 maxAlloc = (1 << heapAddrBits) - (1-_64bit)*1 217 218 // The number of bits in a heap address, the size of heap 219 // arenas, and the L1 and L2 arena map sizes are related by 220 // 221 // (1 << addr bits) = arena size * L1 entries * L2 entries 222 // 223 // Currently, we balance these as follows: 224 // 225 // Platform Addr bits Arena size L1 entries L2 entries 226 // -------------- --------- ---------- ---------- ----------- 227 // */64-bit 48 64MB 1 4M (32MB) 228 // windows/64-bit 48 4MB 64 1M (8MB) 229 // */32-bit 32 4MB 1 1024 (4KB) 230 // */mips(le) 31 4MB 1 512 (2KB) 231 232 // heapArenaBytes is the size of a heap arena. The heap 233 // consists of mappings of size heapArenaBytes, aligned to 234 // heapArenaBytes. The initial heap mapping is one arena. 235 // 236 // This is currently 64MB on 64-bit non-Windows and 4MB on 237 // 32-bit and on Windows. 
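//
// (Concretely, the logHeapArenaBytes formula below works out to
//
//	64-bit, non-Windows, non-wasm:  6+20 = 26, i.e. 64MB arenas
//	64-bit Windows, 32-bit, wasm:   2+20 = 22, i.e. 4MB arenas
//
// which is where these two sizes come from.)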
We use smaller arenas on Windows 238 // because all committed memory is charged to the process, 239 // even if it's not touched. Hence, for processes with small 240 // heaps, the mapped arena space needs to be commensurate. 241 // This is particularly important with the race detector, 242 // since it significantly amplifies the cost of committed 243 // memory. 244 heapArenaBytes = 1 << logHeapArenaBytes 245 246 // logHeapArenaBytes is log_2 of heapArenaBytes. For clarity, 247 // prefer using heapArenaBytes where possible (we need the 248 // constant to compute some other constants). 249 logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoarchWasm)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (2+20)*sys.GoarchWasm 250 251 // heapArenaBitmapBytes is the size of each heap arena's bitmap. 252 heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2) 253 254 pagesPerArena = heapArenaBytes / pageSize 255 256 // arenaL1Bits is the number of bits of the arena number 257 // covered by the first level arena map. 258 // 259 // This number should be small, since the first level arena 260 // map requires PtrSize*(1<<arenaL1Bits) of space in the 261 // binary's BSS. It can be zero, in which case the first level 262 // index is effectively unused. There is a performance benefit 263 // to this, since the generated code can be more efficient, 264 // but comes at the cost of having a large L2 mapping. 265 // 266 // We use the L1 map on 64-bit Windows because the arena size 267 // is small, but the address space is still 48 bits, and 268 // there's a high cost to having a large L2. 269 arenaL1Bits = 6 * (_64bit * sys.GoosWindows) 270 271 // arenaL2Bits is the number of bits of the arena number 272 // covered by the second level arena index. 273 // 274 // The size of each arena map allocation is proportional to 275 // 1<<arenaL2Bits, so it's important that this not be too 276 // large. 48 bits leads to 32MB arena index allocations, which 277 // is about the practical threshold. 278 arenaL2Bits = heapAddrBits - logHeapArenaBytes - arenaL1Bits 279 280 // arenaL1Shift is the number of bits to shift an arena frame 281 // number by to compute an index into the first level arena map. 282 arenaL1Shift = arenaL2Bits 283 284 // arenaBits is the total bits in a combined arena map index. 285 // This is split between the index into the L1 arena map and 286 // the L2 arena map. 287 arenaBits = arenaL1Bits + arenaL2Bits 288 289 // arenaBaseOffset is the pointer value that corresponds to 290 // index 0 in the heap arena map. 291 // 292 // On amd64, the address space is 48 bits, sign extended to 64 293 // bits. This offset lets us handle "negative" addresses (or 294 // high addresses if viewed as unsigned). 295 // 296 // On aix/ppc64, this offset allows to keep the heapAddrBits to 297 // 48. Otherwize, it would be 60 in order to handle mmap addresses 298 // (in range 0x0a00000000000000 - 0x0afffffffffffff). But in this 299 // case, the memory reserved in (s *pageAlloc).init for chunks 300 // is causing important slowdowns. 301 // 302 // On other platforms, the user address space is contiguous 303 // and starts at 0, so no offset is necessary. 304 arenaBaseOffset = sys.GoarchAmd64*(1<<47) + (^0x0a00000000000000+1)&uintptrMask*sys.GoosAix 305 306 // Max number of threads to run garbage collection. 307 // 2, 3, and 4 are all plausible maximums depending 308 // on the hardware details of the machine. The garbage 309 // collector scales well to 32 cpus. 
310 _MaxGcproc = 32 311 312 // minLegalPointer is the smallest possible legal pointer. 313 // This is the smallest possible architectural page size, 314 // since we assume that the first page is never mapped. 315 // 316 // This should agree with minZeroPage in the compiler. 317 minLegalPointer uintptr = 4096 318 ) 319 320 // physPageSize is the size in bytes of the OS's physical pages. 321 // Mapping and unmapping operations must be done at multiples of 322 // physPageSize. 323 // 324 // This must be set by the OS init code (typically in osinit) before 325 // mallocinit. 326 var physPageSize uintptr 327 328 // physHugePageSize is the size in bytes of the OS's default physical huge 329 // page size whose allocation is opaque to the application. It is assumed 330 // and verified to be a power of two. 331 // 332 // If set, this must be set by the OS init code (typically in osinit) before 333 // mallocinit. However, setting it at all is optional, and leaving the default 334 // value is always safe (though potentially less efficient). 335 // 336 // Since physHugePageSize is always assumed to be a power of two, 337 // physHugePageShift is defined as physHugePageSize == 1 << physHugePageShift. 338 // The purpose of physHugePageShift is to avoid doing divisions in 339 // performance critical functions. 340 var ( 341 physHugePageSize uintptr 342 physHugePageShift uint 343 ) 344 345 // OS memory management abstraction layer 346 // 347 // Regions of the address space managed by the runtime may be in one of four 348 // states at any given time: 349 // 1) None - Unreserved and unmapped, the default state of any region. 350 // 2) Reserved - Owned by the runtime, but accessing it would cause a fault. 351 // Does not count against the process' memory footprint. 352 // 3) Prepared - Reserved, intended not to be backed by physical memory (though 353 // an OS may implement this lazily). Can transition efficiently to 354 // Ready. Accessing memory in such a region is undefined (may 355 // fault, may give back unexpected zeroes, etc.). 356 // 4) Ready - may be accessed safely. 357 // 358 // This set of states is more than is strictly necessary to support all the 359 // currently supported platforms. One could get by with just None, Reserved, and 360 // Ready. However, the Prepared state gives us flexibility for performance 361 // purposes. For example, on POSIX-y operating systems, Reserved is usually a 362 // private anonymous mmap'd region with PROT_NONE set, and to transition 363 // to Ready would require setting PROT_READ|PROT_WRITE. However the 364 // underspecification of Prepared lets us use just MADV_FREE to transition from 365 // Ready to Prepared. Thus with the Prepared state we can set the permission 366 // bits just once early on, we can efficiently tell the OS that it's free to 367 // take pages away from us when we don't strictly need them. 368 // 369 // For each OS there is a common set of helpers defined that transition 370 // memory regions between these states. The helpers are as follows: 371 // 372 // sysAlloc transitions an OS-chosen region of memory from None to Ready. 373 // More specifically, it obtains a large chunk of zeroed memory from the 374 // operating system, typically on the order of a hundred kilobytes 375 // or a megabyte. This memory is always immediately available for use. 376 // 377 // sysFree transitions a memory region from any state to None. Therefore, it 378 // returns memory unconditionally. 
It is used if an out-of-memory error has been 379 // detected midway through an allocation or to carve out an aligned section of 380 // the address space. It is okay if sysFree is a no-op only if sysReserve always 381 // returns a memory region aligned to the heap allocator's alignment 382 // restrictions. 383 // 384 // sysReserve transitions a memory region from None to Reserved. It reserves 385 // address space in such a way that it would cause a fatal fault upon access 386 // (either via permissions or not committing the memory). Such a reservation is 387 // thus never backed by physical memory. 388 // If the pointer passed to it is non-nil, the caller wants the 389 // reservation there, but sysReserve can still choose another 390 // location if that one is unavailable. 391 // NOTE: sysReserve returns OS-aligned memory, but the heap allocator 392 // may use larger alignment, so the caller must be careful to realign the 393 // memory obtained by sysReserve. 394 // 395 // sysMap transitions a memory region from Reserved to Prepared. It ensures the 396 // memory region can be efficiently transitioned to Ready. 397 // 398 // sysUsed transitions a memory region from Prepared to Ready. It notifies the 399 // operating system that the memory region is needed and ensures that the region 400 // may be safely accessed. This is typically a no-op on systems that don't have 401 // an explicit commit step and hard over-commit limits, but is critical on 402 // Windows, for example. 403 // 404 // sysUnused transitions a memory region from Ready to Prepared. It notifies the 405 // operating system that the physical pages backing this memory region are no 406 // longer needed and can be reused for other purposes. The contents of a 407 // sysUnused memory region are considered forfeit and the region must not be 408 // accessed again until sysUsed is called. 409 // 410 // sysFault transitions a memory region from Ready or Prepared to Reserved. It 411 // marks a region such that it will always fault if accessed. Used only for 412 // debugging the runtime. 413 414 func mallocinit() { 415 if class_to_size[_TinySizeClass] != _TinySize { 416 throw("bad TinySizeClass") 417 } 418 419 testdefersizes() 420 421 if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 { 422 // heapBits expects modular arithmetic on bitmap 423 // addresses to work. 424 throw("heapArenaBitmapBytes not a power of 2") 425 } 426 427 // Copy class sizes out for statistics table. 428 for i := range class_to_size { 429 memstats.by_size[i].size = uint32(class_to_size[i]) 430 } 431 432 // Check physPageSize. 433 if physPageSize == 0 { 434 // The OS init code failed to fetch the physical page size. 
435 throw("failed to get system page size") 436 } 437 if physPageSize > maxPhysPageSize { 438 print("system page size (", physPageSize, ") is larger than maximum page size (", maxPhysPageSize, ")\n") 439 throw("bad system page size") 440 } 441 if physPageSize < minPhysPageSize { 442 print("system page size (", physPageSize, ") is smaller than minimum page size (", minPhysPageSize, ")\n") 443 throw("bad system page size") 444 } 445 if physPageSize&(physPageSize-1) != 0 { 446 print("system page size (", physPageSize, ") must be a power of 2\n") 447 throw("bad system page size") 448 } 449 if physHugePageSize&(physHugePageSize-1) != 0 { 450 print("system huge page size (", physHugePageSize, ") must be a power of 2\n") 451 throw("bad system huge page size") 452 } 453 if physHugePageSize > maxPhysHugePageSize { 454 // physHugePageSize is greater than the maximum supported huge page size. 455 // Don't throw here, like in the other cases, since a system configured 456 // in this way isn't wrong, we just don't have the code to support them. 457 // Instead, silently set the huge page size to zero. 458 physHugePageSize = 0 459 } 460 if physHugePageSize != 0 { 461 // Since physHugePageSize is a power of 2, it suffices to increase 462 // physHugePageShift until 1<<physHugePageShift == physHugePageSize. 463 for 1<<physHugePageShift != physHugePageSize { 464 physHugePageShift++ 465 } 466 } 467 468 // Initialize the heap. 469 mheap_.init() 470 _g_ := getg() 471 _g_.m.mcache = allocmcache() 472 473 // Create initial arena growth hints. 474 if sys.PtrSize == 8 { 475 // On a 64-bit machine, we pick the following hints 476 // because: 477 // 478 // 1. Starting from the middle of the address space 479 // makes it easier to grow out a contiguous range 480 // without running in to some other mapping. 481 // 482 // 2. This makes Go heap addresses more easily 483 // recognizable when debugging. 484 // 485 // 3. Stack scanning in gccgo is still conservative, 486 // so it's important that addresses be distinguishable 487 // from other data. 488 // 489 // Starting at 0x00c0 means that the valid memory addresses 490 // will begin 0x00c0, 0x00c1, ... 491 // In little-endian, that's c0 00, c1 00, ... None of those are valid 492 // UTF-8 sequences, and they are otherwise as far away from 493 // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0 494 // addresses. An earlier attempt to use 0x11f8 caused out of memory errors 495 // on OS X during thread allocations. 0x00c0 causes conflicts with 496 // AddressSanitizer which reserves all memory up to 0x0100. 497 // These choices reduce the odds of a conservative garbage collector 498 // not collecting memory because some non-pointer block of memory 499 // had a bit pattern that matched a memory address. 500 // 501 // However, on arm64, we ignore all this advice above and slam the 502 // allocation at 0x40 << 32 because when using 4k pages with 3-level 503 // translation buffers, the user address space is limited to 39 bits 504 // On darwin/arm64, the address space is even smaller. 505 // 506 // On AIX, mmaps starts at 0x0A00000000000000 for 64-bit. 507 // processes. 
508 for i := 0x7f; i >= 0; i-- { 509 var p uintptr 510 switch { 511 case GOARCH == "arm64" && GOOS == "darwin": 512 p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) 513 case GOARCH == "arm64": 514 p = uintptr(i)<<40 | uintptrMask&(0x0040<<32) 515 case GOOS == "aix": 516 if i == 0 { 517 // We don't use addresses directly after 0x0A00000000000000 518 // to avoid collisions with others mmaps done by non-go programs. 519 continue 520 } 521 p = uintptr(i)<<40 | uintptrMask&(0xa0<<52) 522 case raceenabled: 523 // The TSAN runtime requires the heap 524 // to be in the range [0x00c000000000, 525 // 0x00e000000000). 526 p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32) 527 if p >= uintptrMask&0x00e000000000 { 528 continue 529 } 530 default: 531 p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) 532 } 533 hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) 534 hint.addr = p 535 hint.next, mheap_.arenaHints = mheap_.arenaHints, hint 536 } 537 } else { 538 // On a 32-bit machine, we're much more concerned 539 // about keeping the usable heap contiguous. 540 // Hence: 541 // 542 // 1. We reserve space for all heapArenas up front so 543 // they don't get interleaved with the heap. They're 544 // ~258MB, so this isn't too bad. (We could reserve a 545 // smaller amount of space up front if this is a 546 // problem.) 547 // 548 // 2. We hint the heap to start right above the end of 549 // the binary so we have the best chance of keeping it 550 // contiguous. 551 // 552 // 3. We try to stake out a reasonably large initial 553 // heap reservation. 554 555 const arenaMetaSize = (1 << arenaBits) * unsafe.Sizeof(heapArena{}) 556 meta := uintptr(sysReserve(nil, arenaMetaSize)) 557 if meta != 0 { 558 mheap_.heapArenaAlloc.init(meta, arenaMetaSize) 559 } 560 561 // We want to start the arena low, but if we're linked 562 // against C code, it's possible global constructors 563 // have called malloc and adjusted the process' brk. 564 // Query the brk so we can avoid trying to map the 565 // region over it (which will cause the kernel to put 566 // the region somewhere else, likely at a high 567 // address). 568 procBrk := sbrk0() 569 570 // If we ask for the end of the data segment but the 571 // operating system requires a little more space 572 // before we can start allocating, it will give out a 573 // slightly higher pointer. Except QEMU, which is 574 // buggy, as usual: it won't adjust the pointer 575 // upward. So adjust it upward a little bit ourselves: 576 // 1/4 MB to get away from the running binary image. 577 p := firstmoduledata.end 578 if p < procBrk { 579 p = procBrk 580 } 581 if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end { 582 p = mheap_.heapArenaAlloc.end 583 } 584 p = alignUp(p+(256<<10), heapArenaBytes) 585 // Because we're worried about fragmentation on 586 // 32-bit, we try to make a large initial reservation. 587 arenaSizes := []uintptr{ 588 512 << 20, 589 256 << 20, 590 128 << 20, 591 } 592 for _, arenaSize := range arenaSizes { 593 a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes) 594 if a != nil { 595 mheap_.arena.init(uintptr(a), size) 596 p = uintptr(a) + size // For hint below 597 break 598 } 599 } 600 hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) 601 hint.addr = p 602 hint.next, mheap_.arenaHints = mheap_.arenaHints, hint 603 } 604 } 605 606 // sysAlloc allocates heap arena space for at least n bytes. The 607 // returned pointer is always heapArenaBytes-aligned and backed by 608 // h.arenas metadata. 
The returned size is always a multiple of 609 // heapArenaBytes. sysAlloc returns nil on failure. 610 // There is no corresponding free function. 611 // 612 // sysAlloc returns a memory region in the Prepared state. This region must 613 // be transitioned to Ready before use. 614 // 615 // h must be locked. 616 func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { 617 n = alignUp(n, heapArenaBytes) 618 619 // First, try the arena pre-reservation. 620 v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys) 621 if v != nil { 622 size = n 623 goto mapped 624 } 625 626 // Try to grow the heap at a hint address. 627 for h.arenaHints != nil { 628 hint := h.arenaHints 629 p := hint.addr 630 if hint.down { 631 p -= n 632 } 633 if p+n < p { 634 // We can't use this, so don't ask. 635 v = nil 636 } else if arenaIndex(p+n-1) >= 1<<arenaBits { 637 // Outside addressable heap. Can't use. 638 v = nil 639 } else { 640 v = sysReserve(unsafe.Pointer(p), n) 641 } 642 if p == uintptr(v) { 643 // Success. Update the hint. 644 if !hint.down { 645 p += n 646 } 647 hint.addr = p 648 size = n 649 break 650 } 651 // Failed. Discard this hint and try the next. 652 // 653 // TODO: This would be cleaner if sysReserve could be 654 // told to only return the requested address. In 655 // particular, this is already how Windows behaves, so 656 // it would simplify things there. 657 if v != nil { 658 sysFree(v, n, nil) 659 } 660 h.arenaHints = hint.next 661 h.arenaHintAlloc.free(unsafe.Pointer(hint)) 662 } 663 664 if size == 0 { 665 if raceenabled { 666 // The race detector assumes the heap lives in 667 // [0x00c000000000, 0x00e000000000), but we 668 // just ran out of hints in this region. Give 669 // a nice failure. 670 throw("too many address space collisions for -race mode") 671 } 672 673 // All of the hints failed, so we'll take any 674 // (sufficiently aligned) address the kernel will give 675 // us. 676 v, size = sysReserveAligned(nil, n, heapArenaBytes) 677 if v == nil { 678 return nil, 0 679 } 680 681 // Create new hints for extending this region. 682 hint := (*arenaHint)(h.arenaHintAlloc.alloc()) 683 hint.addr, hint.down = uintptr(v), true 684 hint.next, mheap_.arenaHints = mheap_.arenaHints, hint 685 hint = (*arenaHint)(h.arenaHintAlloc.alloc()) 686 hint.addr = uintptr(v) + size 687 hint.next, mheap_.arenaHints = mheap_.arenaHints, hint 688 } 689 690 // Check for bad pointers or pointers we can't use. 691 { 692 var bad string 693 p := uintptr(v) 694 if p+size < p { 695 bad = "region exceeds uintptr range" 696 } else if arenaIndex(p) >= 1<<arenaBits { 697 bad = "base outside usable address space" 698 } else if arenaIndex(p+size-1) >= 1<<arenaBits { 699 bad = "end outside usable address space" 700 } 701 if bad != "" { 702 // This should be impossible on most architectures, 703 // but it would be really confusing to debug. 704 print("runtime: memory allocated by OS [", hex(p), ", ", hex(p+size), ") not in usable address space: ", bad, "\n") 705 throw("memory reservation exceeds address space limit") 706 } 707 } 708 709 if uintptr(v)&(heapArenaBytes-1) != 0 { 710 throw("misrounded allocation in sysAlloc") 711 } 712 713 // Transition from Reserved to Prepared. 714 sysMap(v, size, &memstats.heap_sys) 715 716 mapped: 717 // Create arena metadata. 718 for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ { 719 l2 := h.arenas[ri.l1()] 720 if l2 == nil { 721 // Allocate an L2 arena map. 
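// For scale: on linux/amd64, arenaL1Bits is 0 and arenaL2Bits is
// heapAddrBits-logHeapArenaBytes = 48-26 = 22, so this is a single
// allocation of
//	unsafe.Sizeof(*l2) = (1<<22) * sys.PtrSize = 32MB
// matching the "4M (32MB)" L2 entry in the table near the top of this file.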
722 l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil)) 723 if l2 == nil { 724 throw("out of memory allocating heap arena map") 725 } 726 atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2)) 727 } 728 729 if l2[ri.l2()] != nil { 730 throw("arena already initialized") 731 } 732 var r *heapArena 733 r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) 734 if r == nil { 735 r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) 736 if r == nil { 737 throw("out of memory allocating heap arena metadata") 738 } 739 } 740 741 // Add the arena to the arenas list. 742 if len(h.allArenas) == cap(h.allArenas) { 743 size := 2 * uintptr(cap(h.allArenas)) * sys.PtrSize 744 if size == 0 { 745 size = physPageSize 746 } 747 newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gc_sys)) 748 if newArray == nil { 749 throw("out of memory allocating allArenas") 750 } 751 oldSlice := h.allArenas 752 *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / sys.PtrSize)} 753 copy(h.allArenas, oldSlice) 754 // Do not free the old backing array because 755 // there may be concurrent readers. Since we 756 // double the array each time, this can lead 757 // to at most 2x waste. 758 } 759 h.allArenas = h.allArenas[:len(h.allArenas)+1] 760 h.allArenas[len(h.allArenas)-1] = ri 761 762 // Store atomically just in case an object from the 763 // new heap arena becomes visible before the heap lock 764 // is released (which shouldn't happen, but there's 765 // little downside to this). 766 atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r)) 767 } 768 769 // Tell the race detector about the new heap memory. 770 if raceenabled { 771 racemapshadow(v, size) 772 } 773 774 return 775 } 776 777 // sysReserveAligned is like sysReserve, but the returned pointer is 778 // aligned to align bytes. It may reserve either n or n+align bytes, 779 // so it returns the size that was reserved. 780 func sysReserveAligned(v unsafe.Pointer, size, align uintptr) (unsafe.Pointer, uintptr) { 781 // Since the alignment is rather large in uses of this 782 // function, we're not likely to get it by chance, so we ask 783 // for a larger region and remove the parts we don't need. 784 retries := 0 785 retry: 786 p := uintptr(sysReserve(v, size+align)) 787 switch { 788 case p == 0: 789 return nil, 0 790 case p&(align-1) == 0: 791 // We got lucky and got an aligned region, so we can 792 // use the whole thing. 793 return unsafe.Pointer(p), size + align 794 case GOOS == "windows": 795 // On Windows we can't release pieces of a 796 // reservation, so we release the whole thing and 797 // re-reserve the aligned sub-region. This may race, 798 // so we may have to try again. 799 sysFree(unsafe.Pointer(p), size+align, nil) 800 p = alignUp(p, align) 801 p2 := sysReserve(unsafe.Pointer(p), size) 802 if p != uintptr(p2) { 803 // Must have raced. Try again. 804 sysFree(p2, size, nil) 805 if retries++; retries == 100 { 806 throw("failed to allocate aligned heap memory; too many retries") 807 } 808 goto retry 809 } 810 // Success. 811 return p2, size 812 default: 813 // Trim off the unaligned parts. 
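// For example, with align = 64MB and a reservation that came back 16MB
// past an alignment boundary, the code below frees the leading 48MB and
// the trailing 16MB, keeping exactly size bytes aligned at pAligned;
// the front and back trims always add up to align.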
814 pAligned := alignUp(p, align) 815 sysFree(unsafe.Pointer(p), pAligned-p, nil) 816 end := pAligned + size 817 endLen := (p + size + align) - end 818 if endLen > 0 { 819 sysFree(unsafe.Pointer(end), endLen, nil) 820 } 821 return unsafe.Pointer(pAligned), size 822 } 823 } 824 825 // base address for all 0-byte allocations 826 var zerobase uintptr 827 828 // nextFreeFast returns the next free object if one is quickly available. 829 // Otherwise it returns 0. 830 func nextFreeFast(s *mspan) gclinkptr { 831 theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache? 832 if theBit < 64 { 833 result := s.freeindex + uintptr(theBit) 834 if result < s.nelems { 835 freeidx := result + 1 836 if freeidx%64 == 0 && freeidx != s.nelems { 837 return 0 838 } 839 s.allocCache >>= uint(theBit + 1) 840 s.freeindex = freeidx 841 s.allocCount++ 842 return gclinkptr(result*s.elemsize + s.base()) 843 } 844 } 845 return 0 846 } 847 848 // nextFree returns the next free object from the cached span if one is available. 849 // Otherwise it refills the cache with a span with an available object and 850 // returns that object along with a flag indicating that this was a heavy 851 // weight allocation. If it is a heavy weight allocation the caller must 852 // determine whether a new GC cycle needs to be started or if the GC is active 853 // whether this goroutine needs to assist the GC. 854 // 855 // Must run in a non-preemptible context since otherwise the owner of 856 // c could change. 857 func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) { 858 s = c.alloc[spc] 859 shouldhelpgc = false 860 freeIndex := s.nextFreeIndex() 861 if freeIndex == s.nelems { 862 // The span is full. 863 if uintptr(s.allocCount) != s.nelems { 864 println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems) 865 throw("s.allocCount != s.nelems && freeIndex == s.nelems") 866 } 867 c.refill(spc) 868 shouldhelpgc = true 869 s = c.alloc[spc] 870 871 freeIndex = s.nextFreeIndex() 872 } 873 874 if freeIndex >= s.nelems { 875 throw("freeIndex is not valid") 876 } 877 878 v = gclinkptr(freeIndex*s.elemsize + s.base()) 879 s.allocCount++ 880 if uintptr(s.allocCount) > s.nelems { 881 println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems) 882 throw("s.allocCount > s.nelems") 883 } 884 return 885 } 886 887 // Allocate an object of size bytes. 888 // Small objects are allocated from the per-P cache's free lists. 889 // Large objects (> 32 kB) are allocated straight from the heap. 890 func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { 891 if gcphase == _GCmarktermination { 892 throw("mallocgc called with gcphase == _GCmarktermination") 893 } 894 895 if size == 0 { 896 return unsafe.Pointer(&zerobase) 897 } 898 899 if debug.sbrk != 0 { 900 align := uintptr(16) 901 if typ != nil { 902 // TODO(austin): This should be just 903 // align = uintptr(typ.align) 904 // but that's only 4 on 32-bit platforms, 905 // even if there's a uint64 field in typ (see #599). 906 // This causes 64-bit atomic accesses to panic. 907 // Hence, we use stricter alignment that matches 908 // the normal allocator better. 909 if size&7 == 0 { 910 align = 8 911 } else if size&3 == 0 { 912 align = 4 913 } else if size&1 == 0 { 914 align = 2 915 } else { 916 align = 1 917 } 918 } 919 return persistentalloc(size, align, &memstats.other_sys) 920 } 921 922 // assistG is the G to charge for this allocation, or nil if 923 // GC is not currently active. 
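// (For example, a 40-byte request is charged 40 bytes below and, once
// it has been rounded up to the 48-byte size class, a further
// 48-40 = 8 bytes at the end of mallocgc, so the total debt matches
// the bytes actually handed out.)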
924 var assistG *g 925 if gcBlackenEnabled != 0 { 926 // Charge the current user G for this allocation. 927 assistG = getg() 928 if assistG.m.curg != nil { 929 assistG = assistG.m.curg 930 } 931 // Charge the allocation against the G. We'll account 932 // for internal fragmentation at the end of mallocgc. 933 assistG.gcAssistBytes -= int64(size) 934 935 if assistG.gcAssistBytes < 0 { 936 // This G is in debt. Assist the GC to correct 937 // this before allocating. This must happen 938 // before disabling preemption. 939 gcAssistAlloc(assistG) 940 } 941 } 942 943 // Set mp.mallocing to keep from being preempted by GC. 944 mp := acquirem() 945 if mp.mallocing != 0 { 946 throw("malloc deadlock") 947 } 948 if mp.gsignal == getg() { 949 throw("malloc during signal") 950 } 951 mp.mallocing = 1 952 953 shouldhelpgc := false 954 dataSize := size 955 c := gomcache() 956 var x unsafe.Pointer 957 noscan := typ == nil || typ.ptrdata == 0 958 if size <= maxSmallSize { 959 if noscan && size < maxTinySize { 960 // Tiny allocator. 961 // 962 // Tiny allocator combines several tiny allocation requests 963 // into a single memory block. The resulting memory block 964 // is freed when all subobjects are unreachable. The subobjects 965 // must be noscan (don't have pointers), this ensures that 966 // the amount of potentially wasted memory is bounded. 967 // 968 // Size of the memory block used for combining (maxTinySize) is tunable. 969 // Current setting is 16 bytes, which relates to 2x worst case memory 970 // wastage (when all but one subobjects are unreachable). 971 // 8 bytes would result in no wastage at all, but provides less 972 // opportunities for combining. 973 // 32 bytes provides more opportunities for combining, 974 // but can lead to 4x worst case wastage. 975 // The best case winning is 8x regardless of block size. 976 // 977 // Objects obtained from tiny allocator must not be freed explicitly. 978 // So when an object will be freed explicitly, we ensure that 979 // its size >= maxTinySize. 980 // 981 // SetFinalizer has a special case for objects potentially coming 982 // from tiny allocator, it such case it allows to set finalizers 983 // for an inner byte of a memory block. 984 // 985 // The main targets of tiny allocator are small strings and 986 // standalone escaping variables. On a json benchmark 987 // the allocator reduces number of allocations by ~12% and 988 // reduces heap size by ~20%. 989 off := c.tinyoffset 990 // Align tiny pointer for required (conservative) alignment. 991 if size&7 == 0 { 992 off = alignUp(off, 8) 993 } else if size&3 == 0 { 994 off = alignUp(off, 4) 995 } else if size&1 == 0 { 996 off = alignUp(off, 2) 997 } 998 if off+size <= maxTinySize && c.tiny != 0 { 999 // The object fits into existing tiny block. 1000 x = unsafe.Pointer(c.tiny + off) 1001 c.tinyoffset = off + size 1002 c.local_tinyallocs++ 1003 mp.mallocing = 0 1004 releasem(mp) 1005 return x 1006 } 1007 // Allocate a new maxTinySize block. 1008 span := c.alloc[tinySpanClass] 1009 v := nextFreeFast(span) 1010 if v == 0 { 1011 v, _, shouldhelpgc = c.nextFree(tinySpanClass) 1012 } 1013 x = unsafe.Pointer(v) 1014 (*[2]uint64)(x)[0] = 0 1015 (*[2]uint64)(x)[1] = 0 1016 // See if we need to replace the existing tiny block with the new one 1017 // based on amount of remaining free space. 
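// For example, if this request used only 4 bytes of the fresh 16-byte
// block while the old block already had 12 bytes in use
// (c.tinyoffset == 12), the fresh block has 12 bytes left versus 4, so
// it wins and replaces c.tiny below.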
1018 if size < c.tinyoffset || c.tiny == 0 { 1019 c.tiny = uintptr(x) 1020 c.tinyoffset = size 1021 } 1022 size = maxTinySize 1023 } else { 1024 var sizeclass uint8 1025 if size <= smallSizeMax-8 { 1026 sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv] 1027 } else { 1028 sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv] 1029 } 1030 size = uintptr(class_to_size[sizeclass]) 1031 spc := makeSpanClass(sizeclass, noscan) 1032 span := c.alloc[spc] 1033 v := nextFreeFast(span) 1034 if v == 0 { 1035 v, span, shouldhelpgc = c.nextFree(spc) 1036 } 1037 x = unsafe.Pointer(v) 1038 if needzero && span.needzero != 0 { 1039 memclrNoHeapPointers(unsafe.Pointer(v), size) 1040 } 1041 } 1042 } else { 1043 var s *mspan 1044 shouldhelpgc = true 1045 systemstack(func() { 1046 s = largeAlloc(size, needzero, noscan) 1047 }) 1048 s.freeindex = 1 1049 s.allocCount = 1 1050 x = unsafe.Pointer(s.base()) 1051 size = s.elemsize 1052 } 1053 1054 var scanSize uintptr 1055 if !noscan { 1056 // If allocating a defer+arg block, now that we've picked a malloc size 1057 // large enough to hold everything, cut the "asked for" size down to 1058 // just the defer header, so that the GC bitmap will record the arg block 1059 // as containing nothing at all (as if it were unused space at the end of 1060 // a malloc block caused by size rounding). 1061 // The defer arg areas are scanned as part of scanstack. 1062 if typ == deferType { 1063 dataSize = unsafe.Sizeof(_defer{}) 1064 } 1065 heapBitsSetType(uintptr(x), size, dataSize, typ) 1066 if dataSize > typ.size { 1067 // Array allocation. If there are any 1068 // pointers, GC has to scan to the last 1069 // element. 1070 if typ.ptrdata != 0 { 1071 scanSize = dataSize - typ.size + typ.ptrdata 1072 } 1073 } else { 1074 scanSize = typ.ptrdata 1075 } 1076 c.local_scan += scanSize 1077 } 1078 1079 // Ensure that the stores above that initialize x to 1080 // type-safe memory and set the heap bits occur before 1081 // the caller can make x observable to the garbage 1082 // collector. Otherwise, on weakly ordered machines, 1083 // the garbage collector could follow a pointer to x, 1084 // but see uninitialized memory or stale heap bits. 1085 publicationBarrier() 1086 1087 // Allocate black during GC. 1088 // All slots hold nil so no scanning is needed. 1089 // This may be racing with GC so do it atomically if there can be 1090 // a race marking the bit. 1091 if gcphase != _GCoff { 1092 gcmarknewobject(uintptr(x), size, scanSize) 1093 } 1094 1095 if raceenabled { 1096 racemalloc(x, size) 1097 } 1098 1099 if msanenabled { 1100 msanmalloc(x, size) 1101 } 1102 1103 mp.mallocing = 0 1104 releasem(mp) 1105 1106 if debug.allocfreetrace != 0 { 1107 tracealloc(x, size, typ) 1108 } 1109 1110 if rate := MemProfileRate; rate > 0 { 1111 if rate != 1 && size < c.next_sample { 1112 c.next_sample -= size 1113 } else { 1114 mp := acquirem() 1115 profilealloc(mp, x, size) 1116 releasem(mp) 1117 } 1118 } 1119 1120 if assistG != nil { 1121 // Account for internal fragmentation in the assist 1122 // debt now that we know it. 
1123 assistG.gcAssistBytes -= int64(size - dataSize) 1124 } 1125 1126 if shouldhelpgc { 1127 if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { 1128 gcStart(t) 1129 } 1130 } 1131 1132 return x 1133 } 1134 1135 func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { 1136 // print("largeAlloc size=", size, "\n") 1137 1138 if size+_PageSize < size { 1139 throw("out of memory") 1140 } 1141 npages := size >> _PageShift 1142 if size&_PageMask != 0 { 1143 npages++ 1144 } 1145 1146 // Deduct credit for this span allocation and sweep if 1147 // necessary. mHeap_Alloc will also sweep npages, so this only 1148 // pays the debt down to npage pages. 1149 deductSweepCredit(npages*_PageSize, npages) 1150 1151 s := mheap_.alloc(npages, makeSpanClass(0, noscan), needzero) 1152 if s == nil { 1153 throw("out of memory") 1154 } 1155 s.limit = s.base() + size 1156 heapBitsForAddr(s.base()).initSpan(s) 1157 return s 1158 } 1159 1160 // implementation of new builtin 1161 // compiler (both frontend and SSA backend) knows the signature 1162 // of this function 1163 func newobject(typ *_type) unsafe.Pointer { 1164 return mallocgc(typ.size, typ, true) 1165 } 1166 1167 //go:linkname reflect_unsafe_New reflect.unsafe_New 1168 func reflect_unsafe_New(typ *_type) unsafe.Pointer { 1169 return mallocgc(typ.size, typ, true) 1170 } 1171 1172 //go:linkname reflectlite_unsafe_New internal/reflectlite.unsafe_New 1173 func reflectlite_unsafe_New(typ *_type) unsafe.Pointer { 1174 return mallocgc(typ.size, typ, true) 1175 } 1176 1177 // newarray allocates an array of n elements of type typ. 1178 func newarray(typ *_type, n int) unsafe.Pointer { 1179 if n == 1 { 1180 return mallocgc(typ.size, typ, true) 1181 } 1182 mem, overflow := math.MulUintptr(typ.size, uintptr(n)) 1183 if overflow || mem > maxAlloc || n < 0 { 1184 panic(plainError("runtime: allocation size out of range")) 1185 } 1186 return mallocgc(mem, typ, true) 1187 } 1188 1189 //go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray 1190 func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer { 1191 return newarray(typ, n) 1192 } 1193 1194 func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { 1195 mp.mcache.next_sample = nextSample() 1196 mProf_Malloc(x, size) 1197 } 1198 1199 // nextSample returns the next sampling point for heap profiling. The goal is 1200 // to sample allocations on average every MemProfileRate bytes, but with a 1201 // completely random distribution over the allocation timeline; this 1202 // corresponds to a Poisson process with parameter MemProfileRate. In Poisson 1203 // processes, the distance between two samples follows the exponential 1204 // distribution (exp(MemProfileRate)), so the best return value is a random 1205 // number taken from an exponential distribution whose mean is MemProfileRate. 1206 func nextSample() uintptr { 1207 if GOOS == "plan9" { 1208 // Plan 9 doesn't support floating point in note handler. 1209 if g := getg(); g == g.m.gsignal { 1210 return nextSampleNoFP() 1211 } 1212 } 1213 1214 return uintptr(fastexprand(MemProfileRate)) 1215 } 1216 1217 // fastexprand returns a random number from an exponential distribution with 1218 // the specified mean. 1219 func fastexprand(mean int) int32 { 1220 // Avoid overflow. Maximum possible step is 1221 // -ln(1/(1<<randomBitCount)) * mean, approximately 20 * mean. 1222 switch { 1223 case mean > 0x7000000: 1224 mean = 0x7000000 1225 case mean == 0: 1226 return 0 1227 } 1228 1229 // Take a random sample of the exponential distribution exp(-mean*x). 
1230 // The probability distribution function is mean*exp(-mean*x), so the CDF is 1231 // p = 1 - exp(-mean*x), so 1232 // q = 1 - p == exp(-mean*x) 1233 // log_e(q) = -mean*x 1234 // -log_e(q)/mean = x 1235 // x = -log_e(q) * mean 1236 // x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency 1237 const randomBitCount = 26 1238 q := fastrand()%(1<<randomBitCount) + 1 1239 qlog := fastlog2(float64(q)) - randomBitCount 1240 if qlog > 0 { 1241 qlog = 0 1242 } 1243 const minusLog2 = -0.6931471805599453 // -ln(2) 1244 return int32(qlog*(minusLog2*float64(mean))) + 1 1245 } 1246 1247 // nextSampleNoFP is similar to nextSample, but uses older, 1248 // simpler code to avoid floating point. 1249 func nextSampleNoFP() uintptr { 1250 // Set first allocation sample size. 1251 rate := MemProfileRate 1252 if rate > 0x3fffffff { // make 2*rate not overflow 1253 rate = 0x3fffffff 1254 } 1255 if rate != 0 { 1256 return uintptr(fastrand() % uint32(2*rate)) 1257 } 1258 return 0 1259 } 1260 1261 type persistentAlloc struct { 1262 base *notInHeap 1263 off uintptr 1264 } 1265 1266 var globalAlloc struct { 1267 mutex 1268 persistentAlloc 1269 } 1270 1271 // persistentChunkSize is the number of bytes we allocate when we grow 1272 // a persistentAlloc. 1273 const persistentChunkSize = 256 << 10 1274 1275 // persistentChunks is a list of all the persistent chunks we have 1276 // allocated. The list is maintained through the first word in the 1277 // persistent chunk. This is updated atomically. 1278 var persistentChunks *notInHeap 1279 1280 // Wrapper around sysAlloc that can allocate small chunks. 1281 // There is no associated free operation. 1282 // Intended for things like function/type/debug-related persistent data. 1283 // If align is 0, uses default align (currently 8). 1284 // The returned memory will be zeroed. 1285 // 1286 // Consider marking persistentalloc'd types go:notinheap. 1287 func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { 1288 var p *notInHeap 1289 systemstack(func() { 1290 p = persistentalloc1(size, align, sysStat) 1291 }) 1292 return unsafe.Pointer(p) 1293 } 1294 1295 // Must run on system stack because stack growth can (re)invoke it. 1296 // See issue 9174. 1297 //go:systemstack 1298 func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { 1299 const ( 1300 maxBlock = 64 << 10 // VM reservation granularity is 64K on windows 1301 ) 1302 1303 if size == 0 { 1304 throw("persistentalloc: size == 0") 1305 } 1306 if align != 0 { 1307 if align&(align-1) != 0 { 1308 throw("persistentalloc: align is not a power of 2") 1309 } 1310 if align > _PageSize { 1311 throw("persistentalloc: align is too large") 1312 } 1313 } else { 1314 align = 8 1315 } 1316 1317 if size >= maxBlock { 1318 return (*notInHeap)(sysAlloc(size, sysStat)) 1319 } 1320 1321 mp := acquirem() 1322 var persistent *persistentAlloc 1323 if mp != nil && mp.p != 0 { 1324 persistent = &mp.p.ptr().palloc 1325 } else { 1326 lock(&globalAlloc.mutex) 1327 persistent = &globalAlloc.persistentAlloc 1328 } 1329 persistent.off = alignUp(persistent.off, align) 1330 if persistent.off+size > persistentChunkSize || persistent.base == nil { 1331 persistent.base = (*notInHeap)(sysAlloc(persistentChunkSize, &memstats.other_sys)) 1332 if persistent.base == nil { 1333 if persistent == &globalAlloc.persistentAlloc { 1334 unlock(&globalAlloc.mutex) 1335 } 1336 throw("runtime: cannot allocate memory") 1337 } 1338 1339 // Add the new chunk to the persistentChunks list. 
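// The "next" links live in the first word of each chunk, so the loop
// below is a standard lock-free push, roughly:
//
//	*(*uintptr)(chunk) = head
//	CAS(&persistentChunks, head, chunk)	// retry until the CAS wins
//
// and inPersistentAlloc later walks the list by reloading that first word.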
1340 for { 1341 chunks := uintptr(unsafe.Pointer(persistentChunks)) 1342 *(*uintptr)(unsafe.Pointer(persistent.base)) = chunks 1343 if atomic.Casuintptr((*uintptr)(unsafe.Pointer(&persistentChunks)), chunks, uintptr(unsafe.Pointer(persistent.base))) { 1344 break 1345 } 1346 } 1347 persistent.off = alignUp(sys.PtrSize, align) 1348 } 1349 p := persistent.base.add(persistent.off) 1350 persistent.off += size 1351 releasem(mp) 1352 if persistent == &globalAlloc.persistentAlloc { 1353 unlock(&globalAlloc.mutex) 1354 } 1355 1356 if sysStat != &memstats.other_sys { 1357 mSysStatInc(sysStat, size) 1358 mSysStatDec(&memstats.other_sys, size) 1359 } 1360 return p 1361 } 1362 1363 // inPersistentAlloc reports whether p points to memory allocated by 1364 // persistentalloc. This must be nosplit because it is called by the 1365 // cgo checker code, which is called by the write barrier code. 1366 //go:nosplit 1367 func inPersistentAlloc(p uintptr) bool { 1368 chunk := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&persistentChunks))) 1369 for chunk != 0 { 1370 if p >= chunk && p < chunk+persistentChunkSize { 1371 return true 1372 } 1373 chunk = *(*uintptr)(unsafe.Pointer(chunk)) 1374 } 1375 return false 1376 } 1377 1378 // linearAlloc is a simple linear allocator that pre-reserves a region 1379 // of memory and then maps that region into the Ready state as needed. The 1380 // caller is responsible for locking. 1381 type linearAlloc struct { 1382 next uintptr // next free byte 1383 mapped uintptr // one byte past end of mapped space 1384 end uintptr // end of reserved space 1385 } 1386 1387 func (l *linearAlloc) init(base, size uintptr) { 1388 l.next, l.mapped = base, base 1389 l.end = base + size 1390 } 1391 1392 func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { 1393 p := alignUp(l.next, align) 1394 if p+size > l.end { 1395 return nil 1396 } 1397 l.next = p + size 1398 if pEnd := alignUp(l.next-1, physPageSize); pEnd > l.mapped { 1399 // Transition from Reserved to Prepared to Ready. 1400 sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat) 1401 sysUsed(unsafe.Pointer(l.mapped), pEnd-l.mapped) 1402 l.mapped = pEnd 1403 } 1404 return unsafe.Pointer(p) 1405 } 1406 1407 // notInHeap is off-heap memory allocated by a lower-level allocator 1408 // like sysAlloc or persistentAlloc. 1409 // 1410 // In general, it's better to use real types marked as go:notinheap, 1411 // but this serves as a generic type for situations where that isn't 1412 // possible (like in the allocators). 1413 // 1414 // TODO: Use this as the return type of sysAlloc, persistentAlloc, etc? 1415 // 1416 //go:notinheap 1417 type notInHeap struct{} 1418 1419 func (p *notInHeap) add(bytes uintptr) *notInHeap { 1420 return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes)) 1421 }
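// As a rough usage sketch of linearAlloc (not a call site in this file;
// the 1MB size and the stat account are made up for the example), the
// allocator reserves its region once and then maps pages lazily as
// allocations cross page boundaries:
//
//	var la linearAlloc
//	base := uintptr(sysReserve(nil, 1<<20))    // None -> Reserved; nil check omitted
//	la.init(base, 1<<20)
//	p := la.alloc(100, 8, &memstats.other_sys) // maps the first page: Reserved -> Prepared -> Ready
//	q := la.alloc(64, 8, &memstats.other_sys)  // fits in the page just mapped; no OS calls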