github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)

// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		if err == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if err == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	sysStat.add(int64(n))
	return p
}

var adviseUnused = uint32(_MADV_FREE)

func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of madvise(adviseUnused)
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page to 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if physHugePageSize != 0 {
		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)&(physHugePageSize-1) != 0 {
			// Compute huge page containing v.
			head = alignDown(uintptr(v), physHugePageSize)
		}
		if (uintptr(v)+n)&(physHugePageSize-1) != 0 {
			// Compute huge page containing v+n-1.
			tail = alignDown(uintptr(v)+n-1, physHugePageSize)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+physHugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	var advise uint32
	if debug.madvdontneed != 0 {
		advise = _MADV_DONTNEED
	} else {
		advise = atomic.Load(&adviseUnused)
	}
	if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
		// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
		// not supported.
		atomic.Store(&adviseUnused, _MADV_DONTNEED)
		madvise(v, n, _MADV_DONTNEED)
	}
}

func sysUsed(v unsafe.Pointer, n uintptr) {
	// Partially undo the NOHUGEPAGE marks from sysUnused
	// for whole huge pages between v and v+n. This may
	// leave huge pages off at the end points v and v+n
	// even though allocations may cover these entire huge
	// pages. We could detect this and undo NOHUGEPAGE on
	// the end points as well, but it's probably not worth
	// the cost because when neighboring allocations are
	// freed sysUnused will just set NOHUGEPAGE again.
	sysHugePage(v, n)
}

func sysHugePage(v unsafe.Pointer, n uintptr) {
	if physHugePageSize != 0 {
		// Round v up to a huge page boundary.
		beg := alignUp(uintptr(v), physHugePageSize)
		// Round v+n down to a huge page boundary.
		end := alignDown(uintptr(v)+n, physHugePageSize)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
	sysStat.add(-int64(n))
	munmap(v, n)
}

func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		return nil
	}
	return p
}

func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
	sysStat.add(int64(n))

	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if err == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v || err != 0 {
		throw("runtime: cannot map pages in arena address space")
	}
}
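
The MADV_FREE-then-MADV_DONTNEED fallback that sysUnused performs above can be reproduced in ordinary user code. The sketch below is not part of the runtime: it uses golang.org/x/sys/unix rather than the runtime's raw mmap/madvise wrappers, and the helper name releaseMemory is hypothetical, introduced only for this illustration. It maps one anonymous page the way sysAlloc does, then advises the kernel that the page is unused, preferring the lazy MADV_FREE (Linux 4.5+) and falling back to MADV_DONTNEED when that advice is rejected, mirroring the adviseUnused pattern loosely rather than exactly.

// Illustrative sketch, assuming golang.org/x/sys/unix is available.
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// releaseMemory (hypothetical helper) tells the kernel it may reclaim the
// pages backing b, trying MADV_FREE first and falling back to MADV_DONTNEED
// if the kernel rejects it, e.g. on kernels older than 4.5.
func releaseMemory(b []byte) error {
	if err := unix.Madvise(b, unix.MADV_FREE); err != nil {
		return unix.Madvise(b, unix.MADV_DONTNEED)
	}
	return nil
}

func main() {
	// Map one page of anonymous, private, read-write memory, analogous
	// to what sysAlloc requests from the kernel.
	b, err := unix.Mmap(-1, 0, os.Getpagesize(),
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_ANON|unix.MAP_PRIVATE)
	if err != nil {
		panic(err)
	}
	defer unix.Munmap(b)

	b[0] = 1 // touch the page so it is actually resident
	if err := releaseMemory(b); err != nil {
		panic(err)
	}
	fmt.Println("advised kernel that the page is unused")
}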