github.com/liujq9674git/golang-src-1.7@v0.0.0-20230517174348-17f6ec47f3f8/src/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)

// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte

func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	// Step by the minimum possible physical page size. This is
	// safe even if we have the wrong physical page size; mincore
	// will just return EINVAL for unaligned addresses.
	for off := uintptr(0); off < n; off += minPhysPageSize {
		// Use a length of 1 byte, which the kernel will round
		// up to one physical page regardless of the true
		// physical page size.
		errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0])
		if errval == -_EINVAL {
			// Address is not a multiple of the physical
			// page size. That's fine.
			continue
		}
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != -_ENOMEM {
			return false
		}
	}
	return true
}
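
// A minimal user-space sketch (not part of the runtime, which cannot import
// syscall) of the same one-page-at-a-time mincore probe. pageUnmapped is a
// hypothetical helper name:
//
//	func pageUnmapped(addr uintptr) bool {
//		var vec [1]byte
//		_, _, errno := syscall.Syscall(syscall.SYS_MINCORE,
//			addr, 1, uintptr(unsafe.Pointer(&vec[0])))
//		// With a 1-byte (one-page) range, ENOMEM can only mean that
//		// this page is unmapped; EINVAL means addr was unaligned.
//		return errno == syscall.ENOMEM
//	}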

func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
	p := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
		if uintptr(p) > 4096 {
			munmap(p, n)
		}
		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p
}

// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		if uintptr(p) == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if uintptr(p) == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}

func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page to 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if sys.HugePageSize != 0 {
		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+sys.HugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(sys.PhysPageSize-1) != 0 || n&(sys.PhysPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	madvise(v, n, _MADV_DONTNEED)
}

func sysUsed(v unsafe.Pointer, n uintptr) {
	if sys.HugePageSize != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = sys.HugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}
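
// A self-contained sketch (hugePageSpans is a hypothetical helper, assuming
// the amd64 value of 2MB for sys.HugePageSize) of the boundary arithmetic
// used above: sysUnused rounds down to find the huge pages containing v and
// v+n-1, while sysUsed rounds v up and v+n down so that only whole interior
// huge pages are re-enabled:
//
//	func hugePageSpans(v, n uintptr) (head, tail, beg, end uintptr) {
//		const s = 2 << 20             // assumed huge page size
//		head = v &^ (s - 1)           // huge page containing v
//		tail = (v + n - 1) &^ (s - 1) // huge page containing v+n-1
//		beg = (v + s - 1) &^ (s - 1)  // v rounded up
//		end = (v + n) &^ (s - 1)      // v+n rounded down
//		return
//	}
//
// For example, v = 0x301000 and n = 0x3fe000 give head = 0x200000,
// tail = 0x600000, beg = 0x400000, and end = 0x600000.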

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

// sysFault remaps the region as PROT_NONE so that any access faults.
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if p != v {
			if uintptr(p) >= 4096 {
				munmap(p, 64<<10)
			}
			return nil
		}
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		return nil
	}
	*reserved = true
	return p
}

func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
	if !reserved {
		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if uintptr(p) == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v {
			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
			throw("runtime: address space conflict")
		}
		return
	}

	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if uintptr(p) == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v {
		throw("runtime: cannot map pages in arena address space")
	}
}
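
// A user-space sketch of the 64K probe idea in sysReserve above: map 64KB of
// PROT_NONE memory at the hinted address (without MAP_FIXED), check whether
// the kernel honored the hint, and unmap it either way. hintFree is a
// hypothetical helper, assuming linux/amd64 and the syscall package:
//
//	func hintFree(v uintptr) bool {
//		const probe = 64 << 10
//		p, _, errno := syscall.Syscall6(syscall.SYS_MMAP, v, probe,
//			uintptr(syscall.PROT_NONE),
//			uintptr(syscall.MAP_ANON|syscall.MAP_PRIVATE),
//			^uintptr(0), 0) // fd = -1
//		if errno != 0 {
//			return false
//		}
//		syscall.Syscall(syscall.SYS_MUNMAP, p, probe, 0)
//		return p == v
//	}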