github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import "unsafe"

const (
	_PAGE_SIZE = _PhysPageSize
	_EACCES    = 13
)

// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte

func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	var chunk uintptr
	for off := uintptr(0); off < n; off += chunk {
		chunk = _PAGE_SIZE * uintptr(len(addrspace_vec))
		if chunk > (n - off) {
			chunk = n - off
		}
		errval := mincore(unsafe.Pointer(uintptr(v)+off), chunk, &addrspace_vec[0])
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != -_ENOMEM {
			return false
		}
	}
	return true
}

func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
	p := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
		if uintptr(p) > 4096 {
			munmap(p, n)
		}
		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p
}

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		if uintptr(p) == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if uintptr(p) == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}
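
// The comparisons against 4096 above rely on the runtime's mmap wrapper
// returning a small positive errno (such as _EACCES = 13) in place of a
// pointer when the system call fails; no real mapping can start inside the
// first page of the address space. A minimal, hypothetical helper sketching
// that decoding, assuming the same convention (the runtime itself does these
// checks inline):
func mmapErrno(p unsafe.Pointer) (errno uintptr, failed bool) {
	if uintptr(p) < 4096 {
		// Failure: p carries an errno, not an address.
		return uintptr(p), true
	}
	return 0, false
}
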
func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page to 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if hugePageSize != 0 {
		var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+hugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*hugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), hugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), hugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	madvise(v, n, _MADV_DONTNEED)
}

func sysUsed(v unsafe.Pointer, n uintptr) {
	if hugePageSize != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = hugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}
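
// A worked example of the rounding used by sysUsed, as a hypothetical helper
// (the runtime computes this inline): shrink the range [v, v+n) inward to
// whole huge pages. With s = 2MB, v = 0x10123000, and n = 0x600000,
// beg = 0x10200000 and end = 0x10600000, so exactly the two fully covered
// huge pages are re-enabled.
func hugePageSpan(v, n, s uintptr) (beg, end uintptr) {
	beg = (v + s - 1) &^ (s - 1) // round v up to a huge page boundary
	end = (v + n) &^ (s - 1)     // round v+n down to a huge page boundary
	return beg, end
}
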
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
	if ptrSize == 8 && uint64(n) > 1<<32 {
		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if p != v {
			if uintptr(p) >= 4096 {
				munmap(p, 64<<10)
			}
			return nil
		}
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		return nil
	}
	*reserved = true
	return p
}

func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
	if !reserved {
		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if uintptr(p) == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v {
			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
			throw("runtime: address space conflict")
		}
		return
	}

	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if uintptr(p) == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v {
		throw("runtime: cannot map pages in arena address space")
	}
}
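
// sysReserve's 64K probe and sysMap's !reserved path act as a pair: on
// 64-bit, the large arena reservation is only hinted at, never mapped, so
// sysMap must re-verify that the kernel honors the hint before committing
// pages. A hypothetical end-to-end sketch of that handshake, reusing the
// helpers defined above:
func reserveThenMap(v unsafe.Pointer, n uintptr) bool {
	// Probe with a tiny PROT_NONE mapping; if the kernel ignores the
	// address hint, the range cannot be used as an arena.
	p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if p != v {
		if uintptr(p) >= 4096 {
			munmap(p, 64<<10) // probe landed elsewhere; release it
		}
		return false
	}
	munmap(p, 64<<10)
	// Later, commit the pages for real and re-check the address.
	p = mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	return p == v
}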