github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/runtime/mem_linux.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"runtime/internal/sys"
     9  	"unsafe"
    10  )
    11  
// Linux errno values that this file compares against raw system
// call results.
const (
	_EACCES = 13 // checked by sysAlloc against the value returned by mmap
	_EINVAL = 22 // checked (negated) by addrspace_free against mincore's result
)
    16  
// addrspace_vec is the output vector that addrspace_free hands to
// mincore. Within this file its contents are never read; only
// mincore's return value is examined.
//
// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte
    23  
    24  func addrspace_free(v unsafe.Pointer, n uintptr) bool {
    25  	for off := uintptr(0); off < n; off += physPageSize {
    26  		// Use a length of 1 byte, which the kernel will round
    27  		// up to one physical page regardless of the true
    28  		// physical page size.
    29  		errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0])
    30  		if errval == -_EINVAL {
    31  			// Address is not a multiple of the physical
    32  			// page size. Shouldn't happen, but just ignore it.
    33  			continue
    34  		}
    35  		// ENOMEM means unmapped, which is what we want.
    36  		// Anything else we assume means the pages are mapped.
    37  		if errval != -_ENOMEM {
    38  			return false
    39  		}
    40  	}
    41  	return true
    42  }
    43  
    44  func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
    45  	p := mmap(v, n, prot, flags, fd, offset)
    46  	// On some systems, mmap ignores v without
    47  	// MAP_FIXED, so retry if the address space is free.
    48  	if p != v && addrspace_free(v, n) {
    49  		if uintptr(p) > 4096 {
    50  			munmap(p, n)
    51  		}
    52  		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
    53  	}
    54  	return p
    55  }
    56  
    57  // Don't split the stack as this method may be invoked without a valid G, which
    58  // prevents us from allocating more stack.
    59  //go:nosplit
    60  func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
    61  	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
    62  	if uintptr(p) < 4096 {
    63  		if uintptr(p) == _EACCES {
    64  			print("runtime: mmap: access denied\n")
    65  			exit(2)
    66  		}
    67  		if uintptr(p) == _EAGAIN {
    68  			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
    69  			exit(2)
    70  		}
    71  		return nil
    72  	}
    73  	mSysStatInc(sysStat, n)
    74  	return p
    75  }
    76  
    77  func sysUnused(v unsafe.Pointer, n uintptr) {
    78  	// By default, Linux's "transparent huge page" support will
    79  	// merge pages into a huge page if there's even a single
    80  	// present regular page, undoing the effects of the DONTNEED
    81  	// below. On amd64, that means khugepaged can turn a single
    82  	// 4KB page to 2MB, bloating the process's RSS by as much as
    83  	// 512X. (See issue #8832 and Linux kernel bug
    84  	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
    85  	//
    86  	// To work around this, we explicitly disable transparent huge
    87  	// pages when we release pages of the heap. However, we have
    88  	// to do this carefully because changing this flag tends to
    89  	// split the VMA (memory mapping) containing v in to three
    90  	// VMAs in order to track the different values of the
    91  	// MADV_NOHUGEPAGE flag in the different regions. There's a
    92  	// default limit of 65530 VMAs per address space (sysctl
    93  	// vm.max_map_count), so we must be careful not to create too
    94  	// many VMAs (see issue #12233).
    95  	//
    96  	// Since huge pages are huge, there's little use in adjusting
    97  	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
    98  	// exploding the number of VMAs by only adjusting the
    99  	// MADV_NOHUGEPAGE flag on a large granularity. This still
   100  	// gets most of the benefit of huge pages while keeping the
   101  	// number of VMAs under control. With hugePageSize = 2MB, even
   102  	// a pessimal heap can reach 128GB before running out of VMAs.
   103  	if sys.HugePageSize != 0 {
   104  		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(
   105  
   106  		// If it's a large allocation, we want to leave huge
   107  		// pages enabled. Hence, we only adjust the huge page
   108  		// flag on the huge pages containing v and v+n-1, and
   109  		// only if those aren't aligned.
   110  		var head, tail uintptr
   111  		if uintptr(v)%s != 0 {
   112  			// Compute huge page containing v.
   113  			head = uintptr(v) &^ (s - 1)
   114  		}
   115  		if (uintptr(v)+n)%s != 0 {
   116  			// Compute huge page containing v+n-1.
   117  			tail = (uintptr(v) + n - 1) &^ (s - 1)
   118  		}
   119  
   120  		// Note that madvise will return EINVAL if the flag is
   121  		// already set, which is quite likely. We ignore
   122  		// errors.
   123  		if head != 0 && head+sys.HugePageSize == tail {
   124  			// head and tail are different but adjacent,
   125  			// so do this in one call.
   126  			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
   127  		} else {
   128  			// Advise the huge pages containing v and v+n-1.
   129  			if head != 0 {
   130  				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
   131  			}
   132  			if tail != 0 && tail != head {
   133  				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
   134  			}
   135  		}
   136  	}
   137  
   138  	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
   139  		// madvise will round this to any physical page
   140  		// *covered* by this range, so an unaligned madvise
   141  		// will release more memory than intended.
   142  		throw("unaligned sysUnused")
   143  	}
   144  
   145  	madvise(v, n, _MADV_DONTNEED)
   146  }
   147  
   148  func sysUsed(v unsafe.Pointer, n uintptr) {
   149  	if sys.HugePageSize != 0 {
   150  		// Partially undo the NOHUGEPAGE marks from sysUnused
   151  		// for whole huge pages between v and v+n. This may
   152  		// leave huge pages off at the end points v and v+n
   153  		// even though allocations may cover these entire huge
   154  		// pages. We could detect this and undo NOHUGEPAGE on
   155  		// the end points as well, but it's probably not worth
   156  		// the cost because when neighboring allocations are
   157  		// freed sysUnused will just set NOHUGEPAGE again.
   158  		var s uintptr = sys.HugePageSize
   159  
   160  		// Round v up to a huge page boundary.
   161  		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
   162  		// Round v+n down to a huge page boundary.
   163  		end := (uintptr(v) + n) &^ (s - 1)
   164  
   165  		if beg < end {
   166  			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
   167  		}
   168  	}
   169  }
   170  
// sysFree calls mSysStatDec(sysStat, n) and then unmaps the n bytes
// starting at v with munmap. The result of munmap is not checked.
//
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}
   178  
// sysFault maps n bytes at v as an anonymous, private mapping with no
// access permissions (_PROT_NONE). _MAP_FIXED is passed so that the
// mapping is requested at exactly v. The result of mmap is ignored.
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}
   182  
   183  func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
   184  	// On 64-bit, people with ulimit -v set complain if we reserve too
   185  	// much address space. Instead, assume that the reservation is okay
   186  	// if we can reserve at least 64K and check the assumption in SysMap.
   187  	// Only user-mode Linux (UML) rejects these requests.
   188  	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
   189  		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   190  		if p != v {
   191  			if uintptr(p) >= 4096 {
   192  				munmap(p, 64<<10)
   193  			}
   194  			return nil
   195  		}
   196  		munmap(p, 64<<10)
   197  		*reserved = false
   198  		return v
   199  	}
   200  
   201  	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   202  	if uintptr(p) < 4096 {
   203  		return nil
   204  	}
   205  	*reserved = true
   206  	return p
   207  }
   208  
   209  func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
   210  	mSysStatInc(sysStat, n)
   211  
   212  	// On 64-bit, we don't actually have v reserved, so tread carefully.
   213  	if !reserved {
   214  		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   215  		if uintptr(p) == _ENOMEM {
   216  			throw("runtime: out of memory")
   217  		}
   218  		if p != v {
   219  			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
   220  			throw("runtime: address space conflict")
   221  		}
   222  		return
   223  	}
   224  
   225  	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
   226  	if uintptr(p) == _ENOMEM {
   227  		throw("runtime: out of memory")
   228  	}
   229  	if p != v {
   230  		throw("runtime: cannot map pages in arena address space")
   231  	}
   232  }