github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/runtime/mem_linux.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"runtime/internal/sys"
     9  	"unsafe"
    10  )
    11  
const (
	// _PAGE_SIZE is the physical page size taken from the sys package.
	// addrspace_free uses it to size each mincore probe.
	_PAGE_SIZE = sys.PhysPageSize
	// _EACCES is the errno value that sysAlloc compares against a failed
	// mmap result in order to report "access denied".
	_EACCES    = 13
)
    16  
// addrspace_vec is the residency vector handed to mincore by
// addrspace_free.
//
// NOTE: it must be exactly 1 byte long. Mincore returns ENOMEM if
// any of the pages in the queried range are unmapped, but
// addrspace_free needs to know that all of the pages are unmapped.
// The two conditions coincide only when a single page is queried
// per call, and the length of this vector is what limits each call
// to one page. See golang.org/issue/7476.
var addrspace_vec [1]byte
    23  
    24  func addrspace_free(v unsafe.Pointer, n uintptr) bool {
    25  	var chunk uintptr
    26  	for off := uintptr(0); off < n; off += chunk {
    27  		chunk = _PAGE_SIZE * uintptr(len(addrspace_vec))
    28  		if chunk > (n - off) {
    29  			chunk = n - off
    30  		}
    31  		errval := mincore(unsafe.Pointer(uintptr(v)+off), chunk, &addrspace_vec[0])
    32  		// ENOMEM means unmapped, which is what we want.
    33  		// Anything else we assume means the pages are mapped.
    34  		if errval != -_ENOMEM {
    35  			return false
    36  		}
    37  	}
    38  	return true
    39  }
    40  
    41  func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
    42  	p := mmap(v, n, prot, flags, fd, offset)
    43  	// On some systems, mmap ignores v without
    44  	// MAP_FIXED, so retry if the address space is free.
    45  	if p != v && addrspace_free(v, n) {
    46  		if uintptr(p) > 4096 {
    47  			munmap(p, n)
    48  		}
    49  		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
    50  	}
    51  	return p
    52  }
    53  
    54  // Don't split the stack as this method may be invoked without a valid G, which
    55  // prevents us from allocating more stack.
    56  //go:nosplit
    57  func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
    58  	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
    59  	if uintptr(p) < 4096 {
    60  		if uintptr(p) == _EACCES {
    61  			print("runtime: mmap: access denied\n")
    62  			exit(2)
    63  		}
    64  		if uintptr(p) == _EAGAIN {
    65  			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
    66  			exit(2)
    67  		}
    68  		return nil
    69  	}
    70  	mSysStatInc(sysStat, n)
    71  	return p
    72  }
    73  
    74  func sysUnused(v unsafe.Pointer, n uintptr) {
    75  	// By default, Linux's "transparent huge page" support will
    76  	// merge pages into a huge page if there's even a single
    77  	// present regular page, undoing the effects of the DONTNEED
    78  	// below. On amd64, that means khugepaged can turn a single
    79  	// 4KB page to 2MB, bloating the process's RSS by as much as
    80  	// 512X. (See issue #8832 and Linux kernel bug
    81  	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
    82  	//
    83  	// To work around this, we explicitly disable transparent huge
    84  	// pages when we release pages of the heap. However, we have
    85  	// to do this carefully because changing this flag tends to
    86  	// split the VMA (memory mapping) containing v in to three
    87  	// VMAs in order to track the different values of the
    88  	// MADV_NOHUGEPAGE flag in the different regions. There's a
    89  	// default limit of 65530 VMAs per address space (sysctl
    90  	// vm.max_map_count), so we must be careful not to create too
    91  	// many VMAs (see issue #12233).
    92  	//
    93  	// Since huge pages are huge, there's little use in adjusting
    94  	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
    95  	// exploding the number of VMAs by only adjusting the
    96  	// MADV_NOHUGEPAGE flag on a large granularity. This still
    97  	// gets most of the benefit of huge pages while keeping the
    98  	// number of VMAs under control. With hugePageSize = 2MB, even
    99  	// a pessimal heap can reach 128GB before running out of VMAs.
   100  	if sys.HugePageSize != 0 {
   101  		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(
   102  
   103  		// If it's a large allocation, we want to leave huge
   104  		// pages enabled. Hence, we only adjust the huge page
   105  		// flag on the huge pages containing v and v+n-1, and
   106  		// only if those aren't aligned.
   107  		var head, tail uintptr
   108  		if uintptr(v)%s != 0 {
   109  			// Compute huge page containing v.
   110  			head = uintptr(v) &^ (s - 1)
   111  		}
   112  		if (uintptr(v)+n)%s != 0 {
   113  			// Compute huge page containing v+n-1.
   114  			tail = (uintptr(v) + n - 1) &^ (s - 1)
   115  		}
   116  
   117  		// Note that madvise will return EINVAL if the flag is
   118  		// already set, which is quite likely. We ignore
   119  		// errors.
   120  		if head != 0 && head+sys.HugePageSize == tail {
   121  			// head and tail are different but adjacent,
   122  			// so do this in one call.
   123  			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
   124  		} else {
   125  			// Advise the huge pages containing v and v+n-1.
   126  			if head != 0 {
   127  				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
   128  			}
   129  			if tail != 0 && tail != head {
   130  				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
   131  			}
   132  		}
   133  	}
   134  
   135  	madvise(v, n, _MADV_DONTNEED)
   136  }
   137  
   138  func sysUsed(v unsafe.Pointer, n uintptr) {
   139  	if sys.HugePageSize != 0 {
   140  		// Partially undo the NOHUGEPAGE marks from sysUnused
   141  		// for whole huge pages between v and v+n. This may
   142  		// leave huge pages off at the end points v and v+n
   143  		// even though allocations may cover these entire huge
   144  		// pages. We could detect this and undo NOHUGEPAGE on
   145  		// the end points as well, but it's probably not worth
   146  		// the cost because when neighboring allocations are
   147  		// freed sysUnused will just set NOHUGEPAGE again.
   148  		var s uintptr = sys.HugePageSize
   149  
   150  		// Round v up to a huge page boundary.
   151  		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
   152  		// Round v+n down to a huge page boundary.
   153  		end := (uintptr(v) + n) &^ (s - 1)
   154  
   155  		if beg < end {
   156  			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
   157  		}
   158  	}
   159  }
   160  
// sysFree unmaps the n bytes at v with munmap, after first passing sysStat
// and n to mSysStatDec (presumably subtracting n from the statistic;
// mSysStatDec is defined outside this file). The result of munmap is not
// checked.
//
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}
   168  
// sysFault replaces the n bytes at v with a fresh anonymous, private
// mapping that has no access permissions (_PROT_NONE). _MAP_FIXED makes
// the new mapping land exactly at v. The result of mmap is not checked.
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}
   172  
   173  func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
   174  	// On 64-bit, people with ulimit -v set complain if we reserve too
   175  	// much address space. Instead, assume that the reservation is okay
   176  	// if we can reserve at least 64K and check the assumption in SysMap.
   177  	// Only user-mode Linux (UML) rejects these requests.
   178  	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
   179  		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   180  		if p != v {
   181  			if uintptr(p) >= 4096 {
   182  				munmap(p, 64<<10)
   183  			}
   184  			return nil
   185  		}
   186  		munmap(p, 64<<10)
   187  		*reserved = false
   188  		return v
   189  	}
   190  
   191  	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   192  	if uintptr(p) < 4096 {
   193  		return nil
   194  	}
   195  	*reserved = true
   196  	return p
   197  }
   198  
   199  func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
   200  	mSysStatInc(sysStat, n)
   201  
   202  	// On 64-bit, we don't actually have v reserved, so tread carefully.
   203  	if !reserved {
   204  		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   205  		if uintptr(p) == _ENOMEM {
   206  			throw("runtime: out of memory")
   207  		}
   208  		if p != v {
   209  			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
   210  			throw("runtime: address space conflict")
   211  		}
   212  		return
   213  	}
   214  
   215  	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
   216  	if uintptr(p) == _ENOMEM {
   217  		throw("runtime: out of memory")
   218  	}
   219  	if p != v {
   220  		throw("runtime: cannot map pages in arena address space")
   221  	}
   222  }