github.com/liujq9674git/golang-src-1.7@v0.0.0-20230517174348-17f6ec47f3f8/src/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)

// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte

func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	// Step by the minimum possible physical page size. This is
	// safe even if we have the wrong physical page size; mincore
	// will just return EINVAL for unaligned addresses.
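	//
	// For example, with minPhysPageSize = 4KB and n = 16KB this asks
	// about four pages, one byte at a time:
	//
	//	mincore(v+0x0000, 1, &addrspace_vec[0])
	//	mincore(v+0x1000, 1, &addrspace_vec[0])
	//	mincore(v+0x2000, 1, &addrspace_vec[0])
	//	mincore(v+0x3000, 1, &addrspace_vec[0])
	//
	// and the range is treated as free only if every probe returns
	// -_ENOMEM (or -_EINVAL for an address that is not page-aligned).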
	for off := uintptr(0); off < n; off += minPhysPageSize {
		// Use a length of 1 byte, which the kernel will round
		// up to one physical page regardless of the true
		// physical page size.
		errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0])
		if errval == -_EINVAL {
			// Address is not a multiple of the physical
			// page size. That's fine.
			continue
		}
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != -_ENOMEM {
			return false
		}
	}
	return true
}

func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
	p := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
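		// The runtime's mmap returns a small positive errno value on
		// failure (compare the _EACCES and _ENOMEM checks elsewhere in
		// this file), so any value above the first page is a real
		// mapping at the wrong address and must be dropped before the
		// MAP_FIXED retry.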
		if uintptr(p) > 4096 {
			munmap(p, n)
		}
		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p
}

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		if uintptr(p) == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if uintptr(p) == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}

func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page to 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
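	//
	// Concretely, the flag only ever changes at huge page granularity,
	// so in the worst case each 2MB huge page in the heap ends up as
	// its own VMA, and the default budget is exhausted only after about
	//
	//	65530 VMAs * 2MB per VMA ≈ 128GB
	//
	// of heap, which is where the 128GB figure comes from.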
	if sys.HugePageSize != 0 {
		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}
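
		// For example, with s = 2MB, v = 0x00c000100000, and n = 2MB,
		// head = 0x00c000000000 and tail = 0x00c000200000. Those are
		// adjacent huge pages, so the branch below marks both with a
		// single 4MB madvise call.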

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+sys.HugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(sys.PhysPageSize-1) != 0 || n&(sys.PhysPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	madvise(v, n, _MADV_DONTNEED)
}

func sysUsed(v unsafe.Pointer, n uintptr) {
	if sys.HugePageSize != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = sys.HugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)
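
		// For example, with s = 2MB, v = 0x00c000100000, and n = 6MB,
		// beg = 0x00c000200000 and end = 0x00c000600000, so only the
		// two huge pages lying entirely within [v, v+n) are re-enabled;
		// the partial huge pages at either end keep NOHUGEPAGE.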

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

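// sysFault remaps the range as an inaccessible (PROT_NONE) mapping so
// that any future access faults, while keeping the address range itself
// occupied.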
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
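	//
	// In the large-reservation case below we drop the 64K probe mapping
	// and return v with *reserved = false, which tells sysMap to
	// re-check the address space (via mmap_fixed) before handing out
	// memory there. On amd64 the heap's arena reservation is far larger
	// than 4GB, so it always takes this path.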
	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if p != v {
			if uintptr(p) >= 4096 {
				munmap(p, 64<<10)
			}
			return nil
		}
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		return nil
	}
	*reserved = true
	return p
}

func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
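	//
	// This is the *reserved = false case from sysReserve: the pages were
	// never actually mapped, so mmap_fixed must both re-check that the
	// space is still free and claim it. A conflicting mapping inside the
	// arena is unrecoverable, hence the throw below.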
	if !reserved {
		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if uintptr(p) == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v {
			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
			throw("runtime: address space conflict")
		}
		return
	}

	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if uintptr(p) == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v {
		throw("runtime: cannot map pages in arena address space")
	}
}
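
// The sequence below is a rough sketch of how the heap drives these
// primitives; the sizes and variable names are illustrative, and the real
// call sites live in malloc.go and mheap.go:
//
//	var reserved bool
//	v := sysReserve(hint, arenaSize, &reserved)        // claim address space only
//	sysMap(v, chunkSize, reserved, &memstats.heap_sys) // back part of it with memory
//	sysUnused(v, chunkSize)                            // scavenging: hand pages back to the OS
//	sysUsed(v, chunkSize)                              // the range is allocated from again
//	sysFree(v, chunkSize, &memstats.heap_sys)          // give up the mapping entirely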