github.com/lrita/numa@v1.0.2/numa_linux.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  package numa
     5  
     6  import (
     7  	"fmt"
     8  	"io/ioutil"
     9  	"strconv"
    10  	"strings"
    11  	"syscall"
    12  	"unsafe"
    13  )
    14  
    15  func init() {
    16  	_, _, e1 := syscall.Syscall6(syscall.SYS_GET_MEMPOLICY, 0, 0, 0, 0, 0, 0)
    17  	available = e1 != syscall.ENOSYS
    18  	nnodemax = setupnodemask() // max nodes
    19  	memnodes = NewBitmask(NodePossibleCount())
    20  	numanodes = NewBitmask(NodePossibleCount())
    21  	nconfigurednode = setupconfigurednodes() // configured nodes
    22  	ncpumax = setupncpu()                    // max cpu
    23  	nconfiguredcpu = setupnconfiguredcpu()   // configured cpu
    24  	setupconstraints()
    25  }
    26  
    27  // GetMemPolicy retrieves the NUMA policy of the calling process or of a
    28  // memory address, depending on the setting of flags.
    29  // Details to see manpage of get_mempolicy.
    30  //
    31  // If flags is specified as 0, then information about the calling process's
    32  // default policy (as set by set_mempolicy(2)) is returned. The policy
    33  // returned [mode and nodemask] may be used to restore the process's policy
    34  // to its state at the time of the call to get_mempolicy() using set_mempolicy(2).
    35  //
    36  // If flags specifies MPOL_F_MEMS_ALLOWED (available since Linux 2.6.24),
    37  // the mode argument is ignored and the set of nodes [memories] that the
    38  // process is allowed to specify in subsequent calls to mbind(2) or
    39  // set_mempolicy(2) [in the absence of any mode flags] is returned in
    40  // nodemask. It is not permitted to combine MPOL_F_MEMS_ALLOWED with
    41  // either MPOL_F_ADDR or MPOL_F_NODE.
    42  //
    43  // If flags specifies MPOL_F_ADDR, then information is returned about the
    44  // policy governing the memory address given in addr. This policy may be
    45  // different from the process's default policy if mbind(2) or one of the
    46  // helper functions described in numa(3) has been used to establish a policy
    47  // for the memory range containing addr.
    48  //
    49  // If flags specifies both MPOL_F_NODE and MPOL_F_ADDR, get_mempolicy() will
    50  // return the node ID of the node on which the address addr is allocated into
    51  // the location pointed to by mode. If no page has yet been allocated for the
    52  // specified address, get_mempolicy() will allocate a page as if the process
    53  // had performed a read [load] access to that address, and return the ID of
    54  // the node where that page was allocated.
    55  //
    56  // If flags specifies MPOL_F_NODE, but not MPOL_F_ADDR, and the process's
    57  // current policy is MPOL_INTERLEAVE, then get_mempolicy() will return in
    58  // the location pointed to by a non-NULL mode argument, the node ID of the
    59  // next node that will be used for interleaving of internal kernel pages
    60  // allocated on behalf of the process. These allocations include pages for
    61  // memory mapped files in process memory ranges mapped using the mmap(2)
    62  // call with the MAP_PRIVATE flag for read accesses, and in memory ranges
    63  // mapped with the MAP_SHARED flag for all accesses.
    64  func GetMemPolicy(nodemask Bitmask, addr unsafe.Pointer, flags int) (mode int, err error) {
    65  	var mask, maxnode uintptr
    66  	if maxnode = uintptr(nodemask.Len()); maxnode != 0 {
    67  		mask = uintptr(unsafe.Pointer(&nodemask[0]))
    68  	}
    69  	_, _, errno := syscall.Syscall6(syscall.SYS_GET_MEMPOLICY,
    70  		uintptr(unsafe.Pointer(&mode)), mask, maxnode,
    71  		uintptr(addr), uintptr(flags), 0)
    72  	if errno != 0 {
    73  		err = errno
    74  	}
    75  	return
    76  }
    77  
    78  // SetMemPolicy sets the NUMA memory policy of the calling process, which
    79  // consists of a policy mode and zero or more nodes, to the values specified
    80  // by the mode, nodemask and maxnode arguments.
    81  // Details to see manpage of set_mempolicy.
    82  //
    83  // A NUMA machine has different memory controllers with different distances
    84  // to specific CPUs. The memory policy defines from which node memory is
    85  // allocated for the process.
    86  
    87  // This system call defines the default policy for the process. The process
    88  // policy governs allocation of pages in the process's address space outside
    89  // of memory ranges controlled by a more specific policy set by mbind(2). The
    90  // process default policy also controls allocation of any pages for memory
    91  // mapped files mapped using the mmap(2) call with the MAP_PRIVATE flag and
    92  // that are only read [loaded] from by the process and of memory mapped files
    93  // mapped using the mmap(2) call with the MAP_SHARED flag, regardless of the
    94  // access type. The policy is applied only when a new page is allocated for the
    95  // process. For anonymous memory this is when the page is first touched by the
    96  // application.
    97  //
    98  // The mode argument must specify one of MPOL_DEFAULT, MPOL_BIND,
    99  // MPOL_INTERLEAVE or MPOL_PREFERRED.
   100  // All modes except MPOL_DEFAULT require the caller to specify via the nodemask
   101  // argument one or more nodes.
   102  //
   103  // The mode argument may also include an optional mode flag. The supported mode
   104  // flags are: MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES.
   105  //
   106  // Where a nodemask is required, it must contain at least one node that is
   107  // on-line, allowed by the process's current cpuset context,
   108  // [unless the MPOL_F_STATIC_NODES mode flag is specified], and contains memory.
   109  // If the MPOL_F_STATIC_NODES is set in mode and a required nodemask contains
   110  // no nodes that are allowed by the process's current cpuset context, the memory
   111  // policy reverts to local  allocation. This effectively overrides the
   112  // specified policy until the process's cpuset context includes one or more of
   113  // the nodes specified by nodemask.
   114  func SetMemPolicy(mode int, nodemask Bitmask) (err error) {
   115  	var mask, maxnode uintptr
   116  	if maxnode = uintptr(nodemask.Len()); maxnode != 0 {
   117  		mask = uintptr(unsafe.Pointer(&nodemask[0]))
   118  	}
   119  	_, _, errno := syscall.Syscall(syscall.SYS_SET_MEMPOLICY,
   120  		uintptr(mode), mask, maxnode)
   121  	if errno != 0 {
   122  		err = errno
   123  	}
   124  	return
   125  }
   126  
   127  // MBind sets the NUMA memory policy, which consists of a policy mode and zero
   128  // or more nodes, for the memory range starting with addr and continuing for
   129  // length bytes. The memory policy defines from which node memory is allocated.
   130  // Details to see manpage of mbind.
   131  //
   132  // If the memory range specified by the addr and length arguments includes an
   133  // "anonymous" region of memory that is a region of memory created using the
   134  // mmap(2) system call with the MAP_ANONYMOUS or a memory mapped file, mapped
   135  // using the mmap(2) system call with the MAP_PRIVATE flag, pages will be
   136  // allocated only according to the specified policy when the application writes
   137  // [stores] to the page. For anonymous regions, an initial read access will
   138  // use a shared page in the kernel containing all zeros. For a file mapped with
   139  // MAP_PRIVATE, an initial read access will allocate pages according to the
   140  // process policy of the process that causes the page to be allocated. This may
   141  // not be the process that called mbind().
   142  //
   143  // The specified policy will be ignored for any MAP_SHARED mappings in the
   144  // specified memory range. Rather the pages will be allocated according to the
   145  // process policy of the process that caused the page to be allocated. Again,
   146  // this may not be the process that called mbind().
   147  //
   148  // If the specified memory range includes a shared memory region created using
   149  // the shmget(2) system call and attached using the shmat(2) system call, pages
   150  // allocated for the anonymous or shared memory region will be allocated
   151  // according to the policy specified, regardless which process attached to the
   152  // shared  memory segment causes the allocation. If, however, the shared memory
   153  // region was created with the SHM_HUGETLB flag, the huge pages will be
   154  // allocated according to the policy specified only if the page allocation is
   155  // caused by the process that calls mbind() for that region.
   156  //
   157  // By default, mbind() has an effect only for new allocations; if the pages
   158  // inside the range have been already touched before setting the policy, then
   159  // the policy has no effect. This default behavior may be overridden by the
   160  // MPOL_MF_MOVE and MPOL_MF_MOVE_ALL flags described below.
   161  func MBind(addr unsafe.Pointer, length, mode, flags int, nodemask Bitmask) (err error) {
   162  	var mask, maxnode uintptr
   163  	if maxnode = uintptr(nodemask.Len()); maxnode != 0 {
   164  		mask = uintptr(unsafe.Pointer(&nodemask[0]))
   165  	}
   166  	_, _, errno := syscall.Syscall6(syscall.SYS_MBIND, uintptr(addr),
   167  		uintptr(length), uintptr(mode), mask, maxnode, uintptr(flags))
   168  	if errno != 0 {
   169  		err = errno
   170  	}
   171  	return
   172  }
   173  
   174  // GetSchedAffinity writes the affinity mask of the process whose ID is pid
   175  // into the input mask. If pid is zero, then the mask of the calling process
   176  // is returned.
   177  func GetSchedAffinity(pid int, cpumask Bitmask) (int, error) {
   178  	var mask, maxnode uintptr
   179  	if maxnode = uintptr(cpumask.Len() / 8); maxnode != 0 {
   180  		mask = uintptr(unsafe.Pointer(&cpumask[0]))
   181  	}
   182  	len, _, e1 := syscall.Syscall(syscall.SYS_SCHED_GETAFFINITY,
   183  		uintptr(pid), maxnode, mask)
   184  	if e1 != 0 {
   185  		return 0, e1
   186  	}
   187  	return int(len), nil
   188  }
   189  
   190  // SetSchedAffinity sets the CPU affinity mask of the process whose ID
   191  // is pid to the value specified by mask. If pid is zero, then the calling
   192  // process is used.
   193  func SetSchedAffinity(pid int, cpumask Bitmask) error {
   194  	var mask, maxnode uintptr
   195  	if maxnode = uintptr(cpumask.Len() / 8); maxnode != 0 {
   196  		mask = uintptr(unsafe.Pointer(&cpumask[0]))
   197  	}
   198  	_, _, e1 := syscall.Syscall(syscall.SYS_SCHED_SETAFFINITY,
   199  		uintptr(pid), maxnode, mask)
   200  	if e1 != 0 {
   201  		return e1
   202  	}
   203  	return nil
   204  }
   205  
   206  /*
   207   * (do this the way Paul Jackson's libcpuset does it)
   208   * The nodemask values in /proc/self/status are in an
   209   * ascii format that uses 9 characters for each 32 bits of mask.
   210   * (this could also be used to find the cpumask size)
   211   */
   212  func setupnodemask() (n int) {
   213  	d, err := ioutil.ReadFile("/proc/self/status")
   214  	if err == nil {
   215  		const stp = "Mems_allowed:\t"
   216  		for _, line := range strings.Split(string(d), "\n") {
   217  			if !strings.HasPrefix(line, stp) {
   218  				continue
   219  			}
   220  			n = (len(line) - len(stp) + 1) * 32 / 9
   221  		}
   222  	}
   223  	if n == 0 {
   224  		n = 16
   225  		for n < 4096*8 {
   226  			n <<= 1
   227  			mask := NewBitmask(n)
   228  			if _, err := GetMemPolicy(mask, nil, 0); err != nil && err != syscall.EINVAL {
   229  				break
   230  			}
   231  		}
   232  	}
   233  	return
   234  }
   235  
   236  func setupconfigurednodes() (n int) {
   237  	files, err := ioutil.ReadDir("/sys/devices/system/node")
   238  	if err != nil {
   239  		return 1
   240  	}
   241  	for _, f := range files {
   242  		if !strings.HasPrefix(f.Name(), "node") {
   243  			continue
   244  		}
   245  		i, _ := strconv.Atoi(f.Name()[4:])
   246  		if n < i {
   247  			n = i // maybe some node absence
   248  		}
   249  		numanodes.Set(i, true)
   250  		if _, _, err := NodeMemSize64(i); err == nil {
   251  			memnodes.Set(i, true)
   252  		}
   253  	}
   254  	n++
   255  	return
   256  }
   257  
   258  func setupncpu() (n int) {
   259  	length := 4096
   260  	for {
   261  		mask := NewBitmask(length)
   262  		nn, err := GetSchedAffinity(0, mask)
   263  		if err == nil {
   264  			return nn * 8
   265  		}
   266  		if err != syscall.EINVAL {
   267  			return 128
   268  		}
   269  		length *= 2
   270  	}
   271  }
   272  
   273  func setupnconfiguredcpu() (n int) {
   274  	// sysconf(_SC_NPROCESSORS_CONF)
   275  	files, err := ioutil.ReadDir("/sys/devices/system/cpu")
   276  	if err == nil {
   277  		for _, f := range files {
   278  			if !f.IsDir() || !strings.HasPrefix(f.Name(), "cpu") {
   279  				continue
   280  			}
   281  			if _, err := strconv.Atoi(f.Name()[3:]); err == nil {
   282  				n++
   283  			}
   284  		}
   285  		return
   286  	}
   287  	// fail back
   288  	d, _ := ioutil.ReadFile("/proc/cpuinfo")
   289  	for _, line := range strings.Split(string(d), "\n") {
   290  		if strings.HasPrefix(line, "processor") {
   291  			n++
   292  		}
   293  	}
   294  	if n == 0 {
   295  		n = 1
   296  	}
   297  	return
   298  }
   299  
   300  func setupconstraints() {
   301  	node2cpu = make(map[int]Bitmask)
   302  	cpu2node = make(map[int]int)
   303  	for i := 0; i < numanodes.Len(); i++ {
   304  		if !numanodes.Get(i) {
   305  			continue
   306  		}
   307  		fname := fmt.Sprintf("/sys/devices/system/node/node%d/cpumap", i)
   308  		d, err := ioutil.ReadFile(fname)
   309  		if err != nil {
   310  			continue
   311  		}
   312  		nn := 32
   313  		cpumask := NewBitmask(CPUCount())
   314  		tokens := strings.Split(strings.TrimSpace(string(d)), ",")
   315  		for j := 0; j < len(tokens); j++ {
   316  			mask, _ := strconv.ParseUint(tokens[len(tokens)-1-j], 16, 64)
   317  			for k := 0; k < nn; k++ {
   318  				if (mask>>uint64(k))&0x01 != 0 {
   319  					cpumask.Set(k+j*nn, true)
   320  				}
   321  			}
   322  		}
   323  		node2cpu[i] = cpumask
   324  		for j := 0; j < cpumask.Len(); j++ {
   325  			if cpumask.Get(j) {
   326  				cpu2node[j] = i
   327  			}
   328  		}
   329  	}
   330  }
   331  
   332  // NodeMemSize64 return the memory total size and free size of given node.
   333  func NodeMemSize64(node int) (total int64, free int64, err error) {
   334  	var (
   335  		d     []byte
   336  		fname = fmt.Sprintf("/sys/devices/system/node/node%d/meminfo", node)
   337  	)
   338  	d, err = ioutil.ReadFile(fname)
   339  	if err != nil {
   340  		return
   341  	}
   342  	split := func(s, d string) string {
   343  		return strings.TrimFunc(
   344  			s[strings.Index(s, d)+len(d):], func(x rune) bool {
   345  				return x < '0' || x > '9'
   346  			})
   347  	}
   348  	for _, line := range strings.Split(string(d), "\n") {
   349  		if !strings.HasSuffix(line, "kB") {
   350  			continue
   351  		}
   352  		switch {
   353  		case strings.Contains(line, "MemTotal"):
   354  			total, err = strconv.ParseInt(split(line, "MemTotal"), 10, 64)
   355  			if err != nil {
   356  				return
   357  			}
   358  			total *= 1024
   359  		case strings.Contains(line, "MemFree"):
   360  			free, err = strconv.ParseInt(split(line, "MemFree:"), 10, 64)
   361  			if err != nil {
   362  				return
   363  			}
   364  			free *= 1024
   365  		}
   366  	}
   367  	return
   368  }