github.com/lrita/numa@v1.0.2/numa_linux.go (about) 1 //go:build linux 2 // +build linux 3 4 package numa 5 6 import ( 7 "fmt" 8 "io/ioutil" 9 "strconv" 10 "strings" 11 "syscall" 12 "unsafe" 13 ) 14 15 func init() { 16 _, _, e1 := syscall.Syscall6(syscall.SYS_GET_MEMPOLICY, 0, 0, 0, 0, 0, 0) 17 available = e1 != syscall.ENOSYS 18 nnodemax = setupnodemask() // max nodes 19 memnodes = NewBitmask(NodePossibleCount()) 20 numanodes = NewBitmask(NodePossibleCount()) 21 nconfigurednode = setupconfigurednodes() // configured nodes 22 ncpumax = setupncpu() // max cpu 23 nconfiguredcpu = setupnconfiguredcpu() // configured cpu 24 setupconstraints() 25 } 26 27 // GetMemPolicy retrieves the NUMA policy of the calling process or of a 28 // memory address, depending on the setting of flags. 29 // Details to see manpage of get_mempolicy. 30 // 31 // If flags is specified as 0, then information about the calling process's 32 // default policy (as set by set_mempolicy(2)) is returned. The policy 33 // returned [mode and nodemask] may be used to restore the process's policy 34 // to its state at the time of the call to get_mempolicy() using set_mempolicy(2). 35 // 36 // If flags specifies MPOL_F_MEMS_ALLOWED (available since Linux 2.6.24), 37 // the mode argument is ignored and the set of nodes [memories] that the 38 // process is allowed to specify in subsequent calls to mbind(2) or 39 // set_mempolicy(2) [in the absence of any mode flags] is returned in 40 // nodemask. It is not permitted to combine MPOL_F_MEMS_ALLOWED with 41 // either MPOL_F_ADDR or MPOL_F_NODE. 42 // 43 // If flags specifies MPOL_F_ADDR, then information is returned about the 44 // policy governing the memory address given in addr. This policy may be 45 // different from the process's default policy if mbind(2) or one of the 46 // helper functions described in numa(3) has been used to establish a policy 47 // for the memory range containing addr. 
48 // 49 // If flags specifies both MPOL_F_NODE and MPOL_F_ADDR, get_mempolicy() will 50 // return the node ID of the node on which the address addr is allocated into 51 // the location pointed to by mode. If no page has yet been allocated for the 52 // specified address, get_mempolicy() will allocate a page as if the process 53 // had performed a read [load] access to that address, and return the ID of 54 // the node where that page was allocated. 55 // 56 // If flags specifies MPOL_F_NODE, but not MPOL_F_ADDR, and the process's 57 // current policy is MPOL_INTERLEAVE, then get_mempolicy() will return in 58 // the location pointed to by a non-NULL mode argument, the node ID of the 59 // next node that will be used for interleaving of internal kernel pages 60 // allocated on behalf of the process. These allocations include pages for 61 // memory mapped files in process memory ranges mapped using the mmap(2) 62 // call with the MAP_PRIVATE flag for read accesses, and in memory ranges 63 // mapped with the MAP_SHARED flag for all accesses. 64 func GetMemPolicy(nodemask Bitmask, addr unsafe.Pointer, flags int) (mode int, err error) { 65 var mask, maxnode uintptr 66 if maxnode = uintptr(nodemask.Len()); maxnode != 0 { 67 mask = uintptr(unsafe.Pointer(&nodemask[0])) 68 } 69 _, _, errno := syscall.Syscall6(syscall.SYS_GET_MEMPOLICY, 70 uintptr(unsafe.Pointer(&mode)), mask, maxnode, 71 uintptr(addr), uintptr(flags), 0) 72 if errno != 0 { 73 err = errno 74 } 75 return 76 } 77 78 // SetMemPolicy sets the NUMA memory policy of the calling process, which 79 // consists of a policy mode and zero or more nodes, to the values specified 80 // by the mode, nodemask and maxnode arguments. 81 // Details to see manpage of set_mempolicy. 82 // 83 // A NUMA machine has different memory controllers with different distances 84 // to specific CPUs. The memory policy defines from which node memory is 85 // allocated for the process. 
//
// This system call defines the default policy for the process. The process
// policy governs allocation of pages in the process's address space outside
// of memory ranges controlled by a more specific policy set by mbind(2). The
// process default policy also controls allocation of any pages for memory
// mapped files mapped using the mmap(2) call with the MAP_PRIVATE flag and
// that are only read [loaded] from by the process and of memory mapped files
// mapped using the mmap(2) call with the MAP_SHARED flag, regardless of the
// access type. The policy is applied only when a new page is allocated for the
// process. For anonymous memory this is when the page is first touched by the
// application.
//
// The mode argument must specify one of MPOL_DEFAULT, MPOL_BIND,
// MPOL_INTERLEAVE or MPOL_PREFERRED.
// All modes except MPOL_DEFAULT require the caller to specify via the nodemask
// argument one or more nodes.
//
// The mode argument may also include an optional mode flag. The supported mode
// flags are: MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES.
//
// Where a nodemask is required, it must contain at least one node that is
// on-line, allowed by the process's current cpuset context,
// [unless the MPOL_F_STATIC_NODES mode flag is specified], and contains memory.
// If the MPOL_F_STATIC_NODES is set in mode and a required nodemask contains
// no nodes that are allowed by the process's current cpuset context, the memory
// policy reverts to local allocation. This effectively overrides the
// specified policy until the process's cpuset context includes one or more of
// the nodes specified by nodemask.
114 func SetMemPolicy(mode int, nodemask Bitmask) (err error) { 115 var mask, maxnode uintptr 116 if maxnode = uintptr(nodemask.Len()); maxnode != 0 { 117 mask = uintptr(unsafe.Pointer(&nodemask[0])) 118 } 119 _, _, errno := syscall.Syscall(syscall.SYS_SET_MEMPOLICY, 120 uintptr(mode), mask, maxnode) 121 if errno != 0 { 122 err = errno 123 } 124 return 125 } 126 127 // MBind sets the NUMA memory policy, which consists of a policy mode and zero 128 // or more nodes, for the memory range starting with addr and continuing for 129 // length bytes. The memory policy defines from which node memory is allocated. 130 // Details to see manpage of mbind. 131 // 132 // If the memory range specified by the addr and length arguments includes an 133 // "anonymous" region of memory that is a region of memory created using the 134 // mmap(2) system call with the MAP_ANONYMOUS or a memory mapped file, mapped 135 // using the mmap(2) system call with the MAP_PRIVATE flag, pages will be 136 // allocated only according to the specified policy when the application writes 137 // [stores] to the page. For anonymous regions, an initial read access will 138 // use a shared page in the kernel containing all zeros. For a file mapped with 139 // MAP_PRIVATE, an initial read access will allocate pages according to the 140 // process policy of the process that causes the page to be allocated. This may 141 // not be the process that called mbind(). 142 // 143 // The specified policy will be ignored for any MAP_SHARED mappings in the 144 // specified memory range. Rather the pages will be allocated according to the 145 // process policy of the process that caused the page to be allocated. Again, 146 // this may not be the process that called mbind(). 
147 // 148 // If the specified memory range includes a shared memory region created using 149 // the shmget(2) system call and attached using the shmat(2) system call, pages 150 // allocated for the anonymous or shared memory region will be allocated 151 // according to the policy specified, regardless which process attached to the 152 // shared memory segment causes the allocation. If, however, the shared memory 153 // region was created with the SHM_HUGETLB flag, the huge pages will be 154 // allocated according to the policy specified only if the page allocation is 155 // caused by the process that calls mbind() for that region. 156 // 157 // By default, mbind() has an effect only for new allocations; if the pages 158 // inside the range have been already touched before setting the policy, then 159 // the policy has no effect. This default behavior may be overridden by the 160 // MPOL_MF_MOVE and MPOL_MF_MOVE_ALL flags described below. 161 func MBind(addr unsafe.Pointer, length, mode, flags int, nodemask Bitmask) (err error) { 162 var mask, maxnode uintptr 163 if maxnode = uintptr(nodemask.Len()); maxnode != 0 { 164 mask = uintptr(unsafe.Pointer(&nodemask[0])) 165 } 166 _, _, errno := syscall.Syscall6(syscall.SYS_MBIND, uintptr(addr), 167 uintptr(length), uintptr(mode), mask, maxnode, uintptr(flags)) 168 if errno != 0 { 169 err = errno 170 } 171 return 172 } 173 174 // GetSchedAffinity writes the affinity mask of the process whose ID is pid 175 // into the input mask. If pid is zero, then the mask of the calling process 176 // is returned. 
177 func GetSchedAffinity(pid int, cpumask Bitmask) (int, error) { 178 var mask, maxnode uintptr 179 if maxnode = uintptr(cpumask.Len() / 8); maxnode != 0 { 180 mask = uintptr(unsafe.Pointer(&cpumask[0])) 181 } 182 len, _, e1 := syscall.Syscall(syscall.SYS_SCHED_GETAFFINITY, 183 uintptr(pid), maxnode, mask) 184 if e1 != 0 { 185 return 0, e1 186 } 187 return int(len), nil 188 } 189 190 // SetSchedAffinity sets the CPU affinity mask of the process whose ID 191 // is pid to the value specified by mask. If pid is zero, then the calling 192 // process is used. 193 func SetSchedAffinity(pid int, cpumask Bitmask) error { 194 var mask, maxnode uintptr 195 if maxnode = uintptr(cpumask.Len() / 8); maxnode != 0 { 196 mask = uintptr(unsafe.Pointer(&cpumask[0])) 197 } 198 _, _, e1 := syscall.Syscall(syscall.SYS_SCHED_SETAFFINITY, 199 uintptr(pid), maxnode, mask) 200 if e1 != 0 { 201 return e1 202 } 203 return nil 204 } 205 206 /* 207 * (do this the way Paul Jackson's libcpuset does it) 208 * The nodemask values in /proc/self/status are in an 209 * ascii format that uses 9 characters for each 32 bits of mask. 
210 * (this could also be used to find the cpumask size) 211 */ 212 func setupnodemask() (n int) { 213 d, err := ioutil.ReadFile("/proc/self/status") 214 if err == nil { 215 const stp = "Mems_allowed:\t" 216 for _, line := range strings.Split(string(d), "\n") { 217 if !strings.HasPrefix(line, stp) { 218 continue 219 } 220 n = (len(line) - len(stp) + 1) * 32 / 9 221 } 222 } 223 if n == 0 { 224 n = 16 225 for n < 4096*8 { 226 n <<= 1 227 mask := NewBitmask(n) 228 if _, err := GetMemPolicy(mask, nil, 0); err != nil && err != syscall.EINVAL { 229 break 230 } 231 } 232 } 233 return 234 } 235 236 func setupconfigurednodes() (n int) { 237 files, err := ioutil.ReadDir("/sys/devices/system/node") 238 if err != nil { 239 return 1 240 } 241 for _, f := range files { 242 if !strings.HasPrefix(f.Name(), "node") { 243 continue 244 } 245 i, _ := strconv.Atoi(f.Name()[4:]) 246 if n < i { 247 n = i // maybe some node absence 248 } 249 numanodes.Set(i, true) 250 if _, _, err := NodeMemSize64(i); err == nil { 251 memnodes.Set(i, true) 252 } 253 } 254 n++ 255 return 256 } 257 258 func setupncpu() (n int) { 259 length := 4096 260 for { 261 mask := NewBitmask(length) 262 nn, err := GetSchedAffinity(0, mask) 263 if err == nil { 264 return nn * 8 265 } 266 if err != syscall.EINVAL { 267 return 128 268 } 269 length *= 2 270 } 271 } 272 273 func setupnconfiguredcpu() (n int) { 274 // sysconf(_SC_NPROCESSORS_CONF) 275 files, err := ioutil.ReadDir("/sys/devices/system/cpu") 276 if err == nil { 277 for _, f := range files { 278 if !f.IsDir() || !strings.HasPrefix(f.Name(), "cpu") { 279 continue 280 } 281 if _, err := strconv.Atoi(f.Name()[3:]); err == nil { 282 n++ 283 } 284 } 285 return 286 } 287 // fail back 288 d, _ := ioutil.ReadFile("/proc/cpuinfo") 289 for _, line := range strings.Split(string(d), "\n") { 290 if strings.HasPrefix(line, "processor") { 291 n++ 292 } 293 } 294 if n == 0 { 295 n = 1 296 } 297 return 298 } 299 300 func setupconstraints() { 301 node2cpu = make(map[int]Bitmask) 
302 cpu2node = make(map[int]int) 303 for i := 0; i < numanodes.Len(); i++ { 304 if !numanodes.Get(i) { 305 continue 306 } 307 fname := fmt.Sprintf("/sys/devices/system/node/node%d/cpumap", i) 308 d, err := ioutil.ReadFile(fname) 309 if err != nil { 310 continue 311 } 312 nn := 32 313 cpumask := NewBitmask(CPUCount()) 314 tokens := strings.Split(strings.TrimSpace(string(d)), ",") 315 for j := 0; j < len(tokens); j++ { 316 mask, _ := strconv.ParseUint(tokens[len(tokens)-1-j], 16, 64) 317 for k := 0; k < nn; k++ { 318 if (mask>>uint64(k))&0x01 != 0 { 319 cpumask.Set(k+j*nn, true) 320 } 321 } 322 } 323 node2cpu[i] = cpumask 324 for j := 0; j < cpumask.Len(); j++ { 325 if cpumask.Get(j) { 326 cpu2node[j] = i 327 } 328 } 329 } 330 } 331 332 // NodeMemSize64 return the memory total size and free size of given node. 333 func NodeMemSize64(node int) (total int64, free int64, err error) { 334 var ( 335 d []byte 336 fname = fmt.Sprintf("/sys/devices/system/node/node%d/meminfo", node) 337 ) 338 d, err = ioutil.ReadFile(fname) 339 if err != nil { 340 return 341 } 342 split := func(s, d string) string { 343 return strings.TrimFunc( 344 s[strings.Index(s, d)+len(d):], func(x rune) bool { 345 return x < '0' || x > '9' 346 }) 347 } 348 for _, line := range strings.Split(string(d), "\n") { 349 if !strings.HasSuffix(line, "kB") { 350 continue 351 } 352 switch { 353 case strings.Contains(line, "MemTotal"): 354 total, err = strconv.ParseInt(split(line, "MemTotal"), 10, 64) 355 if err != nil { 356 return 357 } 358 total *= 1024 359 case strings.Contains(line, "MemFree"): 360 free, err = strconv.ParseInt(split(line, "MemFree:"), 10, 64) 361 if err != nil { 362 return 363 } 364 free *= 1024 365 } 366 } 367 return 368 }