github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/rlimit/rlimit.go (about) 1 // Package rlimit allows raising RLIMIT_MEMLOCK if necessary for the use of BPF. 2 package rlimit 3 4 import ( 5 "errors" 6 "fmt" 7 "sync" 8 9 "github.com/cilium/ebpf/internal" 10 "github.com/cilium/ebpf/internal/sys" 11 "github.com/cilium/ebpf/internal/unix" 12 ) 13 14 var ( 15 unsupportedMemcgAccounting = &internal.UnsupportedFeatureError{ 16 MinimumVersion: internal.Version{5, 11, 0}, 17 Name: "memcg-based accounting for BPF memory", 18 } 19 haveMemcgAccounting error 20 21 rlimitMu sync.Mutex 22 ) 23 24 func init() { 25 // We have to run this feature test at init, since it relies on changing 26 // RLIMIT_MEMLOCK. Doing so is not safe in a concurrent program. Instead, 27 // we rely on the initialization order guaranteed by the Go runtime to 28 // execute the test in a safe environment: 29 // 30 // the invocation of init functions happens in a single goroutine, 31 // sequentially, one package at a time. 32 // 33 // This is also the reason why RemoveMemlock is in its own package: 34 // we only want to run the initializer if RemoveMemlock is called 35 // from somewhere. 36 haveMemcgAccounting = detectMemcgAccounting() 37 } 38 39 func detectMemcgAccounting() error { 40 // Retrieve the original limit to prevent lowering Max, since 41 // doing so is a permanent operation when running unprivileged. 42 var oldLimit unix.Rlimit 43 if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &oldLimit); err != nil { 44 return fmt.Errorf("getting original memlock rlimit: %s", err) 45 } 46 47 // Drop the current limit to zero, maintaining the old Max value. 48 // This is always permitted by the kernel for unprivileged users. 49 // Retrieve a new copy of the old limit tuple to minimize the chances 50 // of failing the restore operation below. 51 zeroLimit := unix.Rlimit{Cur: 0, Max: oldLimit.Max} 52 if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &zeroLimit, &oldLimit); err != nil { 53 return fmt.Errorf("lowering memlock rlimit: %s", err) 54 } 55 56 attr := sys.MapCreateAttr{ 57 MapType: 2, /* Array */ 58 KeySize: 4, 59 ValueSize: 4, 60 MaxEntries: 1, 61 } 62 63 // Creating a map allocates shared (and locked) memory that counts against 64 // the rlimit on pre-5.11 kernels, but against the memory cgroup budget on 65 // kernels 5.11 and over. If this call succeeds with the process' memlock 66 // rlimit set to 0, we can reasonably assume memcg accounting is supported. 67 fd, mapErr := sys.MapCreate(&attr) 68 69 // Restore old limits regardless of what happened. 70 if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &oldLimit, nil); err != nil { 71 return fmt.Errorf("restoring old memlock rlimit: %s", err) 72 } 73 74 // Map creation successful, memcg accounting supported. 75 if mapErr == nil { 76 fd.Close() 77 return nil 78 } 79 80 // EPERM shows up when map creation would exceed the memory budget. 81 if errors.Is(mapErr, unix.EPERM) { 82 return unsupportedMemcgAccounting 83 } 84 85 // This shouldn't happen really. 86 return fmt.Errorf("unexpected error detecting memory cgroup accounting: %s", mapErr) 87 } 88 89 // RemoveMemlock removes the limit on the amount of memory the current 90 // process can lock into RAM, if necessary. 91 // 92 // This is not required to load eBPF resources on kernel versions 5.11+ 93 // due to the introduction of cgroup-based memory accounting. On such kernels 94 // the function is a no-op. 95 // 96 // Since the function may change global per-process limits it should be invoked 97 // at program start up, in main() or init(). 98 // 99 // This function exists as a convenience and should only be used when 100 // permanently raising RLIMIT_MEMLOCK to infinite is appropriate. Consider 101 // invoking prlimit(2) directly with a more reasonable limit if desired. 102 // 103 // Requires CAP_SYS_RESOURCE on kernels < 5.11. 104 func RemoveMemlock() error { 105 if haveMemcgAccounting == nil { 106 return nil 107 } 108 109 if !errors.Is(haveMemcgAccounting, unsupportedMemcgAccounting) { 110 return haveMemcgAccounting 111 } 112 113 rlimitMu.Lock() 114 defer rlimitMu.Unlock() 115 116 // pid 0 affects the current process. Requires CAP_SYS_RESOURCE. 117 newLimit := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY} 118 if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &newLimit, nil); err != nil { 119 return fmt.Errorf("failed to set memlock rlimit: %w", err) 120 } 121 122 return nil 123 }