github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/rlimit/rlimit.go (about)

     1  // Package rlimit allows raising RLIMIT_MEMLOCK if necessary for the use of BPF.
     2  package rlimit
     3  
     4  import (
     5  	"errors"
     6  	"fmt"
     7  	"sync"
     8  
     9  	"github.com/cilium/ebpf/internal"
    10  	"github.com/cilium/ebpf/internal/sys"
    11  	"github.com/cilium/ebpf/internal/unix"
    12  )
    13  
    14  var (
    15  	unsupportedMemcgAccounting = &internal.UnsupportedFeatureError{
    16  		MinimumVersion: internal.Version{5, 11, 0},
    17  		Name:           "memcg-based accounting for BPF memory",
    18  	}
    19  	haveMemcgAccounting error
    20  
    21  	rlimitMu sync.Mutex
    22  )
    23  
    24  func init() {
    25  	// We have to run this feature test at init, since it relies on changing
    26  	// RLIMIT_MEMLOCK. Doing so is not safe in a concurrent program. Instead,
    27  	// we rely on the initialization order guaranteed by the Go runtime to
    28  	// execute the test in a safe environment:
    29  	//
    30  	//    the invocation of init functions happens in a single goroutine,
    31  	//    sequentially, one package at a time.
    32  	//
    33  	// This is also the reason why RemoveMemlock is in its own package:
    34  	// we only want to run the initializer if RemoveMemlock is called
    35  	// from somewhere.
    36  	haveMemcgAccounting = detectMemcgAccounting()
    37  }
    38  
    39  func detectMemcgAccounting() error {
    40  	// Retrieve the original limit to prevent lowering Max, since
    41  	// doing so is a permanent operation when running unprivileged.
    42  	var oldLimit unix.Rlimit
    43  	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &oldLimit); err != nil {
    44  		return fmt.Errorf("getting original memlock rlimit: %s", err)
    45  	}
    46  
    47  	// Drop the current limit to zero, maintaining the old Max value.
    48  	// This is always permitted by the kernel for unprivileged users.
    49  	// Retrieve a new copy of the old limit tuple to minimize the chances
    50  	// of failing the restore operation below.
    51  	zeroLimit := unix.Rlimit{Cur: 0, Max: oldLimit.Max}
    52  	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &zeroLimit, &oldLimit); err != nil {
    53  		return fmt.Errorf("lowering memlock rlimit: %s", err)
    54  	}
    55  
    56  	attr := sys.MapCreateAttr{
    57  		MapType:    2, /* Array */
    58  		KeySize:    4,
    59  		ValueSize:  4,
    60  		MaxEntries: 1,
    61  	}
    62  
    63  	// Creating a map allocates shared (and locked) memory that counts against
    64  	// the rlimit on pre-5.11 kernels, but against the memory cgroup budget on
    65  	// kernels 5.11 and over. If this call succeeds with the process' memlock
    66  	// rlimit set to 0, we can reasonably assume memcg accounting is supported.
    67  	fd, mapErr := sys.MapCreate(&attr)
    68  
    69  	// Restore old limits regardless of what happened.
    70  	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &oldLimit, nil); err != nil {
    71  		return fmt.Errorf("restoring old memlock rlimit: %s", err)
    72  	}
    73  
    74  	// Map creation successful, memcg accounting supported.
    75  	if mapErr == nil {
    76  		fd.Close()
    77  		return nil
    78  	}
    79  
    80  	// EPERM shows up when map creation would exceed the memory budget.
    81  	if errors.Is(mapErr, unix.EPERM) {
    82  		return unsupportedMemcgAccounting
    83  	}
    84  
    85  	// This shouldn't happen really.
    86  	return fmt.Errorf("unexpected error detecting memory cgroup accounting: %s", mapErr)
    87  }
    88  
    89  // RemoveMemlock removes the limit on the amount of memory the current
    90  // process can lock into RAM, if necessary.
    91  //
    92  // This is not required to load eBPF resources on kernel versions 5.11+
    93  // due to the introduction of cgroup-based memory accounting. On such kernels
    94  // the function is a no-op.
    95  //
    96  // Since the function may change global per-process limits it should be invoked
    97  // at program start up, in main() or init().
    98  //
    99  // This function exists as a convenience and should only be used when
   100  // permanently raising RLIMIT_MEMLOCK to infinite is appropriate. Consider
   101  // invoking prlimit(2) directly with a more reasonable limit if desired.
   102  //
   103  // Requires CAP_SYS_RESOURCE on kernels < 5.11.
   104  func RemoveMemlock() error {
   105  	if haveMemcgAccounting == nil {
   106  		return nil
   107  	}
   108  
   109  	if !errors.Is(haveMemcgAccounting, unsupportedMemcgAccounting) {
   110  		return haveMemcgAccounting
   111  	}
   112  
   113  	rlimitMu.Lock()
   114  	defer rlimitMu.Unlock()
   115  
   116  	// pid 0 affects the current process. Requires CAP_SYS_RESOURCE.
   117  	newLimit := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY}
   118  	if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &newLimit, nil); err != nil {
   119  		return fmt.Errorf("failed to set memlock rlimit: %w", err)
   120  	}
   121  
   122  	return nil
   123  }