github.com/cilium/ebpf@v0.16.0/link/kprobe.go (about)

     1  package link
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"runtime"
     8  	"strings"
     9  	"unsafe"
    10  
    11  	"github.com/cilium/ebpf"
    12  	"github.com/cilium/ebpf/internal"
    13  	"github.com/cilium/ebpf/internal/sys"
    14  	"github.com/cilium/ebpf/internal/tracefs"
    15  	"github.com/cilium/ebpf/internal/unix"
    16  )
    17  
    18  // KprobeOptions defines additional parameters that will be used
    19  // when loading Kprobes.
    20  type KprobeOptions struct {
    21  	// Arbitrary value that can be fetched from an eBPF program
    22  	// via `bpf_get_attach_cookie()`.
    23  	//
    24  	// Needs kernel 5.15+.
    25  	Cookie uint64
    26  	// Offset of the kprobe relative to the traced symbol.
    27  	// Can be used to insert kprobes at arbitrary offsets in kernel functions,
    28  	// e.g. in places where functions have been inlined.
    29  	Offset uint64
    30  	// Increase the maximum number of concurrent invocations of a kretprobe.
    31  	// Required when tracing some long running functions in the kernel.
    32  	//
    33  	// Deprecated: this setting forces the use of an outdated kernel API and is not portable
    34  	// across kernel versions.
    35  	RetprobeMaxActive int
    36  	// Prefix used for the event name if the kprobe must be attached using tracefs.
    37  	// The group name will be formatted as `<prefix>_<randomstr>`.
    38  	// The default empty string is equivalent to "ebpf" as the prefix.
    39  	TraceFSPrefix string
    40  }
    41  
    42  func (ko *KprobeOptions) cookie() uint64 {
    43  	if ko == nil {
    44  		return 0
    45  	}
    46  	return ko.Cookie
    47  }
    48  
    49  // Kprobe attaches the given eBPF program to a perf event that fires when the
    50  // given kernel symbol starts executing. See /proc/kallsyms for available
    51  // symbols. For example, printk():
    52  //
    53  //	kp, err := Kprobe("printk", prog, nil)
    54  //
    55  // Losing the reference to the resulting Link (kp) will close the Kprobe
    56  // and prevent further execution of prog. The Link must be Closed during
    57  // program shutdown to avoid leaking system resources.
    58  //
    59  // If attaching to symbol fails, automatically retries with the running
    60  // platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
    61  // in a portable fashion.
    62  //
    63  // The returned Link may implement [PerfEvent].
    64  func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
    65  	k, err := kprobe(symbol, prog, opts, false)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	lnk, err := attachPerfEvent(k, prog, opts.cookie())
    71  	if err != nil {
    72  		k.Close()
    73  		return nil, err
    74  	}
    75  
    76  	return lnk, nil
    77  }
    78  
    79  // Kretprobe attaches the given eBPF program to a perf event that fires right
    80  // before the given kernel symbol exits, with the function stack left intact.
    81  // See /proc/kallsyms for available symbols. For example, printk():
    82  //
    83  //	kp, err := Kretprobe("printk", prog, nil)
    84  //
    85  // Losing the reference to the resulting Link (kp) will close the Kretprobe
    86  // and prevent further execution of prog. The Link must be Closed during
    87  // program shutdown to avoid leaking system resources.
    88  //
    89  // If attaching to symbol fails, automatically retries with the running
    90  // platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
    91  // in a portable fashion.
    92  //
    93  // On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol
    94  // incorrectly returns unix.EINVAL instead of os.ErrNotExist.
    95  //
    96  // The returned Link may implement [PerfEvent].
    97  func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
    98  	k, err := kprobe(symbol, prog, opts, true)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	lnk, err := attachPerfEvent(k, prog, opts.cookie())
   104  	if err != nil {
   105  		k.Close()
   106  		return nil, err
   107  	}
   108  
   109  	return lnk, nil
   110  }
   111  
   112  // isValidKprobeSymbol implements the equivalent of a regex match
   113  // against "^[a-zA-Z_][0-9a-zA-Z_.]*$".
   114  func isValidKprobeSymbol(s string) bool {
   115  	if len(s) < 1 {
   116  		return false
   117  	}
   118  
   119  	for i, c := range []byte(s) {
   120  		switch {
   121  		case c >= 'a' && c <= 'z':
   122  		case c >= 'A' && c <= 'Z':
   123  		case c == '_':
   124  		case i > 0 && c >= '0' && c <= '9':
   125  
   126  		// Allow `.` in symbol name. GCC-compiled kernel may change symbol name
   127  		// to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`.
   128  		// See: https://gcc.gnu.org/gcc-10/changes.html
   129  		case i > 0 && c == '.':
   130  
   131  		default:
   132  			return false
   133  		}
   134  	}
   135  
   136  	return true
   137  }
   138  
   139  // kprobe opens a perf event on the given symbol and attaches prog to it.
   140  // If ret is true, create a kretprobe.
   141  func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) {
   142  	if symbol == "" {
   143  		return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput)
   144  	}
   145  	if prog == nil {
   146  		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
   147  	}
   148  	if !isValidKprobeSymbol(symbol) {
   149  		return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput)
   150  	}
   151  	if prog.Type() != ebpf.Kprobe {
   152  		return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
   153  	}
   154  
   155  	args := tracefs.ProbeArgs{
   156  		Type:   tracefs.Kprobe,
   157  		Pid:    perfAllThreads,
   158  		Symbol: symbol,
   159  		Ret:    ret,
   160  	}
   161  
   162  	if opts != nil {
   163  		args.RetprobeMaxActive = opts.RetprobeMaxActive
   164  		args.Cookie = opts.Cookie
   165  		args.Offset = opts.Offset
   166  		args.Group = opts.TraceFSPrefix
   167  	}
   168  
   169  	// Use kprobe PMU if the kernel has it available.
   170  	tp, err := pmuProbe(args)
   171  	if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
   172  		if prefix := internal.PlatformPrefix(); prefix != "" {
   173  			args.Symbol = prefix + symbol
   174  			tp, err = pmuProbe(args)
   175  		}
   176  	}
   177  	if err == nil {
   178  		return tp, nil
   179  	}
   180  	if err != nil && !errors.Is(err, ErrNotSupported) {
   181  		return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err)
   182  	}
   183  
   184  	// Use tracefs if kprobe PMU is missing.
   185  	args.Symbol = symbol
   186  	tp, err = tracefsProbe(args)
   187  	if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
   188  		if prefix := internal.PlatformPrefix(); prefix != "" {
   189  			args.Symbol = prefix + symbol
   190  			tp, err = tracefsProbe(args)
   191  		}
   192  	}
   193  	if err != nil {
   194  		return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err)
   195  	}
   196  
   197  	return tp, nil
   198  }
   199  
   200  // pmuProbe opens a perf event based on a Performance Monitoring Unit.
   201  //
   202  // Requires at least a 4.17 kernel.
   203  // e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
   204  // 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
   205  //
   206  // Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
   207  func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
   208  	// Getting the PMU type will fail if the kernel doesn't support
   209  	// the perf_[k,u]probe PMU.
   210  	eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type")
   211  	if errors.Is(err, os.ErrNotExist) {
   212  		return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported)
   213  	}
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  
   218  	// Use tracefs if we want to set kretprobe's retprobeMaxActive.
   219  	if args.RetprobeMaxActive != 0 {
   220  		return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
   221  	}
   222  
   223  	var config uint64
   224  	if args.Ret {
   225  		bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe")
   226  		if err != nil {
   227  			return nil, err
   228  		}
   229  		config |= 1 << bit
   230  	}
   231  
   232  	var (
   233  		attr  unix.PerfEventAttr
   234  		sp    unsafe.Pointer
   235  		token string
   236  	)
   237  	switch args.Type {
   238  	case tracefs.Kprobe:
   239  		// Create a pointer to a NUL-terminated string for the kernel.
   240  		sp, err = unsafeStringPtr(args.Symbol)
   241  		if err != nil {
   242  			return nil, err
   243  		}
   244  
   245  		token = tracefs.KprobeToken(args)
   246  
   247  		attr = unix.PerfEventAttr{
   248  			// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
   249  			// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
   250  			Size:   unix.PERF_ATTR_SIZE_VER1,
   251  			Type:   uint32(eventType),   // PMU event type read from sysfs
   252  			Ext1:   uint64(uintptr(sp)), // Kernel symbol to trace
   253  			Ext2:   args.Offset,         // Kernel symbol offset
   254  			Config: config,              // Retprobe flag
   255  		}
   256  	case tracefs.Uprobe:
   257  		sp, err = unsafeStringPtr(args.Path)
   258  		if err != nil {
   259  			return nil, err
   260  		}
   261  
   262  		if args.RefCtrOffset != 0 {
   263  			config |= args.RefCtrOffset << uprobeRefCtrOffsetShift
   264  		}
   265  
   266  		token = tracefs.UprobeToken(args)
   267  
   268  		attr = unix.PerfEventAttr{
   269  			// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
   270  			// since it added the config2 (Ext2) field. The Size field controls the
   271  			// size of the internal buffer the kernel allocates for reading the
   272  			// perf_event_attr argument from userspace.
   273  			Size:   unix.PERF_ATTR_SIZE_VER1,
   274  			Type:   uint32(eventType),   // PMU event type read from sysfs
   275  			Ext1:   uint64(uintptr(sp)), // Uprobe path
   276  			Ext2:   args.Offset,         // Uprobe offset
   277  			Config: config,              // RefCtrOffset, Retprobe flag
   278  		}
   279  	}
   280  
   281  	cpu := 0
   282  	if args.Pid != perfAllThreads {
   283  		cpu = -1
   284  	}
   285  	rawFd, err := unix.PerfEventOpen(&attr, args.Pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
   286  
   287  	// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
   288  	// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
   289  	// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
   290  	if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") {
   291  		return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)
   292  	}
   293  	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
   294  	// when trying to create a retprobe for a missing symbol.
   295  	if errors.Is(err, os.ErrNotExist) {
   296  		return nil, fmt.Errorf("token %s: not found: %w", token, err)
   297  	}
   298  	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
   299  	// to an invalid insn boundary. The exact conditions that trigger this error are
   300  	// arch specific however.
   301  	if errors.Is(err, unix.EILSEQ) {
   302  		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
   303  	}
   304  	// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
   305  	// when attempting to set a uprobe on a trap instruction.
   306  	if errors.Is(err, sys.ENOTSUPP) {
   307  		return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err)
   308  	}
   309  
   310  	if err != nil {
   311  		return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
   312  	}
   313  
   314  	// Ensure the string pointer is not collected before PerfEventOpen returns.
   315  	runtime.KeepAlive(sp)
   316  
   317  	fd, err := sys.NewFD(rawFd)
   318  	if err != nil {
   319  		return nil, err
   320  	}
   321  
   322  	// Kernel has perf_[k,u]probe PMU available, initialize perf event.
   323  	return newPerfEvent(fd, nil), nil
   324  }
   325  
   326  // tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
   327  // A new trace event group name is generated on every call to support creating
   328  // multiple trace events for the same kernel or userspace symbol.
   329  // Path and offset are only set in the case of uprobe(s) and are used to set
   330  // the executable/library path on the filesystem and the offset where the probe is inserted.
   331  // A perf event is then opened on the newly-created trace event and returned to the caller.
   332  func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
   333  	groupPrefix := "ebpf"
   334  	if args.Group != "" {
   335  		groupPrefix = args.Group
   336  	}
   337  
   338  	// Generate a random string for each trace event we attempt to create.
   339  	// This value is used as the 'group' token in tracefs to allow creating
   340  	// multiple kprobe trace events with the same name.
   341  	group, err := tracefs.RandomGroup(groupPrefix)
   342  	if err != nil {
   343  		return nil, fmt.Errorf("randomizing group name: %w", err)
   344  	}
   345  	args.Group = group
   346  
   347  	// Create the [k,u]probe trace event using tracefs.
   348  	evt, err := tracefs.NewEvent(args)
   349  	if err != nil {
   350  		return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
   351  	}
   352  
   353  	// Kprobes are ephemeral tracepoints and share the same perf event type.
   354  	fd, err := openTracepointPerfEvent(evt.ID(), args.Pid)
   355  	if err != nil {
   356  		// Make sure we clean up the created tracefs event when we return error.
   357  		// If a livepatch handler is already active on the symbol, the write to
   358  		// tracefs will succeed, a trace event will show up, but creating the
   359  		// perf event will fail with EBUSY.
   360  		_ = evt.Close()
   361  		return nil, err
   362  	}
   363  
   364  	return newPerfEvent(fd, evt), nil
   365  }