github.com/cilium/ebpf@v0.10.0/link/perf_event.go (about)

     1  package link
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"runtime"
    10  	"strings"
    11  	"sync"
    12  	"unsafe"
    13  
    14  	"github.com/cilium/ebpf"
    15  	"github.com/cilium/ebpf/asm"
    16  	"github.com/cilium/ebpf/internal"
    17  	"github.com/cilium/ebpf/internal/sys"
    18  	"github.com/cilium/ebpf/internal/unix"
    19  )
    20  
    21  // Getting the terminology right is usually the hardest part. For posterity and
    22  // for staying sane during implementation:
    23  //
    24  // - trace event: Representation of a kernel runtime hook. Filesystem entries
    25  //   under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes.
    26  //   Can be instantiated into perf events (see below).
    27  // - tracepoint: A predetermined hook point in the kernel. Exposed as trace
    28  //   events in (sub)directories under <tracefs>/events. Cannot be closed or
    29  //   removed, they are static.
    30  // - k(ret)probe: Ephemeral trace events based on entry or exit points of
    31  //   exported kernel symbols. kprobe-based (tracefs) trace events can be
    32  //   created system-wide by writing to the <tracefs>/kprobe_events file, or
    33  //   they can be scoped to the current process by creating PMU perf events.
     34  // - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries
    35  //   and offsets. uprobe-based (tracefs) trace events can be
    36  //   created system-wide by writing to the <tracefs>/uprobe_events file, or
    37  //   they can be scoped to the current process by creating PMU perf events.
    38  // - perf event: An object instantiated based on an existing trace event or
    39  //   kernel symbol. Referred to by fd in userspace.
    40  //   Exactly one eBPF program can be attached to a perf event. Multiple perf
    41  //   events can be created from a single trace event. Closing a perf event
    42  //   stops any further invocations of the attached eBPF program.
    43  
    44  var (
    45  	tracefsPath = "/sys/kernel/debug/tracing"
    46  
    47  	errInvalidInput = errors.New("invalid input")
    48  )
    49  
    50  const (
    51  	perfAllThreads = -1
    52  )
    53  
    54  type perfEventType uint8
    55  
    56  const (
    57  	tracepointEvent perfEventType = iota
    58  	kprobeEvent
    59  	kretprobeEvent
    60  	uprobeEvent
    61  	uretprobeEvent
    62  )
    63  
    64  // A perfEvent represents a perf event kernel object. Exactly one eBPF program
    65  // can be attached to it. It is created based on a tracefs trace event or a
    66  // Performance Monitoring Unit (PMU).
    67  type perfEvent struct {
    68  	// The event type determines the types of programs that can be attached.
    69  	typ perfEventType
    70  
    71  	// Group and name of the tracepoint/kprobe/uprobe.
    72  	group string
    73  	name  string
    74  
    75  	// PMU event ID read from sysfs. Valid IDs are non-zero.
    76  	pmuID uint64
    77  	// ID of the trace event read from tracefs. Valid IDs are non-zero.
    78  	tracefsID uint64
    79  
    80  	// User provided arbitrary value.
    81  	cookie uint64
    82  
    83  	// This is the perf event FD.
    84  	fd *sys.FD
    85  }
    86  
    87  func (pe *perfEvent) Close() error {
    88  	if err := pe.fd.Close(); err != nil {
    89  		return fmt.Errorf("closing perf event fd: %w", err)
    90  	}
    91  
    92  	switch pe.typ {
    93  	case kprobeEvent, kretprobeEvent:
    94  		// Clean up kprobe tracefs entry.
    95  		if pe.tracefsID != 0 {
    96  			return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
    97  		}
    98  	case uprobeEvent, uretprobeEvent:
    99  		// Clean up uprobe tracefs entry.
   100  		if pe.tracefsID != 0 {
   101  			return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
   102  		}
   103  	case tracepointEvent:
   104  		// Tracepoint trace events don't hold any extra resources.
   105  		return nil
   106  	}
   107  
   108  	return nil
   109  }
   110  
   111  // perfEventLink represents a bpf perf link.
   112  type perfEventLink struct {
   113  	RawLink
   114  	pe *perfEvent
   115  }
   116  
   117  func (pl *perfEventLink) isLink() {}
   118  
   119  // Pinning requires the underlying perf event FD to stay open.
   120  //
   121  // | PerfEvent FD | BpfLink FD | Works |
   122  // |--------------|------------|-------|
   123  // | Open         | Open       | Yes   |
   124  // | Closed       | Open       | No    |
   125  // | Open         | Closed     | No (Pin() -> EINVAL) |
   126  // | Closed       | Closed     | No (Pin() -> EINVAL) |
   127  //
   128  // There is currently no pretty way to recover the perf event FD
   129  // when loading a pinned link, so leave as not supported for now.
   130  func (pl *perfEventLink) Pin(string) error {
   131  	return fmt.Errorf("perf event link pin: %w", ErrNotSupported)
   132  }
   133  
   134  func (pl *perfEventLink) Unpin() error {
   135  	return fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
   136  }
   137  
   138  func (pl *perfEventLink) Close() error {
   139  	if err := pl.pe.Close(); err != nil {
   140  		return fmt.Errorf("perf event link close: %w", err)
   141  	}
   142  	return pl.fd.Close()
   143  }
   144  
   145  func (pl *perfEventLink) Update(prog *ebpf.Program) error {
   146  	return fmt.Errorf("perf event link update: %w", ErrNotSupported)
   147  }
   148  
   149  // perfEventIoctl implements Link and handles the perf event lifecycle
   150  // via ioctl().
   151  type perfEventIoctl struct {
   152  	*perfEvent
   153  }
   154  
   155  func (pi *perfEventIoctl) isLink() {}
   156  
   157  // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
   158  // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
   159  // owned by the perf event, which means multiple programs can be attached
   160  // simultaneously.
   161  //
   162  // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
   163  // returns EEXIST.
   164  //
   165  // Detaching a program from a perf event is currently not possible, so a
   166  // program replacement mechanism cannot be implemented for perf events.
   167  func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
   168  	return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
   169  }
   170  
   171  func (pi *perfEventIoctl) Pin(string) error {
   172  	return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
   173  }
   174  
   175  func (pi *perfEventIoctl) Unpin() error {
   176  	return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
   177  }
   178  
   179  func (pi *perfEventIoctl) Info() (*Info, error) {
   180  	return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
   181  }
   182  
   183  // attach the given eBPF prog to the perf event stored in pe.
   184  // pe must contain a valid perf event fd.
   185  // prog's type must match the program type stored in pe.
   186  func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
   187  	if prog == nil {
   188  		return nil, errors.New("cannot attach a nil program")
   189  	}
   190  	if prog.FD() < 0 {
   191  		return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
   192  	}
   193  
   194  	switch pe.typ {
   195  	case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
   196  		if t := prog.Type(); t != ebpf.Kprobe {
   197  			return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
   198  		}
   199  	case tracepointEvent:
   200  		if t := prog.Type(); t != ebpf.TracePoint {
   201  			return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
   202  		}
   203  	default:
   204  		return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
   205  	}
   206  
   207  	if err := haveBPFLinkPerfEvent(); err == nil {
   208  		return attachPerfEventLink(pe, prog)
   209  	}
   210  	return attachPerfEventIoctl(pe, prog)
   211  }
   212  
   213  func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
   214  	if pe.cookie != 0 {
   215  		return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
   216  	}
   217  
   218  	// Assign the eBPF program to the perf event.
   219  	err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
   220  	if err != nil {
   221  		return nil, fmt.Errorf("setting perf event bpf program: %w", err)
   222  	}
   223  
   224  	// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
   225  	if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
   226  		return nil, fmt.Errorf("enable perf event: %s", err)
   227  	}
   228  
   229  	pi := &perfEventIoctl{pe}
   230  
   231  	// Close the perf event when its reference is lost to avoid leaking system resources.
   232  	runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
   233  	return pi, nil
   234  }
   235  
   236  // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
   237  //
   238  // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
   239  func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
   240  	fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
   241  		ProgFd:     uint32(prog.FD()),
   242  		TargetFd:   pe.fd.Uint(),
   243  		AttachType: sys.BPF_PERF_EVENT,
   244  		BpfCookie:  pe.cookie,
   245  	})
   246  	if err != nil {
   247  		return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
   248  	}
   249  
   250  	pl := &perfEventLink{RawLink{fd: fd}, pe}
   251  
   252  	// Close the perf event when its reference is lost to avoid leaking system resources.
   253  	runtime.SetFinalizer(pl, (*perfEventLink).Close)
   254  	return pl, nil
   255  }
   256  
   257  // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
   258  func unsafeStringPtr(str string) (unsafe.Pointer, error) {
   259  	p, err := unix.BytePtrFromString(str)
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  	return unsafe.Pointer(p), nil
   264  }
   265  
   266  // getTraceEventID reads a trace event's ID from tracefs given its group and name.
   267  // The kernel requires group and name to be alphanumeric or underscore.
   268  //
   269  // name automatically has its invalid symbols converted to underscores so the caller
   270  // can pass a raw symbol name, e.g. a kernel symbol containing dots.
   271  func getTraceEventID(group, name string) (uint64, error) {
   272  	name = sanitizeSymbol(name)
   273  	path, err := sanitizePath(tracefsPath, "events", group, name, "id")
   274  	if err != nil {
   275  		return 0, err
   276  	}
   277  	tid, err := readUint64FromFile("%d\n", path)
   278  	if errors.Is(err, os.ErrNotExist) {
   279  		return 0, err
   280  	}
   281  	if err != nil {
   282  		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
   283  	}
   284  
   285  	return tid, nil
   286  }
   287  
   288  // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
   289  // [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
   290  // behind the scenes, and can be attached to using these perf events.
   291  func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
   292  	attr := unix.PerfEventAttr{
   293  		Type:        unix.PERF_TYPE_TRACEPOINT,
   294  		Config:      tid,
   295  		Sample_type: unix.PERF_SAMPLE_RAW,
   296  		Sample:      1,
   297  		Wakeup:      1,
   298  	}
   299  
   300  	fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
   301  	if err != nil {
   302  		return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
   303  	}
   304  
   305  	return sys.NewFD(fd)
   306  }
   307  
   308  func sanitizePath(base string, path ...string) (string, error) {
   309  	l := filepath.Join(path...)
   310  	p := filepath.Join(base, l)
   311  	if !strings.HasPrefix(p, base) {
   312  		return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
   313  	}
   314  	return p, nil
   315  }
   316  
   317  // readUint64FromFile reads a uint64 from a file.
   318  //
   319  // format specifies the contents of the file in fmt.Scanf syntax.
   320  func readUint64FromFile(format string, path ...string) (uint64, error) {
   321  	filename := filepath.Join(path...)
   322  	data, err := os.ReadFile(filename)
   323  	if err != nil {
   324  		return 0, fmt.Errorf("reading file %q: %w", filename, err)
   325  	}
   326  
   327  	var value uint64
   328  	n, err := fmt.Fscanf(bytes.NewReader(data), format, &value)
   329  	if err != nil {
   330  		return 0, fmt.Errorf("parsing file %q: %w", filename, err)
   331  	}
   332  	if n != 1 {
   333  		return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", filename, n)
   334  	}
   335  
   336  	return value, nil
   337  }
   338  
   339  type uint64FromFileKey struct {
   340  	format, path string
   341  }
   342  
   343  var uint64FromFileCache = struct {
   344  	sync.RWMutex
   345  	values map[uint64FromFileKey]uint64
   346  }{
   347  	values: map[uint64FromFileKey]uint64{},
   348  }
   349  
   350  // readUint64FromFileOnce is like readUint64FromFile but memoizes the result.
   351  func readUint64FromFileOnce(format string, path ...string) (uint64, error) {
   352  	filename := filepath.Join(path...)
   353  	key := uint64FromFileKey{format, filename}
   354  
   355  	uint64FromFileCache.RLock()
   356  	if value, ok := uint64FromFileCache.values[key]; ok {
   357  		uint64FromFileCache.RUnlock()
   358  		return value, nil
   359  	}
   360  	uint64FromFileCache.RUnlock()
   361  
   362  	value, err := readUint64FromFile(format, filename)
   363  	if err != nil {
   364  		return 0, err
   365  	}
   366  
   367  	uint64FromFileCache.Lock()
   368  	defer uint64FromFileCache.Unlock()
   369  
   370  	if value, ok := uint64FromFileCache.values[key]; ok {
   371  		// Someone else got here before us, use what is cached.
   372  		return value, nil
   373  	}
   374  
   375  	uint64FromFileCache.values[key] = value
   376  	return value, nil
   377  }
   378  
   379  // Probe BPF perf link.
   380  //
   381  // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
   382  // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
   383  var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error {
   384  	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
   385  		Name: "probe_bpf_perf_link",
   386  		Type: ebpf.Kprobe,
   387  		Instructions: asm.Instructions{
   388  			asm.Mov.Imm(asm.R0, 0),
   389  			asm.Return(),
   390  		},
   391  		License: "MIT",
   392  	})
   393  	if err != nil {
   394  		return err
   395  	}
   396  	defer prog.Close()
   397  
   398  	_, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
   399  		ProgFd:     uint32(prog.FD()),
   400  		AttachType: sys.BPF_PERF_EVENT,
   401  	})
   402  	if errors.Is(err, unix.EINVAL) {
   403  		return internal.ErrNotSupported
   404  	}
   405  	if errors.Is(err, unix.EBADF) {
   406  		return nil
   407  	}
   408  	return err
   409  })
   410  
   411  // isValidTraceID implements the equivalent of a regex match
   412  // against "^[a-zA-Z_][0-9a-zA-Z_]*$".
   413  //
   414  // Trace event groups, names and kernel symbols must adhere to this set
   415  // of characters. Non-empty, first character must not be a number, all
   416  // characters must be alphanumeric or underscore.
   417  func isValidTraceID(s string) bool {
   418  	if len(s) < 1 {
   419  		return false
   420  	}
   421  	for i, c := range []byte(s) {
   422  		switch {
   423  		case c >= 'a' && c <= 'z':
   424  		case c >= 'A' && c <= 'Z':
   425  		case c == '_':
   426  		case i > 0 && c >= '0' && c <= '9':
   427  
   428  		default:
   429  			return false
   430  		}
   431  	}
   432  
   433  	return true
   434  }