github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/link/perf_event.go (about)

     1  package link
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"runtime"
     8  	"unsafe"
     9  
    10  	"github.com/cilium/ebpf"
    11  	"github.com/cilium/ebpf/asm"
    12  	"github.com/cilium/ebpf/internal"
    13  	"github.com/cilium/ebpf/internal/sys"
    14  	"github.com/cilium/ebpf/internal/tracefs"
    15  	"github.com/cilium/ebpf/internal/unix"
    16  )
    17  
    18  // Getting the terminology right is usually the hardest part. For posterity and
    19  // for staying sane during implementation:
    20  //
    21  // - trace event: Representation of a kernel runtime hook. Filesystem entries
    22  //   under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes.
    23  //   Can be instantiated into perf events (see below).
    24  // - tracepoint: A predetermined hook point in the kernel. Exposed as trace
    25  //   events in (sub)directories under <tracefs>/events. Cannot be closed or
    26  //   removed, they are static.
    27  // - k(ret)probe: Ephemeral trace events based on entry or exit points of
    28  //   exported kernel symbols. kprobe-based (tracefs) trace events can be
    29  //   created system-wide by writing to the <tracefs>/kprobe_events file, or
    30  //   they can be scoped to the current process by creating PMU perf events.
    31  // - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries
    32  //   and offsets. uprobe-based (tracefs) trace events can be
    33  //   created system-wide by writing to the <tracefs>/uprobe_events file, or
    34  //   they can be scoped to the current process by creating PMU perf events.
    35  // - perf event: An object instantiated based on an existing trace event or
    36  //   kernel symbol. Referred to by fd in userspace.
    37  //   Exactly one eBPF program can be attached to a perf event. Multiple perf
    38  //   events can be created from a single trace event. Closing a perf event
    39  //   stops any further invocations of the attached eBPF program.
    40  
    41  var (
    42  	errInvalidInput = tracefs.ErrInvalidInput
    43  )
    44  
    45  const (
    46  	perfAllThreads = -1
    47  )
    48  
    49  // A perfEvent represents a perf event kernel object. Exactly one eBPF program
    50  // can be attached to it. It is created based on a tracefs trace event or a
    51  // Performance Monitoring Unit (PMU).
    52  type perfEvent struct {
    53  	// Trace event backing this perfEvent. May be nil.
    54  	tracefsEvent *tracefs.Event
    55  
    56  	// This is the perf event FD.
    57  	fd *sys.FD
    58  }
    59  
    60  func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent {
    61  	pe := &perfEvent{event, fd}
    62  	// Both event and fd have their own finalizer, but we want to
    63  	// guarantee that they are closed in a certain order.
    64  	runtime.SetFinalizer(pe, (*perfEvent).Close)
    65  	return pe
    66  }
    67  
    68  func (pe *perfEvent) Close() error {
    69  	runtime.SetFinalizer(pe, nil)
    70  
    71  	if err := pe.fd.Close(); err != nil {
    72  		return fmt.Errorf("closing perf event fd: %w", err)
    73  	}
    74  
    75  	if pe.tracefsEvent != nil {
    76  		return pe.tracefsEvent.Close()
    77  	}
    78  
    79  	return nil
    80  }
    81  
    82  // PerfEvent is implemented by some Link types which use a perf event under
    83  // the hood.
    84  type PerfEvent interface {
    85  	// PerfEvent returns a file for the underlying perf event.
    86  	//
    87  	// It is the callers responsibility to close the returned file.
    88  	//
    89  	// Making changes to the associated perf event lead to
    90  	// undefined behaviour.
    91  	PerfEvent() (*os.File, error)
    92  }
    93  
    94  // perfEventLink represents a bpf perf link.
    95  type perfEventLink struct {
    96  	RawLink
    97  	pe *perfEvent
    98  }
    99  
   100  func (pl *perfEventLink) isLink() {}
   101  
   102  // Pinning requires the underlying perf event FD to stay open.
   103  //
   104  // | PerfEvent FD | BpfLink FD | Works |
   105  // |--------------|------------|-------|
   106  // | Open         | Open       | Yes   |
   107  // | Closed       | Open       | No    |
   108  // | Open         | Closed     | No (Pin() -> EINVAL) |
   109  // | Closed       | Closed     | No (Pin() -> EINVAL) |
   110  //
   111  // There is currently no pretty way to recover the perf event FD
   112  // when loading a pinned link, so leave as not supported for now.
   113  func (pl *perfEventLink) Pin(string) error {
   114  	return fmt.Errorf("perf event link pin: %w", ErrNotSupported)
   115  }
   116  
   117  func (pl *perfEventLink) Unpin() error {
   118  	return fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
   119  }
   120  
   121  func (pl *perfEventLink) Close() error {
   122  	if err := pl.fd.Close(); err != nil {
   123  		return fmt.Errorf("perf link close: %w", err)
   124  	}
   125  
   126  	if err := pl.pe.Close(); err != nil {
   127  		return fmt.Errorf("perf event close: %w", err)
   128  	}
   129  	return nil
   130  }
   131  
   132  func (pl *perfEventLink) Update(prog *ebpf.Program) error {
   133  	return fmt.Errorf("perf event link update: %w", ErrNotSupported)
   134  }
   135  
   136  var _ PerfEvent = (*perfEventLink)(nil)
   137  
   138  func (pl *perfEventLink) PerfEvent() (*os.File, error) {
   139  	fd, err := pl.pe.fd.Dup()
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	return fd.File("perf-event"), nil
   145  }
   146  
   147  // perfEventIoctl implements Link and handles the perf event lifecycle
   148  // via ioctl().
   149  type perfEventIoctl struct {
   150  	*perfEvent
   151  }
   152  
   153  func (pi *perfEventIoctl) isLink() {}
   154  
   155  // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
   156  // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
   157  // owned by the perf event, which means multiple programs can be attached
   158  // simultaneously.
   159  //
   160  // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
   161  // returns EEXIST.
   162  //
   163  // Detaching a program from a perf event is currently not possible, so a
   164  // program replacement mechanism cannot be implemented for perf events.
   165  func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
   166  	return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
   167  }
   168  
   169  func (pi *perfEventIoctl) Pin(string) error {
   170  	return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
   171  }
   172  
   173  func (pi *perfEventIoctl) Unpin() error {
   174  	return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
   175  }
   176  
   177  func (pi *perfEventIoctl) Info() (*Info, error) {
   178  	return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
   179  }
   180  
   181  var _ PerfEvent = (*perfEventIoctl)(nil)
   182  
   183  func (pi *perfEventIoctl) PerfEvent() (*os.File, error) {
   184  	fd, err := pi.fd.Dup()
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	return fd.File("perf-event"), nil
   190  }
   191  
   192  // attach the given eBPF prog to the perf event stored in pe.
   193  // pe must contain a valid perf event fd.
   194  // prog's type must match the program type stored in pe.
   195  func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) {
   196  	if prog == nil {
   197  		return nil, errors.New("cannot attach a nil program")
   198  	}
   199  	if prog.FD() < 0 {
   200  		return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
   201  	}
   202  
   203  	if err := haveBPFLinkPerfEvent(); err == nil {
   204  		return attachPerfEventLink(pe, prog, cookie)
   205  	}
   206  
   207  	if cookie != 0 {
   208  		return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
   209  	}
   210  
   211  	return attachPerfEventIoctl(pe, prog)
   212  }
   213  
   214  func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
   215  	// Assign the eBPF program to the perf event.
   216  	err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
   217  	if err != nil {
   218  		return nil, fmt.Errorf("setting perf event bpf program: %w", err)
   219  	}
   220  
   221  	// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
   222  	if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
   223  		return nil, fmt.Errorf("enable perf event: %s", err)
   224  	}
   225  
   226  	return &perfEventIoctl{pe}, nil
   227  }
   228  
   229  // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
   230  //
   231  // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
   232  func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) {
   233  	fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
   234  		ProgFd:     uint32(prog.FD()),
   235  		TargetFd:   pe.fd.Uint(),
   236  		AttachType: sys.BPF_PERF_EVENT,
   237  		BpfCookie:  cookie,
   238  	})
   239  	if err != nil {
   240  		return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
   241  	}
   242  
   243  	return &perfEventLink{RawLink{fd: fd}, pe}, nil
   244  }
   245  
   246  // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
   247  func unsafeStringPtr(str string) (unsafe.Pointer, error) {
   248  	p, err := unix.BytePtrFromString(str)
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  	return unsafe.Pointer(p), nil
   253  }
   254  
   255  // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
   256  // [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
   257  // behind the scenes, and can be attached to using these perf events.
   258  func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
   259  	attr := unix.PerfEventAttr{
   260  		Type:        unix.PERF_TYPE_TRACEPOINT,
   261  		Config:      tid,
   262  		Sample_type: unix.PERF_SAMPLE_RAW,
   263  		Sample:      1,
   264  		Wakeup:      1,
   265  	}
   266  
   267  	cpu := 0
   268  	if pid != perfAllThreads {
   269  		cpu = -1
   270  	}
   271  	fd, err := unix.PerfEventOpen(&attr, pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
   272  	if err != nil {
   273  		return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
   274  	}
   275  
   276  	return sys.NewFD(fd)
   277  }
   278  
   279  // Probe BPF perf link.
   280  //
   281  // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
   282  // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
   283  var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error {
   284  	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
   285  		Name: "probe_bpf_perf_link",
   286  		Type: ebpf.Kprobe,
   287  		Instructions: asm.Instructions{
   288  			asm.Mov.Imm(asm.R0, 0),
   289  			asm.Return(),
   290  		},
   291  		License: "MIT",
   292  	})
   293  	if err != nil {
   294  		return err
   295  	}
   296  	defer prog.Close()
   297  
   298  	_, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
   299  		ProgFd:     uint32(prog.FD()),
   300  		AttachType: sys.BPF_PERF_EVENT,
   301  	})
   302  	if errors.Is(err, unix.EINVAL) {
   303  		return internal.ErrNotSupported
   304  	}
   305  	if errors.Is(err, unix.EBADF) {
   306  		return nil
   307  	}
   308  	return err
   309  })