github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/link/perf_event.go (about) 1 package link 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "runtime" 8 "unsafe" 9 10 "github.com/cilium/ebpf" 11 "github.com/cilium/ebpf/asm" 12 "github.com/cilium/ebpf/internal" 13 "github.com/cilium/ebpf/internal/sys" 14 "github.com/cilium/ebpf/internal/tracefs" 15 "github.com/cilium/ebpf/internal/unix" 16 ) 17 18 // Getting the terminology right is usually the hardest part. For posterity and 19 // for staying sane during implementation: 20 // 21 // - trace event: Representation of a kernel runtime hook. Filesystem entries 22 // under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes. 23 // Can be instantiated into perf events (see below). 24 // - tracepoint: A predetermined hook point in the kernel. Exposed as trace 25 // events in (sub)directories under <tracefs>/events. Cannot be closed or 26 // removed, they are static. 27 // - k(ret)probe: Ephemeral trace events based on entry or exit points of 28 // exported kernel symbols. kprobe-based (tracefs) trace events can be 29 // created system-wide by writing to the <tracefs>/kprobe_events file, or 30 // they can be scoped to the current process by creating PMU perf events. 31 // - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries 32 // and offsets. uprobe-based (tracefs) trace events can be 33 // created system-wide by writing to the <tracefs>/uprobe_events file, or 34 // they can be scoped to the current process by creating PMU perf events. 35 // - perf event: An object instantiated based on an existing trace event or 36 // kernel symbol. Referred to by fd in userspace. 37 // Exactly one eBPF program can be attached to a perf event. Multiple perf 38 // events can be created from a single trace event. Closing a perf event 39 // stops any further invocations of the attached eBPF program. 
40 41 var ( 42 errInvalidInput = tracefs.ErrInvalidInput 43 ) 44 45 const ( 46 perfAllThreads = -1 47 ) 48 49 // A perfEvent represents a perf event kernel object. Exactly one eBPF program 50 // can be attached to it. It is created based on a tracefs trace event or a 51 // Performance Monitoring Unit (PMU). 52 type perfEvent struct { 53 // Trace event backing this perfEvent. May be nil. 54 tracefsEvent *tracefs.Event 55 56 // This is the perf event FD. 57 fd *sys.FD 58 } 59 60 func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent { 61 pe := &perfEvent{event, fd} 62 // Both event and fd have their own finalizer, but we want to 63 // guarantee that they are closed in a certain order. 64 runtime.SetFinalizer(pe, (*perfEvent).Close) 65 return pe 66 } 67 68 func (pe *perfEvent) Close() error { 69 runtime.SetFinalizer(pe, nil) 70 71 if err := pe.fd.Close(); err != nil { 72 return fmt.Errorf("closing perf event fd: %w", err) 73 } 74 75 if pe.tracefsEvent != nil { 76 return pe.tracefsEvent.Close() 77 } 78 79 return nil 80 } 81 82 // PerfEvent is implemented by some Link types which use a perf event under 83 // the hood. 84 type PerfEvent interface { 85 // PerfEvent returns a file for the underlying perf event. 86 // 87 // It is the callers responsibility to close the returned file. 88 // 89 // Making changes to the associated perf event lead to 90 // undefined behaviour. 91 PerfEvent() (*os.File, error) 92 } 93 94 // perfEventLink represents a bpf perf link. 95 type perfEventLink struct { 96 RawLink 97 pe *perfEvent 98 } 99 100 func (pl *perfEventLink) isLink() {} 101 102 // Pinning requires the underlying perf event FD to stay open. 
103 // 104 // | PerfEvent FD | BpfLink FD | Works | 105 // |--------------|------------|-------| 106 // | Open | Open | Yes | 107 // | Closed | Open | No | 108 // | Open | Closed | No (Pin() -> EINVAL) | 109 // | Closed | Closed | No (Pin() -> EINVAL) | 110 // 111 // There is currently no pretty way to recover the perf event FD 112 // when loading a pinned link, so leave as not supported for now. 113 func (pl *perfEventLink) Pin(string) error { 114 return fmt.Errorf("perf event link pin: %w", ErrNotSupported) 115 } 116 117 func (pl *perfEventLink) Unpin() error { 118 return fmt.Errorf("perf event link unpin: %w", ErrNotSupported) 119 } 120 121 func (pl *perfEventLink) Close() error { 122 if err := pl.fd.Close(); err != nil { 123 return fmt.Errorf("perf link close: %w", err) 124 } 125 126 if err := pl.pe.Close(); err != nil { 127 return fmt.Errorf("perf event close: %w", err) 128 } 129 return nil 130 } 131 132 func (pl *perfEventLink) Update(prog *ebpf.Program) error { 133 return fmt.Errorf("perf event link update: %w", ErrNotSupported) 134 } 135 136 var _ PerfEvent = (*perfEventLink)(nil) 137 138 func (pl *perfEventLink) PerfEvent() (*os.File, error) { 139 fd, err := pl.pe.fd.Dup() 140 if err != nil { 141 return nil, err 142 } 143 144 return fd.File("perf-event"), nil 145 } 146 147 // perfEventIoctl implements Link and handles the perf event lifecycle 148 // via ioctl(). 149 type perfEventIoctl struct { 150 *perfEvent 151 } 152 153 func (pi *perfEventIoctl) isLink() {} 154 155 // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"), 156 // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array 157 // owned by the perf event, which means multiple programs can be attached 158 // simultaneously. 159 // 160 // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event 161 // returns EEXIST. 
162 // 163 // Detaching a program from a perf event is currently not possible, so a 164 // program replacement mechanism cannot be implemented for perf events. 165 func (pi *perfEventIoctl) Update(prog *ebpf.Program) error { 166 return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported) 167 } 168 169 func (pi *perfEventIoctl) Pin(string) error { 170 return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported) 171 } 172 173 func (pi *perfEventIoctl) Unpin() error { 174 return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported) 175 } 176 177 func (pi *perfEventIoctl) Info() (*Info, error) { 178 return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported) 179 } 180 181 var _ PerfEvent = (*perfEventIoctl)(nil) 182 183 func (pi *perfEventIoctl) PerfEvent() (*os.File, error) { 184 fd, err := pi.fd.Dup() 185 if err != nil { 186 return nil, err 187 } 188 189 return fd.File("perf-event"), nil 190 } 191 192 // attach the given eBPF prog to the perf event stored in pe. 193 // pe must contain a valid perf event fd. 194 // prog's type must match the program type stored in pe. 195 func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) { 196 if prog == nil { 197 return nil, errors.New("cannot attach a nil program") 198 } 199 if prog.FD() < 0 { 200 return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) 201 } 202 203 if err := haveBPFLinkPerfEvent(); err == nil { 204 return attachPerfEventLink(pe, prog, cookie) 205 } 206 207 if cookie != 0 { 208 return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) 209 } 210 211 return attachPerfEventIoctl(pe, prog) 212 } 213 214 func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) { 215 // Assign the eBPF program to the perf event. 
216 err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) 217 if err != nil { 218 return nil, fmt.Errorf("setting perf event bpf program: %w", err) 219 } 220 221 // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values. 222 if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { 223 return nil, fmt.Errorf("enable perf event: %s", err) 224 } 225 226 return &perfEventIoctl{pe}, nil 227 } 228 229 // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+). 230 // 231 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e 232 func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) { 233 fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ 234 ProgFd: uint32(prog.FD()), 235 TargetFd: pe.fd.Uint(), 236 AttachType: sys.BPF_PERF_EVENT, 237 BpfCookie: cookie, 238 }) 239 if err != nil { 240 return nil, fmt.Errorf("cannot create bpf perf link: %v", err) 241 } 242 243 return &perfEventLink{RawLink{fd: fd}, pe}, nil 244 } 245 246 // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. 247 func unsafeStringPtr(str string) (unsafe.Pointer, error) { 248 p, err := unix.BytePtrFromString(str) 249 if err != nil { 250 return nil, err 251 } 252 return unsafe.Pointer(p), nil 253 } 254 255 // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide 256 // [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints 257 // behind the scenes, and can be attached to using these perf events. 
258 func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) { 259 attr := unix.PerfEventAttr{ 260 Type: unix.PERF_TYPE_TRACEPOINT, 261 Config: tid, 262 Sample_type: unix.PERF_SAMPLE_RAW, 263 Sample: 1, 264 Wakeup: 1, 265 } 266 267 cpu := 0 268 if pid != perfAllThreads { 269 cpu = -1 270 } 271 fd, err := unix.PerfEventOpen(&attr, pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC) 272 if err != nil { 273 return nil, fmt.Errorf("opening tracepoint perf event: %w", err) 274 } 275 276 return sys.NewFD(fd) 277 } 278 279 // Probe BPF perf link. 280 // 281 // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307 282 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e 283 var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error { 284 prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ 285 Name: "probe_bpf_perf_link", 286 Type: ebpf.Kprobe, 287 Instructions: asm.Instructions{ 288 asm.Mov.Imm(asm.R0, 0), 289 asm.Return(), 290 }, 291 License: "MIT", 292 }) 293 if err != nil { 294 return err 295 } 296 defer prog.Close() 297 298 _, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ 299 ProgFd: uint32(prog.FD()), 300 AttachType: sys.BPF_PERF_EVENT, 301 }) 302 if errors.Is(err, unix.EINVAL) { 303 return internal.ErrNotSupported 304 } 305 if errors.Is(err, unix.EBADF) { 306 return nil 307 } 308 return err 309 })