github.com/cilium/ebpf@v0.16.0/link/perf_event.go (about) 1 package link 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "runtime" 8 "unsafe" 9 10 "github.com/cilium/ebpf" 11 "github.com/cilium/ebpf/asm" 12 "github.com/cilium/ebpf/internal" 13 "github.com/cilium/ebpf/internal/sys" 14 "github.com/cilium/ebpf/internal/tracefs" 15 "github.com/cilium/ebpf/internal/unix" 16 ) 17 18 // Getting the terminology right is usually the hardest part. For posterity and 19 // for staying sane during implementation: 20 // 21 // - trace event: Representation of a kernel runtime hook. Filesystem entries 22 // under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes. 23 // Can be instantiated into perf events (see below). 24 // - tracepoint: A predetermined hook point in the kernel. Exposed as trace 25 // events in (sub)directories under <tracefs>/events. Cannot be closed or 26 // removed, they are static. 27 // - k(ret)probe: Ephemeral trace events based on entry or exit points of 28 // exported kernel symbols. kprobe-based (tracefs) trace events can be 29 // created system-wide by writing to the <tracefs>/kprobe_events file, or 30 // they can be scoped to the current process by creating PMU perf events. 31 // - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries 32 // and offsets. uprobe-based (tracefs) trace events can be 33 // created system-wide by writing to the <tracefs>/uprobe_events file, or 34 // they can be scoped to the current process by creating PMU perf events. 35 // - perf event: An object instantiated based on an existing trace event or 36 // kernel symbol. Referred to by fd in userspace. 37 // Exactly one eBPF program can be attached to a perf event. Multiple perf 38 // events can be created from a single trace event. Closing a perf event 39 // stops any further invocations of the attached eBPF program. 
40 41 var ( 42 errInvalidInput = tracefs.ErrInvalidInput 43 ) 44 45 const ( 46 perfAllThreads = -1 47 ) 48 49 // A perfEvent represents a perf event kernel object. Exactly one eBPF program 50 // can be attached to it. It is created based on a tracefs trace event or a 51 // Performance Monitoring Unit (PMU). 52 type perfEvent struct { 53 // Trace event backing this perfEvent. May be nil. 54 tracefsEvent *tracefs.Event 55 56 // This is the perf event FD. 57 fd *sys.FD 58 } 59 60 func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent { 61 pe := &perfEvent{event, fd} 62 // Both event and fd have their own finalizer, but we want to 63 // guarantee that they are closed in a certain order. 64 runtime.SetFinalizer(pe, (*perfEvent).Close) 65 return pe 66 } 67 68 func (pe *perfEvent) Close() error { 69 runtime.SetFinalizer(pe, nil) 70 71 if err := pe.fd.Close(); err != nil { 72 return fmt.Errorf("closing perf event fd: %w", err) 73 } 74 75 if pe.tracefsEvent != nil { 76 return pe.tracefsEvent.Close() 77 } 78 79 return nil 80 } 81 82 // PerfEvent is implemented by some Link types which use a perf event under 83 // the hood. 84 type PerfEvent interface { 85 // PerfEvent returns a file for the underlying perf event. 86 // 87 // It is the callers responsibility to close the returned file. 88 // 89 // Making changes to the associated perf event lead to 90 // undefined behaviour. 91 PerfEvent() (*os.File, error) 92 } 93 94 // perfEventLink represents a bpf perf link. 
95 type perfEventLink struct { 96 RawLink 97 pe *perfEvent 98 } 99 100 func (pl *perfEventLink) isLink() {} 101 102 func (pl *perfEventLink) Close() error { 103 if err := pl.fd.Close(); err != nil { 104 return fmt.Errorf("perf link close: %w", err) 105 } 106 107 // when created from pinned link 108 if pl.pe == nil { 109 return nil 110 } 111 112 if err := pl.pe.Close(); err != nil { 113 return fmt.Errorf("perf event close: %w", err) 114 } 115 return nil 116 } 117 118 func (pl *perfEventLink) Update(prog *ebpf.Program) error { 119 return fmt.Errorf("perf event link update: %w", ErrNotSupported) 120 } 121 122 var _ PerfEvent = (*perfEventLink)(nil) 123 124 func (pl *perfEventLink) PerfEvent() (*os.File, error) { 125 // when created from pinned link 126 if pl.pe == nil { 127 return nil, ErrNotSupported 128 } 129 130 fd, err := pl.pe.fd.Dup() 131 if err != nil { 132 return nil, err 133 } 134 135 return fd.File("perf-event"), nil 136 } 137 138 func (pl *perfEventLink) Info() (*Info, error) { 139 var info sys.PerfEventLinkInfo 140 if err := sys.ObjInfo(pl.fd, &info); err != nil { 141 return nil, fmt.Errorf("perf event link info: %s", err) 142 } 143 144 var extra2 interface{} 145 switch info.PerfEventType { 146 case sys.BPF_PERF_EVENT_KPROBE, sys.BPF_PERF_EVENT_KRETPROBE: 147 var kprobeInfo sys.KprobeLinkInfo 148 if err := sys.ObjInfo(pl.fd, &kprobeInfo); err != nil { 149 return nil, fmt.Errorf("kprobe link info: %s", err) 150 } 151 extra2 = &KprobeInfo{ 152 address: kprobeInfo.Addr, 153 missed: kprobeInfo.Missed, 154 } 155 } 156 157 extra := &PerfEventInfo{ 158 Type: info.PerfEventType, 159 extra: extra2, 160 } 161 162 return &Info{ 163 info.Type, 164 info.Id, 165 ebpf.ProgramID(info.ProgId), 166 extra, 167 }, nil 168 } 169 170 // perfEventIoctl implements Link and handles the perf event lifecycle 171 // via ioctl(). 
172 type perfEventIoctl struct { 173 *perfEvent 174 } 175 176 func (pi *perfEventIoctl) isLink() {} 177 178 // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"), 179 // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array 180 // owned by the perf event, which means multiple programs can be attached 181 // simultaneously. 182 // 183 // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event 184 // returns EEXIST. 185 // 186 // Detaching a program from a perf event is currently not possible, so a 187 // program replacement mechanism cannot be implemented for perf events. 188 func (pi *perfEventIoctl) Update(prog *ebpf.Program) error { 189 return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported) 190 } 191 192 func (pi *perfEventIoctl) Pin(string) error { 193 return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported) 194 } 195 196 func (pi *perfEventIoctl) Unpin() error { 197 return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported) 198 } 199 200 func (pi *perfEventIoctl) Info() (*Info, error) { 201 return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported) 202 } 203 204 var _ PerfEvent = (*perfEventIoctl)(nil) 205 206 func (pi *perfEventIoctl) PerfEvent() (*os.File, error) { 207 fd, err := pi.fd.Dup() 208 if err != nil { 209 return nil, err 210 } 211 212 return fd.File("perf-event"), nil 213 } 214 215 // attach the given eBPF prog to the perf event stored in pe. 216 // pe must contain a valid perf event fd. 217 // prog's type must match the program type stored in pe. 
218 func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) { 219 if prog == nil { 220 return nil, errors.New("cannot attach a nil program") 221 } 222 if prog.FD() < 0 { 223 return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) 224 } 225 226 if err := haveBPFLinkPerfEvent(); err == nil { 227 return attachPerfEventLink(pe, prog, cookie) 228 } 229 230 if cookie != 0 { 231 return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) 232 } 233 234 return attachPerfEventIoctl(pe, prog) 235 } 236 237 func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) { 238 // Assign the eBPF program to the perf event. 239 err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) 240 if err != nil { 241 return nil, fmt.Errorf("setting perf event bpf program: %w", err) 242 } 243 244 // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values. 245 if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { 246 return nil, fmt.Errorf("enable perf event: %s", err) 247 } 248 249 return &perfEventIoctl{pe}, nil 250 } 251 252 // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+). 253 // 254 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e 255 func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) { 256 fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ 257 ProgFd: uint32(prog.FD()), 258 TargetFd: pe.fd.Uint(), 259 AttachType: sys.BPF_PERF_EVENT, 260 BpfCookie: cookie, 261 }) 262 if err != nil { 263 return nil, fmt.Errorf("cannot create bpf perf link: %v", err) 264 } 265 266 return &perfEventLink{RawLink{fd: fd}, pe}, nil 267 } 268 269 // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. 
270 func unsafeStringPtr(str string) (unsafe.Pointer, error) { 271 p, err := unix.BytePtrFromString(str) 272 if err != nil { 273 return nil, err 274 } 275 return unsafe.Pointer(p), nil 276 } 277 278 // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide 279 // [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints 280 // behind the scenes, and can be attached to using these perf events. 281 func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) { 282 attr := unix.PerfEventAttr{ 283 Type: unix.PERF_TYPE_TRACEPOINT, 284 Config: tid, 285 Sample_type: unix.PERF_SAMPLE_RAW, 286 Sample: 1, 287 Wakeup: 1, 288 } 289 290 cpu := 0 291 if pid != perfAllThreads { 292 cpu = -1 293 } 294 fd, err := unix.PerfEventOpen(&attr, pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC) 295 if err != nil { 296 return nil, fmt.Errorf("opening tracepoint perf event: %w", err) 297 } 298 299 return sys.NewFD(fd) 300 } 301 302 // Probe BPF perf link. 303 // 304 // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307 305 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e 306 var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error { 307 prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ 308 Name: "probe_bpf_perf_link", 309 Type: ebpf.Kprobe, 310 Instructions: asm.Instructions{ 311 asm.Mov.Imm(asm.R0, 0), 312 asm.Return(), 313 }, 314 License: "MIT", 315 }) 316 if err != nil { 317 return err 318 } 319 defer prog.Close() 320 321 _, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ 322 ProgFd: uint32(prog.FD()), 323 AttachType: sys.BPF_PERF_EVENT, 324 }) 325 if errors.Is(err, unix.EINVAL) { 326 return internal.ErrNotSupported 327 } 328 if errors.Is(err, unix.EBADF) { 329 return nil 330 } 331 return err 332 })