github.com/cilium/ebpf@v0.10.0/link/perf_event.go (about) 1 package link 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "os" 8 "path/filepath" 9 "runtime" 10 "strings" 11 "sync" 12 "unsafe" 13 14 "github.com/cilium/ebpf" 15 "github.com/cilium/ebpf/asm" 16 "github.com/cilium/ebpf/internal" 17 "github.com/cilium/ebpf/internal/sys" 18 "github.com/cilium/ebpf/internal/unix" 19 ) 20 21 // Getting the terminology right is usually the hardest part. For posterity and 22 // for staying sane during implementation: 23 // 24 // - trace event: Representation of a kernel runtime hook. Filesystem entries 25 // under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes. 26 // Can be instantiated into perf events (see below). 27 // - tracepoint: A predetermined hook point in the kernel. Exposed as trace 28 // events in (sub)directories under <tracefs>/events. Cannot be closed or 29 // removed, they are static. 30 // - k(ret)probe: Ephemeral trace events based on entry or exit points of 31 // exported kernel symbols. kprobe-based (tracefs) trace events can be 32 // created system-wide by writing to the <tracefs>/kprobe_events file, or 33 // they can be scoped to the current process by creating PMU perf events. 34 // - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries 35 // and offsets. uprobe-based (tracefs) trace events can be 36 // created system-wide by writing to the <tracefs>/uprobe_events file, or 37 // they can be scoped to the current process by creating PMU perf events. 38 // - perf event: An object instantiated based on an existing trace event or 39 // kernel symbol. Referred to by fd in userspace. 40 // Exactly one eBPF program can be attached to a perf event. Multiple perf 41 // events can be created from a single trace event. Closing a perf event 42 // stops any further invocations of the attached eBPF program. 
43 44 var ( 45 tracefsPath = "/sys/kernel/debug/tracing" 46 47 errInvalidInput = errors.New("invalid input") 48 ) 49 50 const ( 51 perfAllThreads = -1 52 ) 53 54 type perfEventType uint8 55 56 const ( 57 tracepointEvent perfEventType = iota 58 kprobeEvent 59 kretprobeEvent 60 uprobeEvent 61 uretprobeEvent 62 ) 63 64 // A perfEvent represents a perf event kernel object. Exactly one eBPF program 65 // can be attached to it. It is created based on a tracefs trace event or a 66 // Performance Monitoring Unit (PMU). 67 type perfEvent struct { 68 // The event type determines the types of programs that can be attached. 69 typ perfEventType 70 71 // Group and name of the tracepoint/kprobe/uprobe. 72 group string 73 name string 74 75 // PMU event ID read from sysfs. Valid IDs are non-zero. 76 pmuID uint64 77 // ID of the trace event read from tracefs. Valid IDs are non-zero. 78 tracefsID uint64 79 80 // User provided arbitrary value. 81 cookie uint64 82 83 // This is the perf event FD. 84 fd *sys.FD 85 } 86 87 func (pe *perfEvent) Close() error { 88 if err := pe.fd.Close(); err != nil { 89 return fmt.Errorf("closing perf event fd: %w", err) 90 } 91 92 switch pe.typ { 93 case kprobeEvent, kretprobeEvent: 94 // Clean up kprobe tracefs entry. 95 if pe.tracefsID != 0 { 96 return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name) 97 } 98 case uprobeEvent, uretprobeEvent: 99 // Clean up uprobe tracefs entry. 100 if pe.tracefsID != 0 { 101 return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name) 102 } 103 case tracepointEvent: 104 // Tracepoint trace events don't hold any extra resources. 105 return nil 106 } 107 108 return nil 109 } 110 111 // perfEventLink represents a bpf perf link. 112 type perfEventLink struct { 113 RawLink 114 pe *perfEvent 115 } 116 117 func (pl *perfEventLink) isLink() {} 118 119 // Pinning requires the underlying perf event FD to stay open. 
120 // 121 // | PerfEvent FD | BpfLink FD | Works | 122 // |--------------|------------|-------| 123 // | Open | Open | Yes | 124 // | Closed | Open | No | 125 // | Open | Closed | No (Pin() -> EINVAL) | 126 // | Closed | Closed | No (Pin() -> EINVAL) | 127 // 128 // There is currently no pretty way to recover the perf event FD 129 // when loading a pinned link, so leave as not supported for now. 130 func (pl *perfEventLink) Pin(string) error { 131 return fmt.Errorf("perf event link pin: %w", ErrNotSupported) 132 } 133 134 func (pl *perfEventLink) Unpin() error { 135 return fmt.Errorf("perf event link unpin: %w", ErrNotSupported) 136 } 137 138 func (pl *perfEventLink) Close() error { 139 if err := pl.pe.Close(); err != nil { 140 return fmt.Errorf("perf event link close: %w", err) 141 } 142 return pl.fd.Close() 143 } 144 145 func (pl *perfEventLink) Update(prog *ebpf.Program) error { 146 return fmt.Errorf("perf event link update: %w", ErrNotSupported) 147 } 148 149 // perfEventIoctl implements Link and handles the perf event lifecycle 150 // via ioctl(). 151 type perfEventIoctl struct { 152 *perfEvent 153 } 154 155 func (pi *perfEventIoctl) isLink() {} 156 157 // Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"), 158 // calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array 159 // owned by the perf event, which means multiple programs can be attached 160 // simultaneously. 161 // 162 // Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event 163 // returns EEXIST. 164 // 165 // Detaching a program from a perf event is currently not possible, so a 166 // program replacement mechanism cannot be implemented for perf events. 
167 func (pi *perfEventIoctl) Update(prog *ebpf.Program) error { 168 return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported) 169 } 170 171 func (pi *perfEventIoctl) Pin(string) error { 172 return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported) 173 } 174 175 func (pi *perfEventIoctl) Unpin() error { 176 return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported) 177 } 178 179 func (pi *perfEventIoctl) Info() (*Info, error) { 180 return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported) 181 } 182 183 // attach the given eBPF prog to the perf event stored in pe. 184 // pe must contain a valid perf event fd. 185 // prog's type must match the program type stored in pe. 186 func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) { 187 if prog == nil { 188 return nil, errors.New("cannot attach a nil program") 189 } 190 if prog.FD() < 0 { 191 return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) 192 } 193 194 switch pe.typ { 195 case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent: 196 if t := prog.Type(); t != ebpf.Kprobe { 197 return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t) 198 } 199 case tracepointEvent: 200 if t := prog.Type(); t != ebpf.TracePoint { 201 return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t) 202 } 203 default: 204 return nil, fmt.Errorf("unknown perf event type: %d", pe.typ) 205 } 206 207 if err := haveBPFLinkPerfEvent(); err == nil { 208 return attachPerfEventLink(pe, prog) 209 } 210 return attachPerfEventIoctl(pe, prog) 211 } 212 213 func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) { 214 if pe.cookie != 0 { 215 return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) 216 } 217 218 // Assign the eBPF program to the perf event. 
219 err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) 220 if err != nil { 221 return nil, fmt.Errorf("setting perf event bpf program: %w", err) 222 } 223 224 // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values. 225 if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { 226 return nil, fmt.Errorf("enable perf event: %s", err) 227 } 228 229 pi := &perfEventIoctl{pe} 230 231 // Close the perf event when its reference is lost to avoid leaking system resources. 232 runtime.SetFinalizer(pi, (*perfEventIoctl).Close) 233 return pi, nil 234 } 235 236 // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+). 237 // 238 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e 239 func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) { 240 fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ 241 ProgFd: uint32(prog.FD()), 242 TargetFd: pe.fd.Uint(), 243 AttachType: sys.BPF_PERF_EVENT, 244 BpfCookie: pe.cookie, 245 }) 246 if err != nil { 247 return nil, fmt.Errorf("cannot create bpf perf link: %v", err) 248 } 249 250 pl := &perfEventLink{RawLink{fd: fd}, pe} 251 252 // Close the perf event when its reference is lost to avoid leaking system resources. 253 runtime.SetFinalizer(pl, (*perfEventLink).Close) 254 return pl, nil 255 } 256 257 // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. 258 func unsafeStringPtr(str string) (unsafe.Pointer, error) { 259 p, err := unix.BytePtrFromString(str) 260 if err != nil { 261 return nil, err 262 } 263 return unsafe.Pointer(p), nil 264 } 265 266 // getTraceEventID reads a trace event's ID from tracefs given its group and name. 267 // The kernel requires group and name to be alphanumeric or underscore. 268 // 269 // name automatically has its invalid symbols converted to underscores so the caller 270 // can pass a raw symbol name, e.g. 
a kernel symbol containing dots. 271 func getTraceEventID(group, name string) (uint64, error) { 272 name = sanitizeSymbol(name) 273 path, err := sanitizePath(tracefsPath, "events", group, name, "id") 274 if err != nil { 275 return 0, err 276 } 277 tid, err := readUint64FromFile("%d\n", path) 278 if errors.Is(err, os.ErrNotExist) { 279 return 0, err 280 } 281 if err != nil { 282 return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err) 283 } 284 285 return tid, nil 286 } 287 288 // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide 289 // [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints 290 // behind the scenes, and can be attached to using these perf events. 291 func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) { 292 attr := unix.PerfEventAttr{ 293 Type: unix.PERF_TYPE_TRACEPOINT, 294 Config: tid, 295 Sample_type: unix.PERF_SAMPLE_RAW, 296 Sample: 1, 297 Wakeup: 1, 298 } 299 300 fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) 301 if err != nil { 302 return nil, fmt.Errorf("opening tracepoint perf event: %w", err) 303 } 304 305 return sys.NewFD(fd) 306 } 307 308 func sanitizePath(base string, path ...string) (string, error) { 309 l := filepath.Join(path...) 310 p := filepath.Join(base, l) 311 if !strings.HasPrefix(p, base) { 312 return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput) 313 } 314 return p, nil 315 } 316 317 // readUint64FromFile reads a uint64 from a file. 318 // 319 // format specifies the contents of the file in fmt.Scanf syntax. 320 func readUint64FromFile(format string, path ...string) (uint64, error) { 321 filename := filepath.Join(path...) 
322 data, err := os.ReadFile(filename) 323 if err != nil { 324 return 0, fmt.Errorf("reading file %q: %w", filename, err) 325 } 326 327 var value uint64 328 n, err := fmt.Fscanf(bytes.NewReader(data), format, &value) 329 if err != nil { 330 return 0, fmt.Errorf("parsing file %q: %w", filename, err) 331 } 332 if n != 1 { 333 return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", filename, n) 334 } 335 336 return value, nil 337 } 338 339 type uint64FromFileKey struct { 340 format, path string 341 } 342 343 var uint64FromFileCache = struct { 344 sync.RWMutex 345 values map[uint64FromFileKey]uint64 346 }{ 347 values: map[uint64FromFileKey]uint64{}, 348 } 349 350 // readUint64FromFileOnce is like readUint64FromFile but memoizes the result. 351 func readUint64FromFileOnce(format string, path ...string) (uint64, error) { 352 filename := filepath.Join(path...) 353 key := uint64FromFileKey{format, filename} 354 355 uint64FromFileCache.RLock() 356 if value, ok := uint64FromFileCache.values[key]; ok { 357 uint64FromFileCache.RUnlock() 358 return value, nil 359 } 360 uint64FromFileCache.RUnlock() 361 362 value, err := readUint64FromFile(format, filename) 363 if err != nil { 364 return 0, err 365 } 366 367 uint64FromFileCache.Lock() 368 defer uint64FromFileCache.Unlock() 369 370 if value, ok := uint64FromFileCache.values[key]; ok { 371 // Someone else got here before us, use what is cached. 372 return value, nil 373 } 374 375 uint64FromFileCache.values[key] = value 376 return value, nil 377 } 378 379 // Probe BPF perf link. 
380 // 381 // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307 382 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e 383 var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error { 384 prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ 385 Name: "probe_bpf_perf_link", 386 Type: ebpf.Kprobe, 387 Instructions: asm.Instructions{ 388 asm.Mov.Imm(asm.R0, 0), 389 asm.Return(), 390 }, 391 License: "MIT", 392 }) 393 if err != nil { 394 return err 395 } 396 defer prog.Close() 397 398 _, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ 399 ProgFd: uint32(prog.FD()), 400 AttachType: sys.BPF_PERF_EVENT, 401 }) 402 if errors.Is(err, unix.EINVAL) { 403 return internal.ErrNotSupported 404 } 405 if errors.Is(err, unix.EBADF) { 406 return nil 407 } 408 return err 409 }) 410 411 // isValidTraceID implements the equivalent of a regex match 412 // against "^[a-zA-Z_][0-9a-zA-Z_]*$". 413 // 414 // Trace event groups, names and kernel symbols must adhere to this set 415 // of characters. Non-empty, first character must not be a number, all 416 // characters must be alphanumeric or underscore. 417 func isValidTraceID(s string) bool { 418 if len(s) < 1 { 419 return false 420 } 421 for i, c := range []byte(s) { 422 switch { 423 case c >= 'a' && c <= 'z': 424 case c >= 'A' && c <= 'Z': 425 case c == '_': 426 case i > 0 && c >= '0' && c <= '9': 427 428 default: 429 return false 430 } 431 } 432 433 return true 434 }