github.com/cilium/ebpf@v0.10.0/link/kprobe.go (about) 1 package link 2 3 import ( 4 "crypto/rand" 5 "errors" 6 "fmt" 7 "os" 8 "path/filepath" 9 "runtime" 10 "strings" 11 "syscall" 12 "unsafe" 13 14 "github.com/cilium/ebpf" 15 "github.com/cilium/ebpf/internal/sys" 16 "github.com/cilium/ebpf/internal/unix" 17 ) 18 19 var ( 20 kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events") 21 ) 22 23 type probeType uint8 24 25 type probeArgs struct { 26 symbol, group, path string 27 offset, refCtrOffset, cookie uint64 28 pid, retprobeMaxActive int 29 ret bool 30 } 31 32 // KprobeOptions defines additional parameters that will be used 33 // when loading Kprobes. 34 type KprobeOptions struct { 35 // Arbitrary value that can be fetched from an eBPF program 36 // via `bpf_get_attach_cookie()`. 37 // 38 // Needs kernel 5.15+. 39 Cookie uint64 40 // Offset of the kprobe relative to the traced symbol. 41 // Can be used to insert kprobes at arbitrary offsets in kernel functions, 42 // e.g. in places where functions have been inlined. 43 Offset uint64 44 // Increase the maximum number of concurrent invocations of a kretprobe. 45 // Required when tracing some long running functions in the kernel. 46 // 47 // Deprecated: this setting forces the use of an outdated kernel API and is not portable 48 // across kernel versions. 
49 RetprobeMaxActive int 50 } 51 52 const ( 53 kprobeType probeType = iota 54 uprobeType 55 ) 56 57 func (pt probeType) String() string { 58 if pt == kprobeType { 59 return "kprobe" 60 } 61 return "uprobe" 62 } 63 64 func (pt probeType) EventsPath() string { 65 if pt == kprobeType { 66 return kprobeEventsPath 67 } 68 return uprobeEventsPath 69 } 70 71 func (pt probeType) PerfEventType(ret bool) perfEventType { 72 if pt == kprobeType { 73 if ret { 74 return kretprobeEvent 75 } 76 return kprobeEvent 77 } 78 if ret { 79 return uretprobeEvent 80 } 81 return uprobeEvent 82 } 83 84 // Kprobe attaches the given eBPF program to a perf event that fires when the 85 // given kernel symbol starts executing. See /proc/kallsyms for available 86 // symbols. For example, printk(): 87 // 88 // kp, err := Kprobe("printk", prog, nil) 89 // 90 // Losing the reference to the resulting Link (kp) will close the Kprobe 91 // and prevent further execution of prog. The Link must be Closed during 92 // program shutdown to avoid leaking system resources. 93 func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { 94 k, err := kprobe(symbol, prog, opts, false) 95 if err != nil { 96 return nil, err 97 } 98 99 lnk, err := attachPerfEvent(k, prog) 100 if err != nil { 101 k.Close() 102 return nil, err 103 } 104 105 return lnk, nil 106 } 107 108 // Kretprobe attaches the given eBPF program to a perf event that fires right 109 // before the given kernel symbol exits, with the function stack left intact. 110 // See /proc/kallsyms for available symbols. For example, printk(): 111 // 112 // kp, err := Kretprobe("printk", prog, nil) 113 // 114 // Losing the reference to the resulting Link (kp) will close the Kretprobe 115 // and prevent further execution of prog. The Link must be Closed during 116 // program shutdown to avoid leaking system resources. 
117 // 118 // On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol 119 // incorrectly returns unix.EINVAL instead of os.ErrNotExist. 120 func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { 121 k, err := kprobe(symbol, prog, opts, true) 122 if err != nil { 123 return nil, err 124 } 125 126 lnk, err := attachPerfEvent(k, prog) 127 if err != nil { 128 k.Close() 129 return nil, err 130 } 131 132 return lnk, nil 133 } 134 135 // isValidKprobeSymbol implements the equivalent of a regex match 136 // against "^[a-zA-Z_][0-9a-zA-Z_.]*$". 137 func isValidKprobeSymbol(s string) bool { 138 if len(s) < 1 { 139 return false 140 } 141 142 for i, c := range []byte(s) { 143 switch { 144 case c >= 'a' && c <= 'z': 145 case c >= 'A' && c <= 'Z': 146 case c == '_': 147 case i > 0 && c >= '0' && c <= '9': 148 149 // Allow `.` in symbol name. GCC-compiled kernel may change symbol name 150 // to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`. 151 // See: https://gcc.gnu.org/gcc-10/changes.html 152 case i > 0 && c == '.': 153 154 default: 155 return false 156 } 157 } 158 159 return true 160 } 161 162 // kprobe opens a perf event on the given symbol and attaches prog to it. 163 // If ret is true, create a kretprobe. 
164 func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) { 165 if symbol == "" { 166 return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) 167 } 168 if prog == nil { 169 return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) 170 } 171 if !isValidKprobeSymbol(symbol) { 172 return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput) 173 } 174 if prog.Type() != ebpf.Kprobe { 175 return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) 176 } 177 178 args := probeArgs{ 179 pid: perfAllThreads, 180 symbol: symbol, 181 ret: ret, 182 } 183 184 if opts != nil { 185 args.retprobeMaxActive = opts.RetprobeMaxActive 186 args.cookie = opts.Cookie 187 args.offset = opts.Offset 188 } 189 190 // Use kprobe PMU if the kernel has it available. 191 tp, err := pmuKprobe(args) 192 if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { 193 args.symbol = platformPrefix(symbol) 194 tp, err = pmuKprobe(args) 195 } 196 if err == nil { 197 return tp, nil 198 } 199 if err != nil && !errors.Is(err, ErrNotSupported) { 200 return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err) 201 } 202 203 // Use tracefs if kprobe PMU is missing. 204 args.symbol = symbol 205 tp, err = tracefsKprobe(args) 206 if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { 207 args.symbol = platformPrefix(symbol) 208 tp, err = tracefsKprobe(args) 209 } 210 if err != nil { 211 return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err) 212 } 213 214 return tp, nil 215 } 216 217 // pmuKprobe opens a perf event based on the kprobe PMU. 218 // Returns os.ErrNotExist if the given symbol does not exist in the kernel. 219 func pmuKprobe(args probeArgs) (*perfEvent, error) { 220 return pmuProbe(kprobeType, args) 221 } 222 223 // pmuProbe opens a perf event based on a Performance Monitoring Unit. 
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// For kprobes the kernel is given args.symbol, for uprobes args.path; in both
// cases args.offset is passed as the probe offset.
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU,
// if args.retprobeMaxActive is non-zero, or if opening the perf event fails
// with EINVAL while args.symbol contains a dot. ENOENT and EILSEQ from the
// kernel are both reported as errors wrapping os.ErrNotExist.
func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
	// Getting the PMU type will fail if the kernel doesn't support
	// the perf_[k,u]probe PMU.
	et, err := readUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", typ.String(), "type")
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("%s: %w", typ, ErrNotSupported)
	}
	if err != nil {
		return nil, err
	}

	// Use tracefs if we want to set kretprobe's retprobeMaxActive.
	if args.retprobeMaxActive != 0 {
		return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
	}

	var config uint64
	if args.ret {
		// The position of the retprobe flag inside config is read from sysfs
		// rather than hard-coded.
		bit, err := readUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", typ.String(), "/format/retprobe")
		if err != nil {
			return nil, err
		}
		config |= 1 << bit
	}

	var (
		attr  unix.PerfEventAttr
		sp    unsafe.Pointer
		token string
	)
	// NOTE(review): there is no default case; a typ other than the two below
	// would reach PerfEventOpen with a zero-valued attr — confirm callers only
	// ever pass kprobeType or uprobeType.
	switch typ {
	case kprobeType:
		// Create a pointer to a NUL-terminated string for the kernel.
		sp, err = unsafeStringPtr(args.symbol)
		if err != nil {
			return nil, err
		}

		// token is only used to give context in error messages below.
		token = kprobeToken(args)

		attr = unix.PerfEventAttr{
			// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
			// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(et),          // PMU event type read from sysfs
			Ext1:   uint64(uintptr(sp)), // Kernel symbol to trace
			Ext2:   args.offset,         // Kernel symbol offset
			Config: config,              // Retprobe flag
		}
	case uprobeType:
		// Same as above, but the kernel is handed the path instead of a symbol.
		sp, err = unsafeStringPtr(args.path)
		if err != nil {
			return nil, err
		}

		if args.refCtrOffset != 0 {
			config |= args.refCtrOffset << uprobeRefCtrOffsetShift
		}

		// token is only used to give context in error messages below.
		token = uprobeToken(args)

		attr = unix.PerfEventAttr{
			// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
			// since it added the config2 (Ext2) field. The Size field controls the
			// size of the internal buffer the kernel allocates for reading the
			// perf_event_attr argument from userspace.
			Size:   unix.PERF_ATTR_SIZE_VER1,
			Type:   uint32(et),          // PMU event type read from sysfs
			Ext1:   uint64(uintptr(sp)), // Uprobe path
			Ext2:   args.offset,         // Uprobe offset
			Config: config,              // RefCtrOffset, Retprobe flag
		}
	}

	rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)

	// The checks below are order-sensitive: the dot/EINVAL case must be
	// examined before the generic error return further down.

	// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
	// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
	// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
	if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") {
		return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)
	}
	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
	// when trying to create a retprobe for a missing symbol.
	if errors.Is(err, os.ErrNotExist) {
		return nil, fmt.Errorf("token %s: not found: %w", token, err)
	}
	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
	// to an invalid insn boundary. The exact conditions that trigger this error are
	// arch specific however.
	if errors.Is(err, unix.EILSEQ) {
		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
	}
	// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
	// when attempting to set a uprobe on a trap instruction.
	if errors.Is(err, sys.ENOTSUPP) {
		return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.offset, err)
	}

	if err != nil {
		return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
	}

	// Ensure the string pointer is not collected before PerfEventOpen returns.
	// attr only holds the address as an integer (Ext1), so sp itself must be
	// kept reachable explicitly.
	runtime.KeepAlive(sp)

	fd, err := sys.NewFD(rawFd)
	if err != nil {
		return nil, err
	}

	// Kernel has perf_[k,u]probe PMU available, initialize perf event.
	return &perfEvent{
		typ:    typ.PerfEventType(args.ret),
		name:   args.symbol,
		pmuID:  et,
		cookie: args.cookie,
		fd:     fd,
	}, nil
}

// tracefsKprobe creates a Kprobe tracefs entry by calling tracefsProbe with
// kprobeType.
func tracefsKprobe(args probeArgs) (*perfEvent, error) {
	return tracefsProbe(kprobeType, args)
}

// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
// A new trace event group name is generated on every call to support creating
// multiple trace events for the same kernel or userspace symbol.
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
// Any group already present in args is overwritten with the generated one.
func tracefsProbe(typ probeType, args probeArgs) (*perfEvent, error) {
	// Generate a random string for each trace event we attempt to create.
	// This value is used as the 'group' token in tracefs to allow creating
	// multiple kprobe trace events with the same name.
	group, err := randomGroup("ebpf")
	if err != nil {
		return nil, fmt.Errorf("randomizing group name: %w", err)
	}
	args.group = group

	// Create the [k,u]probe trace event using tracefs.
	tid, err := createTraceFSProbeEvent(typ, args)
	if err != nil {
		return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
	}

	// Kprobes are ephemeral tracepoints and share the same perf event type.
	fd, err := openTracepointPerfEvent(tid, args.pid)
	if err != nil {
		// Make sure we clean up the created tracefs event when we return error.
		// If a livepatch handler is already active on the symbol, the write to
		// tracefs will succeed, a trace event will show up, but creating the
		// perf event will fail with EBUSY.
		// The cleanup is best-effort: its error is dropped so the caller sees
		// the original failure.
		_ = closeTraceFSProbeEvent(typ, args.group, args.symbol)
		return nil, err
	}

	return &perfEvent{
		typ:       typ.PerfEventType(args.ret),
		group:     group,
		name:      args.symbol,
		tracefsID: tid,
		cookie:    args.cookie,
		fd:        fd,
	}, nil
}

// errInvalidMaxActive is returned by createTraceFSProbeEvent when a non-zero
// retprobeMaxActive is combined with anything other than a kretprobe.
var errInvalidMaxActive = errors.New("can only set maxactive on kretprobes")

// createTraceFSProbeEvent creates a new ephemeral trace event and returns its
// trace event id.
//
// Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists. Returns
// errInvalidMaxActive if args.retprobeMaxActive is non-zero for anything other
// than a kretprobe (that is, for plain kprobes and for all uprobes). Returns
// ErrNotSupported if the kernel is too old to support kretprobe maxactive.
func createTraceFSProbeEvent(typ probeType, args probeArgs) (uint64, error) {
	// Before attempting to create a trace event through tracefs,
	// check if an event with the same group and name already exists.
	// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
	// entry, so we need to rely on reads for detecting uniqueness.
	_, err := getTraceEventID(args.group, args.symbol)
	if err == nil {
		return 0, fmt.Errorf("trace event %s/%s: %w", args.group, args.symbol, os.ErrExist)
	}
	if err != nil && !errors.Is(err, os.ErrNotExist) {
		return 0, fmt.Errorf("checking trace event %s/%s: %w", args.group, args.symbol, err)
	}

	// Open the [k,u]probe_events file in tracefs, depending on typ.
	f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
	if err != nil {
		return 0, fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err)
	}
	defer f.Close()

	// pe is the full line written to the events file; token is its
	// SYM[+offs] or PATH:OFFSET part and is reused in error messages.
	var pe, token string
	switch typ {
	case kprobeType:
		// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
		// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
		// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
		// -:[GRP/]EVENT                                        : Clear a probe
		//
		// Some examples:
		// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
		// p:ebpf_5678/p_my_kprobe __x64_sys_execve
		//
		// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
		// kernel default to NR_CPUS. This is desired in most eBPF cases since
		// subsampling or rate limiting logic can be more accurately implemented in
		// the eBPF program itself.
		// See Documentation/kprobes.txt for more details.
		if args.retprobeMaxActive != 0 && !args.ret {
			return 0, errInvalidMaxActive
		}
		token = kprobeToken(args)
		pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret, args.retprobeMaxActive), args.group, sanitizeSymbol(args.symbol), token)
	case uprobeType:
		// The uprobe_events syntax is as follows:
		// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
		// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
		// -:[GRP/]EVENT                           : Clear a probe
		//
		// Some examples:
		// r:ebpf_1234/readline /bin/bash:0x12345
		// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
		//
		// See Documentation/trace/uprobetracer.txt for more details.
		if args.retprobeMaxActive != 0 {
			return 0, errInvalidMaxActive
		}
		token = uprobeToken(args)
		// NOTE(review): unlike the kprobe case, args.symbol is written without
		// sanitizeSymbol here — presumably the caller sanitizes it; confirm.
		pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret, 0), args.group, args.symbol, token)
	}
	_, err = f.WriteString(pe)

	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
	// when trying to create a retprobe for a missing symbol.
	if errors.Is(err, os.ErrNotExist) {
		return 0, fmt.Errorf("token %s: not found: %w", token, err)
	}
	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
	// to an invalid insn boundary. The exact conditions that trigger this error are
	// arch specific however.
	if errors.Is(err, syscall.EILSEQ) {
		return 0, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
	}
	// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
	// be resolved.
	if errors.Is(err, syscall.ERANGE) {
		return 0, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
	}

	if err != nil {
		return 0, fmt.Errorf("token %s: writing '%s': %w", token, pe, err)
	}

	// Get the newly-created trace event's id.
	tid, err := getTraceEventID(args.group, args.symbol)
	if args.retprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) {
		// Kernels < 4.12 don't support maxactive and therefore auto generate
		// group and event names from the symbol and offset. The symbol is used
		// without any sanitization.
		// See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712
		event := fmt.Sprintf("kprobes/r_%s_%d", args.symbol, args.offset)
		if err := removeTraceFSProbeEvent(typ, event); err != nil {
			// NOTE(review): %s (not %w) means the removal error is not
			// wrapped, so errors.Is on the result will not match it —
			// confirm this is intentional.
			return 0, fmt.Errorf("failed to remove spurious maxactive event: %s", err)
		}
		return 0, fmt.Errorf("create trace event with non-default maxactive: %w", ErrNotSupported)
	}
	if err != nil {
		return 0, fmt.Errorf("get trace event id: %w", err)
	}

	return tid, nil
}

// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol
// from <tracefs>/[k,u]probe_events. The symbol is passed through sanitizeSymbol
// before the event name is built.
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
	pe := fmt.Sprintf("%s/%s", group, sanitizeSymbol(symbol))
	return removeTraceFSProbeEvent(typ, pe)
}

// removeTraceFSProbeEvent writes a "-:<pe>" removal entry to the events file
// selected by typ. pe is the event in GROUP/EVENT form.
func removeTraceFSProbeEvent(typ probeType, pe string) error {
	f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
	if err != nil {
		return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err)
	}
	defer f.Close()

	// See [k,u]probe_events syntax above. The probe type does not need to be specified
	// for removals.
	if _, err = f.WriteString("-:" + pe); err != nil {
		return fmt.Errorf("remove event %q from %s: %w", pe, typ.EventsPath(), err)
	}

	return nil
}

// randomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by isValidTraceID.
537 func randomGroup(prefix string) (string, error) { 538 if !isValidTraceID(prefix) { 539 return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput) 540 } 541 542 b := make([]byte, 8) 543 if _, err := rand.Read(b); err != nil { 544 return "", fmt.Errorf("reading random bytes: %w", err) 545 } 546 547 group := fmt.Sprintf("%s_%x", prefix, b) 548 if len(group) > 63 { 549 return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput) 550 } 551 552 return group, nil 553 } 554 555 func probePrefix(ret bool, maxActive int) string { 556 if ret { 557 if maxActive > 0 { 558 return fmt.Sprintf("r%d", maxActive) 559 } 560 return "r" 561 } 562 return "p" 563 } 564 565 // kprobeToken creates the SYM[+offs] token for the tracefs api. 566 func kprobeToken(args probeArgs) string { 567 po := args.symbol 568 569 if args.offset != 0 { 570 po += fmt.Sprintf("+%#x", args.offset) 571 } 572 573 return po 574 }