github.com/cilium/ebpf@v0.16.0/link/kprobe.go (about) 1 package link 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "runtime" 8 "strings" 9 "unsafe" 10 11 "github.com/cilium/ebpf" 12 "github.com/cilium/ebpf/internal" 13 "github.com/cilium/ebpf/internal/sys" 14 "github.com/cilium/ebpf/internal/tracefs" 15 "github.com/cilium/ebpf/internal/unix" 16 ) 17 18 // KprobeOptions defines additional parameters that will be used 19 // when loading Kprobes. 20 type KprobeOptions struct { 21 // Arbitrary value that can be fetched from an eBPF program 22 // via `bpf_get_attach_cookie()`. 23 // 24 // Needs kernel 5.15+. 25 Cookie uint64 26 // Offset of the kprobe relative to the traced symbol. 27 // Can be used to insert kprobes at arbitrary offsets in kernel functions, 28 // e.g. in places where functions have been inlined. 29 Offset uint64 30 // Increase the maximum number of concurrent invocations of a kretprobe. 31 // Required when tracing some long running functions in the kernel. 32 // 33 // Deprecated: this setting forces the use of an outdated kernel API and is not portable 34 // across kernel versions. 35 RetprobeMaxActive int 36 // Prefix used for the event name if the kprobe must be attached using tracefs. 37 // The group name will be formatted as `<prefix>_<randomstr>`. 38 // The default empty string is equivalent to "ebpf" as the prefix. 39 TraceFSPrefix string 40 } 41 42 func (ko *KprobeOptions) cookie() uint64 { 43 if ko == nil { 44 return 0 45 } 46 return ko.Cookie 47 } 48 49 // Kprobe attaches the given eBPF program to a perf event that fires when the 50 // given kernel symbol starts executing. See /proc/kallsyms for available 51 // symbols. For example, printk(): 52 // 53 // kp, err := Kprobe("printk", prog, nil) 54 // 55 // Losing the reference to the resulting Link (kp) will close the Kprobe 56 // and prevent further execution of prog. The Link must be Closed during 57 // program shutdown to avoid leaking system resources. 58 // 59 // If attaching to symbol fails, automatically retries with the running 60 // platform's syscall prefix (e.g. __x64_) to support attaching to syscalls 61 // in a portable fashion. 62 // 63 // The returned Link may implement [PerfEvent]. 64 func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { 65 k, err := kprobe(symbol, prog, opts, false) 66 if err != nil { 67 return nil, err 68 } 69 70 lnk, err := attachPerfEvent(k, prog, opts.cookie()) 71 if err != nil { 72 k.Close() 73 return nil, err 74 } 75 76 return lnk, nil 77 } 78 79 // Kretprobe attaches the given eBPF program to a perf event that fires right 80 // before the given kernel symbol exits, with the function stack left intact. 81 // See /proc/kallsyms for available symbols. For example, printk(): 82 // 83 // kp, err := Kretprobe("printk", prog, nil) 84 // 85 // Losing the reference to the resulting Link (kp) will close the Kretprobe 86 // and prevent further execution of prog. The Link must be Closed during 87 // program shutdown to avoid leaking system resources. 88 // 89 // If attaching to symbol fails, automatically retries with the running 90 // platform's syscall prefix (e.g. __x64_) to support attaching to syscalls 91 // in a portable fashion. 92 // 93 // On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol 94 // incorrectly returns unix.EINVAL instead of os.ErrNotExist. 95 // 96 // The returned Link may implement [PerfEvent]. 97 func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { 98 k, err := kprobe(symbol, prog, opts, true) 99 if err != nil { 100 return nil, err 101 } 102 103 lnk, err := attachPerfEvent(k, prog, opts.cookie()) 104 if err != nil { 105 k.Close() 106 return nil, err 107 } 108 109 return lnk, nil 110 } 111 112 // isValidKprobeSymbol implements the equivalent of a regex match 113 // against "^[a-zA-Z_][0-9a-zA-Z_.]*$". 114 func isValidKprobeSymbol(s string) bool { 115 if len(s) < 1 { 116 return false 117 } 118 119 for i, c := range []byte(s) { 120 switch { 121 case c >= 'a' && c <= 'z': 122 case c >= 'A' && c <= 'Z': 123 case c == '_': 124 case i > 0 && c >= '0' && c <= '9': 125 126 // Allow `.` in symbol name. GCC-compiled kernel may change symbol name 127 // to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`. 128 // See: https://gcc.gnu.org/gcc-10/changes.html 129 case i > 0 && c == '.': 130 131 default: 132 return false 133 } 134 } 135 136 return true 137 } 138 139 // kprobe opens a perf event on the given symbol and attaches prog to it. 140 // If ret is true, create a kretprobe. 141 func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) { 142 if symbol == "" { 143 return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) 144 } 145 if prog == nil { 146 return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) 147 } 148 if !isValidKprobeSymbol(symbol) { 149 return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput) 150 } 151 if prog.Type() != ebpf.Kprobe { 152 return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) 153 } 154 155 args := tracefs.ProbeArgs{ 156 Type: tracefs.Kprobe, 157 Pid: perfAllThreads, 158 Symbol: symbol, 159 Ret: ret, 160 } 161 162 if opts != nil { 163 args.RetprobeMaxActive = opts.RetprobeMaxActive 164 args.Cookie = opts.Cookie 165 args.Offset = opts.Offset 166 args.Group = opts.TraceFSPrefix 167 } 168 169 // Use kprobe PMU if the kernel has it available. 170 tp, err := pmuProbe(args) 171 if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { 172 if prefix := internal.PlatformPrefix(); prefix != "" { 173 args.Symbol = prefix + symbol 174 tp, err = pmuProbe(args) 175 } 176 } 177 if err == nil { 178 return tp, nil 179 } 180 if err != nil && !errors.Is(err, ErrNotSupported) { 181 return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err) 182 } 183 184 // Use tracefs if kprobe PMU is missing. 185 args.Symbol = symbol 186 tp, err = tracefsProbe(args) 187 if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { 188 if prefix := internal.PlatformPrefix(); prefix != "" { 189 args.Symbol = prefix + symbol 190 tp, err = tracefsProbe(args) 191 } 192 } 193 if err != nil { 194 return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err) 195 } 196 197 return tp, nil 198 } 199 200 // pmuProbe opens a perf event based on a Performance Monitoring Unit. 201 // 202 // Requires at least a 4.17 kernel. 203 // e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU" 204 // 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU" 205 // 206 // Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU 207 func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) { 208 // Getting the PMU type will fail if the kernel doesn't support 209 // the perf_[k,u]probe PMU. 210 eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type") 211 if errors.Is(err, os.ErrNotExist) { 212 return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported) 213 } 214 if err != nil { 215 return nil, err 216 } 217 218 // Use tracefs if we want to set kretprobe's retprobeMaxActive. 219 if args.RetprobeMaxActive != 0 { 220 return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported) 221 } 222 223 var config uint64 224 if args.Ret { 225 bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe") 226 if err != nil { 227 return nil, err 228 } 229 config |= 1 << bit 230 } 231 232 var ( 233 attr unix.PerfEventAttr 234 sp unsafe.Pointer 235 token string 236 ) 237 switch args.Type { 238 case tracefs.Kprobe: 239 // Create a pointer to a NUL-terminated string for the kernel. 240 sp, err = unsafeStringPtr(args.Symbol) 241 if err != nil { 242 return nil, err 243 } 244 245 token = tracefs.KprobeToken(args) 246 247 attr = unix.PerfEventAttr{ 248 // The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1, 249 // since it added the config2 (Ext2) field. Use Ext2 as probe_offset. 250 Size: unix.PERF_ATTR_SIZE_VER1, 251 Type: uint32(eventType), // PMU event type read from sysfs 252 Ext1: uint64(uintptr(sp)), // Kernel symbol to trace 253 Ext2: args.Offset, // Kernel symbol offset 254 Config: config, // Retprobe flag 255 } 256 case tracefs.Uprobe: 257 sp, err = unsafeStringPtr(args.Path) 258 if err != nil { 259 return nil, err 260 } 261 262 if args.RefCtrOffset != 0 { 263 config |= args.RefCtrOffset << uprobeRefCtrOffsetShift 264 } 265 266 token = tracefs.UprobeToken(args) 267 268 attr = unix.PerfEventAttr{ 269 // The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1, 270 // since it added the config2 (Ext2) field. The Size field controls the 271 // size of the internal buffer the kernel allocates for reading the 272 // perf_event_attr argument from userspace. 273 Size: unix.PERF_ATTR_SIZE_VER1, 274 Type: uint32(eventType), // PMU event type read from sysfs 275 Ext1: uint64(uintptr(sp)), // Uprobe path 276 Ext2: args.Offset, // Uprobe offset 277 Config: config, // RefCtrOffset, Retprobe flag 278 } 279 } 280 281 cpu := 0 282 if args.Pid != perfAllThreads { 283 cpu = -1 284 } 285 rawFd, err := unix.PerfEventOpen(&attr, args.Pid, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC) 286 287 // On some old kernels, kprobe PMU doesn't allow `.` in symbol names and 288 // return -EINVAL. Return ErrNotSupported to allow falling back to tracefs. 289 // https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343 290 if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") { 291 return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported) 292 } 293 // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL 294 // when trying to create a retprobe for a missing symbol. 295 if errors.Is(err, os.ErrNotExist) { 296 return nil, fmt.Errorf("token %s: not found: %w", token, err) 297 } 298 // Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved 299 // to an invalid insn boundary. The exact conditions that trigger this error are 300 // arch specific however. 301 if errors.Is(err, unix.EILSEQ) { 302 return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) 303 } 304 // Since at least commit cb9a19fe4aa51, ENOTSUPP is returned 305 // when attempting to set a uprobe on a trap instruction. 306 if errors.Is(err, sys.ENOTSUPP) { 307 return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err) 308 } 309 310 if err != nil { 311 return nil, fmt.Errorf("token %s: opening perf event: %w", token, err) 312 } 313 314 // Ensure the string pointer is not collected before PerfEventOpen returns. 315 runtime.KeepAlive(sp) 316 317 fd, err := sys.NewFD(rawFd) 318 if err != nil { 319 return nil, err 320 } 321 322 // Kernel has perf_[k,u]probe PMU available, initialize perf event. 323 return newPerfEvent(fd, nil), nil 324 } 325 326 // tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events. 327 // A new trace event group name is generated on every call to support creating 328 // multiple trace events for the same kernel or userspace symbol. 329 // Path and offset are only set in the case of uprobe(s) and are used to set 330 // the executable/library path on the filesystem and the offset where the probe is inserted. 331 // A perf event is then opened on the newly-created trace event and returned to the caller. 332 func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) { 333 groupPrefix := "ebpf" 334 if args.Group != "" { 335 groupPrefix = args.Group 336 } 337 338 // Generate a random string for each trace event we attempt to create. 339 // This value is used as the 'group' token in tracefs to allow creating 340 // multiple kprobe trace events with the same name. 341 group, err := tracefs.RandomGroup(groupPrefix) 342 if err != nil { 343 return nil, fmt.Errorf("randomizing group name: %w", err) 344 } 345 args.Group = group 346 347 // Create the [k,u]probe trace event using tracefs. 348 evt, err := tracefs.NewEvent(args) 349 if err != nil { 350 return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) 351 } 352 353 // Kprobes are ephemeral tracepoints and share the same perf event type. 354 fd, err := openTracepointPerfEvent(evt.ID(), args.Pid) 355 if err != nil { 356 // Make sure we clean up the created tracefs event when we return error. 357 // If a livepatch handler is already active on the symbol, the write to 358 // tracefs will succeed, a trace event will show up, but creating the 359 // perf event will fail with EBUSY. 360 _ = evt.Close() 361 return nil, err 362 } 363 364 return newPerfEvent(fd, evt), nil 365 }