github.com/cilium/ebpf@v0.15.0/link/kprobe.go (about) 1 package link 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "runtime" 8 "strings" 9 "unsafe" 10 11 "github.com/cilium/ebpf" 12 "github.com/cilium/ebpf/internal" 13 "github.com/cilium/ebpf/internal/sys" 14 "github.com/cilium/ebpf/internal/tracefs" 15 "github.com/cilium/ebpf/internal/unix" 16 ) 17 18 // KprobeOptions defines additional parameters that will be used 19 // when loading Kprobes. 20 type KprobeOptions struct { 21 // Arbitrary value that can be fetched from an eBPF program 22 // via `bpf_get_attach_cookie()`. 23 // 24 // Needs kernel 5.15+. 25 Cookie uint64 26 // Offset of the kprobe relative to the traced symbol. 27 // Can be used to insert kprobes at arbitrary offsets in kernel functions, 28 // e.g. in places where functions have been inlined. 29 Offset uint64 30 // Increase the maximum number of concurrent invocations of a kretprobe. 31 // Required when tracing some long running functions in the kernel. 32 // 33 // Deprecated: this setting forces the use of an outdated kernel API and is not portable 34 // across kernel versions. 35 RetprobeMaxActive int 36 // Prefix used for the event name if the kprobe must be attached using tracefs. 37 // The group name will be formatted as `<prefix>_<randomstr>`. 38 // The default empty string is equivalent to "ebpf" as the prefix. 39 TraceFSPrefix string 40 } 41 42 func (ko *KprobeOptions) cookie() uint64 { 43 if ko == nil { 44 return 0 45 } 46 return ko.Cookie 47 } 48 49 // Kprobe attaches the given eBPF program to a perf event that fires when the 50 // given kernel symbol starts executing. See /proc/kallsyms for available 51 // symbols. For example, printk(): 52 // 53 // kp, err := Kprobe("printk", prog, nil) 54 // 55 // Losing the reference to the resulting Link (kp) will close the Kprobe 56 // and prevent further execution of prog. The Link must be Closed during 57 // program shutdown to avoid leaking system resources. 58 // 59 // If attaching to symbol fails, automatically retries with the running 60 // platform's syscall prefix (e.g. __x64_) to support attaching to syscalls 61 // in a portable fashion. 62 func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { 63 k, err := kprobe(symbol, prog, opts, false) 64 if err != nil { 65 return nil, err 66 } 67 68 lnk, err := attachPerfEvent(k, prog, opts.cookie()) 69 if err != nil { 70 k.Close() 71 return nil, err 72 } 73 74 return lnk, nil 75 } 76 77 // Kretprobe attaches the given eBPF program to a perf event that fires right 78 // before the given kernel symbol exits, with the function stack left intact. 79 // See /proc/kallsyms for available symbols. For example, printk(): 80 // 81 // kp, err := Kretprobe("printk", prog, nil) 82 // 83 // Losing the reference to the resulting Link (kp) will close the Kretprobe 84 // and prevent further execution of prog. The Link must be Closed during 85 // program shutdown to avoid leaking system resources. 86 // 87 // If attaching to symbol fails, automatically retries with the running 88 // platform's syscall prefix (e.g. __x64_) to support attaching to syscalls 89 // in a portable fashion. 90 // 91 // On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol 92 // incorrectly returns unix.EINVAL instead of os.ErrNotExist. 93 func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { 94 k, err := kprobe(symbol, prog, opts, true) 95 if err != nil { 96 return nil, err 97 } 98 99 lnk, err := attachPerfEvent(k, prog, opts.cookie()) 100 if err != nil { 101 k.Close() 102 return nil, err 103 } 104 105 return lnk, nil 106 } 107 108 // isValidKprobeSymbol implements the equivalent of a regex match 109 // against "^[a-zA-Z_][0-9a-zA-Z_.]*$". 110 func isValidKprobeSymbol(s string) bool { 111 if len(s) < 1 { 112 return false 113 } 114 115 for i, c := range []byte(s) { 116 switch { 117 case c >= 'a' && c <= 'z': 118 case c >= 'A' && c <= 'Z': 119 case c == '_': 120 case i > 0 && c >= '0' && c <= '9': 121 122 // Allow `.` in symbol name. GCC-compiled kernel may change symbol name 123 // to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`. 124 // See: https://gcc.gnu.org/gcc-10/changes.html 125 case i > 0 && c == '.': 126 127 default: 128 return false 129 } 130 } 131 132 return true 133 } 134 135 // kprobe opens a perf event on the given symbol and attaches prog to it. 136 // If ret is true, create a kretprobe. 137 func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) { 138 if symbol == "" { 139 return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) 140 } 141 if prog == nil { 142 return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) 143 } 144 if !isValidKprobeSymbol(symbol) { 145 return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput) 146 } 147 if prog.Type() != ebpf.Kprobe { 148 return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) 149 } 150 151 args := tracefs.ProbeArgs{ 152 Type: tracefs.Kprobe, 153 Pid: perfAllThreads, 154 Symbol: symbol, 155 Ret: ret, 156 } 157 158 if opts != nil { 159 args.RetprobeMaxActive = opts.RetprobeMaxActive 160 args.Cookie = opts.Cookie 161 args.Offset = opts.Offset 162 args.Group = opts.TraceFSPrefix 163 } 164 165 // Use kprobe PMU if the kernel has it available. 166 tp, err := pmuProbe(args) 167 if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { 168 if prefix := internal.PlatformPrefix(); prefix != "" { 169 args.Symbol = prefix + symbol 170 tp, err = pmuProbe(args) 171 } 172 } 173 if err == nil { 174 return tp, nil 175 } 176 if err != nil && !errors.Is(err, ErrNotSupported) { 177 return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err) 178 } 179 180 // Use tracefs if kprobe PMU is missing. 181 args.Symbol = symbol 182 tp, err = tracefsProbe(args) 183 if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { 184 if prefix := internal.PlatformPrefix(); prefix != "" { 185 args.Symbol = prefix + symbol 186 tp, err = tracefsProbe(args) 187 } 188 } 189 if err != nil { 190 return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err) 191 } 192 193 return tp, nil 194 } 195 196 // pmuProbe opens a perf event based on a Performance Monitoring Unit. 197 // 198 // Requires at least a 4.17 kernel. 199 // e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU" 200 // 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU" 201 // 202 // Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU 203 func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) { 204 // Getting the PMU type will fail if the kernel doesn't support 205 // the perf_[k,u]probe PMU. 206 eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type") 207 if errors.Is(err, os.ErrNotExist) { 208 return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported) 209 } 210 if err != nil { 211 return nil, err 212 } 213 214 // Use tracefs if we want to set kretprobe's retprobeMaxActive. 215 if args.RetprobeMaxActive != 0 { 216 return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported) 217 } 218 219 var config uint64 220 if args.Ret { 221 bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe") 222 if err != nil { 223 return nil, err 224 } 225 config |= 1 << bit 226 } 227 228 var ( 229 attr unix.PerfEventAttr 230 sp unsafe.Pointer 231 token string 232 ) 233 switch args.Type { 234 case tracefs.Kprobe: 235 // Create a pointer to a NUL-terminated string for the kernel. 236 sp, err = unsafeStringPtr(args.Symbol) 237 if err != nil { 238 return nil, err 239 } 240 241 token = tracefs.KprobeToken(args) 242 243 attr = unix.PerfEventAttr{ 244 // The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1, 245 // since it added the config2 (Ext2) field. Use Ext2 as probe_offset. 246 Size: unix.PERF_ATTR_SIZE_VER1, 247 Type: uint32(eventType), // PMU event type read from sysfs 248 Ext1: uint64(uintptr(sp)), // Kernel symbol to trace 249 Ext2: args.Offset, // Kernel symbol offset 250 Config: config, // Retprobe flag 251 } 252 case tracefs.Uprobe: 253 sp, err = unsafeStringPtr(args.Path) 254 if err != nil { 255 return nil, err 256 } 257 258 if args.RefCtrOffset != 0 { 259 config |= args.RefCtrOffset << uprobeRefCtrOffsetShift 260 } 261 262 token = tracefs.UprobeToken(args) 263 264 attr = unix.PerfEventAttr{ 265 // The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1, 266 // since it added the config2 (Ext2) field. The Size field controls the 267 // size of the internal buffer the kernel allocates for reading the 268 // perf_event_attr argument from userspace. 269 Size: unix.PERF_ATTR_SIZE_VER1, 270 Type: uint32(eventType), // PMU event type read from sysfs 271 Ext1: uint64(uintptr(sp)), // Uprobe path 272 Ext2: args.Offset, // Uprobe offset 273 Config: config, // RefCtrOffset, Retprobe flag 274 } 275 } 276 277 rawFd, err := unix.PerfEventOpen(&attr, args.Pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) 278 279 // On some old kernels, kprobe PMU doesn't allow `.` in symbol names and 280 // return -EINVAL. Return ErrNotSupported to allow falling back to tracefs. 281 // https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343 282 if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") { 283 return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported) 284 } 285 // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL 286 // when trying to create a retprobe for a missing symbol. 287 if errors.Is(err, os.ErrNotExist) { 288 return nil, fmt.Errorf("token %s: not found: %w", token, err) 289 } 290 // Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved 291 // to an invalid insn boundary. The exact conditions that trigger this error are 292 // arch specific however. 293 if errors.Is(err, unix.EILSEQ) { 294 return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) 295 } 296 // Since at least commit cb9a19fe4aa51, ENOTSUPP is returned 297 // when attempting to set a uprobe on a trap instruction. 298 if errors.Is(err, sys.ENOTSUPP) { 299 return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err) 300 } 301 302 if err != nil { 303 return nil, fmt.Errorf("token %s: opening perf event: %w", token, err) 304 } 305 306 // Ensure the string pointer is not collected before PerfEventOpen returns. 307 runtime.KeepAlive(sp) 308 309 fd, err := sys.NewFD(rawFd) 310 if err != nil { 311 return nil, err 312 } 313 314 // Kernel has perf_[k,u]probe PMU available, initialize perf event. 315 return newPerfEvent(fd, nil), nil 316 } 317 318 // tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events. 319 // A new trace event group name is generated on every call to support creating 320 // multiple trace events for the same kernel or userspace symbol. 321 // Path and offset are only set in the case of uprobe(s) and are used to set 322 // the executable/library path on the filesystem and the offset where the probe is inserted. 323 // A perf event is then opened on the newly-created trace event and returned to the caller. 324 func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) { 325 groupPrefix := "ebpf" 326 if args.Group != "" { 327 groupPrefix = args.Group 328 } 329 330 // Generate a random string for each trace event we attempt to create. 331 // This value is used as the 'group' token in tracefs to allow creating 332 // multiple kprobe trace events with the same name. 333 group, err := tracefs.RandomGroup(groupPrefix) 334 if err != nil { 335 return nil, fmt.Errorf("randomizing group name: %w", err) 336 } 337 args.Group = group 338 339 // Create the [k,u]probe trace event using tracefs. 340 evt, err := tracefs.NewEvent(args) 341 if err != nil { 342 return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) 343 } 344 345 // Kprobes are ephemeral tracepoints and share the same perf event type. 346 fd, err := openTracepointPerfEvent(evt.ID(), args.Pid) 347 if err != nil { 348 // Make sure we clean up the created tracefs event when we return error. 349 // If a livepatch handler is already active on the symbol, the write to 350 // tracefs will succeed, a trace event will show up, but creating the 351 // perf event will fail with EBUSY. 352 _ = evt.Close() 353 return nil, err 354 } 355 356 return newPerfEvent(fd, evt), nil 357 }