github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/open/tracer/bpf/opensnoop.bpf.c

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
// Copyright (c) 2020 Netflix
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <gadget/mntns_filter.h>
#include <gadget/filesystem.h>
#include "opensnoop.h"

#define NR_MAX_PREFIX_FILTER 255
#define CHAR_BIT 8

const volatile pid_t targ_pid = 0;
const volatile pid_t targ_tgid = 0;
const volatile uid_t targ_uid = INVALID_UID;
const volatile bool targ_failed = false;
const volatile bool get_full_path = false;
const volatile __u32 prefixes_nr = 0;

// we need this to make sure the compiler doesn't remove our struct
const struct event *unusedevent __attribute__((unused));

/* syscall arguments captured at enter time, keyed by pid (kernel tid) */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 10240);
	__type(key, u32);
	__type(value, struct start_t);
} start SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

/* allowed path prefixes, matched byte-wise against the filename */
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__type(key, struct prefix_key);
	__type(value, __u8);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__uint(max_entries, NR_MAX_PREFIX_FILTER);
} prefixes SEC(".maps");

/* per-pid scratch slot used to build the LPM trie lookup key off the stack */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, u32);
	__type(value, struct prefix_key);
} prefix_keys SEC(".maps");

/* per-CPU scratch buffer: struct event is too large for the BPF stack */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct event);
} empty_event SEC(".maps");

static const struct prefix_key empty_prefix_key = {};

static __always_inline bool valid_uid(uid_t uid)
{
	return uid != INVALID_UID;
}

static __always_inline bool trace_allowed(u32 tgid, u32 pid,
					  const char *filename)
{
	u64 mntns_id;
	u32 uid;

	/* filters */
	if (targ_tgid && targ_tgid != tgid)
		return false;
	if (targ_pid && targ_pid != pid)
		return false;
	if (valid_uid(targ_uid)) {
		uid = (u32)bpf_get_current_uid_gid();
		if (targ_uid != uid) {
			return false;
		}
	}

	if (prefixes_nr) {
		struct prefix_key *key;
		bool found;

		found = false;

		/*
		 * Allocate prefix_key from map rather than stack to avoid
		 * hitting the verifier limit.
		 */
		if (bpf_map_update_elem(&prefix_keys, &pid, &empty_prefix_key,
					BPF_NOEXIST))
			goto clean;

		key = bpf_map_lookup_elem(&prefix_keys, &pid);
		if (!key)
			goto clean;

		/*
		 * It is fine to give the whole buffer size as prefixlen here.
		 * Indeed, the in-kernel lookup stops when there is a difference
		 * between the node (i.e. tested prefix) and the key (i.e.
		 * filename).
		 * There will always be a difference if the filename is longer
		 * than the prefix, but what matters is the matched length.
		 * If it equals the prefix length, then the filename matches the
		 * prefix.
		 */
		key->prefixlen = sizeof(key->filename) * CHAR_BIT;
		__builtin_memcpy(key->filename, filename,
				 sizeof(key->filename));

		found = bpf_map_lookup_elem(&prefixes, key) != NULL;
clean:
		bpf_map_delete_elem(&prefix_keys, &pid);
		if (!found)
			return false;
	}

	mntns_id = gadget_get_mntns_id();

	if (gadget_should_discard_mntns_id(mntns_id))
		return false;

	return true;
}

static __always_inline int trace_enter(const char *filename, int flags,
				       __u16 mode)
{
	u64 id = bpf_get_current_pid_tgid();
	/* use kernel terminology here for tgid/pid: */
	u32 tgid = id >> 32;
	u32 pid = id;

	struct start_t s = {};

	bpf_probe_read_user_str(s.fname, sizeof(s.fname), filename);

	/* store arg info for later lookup */
	if (!trace_allowed(tgid, pid, (const char *)s.fname))
		return 0;

	s.flags = flags;
	s.mode = mode;

	// TODO: not related to this commit. Shouldn't it be id instead of pid?
	bpf_map_update_elem(&start, &pid, &s, BPF_ANY);

	return 0;
}

SEC("tracepoint/syscalls/sys_enter_open")
int ig_open_e(struct syscall_trace_enter *ctx)
{
	return trace_enter((const char *)ctx->args[0], (int)ctx->args[1],
			   (__u16)ctx->args[2]);
}

SEC("tracepoint/syscalls/sys_enter_openat")
int ig_openat_e(struct syscall_trace_enter *ctx)
{
	return trace_enter((const char *)ctx->args[1], (int)ctx->args[2],
			   (__u16)ctx->args[3]);
}

static __always_inline int trace_exit(struct syscall_trace_exit *ctx)
{
	struct event *event;
	long int ret;
	__u32 fd;
	__s32 errval;
	u32 pid = bpf_get_current_pid_tgid();
	u64 uid_gid = bpf_get_current_uid_gid();
	u64 mntns_id;
	size_t full_fname_len = 0;
	struct start_t *s;

	s = bpf_map_lookup_elem(&start, &pid);
	if (!s)
		return 0; /* missed entry */

	u32 zero = 0;
	event = bpf_map_lookup_elem(&empty_event, &zero);
	if (!event)
		return 0; // should never happen

	event->flags = s->flags;
	event->mode = s->mode;
	__builtin_memcpy(event->fname, s->fname, sizeof(s->fname));

	ret = ctx->ret;
	if (targ_failed && ret >= 0)
		goto cleanup; /* want failed only */

	fd = 0;
	errval = 0;
	if (ret >= 0) {
		fd = ret;
	} else {
		errval = -ret;
	}

	/* event data */
	event->pid = bpf_get_current_pid_tgid() >> 32;
	event->uid = (u32)uid_gid;
	event->gid = (u32)(uid_gid >> 32);
	bpf_get_current_comm(&event->comm, sizeof(event->comm));
	event->err = errval;
	event->fd = fd;
	event->mntns_id = gadget_get_mntns_id();
	event->timestamp = bpf_ktime_get_boot_ns();

	// Attempt to extract the full file path with symlink resolution
	if (ret >= 0 && get_full_path) {
		long r = read_full_path_of_open_file_fd(
			ret, (char *)event->full_fname,
			sizeof(event->full_fname));
		if (r > 0) {
			full_fname_len = (size_t)r;
		} else {
			// If we cannot get the full path, emit an empty string
			event->full_fname[0] = '\0';
			full_fname_len = 1;
		}
	} else {
		// Open failed or full-path collection is disabled:
		// emit an empty string
		event->full_fname[0] = '\0';
		full_fname_len = 1;
	}

	/* emit event, trimming the unused tail of full_fname */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event,
			      sizeof(struct event) -
				      (PATH_MAX - full_fname_len));
cleanup:
	bpf_map_delete_elem(&start, &pid);
	return 0;
}

SEC("tracepoint/syscalls/sys_exit_open")
int ig_open_x(struct syscall_trace_exit *ctx)
{
	return trace_exit(ctx);
}

SEC("tracepoint/syscalls/sys_exit_openat")
int ig_openat_x(struct syscall_trace_exit *ctx)
{
	return trace_exit(ctx);
}

char LICENSE[] SEC("license") = "GPL";
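
The prefix filter in trace_allowed() expects userspace to seed the `prefixes` LPM trie before the programs are attached. Below is a minimal, hedged sketch of how a libbpf-based loader might insert one path prefix; it is not part of this file or of the gadget's real loader. The layout of struct prefix_key (field names taken from the BPF side, buffer size assumed) and the helper name add_prefix_filter are illustrative; the real definitions live in opensnoop.h and in the gadget's Go tracer.

/* Illustrative userspace snippet (assumptions noted above): insert a path
 * prefix such as "/etc" into the `prefixes` LPM trie via libbpf. */
#include <string.h>
#include <bpf/bpf.h>

struct prefix_key {
	__u32 prefixlen;	/* LPM trie ABI: number of significant bits */
	char filename[255];	/* assumed buffer size; see opensnoop.h */
};

/* Hypothetical helper: prefixes_map_fd is the fd of the `prefixes` map. */
static int add_prefix_filter(int prefixes_map_fd, const char *prefix)
{
	struct prefix_key key = {};
	__u8 value = 1;	/* the value is never read; any byte works */

	/* For inserted nodes, prefixlen is the length of the prefix in bits;
	 * the BPF side looks up with a full-length key, so a lookup matches
	 * whenever the filename starts with this prefix. */
	key.prefixlen = strlen(prefix) * 8;
	strncpy(key.filename, prefix, sizeof(key.filename) - 1);

	return bpf_map_update_elem(prefixes_map_fd, &key, &value, BPF_ANY);
}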