// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#ifdef __TARGET_ARCH_arm64
#include <bpf/bpf_tracing.h>
#endif /* __TARGET_ARCH_arm64 */

#include <gadget/mntns_filter.h>
#ifdef WITH_LONG_PATHS
#include <gadget/filesystem.h>
#endif
#include "execsnoop.h"

// Defined in include/uapi/linux/magic.h
#define OVERLAYFS_SUPER_MAGIC 0x794c7630

// Tunables patched by userspace before the program is loaded
// (const volatile globals are rewritten via the BPF skeleton).
const volatile bool ignore_failed = true;       // drop events for failed execve(2)
const volatile uid_t targ_uid = INVALID_UID;    // only trace this uid when valid
const volatile int max_args = DEFAULT_MAXARGS;  // max argv entries copied per event

// Zero template used to reserve a map slot with BPF_NOEXIST before
// filling it in place (avoids a large on-stack struct event).
static const struct event empty_event = {};

// In-flight execve(2) events, keyed by the *thread* pid that entered the
// syscall. Filled in sys_enter_execve, emitted and removed in
// sys_exit_execve.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
#ifdef WITH_LONG_PATHS
	// struct event is much larger with cwd/exepath buffers, so cap
	// the map lower to bound memory usage.
	__uint(max_entries, 1024);
#else /* !WITH_LONG_PATHS */
	__uint(max_entries, 10240);
#endif /* !WITH_LONG_PATHS */
	__type(key, pid_t);
	__type(value, struct event);
} execs SEC(".maps");

// Perf ring buffer used to push completed events to userspace.
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

// man clone(2):
//   If any of the threads in a thread group performs an
//   execve(2), then all threads other than the thread group
//   leader are terminated, and the new program is executed in
//   the thread group leader.
//
// sys_enter_execve might be called from a thread and the corresponding
// sys_exit_execve will be called from the thread group leader in case of
// execve success, or from the same thread in case of execve failure. So we
// need to lookup the pid from the tgid in sys_exit_execve.
//
// We don't know in advance which execve(2) will succeed, so we need to keep
// track of all tgid<->pid mappings in a BPF map.
//
// We don't want to use bpf_for_each_map_elem() because it requires Linux 5.13.
56 // 57 // If several execve(2) are performed in parallel from different threads, only 58 // one can succeed. The kernel will run the tracepoint syscalls/sys_exit_execve 59 // for the failing execve(2) first and then for the successful one last. 60 // 61 // So we can insert a tgid->pid mapping in the same hash entry by modulo adding 62 // the pid in value and removing it by subtracting. By the time we need to 63 // lookup the pid by the tgid, there will be only one pid left in the hash entry. 64 struct { 65 __uint(type, BPF_MAP_TYPE_HASH); 66 __type(key, pid_t); // tgid 67 __type(value, u64); // sum of pids 68 __uint(max_entries, 1024); 69 } pid_by_tgid SEC(".maps"); 70 71 static __always_inline bool valid_uid(uid_t uid) 72 { 73 return uid != INVALID_UID; 74 } 75 76 SEC("tracepoint/syscalls/sys_enter_execve") 77 int ig_execve_e(struct syscall_trace_enter *ctx) 78 { 79 u64 id; 80 char *cwd; 81 pid_t pid, tgid; 82 u64 zero64 = 0; 83 u64 *pid_sum; 84 struct event *event; 85 struct fs_struct *fs; 86 struct task_struct *task; 87 unsigned int ret; 88 const char **args = (const char **)(ctx->args[1]); 89 const char *argp; 90 int i; 91 u64 mntns_id; 92 u64 uid_gid = bpf_get_current_uid_gid(); 93 u32 uid = (u32)uid_gid; 94 u32 gid = (u32)(uid_gid >> 32); 95 96 if (valid_uid(targ_uid) && targ_uid != uid) 97 return 0; 98 99 task = (struct task_struct *)bpf_get_current_task(); 100 mntns_id = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum); 101 102 if (gadget_should_discard_mntns_id(mntns_id)) 103 return 0; 104 105 id = bpf_get_current_pid_tgid(); 106 pid = (pid_t)id; 107 tgid = id >> 32; 108 if (bpf_map_update_elem(&execs, &pid, &empty_event, BPF_NOEXIST)) 109 return 0; 110 111 event = bpf_map_lookup_elem(&execs, &pid); 112 if (!event) 113 return 0; 114 115 bpf_map_update_elem(&pid_by_tgid, &tgid, &zero64, BPF_NOEXIST); 116 117 pid_sum = bpf_map_lookup_elem(&pid_by_tgid, &tgid); 118 if (!pid_sum) 119 return 0; 120 121 __atomic_add_fetch(pid_sum, (u64)pid, 
__ATOMIC_RELAXED); 122 123 event->timestamp = bpf_ktime_get_boot_ns(); 124 event->pid = tgid; 125 event->uid = uid; 126 event->gid = gid; 127 // loginuid is only available when CONFIG_AUDIT is set 128 if (bpf_core_field_exists(task->loginuid)) 129 event->loginuid = BPF_CORE_READ(task, loginuid.val); 130 else 131 event->loginuid = 4294967295; // -1 or "no user id" 132 // sessionid is only available when CONFIG_AUDIT is set 133 if (bpf_core_field_exists(task->sessionid)) 134 event->sessionid = BPF_CORE_READ(task, sessionid); 135 136 event->ppid = (pid_t)BPF_CORE_READ(task, real_parent, tgid); 137 event->args_count = 0; 138 event->args_size = 0; 139 event->mntns_id = mntns_id; 140 141 #ifdef WITH_LONG_PATHS 142 fs = BPF_CORE_READ(task, fs); 143 cwd = get_path_str(&fs->pwd); 144 bpf_probe_read_kernel_str(event->cwd, MAX_STRING_SIZE, cwd); 145 #endif 146 147 ret = bpf_probe_read_user_str(event->args, ARGSIZE, 148 (const char *)ctx->args[0]); 149 if (ret <= ARGSIZE) { 150 event->args_size += ret; 151 } else { 152 /* write an empty string */ 153 event->args[0] = '\0'; 154 event->args_size++; 155 } 156 157 event->args_count++; 158 #pragma unroll 159 for (i = 1; i < TOTAL_MAX_ARGS && i < max_args; i++) { 160 bpf_probe_read_user(&argp, sizeof(argp), &args[i]); 161 if (!argp) 162 return 0; 163 164 if (event->args_size > LAST_ARG) 165 return 0; 166 167 ret = bpf_probe_read_user_str(&event->args[event->args_size], 168 ARGSIZE, argp); 169 if (ret > ARGSIZE) 170 return 0; 171 172 event->args_count++; 173 event->args_size += ret; 174 } 175 /* try to read one more argument to check if there is one */ 176 bpf_probe_read_user(&argp, sizeof(argp), &args[max_args]); 177 if (!argp) 178 return 0; 179 180 /* pointer to max_args+1 isn't null, asume we have more arguments */ 181 event->args_count++; 182 return 0; 183 } 184 185 static __always_inline bool has_upper_layer() 186 { 187 struct task_struct *task = (struct task_struct *)bpf_get_current_task(); 188 struct inode *inode = 
BPF_CORE_READ(task, mm, exe_file, f_inode); 189 if (!inode) { 190 return false; 191 } 192 unsigned long sb_magic = BPF_CORE_READ(inode, i_sb, s_magic); 193 194 if (sb_magic != OVERLAYFS_SUPER_MAGIC) { 195 return false; 196 } 197 198 struct dentry *upperdentry; 199 200 // struct ovl_inode defined in fs/overlayfs/ovl_entry.h 201 // Unfortunately, not exported to vmlinux.h 202 // and not available in /sys/kernel/btf/vmlinux 203 // See https://github.com/cilium/ebpf/pull/1300 204 // We only rely on vfs_inode and __upperdentry relative positions 205 bpf_probe_read_kernel(&upperdentry, sizeof(upperdentry), 206 ((void *)inode) + 207 bpf_core_type_size(struct inode)); 208 return upperdentry != NULL; 209 } 210 211 SEC("tracepoint/syscalls/sys_exit_execve") 212 int ig_execve_x(struct syscall_trace_exit *ctx) 213 { 214 u64 id; 215 pid_t pid, tgid; 216 pid_t execs_lookup_key; 217 u64 *pid_sum; 218 int ret; 219 struct event *event; 220 u32 uid = (u32)bpf_get_current_uid_gid(); 221 struct task_struct *task = (struct task_struct *)bpf_get_current_task(); 222 struct task_struct *parent = BPF_CORE_READ(task, real_parent); 223 struct file *exe_file; 224 char *exepath; 225 226 if (valid_uid(targ_uid) && targ_uid != uid) 227 return 0; 228 id = bpf_get_current_pid_tgid(); 229 pid = (pid_t)id; 230 tgid = id >> 32; 231 ret = ctx->ret; 232 233 pid_sum = bpf_map_lookup_elem(&pid_by_tgid, &tgid); 234 if (!pid_sum) 235 return 0; 236 237 // sys_enter_execve and sys_exit_execve might be called from different 238 // threads. We need to lookup the pid from the tgid. 239 execs_lookup_key = (ret == 0) ? 
(pid_t)*pid_sum : pid; 240 event = bpf_map_lookup_elem(&execs, &execs_lookup_key); 241 242 // Remove the tgid->pid mapping if the value reaches 0 243 // or the execve() call was successful 244 __atomic_add_fetch(pid_sum, (u64)-pid, __ATOMIC_RELAXED); 245 if (*pid_sum == 0 || ret == 0) 246 bpf_map_delete_elem(&pid_by_tgid, &tgid); 247 248 if (!event) 249 return 0; 250 if (ignore_failed && ret < 0) 251 goto cleanup; 252 253 if (ret == 0) { 254 event->upper_layer = has_upper_layer(); 255 } 256 257 event->retval = ret; 258 bpf_get_current_comm(&event->comm, sizeof(event->comm)); 259 260 if (parent != NULL) 261 bpf_probe_read_kernel(&event->pcomm, sizeof(event->pcomm), 262 parent->comm); 263 264 #ifdef WITH_LONG_PATHS 265 exe_file = BPF_CORE_READ(task, mm, exe_file); 266 exepath = get_path_str(&exe_file->f_path); 267 bpf_probe_read_kernel_str(event->exepath, MAX_STRING_SIZE, exepath); 268 #endif 269 270 size_t len = EVENT_SIZE(event); 271 if (len <= sizeof(*event)) 272 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event, 273 len); 274 cleanup: 275 bpf_map_delete_elem(&execs, &execs_lookup_key); 276 return 0; 277 } 278 279 char LICENSE[] SEC("license") = "GPL";