github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/exec/tracer/bpf/execsnoop.bpf.c

github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/exec/tracer/bpf/execsnoop.bpf.c (about)

     1  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
     2  #include <vmlinux.h>
     3  #include <bpf/bpf_helpers.h>
     4  #include <bpf/bpf_core_read.h>
     5  #ifdef __TARGET_ARCH_arm64
     6  #include <bpf/bpf_tracing.h>
     7  #endif /* __TARGET_ARCH_arm64 */
     8  
     9  #include <gadget/mntns_filter.h>
    10  #ifdef WITH_LONG_PATHS
    11  #include <gadget/filesystem.h>
    12  #endif
    13  #include "execsnoop.h"
    14  
    15  // Defined in include/uapi/linux/magic.h
    16  #define OVERLAYFS_SUPER_MAGIC 0x794c7630
    17  
    18  const volatile bool ignore_failed = true; // when true, ig_execve_x emits nothing for execve() calls whose return value is < 0
    19  const volatile uid_t targ_uid = INVALID_UID; // uid filter applied by both tracepoints; INVALID_UID disables it (see valid_uid)
    20  const volatile int max_args = DEFAULT_MAXARGS; // upper bound on the argv entries ig_execve_e copies (also capped by TOTAL_MAX_ARGS)
    21  
    22  static const struct event empty_event = {}; // zero-initialised template used by ig_execve_e to create a new entry in `execs`
    23  
    24  struct { // execve() calls in flight: entries are created by ig_execve_e and looked up / deleted by ig_execve_x
    25  	__uint(type, BPF_MAP_TYPE_HASH);
    26  #ifdef WITH_LONG_PATHS
    27  	__uint(max_entries, 1024); // NOTE(review): presumably smaller because struct event is larger with path buffers — confirm
    28  #else /* !WITH_LONG_PATHS */
    29  	__uint(max_entries, 10240);
    30  #endif /* !WITH_LONG_PATHS */
    31  	__type(key, pid_t); // lower 32 bits of bpf_get_current_pid_tgid() for the thread that entered execve()
    32  	__type(value, struct event); // event being assembled for that thread
    33  } execs SEC(".maps");
    34  
    35  struct { // perf event array that ig_execve_x writes finished events to with bpf_perf_event_output()
    36  	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    37  	__uint(key_size, sizeof(u32));
    38  	__uint(value_size, sizeof(u32));
    39  } events SEC(".maps");
    40  
    41  // man clone(2):
    42  //   If any of the threads in a thread group performs an
    43  //   execve(2), then all threads other than the thread group
    44  //   leader are terminated, and the new program is executed in
    45  //   the thread group leader.
    46  //
    47  // sys_enter_execve might be called from a thread and the corresponding
    48  // sys_exit_execve will be called from the thread group leader in case of
    49  // execve success, or from the same thread in case of execve failure. So we
    50  // need to lookup the pid from the tgid in sys_exit_execve.
    51  //
    52  // We don't know in advance which execve(2) will succeed, so we need to keep
    53  // track of all tgid<->pid mappings in a BPF map.
    54  //
    55  // We don't want to use bpf_for_each_map_elem() because it requires Linux 5.13.
    56  //
    57  // If several execve(2) are performed in parallel from different threads, only
    58  // one can succeed. The kernel will run the tracepoint syscalls/sys_exit_execve
    59  // for the failing execve(2) first and then for the successful one last.
    60  //
    61  // So we can record every tgid->pid mapping in the same hash entry by adding
    62  // the pid to the value (modulo 2^64) on entry and subtracting it on exit. By the
    63  // time we need to look up the pid by the tgid, only one pid is left in the entry.
    64  struct { // per thread group: running sum of the pids added by ig_execve_e and subtracted by ig_execve_x
    65  	__uint(type, BPF_MAP_TYPE_HASH);
    66  	__type(key, pid_t); // tgid
    67  	__type(value, u64); // sum of pids
    68  	__uint(max_entries, 1024); // at most 1024 thread groups can have a slot at the same time
    69  } pid_by_tgid SEC(".maps");
    70  
    71  static __always_inline bool valid_uid(uid_t uid)
    72  {
    73  	return uid != INVALID_UID;
    74  }
    75  
    76  SEC("tracepoint/syscalls/sys_enter_execve")
    77  int ig_execve_e(struct syscall_trace_enter *ctx)
    78  {
    79  	u64 id;
    80  	char *cwd;
    81  	pid_t pid, tgid;
    82  	u64 zero64 = 0;
    83  	u64 *pid_sum;
    84  	struct event *event;
    85  	struct fs_struct *fs;
    86  	struct task_struct *task;
    87  	unsigned int ret;
    88  	const char **args = (const char **)(ctx->args[1]);
    89  	const char *argp;
    90  	int i;
    91  	u64 mntns_id;
    92  	u64 uid_gid = bpf_get_current_uid_gid();
    93  	u32 uid = (u32)uid_gid;
    94  	u32 gid = (u32)(uid_gid >> 32);
    95  
    96  	if (valid_uid(targ_uid) && targ_uid != uid)
    97  		return 0;
    98  
    99  	task = (struct task_struct *)bpf_get_current_task();
   100  	mntns_id = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
   101  
   102  	if (gadget_should_discard_mntns_id(mntns_id))
   103  		return 0;
   104  
   105  	id = bpf_get_current_pid_tgid();
   106  	pid = (pid_t)id;
   107  	tgid = id >> 32;
   108  	if (bpf_map_update_elem(&execs, &pid, &empty_event, BPF_NOEXIST))
   109  		return 0;
   110  
   111  	event = bpf_map_lookup_elem(&execs, &pid);
   112  	if (!event)
   113  		return 0;
   114  
   115  	bpf_map_update_elem(&pid_by_tgid, &tgid, &zero64, BPF_NOEXIST);
   116  
   117  	pid_sum = bpf_map_lookup_elem(&pid_by_tgid, &tgid);
   118  	if (!pid_sum)
   119  		return 0;
   120  
   121  	__atomic_add_fetch(pid_sum, (u64)pid, __ATOMIC_RELAXED);
   122  
   123  	event->timestamp = bpf_ktime_get_boot_ns();
   124  	event->pid = tgid;
   125  	event->uid = uid;
   126  	event->gid = gid;
   127  	// loginuid is only available when CONFIG_AUDIT is set
   128  	if (bpf_core_field_exists(task->loginuid))
   129  		event->loginuid = BPF_CORE_READ(task, loginuid.val);
   130  	else
   131  		event->loginuid = 4294967295; // -1 or "no user id"
   132  	// sessionid is only available when CONFIG_AUDIT is set
   133  	if (bpf_core_field_exists(task->sessionid))
   134  		event->sessionid = BPF_CORE_READ(task, sessionid);
   135  
   136  	event->ppid = (pid_t)BPF_CORE_READ(task, real_parent, tgid);
   137  	event->args_count = 0;
   138  	event->args_size = 0;
   139  	event->mntns_id = mntns_id;
   140  
   141  #ifdef WITH_LONG_PATHS
   142  	fs = BPF_CORE_READ(task, fs);
   143  	cwd = get_path_str(&fs->pwd);
   144  	bpf_probe_read_kernel_str(event->cwd, MAX_STRING_SIZE, cwd);
   145  #endif
   146  
   147  	ret = bpf_probe_read_user_str(event->args, ARGSIZE,
   148  				      (const char *)ctx->args[0]);
   149  	if (ret <= ARGSIZE) {
   150  		event->args_size += ret;
   151  	} else {
   152  		/* write an empty string */
   153  		event->args[0] = '\0';
   154  		event->args_size++;
   155  	}
   156  
   157  	event->args_count++;
   158  #pragma unroll
   159  	for (i = 1; i < TOTAL_MAX_ARGS && i < max_args; i++) {
   160  		bpf_probe_read_user(&argp, sizeof(argp), &args[i]);
   161  		if (!argp)
   162  			return 0;
   163  
   164  		if (event->args_size > LAST_ARG)
   165  			return 0;
   166  
   167  		ret = bpf_probe_read_user_str(&event->args[event->args_size],
   168  					      ARGSIZE, argp);
   169  		if (ret > ARGSIZE)
   170  			return 0;
   171  
   172  		event->args_count++;
   173  		event->args_size += ret;
   174  	}
   175  	/* try to read one more argument to check if there is one */
   176  	bpf_probe_read_user(&argp, sizeof(argp), &args[max_args]);
   177  	if (!argp)
   178  		return 0;
   179  
   180  	/* pointer to max_args+1 isn't null, asume we have more arguments */
   181  	event->args_count++;
   182  	return 0;
   183  }
   184  
   185  static __always_inline bool has_upper_layer()
   186  {
   187  	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
   188  	struct inode *inode = BPF_CORE_READ(task, mm, exe_file, f_inode);
   189  	if (!inode) {
   190  		return false;
   191  	}
   192  	unsigned long sb_magic = BPF_CORE_READ(inode, i_sb, s_magic);
   193  
   194  	if (sb_magic != OVERLAYFS_SUPER_MAGIC) {
   195  		return false;
   196  	}
   197  
   198  	struct dentry *upperdentry;
   199  
   200  	// struct ovl_inode defined in fs/overlayfs/ovl_entry.h
   201  	// Unfortunately, not exported to vmlinux.h
   202  	// and not available in /sys/kernel/btf/vmlinux
   203  	// See https://github.com/cilium/ebpf/pull/1300
   204  	// We only rely on vfs_inode and __upperdentry relative positions
   205  	bpf_probe_read_kernel(&upperdentry, sizeof(upperdentry),
   206  			      ((void *)inode) +
   207  				      bpf_core_type_size(struct inode));
   208  	return upperdentry != NULL;
   209  }
   210  
   211  SEC("tracepoint/syscalls/sys_exit_execve")
   212  int ig_execve_x(struct syscall_trace_exit *ctx)
   213  {
   214  	u64 id;
   215  	pid_t pid, tgid;
   216  	pid_t execs_lookup_key;
   217  	u64 *pid_sum;
   218  	int ret;
   219  	struct event *event;
   220  	u32 uid = (u32)bpf_get_current_uid_gid();
   221  	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
   222  	struct task_struct *parent = BPF_CORE_READ(task, real_parent);
   223  	struct file *exe_file;
   224  	char *exepath;
   225  
   226  	if (valid_uid(targ_uid) && targ_uid != uid)
   227  		return 0;
   228  	id = bpf_get_current_pid_tgid();
   229  	pid = (pid_t)id;
   230  	tgid = id >> 32;
   231  	ret = ctx->ret;
   232  
   233  	pid_sum = bpf_map_lookup_elem(&pid_by_tgid, &tgid);
   234  	if (!pid_sum)
   235  		return 0;
   236  
   237  	// sys_enter_execve and sys_exit_execve might be called from different
   238  	// threads. We need to lookup the pid from the tgid.
   239  	execs_lookup_key = (ret == 0) ? (pid_t)*pid_sum : pid;
   240  	event = bpf_map_lookup_elem(&execs, &execs_lookup_key);
   241  
   242  	// Remove the tgid->pid mapping if the value reaches 0
   243  	// or the execve() call was successful
   244  	__atomic_add_fetch(pid_sum, (u64)-pid, __ATOMIC_RELAXED);
   245  	if (*pid_sum == 0 || ret == 0)
   246  		bpf_map_delete_elem(&pid_by_tgid, &tgid);
   247  
   248  	if (!event)
   249  		return 0;
   250  	if (ignore_failed && ret < 0)
   251  		goto cleanup;
   252  
   253  	if (ret == 0) {
   254  		event->upper_layer = has_upper_layer();
   255  	}
   256  
   257  	event->retval = ret;
   258  	bpf_get_current_comm(&event->comm, sizeof(event->comm));
   259  
   260  	if (parent != NULL)
   261  		bpf_probe_read_kernel(&event->pcomm, sizeof(event->pcomm),
   262  				      parent->comm);
   263  
   264  #ifdef WITH_LONG_PATHS
   265  	exe_file = BPF_CORE_READ(task, mm, exe_file);
   266  	exepath = get_path_str(&exe_file->f_path);
   267  	bpf_probe_read_kernel_str(event->exepath, MAX_STRING_SIZE, exepath);
   268  #endif
   269  
   270  	size_t len = EVENT_SIZE(event);
   271  	if (len <= sizeof(*event))
   272  		bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event,
   273  				      len);
   274  cleanup:
   275  	bpf_map_delete_elem(&execs, &execs_lookup_key);
   276  	return 0;
   277  }
   278  
   279  char LICENSE[] SEC("license") = "GPL"; // license string placed in the "license" section; NOTE(review): differs from the SPDX line at the top (LGPL-2.1 OR BSD-2-Clause) — presumably declared as GPL for the BPF loader, confirm