github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/open/tracer/bpf/opensnoop.bpf.c (about)

     1  // SPDX-License-Identifier: GPL-2.0
     2  // Copyright (c) 2019 Facebook
     3  // Copyright (c) 2020 Netflix
     4  #include <vmlinux.h>
     5  #include <bpf/bpf_helpers.h>
     6  #include <bpf/bpf_core_read.h>
     7  #include <gadget/mntns_filter.h>
     8  #include <gadget/filesystem.h>
     9  #include "opensnoop.h"
    10  
/* Capacity (max_entries) of the `prefixes` LPM trie defined below. */
#define NR_MAX_PREFIX_FILTER 255
/* Bits per byte; used to express the LPM key length (prefixlen) in bits. */
#define CHAR_BIT 8

/*
 * Filter / configuration knobs read by the programs in this file.
 * NOTE(review): being `const volatile` globals, they are presumably filled in
 * by the userspace loader before the object is loaded -- confirm in the tracer.
 */
/* Thread id to trace (lower 32 bits of pid_tgid); 0 disables the filter. */
const volatile pid_t targ_pid = 0;
/* Thread group id to trace (upper 32 bits of pid_tgid); 0 disables the filter. */
const volatile pid_t targ_tgid = 0;
/* Uid to trace; INVALID_UID disables the filter (see valid_uid()). */
const volatile uid_t targ_uid = INVALID_UID;
/* When true, only calls that returned a negative value are reported. */
const volatile bool targ_failed = false;
/* When true, the full path of a successfully opened fd is resolved at exit. */
const volatile bool get_full_path = false;
/* Non-zero enables filename prefix filtering through the `prefixes` map. */
const volatile __u32 prefixes_nr = 0;

// we need this to make sure the compiler doesn't remove our struct
const struct event *unusedevent __attribute__((unused));
    23  
/*
 * Arguments captured at syscall entry, keyed by thread id. Entries are written
 * by trace_enter() and looked up, then deleted, by trace_exit().
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 10240);
	__type(key, u32);
	__type(value, struct start_t);
} start SEC(".maps");

/* Perf event array through which trace_exit() emits `struct event` records. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

/*
 * LPM trie holding the filename prefixes to match against. This file only
 * performs lookups on it (see trace_allowed()); it is never written here.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__type(key, struct prefix_key);
	__type(value, __u8);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__uint(max_entries, NR_MAX_PREFIX_FILTER);
} prefixes SEC(".maps");

/*
 * Scratch storage for the LPM lookup key, keyed by thread id. trace_allowed()
 * inserts an entry, uses it as the key buffer and deletes it before returning,
 * so that the key does not have to live on the BPF stack.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, u32);
	__type(value, struct prefix_key);
} prefix_keys SEC(".maps");

/*
 * Single-slot per-CPU buffer in which trace_exit() builds the event before
 * sending it. Nothing in this file zeroes the slot between uses.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct event);
} empty_event SEC(".maps");

/* Zero-initialized template used to create entries in `prefix_keys`. */
static const struct prefix_key empty_prefix_key = {};
    60  
    61  static __always_inline bool valid_uid(uid_t uid)
    62  {
    63  	return uid != INVALID_UID;
    64  }
    65  
    66  static __always_inline bool trace_allowed(u32 tgid, u32 pid,
    67  					  const char *filename)
    68  {
    69  	u64 mntns_id;
    70  	u32 uid;
    71  
    72  	/* filters */
    73  	if (targ_tgid && targ_tgid != tgid)
    74  		return false;
    75  	if (targ_pid && targ_pid != pid)
    76  		return false;
    77  	if (valid_uid(targ_uid)) {
    78  		uid = (u32)bpf_get_current_uid_gid();
    79  		if (targ_uid != uid) {
    80  			return false;
    81  		}
    82  	}
    83  
    84  	if (prefixes_nr) {
    85  		struct prefix_key *key;
    86  		bool found;
    87  
    88  		found = false;
    89  
    90  		/*
    91  		 * Allocate prefix_key from map rather than stack to avoid
    92  		 * hitting the verifier limit.
    93  		 */
    94  		if (bpf_map_update_elem(&prefix_keys, &pid, &empty_prefix_key,
    95  					BPF_NOEXIST))
    96  			goto clean;
    97  
    98  		key = bpf_map_lookup_elem(&prefix_keys, &pid);
    99  		if (!key)
   100  			goto clean;
   101  
   102  		/*
   103  		 * It is fine to give the whole buffer size as prefixlen here.
   104  		 * Indeed, the in-kernel lookup stops when there is a difference
   105  		 * between the node (i.e. tested prefix) and the key (i.e.
   106  		 * filename).
   107  		 * There will always be a difference if the filename is longer
   108  		 * than the prefix, but what matters is the matched length.
   109  		 * If it equals the prefix length, then the filename matches the
   110  		 * prefix.
   111  		 */
   112  		key->prefixlen = sizeof(key->filename) * CHAR_BIT;
   113  		__builtin_memcpy(key->filename, filename,
   114  				 sizeof(key->filename));
   115  
   116  		found = bpf_map_lookup_elem(&prefixes, key) != NULL;
   117  clean:
   118  		bpf_map_delete_elem(&prefix_keys, &pid);
   119  		if (!found)
   120  			return false;
   121  	}
   122  
   123  	mntns_id = gadget_get_mntns_id();
   124  
   125  	if (gadget_should_discard_mntns_id(mntns_id))
   126  		return false;
   127  
   128  	return true;
   129  }
   130  
   131  static __always_inline int trace_enter(const char *filename, int flags,
   132  				       __u16 mode)
   133  {
   134  	u64 id = bpf_get_current_pid_tgid();
   135  	/* use kernel terminology here for tgid/pid: */
   136  	u32 tgid = id >> 32;
   137  	u32 pid = id;
   138  
   139  	struct start_t s = {};
   140  
   141  	bpf_probe_read_user_str(s.fname, sizeof(s.fname), filename);
   142  
   143  	/* store arg info for later lookup */
   144  	if (!trace_allowed(tgid, pid, (const char *)s.fname))
   145  		return 0;
   146  
   147  	s.flags = flags;
   148  	s.mode = mode;
   149  
   150  	// TODO: not related to this commit. Should't it be id? instead of pid?
   151  	bpf_map_update_elem(&start, &pid, &s, BPF_ANY);
   152  
   153  	return 0;
   154  }
   155  
   156  SEC("tracepoint/syscalls/sys_enter_open")
   157  int ig_open_e(struct syscall_trace_enter *ctx)
   158  {
   159  	return trace_enter((const char *)ctx->args[0], (int)ctx->args[1],
   160  			   (__u16)ctx->args[2]);
   161  }
   162  
   163  SEC("tracepoint/syscalls/sys_enter_openat")
   164  int ig_openat_e(struct syscall_trace_enter *ctx)
   165  {
   166  	return trace_enter((const char *)ctx->args[1], (int)ctx->args[2],
   167  			   (__u16)ctx->args[3]);
   168  }
   169  
   170  static __always_inline int trace_exit(struct syscall_trace_exit *ctx)
   171  {
   172  	struct event *event;
   173  	long int ret;
   174  	__u32 fd;
   175  	__s32 errval;
   176  	u32 pid = bpf_get_current_pid_tgid();
   177  	u64 uid_gid = bpf_get_current_uid_gid();
   178  	u64 mntns_id;
   179  	size_t full_fname_len = 0;
   180  	struct start_t *s;
   181  
   182  	s = bpf_map_lookup_elem(&start, &pid);
   183  	if (!s)
   184  		return 0; /* missed entry */
   185  
   186  	u32 zero = 0;
   187  	event = bpf_map_lookup_elem(&empty_event, &zero);
   188  	if (!event)
   189  		return 0; // should never happen
   190  
   191  	event->flags = s->flags;
   192  	event->mode = s->mode;
   193  	__builtin_memcpy(event->fname, s->fname, sizeof(s->fname));
   194  
   195  	ret = ctx->ret;
   196  	if (targ_failed && ret >= 0)
   197  		goto cleanup; /* want failed only */
   198  
   199  	fd = 0;
   200  	errval = 0;
   201  	if (ret >= 0) {
   202  		fd = ret;
   203  	} else {
   204  		errval = -ret;
   205  	}
   206  
   207  	/* event data */
   208  	event->pid = bpf_get_current_pid_tgid() >> 32;
   209  	event->uid = (u32)uid_gid;
   210  	event->gid = (u32)(uid_gid >> 32);
   211  	bpf_get_current_comm(&event->comm, sizeof(event->comm));
   212  	event->err = errval;
   213  	event->fd = fd;
   214  	event->mntns_id = gadget_get_mntns_id();
   215  	event->timestamp = bpf_ktime_get_boot_ns();
   216  
   217  	// Attempting to extract the full file path with symlink resolution
   218  	if (ret >= 0 && get_full_path) {
   219  		long r = read_full_path_of_open_file_fd(
   220  			ret, (char *)event->full_fname,
   221  			sizeof(event->full_fname));
   222  		if (r > 0) {
   223  			full_fname_len = (size_t)r;
   224  		} else {
   225  			// If we cannot get the full path put the empty string
   226  			event->full_fname[0] = '\0';
   227  			full_fname_len = 1;
   228  		}
   229  	} else {
   230  		// If the open failed, we can't get the full path
   231  		event->full_fname[0] = '\0';
   232  		full_fname_len = 1;
   233  	}
   234  
   235  	/* emit event */
   236  	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event,
   237  			      sizeof(struct event) -
   238  				      (PATH_MAX - full_fname_len));
   239  cleanup:
   240  	bpf_map_delete_elem(&start, &pid);
   241  	return 0;
   242  }
   243  
   244  SEC("tracepoint/syscalls/sys_exit_open")
   245  int ig_open_x(struct syscall_trace_exit *ctx)
   246  {
   247  	return trace_exit(ctx);
   248  }
   249  
   250  SEC("tracepoint/syscalls/sys_exit_openat")
   251  int ig_openat_x(struct syscall_trace_exit *ctx)
   252  {
   253  	return trace_exit(ctx);
   254  }
   255  
/* License string of this BPF object, placed in the "license" section. */
char LICENSE[] SEC("license") = "GPL";