github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/tcp/tracer/bpf/tcptracer.bpf.c

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2022 Microsoft Corporation
//
// Based on tcptracer(8) from BCC by Kinvolk GmbH and
// tcpconnect(8) by Anton Protopopov
#include <vmlinux.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
#include "tcptracer.h"
#include <gadget/mntns_filter.h>

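/*
 * Optional filters, set from userspace before the program is loaded
 * (they live in .rodata, so the verifier treats them as constants).
 * The defaults (-1 and 0) disable the corresponding filter. A loader
 * built with a libbpf skeleton could set them like this (a sketch,
 * assuming a generated "tcptracer" skeleton; names are illustrative):
 *
 *   obj->rodata->filter_pid = target_pid;
 *   tcptracer_bpf__load(obj);
 */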
const volatile uid_t filter_uid = -1;
const volatile pid_t filter_pid = 0;

/* Defined here because of conflicts with the include files */
#define AF_INET 2
#define AF_INET6 10

// Referenced here so the compiler does not drop these types and their
// definitions stay available to userspace.
const struct event *unusedevent __attribute__((unused));
const enum event_type unused_eventtype __attribute__((unused));
/*
 * tcp_set_state() does not run in the context of the process that
 * initiated the connection, so we store a tuple -> PID mapping here in
 * order to report the right PID on the event.
 */
struct tuple_key_t {
	union {
		__u32 saddr_v4;
		unsigned __int128 saddr_v6;
	};
	union {
		__u32 daddr_v4;
		unsigned __int128 daddr_v6;
	};
	u16 sport;
	u16 dport;
	u32 netns;
};

struct pid_comm_t {
	u64 pid;
	char comm[TASK_COMM_LEN];
	u64 mntns_id;
	u64 uid_gid;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct tuple_key_t);
	__type(value, struct pid_comm_t);
} tuplepid SEC(".maps");

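/*
 * Map a thread ID to the struct sock seen by the tcp_v{4,6}_connect
 * kprobe, so the matching kretprobe can retrieve it on exit.
 */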
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, u32);
	__type(value, struct sock *);
} sockets SEC(".maps");

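/*
 * Perf event array (one ring per CPU) used to stream events to
 * userspace. A reader would typically use libbpf's perf_buffer API
 * (a sketch; the handler names are illustrative):
 *
 *   struct perf_buffer *pb =
 *           perf_buffer__new(bpf_map__fd(obj->maps.events), 8,
 *                            handle_event, handle_lost, NULL, NULL);
 */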
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

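/*
 * Fill tuple with the connection 4-tuple and network namespace taken
 * from the socket. Returns false if any address or port is still zero,
 * i.e. the tuple is not fully populated yet.
 */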
static __always_inline bool fill_tuple(struct tuple_key_t *tuple,
				       struct sock *sk, int family)
{
	struct inet_sock *sockp = (struct inet_sock *)sk;

	BPF_CORE_READ_INTO(&tuple->netns, sk, __sk_common.skc_net.net, ns.inum);

	switch (family) {
	case AF_INET:
		BPF_CORE_READ_INTO(&tuple->saddr_v4, sk,
				   __sk_common.skc_rcv_saddr);
		if (tuple->saddr_v4 == 0)
			return false;

		BPF_CORE_READ_INTO(&tuple->daddr_v4, sk, __sk_common.skc_daddr);
		if (tuple->daddr_v4 == 0)
			return false;

		break;
	case AF_INET6:
		BPF_CORE_READ_INTO(
			&tuple->saddr_v6, sk,
			__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
		if (tuple->saddr_v6 == 0)
			return false;
		BPF_CORE_READ_INTO(&tuple->daddr_v6, sk,
				   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
		if (tuple->daddr_v6 == 0)
			return false;

		break;
	/* Should not happen, but handle it to be safe. */
	default:
		return false;
	}

	BPF_CORE_READ_INTO(&tuple->dport, sk, __sk_common.skc_dport);
	if (tuple->dport == 0)
		return false;

	BPF_CORE_READ_INTO(&tuple->sport, sockp, inet_sport);
	if (tuple->sport == 0)
		return false;

	return true;
}

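/* Copy the tuple plus process and namespace metadata into the event. */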
static __always_inline void fill_event(struct tuple_key_t *tuple,
				       struct event *event, __u32 pid,
				       __u64 uid_gid, __u16 family, __u8 type,
				       __u64 mntns_id)
{
	event->ts_us = bpf_ktime_get_ns() / 1000;
	event->type = type;
	event->pid = pid;
	event->uid = (__u32)uid_gid;
	event->gid = (__u32)(uid_gid >> 32);
	event->af = family;
	event->netns = tuple->netns;
	event->mntns_id = mntns_id;
	if (family == AF_INET) {
		event->saddr_v4 = tuple->saddr_v4;
		event->daddr_v4 = tuple->daddr_v4;
	} else {
		event->saddr_v6 = tuple->saddr_v6;
		event->daddr_v6 = tuple->daddr_v6;
	}
	event->sport = tuple->sport;
	event->dport = tuple->dport;
}

/* Returns true if the event should be skipped. */
static __always_inline bool filter_event(struct sock *sk, __u32 uid, __u32 pid,
					 __u64 mntns_id)
{
	u16 family;

	family = BPF_CORE_READ(sk, __sk_common.skc_family);
	if (family != AF_INET && family != AF_INET6)
		return true;

	if (gadget_should_discard_mntns_id(mntns_id))
		return true;

	if (filter_pid && pid != filter_pid)
		return true;

	if (filter_uid != (uid_t)-1 && uid != filter_uid)
		return true;

	return false;
}

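/*
 * Common entry handler for the tcp_v{4,6}_connect kprobes: apply the
 * filters and remember the socket for the matching kretprobe.
 */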
static __always_inline int enter_tcp_connect(struct pt_regs *ctx,
					     struct sock *sk)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;
	__u32 tid = pid_tgid;
	__u64 uid_gid = bpf_get_current_uid_gid();
	__u32 uid = uid_gid;
	__u64 mntns_id;

	mntns_id = gadget_get_mntns_id();

	if (filter_event(sk, uid, pid, mntns_id))
		return 0;

	bpf_map_update_elem(&sockets, &tid, &sk, 0);
	return 0;
}

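/*
 * Common exit handler for the tcp_v{4,6}_connect kretprobes. On
 * success, store the process details keyed by the connection tuple;
 * the connect event itself is only emitted from tcp_set_state once the
 * connection reaches TCP_ESTABLISHED.
 */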
static __always_inline int exit_tcp_connect(struct pt_regs *ctx, int ret,
					    __u16 family)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;
	__u32 tid = pid_tgid;
	__u64 uid_gid = bpf_get_current_uid_gid();
	struct tuple_key_t tuple = {};
	struct pid_comm_t pid_comm = {};
	struct sock **skpp;
	struct sock *sk;
	struct task_struct *task;

	skpp = bpf_map_lookup_elem(&sockets, &tid);
	if (!skpp)
		return 0;

	if (ret)
		goto end;

	sk = *skpp;

	if (!fill_tuple(&tuple, sk, family))
		goto end;

	task = (struct task_struct *)bpf_get_current_task();

	pid_comm.pid = pid;
	pid_comm.uid_gid = uid_gid;
	pid_comm.mntns_id = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
	bpf_get_current_comm(&pid_comm.comm, sizeof(pid_comm.comm));

	bpf_map_update_elem(&tuplepid, &tuple, &pid_comm, 0);

end:
	bpf_map_delete_elem(&sockets, &tid);
	return 0;
}

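/* Entry/exit probe pairs for the IPv4 and IPv6 connect paths. */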
SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(ig_tcp_v4_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

SEC("kretprobe/tcp_v4_connect")
int BPF_KRETPROBE(ig_tcp_v4_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret, AF_INET);
}

SEC("kprobe/tcp_v6_connect")
int BPF_KPROBE(ig_tcp_v6_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

SEC("kretprobe/tcp_v6_connect")
int BPF_KRETPROBE(ig_tcp_v6_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret, AF_INET6);
}

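/*
 * tcp_close() is called from the context of the process closing the
 * connection, so PID, credentials and comm can be read directly from
 * the current task.
 */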
SEC("kprobe/tcp_close")
int BPF_KPROBE(ig_tcp_close, struct sock *sk)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;
	__u64 uid_gid = bpf_get_current_uid_gid();
	__u32 uid = uid_gid;
	struct tuple_key_t tuple = {};
	struct event event = {};
	u16 family;
	__u64 mntns_id;

	mntns_id = gadget_get_mntns_id();

	if (filter_event(sk, uid, pid, mntns_id))
		return 0;

	/*
	 * Don't generate close events for connections that were never
	 * established in the first place.
	 */
	u8 oldstate = BPF_CORE_READ(sk, __sk_common.skc_state);
	if (oldstate == TCP_SYN_SENT || oldstate == TCP_SYN_RECV ||
	    oldstate == TCP_NEW_SYN_RECV)
		return 0;

	family = BPF_CORE_READ(sk, __sk_common.skc_family);
	if (!fill_tuple(&tuple, sk, family))
		return 0;

	fill_event(&tuple, &event, pid, uid_gid, family, TCP_EVENT_TYPE_CLOSE,
		   mntns_id);
	bpf_get_current_comm(&event.task, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));

	return 0;
}

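/*
 * Emit the connect event once the connection reaches TCP_ESTABLISHED,
 * using the process details stored by exit_tcp_connect; on TCP_CLOSE,
 * only drop the tuplepid entry.
 */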
SEC("kprobe/tcp_set_state")
int BPF_KPROBE(ig_tcp_state, struct sock *sk, int state)
{
	struct tuple_key_t tuple = {};
	struct event event = {};
	__u16 family;

	if (state != TCP_ESTABLISHED && state != TCP_CLOSE)
		goto end;

	family = BPF_CORE_READ(sk, __sk_common.skc_family);

	if (!fill_tuple(&tuple, sk, family))
		goto end;

	if (state == TCP_CLOSE)
		goto end;

	struct pid_comm_t *p;
	p = bpf_map_lookup_elem(&tuplepid, &tuple);
	if (!p)
		return 0; /* missed entry */

	fill_event(&tuple, &event, p->pid, p->uid_gid, family,
		   TCP_EVENT_TYPE_CONNECT, p->mntns_id);
	__builtin_memcpy(&event.task, p->comm, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));

end:
	bpf_map_delete_elem(&tuplepid, &tuple);

	return 0;
}

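/*
 * inet_csk_accept returns the newly accepted socket, so the kretprobe
 * return value is the struct sock to report on.
 */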
SEC("kretprobe/inet_csk_accept")
int BPF_KRETPROBE(ig_tcp_accept, struct sock *sk)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;
	__u64 uid_gid = bpf_get_current_uid_gid();
	__u32 uid = uid_gid;
	__u16 sport, family;
	struct event event = {};
	struct tuple_key_t t = {};
	u64 mntns_id;

	if (!sk)
		return 0;

	mntns_id = gadget_get_mntns_id();

	if (filter_event(sk, uid, pid, mntns_id))
		return 0;

	family = BPF_CORE_READ(sk, __sk_common.skc_family);
	sport = BPF_CORE_READ(sk, __sk_common.skc_num);

	fill_tuple(&t, sk, family);
	t.sport = bpf_ntohs(sport);
	/*
	 * Don't send an event if an address is 0.0.0.0/:: or a port is 0.
	 * Checking the v6 union members covers IPv4 too, since the v4
	 * address occupies the first bytes of the same zero-initialized
	 * union.
	 */
	if (t.saddr_v6 == 0 || t.daddr_v6 == 0 || t.dport == 0 || t.sport == 0)
		return 0;

	fill_event(&t, &event, pid, uid_gid, family, TCP_EVENT_TYPE_ACCEPT,
		   mntns_id);

	bpf_get_current_comm(&event.task, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));

	return 0;
}

char LICENSE[] SEC("license") = "GPL";