github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/tcpconnect/tracer/bpf/tcpconnect.bpf.c

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Anton Protopopov
//
// Based on tcpconnect(8) from BCC by Brendan Gregg
#include <vmlinux.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

#include <gadget/maps.bpf.h>
#include "tcpconnect.h"
#include <gadget/mntns_filter.h>

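// Tunables. These const volatile globals live in the read-only .rodata
// section and are meant to be filled in by the userspace tracer before the
// program is loaded; the defaults below leave every filter disabled and keep
// both count mode and latency mode off.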
const volatile int filter_ports[MAX_PORTS];
const volatile int filter_ports_len = 0;
const volatile uid_t filter_uid = -1;
const volatile pid_t filter_pid = 0;
const volatile bool do_count = false;
const volatile bool calculate_latency = false;
const volatile __u64 targ_min_latency_ns = 0;

/* Define here, because there are conflicts with include files */
#define AF_INET 2
#define AF_INET6 10

// We need this to make sure the compiler doesn't remove our struct.
const struct event *unusedevent __attribute__((unused));

// sockets_per_process keeps track of the sockets between:
// - kprobe enter_tcp_connect
// - kretprobe exit_tcp_connect
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, u32); // tid
	__type(value, struct sock *);
} sockets_per_process SEC(".maps");

struct piddata {
	char comm[TASK_COMM_LEN];
	u64 ts;
	u32 pid;
	u32 tid;
	u64 mntns_id;
};

// sockets_latency keeps track of sockets to calculate the latency between:
// - enter_tcp_connect (where the socket is added to the map)
// - handle_tcp_rcv_state_process (where the socket is removed from the map)
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 4096);
	__type(key, struct sock *);
	__type(value, struct piddata);
} sockets_latency SEC(".maps");

// Per-flow connection counters, only used in count mode (do_count).
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct ipv4_flow_key);
	__type(value, u64);
} ipv4_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct ipv6_flow_key);
	__type(value, u64);
} ipv6_count SEC(".maps");

// Perf event array used to stream events to userspace.
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

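// filter_port returns true when the event for this destination port should be
// dropped, i.e. when a port list was configured and dport matches none of its
// entries. Example (hypothetical values): with filter_ports_len == 2 and
// filter_ports == {443, 8443} -- stored by the loader in the same byte order
// as skc_dport -- a connect() to any other port is skipped.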
static __always_inline bool filter_port(__u16 port)
{
	int i;

	if (filter_ports_len == 0)
		return false;

	// This loop is written a bit differently from the upstream one
	// to avoid a verifier error.
	for (i = 0; i < MAX_PORTS; i++) {
		if (i >= filter_ports_len)
			break;
		if (port == filter_ports[i])
			return false;
	}
	return true;
}

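// enter_tcp_connect is the common entry handler for the tcp_v4_connect and
// tcp_v6_connect kprobes. After applying the pid, uid and mount namespace
// filters it records the in-flight connect: keyed by the struct sock pointer
// in latency mode, or by the calling tid otherwise, so the matching exit or
// state-change handler can find it again.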
static __always_inline int enter_tcp_connect(struct pt_regs *ctx,
					     struct sock *sk)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u64 uid_gid = bpf_get_current_uid_gid();
	__u32 pid = pid_tgid >> 32;
	__u32 tid = pid_tgid;
	__u64 mntns_id;
	__u32 uid = (u32)uid_gid;
	struct piddata piddata = {};

	if (filter_pid && pid != filter_pid)
		return 0;

	if (filter_uid != (uid_t)-1 && uid != filter_uid)
		return 0;

	mntns_id = gadget_get_mntns_id();

	if (gadget_should_discard_mntns_id(mntns_id))
		return 0;

	if (calculate_latency) {
		bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
		piddata.ts = bpf_ktime_get_ns();
		piddata.tid = tid;
		piddata.pid = pid;
		piddata.mntns_id = mntns_id;
		bpf_map_update_elem(&sockets_latency, &sk, &piddata, 0);
	} else {
		bpf_map_update_elem(&sockets_per_process, &tid, &sk, 0);
	}
	return 0;
}

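// In count mode (do_count) no events are emitted. Instead, count_v4/count_v6
// atomically bump a per-flow counter keyed by source address, destination
// address and destination port; the userspace side is expected to read these
// maps and report the totals.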
static __always_inline void count_v4(struct sock *sk, __u16 dport)
{
	struct ipv4_flow_key key = {};
	static __u64 zero;
	__u64 *val;

	BPF_CORE_READ_INTO(&key.saddr, sk, __sk_common.skc_rcv_saddr);
	BPF_CORE_READ_INTO(&key.daddr, sk, __sk_common.skc_daddr);
	key.dport = dport;
	val = bpf_map_lookup_or_try_init(&ipv4_count, &key, &zero);
	if (val)
		__atomic_add_fetch(val, 1, __ATOMIC_RELAXED);
}

static __always_inline void count_v6(struct sock *sk, __u16 dport)
{
	struct ipv6_flow_key key = {};
	static const __u64 zero;
	__u64 *val;

	BPF_CORE_READ_INTO(&key.saddr, sk,
			   __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
	BPF_CORE_READ_INTO(&key.daddr, sk,
			   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	key.dport = dport;

	val = bpf_map_lookup_or_try_init(&ipv6_count, &key, &zero);
	if (val)
		__atomic_add_fetch(val, 1, __ATOMIC_RELAXED);
}

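// trace_v4/trace_v6 fill a struct event for a connect attempt and push it to
// userspace through the events perf buffer.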
static __always_inline void trace_v4(struct pt_regs *ctx, pid_t pid,
				     struct sock *sk, __u16 dport,
				     __u64 mntns_id)
{
	struct event event = {};

	__u64 uid_gid = bpf_get_current_uid_gid();

	event.af = AF_INET;
	event.pid = pid;
	event.uid = (u32)uid_gid;
	event.gid = (u32)(uid_gid >> 32);
	BPF_CORE_READ_INTO(&event.saddr_v4, sk, __sk_common.skc_rcv_saddr);
	BPF_CORE_READ_INTO(&event.daddr_v4, sk, __sk_common.skc_daddr);
	event.dport = dport;
	event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
	event.mntns_id = mntns_id;
	bpf_get_current_comm(event.task, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));
}

static __always_inline void trace_v6(struct pt_regs *ctx, pid_t pid,
				     struct sock *sk, __u16 dport,
				     __u64 mntns_id)
{
	struct event event = {};

	__u64 uid_gid = bpf_get_current_uid_gid();

	event.af = AF_INET6;
	event.pid = pid;
	event.uid = (u32)uid_gid;
	event.gid = (u32)(uid_gid >> 32);
	event.mntns_id = mntns_id;
	BPF_CORE_READ_INTO(&event.saddr_v6, sk,
			   __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
	BPF_CORE_READ_INTO(&event.daddr_v6, sk,
			   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	event.dport = dport;
	event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
	bpf_get_current_comm(event.task, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));
}

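// exit_tcp_connect is the common return handler for the tcp_v4_connect and
// tcp_v6_connect kretprobes (default, non-latency mode). A non-zero return
// value or a filtered destination port only drops the bookkeeping entry;
// otherwise the connection is counted or emitted as an event. Note that dport
// is kept in network byte order, as read from skc_dport.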
static __always_inline int exit_tcp_connect(struct pt_regs *ctx, int ret,
					    int ip_ver)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;
	__u32 tid = pid_tgid;
	struct sock **skpp;
	struct sock *sk;
	u64 mntns_id;
	__u16 dport;

	skpp = bpf_map_lookup_elem(&sockets_per_process, &tid);
	if (!skpp)
		return 0;

	if (ret)
		goto end;

	sk = *skpp;

	BPF_CORE_READ_INTO(&dport, sk, __sk_common.skc_dport);
	if (filter_port(dport))
		goto end;

	if (do_count) {
		if (ip_ver == 4)
			count_v4(sk, dport);
		else
			count_v6(sk, dport);
	} else {
		mntns_id = gadget_get_mntns_id();

		if (ip_ver == 4)
			trace_v4(ctx, pid, sk, dport, mntns_id);
		else
			trace_v6(ctx, pid, sk, dport, mntns_id);
	}

end:
	bpf_map_delete_elem(&sockets_per_process, &tid);
	return 0;
}

// Drop the latency bookkeeping entry for this socket, if any.
static __always_inline int cleanup_sockets_latency_map(const struct sock *sk)
{
	bpf_map_delete_elem(&sockets_latency, &sk);
	return 0;
}

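// handle_tcp_rcv_state_process runs in latency mode when tcp_rcv_state_process
// is invoked for a socket still in TCP_SYN_SENT, i.e. typically when the
// SYN-ACK is being processed. The connect latency is the difference between
// now and the timestamp recorded in enter_tcp_connect; events below
// targ_min_latency_ns are discarded, and the map entry is removed either way.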
static __always_inline int handle_tcp_rcv_state_process(void *ctx,
							struct sock *sk)
{
	struct piddata *piddatap;
	struct event event = {};
	u64 ts;

	if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT)
		return 0;

	piddatap = bpf_map_lookup_elem(&sockets_latency, &sk);
	if (!piddatap)
		return 0;

	ts = bpf_ktime_get_ns();
	if (ts < piddatap->ts)
		goto cleanup;

	event.latency = ts - piddatap->ts;
	if (targ_min_latency_ns && event.latency < targ_min_latency_ns)
		goto cleanup;
	__builtin_memcpy(&event.task, piddatap->comm, sizeof(event.task));
	event.pid = piddatap->pid;
	event.mntns_id = piddatap->mntns_id;
	event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
	event.dport = BPF_CORE_READ(sk, __sk_common.skc_dport);
	event.af = BPF_CORE_READ(sk, __sk_common.skc_family);
	if (event.af == AF_INET) {
		event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
		event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr);
	} else {
		BPF_CORE_READ_INTO(
			&event.saddr_v6, sk,
			__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
		BPF_CORE_READ_INTO(&event.daddr_v6, sk,
				   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	}
	event.timestamp = bpf_ktime_get_boot_ns();
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));

cleanup:
	return cleanup_sockets_latency_map(sk);
}

SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(ig_tcpc_v4_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

// This kretprobe is only attached if calculate_latency is false.
SEC("kretprobe/tcp_v4_connect")
int BPF_KRETPROBE(ig_tcpc_v4_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret, 4);
}

SEC("kprobe/tcp_v6_connect")
int BPF_KPROBE(ig_tcpc_v6_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

// This kretprobe is only attached if calculate_latency is false.
SEC("kretprobe/tcp_v6_connect")
int BPF_KRETPROBE(ig_tcpc_v6_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret, 6);
}

// This kprobe is only attached if calculate_latency is true.
SEC("kprobe/tcp_rcv_state_process")
int BPF_KPROBE(ig_tcp_rsp, struct sock *sk)
{
	return handle_tcp_rcv_state_process(ctx, sk);
}

// The tcp_destroy_sock tracepoint fires for both IPv4 and IPv6.
// It is only attached if calculate_latency is true.
SEC("tracepoint/tcp/tcp_destroy_sock")
int ig_tcp_destroy(struct trace_event_raw_tcp_event_sk *ctx)
{
	return cleanup_sockets_latency_map(ctx->skaddr);
}

char LICENSE[] SEC("license") = "GPL";