github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/socketenricher/bpf/socket-enricher.bpf.c

// SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Apache-2.0
/* Copyright (c) 2023 The Inspektor Gadget authors */

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>

#include <gadget/sockets-map.h>
#include "socket-enricher-helpers.h"

#define MAX_ENTRIES 10240

// The map 'start' keeps context between a kprobe and a kretprobe
// Keys: pid_tgid
// Values: the argument of the kprobe function:
// - When used in bind: struct socket *
// - When used in tcp_connect: struct sock *
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, __u64);
	__type(value, void *);
} start SEC(".maps");

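// disable_bpf_iterators is a constant set by userspace at load time: when BPF
// iterators are not available for cleaning up the map, stale entries are
// deleted directly in remove_socket() instead of being marked for deletion.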
const volatile bool disable_bpf_iterators = 0;

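// insert_current_socket records the current task's identity (mount namespace,
// pid/tgid, uid/gid and comm) for the given socket in the shared
// gadget_sockets map.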
static __always_inline void insert_current_socket(struct sock *sock)
{
	struct sockets_key socket_key = {
		0,
	};
	prepare_socket_key(&socket_key, sock);

	struct sockets_value socket_value = {
		0,
	};
	// use 'current' task
	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
	socket_value.mntns = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
	socket_value.pid_tgid = bpf_get_current_pid_tgid();
	socket_value.uid_gid = bpf_get_current_uid_gid();
	bpf_get_current_comm(&socket_value.task, sizeof(socket_value.task));
	socket_value.sock = (__u64)sock;
	if (socket_key.family == AF_INET6)
		socket_value.ipv6only = BPF_CORE_READ_BITFIELD_PROBED(
			sock, __sk_common.skc_ipv6only);

	bpf_map_update_elem(&gadget_sockets, &socket_key, &socket_value,
			    BPF_ANY);
}

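// remove_socket looks up the entry for this socket (keyed by netns, family,
// protocol and local port) and, if it still refers to the same struct sock,
// deletes it or marks it for deferred deletion.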
static __always_inline int remove_socket(struct sock *sock)
{
	struct inet_sock *inet_sock = (struct inet_sock *)sock;
	struct sockets_key socket_key = {
		0,
	};

	BPF_CORE_READ_INTO(&socket_key.family, sock, __sk_common.skc_family);
	BPF_CORE_READ_INTO(&socket_key.netns, sock, __sk_common.skc_net.net,
			   ns.inum);

	socket_key.proto = BPF_CORE_READ_BITFIELD_PROBED(sock, sk_protocol);
	socket_key.port = bpf_ntohs(BPF_CORE_READ(inet_sock, inet_sport));

	struct sockets_value *socket_value =
		bpf_map_lookup_elem(&gadget_sockets, &socket_key);
	if (socket_value == NULL)
		return 0;

	if (socket_value->sock != (__u64)sock)
		return 0;

	if (socket_value->deletion_timestamp == 0) {
		// bpf timers are not used because they require Linux 5.15 and we want
		// to support older kernels.
		// Use bpf iterators if available (Linux 5.8), otherwise delete
		// directly.
		if (disable_bpf_iterators) {
			bpf_map_delete_elem(&gadget_sockets, &socket_key);
		} else {
			// Avoid bpf_ktime_get_boot_ns() to support older kernels
			socket_value->deletion_timestamp = bpf_ktime_get_ns();
		}
	}
	return 0;
}

// probe_bind_entry & probe_bind_exit are used:
// - server side
// - for both UDP and TCP
// - for both IPv4 and IPv6
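// The ver argument of probe_bind_exit (4 or 6) is currently unused; the
// address family is read from the socket itself.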
static __always_inline int probe_bind_entry(struct pt_regs *ctx,
					    struct socket *socket)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();

	bpf_map_update_elem(&start, &pid_tgid, &socket, BPF_ANY);
	return 0;
}

static __always_inline int probe_bind_exit(struct pt_regs *ctx, short ver)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	struct socket **socketp, *socket;
	struct sock *sock;
	int ret;

	socketp = bpf_map_lookup_elem(&start, &pid_tgid);
	if (!socketp)
		return 0;

	ret = PT_REGS_RC(ctx);
	if (ret != 0)
		goto cleanup;

	socket = *socketp;
	sock = BPF_CORE_READ(socket, sk);

	insert_current_socket(sock);

cleanup:
	bpf_map_delete_elem(&start, &pid_tgid);
	return 0;
}

// enter_tcp_connect & exit_tcp_connect are used:
// - client side
// - for TCP only
// - for both IPv4 and IPv6
static __always_inline int enter_tcp_connect(struct pt_regs *ctx,
					     struct sock *sk)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	bpf_map_update_elem(&start, &pid_tgid, &sk, BPF_ANY);

	// Add socket to the map before the connection is established, so that
	// early SYN packets can be enriched.
	insert_current_socket(sk);

	return 0;
}

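// exit_tcp_connect removes the entry added by enter_tcp_connect again if
// connect() failed, and clears the per-thread context in the start map.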
static __always_inline int exit_tcp_connect(struct pt_regs *ctx, int ret)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	struct sock **skpp;
	struct sock *sk;

	skpp = bpf_map_lookup_elem(&start, &pid_tgid);
	if (!skpp)
		return 0;

	sk = *skpp;

	if (ret)
		remove_socket(sk);

	bpf_map_delete_elem(&start, &pid_tgid);
	return 0;
}

// enter_udp_sendmsg is used:
// - client side
// - for UDP only
// - for both IPv4 and IPv6
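// Unlike TCP, there is no tcp_connect() hook for UDP, so the socket is
// (re)inserted on every sendmsg() call to keep the map entry up to date.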
static __always_inline int enter_udp_sendmsg(struct pt_regs *ctx,
					     struct sock *sk,
					     struct msghdr *msg, size_t len)
{
	insert_current_socket(sk);
	return 0;
}

// probe_release_entry is used:
// - for both server and client sides
// - for both UDP and TCP
// - for both IPv4 and IPv6
static __always_inline int
probe_release_entry(struct pt_regs *ctx, struct socket *socket, __u16 family)
{
	struct sock *sock;

	sock = BPF_CORE_READ(socket, sk);

	// The kernel function inet6_release() calls inet_release() and we have a
	// kprobe on both, so check the address family to make sure each socket is
	// handled only by the probe matching its family.
	if (BPF_CORE_READ(sock, __sk_common.skc_family) != family)
		return 0;

	return remove_socket(sock);
}

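// Hook points: inet_bind/inet6_bind cover the server side, tcp_connect and
// udp_sendmsg/udpv6_sendmsg cover the client side, and inet_release/
// inet6_release remove entries when sockets are closed.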
SEC("kprobe/inet_bind")
int BPF_KPROBE(ig_bind_ipv4_e, struct socket *socket)
{
	return probe_bind_entry(ctx, socket);
}

SEC("kretprobe/inet_bind")
int BPF_KRETPROBE(ig_bind_ipv4_x)
{
	return probe_bind_exit(ctx, 4);
}

SEC("kprobe/inet6_bind")
int BPF_KPROBE(ig_bind_ipv6_e, struct socket *socket)
{
	return probe_bind_entry(ctx, socket);
}

SEC("kretprobe/inet6_bind")
int BPF_KRETPROBE(ig_bind_ipv6_x)
{
	return probe_bind_exit(ctx, 6);
}

SEC("kprobe/tcp_connect")
int BPF_KPROBE(ig_tcp_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

SEC("kretprobe/tcp_connect")
int BPF_KRETPROBE(ig_tcp_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret);
}

SEC("kprobe/udp_sendmsg")
int BPF_KPROBE(ig_udp_sendmsg, struct sock *sk, struct msghdr *msg, size_t len)
{
	return enter_udp_sendmsg(ctx, sk, msg, len);
}

SEC("kprobe/udpv6_sendmsg")
int BPF_KPROBE(ig_udp6_sendmsg, struct sock *sk, struct msghdr *msg, size_t len)
{
	return enter_udp_sendmsg(ctx, sk, msg, len);
}

SEC("kprobe/inet_release")
int BPF_KPROBE(ig_free_ipv4_e, struct socket *socket)
{
	return probe_release_entry(ctx, socket, AF_INET);
}

SEC("kprobe/inet6_release")
int BPF_KPROBE(ig_free_ipv6_e, struct socket *socket)
{
	return probe_release_entry(ctx, socket, AF_INET6);
}

char _license[] SEC("license") = "GPL";
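/*
 * Sketch (not part of the upstream file): how another BPF program could
 * consult the map maintained by this enricher. It rebuilds the key the same
 * way remove_socket() does above; the lookup_enriched() wrapper name is
 * purely illustrative and not an Inspektor Gadget API.
 *
 * static __always_inline struct sockets_value *
 * lookup_enriched(struct sock *sk)
 * {
 *	struct inet_sock *inet_sock = (struct inet_sock *)sk;
 *	struct sockets_key key = {
 *		0,
 *	};
 *
 *	BPF_CORE_READ_INTO(&key.family, sk, __sk_common.skc_family);
 *	BPF_CORE_READ_INTO(&key.netns, sk, __sk_common.skc_net.net, ns.inum);
 *	key.proto = BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol);
 *	key.port = bpf_ntohs(BPF_CORE_READ(inet_sock, inet_sport));
 *
 *	// NULL if the enricher has not (yet) seen this socket.
 *	return bpf_map_lookup_elem(&gadget_sockets, &key);
 * }
 */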