github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/tcpconnect/tracer/bpf/tcpconnect.bpf.c

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Anton Protopopov
//
// Based on tcpconnect(8) from BCC by Brendan Gregg
#include <vmlinux.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

#include <gadget/maps.bpf.h>
#include "tcpconnect.h"
#include <gadget/mntns_filter.h>

const volatile int filter_ports[MAX_PORTS];
const volatile int filter_ports_len = 0;
const volatile uid_t filter_uid = -1;
const volatile pid_t filter_pid = 0;
const volatile bool do_count = 0;
const volatile bool calculate_latency = false;
const volatile __u64 targ_min_latency_ns = 0;

/* Define here, because there are conflicts with include files */
#define AF_INET 2
#define AF_INET6 10

// We need this to make sure the compiler doesn't remove our struct.
const struct event *unusedevent __attribute__((unused));

// sockets_per_process keeps track of the sockets between:
// - kprobe enter_tcp_connect
// - kretprobe exit_tcp_connect
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, MAX_ENTRIES);
        __type(key, u32); // tid
        __type(value, struct sock *);
} sockets_per_process SEC(".maps");

struct piddata {
        char comm[TASK_COMM_LEN];
        u64 ts;
        u32 pid;
        u32 tid;
        u64 mntns_id;
};

// sockets_latency keeps track of sockets to calculate the latency between:
// - enter_tcp_connect (where the socket is added to the map)
// - handle_tcp_rcv_state_process (where the socket is removed from the map)
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 4096);
        __type(key, struct sock *);
        __type(value, struct piddata);
} sockets_latency SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, MAX_ENTRIES);
        __type(key, struct ipv4_flow_key);
        __type(value, u64);
} ipv4_count SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, MAX_ENTRIES);
        __type(key, struct ipv6_flow_key);
        __type(value, u64);
} ipv6_count SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
        __uint(key_size, sizeof(u32));
        __uint(value_size, sizeof(u32));
} events SEC(".maps");

static __always_inline bool filter_port(__u16 port)
{
        int i;

        if (filter_ports_len == 0)
                return false;

        // This loop was written a bit differently than the upstream one
        // to avoid a verifier error.
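        // Using the compile-time constant MAX_PORTS as the loop bound and
        // breaking out once i reaches filter_ports_len keeps the array index
        // provably in range; looping directly on the runtime value
        // filter_ports_len can be rejected by the verifier on some kernels.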
        for (i = 0; i < MAX_PORTS; i++) {
                if (i >= filter_ports_len)
                        break;
                if (port == filter_ports[i])
                        return false;
        }
        return true;
}

static __always_inline int enter_tcp_connect(struct pt_regs *ctx,
                                             struct sock *sk)
{
        __u64 pid_tgid = bpf_get_current_pid_tgid();
        __u64 uid_gid = bpf_get_current_uid_gid();
        __u32 pid = pid_tgid >> 32;
        __u32 tid = pid_tgid;
        __u64 mntns_id;
        __u32 uid = (u32)uid_gid;
        struct piddata piddata = {};

        if (filter_pid && pid != filter_pid)
                return 0;

        if (filter_uid != (uid_t)-1 && uid != filter_uid)
                return 0;

        mntns_id = gadget_get_mntns_id();

        if (gadget_should_discard_mntns_id(mntns_id))
                return 0;

        if (calculate_latency) {
                bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
                piddata.ts = bpf_ktime_get_ns();
                piddata.tid = tid;
                piddata.pid = pid;
                piddata.mntns_id = mntns_id;
                bpf_map_update_elem(&sockets_latency, &sk, &piddata, 0);
        } else {
                bpf_map_update_elem(&sockets_per_process, &tid, &sk, 0);
        }
        return 0;
}

static __always_inline void count_v4(struct sock *sk, __u16 dport)
{
        struct ipv4_flow_key key = {};
        static __u64 zero;
        __u64 *val;

        BPF_CORE_READ_INTO(&key.saddr, sk, __sk_common.skc_rcv_saddr);
        BPF_CORE_READ_INTO(&key.daddr, sk, __sk_common.skc_daddr);
        key.dport = dport;
        val = bpf_map_lookup_or_try_init(&ipv4_count, &key, &zero);
        if (val)
                __atomic_add_fetch(val, 1, __ATOMIC_RELAXED);
}

static __always_inline void count_v6(struct sock *sk, __u16 dport)
{
        struct ipv6_flow_key key = {};
        static const __u64 zero;
        __u64 *val;

        BPF_CORE_READ_INTO(&key.saddr, sk,
                           __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        BPF_CORE_READ_INTO(&key.daddr, sk,
                           __sk_common.skc_v6_daddr.in6_u.u6_addr32);
        key.dport = dport;

        val = bpf_map_lookup_or_try_init(&ipv6_count, &key, &zero);
        if (val)
                __atomic_add_fetch(val, 1, __ATOMIC_RELAXED);
}

static __always_inline void trace_v4(struct pt_regs *ctx, pid_t pid,
                                     struct sock *sk, __u16 dport,
                                     __u64 mntns_id)
{
        struct event event = {};

        __u64 uid_gid = bpf_get_current_uid_gid();

        event.af = AF_INET;
        event.pid = pid;
        event.uid = (u32)uid_gid;
        event.gid = (u32)(uid_gid >> 32);
        BPF_CORE_READ_INTO(&event.saddr_v4, sk, __sk_common.skc_rcv_saddr);
        BPF_CORE_READ_INTO(&event.daddr_v4, sk, __sk_common.skc_daddr);
        event.dport = dport;
        event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
        event.mntns_id = mntns_id;
        bpf_get_current_comm(event.task, sizeof(event.task));
        event.timestamp = bpf_ktime_get_boot_ns();

        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
                              sizeof(event));
}

static __always_inline void trace_v6(struct pt_regs *ctx, pid_t pid,
                                     struct sock *sk, __u16 dport,
                                     __u64 mntns_id)
{
        struct event event = {};

        __u64 uid_gid = bpf_get_current_uid_gid();

        event.af = AF_INET6;
        event.pid = pid;
        event.uid = (u32)uid_gid;
        event.gid = (u32)(uid_gid >> 32);
        event.mntns_id = mntns_id;
        BPF_CORE_READ_INTO(&event.saddr_v6, sk,
                           __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        BPF_CORE_READ_INTO(&event.daddr_v6, sk,
                           __sk_common.skc_v6_daddr.in6_u.u6_addr32);
        event.dport = dport;
        event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
        bpf_get_current_comm(event.task, sizeof(event.task));
        event.timestamp = bpf_ktime_get_boot_ns();

        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
                              sizeof(event));
}
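
// Note on byte order: skc_dport (read below and passed to the trace_v*/count_v*
// helpers) is stored in network byte order, while skc_num (sport) is in host
// byte order. Presumably the user-space side of the gadget converts these to a
// single representation and fills filter_ports accordingly; nothing in this
// file performs the conversion.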

static __always_inline int exit_tcp_connect(struct pt_regs *ctx, int ret,
                                            int ip_ver)
{
        __u64 pid_tgid = bpf_get_current_pid_tgid();
        __u32 pid = pid_tgid >> 32;
        __u32 tid = pid_tgid;
        struct sock **skpp;
        struct sock *sk;
        u64 mntns_id;
        __u16 dport;

        skpp = bpf_map_lookup_elem(&sockets_per_process, &tid);
        if (!skpp)
                return 0;

        if (ret)
                goto end;

        sk = *skpp;

        BPF_CORE_READ_INTO(&dport, sk, __sk_common.skc_dport);
        if (filter_port(dport))
                goto end;

        if (do_count) {
                if (ip_ver == 4)
                        count_v4(sk, dport);
                else
                        count_v6(sk, dport);
        } else {
                mntns_id = gadget_get_mntns_id();

                if (ip_ver == 4)
                        trace_v4(ctx, pid, sk, dport, mntns_id);
                else
                        trace_v6(ctx, pid, sk, dport, mntns_id);
        }

end:
        bpf_map_delete_elem(&sockets_per_process, &tid);
        return 0;
}

static __always_inline int cleanup_sockets_latency_map(const struct sock *sk)
{
        bpf_map_delete_elem(&sockets_latency, &sk);
        return 0;
}

static __always_inline int handle_tcp_rcv_state_process(void *ctx,
                                                        struct sock *sk)
{
        struct piddata *piddatap;
        struct event event = {};
        u64 ts;

        if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT)
                return 0;

        piddatap = bpf_map_lookup_elem(&sockets_latency, &sk);
        if (!piddatap)
                return 0;

        ts = bpf_ktime_get_ns();
        if (ts < piddatap->ts)
                goto cleanup;

        event.latency = ts - piddatap->ts;
        if (targ_min_latency_ns && event.latency < targ_min_latency_ns)
                goto cleanup;
        __builtin_memcpy(&event.task, piddatap->comm, sizeof(event.task));
        event.pid = piddatap->pid;
        event.mntns_id = piddatap->mntns_id;
        event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
        event.dport = BPF_CORE_READ(sk, __sk_common.skc_dport);
        event.af = BPF_CORE_READ(sk, __sk_common.skc_family);
        if (event.af == AF_INET) {
                event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
                event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr);
        } else {
                BPF_CORE_READ_INTO(
                        &event.saddr_v6, sk,
                        __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
                BPF_CORE_READ_INTO(&event.daddr_v6, sk,
                                   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
        }
        event.timestamp = bpf_ktime_get_boot_ns();
        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
                              sizeof(event));

cleanup:
        return cleanup_sockets_latency_map(sk);
}

SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(ig_tcpc_v4_co_e, struct sock *sk)
{
        return enter_tcp_connect(ctx, sk);
}

// This kretprobe is only attached if calculate_latency is false
SEC("kretprobe/tcp_v4_connect")
int BPF_KRETPROBE(ig_tcpc_v4_co_x, int ret)
{
        return exit_tcp_connect(ctx, ret, 4);
}

SEC("kprobe/tcp_v6_connect")
int BPF_KPROBE(ig_tcpc_v6_co_e, struct sock *sk)
{
        return enter_tcp_connect(ctx, sk);
}

// This kretprobe is only attached if calculate_latency is false
SEC("kretprobe/tcp_v6_connect")
int BPF_KRETPROBE(ig_tcpc_v6_co_x, int ret)
{
        return exit_tcp_connect(ctx, ret, 6);
}

// This kprobe is only attached if calculate_latency is true
SEC("kprobe/tcp_rcv_state_process")
int BPF_KPROBE(ig_tcp_rsp, struct sock *sk)
{
        return handle_tcp_rcv_state_process(ctx, sk);
}
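
// Connections that are torn down before tcp_rcv_state_process runs for them
// (for example on connect timeouts) would otherwise leave stale entries in
// sockets_latency, so the tracepoint below drops the entry when the socket is
// destroyed.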
// tcp_destroy_sock is fired for ipv4 and ipv6.
// This tracepoint is only attached if calculate_latency is true
SEC("tracepoint/tcp/tcp_destroy_sock")
int ig_tcp_destroy(struct trace_event_raw_tcp_event_sk *ctx)
{
        return cleanup_sockets_latency_map(ctx->skaddr);
}

char LICENSE[] SEC("license") = "GPL";