github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/c/headers/common/network.h (about) 1 #ifndef __COMMON_NETWORK_H__ 2 #define __COMMON_NETWORK_H__ 3 4 #include "types.h" 5 #include <vmlinux.h> 6 #include <vmlinux_flavors.h> 7 8 #include <bpf/bpf_endian.h> 9 10 #include <common/common.h> 11 12 // clang-format off 13 14 // TYPES 15 16 typedef union iphdrs_t { 17 struct iphdr iphdr; 18 struct ipv6hdr ipv6hdr; 19 } iphdrs; 20 21 typedef union { 22 u32 v4addr; 23 unsigned __int128 v6addr; 24 } __attribute__((packed)) addr_t; 25 26 typedef struct { 27 addr_t saddr; 28 addr_t daddr; 29 u16 sport; 30 u16 dport; 31 u16 family; 32 } __attribute__((packed)) tuple_t; 33 34 // network flow events 35 36 typedef struct netflow { 37 u32 host_pid; 38 u8 proto; 39 tuple_t tuple; 40 } __attribute__((__packed__)) netflow_t; 41 42 statfunc netflow_t invert_netflow(netflow_t flow) 43 { 44 tuple_t inverted_tuple = { 45 .saddr = flow.tuple.daddr, 46 .daddr = flow.tuple.saddr, 47 .sport = flow.tuple.dport, 48 .dport = flow.tuple.sport, 49 .family = flow.tuple.family, 50 }; 51 netflow_t res = { 52 .host_pid = flow.host_pid, 53 .proto = flow.proto, 54 .tuple = inverted_tuple, 55 }; 56 return res; 57 } 58 59 #define flow_unknown 0 60 #define flow_incoming 1 61 #define flow_outgoing 2 62 63 // TODO: per flow statistics can be added later 64 typedef struct netflowvalue { 65 u8 direction; // 0 = flow_unknown, 1 = flow_incoming, 2 = flow_outgoing 66 u64 last_update; // last time this flow was updated 67 u64 tx_bytes; // total bytes sent 68 u64 rx_bytes; // total bytes received 69 u64 tx_packets; // total packets sent 70 u64 rx_packets; // total packets received 71 } __attribute__((__packed__)) netflowvalue_t; 72 73 // netflowmap (keep track of network flows) 74 75 struct { 76 __uint(type, BPF_MAP_TYPE_LRU_HASH); 77 __uint(max_entries, 65535); // simultaneous network flows being tracked 78 __type(key, netflow_t); // the network flow ... 79 __type(value, netflowvalue_t); // ... linked to flow stats 80 } netflowmap SEC(".maps"); // relate sockets and tasks 81 82 // NOTE: proto header structs need full type in vmlinux.h (for correct skb copy) 83 84 typedef union protohdrs_t { 85 struct tcphdr tcphdr; 86 struct udphdr udphdr; 87 struct icmphdr icmphdr; 88 struct icmp6hdr icmp6hdr; 89 union { 90 u8 tcp_extra[40]; // data offset might set it up to 60 bytes 91 }; 92 } protohdrs; 93 94 typedef struct nethdrs_t { 95 iphdrs iphdrs; 96 protohdrs protohdrs; 97 } nethdrs; 98 99 // cgroupctxmap 100 101 typedef enum net_packet { 102 CAP_NET_PACKET = 1 << 0, 103 // Layer 3 104 SUB_NET_PACKET_IP = 1 << 1, 105 // Layer 4 106 SUB_NET_PACKET_TCP = 1 << 2, 107 SUB_NET_PACKET_UDP = 1 << 3, 108 SUB_NET_PACKET_ICMP = 1 << 4, 109 SUB_NET_PACKET_ICMPV6 = 1 << 5, 110 // Layer 7 111 SUB_NET_PACKET_DNS = 1 << 6, 112 SUB_NET_PACKET_HTTP = 1 << 7, 113 SUB_NET_PACKET_SOCKS5 = 1 << 8, 114 } net_packet_t; 115 116 typedef struct net_event_contextmd { 117 u8 should_flow; // Cache result from should_submit_flow_event 118 u32 header_size; 119 u8 captured; // packet has already been captured 120 netflow_t flow; 121 } __attribute__((__packed__)) net_event_contextmd_t; 122 123 typedef struct net_event_context { 124 event_context_t eventctx; 125 u8 argnum; 126 struct { // event arguments (needs packing), use anonymous struct to ... 127 u8 index0; 128 u32 bytes; 129 // ... (payload sent by bpf_perf_event_output) 130 } __attribute__((__packed__)); // ... avoid address-of-packed-member warns 131 // members bellow this point are metadata (not part of event to be sent) 132 net_event_contextmd_t md; 133 } __attribute__((__packed__)) net_event_context_t; 134 135 // network related maps 136 137 typedef struct { 138 u64 ts; 139 u16 ip_csum; 140 struct in6_addr src; 141 struct in6_addr dst; 142 } indexer_t; 143 144 typedef struct { 145 __uint(type, BPF_MAP_TYPE_LRU_HASH); 146 __uint(max_entries, 4096); // 800 KB // simultaneous cgroup/skb ingress/eggress progs 147 __type(key, indexer_t); // layer 3 header fields used as indexer 148 __type(value, net_event_context_t); // event context built so cgroup/skb can use 149 } cgrpctxmap_t; 150 151 cgrpctxmap_t cgrpctxmap_in SEC(".maps"); // saved info SKB caller <=> SKB ingress 152 cgrpctxmap_t cgrpctxmap_eg SEC(".maps"); // saved info SKB caller <=> SKB egress 153 154 // inodemap 155 156 typedef struct net_task_context { 157 struct task_struct *task; 158 task_context_t taskctx; 159 s32 syscall; 160 u16 padding; 161 u16 policies_version; 162 u64 matched_policies; 163 } net_task_context_t; 164 165 struct { 166 __uint(type, BPF_MAP_TYPE_LRU_HASH); 167 __uint(max_entries, 65535); // 9 MB // simultaneous sockets being traced 168 __type(key, u64); // socket inode number ... 169 __type(value, struct net_task_context); // ... linked to a task context 170 } inodemap SEC(".maps"); // relate sockets and tasks 171 172 // sockmap (map two cloned "socket" representation structs ("sock")) 173 174 struct { 175 __uint(type, BPF_MAP_TYPE_LRU_HASH); 176 __uint(max_entries, 65535); // 9 MB // simultaneous sockets being cloned 177 __type(key, u64); // *(struct sock *newsock) ... 178 __type(value, u64); // ... old sock->socket inode number 179 } sockmap SEC(".maps"); // relate a cloned sock struct with 180 181 // entrymap 182 183 typedef struct entry { 184 long unsigned int args[6]; 185 } entry_t; 186 187 struct { 188 __uint(type, BPF_MAP_TYPE_LRU_HASH); 189 __uint(max_entries, 2048); // simultaneous tasks being traced for entry/exit 190 __type(key, u32); // host thread group id (tgid or tid) ... 191 __type(value, struct entry); // ... linked to entry ctx->args 192 } entrymap SEC(".maps"); // can't use args_map (indexed by existing events only) 193 194 // network capture events 195 196 struct { 197 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 198 __uint(max_entries, 10240); 199 __type(key, u32); 200 __type(value, u32); 201 } net_cap_events SEC(".maps"); 202 203 // scratch area 204 205 struct { 206 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 207 __uint(max_entries, 1); // simultaneous softirqs running per CPU (?) 208 __type(key, u32); // per cpu index ... (always zero) 209 __type(value, event_data_t); // ... linked to a scratch area 210 } net_heap_event SEC(".maps"); 211 212 // CONSTANTS 213 214 // Network return value (retval) codes 215 216 // Layer 3 Protocol (since no Layer 2 is available) 217 #define family_ipv4 (1 << 0) 218 #define family_ipv6 (1 << 1) 219 // HTTP Direction (request/response) Flag 220 #define proto_http_req (1 << 2) 221 #define proto_http_resp (1 << 3) 222 // Packet Direction (ingress/egress) Flag 223 #define packet_ingress (1 << 4) 224 #define packet_egress (1 << 5) 225 // Flows (begin/end) Flags per Protocol 226 #define flow_tcp_begin (1 << 6) // syn+ack flag or first flow packet 227 #define flow_tcp_sample (1 << 7) // sample with statistics after first flow 228 #define flow_tcp_end (1 << 8) // fin flag or last flow packet 229 #define flow_udp_begin (1 << 9) // first flow packet 230 #define flow_udp_end (1 << 10) // last flow packet 231 #define flow_src_initiator (1 << 11) // src is the flow initiator 232 // Socks5 Direction (request/response) Flag 233 #define proto_socks5_req (1 << 12) 234 #define proto_socks5_resp (1 << 13) 235 236 // payload size: full packets, only headers 237 #define FULL 65536 // 1 << 16 238 #define HEADERS 0 // no payload 239 240 // when guessing by src/dst ports, declare at network.h 241 #define UDP_PORT_DNS 53 242 #define TCP_PORT_DNS 53 243 #define TCP_PORT_SOCKS5 1080 244 245 // layer 7 parsing related constants 246 #define http_min_len 7 // longest http command is "DELETE " 247 #define socks5_min_len 4 // we try to match the socks5 request. this should 248 249 // PROTOTYPES 250 251 statfunc u32 get_inet_rcv_saddr(struct inet_sock *); 252 statfunc u32 get_inet_saddr(struct inet_sock *); 253 statfunc u32 get_inet_daddr(struct inet_sock *); 254 statfunc u16 get_inet_sport(struct inet_sock *); 255 statfunc u16 get_inet_num(struct inet_sock *); 256 statfunc u16 get_inet_dport(struct inet_sock *); 257 statfunc struct sock *get_socket_sock(struct socket *); 258 statfunc u16 get_sock_family(struct sock *); 259 statfunc u16 get_sock_protocol(struct sock *); 260 statfunc u16 get_sockaddr_family(struct sockaddr *); 261 statfunc struct in6_addr get_sock_v6_rcv_saddr(struct sock *); 262 statfunc struct in6_addr get_ipv6_pinfo_saddr(struct ipv6_pinfo *); 263 statfunc struct in6_addr get_sock_v6_daddr(struct sock *); 264 statfunc volatile unsigned char get_sock_state(struct sock *); 265 statfunc struct ipv6_pinfo *get_inet_pinet6(struct inet_sock *); 266 statfunc struct sockaddr_un get_unix_sock_addr(struct unix_sock *); 267 statfunc int get_network_details_from_sock_v4(struct sock *, net_conn_v4_t *, int); 268 statfunc struct ipv6_pinfo *inet6_sk_own_impl(struct sock *, struct inet_sock *); 269 statfunc int get_network_details_from_sock_v6(struct sock *, net_conn_v6_t *, int); 270 statfunc int get_local_sockaddr_in_from_network_details(struct sockaddr_in *, net_conn_v4_t *, u16); 271 statfunc int get_remote_sockaddr_in_from_network_details(struct sockaddr_in *, net_conn_v4_t *, u16); 272 statfunc int get_local_sockaddr_in6_from_network_details(struct sockaddr_in6 *, net_conn_v6_t *, u16); 273 statfunc int get_remote_sockaddr_in6_from_network_details(struct sockaddr_in6 *, net_conn_v6_t *, u16); 274 statfunc int get_local_net_id_from_network_details_v4(struct sock *, net_id_t *, net_conn_v4_t *, u16); 275 statfunc int get_local_net_id_from_network_details_v6(struct sock *, net_id_t *, net_conn_v6_t *, u16); 276 statfunc bool fill_tuple(struct sock *, tuple_t *); 277 278 // clang-format on 279 280 // FUNCTIONS 281 282 // 283 // Regular events related to network 284 // 285 286 statfunc u32 get_inet_rcv_saddr(struct inet_sock *inet) 287 { 288 return BPF_CORE_READ(inet, inet_rcv_saddr); 289 } 290 291 statfunc u32 get_inet_saddr(struct inet_sock *inet) 292 { 293 return BPF_CORE_READ(inet, inet_saddr); 294 } 295 296 statfunc u32 get_inet_daddr(struct inet_sock *inet) 297 { 298 return BPF_CORE_READ(inet, inet_daddr); 299 } 300 301 statfunc u16 get_inet_sport(struct inet_sock *inet) 302 { 303 return BPF_CORE_READ(inet, inet_sport); 304 } 305 306 statfunc u16 get_inet_num(struct inet_sock *inet) 307 { 308 return BPF_CORE_READ(inet, inet_num); 309 } 310 311 statfunc u16 get_inet_dport(struct inet_sock *inet) 312 { 313 return BPF_CORE_READ(inet, inet_dport); 314 } 315 316 statfunc struct sock *get_socket_sock(struct socket *socket) 317 { 318 return BPF_CORE_READ(socket, sk); 319 } 320 321 statfunc u16 get_sock_family(struct sock *sock) 322 { 323 return BPF_CORE_READ(sock, sk_family); 324 } 325 326 statfunc u16 get_sock_protocol(struct sock *sock) 327 { 328 u16 protocol = 0; 329 330 // commit bf9765145b85 ("sock: Make sk_protocol a 16-bit value") 331 struct sock___old *check = NULL; 332 if (bpf_core_field_exists(check->__sk_flags_offset)) { 333 check = (struct sock___old *) sock; 334 bpf_core_read(&protocol, 1, (void *) (&check->sk_gso_max_segs) - 3); 335 } else { 336 protocol = BPF_CORE_READ(sock, sk_protocol); 337 } 338 339 return protocol; 340 } 341 342 statfunc u16 get_sockaddr_family(struct sockaddr *address) 343 { 344 return BPF_CORE_READ(address, sa_family); 345 } 346 347 statfunc struct in6_addr get_sock_v6_rcv_saddr(struct sock *sock) 348 { 349 return BPF_CORE_READ(sock, sk_v6_rcv_saddr); 350 } 351 352 statfunc struct in6_addr get_ipv6_pinfo_saddr(struct ipv6_pinfo *np) 353 { 354 return BPF_CORE_READ(np, saddr); 355 } 356 357 statfunc struct in6_addr get_sock_v6_daddr(struct sock *sock) 358 { 359 return BPF_CORE_READ(sock, sk_v6_daddr); 360 } 361 362 statfunc volatile unsigned char get_sock_state(struct sock *sock) 363 { 364 volatile unsigned char sk_state_own_impl; 365 bpf_core_read( 366 (void *) &sk_state_own_impl, sizeof(sk_state_own_impl), (const void *) &sock->sk_state); 367 return sk_state_own_impl; 368 } 369 370 statfunc struct ipv6_pinfo *get_inet_pinet6(struct inet_sock *inet) 371 { 372 struct ipv6_pinfo *pinet6_own_impl; 373 bpf_core_read(&pinet6_own_impl, sizeof(pinet6_own_impl), &inet->pinet6); 374 return pinet6_own_impl; 375 } 376 377 statfunc struct sockaddr_un get_unix_sock_addr(struct unix_sock *sock) 378 { 379 struct unix_address *addr = BPF_CORE_READ(sock, addr); 380 int len = BPF_CORE_READ(addr, len); 381 struct sockaddr_un sockaddr = {}; 382 if (len <= sizeof(struct sockaddr_un)) { 383 bpf_probe_read(&sockaddr, len, addr->name); 384 } 385 return sockaddr; 386 } 387 388 statfunc int get_network_details_from_sock_v4(struct sock *sk, net_conn_v4_t *net_details, int peer) 389 { 390 struct inet_sock *inet = inet_sk(sk); 391 392 if (!peer) { 393 net_details->local_address = get_inet_rcv_saddr(inet); 394 net_details->local_port = bpf_ntohs(get_inet_num(inet)); 395 net_details->remote_address = get_inet_daddr(inet); 396 net_details->remote_port = get_inet_dport(inet); 397 } else { 398 net_details->remote_address = get_inet_rcv_saddr(inet); 399 net_details->remote_port = bpf_ntohs(get_inet_num(inet)); 400 net_details->local_address = get_inet_daddr(inet); 401 net_details->local_port = get_inet_dport(inet); 402 } 403 404 return 0; 405 } 406 407 statfunc struct ipv6_pinfo *inet6_sk_own_impl(struct sock *__sk, struct inet_sock *inet) 408 { 409 volatile unsigned char sk_state_own_impl; 410 sk_state_own_impl = get_sock_state(__sk); 411 412 struct ipv6_pinfo *pinet6_own_impl; 413 pinet6_own_impl = get_inet_pinet6(inet); 414 415 bool sk_fullsock = (1 << sk_state_own_impl) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); 416 return sk_fullsock ? pinet6_own_impl : NULL; 417 } 418 419 statfunc int get_network_details_from_sock_v6(struct sock *sk, net_conn_v6_t *net_details, int peer) 420 { 421 // inspired by 'inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer)' 422 // reference: https://elixir.bootlin.com/linux/latest/source/net/ipv6/af_inet6.c#L509 423 424 struct inet_sock *inet = inet_sk(sk); 425 struct ipv6_pinfo *np = inet6_sk_own_impl(sk, inet); 426 427 struct in6_addr addr = {}; 428 addr = get_sock_v6_rcv_saddr(sk); 429 if (ipv6_addr_any(&addr)) { 430 addr = get_ipv6_pinfo_saddr(np); 431 } 432 433 // the flowinfo field can be specified by the user to indicate a network flow. how it is used by 434 // the kernel, or whether it is enforced to be unique is not so obvious. getting this value is 435 // only supported by the kernel for outgoing packets using the 'struct ipv6_pinfo'. in any 436 // case, leaving it with value of 0 won't affect our representation of network flows. 437 net_details->flowinfo = 0; 438 439 // the scope_id field can be specified by the user to indicate the network interface from which 440 // to send a packet. this only applies for link-local addresses, and is used only by the local 441 // kernel. getting this value is done by using the 'ipv6_iface_scope_id(const struct in6_addr 442 // *addr, int iface)' function. in any case, leaving it with value of 0 won't affect our 443 // representation of network flows. 444 net_details->scope_id = 0; 445 446 if (peer) { 447 net_details->local_address = get_sock_v6_daddr(sk); 448 net_details->local_port = get_inet_dport(inet); 449 net_details->remote_address = addr; 450 net_details->remote_port = get_inet_sport(inet); 451 } else { 452 net_details->local_address = addr; 453 net_details->local_port = get_inet_sport(inet); 454 net_details->remote_address = get_sock_v6_daddr(sk); 455 net_details->remote_port = get_inet_dport(inet); 456 } 457 458 return 0; 459 } 460 461 statfunc int get_local_sockaddr_in_from_network_details(struct sockaddr_in *addr, 462 net_conn_v4_t *net_details, 463 u16 family) 464 { 465 addr->sin_family = family; 466 addr->sin_port = net_details->local_port; 467 addr->sin_addr.s_addr = net_details->local_address; 468 469 return 0; 470 } 471 472 statfunc int get_remote_sockaddr_in_from_network_details(struct sockaddr_in *addr, 473 net_conn_v4_t *net_details, 474 u16 family) 475 { 476 addr->sin_family = family; 477 addr->sin_port = net_details->remote_port; 478 addr->sin_addr.s_addr = net_details->remote_address; 479 480 return 0; 481 } 482 483 statfunc int get_local_sockaddr_in6_from_network_details(struct sockaddr_in6 *addr, 484 net_conn_v6_t *net_details, 485 u16 family) 486 { 487 addr->sin6_family = family; 488 addr->sin6_port = net_details->local_port; 489 addr->sin6_flowinfo = net_details->flowinfo; 490 addr->sin6_addr = net_details->local_address; 491 addr->sin6_scope_id = net_details->scope_id; 492 493 return 0; 494 } 495 496 statfunc int get_remote_sockaddr_in6_from_network_details(struct sockaddr_in6 *addr, 497 net_conn_v6_t *net_details, 498 u16 family) 499 { 500 addr->sin6_family = family; 501 addr->sin6_port = net_details->remote_port; 502 addr->sin6_flowinfo = net_details->flowinfo; 503 addr->sin6_addr = net_details->remote_address; 504 addr->sin6_scope_id = net_details->scope_id; 505 506 return 0; 507 } 508 509 statfunc int get_local_net_id_from_network_details_v4(struct sock *sk, 510 net_id_t *connect_id, 511 net_conn_v4_t *net_details, 512 u16 family) 513 { 514 connect_id->address.s6_addr32[3] = net_details->local_address; 515 connect_id->address.s6_addr16[5] = 0xffff; 516 connect_id->port = net_details->local_port; 517 connect_id->protocol = get_sock_protocol(sk); 518 519 return 0; 520 } 521 522 statfunc int get_local_net_id_from_network_details_v6(struct sock *sk, 523 net_id_t *connect_id, 524 net_conn_v6_t *net_details, 525 u16 family) 526 { 527 connect_id->address = net_details->local_address; 528 connect_id->port = net_details->local_port; 529 connect_id->protocol = get_sock_protocol(sk); 530 531 return 0; 532 } 533 534 statfunc bool fill_tuple(struct sock *sk, tuple_t *tuple) 535 { 536 u16 family = BPF_CORE_READ(sk, __sk_common.skc_family); 537 tuple->family = family; 538 539 switch (family) { 540 case AF_INET: 541 BPF_CORE_READ_INTO(&tuple->saddr.v4addr, sk, __sk_common.skc_rcv_saddr); 542 if (tuple->saddr.v4addr == 0) 543 return false; 544 545 BPF_CORE_READ_INTO(&tuple->daddr.v4addr, sk, __sk_common.skc_daddr); 546 if (tuple->daddr.v4addr == 0) 547 return false; 548 549 break; 550 case AF_INET6: 551 BPF_CORE_READ_INTO(&tuple->saddr.v6addr, sk, __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 552 if (tuple->saddr.v6addr == 0) 553 return false; 554 BPF_CORE_READ_INTO(&tuple->daddr.v6addr, sk, __sk_common.skc_v6_daddr.in6_u.u6_addr32); 555 if (tuple->daddr.v6addr == 0) 556 return false; 557 558 break; 559 560 default: 561 return false; 562 } 563 564 //BPF_CORE_READ_INTO(&tuple->sport, sockp, inet_sport); 565 BPF_CORE_READ_INTO(&tuple->sport, sk, __sk_common.skc_num); 566 if (tuple->sport == 0) 567 return false; 568 569 BPF_CORE_READ_INTO(&tuple->dport, sk, __sk_common.skc_dport); 570 if (tuple->dport == 0) 571 return false; 572 tuple->dport = bpf_ntohs(tuple->dport); 573 574 return true; 575 } 576 577 #endif