github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/c/headers/common/network.h (about)

     1  #ifndef __COMMON_NETWORK_H__
     2  #define __COMMON_NETWORK_H__
     3  
     4  #include "types.h"
     5  #include <vmlinux.h>
     6  #include <vmlinux_flavors.h>
     7  
     8  #include <bpf/bpf_endian.h>
     9  
    10  #include <common/common.h>
    11  
    12  // clang-format off
    13  
    14  // TYPES
    15  
// Layer 3 header storage: an IPv4 header and an IPv6 header overlaid in the
// same memory (only one of the two views is meaningful at a time).
typedef union iphdrs_t {
    struct iphdr iphdr;     // IPv4 view
    struct ipv6hdr ipv6hdr; // IPv6 view
} iphdrs;
    20  
// IP address storage shared by both families: a 32-bit IPv4 address and a
// 128-bit IPv6 address overlaid in a packed union. Writing v4addr touches only
// part of the storage that v6addr covers.
typedef union  {
    u32 v4addr;                 // IPv4 address
    unsigned __int128 v6addr;   // IPv6 address (also spans the whole union)
}  __attribute__((packed)) addr_t;
    25  
// Connection tuple: addresses, ports and address family of one socket.
// Packed, so there is no padding between members. See fill_tuple() for how the
// members are populated from a struct sock.
typedef struct {
    addr_t saddr;   // source address
    addr_t daddr;   // destination address
    u16 sport;      // source port
    u16 dport;      // destination port
    u16 family;     // address family (fill_tuple handles AF_INET and AF_INET6)
} __attribute__((packed)) tuple_t;
    33  
    34  // network flow events
    35  
// Identity of a network flow; used as the key type of netflowmap.
// Packed, so there is no padding between members.
typedef struct netflow {
    u32 host_pid;   // pid the flow is attributed to (NOTE(review): host pid namespace, judging by the name - confirm)
    u8 proto;       // protocol number of the flow
    tuple_t tuple;  // addresses, ports and family
} __attribute__((__packed__)) netflow_t;
    41  
    42  statfunc netflow_t invert_netflow(netflow_t flow)
    43  {
    44      tuple_t inverted_tuple = {
    45          .saddr = flow.tuple.daddr,
    46          .daddr = flow.tuple.saddr,
    47          .sport = flow.tuple.dport,
    48          .dport = flow.tuple.sport,
    49          .family = flow.tuple.family,
    50      };
    51      netflow_t res = {
    52         .host_pid = flow.host_pid,
    53         .proto = flow.proto,
    54         .tuple = inverted_tuple,
    55      };
    56      return res;
    57  }
    58  
// Flow direction values (see netflowvalue_t.direction)
#define flow_unknown 0
#define flow_incoming 1
#define flow_outgoing 2
    62  
// Per-flow bookkeeping; used as the value type of netflowmap.
// Packed, so the u64 counters directly follow the one-byte direction field.
// TODO: per flow statistics can be added later
typedef struct netflowvalue {
    u8 direction;                           // 0 = flow_unknown, 1 = flow_incoming, 2 = flow_outgoing
    u64 last_update;                        // last time this flow was updated
    u64 tx_bytes;                           // total bytes sent
    u64 rx_bytes;                           // total bytes received
    u64 tx_packets;                         // total packets sent
    u64 rx_packets;                         // total packets received
} __attribute__((__packed__)) netflowvalue_t;
    72  
    73  // netflowmap (keep track of network flows)
    74  
// LRU hash map from a network flow (netflow_t) to its bookkeeping data
// (netflowvalue_t).
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 65535);             // simultaneous network flows being tracked
    __type(key, netflow_t);                 // the network flow ...
    __type(value, netflowvalue_t);          // ... linked to flow stats
} netflowmap SEC(".maps");                  // relate network flows and their stats
    81  
    82  // NOTE: proto header structs need full type in vmlinux.h (for correct skb copy)
    83  
// Layer 4 header storage: TCP, UDP, ICMP and ICMPv6 headers overlaid in the
// same memory. The tcp_extra member makes the union at least 40 bytes large.
typedef union protohdrs_t {
    struct tcphdr tcphdr;
    struct udphdr udphdr;
    struct icmphdr icmphdr;
    struct icmp6hdr icmp6hdr;
    union {
        u8 tcp_extra[40]; // data offset might set it up to 60 bytes
    };
} protohdrs;
    93  
// Combined network headers: a layer 3 header union followed by a layer 4
// header union.
typedef struct nethdrs_t {
    iphdrs iphdrs;          // IPv4 or IPv6 header
    protohdrs protohdrs;    // TCP, UDP, ICMP or ICMPv6 header
} nethdrs;
    98  
    99  // cgroupctxmap
   100  
// Bit flags, one bit per packet kind, grouped by network layer. Each value is a
// distinct power of two, so several can be OR-ed together into a single mask.
typedef enum net_packet {
    CAP_NET_PACKET = 1 << 0,
    // Layer 3
    SUB_NET_PACKET_IP = 1 << 1,
    // Layer 4
    SUB_NET_PACKET_TCP = 1 << 2,
    SUB_NET_PACKET_UDP = 1 << 3,
    SUB_NET_PACKET_ICMP = 1 << 4,
    SUB_NET_PACKET_ICMPV6 = 1 << 5,
    // Layer 7
    SUB_NET_PACKET_DNS = 1 << 6,
    SUB_NET_PACKET_HTTP = 1 << 7,
    SUB_NET_PACKET_SOCKS5 = 1 << 8,
} net_packet_t;
   115  
// Metadata kept alongside a network event context (embedded as the `md` member
// of net_event_context_t). Packed, so there is no padding between members.
typedef struct net_event_contextmd {
    u8 should_flow;    // Cache result from should_submit_flow_event
    u32 header_size;   // size of the headers (NOTE(review): unit presumably bytes - confirm)
    u8 captured;        // packet has already been captured
    netflow_t flow;    // flow this packet belongs to
} __attribute__((__packed__)) net_event_contextmd_t;
   122  
// Context of a network event: the generic event context, the argument count,
// the packed event arguments, and trailing metadata that is not part of the
// event to be sent. This is the value type of the cgrpctxmap_* maps.
typedef struct net_event_context {
    event_context_t eventctx;
    u8 argnum;
    struct { // event arguments (needs packing), use anonymous struct to ...
        u8 index0;
        u32 bytes;
        // ... (payload sent by bpf_perf_event_output)
    } __attribute__((__packed__)); // ... avoid address-of-packed-member warnings
    // members below this point are metadata (not part of event to be sent)
    net_event_contextmd_t md;
} __attribute__((__packed__)) net_event_context_t;
   134  
   135  // network related maps
   136  
// Key type of the cgrpctxmap_* maps, built from layer 3 header fields.
// NOTE(review): this struct is not packed, so it may contain padding bytes;
// presumably users zero it before filling it in - confirm against callers.
typedef struct {
    u64 ts;                 // timestamp
    u16 ip_csum;            // IP checksum
    struct in6_addr src;    // source address
    struct in6_addr dst;    // destination address
} indexer_t;
   143  
// Map type shared by cgrpctxmap_in and cgrpctxmap_eg: an LRU hash from an
// indexer_t key to a saved net_event_context_t.
typedef struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 4096); // 800 KB    // simultaneous cgroup/skb ingress/egress progs
    __type(key, indexer_t);                 // layer 3 header fields used as indexer
    __type(value, net_event_context_t);     // event context built so cgroup/skb can use
} cgrpctxmap_t;
   150  
// Two instances of cgrpctxmap_t, one per traffic direction.
cgrpctxmap_t cgrpctxmap_in SEC(".maps");    // saved info SKB caller <=> SKB ingress
cgrpctxmap_t cgrpctxmap_eg SEC(".maps");    // saved info SKB caller <=> SKB egress
   153  
   154  // inodemap
   155  
// Task information associated with a traced socket; value type of inodemap.
typedef struct net_task_context {
    struct task_struct *task;   // pointer to the task's task_struct
    task_context_t taskctx;     // task context
    s32 syscall;                // syscall number
    u16 padding;                // explicit padding member
    u16 policies_version;       // policies version
    u64 matched_policies;       // matched policies (NOTE(review): presumably a bitmap - confirm)
} net_task_context_t;
   164  
// LRU hash map from a socket inode number to the task context it is linked to.
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 65535); // 9 MB     // simultaneous sockets being traced
    __type(key, u64);                       // socket inode number ...
    __type(value, struct net_task_context); // ... linked to a task context
} inodemap SEC(".maps");                    // relate sockets and tasks
   171  
   172  // sockmap (map two cloned "socket" representation structs ("sock"))
   173  
// LRU hash map from a new (cloned) sock pointer to the socket inode number of
// the old sock.
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 65535); // 9 MB     // simultaneous sockets being cloned
    __type(key, u64);                       // *(struct sock *newsock) ...
    __type(value, u64);                     // ... old sock->socket inode number
} sockmap SEC(".maps");                     // relate a cloned sock struct with the old sock's socket inode
   180  
   181  // entrymap
   182  
// Saved arguments of a traced function entry; value type of entrymap.
typedef struct entry {
    long unsigned int args[6];  // up to six raw argument values
} entry_t;
   186  
// LRU hash map from a host thread group id to the arguments saved at entry, so
// they can be looked up again on exit.
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __uint(max_entries, 2048);              // simultaneous tasks being traced for entry/exit
    __type(key, u32);                       // host thread group id (tgid or tid) ...
    __type(value, struct entry);            // ... linked to entry ctx->args
} entrymap SEC(".maps");                    // can't use args_map (indexed by existing events only)
   193  
   194  // network capture events
   195  
// Perf event array used as the output channel for network capture events.
struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(max_entries, 10240);
    __type(key, u32);
    __type(value, u32);
} net_cap_events SEC(".maps");
   202  
   203  // scratch area
   204  
// Single-slot per-CPU array holding one event_data_t, used as scratch space.
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);                 // simultaneous softirqs running per CPU (?)
    __type(key, u32);                       // per cpu index ... (always zero)
    __type(value, event_data_t);            // ... linked to a scratch area
} net_heap_event SEC(".maps");
   211  
   212  // CONSTANTS
   213  
   214  // Network return value (retval) codes
   215  
// Each flag below occupies its own bit (bits 0 to 13), so they can be OR-ed
// together into one value.

// Layer 3 Protocol (since no Layer 2 is available)
#define family_ipv4             (1 << 0)
#define family_ipv6             (1 << 1)
// HTTP Direction (request/response) Flag
#define proto_http_req          (1 << 2)
#define proto_http_resp         (1 << 3)
// Packet Direction (ingress/egress) Flag
#define packet_ingress          (1 << 4)
#define packet_egress           (1 << 5)
// Flows (begin/end) Flags per Protocol
#define flow_tcp_begin          (1 << 6)  // syn+ack flag or first flow packet
#define flow_tcp_sample         (1 << 7)  // sample with statistics after first flow
#define flow_tcp_end            (1 << 8)  // fin flag or last flow packet
#define flow_udp_begin          (1 << 9)  // first flow packet
#define flow_udp_end            (1 << 10)  // last flow packet
#define flow_src_initiator      (1 << 11) // src is the flow initiator
// Socks5 Direction (request/response) Flag
#define proto_socks5_req          (1 << 12)
#define proto_socks5_resp         (1 << 13)
   235  
// payload size selectors: capture full packets, or only the headers
#define FULL    65536       // 1 << 16
#define HEADERS 0           // no payload
   239  
// well-known ports used when guessing the layer 7 protocol by src/dst port
// (declare new ones here, in network.h)
#define UDP_PORT_DNS 53
#define TCP_PORT_DNS 53
#define TCP_PORT_SOCKS5 1080
   244  
// layer 7 parsing related constants
#define http_min_len 7 // longest http command is "DELETE "
#define socks5_min_len 4 // length used when trying to match the socks5 request (NOTE(review): the original comment was cut off after "this should" - confirm intent)
   248  
   249  // PROTOTYPES
   250  
   251  statfunc u32 get_inet_rcv_saddr(struct inet_sock *);
   252  statfunc u32 get_inet_saddr(struct inet_sock *);
   253  statfunc u32 get_inet_daddr(struct inet_sock *);
   254  statfunc u16 get_inet_sport(struct inet_sock *);
   255  statfunc u16 get_inet_num(struct inet_sock *);
   256  statfunc u16 get_inet_dport(struct inet_sock *);
   257  statfunc struct sock *get_socket_sock(struct socket *);
   258  statfunc u16 get_sock_family(struct sock *);
   259  statfunc u16 get_sock_protocol(struct sock *);
   260  statfunc u16 get_sockaddr_family(struct sockaddr *);
   261  statfunc struct in6_addr get_sock_v6_rcv_saddr(struct sock *);
   262  statfunc struct in6_addr get_ipv6_pinfo_saddr(struct ipv6_pinfo *);
   263  statfunc struct in6_addr get_sock_v6_daddr(struct sock *);
   264  statfunc volatile unsigned char get_sock_state(struct sock *);
   265  statfunc struct ipv6_pinfo *get_inet_pinet6(struct inet_sock *);
   266  statfunc struct sockaddr_un get_unix_sock_addr(struct unix_sock *);
   267  statfunc int get_network_details_from_sock_v4(struct sock *, net_conn_v4_t *, int);
   268  statfunc struct ipv6_pinfo *inet6_sk_own_impl(struct sock *, struct inet_sock *);
   269  statfunc int get_network_details_from_sock_v6(struct sock *, net_conn_v6_t *, int);
   270  statfunc int get_local_sockaddr_in_from_network_details(struct sockaddr_in *, net_conn_v4_t *, u16);
   271  statfunc int get_remote_sockaddr_in_from_network_details(struct sockaddr_in *, net_conn_v4_t *, u16);
   272  statfunc int get_local_sockaddr_in6_from_network_details(struct sockaddr_in6 *, net_conn_v6_t *, u16);
   273  statfunc int get_remote_sockaddr_in6_from_network_details(struct sockaddr_in6 *, net_conn_v6_t *, u16);
   274  statfunc int get_local_net_id_from_network_details_v4(struct sock *, net_id_t *, net_conn_v4_t *, u16);
   275  statfunc int get_local_net_id_from_network_details_v6(struct sock *, net_id_t *, net_conn_v6_t *, u16);
   276  statfunc bool fill_tuple(struct sock *, tuple_t *);
   277  
   278  // clang-format on
   279  
   280  // FUNCTIONS
   281  
   282  //
   283  // Regular events related to network
   284  //
   285  
   286  statfunc u32 get_inet_rcv_saddr(struct inet_sock *inet)
   287  {
   288      return BPF_CORE_READ(inet, inet_rcv_saddr);
   289  }
   290  
   291  statfunc u32 get_inet_saddr(struct inet_sock *inet)
   292  {
   293      return BPF_CORE_READ(inet, inet_saddr);
   294  }
   295  
   296  statfunc u32 get_inet_daddr(struct inet_sock *inet)
   297  {
   298      return BPF_CORE_READ(inet, inet_daddr);
   299  }
   300  
   301  statfunc u16 get_inet_sport(struct inet_sock *inet)
   302  {
   303      return BPF_CORE_READ(inet, inet_sport);
   304  }
   305  
   306  statfunc u16 get_inet_num(struct inet_sock *inet)
   307  {
   308      return BPF_CORE_READ(inet, inet_num);
   309  }
   310  
   311  statfunc u16 get_inet_dport(struct inet_sock *inet)
   312  {
   313      return BPF_CORE_READ(inet, inet_dport);
   314  }
   315  
   316  statfunc struct sock *get_socket_sock(struct socket *socket)
   317  {
   318      return BPF_CORE_READ(socket, sk);
   319  }
   320  
   321  statfunc u16 get_sock_family(struct sock *sock)
   322  {
   323      return BPF_CORE_READ(sock, sk_family);
   324  }
   325  
   326  statfunc u16 get_sock_protocol(struct sock *sock)
   327  {
   328      u16 protocol = 0;
   329  
   330      // commit bf9765145b85 ("sock: Make sk_protocol a 16-bit value")
   331      struct sock___old *check = NULL;
   332      if (bpf_core_field_exists(check->__sk_flags_offset)) {
   333          check = (struct sock___old *) sock;
   334          bpf_core_read(&protocol, 1, (void *) (&check->sk_gso_max_segs) - 3);
   335      } else {
   336          protocol = BPF_CORE_READ(sock, sk_protocol);
   337      }
   338  
   339      return protocol;
   340  }
   341  
   342  statfunc u16 get_sockaddr_family(struct sockaddr *address)
   343  {
   344      return BPF_CORE_READ(address, sa_family);
   345  }
   346  
   347  statfunc struct in6_addr get_sock_v6_rcv_saddr(struct sock *sock)
   348  {
   349      return BPF_CORE_READ(sock, sk_v6_rcv_saddr);
   350  }
   351  
   352  statfunc struct in6_addr get_ipv6_pinfo_saddr(struct ipv6_pinfo *np)
   353  {
   354      return BPF_CORE_READ(np, saddr);
   355  }
   356  
   357  statfunc struct in6_addr get_sock_v6_daddr(struct sock *sock)
   358  {
   359      return BPF_CORE_READ(sock, sk_v6_daddr);
   360  }
   361  
   362  statfunc volatile unsigned char get_sock_state(struct sock *sock)
   363  {
   364      volatile unsigned char sk_state_own_impl;
   365      bpf_core_read(
   366          (void *) &sk_state_own_impl, sizeof(sk_state_own_impl), (const void *) &sock->sk_state);
   367      return sk_state_own_impl;
   368  }
   369  
   370  statfunc struct ipv6_pinfo *get_inet_pinet6(struct inet_sock *inet)
   371  {
   372      struct ipv6_pinfo *pinet6_own_impl;
   373      bpf_core_read(&pinet6_own_impl, sizeof(pinet6_own_impl), &inet->pinet6);
   374      return pinet6_own_impl;
   375  }
   376  
   377  statfunc struct sockaddr_un get_unix_sock_addr(struct unix_sock *sock)
   378  {
   379      struct unix_address *addr = BPF_CORE_READ(sock, addr);
   380      int len = BPF_CORE_READ(addr, len);
   381      struct sockaddr_un sockaddr = {};
   382      if (len <= sizeof(struct sockaddr_un)) {
   383          bpf_probe_read(&sockaddr, len, addr->name);
   384      }
   385      return sockaddr;
   386  }
   387  
   388  statfunc int get_network_details_from_sock_v4(struct sock *sk, net_conn_v4_t *net_details, int peer)
   389  {
   390      struct inet_sock *inet = inet_sk(sk);
   391  
   392      if (!peer) {
   393          net_details->local_address = get_inet_rcv_saddr(inet);
   394          net_details->local_port = bpf_ntohs(get_inet_num(inet));
   395          net_details->remote_address = get_inet_daddr(inet);
   396          net_details->remote_port = get_inet_dport(inet);
   397      } else {
   398          net_details->remote_address = get_inet_rcv_saddr(inet);
   399          net_details->remote_port = bpf_ntohs(get_inet_num(inet));
   400          net_details->local_address = get_inet_daddr(inet);
   401          net_details->local_port = get_inet_dport(inet);
   402      }
   403  
   404      return 0;
   405  }
   406  
   407  statfunc struct ipv6_pinfo *inet6_sk_own_impl(struct sock *__sk, struct inet_sock *inet)
   408  {
   409      volatile unsigned char sk_state_own_impl;
   410      sk_state_own_impl = get_sock_state(__sk);
   411  
   412      struct ipv6_pinfo *pinet6_own_impl;
   413      pinet6_own_impl = get_inet_pinet6(inet);
   414  
   415      bool sk_fullsock = (1 << sk_state_own_impl) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
   416      return sk_fullsock ? pinet6_own_impl : NULL;
   417  }
   418  
   419  statfunc int get_network_details_from_sock_v6(struct sock *sk, net_conn_v6_t *net_details, int peer)
   420  {
   421      // inspired by 'inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer)'
   422      // reference: https://elixir.bootlin.com/linux/latest/source/net/ipv6/af_inet6.c#L509
   423  
   424      struct inet_sock *inet = inet_sk(sk);
   425      struct ipv6_pinfo *np = inet6_sk_own_impl(sk, inet);
   426  
   427      struct in6_addr addr = {};
   428      addr = get_sock_v6_rcv_saddr(sk);
   429      if (ipv6_addr_any(&addr)) {
   430          addr = get_ipv6_pinfo_saddr(np);
   431      }
   432  
   433      // the flowinfo field can be specified by the user to indicate a network flow. how it is used by
   434      // the kernel, or whether it is enforced to be unique is not so obvious.  getting this value is
   435      // only supported by the kernel for outgoing packets using the 'struct ipv6_pinfo'.  in any
   436      // case, leaving it with value of 0 won't affect our representation of network flows.
   437      net_details->flowinfo = 0;
   438  
   439      // the scope_id field can be specified by the user to indicate the network interface from which
   440      // to send a packet. this only applies for link-local addresses, and is used only by the local
   441      // kernel.  getting this value is done by using the 'ipv6_iface_scope_id(const struct in6_addr
   442      // *addr, int iface)' function.  in any case, leaving it with value of 0 won't affect our
   443      // representation of network flows.
   444      net_details->scope_id = 0;
   445  
   446      if (peer) {
   447          net_details->local_address = get_sock_v6_daddr(sk);
   448          net_details->local_port = get_inet_dport(inet);
   449          net_details->remote_address = addr;
   450          net_details->remote_port = get_inet_sport(inet);
   451      } else {
   452          net_details->local_address = addr;
   453          net_details->local_port = get_inet_sport(inet);
   454          net_details->remote_address = get_sock_v6_daddr(sk);
   455          net_details->remote_port = get_inet_dport(inet);
   456      }
   457  
   458      return 0;
   459  }
   460  
   461  statfunc int get_local_sockaddr_in_from_network_details(struct sockaddr_in *addr,
   462                                                          net_conn_v4_t *net_details,
   463                                                          u16 family)
   464  {
   465      addr->sin_family = family;
   466      addr->sin_port = net_details->local_port;
   467      addr->sin_addr.s_addr = net_details->local_address;
   468  
   469      return 0;
   470  }
   471  
   472  statfunc int get_remote_sockaddr_in_from_network_details(struct sockaddr_in *addr,
   473                                                           net_conn_v4_t *net_details,
   474                                                           u16 family)
   475  {
   476      addr->sin_family = family;
   477      addr->sin_port = net_details->remote_port;
   478      addr->sin_addr.s_addr = net_details->remote_address;
   479  
   480      return 0;
   481  }
   482  
   483  statfunc int get_local_sockaddr_in6_from_network_details(struct sockaddr_in6 *addr,
   484                                                           net_conn_v6_t *net_details,
   485                                                           u16 family)
   486  {
   487      addr->sin6_family = family;
   488      addr->sin6_port = net_details->local_port;
   489      addr->sin6_flowinfo = net_details->flowinfo;
   490      addr->sin6_addr = net_details->local_address;
   491      addr->sin6_scope_id = net_details->scope_id;
   492  
   493      return 0;
   494  }
   495  
   496  statfunc int get_remote_sockaddr_in6_from_network_details(struct sockaddr_in6 *addr,
   497                                                            net_conn_v6_t *net_details,
   498                                                            u16 family)
   499  {
   500      addr->sin6_family = family;
   501      addr->sin6_port = net_details->remote_port;
   502      addr->sin6_flowinfo = net_details->flowinfo;
   503      addr->sin6_addr = net_details->remote_address;
   504      addr->sin6_scope_id = net_details->scope_id;
   505  
   506      return 0;
   507  }
   508  
   509  statfunc int get_local_net_id_from_network_details_v4(struct sock *sk,
   510                                                        net_id_t *connect_id,
   511                                                        net_conn_v4_t *net_details,
   512                                                        u16 family)
   513  {
   514      connect_id->address.s6_addr32[3] = net_details->local_address;
   515      connect_id->address.s6_addr16[5] = 0xffff;
   516      connect_id->port = net_details->local_port;
   517      connect_id->protocol = get_sock_protocol(sk);
   518  
   519      return 0;
   520  }
   521  
   522  statfunc int get_local_net_id_from_network_details_v6(struct sock *sk,
   523                                                        net_id_t *connect_id,
   524                                                        net_conn_v6_t *net_details,
   525                                                        u16 family)
   526  {
   527      connect_id->address = net_details->local_address;
   528      connect_id->port = net_details->local_port;
   529      connect_id->protocol = get_sock_protocol(sk);
   530  
   531      return 0;
   532  }
   533  
   534  statfunc bool fill_tuple(struct sock *sk, tuple_t *tuple)
   535  {
   536      u16 family = BPF_CORE_READ(sk, __sk_common.skc_family);
   537      tuple->family = family;
   538  
   539  	switch (family) {
   540  	case AF_INET:
   541  		BPF_CORE_READ_INTO(&tuple->saddr.v4addr, sk, __sk_common.skc_rcv_saddr);
   542  		if (tuple->saddr.v4addr == 0)
   543  			return false;
   544  
   545  		BPF_CORE_READ_INTO(&tuple->daddr.v4addr, sk, __sk_common.skc_daddr);
   546  		if (tuple->daddr.v4addr == 0)
   547  			return false;
   548  
   549  		break;
   550  	case AF_INET6:
   551  		BPF_CORE_READ_INTO(&tuple->saddr.v6addr, sk, __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
   552  		if (tuple->saddr.v6addr == 0)
   553  			return false;
   554  		BPF_CORE_READ_INTO(&tuple->daddr.v6addr, sk, __sk_common.skc_v6_daddr.in6_u.u6_addr32);
   555  		if (tuple->daddr.v6addr == 0)
   556  			return false;
   557  
   558  		break;
   559  
   560  	default:
   561  		return false;
   562  	}
   563  
   564  	//BPF_CORE_READ_INTO(&tuple->sport, sockp, inet_sport);
   565  	BPF_CORE_READ_INTO(&tuple->sport, sk, __sk_common.skc_num);
   566  	if (tuple->sport == 0)
   567  	    return false;
   568  
   569      BPF_CORE_READ_INTO(&tuple->dport, sk, __sk_common.skc_dport);
   570      if (tuple->dport == 0)
   571          return false;
   572      tuple->dport = bpf_ntohs(tuple->dport);
   573  
   574  	return true;
   575  }
   576  
   577  #endif