github.com/cilium/cilium@v1.16.2/bpf/bpf_xdp.c (about)

     1  // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
     2  /* Copyright Authors of Cilium */
     3  
     4  #include <bpf/ctx/xdp.h>
     5  #include <bpf/api.h>
     6  
     7  #include <node_config.h>
     8  #include <netdev_config.h>
     9  #include <filter_config.h>
    10  
    11  #define SKIP_POLICY_MAP 1
    12  
    13  /* Controls the inclusion of the CILIUM_CALL_HANDLE_ICMP6_NS section in the
    14   * bpf_lxc object file.
    15   */
    16  #define SKIP_ICMPV6_NS_HANDLING
    17  
    18  /* Controls the inclusion of the CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED section
    19   * in the bpf_lxc object file. This is needed for all callers of
    20   * ipv6_local_delivery, which calls into the IPv6 L3 handling.
    21   */
    22  #define SKIP_ICMPV6_HOPLIMIT_HANDLING
    23  
    24  /* Controls the inclusion of the CILIUM_CALL_SRV6 section in the object file.
    25   */
    26  #define SKIP_SRV6_HANDLING
    27  
    28  /* The XDP datapath does not take care of health probes from the local node,
    29   * thus do not compile it in.
    30   */
    31  #undef ENABLE_HEALTH_CHECK
    32  
    33  #include "lib/common.h"
    34  #include "lib/maps.h"
    35  #include "lib/eps.h"
    36  #include "lib/events.h"
    37  #include "lib/nodeport.h"
    38  
#ifdef ENABLE_PREFILTER
#ifdef CIDR4_FILTER
/* Exact-match (/32) IPv4 source-prefix denylist. A hit in this map drops
 * the packet in check_v4() before any LB processing.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct lpm_v4_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_HMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR4_HMAP_NAME __section_maps_btf;

#ifdef CIDR4_LPM_PREFILTER
/* Longest-prefix-match IPv4 denylist, consulted before the hash map in
 * check_v4() so that whole CIDR ranges can be dropped with one entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__type(key, struct lpm_v4_key),
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_LMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR4_LMAP_NAME __section_maps_btf;

#endif /* CIDR4_LPM_PREFILTER */
#endif /* CIDR4_FILTER */

#ifdef CIDR6_FILTER
/* Exact-match (/128) IPv6 source-prefix denylist, used by check_v6().
 * NOTE(review): sized with CIDR4_HMAP_ELEMS, not a CIDR6_* macro — looks
 * intentional (shared sizing) but worth confirming against filter_config.h.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct lpm_v6_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_HMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR6_HMAP_NAME __section_maps_btf;

#ifdef CIDR6_LPM_PREFILTER
/* Longest-prefix-match IPv6 denylist, consulted before the hash map in
 * check_v6().
 * NOTE(review): sized with CIDR4_LMAP_ELEMS — see note on CIDR6_HMAP_NAME.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__type(key, struct lpm_v6_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_LMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR6_LMAP_NAME __section_maps_btf;
#endif /* CIDR6_LPM_PREFILTER */
#endif /* CIDR6_FILTER */
#endif /* ENABLE_PREFILTER */
    85  
    86  static __always_inline __maybe_unused int
    87  bpf_xdp_exit(struct __ctx_buff *ctx, const int verdict)
    88  {
    89  	if (verdict == CTX_ACT_OK)
    90  		ctx_move_xfer(ctx);
    91  
    92  	return verdict;
    93  }
    94  
#ifdef ENABLE_IPV4
#ifdef ENABLE_NODEPORT_ACCELERATION
/* IPv4 nodeport load-balancing tail call at the XDP layer.
 *
 * Unless nodeport handling was flagged to be skipped for this packet,
 * revalidate the IPv4 header and run nodeport_lb4(). With Geneve DSR
 * encapsulation enabled, first try to peek through a zero-checksum,
 * option-less Geneve tunnel so the LB operates on the inner IPv4 header;
 * any packet that does not match that fast-path shape is punted to TC
 * ("goto no_encap") rather than dropped.
 */
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_NETDEV)
int tail_lb_ipv4(struct __ctx_buff *ctx)
{
	int ret = CTX_ACT_OK;
	__s8 ext_err = 0;

	if (!ctx_skip_nodeport(ctx)) {
		/* l3_off may be rewritten below to point at an inner header. */
		int l3_off = ETH_HLEN;
		void *data, *data_end;
		struct iphdr *ip4;
		bool __maybe_unused is_dsr = false;

		if (!revalidate_data(ctx, &data, &data_end, &ip4)) {
			ret = DROP_INVALID;
			goto out;
		}

#if defined(ENABLE_DSR) && !defined(ENABLE_DSR_HYBRID) && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE
		{
			int l4_off, inner_l2_off;
			struct genevehdr geneve;
			__sum16	udp_csum;
			__be16 dport;
			__u16 proto;

			/* Geneve runs over UDP; anything else is not encap. */
			if (ip4->protocol != IPPROTO_UDP)
				goto no_encap;

			/* Punt packets with IP options to TC */
			if (ipv4_hdrlen(ip4) != sizeof(*ip4))
				goto no_encap;

			l4_off = l3_off + sizeof(*ip4);

			if (l4_load_port(ctx, l4_off + UDP_DPORT_OFF, &dport) < 0) {
				ret = DROP_INVALID;
				goto out;
			}

			/* Only the configured tunnel port is treated as encap. */
			if (dport != bpf_htons(TUNNEL_PORT))
				goto no_encap;

			/* Cilium uses BPF_F_ZERO_CSUM_TX for its tunnel traffic.
			 *
			 * Adding LB support for checksummed packets would require
			 * that we adjust udp->check
			 * 1.	after DNAT of the inner packet,
			 * 2.	after re-writing the outer headers and inserting
			 *	the DSR option
			 */
			if (ctx_load_bytes(ctx, l4_off + offsetof(struct udphdr, check),
					   &udp_csum, sizeof(udp_csum)) < 0) {
				ret = DROP_INVALID;
				goto out;
			}

			if (udp_csum != 0)
				goto no_encap;

			if (ctx_load_bytes(ctx, l4_off + sizeof(struct udphdr), &geneve,
					   sizeof(geneve)) < 0) {
				ret = DROP_INVALID;
				goto out;
			}

			/* Inner payload must be an Ethernet frame. */
			if (geneve.protocol_type != bpf_htons(ETH_P_TEB))
				goto no_encap;

			/* Punt packets with GENEVE options to TC */
			if (geneve.opt_len)
				goto no_encap;

			inner_l2_off = l4_off + sizeof(struct udphdr) + sizeof(struct genevehdr);

			/* point at the inner L3 header: */
			if (!validate_ethertype_l2_off(ctx, inner_l2_off, &proto))
				goto no_encap;

			if (proto != bpf_htons(ETH_P_IP))
				goto no_encap;

			l3_off = inner_l2_off + ETH_HLEN;

			/* Re-derive ip4 to point at the inner IPv4 header. */
			if (!revalidate_data_l3_off(ctx, &data, &data_end, &ip4, l3_off)) {
				ret = DROP_INVALID;
				goto out;
			}
		}
no_encap:
#endif /* ENABLE_DSR && !ENABLE_DSR_HYBRID && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE */

		ret = nodeport_lb4(ctx, ip4, l3_off, 0, &ext_err, &is_dsr);
		/* NAT46/64 recirculation: re-run as IPv6 via tail call. */
		if (ret == NAT_46X64_RECIRC)
			ret = tail_call_internal(ctx, CILIUM_CALL_IPV6_FROM_NETDEV,
						 &ext_err);
	}

out:
	if (IS_ERR(ret))
		return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err,
						  CTX_ACT_DROP, METRIC_INGRESS);

	return bpf_xdp_exit(ctx, ret);
}
   201  
   202  static __always_inline int check_v4_lb(struct __ctx_buff *ctx)
   203  {
   204  	__s8 ext_err = 0;
   205  	int ret;
   206  
   207  	ret = tail_call_internal(ctx, CILIUM_CALL_IPV4_FROM_NETDEV, &ext_err);
   208  	return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err, CTX_ACT_DROP,
   209  					  METRIC_INGRESS);
   210  }
   211  #else
   212  static __always_inline int check_v4_lb(struct __ctx_buff *ctx __maybe_unused)
   213  {
   214  	return CTX_ACT_OK;
   215  }
   216  #endif /* ENABLE_NODEPORT_ACCELERATION */
   217  
#ifdef ENABLE_PREFILTER
/* IPv4 prefilter: drop packets whose source address matches a configured
 * CIDR denylist, otherwise fall through to the nodeport LB path.
 */
static __always_inline int check_v4(struct __ctx_buff *ctx)
{
	void *data_end = ctx_data_end(ctx);
	void *data = ctx_data(ctx);
	struct iphdr *ipv4_hdr = data + sizeof(struct ethhdr);
	struct lpm_v4_key pfx __maybe_unused;

	/* Ensure the full IPv4 header is within the packet bounds. */
	if (ctx_no_room(ipv4_hdr + 1, data_end))
		return CTX_ACT_DROP;

#ifdef CIDR4_FILTER
	/* Key on the source address as a /32.
	 * NOTE(review): copy length is sizeof(pfx.addr), destination is
	 * pfx.lpm.data — presumably these alias in struct lpm_v4_key;
	 * confirm against the key layout in the maps header.
	 */
	memcpy(pfx.lpm.data, &ipv4_hdr->saddr, sizeof(pfx.addr));
	pfx.lpm.prefixlen = 32;

#ifdef CIDR4_LPM_PREFILTER
	/* Range-based (LPM) denylist is consulted first. */
	if (map_lookup_elem(&CIDR4_LMAP_NAME, &pfx))
		return CTX_ACT_DROP;
#endif /* CIDR4_LPM_PREFILTER */
	/* Exact /32 denylist; a miss proceeds to the LB path. */
	return map_lookup_elem(&CIDR4_HMAP_NAME, &pfx) ?
		CTX_ACT_DROP : check_v4_lb(ctx);
#else
	return check_v4_lb(ctx);
#endif /* CIDR4_FILTER */
}
#else
/* Prefilter disabled: go straight to the LB path. */
static __always_inline int check_v4(struct __ctx_buff *ctx)
{
	return check_v4_lb(ctx);
}
#endif /* ENABLE_PREFILTER */
   250  
   251  #ifdef ENABLE_IPV6
   252  #ifdef ENABLE_NODEPORT_ACCELERATION
   253  __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_NETDEV)
   254  int tail_lb_ipv6(struct __ctx_buff *ctx)
   255  {
   256  	int ret = CTX_ACT_OK;
   257  	__s8 ext_err = 0;
   258  
   259  	if (!ctx_skip_nodeport(ctx)) {
   260  		void *data, *data_end;
   261  		struct ipv6hdr *ip6;
   262  		bool is_dsr = false;
   263  
   264  		if (!revalidate_data(ctx, &data, &data_end, &ip6)) {
   265  			ret = DROP_INVALID;
   266  			goto drop_err;
   267  		}
   268  
   269  		ret = nodeport_lb6(ctx, ip6, 0, &ext_err, &is_dsr);
   270  		if (IS_ERR(ret))
   271  			goto drop_err;
   272  	}
   273  
   274  	return bpf_xdp_exit(ctx, ret);
   275  
   276  drop_err:
   277  	return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err,
   278  					  CTX_ACT_DROP, METRIC_INGRESS);
   279  }
   280  
   281  static __always_inline int check_v6_lb(struct __ctx_buff *ctx)
   282  {
   283  	__s8 ext_err = 0;
   284  	int ret;
   285  
   286  	ret = tail_call_internal(ctx, CILIUM_CALL_IPV6_FROM_NETDEV, &ext_err);
   287  	return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err, CTX_ACT_DROP,
   288  					  METRIC_INGRESS);
   289  }
   290  #else
   291  static __always_inline int check_v6_lb(struct __ctx_buff *ctx __maybe_unused)
   292  {
   293  	return CTX_ACT_OK;
   294  }
   295  #endif /* ENABLE_NODEPORT_ACCELERATION */
   296  
#ifdef ENABLE_PREFILTER
/* IPv6 prefilter: drop packets whose source address matches a configured
 * CIDR denylist, otherwise fall through to the nodeport LB path.
 */
static __always_inline int check_v6(struct __ctx_buff *ctx)
{
	void *data_end = ctx_data_end(ctx);
	void *data = ctx_data(ctx);
	struct ipv6hdr *ipv6_hdr = data + sizeof(struct ethhdr);
	struct lpm_v6_key pfx __maybe_unused;

	/* Ensure the full IPv6 header is within the packet bounds. */
	if (ctx_no_room(ipv6_hdr + 1, data_end))
		return CTX_ACT_DROP;

#ifdef CIDR6_FILTER
	/* Key on the source address as a /128.
	 * NOTE(review): uses __bpf_memcpy_builtin where the v4 twin uses
	 * plain memcpy, and copies sizeof(pfx.addr) into pfx.lpm.data —
	 * presumably these alias in struct lpm_v6_key; confirm against
	 * the key layout in the maps header.
	 */
	__bpf_memcpy_builtin(pfx.lpm.data, &ipv6_hdr->saddr, sizeof(pfx.addr));
	pfx.lpm.prefixlen = 128;

#ifdef CIDR6_LPM_PREFILTER
	/* Range-based (LPM) denylist is consulted first. */
	if (map_lookup_elem(&CIDR6_LMAP_NAME, &pfx))
		return CTX_ACT_DROP;
#endif /* CIDR6_LPM_PREFILTER */
	/* Exact /128 denylist; a miss proceeds to the LB path. */
	return map_lookup_elem(&CIDR6_HMAP_NAME, &pfx) ?
		CTX_ACT_DROP : check_v6_lb(ctx);
#else
	return check_v6_lb(ctx);
#endif /* CIDR6_FILTER */
}
#else
/* Prefilter disabled: go straight to the LB path. */
static __always_inline int check_v6(struct __ctx_buff *ctx)
{
	return check_v6_lb(ctx);
}
#endif /* ENABLE_PREFILTER */
#endif /* ENABLE_IPV6 */
   329  
   330  static __always_inline int check_filters(struct __ctx_buff *ctx)
   331  {
   332  	int ret = CTX_ACT_OK;
   333  	__u16 proto;
   334  
   335  	if (!validate_ethertype(ctx, &proto))
   336  		return CTX_ACT_OK;
   337  
   338  	ctx_store_meta(ctx, XFER_MARKER, 0);
   339  	ctx_skip_nodeport_clear(ctx);
   340  
   341  	switch (proto) {
   342  #ifdef ENABLE_IPV4
   343  	case bpf_htons(ETH_P_IP):
   344  		ret = check_v4(ctx);
   345  		break;
   346  #endif /* ENABLE_IPV4 */
   347  #ifdef ENABLE_IPV6
   348  	case bpf_htons(ETH_P_IPV6):
   349  		ret = check_v6(ctx);
   350  		break;
   351  #endif /* ENABLE_IPV6 */
   352  	default:
   353  		break;
   354  	}
   355  
   356  	return bpf_xdp_exit(ctx, ret);
   357  }
   358  
/* XDP entry point for the Cilium datapath on this device; all work is
 * delegated to check_filters().
 */
__section_entry
int cil_xdp_entry(struct __ctx_buff *ctx)
{
	return check_filters(ctx);
}
   364  
   365  BPF_LICENSE("Dual BSD/GPL");