github.com/cilium/cilium@v1.16.2/bpf/lib/fib.h (about)

     1  /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
     2  /* Copyright Authors of Cilium */
     3  
     4  #pragma once
     5  
     6  #include <bpf/ctx/ctx.h>
     7  #include <bpf/api.h>
     8  
     9  #include "common.h"
    10  #include "neigh.h"
    11  #include "l3.h"
    12  
    13  static __always_inline int
    14  maybe_add_l2_hdr(struct __ctx_buff *ctx __maybe_unused,
    15  		 __u32 ifindex __maybe_unused,
    16  		 bool *l2_hdr_required __maybe_unused)
    17  {
    18  	if (IS_L3_DEV(ifindex)) {
    19  		/* The packet is going to be redirected to L3 dev, so
    20  		 * skip L2 addr settings.
    21  		 */
    22  		*l2_hdr_required = false;
    23  	} else if (ETH_HLEN == 0) {
    24  		/* The packet is going to be redirected from L3 to L2
    25  		 * device, so we need to create L2 header first.
    26  		 */
    27  		__u16 proto = ctx_get_protocol(ctx);
    28  
    29  		if (ctx_change_head(ctx, __ETH_HLEN, 0))
    30  			return DROP_INVALID;
    31  		if (eth_store_proto(ctx, proto, 0) < 0)
    32  			return DROP_WRITE_ERROR;
    33  	}
    34  	return 0;
    35  }
    36  
    37  static __always_inline bool fib_ok(int ret)
    38  {
    39  	return likely(ret == CTX_ACT_TX || ret == CTX_ACT_REDIRECT);
    40  }
    41  
    42   /* fib_do_redirect will redirect the ctx to a particular output interface.
    43    * @arg ctx			packet
    44    * @arg needs_l2_check		check for L3 -> L2 redirect
    45    * @arg fib_params		FIB lookup parameters
    46    * @arg allow_neigh_map	fallback to neighbour map for DMAC
    47    * @arg fib_ret		result of a preceding FIB lookup
    48    * @arg oif			egress interface index
    49    *
    50    * Returns:
    51    *   - result of BPF redirect
    52    *   - DROP_NO_FIB when DMAC couldn't be resolved
    53    *   - other DROP reasons
    54    * the redirect can occur with or without a previous call to fib_lookup.
    55    *
    56    * if a previous fib_lookup was performed, this function will attempt to redirect
    57    * to the output interface in the provided 'fib_params', as long as 'fib_ret'
    58    * is set to 'BPF_FIB_LKUP_RET_SUCCESS'
    59    *
    60    * if a previous fib_lookup was performed and the return was 'BPF_FIB_LKUP_NO_NEIGH'
    61    * this function will then attempt to copy the af_family and destination address
    62    * out of 'fib_params' and into 'redir_neigh' struct then perform a
    63    * 'redirect_neigh'.
    64    *
    65    * if no previous fib_lookup was performed, and the desire is to simply use
    66    * 'redirect_neigh' then set 'fib_params' to nil and 'fib_ret' to
    67    * 'BPF_FIB_LKUP_RET_NO_NEIGH'.
    68    * in this case, the 'oif' value will be used for the 'redirect_neigh' call.
    69    *
    70    * in a special case, if a previous fib_lookup was performed, and the return
    71    * was 'BPF_FIB_LKUP_RET_NO_NEIGH', and we are on a kernel version where
    72    * the target interface for the fib lookup is not returned
    73    * (due to ARP failing, see Kernel commit d1c362e1dd68) the provided 'oif'
    74    * will be used as output interface for redirect.
    75    */
    76  static __always_inline int
    77  fib_do_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
    78  		const struct bpf_fib_lookup_padded *fib_params,
    79  		bool allow_neigh_map, __s8 *fib_ret, int *oif)
    80  {
    81  	/* sanity check, we only enter this function with these two fib lookup
    82  	 * return codes.
    83  	 */
    84  	if (*fib_ret && (*fib_ret != BPF_FIB_LKUP_RET_NO_NEIGH))
    85  		return DROP_NO_FIB;
    86  
    87  	/* determine which oif to use before needs_l2_check determines if layer 2
    88  	 * header needs to be pushed.
    89  	 */
    90  	if (fib_params) {
    91  		if (*fib_ret == BPF_FIB_LKUP_RET_NO_NEIGH &&
    92  		    !is_defined(HAVE_FIB_IFINDEX) && *oif) {
    93  			/* For kernels without d1c362e1dd68 ("bpf: Always
    94  			 * return target ifindex in bpf_fib_lookup") we
    95  			 * fall back to use the caller-provided oif when
    96  			 * necessary.
    97  			 * no-op
    98  			 */
    99  		} else {
   100  			*oif = fib_params->l.ifindex;
   101  		}
   102  	}
   103  
   104  	/* determine if we need to append layer 2 header */
   105  	if (needs_l2_check) {
   106  		bool l2_hdr_required = true;
   107  		int ret;
   108  
   109  		ret = maybe_add_l2_hdr(ctx, *oif, &l2_hdr_required);
   110  		if (ret != 0)
   111  			return ret;
   112  		if (!l2_hdr_required)
   113  			goto out_send;
   114  	}
   115  
   116  	/* determine if we are performing redirect or redirect_neigh*/
   117  	switch (*fib_ret) {
   118  	case BPF_FIB_LKUP_RET_SUCCESS:
   119  		if (eth_store_daddr(ctx, fib_params->l.dmac, 0) < 0)
   120  			return DROP_WRITE_ERROR;
   121  		if (eth_store_saddr(ctx, fib_params->l.smac, 0) < 0)
   122  			return DROP_WRITE_ERROR;
   123  		break;
   124  	case BPF_FIB_LKUP_RET_NO_NEIGH:
   125  		/* If we are able to resolve neighbors on demand, always
   126  		 * prefer that over the BPF neighbor map since the latter
   127  		 * might be less accurate in some asymmetric corner cases.
   128  		 */
   129  		if (neigh_resolver_available()) {
   130  			if (fib_params) {
   131  				struct bpf_redir_neigh nh_params;
   132  
   133  				nh_params.nh_family = fib_params->l.family;
   134  				__bpf_memcpy_builtin(&nh_params.ipv6_nh,
   135  						     &fib_params->l.ipv6_dst,
   136  						     sizeof(nh_params.ipv6_nh));
   137  
   138  				return redirect_neigh(*oif, &nh_params,
   139  						sizeof(nh_params), 0);
   140  			}
   141  
   142  			return redirect_neigh(*oif, NULL, 0, 0);
   143  		} else {
   144  			union macaddr smac = NATIVE_DEV_MAC_BY_IFINDEX(*oif);
   145  			union macaddr *dmac = NULL;
   146  
   147  			if (allow_neigh_map) {
   148  				/* The neigh_record_ip{4,6} locations are mainly from
   149  				 * inbound client traffic on the load-balancer where we
   150  				 * know that replies need to go back to them.
   151  				 */
   152  				dmac = fib_params->l.family == AF_INET ?
   153  					neigh_lookup_ip4(&fib_params->l.ipv4_dst) :
   154  					neigh_lookup_ip6((void *)&fib_params->l.ipv6_dst);
   155  			}
   156  
   157  			if (!dmac) {
   158  				*fib_ret = BPF_FIB_MAP_NO_NEIGH;
   159  				return DROP_NO_FIB;
   160  			}
   161  			if (eth_store_daddr_aligned(ctx, dmac->addr, 0) < 0)
   162  				return DROP_WRITE_ERROR;
   163  			if (eth_store_saddr_aligned(ctx, smac.addr, 0) < 0)
   164  				return DROP_WRITE_ERROR;
   165  		}
   166  	};
   167  out_send:
   168  	return ctx_redirect(ctx, *oif, 0);
   169  }
   170  
   171  static __always_inline int
   172  fib_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
   173  	     struct bpf_fib_lookup_padded *fib_params __maybe_unused,
   174  	     bool use_neigh_map, __s8 *fib_err __maybe_unused, int *oif)
   175  {
   176  #ifdef ENABLE_SKIP_FIB
   177  	*oif = DIRECT_ROUTING_DEV_IFINDEX;
   178  #endif
   179  
   180  	if (!is_defined(ENABLE_SKIP_FIB) || !neigh_resolver_available()) {
   181  		int ret;
   182  
   183  		ret = fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), 0);
   184  		*fib_err = (__s8)ret;
   185  
   186  		return fib_do_redirect(ctx, needs_l2_check, fib_params, use_neigh_map,
   187  				       fib_err, oif);
   188  	}
   189  
   190  	*fib_err = BPF_FIB_LKUP_RET_NO_NEIGH;
   191  
   192  	return fib_do_redirect(ctx, needs_l2_check, NULL, use_neigh_map,
   193  			       fib_err, oif);
   194  }
   195  
   196  #ifdef ENABLE_IPV6
   197  /* fib_lookup_v6 will perform a fib lookup with the src and dest addresses
   198   * provided.
   199   *
   200   * after the function returns 'fib_params' will have the results of the fib lookup
   201   * if successful.
   202   */
   203  static __always_inline int
   204  fib_lookup_v6(struct __ctx_buff *ctx, struct bpf_fib_lookup_padded *fib_params,
   205  	      const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst,
   206  	      int flags)
   207  {
   208  	fib_params->l.family	= AF_INET6;
   209  	fib_params->l.ifindex	= ctx_get_ifindex(ctx);
   210  
   211  	ipv6_addr_copy((union v6addr *)&fib_params->l.ipv6_src,
   212  		       (union v6addr *)ipv6_src);
   213  	ipv6_addr_copy((union v6addr *)&fib_params->l.ipv6_dst,
   214  		       (union v6addr *)ipv6_dst);
   215  
   216  	return fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), flags);
   217  };
   218  
   219  static __always_inline int
   220  fib_redirect_v6(struct __ctx_buff *ctx, int l3_off,
   221  		struct ipv6hdr *ip6 __maybe_unused, const bool needs_l2_check,
   222  		bool allow_neigh_map, __s8 *fib_err __maybe_unused, int *oif)
   223  {
   224  	struct bpf_fib_lookup_padded fib_params __maybe_unused = {0};
   225  	int ret;
   226  
   227  #ifdef ENABLE_SKIP_FIB
   228  	*oif = DIRECT_ROUTING_DEV_IFINDEX;
   229  #endif
   230  
   231  	if (!is_defined(ENABLE_SKIP_FIB) || !neigh_resolver_available()) {
   232  		ret = fib_lookup_v6(ctx, &fib_params, &ip6->saddr, &ip6->daddr, 0);
   233  		*fib_err = (__s8)ret;
   234  
   235  		ret = ipv6_l3(ctx, l3_off, NULL, NULL, METRIC_EGRESS);
   236  		if (unlikely(ret != CTX_ACT_OK))
   237  			return ret;
   238  
   239  		return fib_do_redirect(ctx, needs_l2_check, &fib_params, allow_neigh_map,
   240  				       fib_err, oif);
   241  	}
   242  
   243  	ret = ipv6_l3(ctx, l3_off, NULL, NULL, METRIC_EGRESS);
   244  	if (unlikely(ret != CTX_ACT_OK))
   245  		return ret;
   246  
   247  	*fib_err = BPF_FIB_LKUP_RET_NO_NEIGH;
   248  
   249  	return fib_do_redirect(ctx, needs_l2_check, NULL, allow_neigh_map,
   250  			       fib_err, oif);
   251  }
   252  #endif /* ENABLE_IPV6 */
   253  
   254  #ifdef ENABLE_IPV4
   255  /* fib_lookup_v4 will perform a fib lookup with the src and dest addresses
   256   * provided.
   257   *
   258   * after the function returns 'fib_params' will have the results of the fib lookup
   259   * if successful.
   260   */
   261  static __always_inline int
   262  fib_lookup_v4(struct __ctx_buff *ctx, struct bpf_fib_lookup_padded *fib_params,
   263  	      __be32 ipv4_src, __be32 ipv4_dst, int flags) {
   264  	fib_params->l.family	= AF_INET;
   265  	fib_params->l.ifindex	= ctx_get_ifindex(ctx);
   266  	fib_params->l.ipv4_src	= ipv4_src;
   267  	fib_params->l.ipv4_dst	= ipv4_dst;
   268  
   269  	return fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), flags);
   270  }
   271  
   272  static __always_inline int
   273  fib_redirect_v4(struct __ctx_buff *ctx, int l3_off,
   274  		struct iphdr *ip4 __maybe_unused, const bool needs_l2_check,
   275  		bool allow_neigh_map, __s8 *fib_err __maybe_unused, int *oif)
   276  {
   277  	struct bpf_fib_lookup_padded fib_params __maybe_unused = {0};
   278  	int ret;
   279  
   280  #ifdef ENABLE_SKIP_FIB
   281  	*oif = DIRECT_ROUTING_DEV_IFINDEX;
   282  #endif
   283  
   284  	if (!is_defined(ENABLE_SKIP_FIB) || !neigh_resolver_available()) {
   285  		ret = fib_lookup_v4(ctx, &fib_params, ip4->saddr, ip4->daddr, 0);
   286  		*fib_err = (__s8)ret;
   287  
   288  		ret = ipv4_l3(ctx, l3_off, NULL, NULL, ip4);
   289  		if (unlikely(ret != CTX_ACT_OK))
   290  			return ret;
   291  
   292  		return fib_do_redirect(ctx, needs_l2_check, &fib_params, allow_neigh_map,
   293  				       fib_err, oif);
   294  	}
   295  
   296  	ret = ipv4_l3(ctx, l3_off, NULL, NULL, ip4);
   297  	if (unlikely(ret != CTX_ACT_OK))
   298  		return ret;
   299  
   300  	*fib_err = BPF_FIB_LKUP_RET_NO_NEIGH;
   301  
   302  	return fib_do_redirect(ctx, needs_l2_check, NULL, allow_neigh_map,
   303  			       fib_err, oif);
   304  }
   305  #endif /* ENABLE_IPV4 */