github.com/cilium/cilium@v1.16.2/bpf/lib/encap.h (about)

     1  /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
     2  /* Copyright Authors of Cilium */
     3  
     4  #pragma once
     5  
     6  #include "common.h"
     7  #include "dbg.h"
     8  #include "hash.h"
     9  #include "trace.h"
    10  
    11  #if __ctx_is == __ctx_skb
    12  #include "encrypt.h"
    13  #endif /* __ctx_is == __ctx_skb */
    14  
    15  #include "high_scale_ipcache.h"
    16  
    17  #ifdef HAVE_ENCAP
/* Hash map, pinned by name, keyed by struct tunnel_key and holding
 * struct tunnel_value entries. The lookups in this file read the value's
 * ip4 field as the tunnel endpoint and, when ENABLE_IPSEC is defined, its
 * key field to select the encryption key.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct tunnel_key);
	__type(value, struct tunnel_value);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, TUNNEL_ENDPOINT_MAP_SIZE);
	__uint(map_flags, CONDITIONAL_PREALLOC);
} TUNNEL_MAP __section_maps_btf;
    26  
    27  static __always_inline int
    28  __encap_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip, __be16 src_port,
    29  		    __be32 tunnel_endpoint,
    30  		    __u32 seclabel, __u32 dstid, __u32 vni __maybe_unused,
    31  		    enum trace_reason ct_reason, __u32 monitor, int *ifindex)
    32  {
    33  	__u32 node_id;
    34  
    35  	/* When encapsulating, a packet originating from the local host is
    36  	 * being considered as a packet from a remote node as it is being
    37  	 * received.
    38  	 */
    39  	if (seclabel == HOST_ID)
    40  		seclabel = LOCAL_NODE_ID;
    41  
    42  	node_id = bpf_ntohl(tunnel_endpoint);
    43  
    44  	cilium_dbg(ctx, DBG_ENCAP, node_id, seclabel);
    45  
    46  #if __ctx_is == __ctx_skb
    47  	*ifindex = ENCAP_IFINDEX;
    48  #else
    49  	*ifindex = 0;
    50  #endif
    51  
    52  	send_trace_notify(ctx, TRACE_TO_OVERLAY, seclabel, dstid, TRACE_EP_ID_UNKNOWN,
    53  			  *ifindex, ct_reason, monitor);
    54  
    55  	return ctx_set_encap_info(ctx, src_ip, src_port, node_id, seclabel, vni,
    56  				  NULL, 0);
    57  }
    58  
    59  static __always_inline int
    60  __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip __maybe_unused,
    61  				 __be32 tunnel_endpoint,
    62  				 __u32 seclabel, __u32 dstid, __u32 vni,
    63  				 const struct trace_ctx *trace)
    64  {
    65  	int ifindex;
    66  	int ret = 0;
    67  
    68  	ret = __encap_with_nodeid(ctx, src_ip, 0, tunnel_endpoint, seclabel, dstid,
    69  				  vni, trace->reason, trace->monitor,
    70  				  &ifindex);
    71  	if (ret != CTX_ACT_REDIRECT)
    72  		return ret;
    73  
    74  	return ctx_redirect(ctx, ifindex, 0);
    75  }
    76  
    77  /* encap_and_redirect_with_nodeid returns CTX_ACT_OK after ctx meta-data is
    78   * set. Caller should pass the ctx to the stack at this point. Otherwise
    79   * returns CTX_ACT_REDIRECT on successful redirect to tunnel device.
    80   * On error returns a DROP_* reason.
    81   */
    82  static __always_inline int
    83  encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
    84  			       __u8 encrypt_key __maybe_unused,
    85  			       __u32 seclabel, __u32 dstid,
    86  			       const struct trace_ctx *trace)
    87  {
    88  #ifdef ENABLE_IPSEC
    89  	if (encrypt_key)
    90  		return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint,
    91  					 seclabel, true, false);
    92  #endif
    93  
    94  	return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint,
    95  						seclabel, dstid, NOT_VTEP_DST,
    96  						trace);
    97  }
    98  
    99  /* __encap_and_redirect_lxc() is a variant of encap_and_redirect_lxc()
   100   * that requires a valid tunnel_endpoint.
   101   */
   102  static __always_inline int
   103  __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
   104  			 __u8 encrypt_key __maybe_unused, __u32 seclabel,
   105  			 __u32 dstid, const struct trace_ctx *trace)
   106  {
   107  	int ifindex __maybe_unused;
   108  	int ret __maybe_unused;
   109  
   110  #ifdef ENABLE_IPSEC
   111  	if (encrypt_key)
   112  		return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint,
   113  					 seclabel, false, false);
   114  #endif
   115  
   116  	return encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, 0, seclabel,
   117  					      dstid, trace);
   118  }
   119  
   120  #if defined(TUNNEL_MODE) || defined(ENABLE_HIGH_SCALE_IPCACHE)
   121  /* encap_and_redirect_lxc adds IPSec metadata (if enabled) and returns the packet
   122   * so that it can be passed to the IP stack. Without IPSec the packet is
   123   * typically redirected to the output tunnel device and ctx will not be seen by
   124   * the IP stack.
   125   *
   126   * Returns CTX_ACT_OK when ctx needs to be handed to IP stack (eg. for IPSec
   127   * handling), a DROP_* reason on error, and finally on successful redirect returns
   128   * CTX_ACT_REDIRECT.
   129   */
   130  static __always_inline int
   131  encap_and_redirect_lxc(struct __ctx_buff *ctx,
   132  		       __be32 tunnel_endpoint __maybe_unused,
   133  		       __u32 src_ip __maybe_unused,
   134  		       __u32 dst_ip __maybe_unused,
   135  		       __u8 encrypt_key __maybe_unused,
   136  		       struct tunnel_key *key __maybe_unused,
   137  		       __u32 seclabel, __u32 dstid,
   138  		       const struct trace_ctx *trace)
   139  {
   140  	struct tunnel_value *tunnel __maybe_unused;
   141  
   142  #ifdef ENABLE_HIGH_SCALE_IPCACHE
   143  	if (needs_encapsulation(dst_ip))
   144  		return __encap_and_redirect_with_nodeid(ctx, src_ip, dst_ip,
   145  							seclabel, dstid,
   146  							NOT_VTEP_DST, trace);
   147  	return DROP_NO_TUNNEL_ENDPOINT;
   148  #else /* ENABLE_HIGH_SCALE_IPCACHE */
   149  	if (tunnel_endpoint)
   150  		return __encap_and_redirect_lxc(ctx, tunnel_endpoint,
   151  						encrypt_key, seclabel, dstid,
   152  						trace);
   153  
   154  	tunnel = map_lookup_elem(&TUNNEL_MAP, key);
   155  	if (!tunnel)
   156  		return DROP_NO_TUNNEL_ENDPOINT;
   157  
   158  # ifdef ENABLE_IPSEC
   159  	if (tunnel->key) {
   160  		__u8 min_encrypt_key = get_min_encrypt_key(tunnel->key);
   161  
   162  		return set_ipsec_encrypt(ctx, min_encrypt_key, tunnel->ip4,
   163  					 seclabel, false, false);
   164  	}
   165  # endif
   166  	return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, dstid,
   167  					      trace);
   168  #endif /* ENABLE_HIGH_SCALE_IPCACHE */
   169  }
   170  
   171  static __always_inline int
   172  encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k,
   173  			  __u8 encrypt_key __maybe_unused,
   174  			  __u32 seclabel, const struct trace_ctx *trace)
   175  {
   176  	struct tunnel_value *tunnel;
   177  
   178  	tunnel = map_lookup_elem(&TUNNEL_MAP, k);
   179  	if (!tunnel)
   180  		return DROP_NO_TUNNEL_ENDPOINT;
   181  
   182  #ifdef ENABLE_IPSEC
   183  	if (encrypt_key)
   184  		return set_ipsec_encrypt(ctx, encrypt_key, tunnel->ip4,
   185  					 seclabel, true, false);
   186  #endif
   187  
   188  	return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, 0,
   189  					      trace);
   190  }
   191  #endif /* TUNNEL_MODE || ENABLE_HIGH_SCALE_IPCACHE */
   192  
   193  static __always_inline __be16
   194  tunnel_gen_src_port_v4(struct ipv4_ct_tuple *tuple __maybe_unused)
   195  {
   196  #if __ctx_is == __ctx_xdp
   197  	__be32 hash = hash_from_tuple_v4(tuple);
   198  
   199  	return (hash >> 16)  ^ (__be16)hash;
   200  #else
   201  	return 0;
   202  #endif
   203  }
   204  
   205  static __always_inline __be16
   206  tunnel_gen_src_port_v6(struct ipv6_ct_tuple *tuple __maybe_unused)
   207  {
   208  #if __ctx_is == __ctx_xdp
   209  	__be32 hash = hash_from_tuple_v6(tuple);
   210  
   211  	return (hash >> 16)  ^ (__be16)hash;
   212  #else
   213  	return 0;
   214  #endif
   215  }
   216  
   217  #if defined(ENABLE_DSR) && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE
   218  static __always_inline int
   219  __encap_with_nodeid_opt(struct __ctx_buff *ctx, __u32 src_ip, __be16 src_port,
   220  			__u32 tunnel_endpoint,
   221  			__u32 seclabel, __u32 dstid, __u32 vni,
   222  			void *opt, __u32 opt_len,
   223  			enum trace_reason ct_reason,
   224  			__u32 monitor, int *ifindex)
   225  {
   226  	__u32 node_id;
   227  
   228  	/* When encapsulating, a packet originating from the local host is
   229  	 * being considered as a packet from a remote node as it is being
   230  	 * received.
   231  	 */
   232  	if (seclabel == HOST_ID)
   233  		seclabel = LOCAL_NODE_ID;
   234  
   235  	node_id = bpf_ntohl(tunnel_endpoint);
   236  
   237  	cilium_dbg(ctx, DBG_ENCAP, node_id, seclabel);
   238  
   239  #if __ctx_is == __ctx_skb
   240  	*ifindex = ENCAP_IFINDEX;
   241  #else
   242  	*ifindex = 0;
   243  #endif
   244  
   245  	send_trace_notify(ctx, TRACE_TO_OVERLAY, seclabel, dstid, TRACE_EP_ID_UNKNOWN,
   246  			  *ifindex, ct_reason, monitor);
   247  
   248  	return ctx_set_encap_info(ctx, src_ip, src_port, node_id, seclabel, vni, opt,
   249  				  opt_len);
   250  }
   251  
   252  static __always_inline void
   253  set_geneve_dsr_opt4(__be16 port, __be32 addr, struct geneve_dsr_opt4 *gopt)
   254  {
   255  	memset(gopt, 0, sizeof(*gopt));
   256  	gopt->hdr.opt_class = bpf_htons(DSR_GENEVE_OPT_CLASS);
   257  	gopt->hdr.type = DSR_GENEVE_OPT_TYPE;
   258  	gopt->hdr.length = DSR_IPV4_GENEVE_OPT_LEN;
   259  	gopt->addr = addr;
   260  	gopt->port = port;
   261  }
   262  
   263  static __always_inline void
   264  set_geneve_dsr_opt6(__be16 port, const union v6addr *addr,
   265  		    struct geneve_dsr_opt6 *gopt)
   266  {
   267  	memset(gopt, 0, sizeof(*gopt));
   268  	gopt->hdr.opt_class = bpf_htons(DSR_GENEVE_OPT_CLASS);
   269  	gopt->hdr.type = DSR_GENEVE_OPT_TYPE;
   270  	gopt->hdr.length = DSR_IPV6_GENEVE_OPT_LEN;
   271  	ipv6_addr_copy_unaligned((union v6addr *)&gopt->addr, addr);
   272  
   273  	gopt->port = port;
   274  }
   275  #endif
   276  #endif /* HAVE_ENCAP */