github.com/cilium/cilium@v1.16.2/bpf/lib/encrypt.h (about)

     1  /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
     2  /* Copyright Authors of Cilium */
     3  
     4  #pragma once
     5  
     6  #include <bpf/ctx/skb.h>
     7  #include <bpf/api.h>
     8  #include <linux/if_ether.h>
     9  #include <linux/ip.h>
    10  
    11  #include "lib/common.h"
    12  #include "lib/drop.h"
    13  #include "lib/eps.h"
    14  #include "lib/vxlan.h"
    15  
/* Largest usable key index. We cap the key index at 4 bits (0..15) because
 * the mark value is used to map ctx to key.
 */
#define MAX_KEY_INDEX 15
    18  
    19  #ifdef ENABLE_IPSEC
/* Single-entry array map, pinned by name, whose value is a
 * struct encrypt_config. get_min_encrypt_key() reads slot 0 of this map to
 * obtain the local key index.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, struct encrypt_config);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, 1);
} ENCRYPT_MAP __section_maps_btf;
    27  #endif
    28  
    29  static __always_inline __u8 get_min_encrypt_key(__u8 peer_key __maybe_unused)
    30  {
    31  #ifdef ENABLE_IPSEC
    32  	__u8 local_key = 0;
    33  	__u32 encrypt_key = 0;
    34  	struct encrypt_config *cfg;
    35  
    36  	cfg = map_lookup_elem(&ENCRYPT_MAP, &encrypt_key);
    37  	/* Having no key info for a context is the same as no encryption */
    38  	if (cfg)
    39  		local_key = cfg->encrypt_key;
    40  
    41  	/* If both ends can encrypt/decrypt use smaller of the two this
    42  	 * way both ends will have keys installed assuming key IDs are
    43  	 * always increasing. However, we have to handle roll-over case
    44  	 * and to do this safely we assume keys are no more than one ahead.
    45  	 * We expect user/control-place to accomplish this. Notice zero
    46  	 * will always be returned if either local or peer have the zero
    47  	 * key indicating no encryption.
    48  	 */
    49  	if (peer_key == MAX_KEY_INDEX)
    50  		return local_key == 1 ? peer_key : local_key;
    51  	if (local_key == MAX_KEY_INDEX)
    52  		return peer_key == 1 ? local_key : peer_key;
    53  	return local_key < peer_key ? local_key : peer_key;
    54  #else
    55  	return 0;
    56  #endif /* ENABLE_IPSEC */
    57  }
    58  
    59  #ifdef ENABLE_IPSEC
    60  # ifdef ENABLE_IPV4
    61  static __always_inline __u16
    62  lookup_ip4_node_id(__u32 ip4)
    63  {
    64  	struct node_key node_ip = {};
    65  	struct node_value *node_value = NULL;
    66  
    67  	node_ip.family = ENDPOINT_KEY_IPV4;
    68  	node_ip.ip4 = ip4;
    69  	node_value = map_lookup_elem(&NODE_MAP_V2, &node_ip);
    70  	if (!node_value)
    71  		return 0;
    72  	if (!node_value->id)
    73  		return 0;
    74  	return node_value->id;
    75  }
    76  # endif /* ENABLE_IPV4 */
    77  
    78  # ifdef ENABLE_IPV6
    79  static __always_inline __u16
    80  lookup_ip6_node_id(const union v6addr *ip6)
    81  {
    82  	struct node_key node_ip = {};
    83  	struct node_value *node_value = NULL;
    84  
    85  	node_ip.family = ENDPOINT_KEY_IPV6;
    86  	node_ip.ip6 = *ip6;
    87  	node_value = map_lookup_elem(&NODE_MAP_V2, &node_ip);
    88  	if (!node_value)
    89  		return 0;
    90  	if (!node_value->id)
    91  		return 0;
    92  	return node_value->id;
    93  }
    94  # endif /* ENABLE_IPV6 */
    95  
    96  static __always_inline void
    97  set_ipsec_decrypt_mark(struct __ctx_buff *ctx, __u16 node_id)
    98  {
    99  	/* Decrypt "key" is determined by SPI and originating node */
   100  	ctx->mark = MARK_MAGIC_DECRYPT | node_id << 16;
   101  }
   102  
   103  static __always_inline int
   104  set_ipsec_encrypt(struct __ctx_buff *ctx, __u8 spi, __u32 tunnel_endpoint,
   105  		  __u32 seclabel, bool use_meta, bool use_spi_from_map)
   106  {
   107  	/* IPSec is performed by the stack on any packets with the
   108  	 * MARK_MAGIC_ENCRYPT bit set. During the process though we
   109  	 * lose the lxc context (seclabel and tunnel endpoint). The
   110  	 * tunnel endpoint can be looked up from daddr but the sec
   111  	 * label is stashed in the mark or cb, and extracted in
   112  	 * bpf_host to send ctx onto tunnel for encap.
   113  	 */
   114  
   115  	struct node_key node_ip = {};
   116  	struct node_value *node_value = NULL;
   117  
   118  	node_ip.family = ENDPOINT_KEY_IPV4;
   119  	node_ip.ip4 = tunnel_endpoint;
   120  	node_value = map_lookup_elem(&NODE_MAP_V2, &node_ip);
   121  	if (!node_value || !node_value->id)
   122  		return DROP_NO_NODE_ID;
   123  
   124  	if (use_spi_from_map)
   125  		spi = get_min_encrypt_key(node_value->spi);
   126  
   127  	set_identity_meta(ctx, seclabel);
   128  	if (use_meta)
   129  		set_encrypt_key_meta(ctx, spi, node_value->id);
   130  	else
   131  		set_encrypt_key_mark(ctx, spi, node_value->id);
   132  	return CTX_ACT_OK;
   133  }
   134  
   135  static __always_inline int
   136  do_decrypt(struct __ctx_buff *ctx, __u16 proto)
   137  {
   138  	void *data, *data_end;
   139  	__u8 protocol = 0;
   140  	__u16 node_id = 0;
   141  	bool decrypted;
   142  #ifdef ENABLE_IPV6
   143  	struct ipv6hdr *ip6;
   144  #endif
   145  #ifdef ENABLE_IPV4
   146  	struct iphdr *ip4;
   147  #endif
   148  
   149  	decrypted = ((ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT);
   150  
   151  	switch (proto) {
   152  #ifdef ENABLE_IPV6
   153  	case bpf_htons(ETH_P_IPV6):
   154  		if (!revalidate_data_pull(ctx, &data, &data_end, &ip6)) {
   155  			ctx->mark = 0;
   156  			return CTX_ACT_OK;
   157  		}
   158  		protocol = ip6->nexthdr;
   159  		if (!decrypted)
   160  			node_id = lookup_ip6_node_id((union v6addr *)&ip6->saddr);
   161  		break;
   162  #endif
   163  #ifdef ENABLE_IPV4
   164  	case bpf_htons(ETH_P_IP):
   165  		if (!revalidate_data_pull(ctx, &data, &data_end, &ip4)) {
   166  			ctx->mark = 0;
   167  			return CTX_ACT_OK;
   168  		}
   169  		protocol = ip4->protocol;
   170  		if (!decrypted)
   171  			node_id = lookup_ip4_node_id(ip4->saddr);
   172  		break;
   173  #endif
   174  	default:
   175  		return CTX_ACT_OK;
   176  	}
   177  
   178  	if (!decrypted) {
   179  		/* Allow all non-ESP packets up the stack per normal case
   180  		 * without encryption enabled.
   181  		 */
   182  		if (protocol != IPPROTO_ESP)
   183  			return CTX_ACT_OK;
   184  
   185  		if (!node_id)
   186  			return send_drop_notify_error(ctx, UNKNOWN_ID, DROP_NO_NODE_ID,
   187  						      CTX_ACT_DROP,
   188  						      METRIC_INGRESS);
   189  		set_ipsec_decrypt_mark(ctx, node_id);
   190  
   191  		/* We are going to pass this up the stack for IPsec decryption
   192  		 * but eth_type_trans may already have labeled this as an
   193  		 * OTHERHOST type packet. To avoid being dropped by IP stack
   194  		 * before IPSec can be processed mark as a HOST packet.
   195  		 */
   196  		ctx_change_type(ctx, PACKET_HOST);
   197  		return CTX_ACT_OK;
   198  	}
   199  	ctx->mark = 0;
   200  #ifdef ENABLE_ENDPOINT_ROUTES
   201  	return CTX_ACT_OK;
   202  #else
   203  	return ctx_redirect(ctx, CILIUM_IFINDEX, 0);
   204  #endif /* ENABLE_ROUTING */
   205  }
   206  
   207  #if defined(ENABLE_ENCRYPTED_OVERLAY)
   208  /* Sets the encryption mark on an overlay (VXLAN) packet and redirects the
   209   * packet to the ingress side of it's associated ifindex.
   210   *
   211   * The recirculated overlay packet will then be subjected to XFRM hooks in the
   212   * output routing path, since the original src/dst of the overlay packet routes
   213   * off-host.
   214   *
   215   * This function is useful when you want to encrypt overlay traffic and use the
   216   * underlay to deliver encrypted overlay traffic to the remote node.
   217   * For this to work the IPSec control plane must install XFRM policies and
   218   * states which set the tunnel source and destination to the underlay address of
   219   * the destination node.
   220   *
   221   * If the redirect to the ingress side of ctx->ingress is successful
   222   * CTX_ACT_REDIRECT is returned, otherwise an error code is returned.
   223   *
   224   * Be aware that the redirected-to interface needs to have the following
   225   * sysctl enabled for this to work correctly (per-device is fine)
   226   *   - net.ipv4.conf.default.rp_filter = 0
   227   *   - net.ipv4.conf.default.accept_local = 1
   228   */
   229  static __always_inline int
   230  encrypt_overlay_and_redirect(struct __ctx_buff *ctx)
   231  {
   232  	struct iphdr *ip4, *inner_ipv4 = NULL;
   233  	struct endpoint_info *ep_info = NULL;
   234  	void *data, *data_end;
   235  	__u8 dst_mac = 0;
   236  	int ret = 0;
   237  
   238  	if (!revalidate_data(ctx, &data, &data_end, &ip4))
   239  		return DROP_INVALID;
   240  
   241  	ret = vxlan_get_inner_ipv4(data, data_end, ip4, &inner_ipv4);
   242  	if (!ret)
   243  		return DROP_INVALID;
   244  
   245  	ep_info = __lookup_ip4_endpoint(inner_ipv4->saddr);
   246  	if (!ep_info)
   247  		return DROP_INVALID;
   248  
   249  	/*
   250  	 * this is a vxlan packet so ip4->daddr is the tunnel endpoint
   251  	 */
   252  	ret = set_ipsec_encrypt(ctx, 0, ip4->daddr, ep_info->sec_id, false,
   253  				true);
   254  	if (ret != CTX_ACT_OK)
   255  		return ret;
   256  
   257  	/*
   258  	 * source mac is our current egress interface, lets copy it to dmac
   259  	 * so redirecting to ingress side of the same interface doesn't fail.
   260  	 */
   261  	if (eth_load_saddr(ctx, &dst_mac, 0) != 0)
   262  		return DROP_INVALID;
   263  	if (eth_store_daddr(ctx, &dst_mac, 0) != 0)
   264  		return DROP_WRITE_ERROR;
   265  
   266  	/* need to revalidate data since we just re-wrote mac addresses */
   267  	if (!revalidate_data(ctx, &data, &data_end, &ip4))
   268  		return DROP_INVALID;
   269  
   270  	/* right now, the VNI of this packet is ENCRYPTED_OVERLAY_ID, we need
   271  	 * to rewrite this VNI to the source's sec id before we transmit it
   272  	 */
   273  	if (!vxlan_rewrite_vni(ctx, data, data_end, ip4,
   274  			       ep_info->sec_id))
   275  		return DROP_INVALID;
   276  
   277  	/* redirect to ingress side of ifindex so the packet has xfrm applied */
   278  	ret = ctx_redirect(ctx, ctx->ifindex, BPF_F_INGRESS);
   279  	if (ret != CTX_ACT_REDIRECT)
   280  		return DROP_INVALID;
   281  
   282  	return ret;
   283  }
   284  #endif /* ENABLE_ENCRYPTED_OVERLAY */
   285  
   286  #else
   287  static __always_inline int
   288  do_decrypt(struct __ctx_buff __maybe_unused *ctx, __u16 __maybe_unused proto)
   289  {
   290  	return CTX_ACT_OK;
   291  }
   292  #endif /* ENABLE_IPSEC */