github.com/cilium/cilium@v1.16.2/bpf/lib/encrypt.h (about) 1 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 /* Copyright Authors of Cilium */ 3 4 #pragma once 5 6 #include <bpf/ctx/skb.h> 7 #include <bpf/api.h> 8 #include <linux/if_ether.h> 9 #include <linux/ip.h> 10 11 #include "lib/common.h" 12 #include "lib/drop.h" 13 #include "lib/eps.h" 14 #include "lib/vxlan.h" 15 16 /* We cap key index at 4 bits because mark value is used to map ctx to key */ 17 #define MAX_KEY_INDEX 15 18 19 #ifdef ENABLE_IPSEC 20 struct { 21 __uint(type, BPF_MAP_TYPE_ARRAY); 22 __type(key, __u32); 23 __type(value, struct encrypt_config); 24 __uint(pinning, LIBBPF_PIN_BY_NAME); 25 __uint(max_entries, 1); 26 } ENCRYPT_MAP __section_maps_btf; 27 #endif 28 29 static __always_inline __u8 get_min_encrypt_key(__u8 peer_key __maybe_unused) 30 { 31 #ifdef ENABLE_IPSEC 32 __u8 local_key = 0; 33 __u32 encrypt_key = 0; 34 struct encrypt_config *cfg; 35 36 cfg = map_lookup_elem(&ENCRYPT_MAP, &encrypt_key); 37 /* Having no key info for a context is the same as no encryption */ 38 if (cfg) 39 local_key = cfg->encrypt_key; 40 41 /* If both ends can encrypt/decrypt use smaller of the two this 42 * way both ends will have keys installed assuming key IDs are 43 * always increasing. However, we have to handle roll-over case 44 * and to do this safely we assume keys are no more than one ahead. 45 * We expect user/control-place to accomplish this. Notice zero 46 * will always be returned if either local or peer have the zero 47 * key indicating no encryption. 48 */ 49 if (peer_key == MAX_KEY_INDEX) 50 return local_key == 1 ? peer_key : local_key; 51 if (local_key == MAX_KEY_INDEX) 52 return peer_key == 1 ? local_key : peer_key; 53 return local_key < peer_key ? local_key : peer_key; 54 #else 55 return 0; 56 #endif /* ENABLE_IPSEC */ 57 } 58 59 #ifdef ENABLE_IPSEC 60 # ifdef ENABLE_IPV4 61 static __always_inline __u16 62 lookup_ip4_node_id(__u32 ip4) 63 { 64 struct node_key node_ip = {}; 65 struct node_value *node_value = NULL; 66 67 node_ip.family = ENDPOINT_KEY_IPV4; 68 node_ip.ip4 = ip4; 69 node_value = map_lookup_elem(&NODE_MAP_V2, &node_ip); 70 if (!node_value) 71 return 0; 72 if (!node_value->id) 73 return 0; 74 return node_value->id; 75 } 76 # endif /* ENABLE_IPV4 */ 77 78 # ifdef ENABLE_IPV6 79 static __always_inline __u16 80 lookup_ip6_node_id(const union v6addr *ip6) 81 { 82 struct node_key node_ip = {}; 83 struct node_value *node_value = NULL; 84 85 node_ip.family = ENDPOINT_KEY_IPV6; 86 node_ip.ip6 = *ip6; 87 node_value = map_lookup_elem(&NODE_MAP_V2, &node_ip); 88 if (!node_value) 89 return 0; 90 if (!node_value->id) 91 return 0; 92 return node_value->id; 93 } 94 # endif /* ENABLE_IPV6 */ 95 96 static __always_inline void 97 set_ipsec_decrypt_mark(struct __ctx_buff *ctx, __u16 node_id) 98 { 99 /* Decrypt "key" is determined by SPI and originating node */ 100 ctx->mark = MARK_MAGIC_DECRYPT | node_id << 16; 101 } 102 103 static __always_inline int 104 set_ipsec_encrypt(struct __ctx_buff *ctx, __u8 spi, __u32 tunnel_endpoint, 105 __u32 seclabel, bool use_meta, bool use_spi_from_map) 106 { 107 /* IPSec is performed by the stack on any packets with the 108 * MARK_MAGIC_ENCRYPT bit set. During the process though we 109 * lose the lxc context (seclabel and tunnel endpoint). The 110 * tunnel endpoint can be looked up from daddr but the sec 111 * label is stashed in the mark or cb, and extracted in 112 * bpf_host to send ctx onto tunnel for encap. 113 */ 114 115 struct node_key node_ip = {}; 116 struct node_value *node_value = NULL; 117 118 node_ip.family = ENDPOINT_KEY_IPV4; 119 node_ip.ip4 = tunnel_endpoint; 120 node_value = map_lookup_elem(&NODE_MAP_V2, &node_ip); 121 if (!node_value || !node_value->id) 122 return DROP_NO_NODE_ID; 123 124 if (use_spi_from_map) 125 spi = get_min_encrypt_key(node_value->spi); 126 127 set_identity_meta(ctx, seclabel); 128 if (use_meta) 129 set_encrypt_key_meta(ctx, spi, node_value->id); 130 else 131 set_encrypt_key_mark(ctx, spi, node_value->id); 132 return CTX_ACT_OK; 133 } 134 135 static __always_inline int 136 do_decrypt(struct __ctx_buff *ctx, __u16 proto) 137 { 138 void *data, *data_end; 139 __u8 protocol = 0; 140 __u16 node_id = 0; 141 bool decrypted; 142 #ifdef ENABLE_IPV6 143 struct ipv6hdr *ip6; 144 #endif 145 #ifdef ENABLE_IPV4 146 struct iphdr *ip4; 147 #endif 148 149 decrypted = ((ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT); 150 151 switch (proto) { 152 #ifdef ENABLE_IPV6 153 case bpf_htons(ETH_P_IPV6): 154 if (!revalidate_data_pull(ctx, &data, &data_end, &ip6)) { 155 ctx->mark = 0; 156 return CTX_ACT_OK; 157 } 158 protocol = ip6->nexthdr; 159 if (!decrypted) 160 node_id = lookup_ip6_node_id((union v6addr *)&ip6->saddr); 161 break; 162 #endif 163 #ifdef ENABLE_IPV4 164 case bpf_htons(ETH_P_IP): 165 if (!revalidate_data_pull(ctx, &data, &data_end, &ip4)) { 166 ctx->mark = 0; 167 return CTX_ACT_OK; 168 } 169 protocol = ip4->protocol; 170 if (!decrypted) 171 node_id = lookup_ip4_node_id(ip4->saddr); 172 break; 173 #endif 174 default: 175 return CTX_ACT_OK; 176 } 177 178 if (!decrypted) { 179 /* Allow all non-ESP packets up the stack per normal case 180 * without encryption enabled. 181 */ 182 if (protocol != IPPROTO_ESP) 183 return CTX_ACT_OK; 184 185 if (!node_id) 186 return send_drop_notify_error(ctx, UNKNOWN_ID, DROP_NO_NODE_ID, 187 CTX_ACT_DROP, 188 METRIC_INGRESS); 189 set_ipsec_decrypt_mark(ctx, node_id); 190 191 /* We are going to pass this up the stack for IPsec decryption 192 * but eth_type_trans may already have labeled this as an 193 * OTHERHOST type packet. To avoid being dropped by IP stack 194 * before IPSec can be processed mark as a HOST packet. 195 */ 196 ctx_change_type(ctx, PACKET_HOST); 197 return CTX_ACT_OK; 198 } 199 ctx->mark = 0; 200 #ifdef ENABLE_ENDPOINT_ROUTES 201 return CTX_ACT_OK; 202 #else 203 return ctx_redirect(ctx, CILIUM_IFINDEX, 0); 204 #endif /* ENABLE_ROUTING */ 205 } 206 207 #if defined(ENABLE_ENCRYPTED_OVERLAY) 208 /* Sets the encryption mark on an overlay (VXLAN) packet and redirects the 209 * packet to the ingress side of it's associated ifindex. 210 * 211 * The recirculated overlay packet will then be subjected to XFRM hooks in the 212 * output routing path, since the original src/dst of the overlay packet routes 213 * off-host. 214 * 215 * This function is useful when you want to encrypt overlay traffic and use the 216 * underlay to deliver encrypted overlay traffic to the remote node. 217 * For this to work the IPSec control plane must install XFRM policies and 218 * states which set the tunnel source and destination to the underlay address of 219 * the destination node. 220 * 221 * If the redirect to the ingress side of ctx->ingress is successful 222 * CTX_ACT_REDIRECT is returned, otherwise an error code is returned. 223 * 224 * Be aware that the redirected-to interface needs to have the following 225 * sysctl enabled for this to work correctly (per-device is fine) 226 * - net.ipv4.conf.default.rp_filter = 0 227 * - net.ipv4.conf.default.accept_local = 1 228 */ 229 static __always_inline int 230 encrypt_overlay_and_redirect(struct __ctx_buff *ctx) 231 { 232 struct iphdr *ip4, *inner_ipv4 = NULL; 233 struct endpoint_info *ep_info = NULL; 234 void *data, *data_end; 235 __u8 dst_mac = 0; 236 int ret = 0; 237 238 if (!revalidate_data(ctx, &data, &data_end, &ip4)) 239 return DROP_INVALID; 240 241 ret = vxlan_get_inner_ipv4(data, data_end, ip4, &inner_ipv4); 242 if (!ret) 243 return DROP_INVALID; 244 245 ep_info = __lookup_ip4_endpoint(inner_ipv4->saddr); 246 if (!ep_info) 247 return DROP_INVALID; 248 249 /* 250 * this is a vxlan packet so ip4->daddr is the tunnel endpoint 251 */ 252 ret = set_ipsec_encrypt(ctx, 0, ip4->daddr, ep_info->sec_id, false, 253 true); 254 if (ret != CTX_ACT_OK) 255 return ret; 256 257 /* 258 * source mac is our current egress interface, lets copy it to dmac 259 * so redirecting to ingress side of the same interface doesn't fail. 260 */ 261 if (eth_load_saddr(ctx, &dst_mac, 0) != 0) 262 return DROP_INVALID; 263 if (eth_store_daddr(ctx, &dst_mac, 0) != 0) 264 return DROP_WRITE_ERROR; 265 266 /* need to revalidate data since we just re-wrote mac addresses */ 267 if (!revalidate_data(ctx, &data, &data_end, &ip4)) 268 return DROP_INVALID; 269 270 /* right now, the VNI of this packet is ENCRYPTED_OVERLAY_ID, we need 271 * to rewrite this VNI to the source's sec id before we transmit it 272 */ 273 if (!vxlan_rewrite_vni(ctx, data, data_end, ip4, 274 ep_info->sec_id)) 275 return DROP_INVALID; 276 277 /* redirect to ingress side of ifindex so the packet has xfrm applied */ 278 ret = ctx_redirect(ctx, ctx->ifindex, BPF_F_INGRESS); 279 if (ret != CTX_ACT_REDIRECT) 280 return DROP_INVALID; 281 282 return ret; 283 } 284 #endif /* ENABLE_ENCRYPTED_OVERLAY */ 285 286 #else 287 static __always_inline int 288 do_decrypt(struct __ctx_buff __maybe_unused *ctx, __u16 __maybe_unused proto) 289 { 290 return CTX_ACT_OK; 291 } 292 #endif /* ENABLE_IPSEC */