github.com/cilium/cilium@v1.16.2/bpf/lib/encap.h (about) 1 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 /* Copyright Authors of Cilium */ 3 4 #pragma once 5 6 #include "common.h" 7 #include "dbg.h" 8 #include "hash.h" 9 #include "trace.h" 10 11 #if __ctx_is == __ctx_skb 12 #include "encrypt.h" 13 #endif /* __ctx_is == __ctx_skb */ 14 15 #include "high_scale_ipcache.h" 16 17 #ifdef HAVE_ENCAP 18 struct { 19 __uint(type, BPF_MAP_TYPE_HASH); 20 __type(key, struct tunnel_key); 21 __type(value, struct tunnel_value); 22 __uint(pinning, LIBBPF_PIN_BY_NAME); 23 __uint(max_entries, TUNNEL_ENDPOINT_MAP_SIZE); 24 __uint(map_flags, CONDITIONAL_PREALLOC); 25 } TUNNEL_MAP __section_maps_btf; 26 27 static __always_inline int 28 __encap_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip, __be16 src_port, 29 __be32 tunnel_endpoint, 30 __u32 seclabel, __u32 dstid, __u32 vni __maybe_unused, 31 enum trace_reason ct_reason, __u32 monitor, int *ifindex) 32 { 33 __u32 node_id; 34 35 /* When encapsulating, a packet originating from the local host is 36 * being considered as a packet from a remote node as it is being 37 * received. 38 */ 39 if (seclabel == HOST_ID) 40 seclabel = LOCAL_NODE_ID; 41 42 node_id = bpf_ntohl(tunnel_endpoint); 43 44 cilium_dbg(ctx, DBG_ENCAP, node_id, seclabel); 45 46 #if __ctx_is == __ctx_skb 47 *ifindex = ENCAP_IFINDEX; 48 #else 49 *ifindex = 0; 50 #endif 51 52 send_trace_notify(ctx, TRACE_TO_OVERLAY, seclabel, dstid, TRACE_EP_ID_UNKNOWN, 53 *ifindex, ct_reason, monitor); 54 55 return ctx_set_encap_info(ctx, src_ip, src_port, node_id, seclabel, vni, 56 NULL, 0); 57 } 58 59 static __always_inline int 60 __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip __maybe_unused, 61 __be32 tunnel_endpoint, 62 __u32 seclabel, __u32 dstid, __u32 vni, 63 const struct trace_ctx *trace) 64 { 65 int ifindex; 66 int ret = 0; 67 68 ret = __encap_with_nodeid(ctx, src_ip, 0, tunnel_endpoint, seclabel, dstid, 69 vni, trace->reason, trace->monitor, 70 &ifindex); 71 if (ret != CTX_ACT_REDIRECT) 72 return ret; 73 74 return ctx_redirect(ctx, ifindex, 0); 75 } 76 77 /* encap_and_redirect_with_nodeid returns CTX_ACT_OK after ctx meta-data is 78 * set. Caller should pass the ctx to the stack at this point. Otherwise 79 * returns CTX_ACT_REDIRECT on successful redirect to tunnel device. 80 * On error returns a DROP_* reason. 81 */ 82 static __always_inline int 83 encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint, 84 __u8 encrypt_key __maybe_unused, 85 __u32 seclabel, __u32 dstid, 86 const struct trace_ctx *trace) 87 { 88 #ifdef ENABLE_IPSEC 89 if (encrypt_key) 90 return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint, 91 seclabel, true, false); 92 #endif 93 94 return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint, 95 seclabel, dstid, NOT_VTEP_DST, 96 trace); 97 } 98 99 /* __encap_and_redirect_lxc() is a variant of encap_and_redirect_lxc() 100 * that requires a valid tunnel_endpoint. 101 */ 102 static __always_inline int 103 __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint, 104 __u8 encrypt_key __maybe_unused, __u32 seclabel, 105 __u32 dstid, const struct trace_ctx *trace) 106 { 107 int ifindex __maybe_unused; 108 int ret __maybe_unused; 109 110 #ifdef ENABLE_IPSEC 111 if (encrypt_key) 112 return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint, 113 seclabel, false, false); 114 #endif 115 116 return encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, 0, seclabel, 117 dstid, trace); 118 } 119 120 #if defined(TUNNEL_MODE) || defined(ENABLE_HIGH_SCALE_IPCACHE) 121 /* encap_and_redirect_lxc adds IPSec metadata (if enabled) and returns the packet 122 * so that it can be passed to the IP stack. Without IPSec the packet is 123 * typically redirected to the output tunnel device and ctx will not be seen by 124 * the IP stack. 125 * 126 * Returns CTX_ACT_OK when ctx needs to be handed to IP stack (eg. for IPSec 127 * handling), a DROP_* reason on error, and finally on successful redirect returns 128 * CTX_ACT_REDIRECT. 129 */ 130 static __always_inline int 131 encap_and_redirect_lxc(struct __ctx_buff *ctx, 132 __be32 tunnel_endpoint __maybe_unused, 133 __u32 src_ip __maybe_unused, 134 __u32 dst_ip __maybe_unused, 135 __u8 encrypt_key __maybe_unused, 136 struct tunnel_key *key __maybe_unused, 137 __u32 seclabel, __u32 dstid, 138 const struct trace_ctx *trace) 139 { 140 struct tunnel_value *tunnel __maybe_unused; 141 142 #ifdef ENABLE_HIGH_SCALE_IPCACHE 143 if (needs_encapsulation(dst_ip)) 144 return __encap_and_redirect_with_nodeid(ctx, src_ip, dst_ip, 145 seclabel, dstid, 146 NOT_VTEP_DST, trace); 147 return DROP_NO_TUNNEL_ENDPOINT; 148 #else /* ENABLE_HIGH_SCALE_IPCACHE */ 149 if (tunnel_endpoint) 150 return __encap_and_redirect_lxc(ctx, tunnel_endpoint, 151 encrypt_key, seclabel, dstid, 152 trace); 153 154 tunnel = map_lookup_elem(&TUNNEL_MAP, key); 155 if (!tunnel) 156 return DROP_NO_TUNNEL_ENDPOINT; 157 158 # ifdef ENABLE_IPSEC 159 if (tunnel->key) { 160 __u8 min_encrypt_key = get_min_encrypt_key(tunnel->key); 161 162 return set_ipsec_encrypt(ctx, min_encrypt_key, tunnel->ip4, 163 seclabel, false, false); 164 } 165 # endif 166 return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, dstid, 167 trace); 168 #endif /* ENABLE_HIGH_SCALE_IPCACHE */ 169 } 170 171 static __always_inline int 172 encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k, 173 __u8 encrypt_key __maybe_unused, 174 __u32 seclabel, const struct trace_ctx *trace) 175 { 176 struct tunnel_value *tunnel; 177 178 tunnel = map_lookup_elem(&TUNNEL_MAP, k); 179 if (!tunnel) 180 return DROP_NO_TUNNEL_ENDPOINT; 181 182 #ifdef ENABLE_IPSEC 183 if (encrypt_key) 184 return set_ipsec_encrypt(ctx, encrypt_key, tunnel->ip4, 185 seclabel, true, false); 186 #endif 187 188 return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, 0, 189 trace); 190 } 191 #endif /* TUNNEL_MODE || ENABLE_HIGH_SCALE_IPCACHE */ 192 193 static __always_inline __be16 194 tunnel_gen_src_port_v4(struct ipv4_ct_tuple *tuple __maybe_unused) 195 { 196 #if __ctx_is == __ctx_xdp 197 __be32 hash = hash_from_tuple_v4(tuple); 198 199 return (hash >> 16) ^ (__be16)hash; 200 #else 201 return 0; 202 #endif 203 } 204 205 static __always_inline __be16 206 tunnel_gen_src_port_v6(struct ipv6_ct_tuple *tuple __maybe_unused) 207 { 208 #if __ctx_is == __ctx_xdp 209 __be32 hash = hash_from_tuple_v6(tuple); 210 211 return (hash >> 16) ^ (__be16)hash; 212 #else 213 return 0; 214 #endif 215 } 216 217 #if defined(ENABLE_DSR) && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE 218 static __always_inline int 219 __encap_with_nodeid_opt(struct __ctx_buff *ctx, __u32 src_ip, __be16 src_port, 220 __u32 tunnel_endpoint, 221 __u32 seclabel, __u32 dstid, __u32 vni, 222 void *opt, __u32 opt_len, 223 enum trace_reason ct_reason, 224 __u32 monitor, int *ifindex) 225 { 226 __u32 node_id; 227 228 /* When encapsulating, a packet originating from the local host is 229 * being considered as a packet from a remote node as it is being 230 * received. 231 */ 232 if (seclabel == HOST_ID) 233 seclabel = LOCAL_NODE_ID; 234 235 node_id = bpf_ntohl(tunnel_endpoint); 236 237 cilium_dbg(ctx, DBG_ENCAP, node_id, seclabel); 238 239 #if __ctx_is == __ctx_skb 240 *ifindex = ENCAP_IFINDEX; 241 #else 242 *ifindex = 0; 243 #endif 244 245 send_trace_notify(ctx, TRACE_TO_OVERLAY, seclabel, dstid, TRACE_EP_ID_UNKNOWN, 246 *ifindex, ct_reason, monitor); 247 248 return ctx_set_encap_info(ctx, src_ip, src_port, node_id, seclabel, vni, opt, 249 opt_len); 250 } 251 252 static __always_inline void 253 set_geneve_dsr_opt4(__be16 port, __be32 addr, struct geneve_dsr_opt4 *gopt) 254 { 255 memset(gopt, 0, sizeof(*gopt)); 256 gopt->hdr.opt_class = bpf_htons(DSR_GENEVE_OPT_CLASS); 257 gopt->hdr.type = DSR_GENEVE_OPT_TYPE; 258 gopt->hdr.length = DSR_IPV4_GENEVE_OPT_LEN; 259 gopt->addr = addr; 260 gopt->port = port; 261 } 262 263 static __always_inline void 264 set_geneve_dsr_opt6(__be16 port, const union v6addr *addr, 265 struct geneve_dsr_opt6 *gopt) 266 { 267 memset(gopt, 0, sizeof(*gopt)); 268 gopt->hdr.opt_class = bpf_htons(DSR_GENEVE_OPT_CLASS); 269 gopt->hdr.type = DSR_GENEVE_OPT_TYPE; 270 gopt->hdr.length = DSR_IPV6_GENEVE_OPT_LEN; 271 ipv6_addr_copy_unaligned((union v6addr *)&gopt->addr, addr); 272 273 gopt->port = port; 274 } 275 #endif 276 #endif /* HAVE_ENCAP */