github.com/cilium/cilium@v1.16.2/bpf/lib/egress_gateway.h (about) 1 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 /* Copyright Authors of Cilium */ 3 4 #pragma once 5 6 #include "lib/fib.h" 7 #include "lib/identity.h" 8 #include "lib/overloadable.h" 9 10 #include "encap.h" 11 12 #ifdef ENABLE_EGRESS_GATEWAY_COMMON 13 14 /* EGRESS_STATIC_PREFIX represents the size in bits of the static prefix part of 15 * an egress policy key (i.e. the source IP). 16 */ 17 #define EGRESS_STATIC_PREFIX (sizeof(__be32) * 8) 18 #define EGRESS_PREFIX_LEN(PREFIX) (EGRESS_STATIC_PREFIX + (PREFIX)) 19 #define EGRESS_IPV4_PREFIX EGRESS_PREFIX_LEN(32) 20 21 /* These are special IP values in the CIDR 0.0.0.0/8 range that map to specific 22 * case for in the egress gateway policies handling. 23 */ 24 25 /* Special values in the policy_entry->gateway_ip: */ 26 #define EGRESS_GATEWAY_NO_GATEWAY (0) 27 #define EGRESS_GATEWAY_EXCLUDED_CIDR bpf_htonl(1) 28 29 /* Special values in the policy_entry->egress_ip: */ 30 #define EGRESS_GATEWAY_NO_EGRESS_IP (0) 31 32 static __always_inline 33 int egress_gw_fib_lookup_and_redirect(struct __ctx_buff *ctx, __be32 egress_ip, __be32 daddr, 34 __s8 *ext_err) 35 { 36 struct bpf_fib_lookup_padded fib_params = {}; 37 int oif = 0; 38 39 *ext_err = (__s8)fib_lookup_v4(ctx, &fib_params, egress_ip, daddr, 0); 40 41 switch (*ext_err) { 42 case BPF_FIB_LKUP_RET_SUCCESS: 43 break; 44 case BPF_FIB_LKUP_RET_NO_NEIGH: 45 /* Don't redirect if we can't update the L2 DMAC: */ 46 if (!neigh_resolver_available()) 47 return CTX_ACT_OK; 48 49 /* Don't redirect without a valid target ifindex: */ 50 if (!is_defined(HAVE_FIB_IFINDEX)) 51 return CTX_ACT_OK; 52 break; 53 default: 54 return DROP_NO_FIB; 55 } 56 57 /* Skip redirect in to-netdev if we stay on the same iface: */ 58 if (is_defined(IS_BPF_HOST) && fib_params.l.ifindex == ctx_get_ifindex(ctx)) 59 return CTX_ACT_OK; 60 61 return fib_do_redirect(ctx, true, &fib_params, false, ext_err, &oif); 62 } 63 64 #ifdef ENABLE_EGRESS_GATEWAY 65 struct { 66 __uint(type, BPF_MAP_TYPE_LPM_TRIE); 67 __type(key, struct egress_gw_policy_key); 68 __type(value, struct egress_gw_policy_entry); 69 __uint(pinning, LIBBPF_PIN_BY_NAME); 70 __uint(max_entries, EGRESS_POLICY_MAP_SIZE); 71 __uint(map_flags, BPF_F_NO_PREALLOC); 72 } EGRESS_POLICY_MAP __section_maps_btf; 73 74 static __always_inline 75 struct egress_gw_policy_entry *lookup_ip4_egress_gw_policy(__be32 saddr, __be32 daddr) 76 { 77 struct egress_gw_policy_key key = { 78 .lpm_key = { EGRESS_IPV4_PREFIX, {} }, 79 .saddr = saddr, 80 .daddr = daddr, 81 }; 82 return map_lookup_elem(&EGRESS_POLICY_MAP, &key); 83 } 84 #endif /* ENABLE_EGRESS_GATEWAY */ 85 86 static __always_inline int 87 egress_gw_request_needs_redirect(struct ipv4_ct_tuple *rtuple __maybe_unused, 88 __be32 *gateway_ip __maybe_unused) 89 { 90 #if defined(ENABLE_EGRESS_GATEWAY) 91 struct egress_gw_policy_entry *egress_gw_policy; 92 93 egress_gw_policy = lookup_ip4_egress_gw_policy(ipv4_ct_reverse_tuple_saddr(rtuple), 94 ipv4_ct_reverse_tuple_daddr(rtuple)); 95 if (!egress_gw_policy) 96 return CTX_ACT_OK; 97 98 switch (egress_gw_policy->gateway_ip) { 99 case EGRESS_GATEWAY_NO_GATEWAY: 100 /* If no gateway is found, drop the packet. */ 101 return DROP_NO_EGRESS_GATEWAY; 102 case EGRESS_GATEWAY_EXCLUDED_CIDR: 103 return CTX_ACT_OK; 104 } 105 106 *gateway_ip = egress_gw_policy->gateway_ip; 107 return CTX_ACT_REDIRECT; 108 #else 109 return CTX_ACT_OK; 110 #endif /* ENABLE_EGRESS_GATEWAY */ 111 } 112 113 static __always_inline 114 bool egress_gw_snat_needed(__be32 saddr __maybe_unused, 115 __be32 daddr __maybe_unused, 116 __be32 *snat_addr __maybe_unused) 117 { 118 #if defined(ENABLE_EGRESS_GATEWAY) 119 struct egress_gw_policy_entry *egress_gw_policy; 120 121 egress_gw_policy = lookup_ip4_egress_gw_policy(saddr, daddr); 122 if (!egress_gw_policy) 123 return false; 124 125 if (egress_gw_policy->gateway_ip == EGRESS_GATEWAY_NO_GATEWAY || 126 egress_gw_policy->gateway_ip == EGRESS_GATEWAY_EXCLUDED_CIDR) 127 return false; 128 129 *snat_addr = egress_gw_policy->egress_ip; 130 return true; 131 #else 132 return false; 133 #endif /* ENABLE_EGRESS_GATEWAY */ 134 } 135 136 static __always_inline 137 bool egress_gw_reply_matches_policy(struct iphdr *ip4 __maybe_unused) 138 { 139 #if defined(ENABLE_EGRESS_GATEWAY) 140 struct egress_gw_policy_entry *egress_policy; 141 142 /* Find a matching policy by looking up the reverse address tuple: */ 143 egress_policy = lookup_ip4_egress_gw_policy(ip4->daddr, ip4->saddr); 144 if (!egress_policy) 145 return false; 146 147 if (egress_policy->gateway_ip == EGRESS_GATEWAY_NO_GATEWAY || 148 egress_policy->gateway_ip == EGRESS_GATEWAY_EXCLUDED_CIDR) 149 return false; 150 151 return true; 152 #else 153 return false; 154 #endif /* ENABLE_EGRESS_GATEWAY */ 155 } 156 157 /** Match a packet against EGW policy map, and return the gateway's IP. 158 * @arg rtuple CT tuple for the packet 159 * @arg ct_status CT result, to identify egressing connections 160 * @arg gateway_ip returns the gateway node's IP 161 * 162 * Returns 163 * * CTX_ACT_REDIRECT if a matching policy entry was found, 164 * * CTX_ACT_OK if no EGW logic should be applied, 165 * * DROP_* for error conditions. 166 */ 167 static __always_inline int 168 egress_gw_request_needs_redirect_hook(struct ipv4_ct_tuple *rtuple, 169 enum ct_status ct_status, 170 __be32 *gateway_ip) 171 { 172 #if defined(IS_BPF_LXC) 173 /* If the packet is a reply or is related, it means that outside 174 * has initiated the connection, and so we should skip egress 175 * gateway, since an egress policy is only matching connections 176 * originating from a pod. 177 */ 178 if (ct_status == CT_REPLY || ct_status == CT_RELATED) 179 return CTX_ACT_OK; 180 #else 181 /* We lookup CT in forward direction at to-netdev and expect to 182 * get CT_ESTABLISHED for outbound connection as 183 * from_container should have already created a CT entry. 184 * If we get CT_NEW here, it's an indication that it's a reply 185 * for inbound connection or host-level outbound connection. 186 * We don't expect to receive any other ct_status here. 187 */ 188 if (ct_status != CT_ESTABLISHED) 189 return CTX_ACT_OK; 190 #endif 191 192 return egress_gw_request_needs_redirect(rtuple, gateway_ip); 193 } 194 195 static __always_inline 196 bool egress_gw_snat_needed_hook(__be32 saddr, __be32 daddr, __be32 *snat_addr) 197 { 198 struct remote_endpoint_info *remote_ep; 199 200 remote_ep = lookup_ip4_remote_endpoint(daddr, 0); 201 /* If the packet is destined to an entity inside the cluster, either EP 202 * or node, skip SNAT since only traffic leaving the cluster is supposed 203 * to be masqueraded with an egress IP. 204 */ 205 if (remote_ep && 206 identity_is_cluster(remote_ep->sec_identity)) 207 return false; 208 209 return egress_gw_snat_needed(saddr, daddr, snat_addr); 210 } 211 212 static __always_inline 213 bool egress_gw_reply_needs_redirect_hook(struct iphdr *ip4, __u32 *tunnel_endpoint, 214 __u32 *dst_sec_identity) 215 { 216 if (egress_gw_reply_matches_policy(ip4)) { 217 struct remote_endpoint_info *info; 218 219 info = lookup_ip4_remote_endpoint(ip4->daddr, 0); 220 if (!info || info->tunnel_endpoint == 0) 221 return false; 222 223 *tunnel_endpoint = info->tunnel_endpoint; 224 *dst_sec_identity = info->sec_identity; 225 226 return true; 227 } 228 229 return false; 230 } 231 232 static __always_inline 233 int egress_gw_handle_packet(struct __ctx_buff *ctx, 234 struct ipv4_ct_tuple *tuple, 235 enum ct_status ct_status, 236 __u32 src_sec_identity, __u32 dst_sec_identity, 237 const struct trace_ctx *trace) 238 { 239 struct endpoint_info *gateway_node_ep; 240 __be32 gateway_ip = 0; 241 int ret; 242 243 /* If the packet is destined to an entity inside the cluster, 244 * either EP or node, it should not be forwarded to an egress 245 * gateway since only traffic leaving the cluster is supposed to 246 * be masqueraded with an egress IP. 247 */ 248 if (identity_is_cluster(dst_sec_identity)) 249 return CTX_ACT_OK; 250 251 ret = egress_gw_request_needs_redirect_hook(tuple, ct_status, &gateway_ip); 252 if (IS_ERR(ret)) 253 return ret; 254 255 if (ret == CTX_ACT_OK) 256 return ret; 257 258 /* If the gateway node is the local node, then just let the 259 * packet go through, as it will be SNATed later on by 260 * handle_nat_fwd(). 261 */ 262 gateway_node_ep = __lookup_ip4_endpoint(gateway_ip); 263 if (gateway_node_ep && (gateway_node_ep->flags & ENDPOINT_F_HOST)) 264 return CTX_ACT_OK; 265 266 /* Send the packet to egress gateway node through a tunnel. */ 267 return __encap_and_redirect_with_nodeid(ctx, 0, gateway_ip, 268 src_sec_identity, dst_sec_identity, 269 NOT_VTEP_DST, trace); 270 } 271 272 #endif /* ENABLE_EGRESS_GATEWAY_COMMON */