github.com/cilium/cilium@v1.16.2/bpf/bpf_xdp.c

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright Authors of Cilium */

#include <bpf/ctx/xdp.h>
#include <bpf/api.h>

#include <node_config.h>
#include <netdev_config.h>
#include <filter_config.h>

#define SKIP_POLICY_MAP 1

/* Controls the inclusion of the CILIUM_CALL_HANDLE_ICMP6_NS section in this
 * object file.
 */
#define SKIP_ICMPV6_NS_HANDLING

/* Controls the inclusion of the CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED section
 * in this object file. It is needed by all callers of ipv6_local_delivery,
 * which calls into the IPv6 L3 handling.
 */
#define SKIP_ICMPV6_HOPLIMIT_HANDLING

/* Controls the inclusion of the CILIUM_CALL_SRV6 section in the object file.
 */
#define SKIP_SRV6_HANDLING

/* The XDP datapath does not handle health probes from the local node,
 * so do not compile that support in.
 */
#undef ENABLE_HEALTH_CHECK

#include "lib/common.h"
#include "lib/maps.h"
#include "lib/eps.h"
#include "lib/events.h"
#include "lib/nodeport.h"

#ifdef ENABLE_PREFILTER
#ifdef CIDR4_FILTER
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct lpm_v4_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_HMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR4_HMAP_NAME __section_maps_btf;

#ifdef CIDR4_LPM_PREFILTER
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__type(key, struct lpm_v4_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_LMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR4_LMAP_NAME __section_maps_btf;
#endif /* CIDR4_LPM_PREFILTER */
#endif /* CIDR4_FILTER */

#ifdef CIDR6_FILTER
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct lpm_v6_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_HMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR6_HMAP_NAME __section_maps_btf;

#ifdef CIDR6_LPM_PREFILTER
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__type(key, struct lpm_v6_key);
	__type(value, struct lpm_val);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CIDR4_LMAP_ELEMS);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CIDR6_LMAP_NAME __section_maps_btf;
#endif /* CIDR6_LPM_PREFILTER */
#endif /* CIDR6_FILTER */
#endif /* ENABLE_PREFILTER */
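
/* Illustrative userspace sketch, not part of this object file: seeding the
 * v4 prefilter from a control-plane process via libbpf. In practice the
 * Cilium agent owns these pinned maps; the pin path below is hypothetical,
 * and the flattened key/value layouts are assumptions derived from the map
 * definitions above (the real struct lpm_v4_key / struct lpm_val live in
 * lib/maps.h).
 *
 *	#include <bpf/bpf.h>		// bpf_obj_get(), bpf_map_update_elem()
 *	#include <linux/types.h>
 *
 *	struct lpm_v4_key { __u32 prefixlen; __u8 addr[4]; };
 *	struct lpm_val { __u8 unused; };
 *
 *	int deny_cidr(const char *pin_path)	// e.g. a pinned CIDR4_LMAP_NAME
 *	{
 *		struct lpm_v4_key key = {
 *			.prefixlen = 24,		// 192.0.2.0/24 (TEST-NET-1)
 *			.addr = { 192, 0, 2, 0 },
 *		};
 *		struct lpm_val val = {};	// presence alone means "drop"
 *		int fd = bpf_obj_get(pin_path);
 *
 *		if (fd < 0)
 *			return -1;
 *		return bpf_map_update_elem(fd, &key, &val, BPF_ANY);
 *	}
 */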

static __always_inline __maybe_unused int
bpf_xdp_exit(struct __ctx_buff *ctx, const int verdict)
{
	if (verdict == CTX_ACT_OK)
		ctx_move_xfer(ctx);

	return verdict;
}

#ifdef ENABLE_IPV4
#ifdef ENABLE_NODEPORT_ACCELERATION
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_NETDEV)
int tail_lb_ipv4(struct __ctx_buff *ctx)
{
	int ret = CTX_ACT_OK;
	__s8 ext_err = 0;

	if (!ctx_skip_nodeport(ctx)) {
		int l3_off = ETH_HLEN;
		void *data, *data_end;
		struct iphdr *ip4;
		bool __maybe_unused is_dsr = false;

		if (!revalidate_data(ctx, &data, &data_end, &ip4)) {
			ret = DROP_INVALID;
			goto out;
		}

#if defined(ENABLE_DSR) && !defined(ENABLE_DSR_HYBRID) && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE
		{
			int l4_off, inner_l2_off;
			struct genevehdr geneve;
			__sum16 udp_csum;
			__be16 dport;
			__u16 proto;

			if (ip4->protocol != IPPROTO_UDP)
				goto no_encap;

			/* Punt packets with IP options to TC */
			if (ipv4_hdrlen(ip4) != sizeof(*ip4))
				goto no_encap;

			l4_off = l3_off + sizeof(*ip4);

			if (l4_load_port(ctx, l4_off + UDP_DPORT_OFF, &dport) < 0) {
				ret = DROP_INVALID;
				goto out;
			}

			if (dport != bpf_htons(TUNNEL_PORT))
				goto no_encap;

			/* Cilium uses BPF_F_ZERO_CSUM_TX for its tunnel traffic.
			 *
			 * Adding LB support for checksummed packets would require
			 * that we adjust udp->check
			 * 1. after DNAT of the inner packet,
			 * 2. after re-writing the outer headers and inserting
			 *    the DSR option
			 */
			if (ctx_load_bytes(ctx, l4_off + offsetof(struct udphdr, check),
					   &udp_csum, sizeof(udp_csum)) < 0) {
				ret = DROP_INVALID;
				goto out;
			}

			if (udp_csum != 0)
				goto no_encap;

			if (ctx_load_bytes(ctx, l4_off + sizeof(struct udphdr), &geneve,
					   sizeof(geneve)) < 0) {
				ret = DROP_INVALID;
				goto out;
			}

			if (geneve.protocol_type != bpf_htons(ETH_P_TEB))
				goto no_encap;

			/* Punt packets with GENEVE options to TC */
			if (geneve.opt_len)
				goto no_encap;

			/* No IP options and no Geneve options (both punted above),
			 * so the inner frame sits at a fixed offset:
			 * outer ETH (14) + outer IPv4 (20) + UDP (8) + Geneve (8).
			 */
			inner_l2_off = l4_off + sizeof(struct udphdr) + sizeof(struct genevehdr);

			/* point at the inner L3 header: */
			if (!validate_ethertype_l2_off(ctx, inner_l2_off, &proto))
				goto no_encap;

			if (proto != bpf_htons(ETH_P_IP))
				goto no_encap;

			l3_off = inner_l2_off + ETH_HLEN;

			if (!revalidate_data_l3_off(ctx, &data, &data_end, &ip4, l3_off)) {
				ret = DROP_INVALID;
				goto out;
			}
		}
no_encap:
#endif /* ENABLE_DSR && !ENABLE_DSR_HYBRID && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE */

		ret = nodeport_lb4(ctx, ip4, l3_off, 0, &ext_err, &is_dsr);
		if (ret == NAT_46X64_RECIRC)
			ret = tail_call_internal(ctx, CILIUM_CALL_IPV6_FROM_NETDEV,
						 &ext_err);
	}

out:
	if (IS_ERR(ret))
		return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err,
						  CTX_ACT_DROP, METRIC_INGRESS);

	return bpf_xdp_exit(ctx, ret);
}

static __always_inline int check_v4_lb(struct __ctx_buff *ctx)
{
	__s8 ext_err = 0;
	int ret;

	ret = tail_call_internal(ctx, CILIUM_CALL_IPV4_FROM_NETDEV, &ext_err);
	return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err, CTX_ACT_DROP,
					  METRIC_INGRESS);
}
#else
static __always_inline int check_v4_lb(struct __ctx_buff *ctx __maybe_unused)
{
	return CTX_ACT_OK;
}
#endif /* ENABLE_NODEPORT_ACCELERATION */

#ifdef ENABLE_PREFILTER
static __always_inline int check_v4(struct __ctx_buff *ctx)
{
	void *data_end = ctx_data_end(ctx);
	void *data = ctx_data(ctx);
	struct iphdr *ipv4_hdr = data + sizeof(struct ethhdr);
	struct lpm_v4_key pfx __maybe_unused;

	if (ctx_no_room(ipv4_hdr + 1, data_end))
		return CTX_ACT_DROP;

#ifdef CIDR4_FILTER
	memcpy(pfx.lpm.data, &ipv4_hdr->saddr, sizeof(pfx.addr));
	pfx.lpm.prefixlen = 32;

#ifdef CIDR4_LPM_PREFILTER
	if (map_lookup_elem(&CIDR4_LMAP_NAME, &pfx))
		return CTX_ACT_DROP;
#endif /* CIDR4_LPM_PREFILTER */
	return map_lookup_elem(&CIDR4_HMAP_NAME, &pfx) ?
	       CTX_ACT_DROP : check_v4_lb(ctx);
#else
	return check_v4_lb(ctx);
#endif /* CIDR4_FILTER */
}
#else
static __always_inline int check_v4(struct __ctx_buff *ctx)
{
	return check_v4_lb(ctx);
}
#endif /* ENABLE_PREFILTER */
#endif /* ENABLE_IPV4 */
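
/* Reading aid for check_v4() above, a sketch rather than compiled code: the
 * same { prefixlen = 32, saddr } key drives two lookups with different
 * semantics. Against CIDR4_LMAP_NAME (an LPM trie) the kernel matches the
 * longest stored prefix covering the source address, so one 192.0.2.0/24
 * entry drops every address in that /24. Against CIDR4_HMAP_NAME (a plain
 * hash) the key must match byte-for-byte, so it only holds individual /32
 * entries. That is why the trie is consulted first as the coarse CIDR
 * filter and the hash handles exact addresses; check_v6() below mirrors
 * this with prefixlen = 128:
 *
 *	struct lpm_v4_key pfx = { .lpm.prefixlen = 32 };
 *
 *	memcpy(pfx.lpm.data, &ipv4_hdr->saddr, sizeof(pfx.addr));
 *	if (map_lookup_elem(&CIDR4_LMAP_NAME, &pfx))	// any covering prefix?
 *		return CTX_ACT_DROP;
 *	if (map_lookup_elem(&CIDR4_HMAP_NAME, &pfx))	// exact /32 entry?
 *		return CTX_ACT_DROP;
 */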

#ifdef ENABLE_IPV6
#ifdef ENABLE_NODEPORT_ACCELERATION
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_NETDEV)
int tail_lb_ipv6(struct __ctx_buff *ctx)
{
	int ret = CTX_ACT_OK;
	__s8 ext_err = 0;

	if (!ctx_skip_nodeport(ctx)) {
		void *data, *data_end;
		struct ipv6hdr *ip6;
		bool is_dsr = false;

		if (!revalidate_data(ctx, &data, &data_end, &ip6)) {
			ret = DROP_INVALID;
			goto drop_err;
		}

		ret = nodeport_lb6(ctx, ip6, 0, &ext_err, &is_dsr);
		if (IS_ERR(ret))
			goto drop_err;
	}

	return bpf_xdp_exit(ctx, ret);

drop_err:
	return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err,
					  CTX_ACT_DROP, METRIC_INGRESS);
}

static __always_inline int check_v6_lb(struct __ctx_buff *ctx)
{
	__s8 ext_err = 0;
	int ret;

	ret = tail_call_internal(ctx, CILIUM_CALL_IPV6_FROM_NETDEV, &ext_err);
	return send_drop_notify_error_ext(ctx, UNKNOWN_ID, ret, ext_err, CTX_ACT_DROP,
					  METRIC_INGRESS);
}
#else
static __always_inline int check_v6_lb(struct __ctx_buff *ctx __maybe_unused)
{
	return CTX_ACT_OK;
}
#endif /* ENABLE_NODEPORT_ACCELERATION */

#ifdef ENABLE_PREFILTER
static __always_inline int check_v6(struct __ctx_buff *ctx)
{
	void *data_end = ctx_data_end(ctx);
	void *data = ctx_data(ctx);
	struct ipv6hdr *ipv6_hdr = data + sizeof(struct ethhdr);
	struct lpm_v6_key pfx __maybe_unused;

	if (ctx_no_room(ipv6_hdr + 1, data_end))
		return CTX_ACT_DROP;

#ifdef CIDR6_FILTER
	__bpf_memcpy_builtin(pfx.lpm.data, &ipv6_hdr->saddr, sizeof(pfx.addr));
	pfx.lpm.prefixlen = 128;

#ifdef CIDR6_LPM_PREFILTER
	if (map_lookup_elem(&CIDR6_LMAP_NAME, &pfx))
		return CTX_ACT_DROP;
#endif /* CIDR6_LPM_PREFILTER */
	return map_lookup_elem(&CIDR6_HMAP_NAME, &pfx) ?
	       CTX_ACT_DROP : check_v6_lb(ctx);
#else
	return check_v6_lb(ctx);
#endif /* CIDR6_FILTER */
}
#else
static __always_inline int check_v6(struct __ctx_buff *ctx)
{
	return check_v6_lb(ctx);
}
#endif /* ENABLE_PREFILTER */
#endif /* ENABLE_IPV6 */

static __always_inline int check_filters(struct __ctx_buff *ctx)
{
	int ret = CTX_ACT_OK;
	__u16 proto;

	if (!validate_ethertype(ctx, &proto))
		return CTX_ACT_OK;

	ctx_store_meta(ctx, XFER_MARKER, 0);
	ctx_skip_nodeport_clear(ctx);

	switch (proto) {
#ifdef ENABLE_IPV4
	case bpf_htons(ETH_P_IP):
		ret = check_v4(ctx);
		break;
#endif /* ENABLE_IPV4 */
#ifdef ENABLE_IPV6
	case bpf_htons(ETH_P_IPV6):
		ret = check_v6(ctx);
		break;
#endif /* ENABLE_IPV6 */
	default:
		break;
	}

	return bpf_xdp_exit(ctx, ret);
}

__section_entry
int cil_xdp_entry(struct __ctx_buff *ctx)
{
	return check_filters(ctx);
}

BPF_LICENSE("Dual BSD/GPL");
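
/* Illustrative loader sketch, not part of this object file: attaching the
 * cil_xdp_entry program above to a NIC in native (driver) XDP mode with
 * plain libbpf. Cilium loads this object through its own loader with
 * device-specific options, so the object path, device name, and flags here
 * are assumptions for demonstration only.
 *
 *	#include <bpf/libbpf.h>
 *	#include <linux/if_link.h>	// XDP_FLAGS_DRV_MODE
 *	#include <net/if.h>		// if_nametoindex()
 *
 *	int attach(void)
 *	{
 *		struct bpf_object *obj = bpf_object__open_file("bpf_xdp.o", NULL);
 *		struct bpf_program *prog;
 *		int ifindex = if_nametoindex("eth0");	// hypothetical device
 *
 *		if (!obj || !ifindex || bpf_object__load(obj))
 *			return -1;
 *		prog = bpf_object__find_program_by_name(obj, "cil_xdp_entry");
 *		if (!prog)
 *			return -1;
 *		return bpf_xdp_attach(ifindex, bpf_program__fd(prog),
 *				      XDP_FLAGS_DRV_MODE, NULL);
 *	}
 */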