github.com/cilium/cilium@v1.16.2/bpf/lib/icmp6.h (about) 1 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 /* Copyright Authors of Cilium */ 3 4 #if !defined(__LIB_ICMP6__) && defined(ENABLE_IPV6) 5 #define __LIB_ICMP6__ 6 7 #include <linux/icmpv6.h> 8 #include <linux/in.h> 9 #include "common.h" 10 #include "eth.h" 11 #include "drop.h" 12 #include "eps.h" 13 14 #define ICMP6_TYPE_OFFSET offsetof(struct icmp6hdr, icmp6_type) 15 #define ICMP6_CSUM_OFFSET (sizeof(struct ipv6hdr) + offsetof(struct icmp6hdr, icmp6_cksum)) 16 #define ICMP6_ND_TARGET_OFFSET (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr)) 17 #define ICMP6_ND_OPTS (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + sizeof(struct in6_addr)) 18 19 #define ICMP6_UNREACH_MSG_TYPE 1 20 #define ICMP6_PARAM_ERR_MSG_TYPE 4 21 #define ICMP6_ECHO_REQUEST_MSG_TYPE 128 22 #define ICMP6_ECHO_REPLY_MSG_TYPE 129 23 #define ICMP6_MULT_LIST_QUERY_TYPE 130 24 #define ICMP6_NS_MSG_TYPE 135 25 #define ICMP6_NA_MSG_TYPE 136 26 #define ICMP6_RR_MSG_TYPE 138 27 #define ICMP6_INV_NS_MSG_TYPE 141 28 #define ICMP6_MULT_LIST_REPORT_V2_TYPE 143 29 #define ICMP6_SEND_NS_MSG_TYPE 148 30 #define ICMP6_SEND_NA_MSG_TYPE 149 31 #define ICMP6_MULT_RA_MSG_TYPE 151 32 #define ICMP6_MULT_RT_MSG_TYPE 153 33 34 #define SKIP_HOST_FIREWALL -2 35 36 /* If no specific action is specified, drop unknown neighbour solicitation 37 * messages. 38 */ 39 #ifndef ACTION_UNKNOWN_ICMP6_NS 40 #define ACTION_UNKNOWN_ICMP6_NS DROP_UNKNOWN_TARGET 41 #endif 42 43 static __always_inline int icmp6_load_type(struct __ctx_buff *ctx, int l4_off, __u8 *type) 44 { 45 return ctx_load_bytes(ctx, l4_off + ICMP6_TYPE_OFFSET, type, sizeof(*type)); 46 } 47 48 static __always_inline int icmp6_send_reply(struct __ctx_buff *ctx, int nh_off) 49 { 50 union macaddr smac, dmac = THIS_INTERFACE_MAC; 51 const int csum_off = nh_off + ICMP6_CSUM_OFFSET; 52 union v6addr sip, dip, router_ip; 53 __be32 sum; 54 55 if (ipv6_load_saddr(ctx, nh_off, &sip) < 0 || 56 ipv6_load_daddr(ctx, nh_off, &dip) < 0) 57 return DROP_INVALID; 58 59 BPF_V6(router_ip, ROUTER_IP); 60 /* ctx->saddr = ctx->daddr */ 61 if (ipv6_store_saddr(ctx, router_ip.addr, nh_off) < 0) 62 return DROP_WRITE_ERROR; 63 /* ctx->daddr = ctx->saddr */ 64 if (ipv6_store_daddr(ctx, sip.addr, nh_off) < 0) 65 return DROP_WRITE_ERROR; 66 67 /* fixup checksums */ 68 sum = csum_diff(sip.addr, 16, router_ip.addr, 16, 0); 69 if (l4_csum_replace(ctx, csum_off, 0, sum, BPF_F_PSEUDO_HDR) < 0) 70 return DROP_CSUM_L4; 71 72 sum = csum_diff(dip.addr, 16, sip.addr, 16, 0); 73 if (l4_csum_replace(ctx, csum_off, 0, sum, BPF_F_PSEUDO_HDR) < 0) 74 return DROP_CSUM_L4; 75 76 /* dmac = smac, smac = dmac */ 77 if (eth_load_saddr(ctx, smac.addr, 0) < 0) 78 return DROP_INVALID; 79 80 if (eth_store_daddr(ctx, smac.addr, 0) < 0 || 81 eth_store_saddr(ctx, dmac.addr, 0) < 0) 82 return DROP_WRITE_ERROR; 83 84 cilium_dbg_capture(ctx, DBG_CAPTURE_DELIVERY, ctx_get_ifindex(ctx)); 85 86 return redirect_self(ctx); 87 } 88 89 /* 90 * send_icmp6_ndisc_adv 91 * @ctx: socket buffer 92 * @nh_off: offset to the IPv6 header 93 * @mac: device mac address 94 * @to_router: ndisc is sent to router, otherwise ndisc is sent to an endpoint. 95 * 96 * Send an ICMPv6 nadv reply in return to an ICMPv6 ndisc. 97 */ 98 static __always_inline int 99 send_icmp6_ndisc_adv(struct __ctx_buff *ctx, int nh_off, 100 const union macaddr *mac, bool to_router) 101 { 102 struct icmp6hdr icmp6hdr __align_stack_8 = {}, icmp6hdr_old __align_stack_8; 103 __u8 opts[8], opts_old[8]; 104 const int csum_off = nh_off + ICMP6_CSUM_OFFSET; 105 __be32 sum; 106 107 if (ctx_load_bytes(ctx, nh_off + sizeof(struct ipv6hdr), &icmp6hdr_old, 108 sizeof(icmp6hdr_old)) < 0) 109 return DROP_INVALID; 110 111 /* fill icmp6hdr */ 112 icmp6hdr.icmp6_type = 136; 113 icmp6hdr.icmp6_code = 0; 114 icmp6hdr.icmp6_cksum = icmp6hdr_old.icmp6_cksum; 115 icmp6hdr.icmp6_dataun.un_data32[0] = 0; 116 117 if (to_router) { 118 icmp6hdr.icmp6_router = 1; 119 icmp6hdr.icmp6_solicited = 1; 120 icmp6hdr.icmp6_override = 0; 121 } else { 122 icmp6hdr.icmp6_router = 0; 123 icmp6hdr.icmp6_solicited = 1; 124 icmp6hdr.icmp6_override = 1; 125 } 126 127 if (ctx_store_bytes(ctx, nh_off + sizeof(struct ipv6hdr), &icmp6hdr, 128 sizeof(icmp6hdr), 0) < 0) 129 return DROP_WRITE_ERROR; 130 131 /* fixup checksums */ 132 sum = csum_diff(&icmp6hdr_old, sizeof(icmp6hdr_old), 133 &icmp6hdr, sizeof(icmp6hdr), 0); 134 if (l4_csum_replace(ctx, csum_off, 0, sum, BPF_F_PSEUDO_HDR) < 0) 135 return DROP_CSUM_L4; 136 137 /* get old options */ 138 if (ctx_load_bytes(ctx, nh_off + ICMP6_ND_OPTS, opts_old, sizeof(opts_old)) < 0) 139 return DROP_INVALID; 140 141 opts[0] = 2; 142 opts[1] = 1; 143 opts[2] = mac->addr[0]; 144 opts[3] = mac->addr[1]; 145 opts[4] = mac->addr[2]; 146 opts[5] = mac->addr[3]; 147 opts[6] = mac->addr[4]; 148 opts[7] = mac->addr[5]; 149 150 /* store ND_OPT_TARGET_LL_ADDR option */ 151 if (ctx_store_bytes(ctx, nh_off + ICMP6_ND_OPTS, opts, sizeof(opts), 0) < 0) 152 return DROP_WRITE_ERROR; 153 154 /* fixup checksum */ 155 sum = csum_diff(opts_old, sizeof(opts_old), opts, sizeof(opts), 0); 156 if (l4_csum_replace(ctx, csum_off, 0, sum, BPF_F_PSEUDO_HDR) < 0) 157 return DROP_CSUM_L4; 158 159 return icmp6_send_reply(ctx, nh_off); 160 } 161 162 static __always_inline __be32 compute_icmp6_csum(char data[80], __u16 payload_len, 163 struct ipv6hdr *ipv6hdr) 164 { 165 __be32 sum; 166 167 /* compute checksum with new payload length */ 168 sum = csum_diff(NULL, 0, data, payload_len, 0); 169 sum = ipv6_pseudohdr_checksum(ipv6hdr, IPPROTO_ICMPV6, payload_len, 170 sum); 171 return sum; 172 } 173 174 static __always_inline int __icmp6_send_time_exceeded(struct __ctx_buff *ctx, 175 int nh_off) 176 { 177 /* FIXME: Fix code below to not require this init */ 178 char data[80] = {}; 179 struct icmp6hdr *icmp6hoplim; 180 struct ipv6hdr *ipv6hdr; 181 char *upper; /* icmp6 or tcp or udp */ 182 const int csum_off = nh_off + ICMP6_CSUM_OFFSET; 183 __be32 sum = 0; 184 __u16 payload_len = 0; /* FIXME: Uninit of this causes verifier bug */ 185 __u8 icmp6_nexthdr = IPPROTO_ICMPV6; 186 int trimlen; 187 188 /* initialize pointers to offsets in data */ 189 icmp6hoplim = (struct icmp6hdr *)data; 190 ipv6hdr = (struct ipv6hdr *)(data + 8); 191 upper = (data + 48); 192 193 /* fill icmp6hdr */ 194 icmp6hoplim->icmp6_type = 3; 195 icmp6hoplim->icmp6_code = 0; 196 icmp6hoplim->icmp6_cksum = 0; 197 icmp6hoplim->icmp6_dataun.un_data32[0] = 0; 198 199 cilium_dbg(ctx, DBG_ICMP6_TIME_EXCEEDED, 0, 0); 200 201 /* read original v6 hdr into offset 8 */ 202 if (ctx_load_bytes(ctx, nh_off, ipv6hdr, sizeof(*ipv6hdr)) < 0) 203 return DROP_INVALID; 204 205 if (ipv6_store_nexthdr(ctx, &icmp6_nexthdr, nh_off) < 0) 206 return DROP_WRITE_ERROR; 207 208 /* read original v6 payload into offset 48 */ 209 switch (ipv6hdr->nexthdr) { 210 case IPPROTO_ICMPV6: 211 #ifdef ENABLE_SCTP 212 case IPPROTO_SCTP: 213 #endif /* ENABLE_SCTP */ 214 case IPPROTO_UDP: 215 if (ctx_load_bytes(ctx, nh_off + sizeof(struct ipv6hdr), 216 upper, 8) < 0) 217 return DROP_INVALID; 218 sum = compute_icmp6_csum(data, 56, ipv6hdr); 219 payload_len = bpf_htons(56); 220 trimlen = 56 - bpf_ntohs(ipv6hdr->payload_len); 221 if (ctx_change_tail(ctx, ctx_full_len(ctx) + trimlen, 0) < 0) 222 return DROP_WRITE_ERROR; 223 /* trim or expand buffer and copy data buffer after ipv6 header */ 224 if (ctx_store_bytes(ctx, nh_off + sizeof(struct ipv6hdr), 225 data, 56, 0) < 0) 226 return DROP_WRITE_ERROR; 227 if (ipv6_store_paylen(ctx, nh_off, &payload_len) < 0) 228 return DROP_WRITE_ERROR; 229 230 break; 231 /* copy header without options */ 232 case IPPROTO_TCP: 233 if (ctx_load_bytes(ctx, nh_off + sizeof(struct ipv6hdr), 234 upper, 20) < 0) 235 return DROP_INVALID; 236 sum = compute_icmp6_csum(data, 68, ipv6hdr); 237 payload_len = bpf_htons(68); 238 /* trim or expand buffer and copy data buffer after ipv6 header */ 239 trimlen = 68 - bpf_ntohs(ipv6hdr->payload_len); 240 if (ctx_change_tail(ctx, ctx_full_len(ctx) + trimlen, 0) < 0) 241 return DROP_WRITE_ERROR; 242 if (ctx_store_bytes(ctx, nh_off + sizeof(struct ipv6hdr), 243 data, 68, 0) < 0) 244 return DROP_WRITE_ERROR; 245 if (ipv6_store_paylen(ctx, nh_off, &payload_len) < 0) 246 return DROP_WRITE_ERROR; 247 248 break; 249 default: 250 return DROP_UNKNOWN_L4; 251 } 252 253 if (l4_csum_replace(ctx, csum_off, 0, sum, BPF_F_PSEUDO_HDR) < 0) 254 return DROP_CSUM_L4; 255 256 return icmp6_send_reply(ctx, nh_off); 257 } 258 259 #ifndef SKIP_ICMPV6_HOPLIMIT_HANDLING 260 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED) 261 int tail_icmp6_send_time_exceeded(struct __ctx_buff *ctx __maybe_unused) 262 { 263 int ret, nh_off = ctx_load_and_clear_meta(ctx, 0); 264 enum metric_dir direction = (enum metric_dir)ctx_load_meta(ctx, 1); 265 266 ret = __icmp6_send_time_exceeded(ctx, nh_off); 267 if (IS_ERR(ret)) 268 return send_drop_notify_error(ctx, UNKNOWN_ID, ret, CTX_ACT_DROP, 269 direction); 270 return ret; 271 } 272 273 /* 274 * icmp6_send_time_exceeded 275 * @ctx: socket buffer 276 * @nh_off: offset to the IPv6 header 277 * @direction: direction of packet (can be ingress or egress) 278 * Send a ICMPv6 time exceeded in response to an IPv6 frame. 279 * 280 * NOTE: This is terminal function and will cause the BPF program to exit 281 */ 282 static __always_inline int icmp6_send_time_exceeded(struct __ctx_buff *ctx, 283 int nh_off, enum metric_dir direction) 284 { 285 ctx_store_meta(ctx, 0, nh_off); 286 ctx_store_meta(ctx, 1, direction); 287 288 return tail_call_internal(ctx, CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED, NULL); 289 } 290 #endif 291 292 static __always_inline int __icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off) 293 { 294 union v6addr target, router; 295 struct endpoint_info *ep; 296 union macaddr router_mac = THIS_INTERFACE_MAC; 297 298 if (ctx_load_bytes(ctx, nh_off + ICMP6_ND_TARGET_OFFSET, target.addr, 299 sizeof(((struct ipv6hdr *)NULL)->saddr)) < 0) 300 return DROP_INVALID; 301 302 cilium_dbg(ctx, DBG_ICMP6_NS, target.p3, target.p4); 303 304 BPF_V6(router, ROUTER_IP); 305 306 if (ipv6_addr_equals(&target, &router)) { 307 308 return send_icmp6_ndisc_adv(ctx, nh_off, &router_mac, true); 309 } 310 311 ep = __lookup_ip6_endpoint(&target); 312 if (ep) { 313 if (ep->flags & ENDPOINT_F_HOST) { 314 /* Target must be a node_ip, because of ENDPOINT_F_HOST flag 315 * and target != router_ip. 316 * 317 * We pass these packets to stack to make sure: 318 * 319 * 1. The response NA has node IP as source address instead of 320 * router IP, to address https://github.com/cilium/cilium/issues/14509. 321 * 322 * 2. Kernel stack can record a neighbor entry for the 323 * source IP, to avoid bpf_fib_lookup failure as mentioned at 324 * https://github.com/cilium/cilium/pull/30837#issuecomment-1960897445. 325 */ 326 return CTX_ACT_OK; 327 } 328 return send_icmp6_ndisc_adv(ctx, nh_off, &router_mac, false); 329 } 330 331 /* Unknown target address, drop */ 332 return ACTION_UNKNOWN_ICMP6_NS; 333 } 334 335 #ifndef SKIP_ICMPV6_NS_HANDLING 336 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_HANDLE_ICMP6_NS) 337 int tail_icmp6_handle_ns(struct __ctx_buff *ctx) 338 { 339 int ret, nh_off = ctx_load_and_clear_meta(ctx, 0); 340 enum metric_dir direction = (enum metric_dir)ctx_load_meta(ctx, 1); 341 342 ret = __icmp6_handle_ns(ctx, nh_off); 343 if (IS_ERR(ret)) 344 return send_drop_notify_error(ctx, UNKNOWN_ID, ret, CTX_ACT_DROP, direction); 345 return ret; 346 } 347 #endif 348 349 /* 350 * icmp6_handle_ns 351 * @ctx: socket buffer 352 * @nh_off: offset to the IPv6 header 353 * @direction: direction of packet(ingress or egress) 354 * @ext_err: extended error value 355 * 356 * Respond to ICMPv6 Neighbour Solicitation 357 * 358 * NOTE: This is terminal function and will cause the BPF program to exit 359 */ 360 static __always_inline int icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off, 361 enum metric_dir direction, 362 __s8 *ext_err) 363 { 364 ctx_store_meta(ctx, 0, nh_off); 365 ctx_store_meta(ctx, 1, direction); 366 367 return tail_call_internal(ctx, CILIUM_CALL_HANDLE_ICMP6_NS, ext_err); 368 } 369 370 static __always_inline bool 371 is_icmp6_ndp(struct __ctx_buff *ctx, const struct ipv6hdr *ip6, int nh_off) 372 { 373 __u8 type; 374 375 if (icmp6_load_type(ctx, nh_off + sizeof(struct ipv6hdr), &type) < 0) 376 return false; 377 378 return ip6->nexthdr == IPPROTO_ICMPV6 && 379 (type == ICMP6_NS_MSG_TYPE || type == ICMP6_NA_MSG_TYPE); 380 } 381 382 static __always_inline int icmp6_ndp_handle(struct __ctx_buff *ctx, int nh_off, 383 enum metric_dir direction, 384 __s8 *ext_err) 385 { 386 __u8 type; 387 388 if (icmp6_load_type(ctx, nh_off + sizeof(struct ipv6hdr), &type) < 0) 389 return DROP_INVALID; 390 391 cilium_dbg(ctx, DBG_ICMP6_HANDLE, type, 0); 392 if (type == ICMP6_NS_MSG_TYPE) 393 return icmp6_handle_ns(ctx, nh_off, direction, ext_err); 394 395 /* All branching above will have issued a tail call, all 396 * remaining traffic is subject to forwarding to containers. 397 */ 398 return 0; 399 } 400 401 static __always_inline int 402 icmp6_host_handle(struct __ctx_buff *ctx, int l4_off, __s8 *ext_err, bool handle_ns) 403 { 404 __u8 type; 405 406 if (icmp6_load_type(ctx, l4_off, &type) < 0) 407 return DROP_INVALID; 408 409 if (type == ICMP6_NS_MSG_TYPE && handle_ns) 410 return icmp6_handle_ns(ctx, ETH_HLEN, METRIC_INGRESS, ext_err); 411 412 #ifdef ENABLE_HOST_FIREWALL 413 /* When the host firewall is enabled, we drop and allow ICMPv6 messages 414 * according to RFC4890, except for echo request and reply messages which 415 * are handled by host policies and can be dropped. 416 * | ICMPv6 Message | Action | Type | 417 * |---------------------------------|-----------------|------| 418 * | ICMPv6-unreach | CTX_ACT_OK | 1 | 419 * | ICMPv6-too-big | CTX_ACT_OK | 2 | 420 * | ICMPv6-timed | CTX_ACT_OK | 3 | 421 * | ICMPv6-parameter | CTX_ACT_OK | 4 | 422 * | ICMPv6-err-private-exp-100 | CTX_ACT_DROP | 100 | 423 * | ICMPv6-err-private-exp-101 | CTX_ACT_DROP | 101 | 424 * | ICMPv6-err-expansion | CTX_ACT_DROP | 127 | 425 * | ICMPv6-echo-message | Firewall | 128 | 426 * | ICMPv6-echo-reply | Firewall | 129 | 427 * | ICMPv6-mult-list-query | CTX_ACT_OK | 130 | 428 * | ICMPv6-mult-list-report | CTX_ACT_OK | 131 | 429 * | ICMPv6-mult-list-done | CTX_ACT_OK | 132 | 430 * | ICMPv6-router-solici | CTX_ACT_OK | 133 | 431 * | ICMPv6-router-advert | CTX_ACT_OK | 134 | 432 * | ICMPv6-neighbor-solicit | icmp6_handle_ns | 135 | 433 * | ICMPv6-neighbor-advert | CTX_ACT_OK | 136 | 434 * | ICMPv6-redirect-message | CTX_ACT_DROP | 137 | 435 * | ICMPv6-router-renumber | CTX_ACT_OK | 138 | 436 * | ICMPv6-node-info-query | CTX_ACT_DROP | 139 | 437 * | ICMPv6-node-info-response | CTX_ACT_DROP | 140 | 438 * | ICMPv6-inv-neighbor-solicit | CTX_ACT_OK | 141 | 439 * | ICMPv6-inv-neighbor-advert | CTX_ACT_OK | 142 | 440 * | ICMPv6-mult-list-report-v2 | CTX_ACT_OK | 143 | 441 * | ICMPv6-home-agent-disco-request | CTX_ACT_DROP | 144 | 442 * | ICMPv6-home-agent-disco-reply | CTX_ACT_DROP | 145 | 443 * | ICMPv6-mobile-solicit | CTX_ACT_DROP | 146 | 444 * | ICMPv6-mobile-advert | CTX_ACT_DROP | 147 | 445 * | ICMPv6-send-solicit | CTX_ACT_OK | 148 | 446 * | ICMPv6-send-advert | CTX_ACT_OK | 149 | 447 * | ICMPv6-mobile-exp | CTX_ACT_DROP | 150 | 448 * | ICMPv6-mult-router-advert | CTX_ACT_OK | 151 | 449 * | ICMPv6-mult-router-solicit | CTX_ACT_OK | 152 | 450 * | ICMPv6-mult-router-term | CTX_ACT_OK | 153 | 451 * | ICMPv6-FMIPv6 | CTX_ACT_DROP | 154 | 452 * | ICMPv6-rpl-control | CTX_ACT_DROP | 155 | 453 * | ICMPv6-info-private-exp-200 | CTX_ACT_DROP | 200 | 454 * | ICMPv6-info-private-exp-201 | CTX_ACT_DROP | 201 | 455 * | ICMPv6-info-expansion | CTX_ACT_DROP | 255 | 456 * | ICMPv6-unallocated | CTX_ACT_DROP | | 457 * | ICMPv6-unassigned | CTX_ACT_DROP | | 458 */ 459 460 if (type == ICMP6_NS_MSG_TYPE) 461 return CTX_ACT_OK; 462 463 if (type == ICMP6_ECHO_REQUEST_MSG_TYPE || type == ICMP6_ECHO_REPLY_MSG_TYPE) 464 /* Decision is deferred to the host policies. */ 465 return CTX_ACT_OK; 466 467 if ((ICMP6_UNREACH_MSG_TYPE <= type && type <= ICMP6_PARAM_ERR_MSG_TYPE) || 468 (ICMP6_MULT_LIST_QUERY_TYPE <= type && type <= ICMP6_NA_MSG_TYPE) || 469 (ICMP6_INV_NS_MSG_TYPE <= type && type <= ICMP6_MULT_LIST_REPORT_V2_TYPE) || 470 (ICMP6_SEND_NS_MSG_TYPE <= type && type <= ICMP6_SEND_NA_MSG_TYPE) || 471 (ICMP6_MULT_RA_MSG_TYPE <= type && type <= ICMP6_MULT_RT_MSG_TYPE)) 472 return SKIP_HOST_FIREWALL; 473 return DROP_FORBIDDEN_ICMP6; 474 #else 475 return CTX_ACT_OK; 476 #endif /* ENABLE_HOST_FIREWALL */ 477 } 478 479 #endif