github.com/cilium/cilium@v1.16.2/bpf/lib/mcast.h (about) 1 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 /* Copyright Authors of Cilium */ 3 4 #pragma once 5 6 #include <bpf/api.h> 7 #include <linux/ip.h> 8 #include <linux/igmp.h> 9 10 #include "bpf/ctx/skb.h" 11 #include "bpf/helpers.h" 12 #include "bpf/helpers_skb.h" 13 #include "lib/common.h" 14 #include "lib/drop.h" 15 #include "lib/eth.h" 16 #include "linux/bpf.h" 17 #include "lib/encap.h" 18 19 /* the below structures are define outside of an IFDEF guard to satisfy 20 * enterprise_bpf_alignchecker.c requirement 21 */ 22 23 /* mcast_subscriber flags */ 24 enum { 25 /* indicates subscriber is remote and ifindex is the exit interface */ 26 MCAST_SUB_F_REMOTE = (1U << 0) 27 }; 28 29 /* 32bit big endian multicast group address for use with ipv4 protocol */ 30 typedef __be32 mcast_group_v4; 31 32 /* structure to describe a local or remote subscriber of a multicast group 33 * for the ipv4 protocol. 34 */ 35 struct mcast_subscriber_v4 { 36 /* source address of the subscriber, big endian */ 37 __be32 saddr; 38 /* local ifindex of subscriber of exit interface is remote subscriber */ 39 __u32 ifindex; 40 /* reserved */ 41 __u16 pad1; 42 /* reserved */ 43 __u8 pad2; 44 /* flags for further subscriber description */ 45 __u8 flags; 46 }; 47 48 #ifdef ENABLE_MULTICAST 49 50 #define MCAST_MAX_GROUP 1024 51 #define MCAST_MAX_SUBSCRIBERS 1024 52 /* used to bound iteration of group records within an igmpv3 membership report */ 53 #define MCAST_MAX_GREC 24 54 55 /* Multicast group map is a nested hash of maps. 56 * The outer map is keyed by a 'mcast_group_v4' multicast group address. 57 * The inner value is an hash map of 'mcast_subscriber_v4' structures keyed 58 * by a their IPv4 source address in big endian format. 59 */ 60 struct { 61 __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); 62 __type(key, mcast_group_v4); 63 __type(value, __u32); 64 __uint(pinning, LIBBPF_PIN_BY_NAME); 65 __uint(max_entries, MCAST_MAX_GROUP); 66 __uint(map_flags, CONDITIONAL_PREALLOC); 67 /* Multicast group subscribers inner map definition */ 68 __array(values, struct { 69 __uint(type, BPF_MAP_TYPE_HASH); 70 __uint(key_size, sizeof(__be32)); 71 __uint(value_size, sizeof(struct mcast_subscriber_v4)); 72 __uint(max_entries, MCAST_MAX_SUBSCRIBERS); 73 __uint(map_flags, CONDITIONAL_PREALLOC); 74 }); 75 } cilium_mcast_group_outer_v4_map __section_maps_btf; 76 77 /* lookup a subscriber map for the given ipv4 multicast group 78 * returns a void pointer to a inner subscriper map if one exists 79 */ 80 static __always_inline void *mcast_lookup_subscriber_map(__be32 *group) 81 { 82 return map_lookup_elem(&cilium_mcast_group_outer_v4_map, group); 83 } 84 85 /* returns 1 if ip4 header is followed by an IGMP payload, 0 if not */ 86 static __always_inline bool mcast_ipv4_is_igmp(const struct iphdr *ip4) 87 { 88 if (ip4->protocol == IPPROTO_IGMP) 89 return 1; 90 return 0; 91 } 92 93 /* returns the IGMP type for a given IGMP message 94 * a call to 'mcast_ipv4_is_igmp' must be used prior to this call to ensure an 95 * igmp message follows the ipv4 header 96 */ 97 static __always_inline __s32 mcast_ipv4_igmp_type(const struct iphdr *ip4, 98 const void *data, 99 const void *data_end) 100 { 101 const struct igmphdr *hdr; 102 int ip_len = ip4->ihl * 4; 103 104 if (data + ETH_HLEN + ip_len + sizeof(struct igmphdr) > data_end) 105 return DROP_INVALID; 106 107 hdr = data + ETH_HLEN + ip_len; 108 return hdr->type; 109 } 110 111 /* add a subscriber to a subscriber map */ 112 /* returns 1 on success or DROP_INVALID for error */ 113 static __always_inline __s32 mcast_ipv4_add_subscriber(void *map, 114 struct mcast_subscriber_v4 *sub) 115 { 116 if ((map_update_elem(map, &sub->saddr, sub, BPF_ANY) != 0)) 117 return DROP_INVALID; 118 return 1; 119 } 120 121 /* remove a subscriber to a subscriber map */ 122 /* always returns 1 */ 123 static __always_inline void mcast_ipv4_remove_subscriber(void *map, 124 struct mcast_subscriber_v4 *sub) 125 { 126 map_delete_elem(map, &sub->saddr); 127 } 128 129 static __always_inline __s32 mcast_ipv4_handle_v3_membership_report(void *ctx, 130 void *group_map, 131 const struct iphdr *ip4, 132 const void *data, 133 const void *data_end) 134 { 135 struct mcast_subscriber_v4 subscriber = { 136 .saddr = ip4->saddr, 137 .ifindex = ctx_get_ingress_ifindex(ctx) 138 }; 139 const struct igmpv3_report *rep; 140 const struct igmpv3_grec *rec; 141 int ip_len = ip4->ihl * 4; 142 __s32 subscribed = 0; 143 void *sub_map = 0; 144 __u16 ngrec = 0; 145 __u32 i = 0; 146 147 if (data + ETH_HLEN + ip_len + sizeof(struct igmpv3_report) > data_end) 148 return DROP_INVALID; 149 150 rep = data + ETH_HLEN + ip_len; 151 152 ngrec = bpf_ntohs(rep->ngrec); 153 154 if (ngrec > MCAST_MAX_GREC) 155 return DROP_INVALID; 156 157 /* start a bounded loop which exits when we hit the total number of 158 * group records in the membership report. 159 * 160 * add our subscriber into each group advertised in the report. 161 */ 162 #pragma unroll 163 for (i = 0; i < MCAST_MAX_GREC; i++) { 164 /* Wrap this in an if, instead of breaking out of the loop, 165 * so unroll has a constant number of iterations. 166 * 167 * Compiler was not happy with a continue; statement and the 168 * wrap is necessary. 169 * 170 * remove this when Cilium's min supported kernel version is 171 * >= 5.3 with support for bounded loops. 172 */ 173 if (i < ngrec) { 174 rec = &rep->grec[i]; 175 176 /* verifier seems to only be happy with a packet bounds check 177 * per iteration 178 */ 179 if ((void *)rec + sizeof(struct igmpv3_grec) > data_end) 180 return DROP_INVALID; 181 182 /* lookup user configured multicast group */ 183 sub_map = map_lookup_elem(group_map, &rec->grec_mca); 184 if (!sub_map) 185 continue; 186 187 /* note: 188 * the datapath currently assumes that no source addresses are 189 * present in the exclude message, indicating a join from all 190 * sources message 191 */ 192 if (rec->grec_type == IGMPV3_CHANGE_TO_EXCLUDE) { 193 subscribed = mcast_ipv4_add_subscriber(sub_map, &subscriber); 194 if (subscribed != 1) 195 return DROP_INVALID; 196 continue; 197 } 198 199 /* note: 200 * the datapath currently assumes that no source addresses are 201 * present in the include message, indicating a leave from all 202 * sources message 203 */ 204 if (rec->grec_type == IGMPV3_CHANGE_TO_INCLUDE) 205 mcast_ipv4_remove_subscriber(sub_map, &subscriber); 206 } 207 } 208 if (subscribed) 209 return DROP_IGMP_SUBSCRIBED; 210 211 return DROP_IGMP_HANDLED; 212 } 213 214 static __always_inline __s32 mcast_ipv4_handle_v2_membership_report(void *ctx, 215 void *group_map, 216 const struct iphdr *ip4, 217 const void *data, 218 const void *data_end) 219 { 220 struct mcast_subscriber_v4 subscriber = { 221 .saddr = ip4->saddr, 222 .ifindex = ctx_get_ingress_ifindex(ctx) 223 }; 224 int ip_len = ip4->ihl * 4; 225 const struct igmphdr *hdr; 226 void *sub_map = 0; 227 228 if (data + ETH_HLEN + ip_len + sizeof(struct igmphdr) > data_end) 229 return DROP_INVALID; 230 231 hdr = data + ETH_HLEN + ip_len; 232 233 if (hdr->type != IGMPV2_HOST_MEMBERSHIP_REPORT) 234 return DROP_INVALID; 235 236 /* lookup user configured multicast group */ 237 sub_map = map_lookup_elem(group_map, &hdr->group); 238 if (!sub_map) 239 return DROP_IGMP_HANDLED; 240 241 if (mcast_ipv4_add_subscriber(sub_map, &subscriber)) 242 return DROP_IGMP_SUBSCRIBED; 243 244 return DROP_IGMP_HANDLED; 245 } 246 247 static __always_inline __s32 mcast_ipv4_handle_igmp_leave(void *group_map, 248 const struct iphdr *ip4, 249 const void *data, 250 const void *data_end) 251 { 252 struct mcast_subscriber_v4 subscriber = { 253 .saddr = ip4->saddr, 254 }; 255 int ip_len = ip4->ihl * 4; 256 const struct igmphdr *hdr; 257 void *sub_map = 0; 258 259 if (data + ETH_HLEN + ip_len + sizeof(struct igmphdr) > data_end) 260 return DROP_INVALID; 261 262 hdr = data + ETH_HLEN + ip_len; 263 264 if (hdr->type != IGMP_HOST_LEAVE_MESSAGE) 265 return DROP_INVALID; 266 267 /* lookup user configured multicast group */ 268 sub_map = map_lookup_elem(group_map, &hdr->group); 269 if (!sub_map) 270 return DROP_IGMP_HANDLED; 271 272 mcast_ipv4_remove_subscriber(sub_map, &subscriber); 273 274 return DROP_IGMP_HANDLED; 275 } 276 277 /* ipv4 igmp handler which dispatches to specific igmp message handlers */ 278 static __always_inline __s32 mcast_ipv4_handle_igmp(void *ctx, 279 struct iphdr *ip4, 280 void *data, 281 void *data_end) 282 { 283 __s32 igmp_type = mcast_ipv4_igmp_type(ip4, data, data_end); 284 285 if (igmp_type < 0) 286 return igmp_type; 287 288 switch (igmp_type) { 289 case IGMPV3_HOST_MEMBERSHIP_REPORT: 290 return mcast_ipv4_handle_v3_membership_report(ctx, 291 &cilium_mcast_group_outer_v4_map, 292 ip4, 293 data, 294 data_end); 295 case IGMPV2_HOST_MEMBERSHIP_REPORT: 296 return mcast_ipv4_handle_v2_membership_report(ctx, 297 &cilium_mcast_group_outer_v4_map, 298 ip4, 299 data, 300 data_end); 301 case IGMP_HOST_LEAVE_MESSAGE: 302 return mcast_ipv4_handle_igmp_leave(&cilium_mcast_group_outer_v4_map, 303 ip4, 304 data, 305 data_end); 306 } 307 308 return DROP_IGMP_HANDLED; 309 } 310 311 /* encodes a multicast mac address given a ipv4 group address 312 * results are in big endian format and written directly into 'mac' 313 */ 314 static __always_inline void mcast_encode_ipv4_mac(union macaddr *mac, 315 const __u8 group[4]) 316 { 317 mac->addr[0] = 0x01; 318 mac->addr[1] = 0x00; 319 mac->addr[2] = 0x0E; 320 mac->addr[3] = group[1] & 0x7F; 321 mac->addr[4] = group[2]; 322 mac->addr[5] = group[3]; 323 } 324 325 /* callback data used for __mcast_ep_delivery */ 326 struct _mcast_ep_delivery_ctx { 327 void *ctx; 328 __s32 ret; 329 }; 330 331 /* performs packet replication and delivery for multicast traffic egressing 332 * an endpoint. 333 * 334 * to be used as a callback function for bpf_for_each_map_elem 335 * 336 * callback functions must return 1 or 0 to pass eBPF verifier. 337 */ 338 static long __mcast_ep_delivery(__maybe_unused void *sub_map, 339 __maybe_unused const __u32 *key, 340 const struct mcast_subscriber_v4 *sub, 341 struct _mcast_ep_delivery_ctx *cb_ctx) 342 { 343 int ret = 0; 344 __u32 tunnel_id = WORLD_ID; 345 __u8 from_overlay = 0; 346 struct bpf_tunnel_key tun_key = {0}; 347 348 if (!cb_ctx || !sub) 349 return 1; 350 351 if (!cb_ctx->ctx) 352 return 1; 353 354 if (!sub->ifindex) 355 return 1; 356 357 from_overlay = (ctx_get_ingress_ifindex(cb_ctx->ctx) == ENCAP_IFINDEX); 358 359 /* set tunnel key for remote delivery 360 * this helper sets the tunnel metadata on the skb_buff but only 361 * tunnel drivers will read it, therefore any local delivery will 362 * simply ignore if its present and deliver without an issue. 363 * 364 * if the ingress interface is set to our tunnel interface, do not 365 * perform delivery, this would cause a loop, since the sender's node 366 * already delivered to all remote nodes. 367 * 368 * checking ctx->ingress_ifindex is reliable since 369 * __netif_receive_skb_core sets the skb's input interface before 370 * calling ingress TC programs. 371 */ 372 if (sub->flags & MCAST_SUB_F_REMOTE) { 373 if (from_overlay) 374 return 0; 375 376 #ifdef ENABLE_ENCRYPTED_OVERLAY 377 /* if encrypted overlay is enabled we'll mark the packet for 378 * encryption via the tunnel ID. 379 */ 380 tunnel_id = ENCRYPTED_OVERLAY_ID; 381 #endif /* ENABLE_ENCRYPTED_OVERLAY */ 382 tun_key.tunnel_id = tunnel_id; 383 tun_key.remote_ipv4 = bpf_ntohl(sub->saddr); 384 tun_key.tunnel_ttl = IPDEFTTL; 385 386 ret = ctx_set_tunnel_key(cb_ctx->ctx, 387 &tun_key, 388 TUNNEL_KEY_WITHOUT_SRC_IP, 389 BPF_F_ZERO_CSUM_TX); 390 391 if (ret < 0) { 392 cb_ctx->ret = ret; 393 return 1; 394 } 395 } 396 397 ret = clone_redirect(cb_ctx->ctx, sub->ifindex, 0); 398 if (ret != 0) { 399 cb_ctx->ret = ret; 400 return 1; 401 } 402 return 0; 403 }; 404 405 /* tailcall to perform multicast packet replication and delivery. 406 * when this call is entered we should already know that the packet is destined 407 * for a multicast group and the multicast group exists in 408 * cilium_mcast_group_outer_v4_map 409 */ 410 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_MULTICAST_EP_DELIVERY) 411 int tail_mcast_ep_delivery(struct __ctx_buff *ctx) 412 { 413 struct _mcast_ep_delivery_ctx cb_ctx = { 414 .ctx = ctx, 415 .ret = 0 416 }; 417 union macaddr mac = {0}; 418 void *data, *data_end; 419 struct iphdr *ip4 = 0; 420 void *sub_map = 0; 421 422 if (!revalidate_data(ctx, &data, &data_end, &ip4)) 423 return DROP_INVALID; 424 425 sub_map = map_lookup_elem(&cilium_mcast_group_outer_v4_map, &ip4->daddr); 426 if (!sub_map) 427 return DROP_INVALID; 428 429 mcast_encode_ipv4_mac(&mac, (__u8 *)&ip4->daddr); 430 431 eth_store_daddr(ctx, &mac.addr[0], 0); 432 433 for_each_map_elem(sub_map, __mcast_ep_delivery, &cb_ctx, 0); 434 435 return send_drop_notify(ctx, 436 UNKNOWN_ID, 437 UNKNOWN_ID, 438 TRACE_EP_ID_UNKNOWN, 439 DROP_MULTICAST_HANDLED, 440 CTX_ACT_DROP, 441 METRIC_INGRESS); 442 } 443 444 #endif /* ENABLE_MULTICAST */