github.com/cilium/cilium@v1.16.2/test/l4lb/test_tc_tunnel.c (about) 1 // SPDX-License-Identifier: GPL-2.0 2 // 3 // Taken from https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_tc_tunnel.c?h=v5.12 4 5 /* In-place tunneling */ 6 7 #include <stdbool.h> 8 #include <string.h> 9 10 #include <linux/stddef.h> 11 #include <linux/bpf.h> 12 #include <linux/if_ether.h> 13 #include <linux/in.h> 14 #include <linux/ip.h> 15 #include <linux/ipv6.h> 16 #include <linux/mpls.h> 17 #include <linux/tcp.h> 18 #include <linux/udp.h> 19 #include <linux/pkt_cls.h> 20 #include <linux/types.h> 21 22 #include <bpf/bpf_endian.h> 23 #include <bpf/bpf_helpers.h> 24 25 #define ERROR(ret) do {\ 26 char fmt[] = "ERROR line:%d ret:%d\n";\ 27 bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ 28 } while (0) 29 30 static const int cfg_port = 8000; 31 32 static const int cfg_udp_src = 20000; 33 34 #define UDP_PORT 5555 35 #define MPLS_OVER_UDP_PORT 6635 36 #define ETH_OVER_UDP_PORT 7777 37 38 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ 39 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | 40 MPLS_LS_S_MASK | 0xff); 41 42 struct gre_hdr { 43 __be16 flags; 44 __be16 protocol; 45 } __attribute__((packed)); 46 47 union l4hdr { 48 struct udphdr udp; 49 struct gre_hdr gre; 50 }; 51 52 struct v4hdr { 53 struct iphdr ip; 54 union l4hdr l4hdr; 55 __u8 pad[16]; /* enough space for L2 header */ 56 } __attribute__((packed)); 57 58 struct v6hdr { 59 struct ipv6hdr ip; 60 union l4hdr l4hdr; 61 __u8 pad[16]; /* enough space for L2 header */ 62 } __attribute__((packed)); 63 64 static __always_inline void set_ipv4_csum(struct iphdr *iph) 65 { 66 __u16 *iph16 = (__u16 *)iph; 67 __u32 csum; 68 int i; 69 70 iph->check = 0; 71 72 #pragma clang loop unroll(full) 73 for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++) 74 csum += *iph16++; 75 76 iph->check = ~((csum & 0xffff) + (csum >> 16)); 77 } 78 79 static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, 80 __u16 l2_proto) 81 { 82 __u16 udp_dst = UDP_PORT; 83 struct iphdr iph_inner; 84 struct v4hdr h_outer; 85 struct tcphdr tcph; 86 int olen, l2_len; 87 int tcp_off; 88 __u64 flags; 89 90 /* Most tests encapsulate a packet into a tunnel with the same 91 * network protocol, and derive the outer header fields from 92 * the inner header. 93 * 94 * The 6in4 case tests different inner and outer protocols. As 95 * the inner is ipv6, but the outer expects an ipv4 header as 96 * input, manually build a struct iphdr based on the ipv6hdr. 97 */ 98 if (encap_proto == IPPROTO_IPV6) { 99 const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1; 100 const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2; 101 struct ipv6hdr iph6_inner; 102 103 /* Read the IPv6 header */ 104 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner, 105 sizeof(iph6_inner)) < 0) 106 return TC_ACT_OK; 107 108 /* Derive the IPv4 header fields from the IPv6 header */ 109 memset(&iph_inner, 0, sizeof(iph_inner)); 110 iph_inner.version = 4; 111 iph_inner.ihl = 5; 112 iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) + 113 bpf_ntohs(iph6_inner.payload_len)); 114 iph_inner.ttl = iph6_inner.hop_limit - 1; 115 iph_inner.protocol = iph6_inner.nexthdr; 116 iph_inner.saddr = __bpf_constant_htonl(saddr); 117 iph_inner.daddr = __bpf_constant_htonl(daddr); 118 119 tcp_off = sizeof(iph6_inner); 120 } else { 121 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, 122 sizeof(iph_inner)) < 0) 123 return TC_ACT_OK; 124 125 tcp_off = sizeof(iph_inner); 126 } 127 128 /* filter only packets we want */ 129 if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP) 130 return TC_ACT_OK; 131 132 if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off, 133 &tcph, sizeof(tcph)) < 0) 134 return TC_ACT_OK; 135 136 if (tcph.dest != __bpf_constant_htons(cfg_port)) 137 return TC_ACT_OK; 138 139 olen = sizeof(h_outer.ip); 140 l2_len = 0; 141 142 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4; 143 144 switch (l2_proto) { 145 case ETH_P_MPLS_UC: 146 l2_len = sizeof(mpls_label); 147 udp_dst = MPLS_OVER_UDP_PORT; 148 break; 149 case ETH_P_TEB: 150 l2_len = ETH_HLEN; 151 udp_dst = ETH_OVER_UDP_PORT; 152 break; 153 } 154 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); 155 156 switch (encap_proto) { 157 case IPPROTO_GRE: 158 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; 159 olen += sizeof(h_outer.l4hdr.gre); 160 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); 161 h_outer.l4hdr.gre.flags = 0; 162 break; 163 case IPPROTO_UDP: 164 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; 165 olen += sizeof(h_outer.l4hdr.udp); 166 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); 167 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); 168 h_outer.l4hdr.udp.check = 0; 169 h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) + 170 sizeof(h_outer.l4hdr.udp) + 171 l2_len); 172 break; 173 case IPPROTO_IPIP: 174 case IPPROTO_IPV6: 175 break; 176 default: 177 return TC_ACT_OK; 178 } 179 180 /* add L2 encap (if specified) */ 181 switch (l2_proto) { 182 case ETH_P_MPLS_UC: 183 *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; 184 break; 185 case ETH_P_TEB: 186 if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, 187 ETH_HLEN)) 188 return TC_ACT_SHOT; 189 break; 190 } 191 olen += l2_len; 192 193 /* add room between mac and network header */ 194 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) 195 return TC_ACT_SHOT; 196 197 /* prepare new outer network header */ 198 h_outer.ip = iph_inner; 199 h_outer.ip.tot_len = bpf_htons(olen + 200 bpf_ntohs(h_outer.ip.tot_len)); 201 h_outer.ip.protocol = encap_proto; 202 203 set_ipv4_csum((void *)&h_outer.ip); 204 205 /* store new outer network header */ 206 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, 207 BPF_F_INVALIDATE_HASH) < 0) 208 return TC_ACT_SHOT; 209 210 /* if changing outer proto type, update eth->h_proto */ 211 if (encap_proto == IPPROTO_IPV6) { 212 struct ethhdr eth; 213 214 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0) 215 return TC_ACT_SHOT; 216 eth.h_proto = bpf_htons(ETH_P_IP); 217 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0) 218 return TC_ACT_SHOT; 219 } 220 221 return TC_ACT_OK; 222 } 223 224 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, 225 __u16 l2_proto) 226 { 227 __u16 udp_dst = UDP_PORT; 228 struct ipv6hdr iph_inner; 229 struct v6hdr h_outer; 230 struct tcphdr tcph; 231 int olen, l2_len; 232 __u16 tot_len; 233 __u64 flags; 234 235 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, 236 sizeof(iph_inner)) < 0) 237 return TC_ACT_OK; 238 239 /* filter only packets we want */ 240 if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), 241 &tcph, sizeof(tcph)) < 0) 242 return TC_ACT_OK; 243 244 if (tcph.dest != __bpf_constant_htons(cfg_port)) 245 return TC_ACT_OK; 246 247 olen = sizeof(h_outer.ip); 248 l2_len = 0; 249 250 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6; 251 252 switch (l2_proto) { 253 case ETH_P_MPLS_UC: 254 l2_len = sizeof(mpls_label); 255 udp_dst = MPLS_OVER_UDP_PORT; 256 break; 257 case ETH_P_TEB: 258 l2_len = ETH_HLEN; 259 udp_dst = ETH_OVER_UDP_PORT; 260 break; 261 } 262 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); 263 264 switch (encap_proto) { 265 case IPPROTO_GRE: 266 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE; 267 olen += sizeof(h_outer.l4hdr.gre); 268 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto); 269 h_outer.l4hdr.gre.flags = 0; 270 break; 271 case IPPROTO_UDP: 272 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP; 273 olen += sizeof(h_outer.l4hdr.udp); 274 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); 275 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); 276 tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) + 277 sizeof(h_outer.l4hdr.udp); 278 h_outer.l4hdr.udp.check = 0; 279 h_outer.l4hdr.udp.len = bpf_htons(tot_len); 280 break; 281 case IPPROTO_IPV6: 282 break; 283 default: 284 return TC_ACT_OK; 285 } 286 287 /* add L2 encap (if specified) */ 288 switch (l2_proto) { 289 case ETH_P_MPLS_UC: 290 *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; 291 break; 292 case ETH_P_TEB: 293 if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, 294 ETH_HLEN)) 295 return TC_ACT_SHOT; 296 break; 297 } 298 olen += l2_len; 299 300 /* add room between mac and network header */ 301 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags)) 302 return TC_ACT_SHOT; 303 304 /* prepare new outer network header */ 305 h_outer.ip = iph_inner; 306 h_outer.ip.payload_len = bpf_htons(olen + 307 bpf_ntohs(h_outer.ip.payload_len)); 308 309 h_outer.ip.nexthdr = encap_proto; 310 311 /* store new outer network header */ 312 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen, 313 BPF_F_INVALIDATE_HASH) < 0) 314 return TC_ACT_SHOT; 315 316 return TC_ACT_OK; 317 } 318 319 SEC("encap_ipip_none") 320 int __encap_ipip_none(struct __sk_buff *skb) 321 { 322 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 323 return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP); 324 else 325 return TC_ACT_OK; 326 } 327 328 SEC("encap_gre_none") 329 int __encap_gre_none(struct __sk_buff *skb) 330 { 331 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 332 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP); 333 else 334 return TC_ACT_OK; 335 } 336 337 SEC("encap_gre_mpls") 338 int __encap_gre_mpls(struct __sk_buff *skb) 339 { 340 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 341 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC); 342 else 343 return TC_ACT_OK; 344 } 345 346 SEC("encap_gre_eth") 347 int __encap_gre_eth(struct __sk_buff *skb) 348 { 349 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 350 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB); 351 else 352 return TC_ACT_OK; 353 } 354 355 SEC("encap_udp_none") 356 int __encap_udp_none(struct __sk_buff *skb) 357 { 358 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 359 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP); 360 else 361 return TC_ACT_OK; 362 } 363 364 SEC("encap_udp_mpls") 365 int __encap_udp_mpls(struct __sk_buff *skb) 366 { 367 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 368 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC); 369 else 370 return TC_ACT_OK; 371 } 372 373 SEC("encap_udp_eth") 374 int __encap_udp_eth(struct __sk_buff *skb) 375 { 376 if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) 377 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB); 378 else 379 return TC_ACT_OK; 380 } 381 382 SEC("encap_sit_none") 383 int __encap_sit_none(struct __sk_buff *skb) 384 { 385 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 386 return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP); 387 else 388 return TC_ACT_OK; 389 } 390 391 SEC("encap_ip6tnl_none") 392 int __encap_ip6tnl_none(struct __sk_buff *skb) 393 { 394 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 395 return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6); 396 else 397 return TC_ACT_OK; 398 } 399 400 SEC("encap_ip6gre_none") 401 int __encap_ip6gre_none(struct __sk_buff *skb) 402 { 403 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 404 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6); 405 else 406 return TC_ACT_OK; 407 } 408 409 SEC("encap_ip6gre_mpls") 410 int __encap_ip6gre_mpls(struct __sk_buff *skb) 411 { 412 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 413 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC); 414 else 415 return TC_ACT_OK; 416 } 417 418 SEC("encap_ip6gre_eth") 419 int __encap_ip6gre_eth(struct __sk_buff *skb) 420 { 421 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 422 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB); 423 else 424 return TC_ACT_OK; 425 } 426 427 SEC("encap_ip6udp_none") 428 int __encap_ip6udp_none(struct __sk_buff *skb) 429 { 430 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 431 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6); 432 else 433 return TC_ACT_OK; 434 } 435 436 SEC("encap_ip6udp_mpls") 437 int __encap_ip6udp_mpls(struct __sk_buff *skb) 438 { 439 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 440 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC); 441 else 442 return TC_ACT_OK; 443 } 444 445 SEC("encap_ip6udp_eth") 446 int __encap_ip6udp_eth(struct __sk_buff *skb) 447 { 448 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) 449 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB); 450 else 451 return TC_ACT_OK; 452 } 453 454 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) 455 { 456 struct gre_hdr greh; 457 struct udphdr udph; 458 int olen = len; 459 460 switch (proto) { 461 case IPPROTO_IPIP: 462 case IPPROTO_IPV6: 463 //return TC_ACT_SHOT; 464 break; 465 case IPPROTO_GRE: 466 olen += sizeof(struct gre_hdr); 467 if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0) 468 return TC_ACT_OK; 469 switch (bpf_ntohs(greh.protocol)) { 470 case ETH_P_MPLS_UC: 471 olen += sizeof(mpls_label); 472 break; 473 case ETH_P_TEB: 474 olen += ETH_HLEN; 475 break; 476 } 477 break; 478 case IPPROTO_UDP: 479 olen += sizeof(struct udphdr); 480 if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0) 481 return TC_ACT_OK; 482 switch (bpf_ntohs(udph.dest)) { 483 case MPLS_OVER_UDP_PORT: 484 olen += sizeof(mpls_label); 485 break; 486 case ETH_OVER_UDP_PORT: 487 olen += ETH_HLEN; 488 break; 489 } 490 break; 491 default: 492 return TC_ACT_OK; 493 } 494 495 if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO)) 496 return TC_ACT_SHOT; 497 498 return TC_ACT_OK; 499 } 500 501 static int decap_ipv4(struct __sk_buff *skb) 502 { 503 struct iphdr iph_outer; 504 505 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, 506 sizeof(iph_outer)) < 0) 507 return TC_ACT_OK; 508 509 if (iph_outer.ihl != 5) 510 return TC_ACT_OK; 511 512 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), 513 iph_outer.protocol); 514 } 515 516 static int decap_ipv6(struct __sk_buff *skb) 517 { 518 struct ipv6hdr iph_outer; 519 520 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, 521 sizeof(iph_outer)) < 0) 522 return TC_ACT_OK; 523 524 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), 525 iph_outer.nexthdr); 526 } 527 528 SEC("decap") 529 int decap_f(struct __sk_buff *skb) 530 { 531 switch (skb->protocol) { 532 case __bpf_constant_htons(ETH_P_IP): 533 return decap_ipv4(skb); 534 case __bpf_constant_htons(ETH_P_IPV6): 535 return decap_ipv6(skb); 536 default: 537 /* does not match, ignore */ 538 return TC_ACT_OK; 539 } 540 } 541 542 char __license[] SEC("license") = "GPL";