github.com/cilium/cilium@v1.16.2/test/l4lb/test_tc_tunnel.c (about)

     1  // SPDX-License-Identifier: GPL-2.0
     2  //
     3  // Taken from https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_tc_tunnel.c?h=v5.12
     4  
     5  /* In-place tunneling */
     6  
     7  #include <stdbool.h>
     8  #include <string.h>
     9  
    10  #include <linux/stddef.h>
    11  #include <linux/bpf.h>
    12  #include <linux/if_ether.h>
    13  #include <linux/in.h>
    14  #include <linux/ip.h>
    15  #include <linux/ipv6.h>
    16  #include <linux/mpls.h>
    17  #include <linux/tcp.h>
    18  #include <linux/udp.h>
    19  #include <linux/pkt_cls.h>
    20  #include <linux/types.h>
    21  
    22  #include <bpf/bpf_endian.h>
    23  #include <bpf/bpf_helpers.h>
    24  
/*
 * Trace helper: prints the source line of the call site and the given
 * return code through bpf_trace_printk(). The format string is kept in a
 * local array so that both the buffer and its size can be passed along.
 */
#define ERROR(ret) do {\
		char fmt[] = "ERROR line:%d ret:%d\n";\
		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
	} while (0)
    29  
/* TCP destination port (host order) a packet must carry to be encapsulated. */
static const int cfg_port = 8000;

/* Source port (host order) written into the outer UDP header. */
static const int cfg_udp_src = 20000;

/*
 * Outer UDP destination ports. UDP_PORT is the default; the other two are
 * used when an MPLS label or an Ethernet header follows the UDP header.
 */
#define	UDP_PORT		5555
#define	MPLS_OVER_UDP_PORT	6635
#define	ETH_OVER_UDP_PORT	7777

/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);
    41  
/* Four-byte GRE header: a flags word followed by the payload protocol. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));
    46  
/* Outer transport header: either a UDP header or a GRE header. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};
    51  
/*
 * Scratch buffer for the outer headers of an IPv4 tunnel: IPv4 header,
 * optional UDP/GRE header, then room for an optional L2 header. The encap
 * code writes the L2 header directly after whichever headers are in use.
 */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[16];			/* enough space for L2 header */
} __attribute__((packed));
    57  
/*
 * Scratch buffer for the outer headers of an IPv6 tunnel: IPv6 header,
 * optional UDP/GRE header, then room for an optional L2 header. The encap
 * code writes the L2 header directly after whichever headers are in use.
 */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[16];			/* enough space for L2 header */
} __attribute__((packed));
    63  
    64  static __always_inline void set_ipv4_csum(struct iphdr *iph)
    65  {
    66  	__u16 *iph16 = (__u16 *)iph;
    67  	__u32 csum;
    68  	int i;
    69  
    70  	iph->check = 0;
    71  
    72  #pragma clang loop unroll(full)
    73  	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
    74  		csum += *iph16++;
    75  
    76  	iph->check = ~((csum & 0xffff) + (csum >> 16));
    77  }
    78  
    79  static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
    80  				      __u16 l2_proto)
    81  {
    82  	__u16 udp_dst = UDP_PORT;
    83  	struct iphdr iph_inner;
    84  	struct v4hdr h_outer;
    85  	struct tcphdr tcph;
    86  	int olen, l2_len;
    87  	int tcp_off;
    88  	__u64 flags;
    89  
    90  	/* Most tests encapsulate a packet into a tunnel with the same
    91  	 * network protocol, and derive the outer header fields from
    92  	 * the inner header.
    93  	 *
    94  	 * The 6in4 case tests different inner and outer protocols. As
    95  	 * the inner is ipv6, but the outer expects an ipv4 header as
    96  	 * input, manually build a struct iphdr based on the ipv6hdr.
    97  	 */
    98  	if (encap_proto == IPPROTO_IPV6) {
    99  		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
   100  		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
   101  		struct ipv6hdr iph6_inner;
   102  
   103  		/* Read the IPv6 header */
   104  		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
   105  				       sizeof(iph6_inner)) < 0)
   106  			return TC_ACT_OK;
   107  
   108  		/* Derive the IPv4 header fields from the IPv6 header */
   109  		memset(&iph_inner, 0, sizeof(iph_inner));
   110  		iph_inner.version = 4;
   111  		iph_inner.ihl = 5;
   112  		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
   113  				    bpf_ntohs(iph6_inner.payload_len));
   114  		iph_inner.ttl = iph6_inner.hop_limit - 1;
   115  		iph_inner.protocol = iph6_inner.nexthdr;
   116  		iph_inner.saddr = __bpf_constant_htonl(saddr);
   117  		iph_inner.daddr = __bpf_constant_htonl(daddr);
   118  
   119  		tcp_off = sizeof(iph6_inner);
   120  	} else {
   121  		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
   122  				       sizeof(iph_inner)) < 0)
   123  			return TC_ACT_OK;
   124  
   125  		tcp_off = sizeof(iph_inner);
   126  	}
   127  
   128  	/* filter only packets we want */
   129  	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
   130  		return TC_ACT_OK;
   131  
   132  	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
   133  			       &tcph, sizeof(tcph)) < 0)
   134  		return TC_ACT_OK;
   135  
   136  	if (tcph.dest != __bpf_constant_htons(cfg_port))
   137  		return TC_ACT_OK;
   138  
   139  	olen = sizeof(h_outer.ip);
   140  	l2_len = 0;
   141  
   142  	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
   143  
   144  	switch (l2_proto) {
   145  	case ETH_P_MPLS_UC:
   146  		l2_len = sizeof(mpls_label);
   147  		udp_dst = MPLS_OVER_UDP_PORT;
   148  		break;
   149  	case ETH_P_TEB:
   150  		l2_len = ETH_HLEN;
   151  		udp_dst = ETH_OVER_UDP_PORT;
   152  		break;
   153  	}
   154  	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
   155  
   156  	switch (encap_proto) {
   157  	case IPPROTO_GRE:
   158  		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
   159  		olen += sizeof(h_outer.l4hdr.gre);
   160  		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
   161  		h_outer.l4hdr.gre.flags = 0;
   162  		break;
   163  	case IPPROTO_UDP:
   164  		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
   165  		olen += sizeof(h_outer.l4hdr.udp);
   166  		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
   167  		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
   168  		h_outer.l4hdr.udp.check = 0;
   169  		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
   170  						  sizeof(h_outer.l4hdr.udp) +
   171  						  l2_len);
   172  		break;
   173  	case IPPROTO_IPIP:
   174  	case IPPROTO_IPV6:
   175  		break;
   176  	default:
   177  		return TC_ACT_OK;
   178  	}
   179  
   180  	/* add L2 encap (if specified) */
   181  	switch (l2_proto) {
   182  	case ETH_P_MPLS_UC:
   183  		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
   184  		break;
   185  	case ETH_P_TEB:
   186  		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
   187  				       ETH_HLEN))
   188  			return TC_ACT_SHOT;
   189  		break;
   190  	}
   191  	olen += l2_len;
   192  
   193  	/* add room between mac and network header */
   194  	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
   195  		return TC_ACT_SHOT;
   196  
   197  	/* prepare new outer network header */
   198  	h_outer.ip = iph_inner;
   199  	h_outer.ip.tot_len = bpf_htons(olen +
   200  				       bpf_ntohs(h_outer.ip.tot_len));
   201  	h_outer.ip.protocol = encap_proto;
   202  
   203  	set_ipv4_csum((void *)&h_outer.ip);
   204  
   205  	/* store new outer network header */
   206  	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
   207  				BPF_F_INVALIDATE_HASH) < 0)
   208  		return TC_ACT_SHOT;
   209  
   210  	/* if changing outer proto type, update eth->h_proto */
   211  	if (encap_proto == IPPROTO_IPV6) {
   212  		struct ethhdr eth;
   213  
   214  		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
   215  			return TC_ACT_SHOT;
   216  		eth.h_proto = bpf_htons(ETH_P_IP);
   217  		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
   218  			return TC_ACT_SHOT;
   219  	}
   220  
   221  	return TC_ACT_OK;
   222  }
   223  
   224  static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
   225  				      __u16 l2_proto)
   226  {
   227  	__u16 udp_dst = UDP_PORT;
   228  	struct ipv6hdr iph_inner;
   229  	struct v6hdr h_outer;
   230  	struct tcphdr tcph;
   231  	int olen, l2_len;
   232  	__u16 tot_len;
   233  	__u64 flags;
   234  
   235  	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
   236  			       sizeof(iph_inner)) < 0)
   237  		return TC_ACT_OK;
   238  
   239  	/* filter only packets we want */
   240  	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
   241  			       &tcph, sizeof(tcph)) < 0)
   242  		return TC_ACT_OK;
   243  
   244  	if (tcph.dest != __bpf_constant_htons(cfg_port))
   245  		return TC_ACT_OK;
   246  
   247  	olen = sizeof(h_outer.ip);
   248  	l2_len = 0;
   249  
   250  	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
   251  
   252  	switch (l2_proto) {
   253  	case ETH_P_MPLS_UC:
   254  		l2_len = sizeof(mpls_label);
   255  		udp_dst = MPLS_OVER_UDP_PORT;
   256  		break;
   257  	case ETH_P_TEB:
   258  		l2_len = ETH_HLEN;
   259  		udp_dst = ETH_OVER_UDP_PORT;
   260  		break;
   261  	}
   262  	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
   263  
   264  	switch (encap_proto) {
   265  	case IPPROTO_GRE:
   266  		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
   267  		olen += sizeof(h_outer.l4hdr.gre);
   268  		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
   269  		h_outer.l4hdr.gre.flags = 0;
   270  		break;
   271  	case IPPROTO_UDP:
   272  		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
   273  		olen += sizeof(h_outer.l4hdr.udp);
   274  		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
   275  		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
   276  		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
   277  			  sizeof(h_outer.l4hdr.udp);
   278  		h_outer.l4hdr.udp.check = 0;
   279  		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
   280  		break;
   281  	case IPPROTO_IPV6:
   282  		break;
   283  	default:
   284  		return TC_ACT_OK;
   285  	}
   286  
   287  	/* add L2 encap (if specified) */
   288  	switch (l2_proto) {
   289  	case ETH_P_MPLS_UC:
   290  		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
   291  		break;
   292  	case ETH_P_TEB:
   293  		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
   294  				       ETH_HLEN))
   295  			return TC_ACT_SHOT;
   296  		break;
   297  	}
   298  	olen += l2_len;
   299  
   300  	/* add room between mac and network header */
   301  	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
   302  		return TC_ACT_SHOT;
   303  
   304  	/* prepare new outer network header */
   305  	h_outer.ip = iph_inner;
   306  	h_outer.ip.payload_len = bpf_htons(olen +
   307  					   bpf_ntohs(h_outer.ip.payload_len));
   308  
   309  	h_outer.ip.nexthdr = encap_proto;
   310  
   311  	/* store new outer network header */
   312  	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
   313  				BPF_F_INVALIDATE_HASH) < 0)
   314  		return TC_ACT_SHOT;
   315  
   316  	return TC_ACT_OK;
   317  }
   318  
   319  SEC("encap_ipip_none")
   320  int __encap_ipip_none(struct __sk_buff *skb)
   321  {
   322  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   323  		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
   324  	else
   325  		return TC_ACT_OK;
   326  }
   327  
   328  SEC("encap_gre_none")
   329  int __encap_gre_none(struct __sk_buff *skb)
   330  {
   331  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   332  		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
   333  	else
   334  		return TC_ACT_OK;
   335  }
   336  
   337  SEC("encap_gre_mpls")
   338  int __encap_gre_mpls(struct __sk_buff *skb)
   339  {
   340  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   341  		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
   342  	else
   343  		return TC_ACT_OK;
   344  }
   345  
   346  SEC("encap_gre_eth")
   347  int __encap_gre_eth(struct __sk_buff *skb)
   348  {
   349  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   350  		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
   351  	else
   352  		return TC_ACT_OK;
   353  }
   354  
   355  SEC("encap_udp_none")
   356  int __encap_udp_none(struct __sk_buff *skb)
   357  {
   358  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   359  		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
   360  	else
   361  		return TC_ACT_OK;
   362  }
   363  
   364  SEC("encap_udp_mpls")
   365  int __encap_udp_mpls(struct __sk_buff *skb)
   366  {
   367  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   368  		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
   369  	else
   370  		return TC_ACT_OK;
   371  }
   372  
   373  SEC("encap_udp_eth")
   374  int __encap_udp_eth(struct __sk_buff *skb)
   375  {
   376  	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
   377  		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
   378  	else
   379  		return TC_ACT_OK;
   380  }
   381  
   382  SEC("encap_sit_none")
   383  int __encap_sit_none(struct __sk_buff *skb)
   384  {
   385  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   386  		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
   387  	else
   388  		return TC_ACT_OK;
   389  }
   390  
   391  SEC("encap_ip6tnl_none")
   392  int __encap_ip6tnl_none(struct __sk_buff *skb)
   393  {
   394  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   395  		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
   396  	else
   397  		return TC_ACT_OK;
   398  }
   399  
   400  SEC("encap_ip6gre_none")
   401  int __encap_ip6gre_none(struct __sk_buff *skb)
   402  {
   403  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   404  		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
   405  	else
   406  		return TC_ACT_OK;
   407  }
   408  
   409  SEC("encap_ip6gre_mpls")
   410  int __encap_ip6gre_mpls(struct __sk_buff *skb)
   411  {
   412  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   413  		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
   414  	else
   415  		return TC_ACT_OK;
   416  }
   417  
   418  SEC("encap_ip6gre_eth")
   419  int __encap_ip6gre_eth(struct __sk_buff *skb)
   420  {
   421  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   422  		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
   423  	else
   424  		return TC_ACT_OK;
   425  }
   426  
   427  SEC("encap_ip6udp_none")
   428  int __encap_ip6udp_none(struct __sk_buff *skb)
   429  {
   430  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   431  		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
   432  	else
   433  		return TC_ACT_OK;
   434  }
   435  
   436  SEC("encap_ip6udp_mpls")
   437  int __encap_ip6udp_mpls(struct __sk_buff *skb)
   438  {
   439  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   440  		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
   441  	else
   442  		return TC_ACT_OK;
   443  }
   444  
   445  SEC("encap_ip6udp_eth")
   446  int __encap_ip6udp_eth(struct __sk_buff *skb)
   447  {
   448  	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
   449  		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
   450  	else
   451  		return TC_ACT_OK;
   452  }
   453  
   454  static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
   455  {
   456  	struct gre_hdr greh;
   457  	struct udphdr udph;
   458  	int olen = len;
   459  
   460  	switch (proto) {
   461  	case IPPROTO_IPIP:
   462  	case IPPROTO_IPV6:
   463          //return TC_ACT_SHOT;
   464  		break;
   465  	case IPPROTO_GRE:
   466  		olen += sizeof(struct gre_hdr);
   467  		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
   468  			return TC_ACT_OK;
   469  		switch (bpf_ntohs(greh.protocol)) {
   470  		case ETH_P_MPLS_UC:
   471  			olen += sizeof(mpls_label);
   472  			break;
   473  		case ETH_P_TEB:
   474  			olen += ETH_HLEN;
   475  			break;
   476  		}
   477  		break;
   478  	case IPPROTO_UDP:
   479  		olen += sizeof(struct udphdr);
   480  		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
   481  			return TC_ACT_OK;
   482  		switch (bpf_ntohs(udph.dest)) {
   483  		case MPLS_OVER_UDP_PORT:
   484  			olen += sizeof(mpls_label);
   485  			break;
   486  		case ETH_OVER_UDP_PORT:
   487  			olen += ETH_HLEN;
   488  			break;
   489  		}
   490  		break;
   491  	default:
   492  		return TC_ACT_OK;
   493  	}
   494  
   495     	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO))
   496      	return TC_ACT_SHOT;
   497  
   498  	return TC_ACT_OK;
   499  }
   500  
   501  static int decap_ipv4(struct __sk_buff *skb)
   502  {
   503  	struct iphdr iph_outer;
   504  
   505  	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
   506  			       sizeof(iph_outer)) < 0)
   507  		return TC_ACT_OK;
   508  
   509  	if (iph_outer.ihl != 5)
   510  		return TC_ACT_OK;
   511  
   512  	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
   513  			      iph_outer.protocol);
   514  }
   515  
   516  static int decap_ipv6(struct __sk_buff *skb)
   517  {
   518  	struct ipv6hdr iph_outer;
   519  
   520  	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
   521  			       sizeof(iph_outer)) < 0)
   522  		return TC_ACT_OK;
   523  
   524  	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
   525  			      iph_outer.nexthdr);
   526  }
   527  
   528  SEC("decap")
   529  int decap_f(struct __sk_buff *skb)
   530  {
   531  	switch (skb->protocol) {
   532  	case __bpf_constant_htons(ETH_P_IP):
   533  		return decap_ipv4(skb);
   534  	case __bpf_constant_htons(ETH_P_IPV6):
   535  		return decap_ipv6(skb);
   536  	default:
   537  		/* does not match, ignore */
   538  		return TC_ACT_OK;
   539  	}
   540  }
   541  
/* License string, emitted into the object's "license" section. */
char __license[] SEC("license") = "GPL";