github.com/cilium/cilium@v1.16.2/bpf/lib/nat_46x64.h

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#pragma once

#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>

#include "common.h"
#include "ipv4.h"
#include "ipv6.h"
#include "eth.h"

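/*
 * NAT46x64 translation helpers: embedding/extracting IPv4 addresses in
 * IPv6 addresses, ICMP <-> ICMPv6 header mapping, and in-place rewriting
 * of IPv4 <-> IPv6 L3 headers including the L3/L4 checksum fixups.
 */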
static __always_inline __maybe_unused bool is_v4_in_v6(const union v6addr *daddr)
{
	/* Check for ::FFFF:<IPv4 address>. */
	union v6addr dprobe  = {
		.addr[10] = 0xff,
		.addr[11] = 0xff,
	};
	union v6addr dmasked = {
		.d1 = daddr->d1,
	};

	dmasked.p3 = daddr->p3;
	return ipv6_addr_equals(&dprobe, &dmasked);
}

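/* Check for <NAT_46X64_PREFIX>::<IPv4 address>, i.e. an IPv4 address
 * embedded in the last 32 bits behind the RFC 8215-style translation
 * prefix. The prefix occupies the first four bytes (taken from the
 * NAT_46X64_PREFIX_* defines) and the bytes in between must be zero.
 */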
static __always_inline __maybe_unused bool is_v4_in_v6_rfc8215(const union v6addr *daddr)
{
	union v6addr dprobe  = {
		.addr[0] = NAT_46X64_PREFIX_0,
		.addr[1] = NAT_46X64_PREFIX_1,
		.addr[2] = NAT_46X64_PREFIX_2,
		.addr[3] = NAT_46X64_PREFIX_3,
	};
	union v6addr dmasked = {
		.d1 = daddr->d1,
	};

	dmasked.p3 = daddr->p3;
	return ipv6_addr_equals(&dprobe, &dmasked);
}

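/* Build the IPv4-mapped IPv6 address ::ffff:<v4>, e.g. a v4 of 192.0.2.1
 * yields ::ffff:192.0.2.1.
 */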
static __always_inline __maybe_unused
void build_v4_in_v6(union v6addr *daddr, __be32 v4)
{
	memset(daddr, 0, sizeof(*daddr));
	daddr->addr[10] = 0xff;
	daddr->addr[11] = 0xff;
	daddr->p4 = v4;
}

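/* Build <NAT_46X64_PREFIX>::<v4>: translation prefix in the first four
 * bytes, zeroes in the middle, v4 in the last 32 bits.
 */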
static __always_inline __maybe_unused
void build_v4_in_v6_rfc8215(union v6addr *daddr, __be32 v4)
{
	memset(daddr, 0, sizeof(*daddr));
	daddr->addr[0] = NAT_46X64_PREFIX_0;
	daddr->addr[1] = NAT_46X64_PREFIX_1;
	daddr->addr[2] = NAT_46X64_PREFIX_2;
	daddr->addr[3] = NAT_46X64_PREFIX_3;
	daddr->p4 = v4;
}

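/* Extract the embedded IPv4 address from the last 32 bits of v6. */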
static __always_inline __maybe_unused
void build_v4_from_v6(const union v6addr *v6, __be32 *daddr)
{
	*daddr = v6->p4;
}

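/* Return the offset of the L4 checksum field relative to the start of the
 * L4 header for the given protocol, or DROP_UNKNOWN_L4 for protocols we
 * cannot handle. SCTP is special-cased to 0 (see csum.h).
 */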
static __always_inline int get_csum_offset(__u8 protocol)
{
	int csum_off;

	switch (protocol) {
	case IPPROTO_TCP:
		csum_off = TCP_CSUM_OFF;
		break;
	case IPPROTO_UDP:
		csum_off = UDP_CSUM_OFF;
		break;
#ifdef ENABLE_SCTP
	case IPPROTO_SCTP:
		/* See comment in csum.h */
		csum_off = 0;
		break;
#endif  /* ENABLE_SCTP */
	case IPPROTO_ICMP:
		csum_off = (offsetof(struct icmphdr, checksum));
		break;
	case IPPROTO_ICMPV6:
		csum_off = (offsetof(struct icmp6hdr, icmp6_cksum));
		break;
	default:
		return DROP_UNKNOWN_L4;
	}

	return csum_off;
}

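/* Rewrite the ICMPv4 header at nh_off as an ICMPv6 header, mapping types
 * and codes broadly along the lines of SIIT (RFC 7915). Returns the
 * checksum delta between the old and new header (for the caller to fold
 * into the L4 checksum) or a negative DROP_* code.
 */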
static __always_inline int icmp4_to_icmp6(struct __ctx_buff *ctx, int nh_off)
{
	struct icmphdr icmp4 __align_stack_8;
	struct icmp6hdr icmp6 __align_stack_8 = {};

	if (ctx_load_bytes(ctx, nh_off, &icmp4, sizeof(icmp4)) < 0)
		return DROP_INVALID;
	icmp6.icmp6_cksum = icmp4.checksum;
	switch (icmp4.type) {
	case ICMP_ECHO:
		icmp6.icmp6_type = ICMPV6_ECHO_REQUEST;
		icmp6.icmp6_identifier = icmp4.un.echo.id;
		icmp6.icmp6_sequence = icmp4.un.echo.sequence;
		break;
	case ICMP_ECHOREPLY:
		icmp6.icmp6_type = ICMPV6_ECHO_REPLY;
		icmp6.icmp6_identifier = icmp4.un.echo.id;
		icmp6.icmp6_sequence = icmp4.un.echo.sequence;
		break;
	case ICMP_DEST_UNREACH:
		icmp6.icmp6_type = ICMPV6_DEST_UNREACH;
		switch (icmp4.code) {
		case ICMP_NET_UNREACH:
		case ICMP_HOST_UNREACH:
			icmp6.icmp6_code = ICMPV6_NOROUTE;
			break;
		case ICMP_PROT_UNREACH:
			icmp6.icmp6_type = ICMPV6_PARAMPROB;
			icmp6.icmp6_code = ICMPV6_UNK_NEXTHDR;
			icmp6.icmp6_pointer = 6;
			break;
		case ICMP_PORT_UNREACH:
			icmp6.icmp6_code = ICMPV6_PORT_UNREACH;
			break;
		case ICMP_FRAG_NEEDED:
			icmp6.icmp6_type = ICMPV6_PKT_TOOBIG;
			icmp6.icmp6_code = 0;
			/* FIXME */
			if (icmp4.un.frag.mtu)
				icmp6.icmp6_mtu = bpf_htonl(bpf_ntohs(icmp4.un.frag.mtu));
			else
				icmp6.icmp6_mtu = bpf_htonl(1500);
			break;
		case ICMP_SR_FAILED:
			icmp6.icmp6_code = ICMPV6_NOROUTE;
			break;
		case ICMP_NET_UNKNOWN:
		case ICMP_HOST_UNKNOWN:
		case ICMP_HOST_ISOLATED:
		case ICMP_NET_UNR_TOS:
		case ICMP_HOST_UNR_TOS:
			icmp6.icmp6_code = 0;
			break;
		case ICMP_NET_ANO:
		case ICMP_HOST_ANO:
		case ICMP_PKT_FILTERED:
			icmp6.icmp6_code = ICMPV6_ADM_PROHIBITED;
			break;
		default:
			return DROP_UNKNOWN_ICMP_CODE;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		icmp6.icmp6_type = ICMPV6_TIME_EXCEED;
		break;
	case ICMP_PARAMETERPROB:
		icmp6.icmp6_type = ICMPV6_PARAMPROB;
		/* FIXME */
		icmp6.icmp6_pointer = 6;
		break;
	default:
		return DROP_UNKNOWN_ICMP_TYPE;
	}
	if (ctx_store_bytes(ctx, nh_off, &icmp6, sizeof(icmp6), 0) < 0)
		return DROP_WRITE_ERROR;
	icmp4.checksum = 0;
	icmp6.icmp6_cksum = 0;
	return csum_diff(&icmp4, sizeof(icmp4), &icmp6, sizeof(icmp6), 0);
}

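/* Reverse of icmp4_to_icmp6(): rewrite the ICMPv6 header at nh_off as an
 * ICMPv4 header and return the checksum delta, or a negative DROP_* code
 * for unknown types/codes.
 */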
static __always_inline int icmp6_to_icmp4(struct __ctx_buff *ctx, int nh_off)
{
	struct icmphdr icmp4 __align_stack_8 = {};
	struct icmp6hdr icmp6 __align_stack_8;
	__u32 mtu;

	if (ctx_load_bytes(ctx, nh_off, &icmp6, sizeof(icmp6)) < 0)
		return DROP_INVALID;
	icmp4.checksum = icmp6.icmp6_cksum;
	switch (icmp6.icmp6_type) {
	case ICMPV6_ECHO_REQUEST:
		icmp4.type = ICMP_ECHO;
		icmp4.un.echo.id = icmp6.icmp6_identifier;
		icmp4.un.echo.sequence = icmp6.icmp6_sequence;
		break;
	case ICMPV6_ECHO_REPLY:
		icmp4.type = ICMP_ECHOREPLY;
		icmp4.un.echo.id = icmp6.icmp6_identifier;
		icmp4.un.echo.sequence = icmp6.icmp6_sequence;
		break;
	case ICMPV6_DEST_UNREACH:
		icmp4.type = ICMP_DEST_UNREACH;
		switch (icmp6.icmp6_code) {
		case ICMPV6_NOROUTE:
		case ICMPV6_NOT_NEIGHBOUR:
		case ICMPV6_ADDR_UNREACH:
			icmp4.code = ICMP_HOST_UNREACH;
			break;
		case ICMPV6_ADM_PROHIBITED:
			icmp4.code = ICMP_HOST_ANO;
			break;
		case ICMPV6_PORT_UNREACH:
			icmp4.code = ICMP_PORT_UNREACH;
			break;
		default:
			return DROP_UNKNOWN_ICMP6_CODE;
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		icmp4.type = ICMP_DEST_UNREACH;
		icmp4.code = ICMP_FRAG_NEEDED;
		/* FIXME */
		if (icmp6.icmp6_mtu) {
			mtu = bpf_ntohl(icmp6.icmp6_mtu);
			icmp4.un.frag.mtu = bpf_htons((__u16)mtu);
		} else {
			icmp4.un.frag.mtu = bpf_htons(1500);
		}
		break;
	case ICMPV6_TIME_EXCEED:
		icmp4.type = ICMP_TIME_EXCEEDED;
		icmp4.code = icmp6.icmp6_code;
		break;
	case ICMPV6_PARAMPROB:
		switch (icmp6.icmp6_code) {
		case ICMPV6_HDR_FIELD:
			icmp4.type = ICMP_PARAMETERPROB;
			icmp4.code = 0;
			break;
		case ICMPV6_UNK_NEXTHDR:
			icmp4.type = ICMP_DEST_UNREACH;
			icmp4.code = ICMP_PROT_UNREACH;
			break;
		default:
			return DROP_UNKNOWN_ICMP6_CODE;
		}
		break;
	default:
		return DROP_UNKNOWN_ICMP6_TYPE;
	}
	if (ctx_store_bytes(ctx, nh_off, &icmp4, sizeof(icmp4), 0) < 0)
		return DROP_WRITE_ERROR;
	icmp4.checksum = 0;
	icmp6.icmp6_cksum = 0;
	return csum_diff(&icmp6, sizeof(icmp6), &icmp4, sizeof(icmp4), 0);
}

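/* Rewrite the IPv4 packet at nh_off as an IPv6 packet with the given
 * source/destination: resize the L3 header via ctx_change_proto(), update
 * the ethertype, translate ICMP into ICMPv6 where needed and patch the L4
 * checksum for the new pseudo-header. IPv4 packets carrying options are
 * dropped.
 */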
static __always_inline int ipv4_to_ipv6(struct __ctx_buff *ctx, int nh_off,
					const union v6addr *src6,
					const union v6addr *dst6)
{
	__be16 protocol = bpf_htons(ETH_P_IPV6);
	__u64 csum_flags = BPF_F_PSEUDO_HDR;
	struct ipv6hdr v6 = {};
	struct iphdr v4;
	int csum_off;
	__be32 csum;
	__be16 v4hdr_len;

	if (ctx_load_bytes(ctx, nh_off, &v4, sizeof(v4)) < 0)
		return DROP_INVALID;
	if (ipv4_hdrlen(&v4) != sizeof(v4))
		return DROP_INVALID_EXTHDR;
	v6.version = 0x6;
	v6.saddr.in6_u.u6_addr32[0] = src6->p1;
	v6.saddr.in6_u.u6_addr32[1] = src6->p2;
	v6.saddr.in6_u.u6_addr32[2] = src6->p3;
	v6.saddr.in6_u.u6_addr32[3] = src6->p4;
	v6.daddr.in6_u.u6_addr32[0] = dst6->p1;
	v6.daddr.in6_u.u6_addr32[1] = dst6->p2;
	v6.daddr.in6_u.u6_addr32[2] = dst6->p3;
	v6.daddr.in6_u.u6_addr32[3] = dst6->p4;
	if (v4.protocol == IPPROTO_ICMP)
		v6.nexthdr = IPPROTO_ICMPV6;
	else
		v6.nexthdr = v4.protocol;
	v6.hop_limit = v4.ttl;
	v4hdr_len = (__be16)(v4.ihl << 2);
	v6.payload_len = bpf_htons(bpf_ntohs(v4.tot_len) - v4hdr_len);
	if (ctx_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0) < 0)
		return DROP_WRITE_ERROR;
	if (ctx_store_bytes(ctx, nh_off, &v6, sizeof(v6), 0) < 0 ||
	    ctx_store_bytes(ctx, nh_off - 2, &protocol, 2, 0) < 0)
		return DROP_WRITE_ERROR;
	if (v4.protocol == IPPROTO_ICMP) {
		csum = icmp4_to_icmp6(ctx, nh_off + sizeof(v6));
		csum = ipv6_pseudohdr_checksum(&v6, IPPROTO_ICMPV6,
					       bpf_ntohs(v6.payload_len), csum);
	} else {
		csum = 0;
		csum = csum_diff(&v4.saddr, 4, &v6.saddr, 16, csum);
		csum = csum_diff(&v4.daddr, 4, &v6.daddr, 16, csum);
		if (v4.protocol == IPPROTO_UDP)
			csum_flags |= BPF_F_MARK_MANGLED_0;
	}
	csum_off = get_csum_offset(v6.nexthdr);
	if (csum_off < 0)
		return csum_off;
	csum_off += sizeof(struct ipv6hdr);
	if (l4_csum_replace(ctx, nh_off + csum_off, 0, csum, csum_flags) < 0)
		return DROP_CSUM_L4;
	return 0;
}

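/* Rewrite the IPv6 packet at ETH_HLEN as an IPv4 packet with the given
 * source/destination: shrink the L3 header via ctx_change_proto(), update
 * the ethertype, fill in the IPv4 header checksum, translate ICMPv6 into
 * ICMP where needed and patch the L4 checksum.
 */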
static __always_inline int ipv6_to_ipv4(struct __ctx_buff *ctx,
					__be32 src4, __be32 dst4)
{
	__be16 protocol = bpf_htons(ETH_P_IP);
	__u64 csum_flags = BPF_F_PSEUDO_HDR;
	int csum_off, nh_off = ETH_HLEN;
	struct ipv6hdr v6;
	struct iphdr v4 = {};
	__be32 csum = 0;

	if (ctx_load_bytes(ctx, nh_off, &v6, sizeof(v6)) < 0)
		return DROP_INVALID;
	/* Drop frames which carry extension headers */
	if (ipv6_hdrlen(ctx, &v6.nexthdr) != sizeof(v6))
		return DROP_INVALID_EXTHDR;
	v4.ihl = 0x5;
	v4.version = 0x4;
	v4.saddr = src4;
	v4.daddr = dst4;
	if (v6.nexthdr == IPPROTO_ICMPV6)
		v4.protocol = IPPROTO_ICMP;
	else
		v4.protocol = v6.nexthdr;
	v4.ttl = v6.hop_limit;
	v4.tot_len = bpf_htons(bpf_ntohs(v6.payload_len) + sizeof(v4));
	csum_off = offsetof(struct iphdr, check);
	csum = csum_diff(NULL, 0, &v4, sizeof(v4), csum);
	if (ctx_change_proto(ctx, bpf_htons(ETH_P_IP), 0) < 0)
		return DROP_WRITE_ERROR;
	if (ctx_store_bytes(ctx, nh_off, &v4, sizeof(v4), 0) < 0 ||
	    ctx_store_bytes(ctx, nh_off - 2, &protocol, 2, 0) < 0)
		return DROP_WRITE_ERROR;
	if (ipv4_csum_update_by_diff(ctx, nh_off, csum) < 0)
		return DROP_CSUM_L3;
	if (v6.nexthdr == IPPROTO_ICMPV6) {
		__be32 csum1 = 0;

		csum = icmp6_to_icmp4(ctx, nh_off + sizeof(v4));
		csum1 = ipv6_pseudohdr_checksum(&v6, IPPROTO_ICMPV6,
						bpf_ntohs(v6.payload_len), 0);
		csum = csum_sub(csum, csum1);
	} else {
		csum = 0;
		csum = csum_diff(&v6.saddr, 16, &v4.saddr, 4, csum);
		csum = csum_diff(&v6.daddr, 16, &v4.daddr, 4, csum);
		if (v4.protocol == IPPROTO_UDP)
			csum_flags |= BPF_F_MARK_MANGLED_0;
	}
	csum_off = get_csum_offset(v4.protocol);
	if (csum_off < 0)
		return csum_off;
	csum_off += sizeof(struct iphdr);
	if (l4_csum_replace(ctx, nh_off + csum_off, 0, csum, csum_flags) < 0)
		return DROP_CSUM_L4;
	return 0;
}

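/* Stateless NAT46: embed the IPv4 source and destination addresses into
 * the RFC 8215-style translation prefix and rewrite the packet as IPv6.
 */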
static __always_inline int
nat46_rfc8215(struct __ctx_buff *ctx __maybe_unused,
	      const struct iphdr *ip4 __maybe_unused,
	      int l3_off __maybe_unused)
{
	union v6addr src6, dst6;

	build_v4_in_v6_rfc8215(&src6, ip4->saddr);
	build_v4_in_v6_rfc8215(&dst6, ip4->daddr);

	return ipv4_to_ipv6(ctx, l3_off, &src6, &dst6);
}

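/* Stateless NAT64: extract the IPv4 addresses embedded in the IPv6 source
 * and destination and rewrite the packet as IPv4.
 */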
static __always_inline int
nat64_rfc8215(struct __ctx_buff *ctx __maybe_unused,
	      const struct ipv6hdr *ip6 __maybe_unused)
{
	__be32 src4, dst4;

	build_v4_from_v6((const union v6addr *)&ip6->saddr, &src4);
	build_v4_from_v6((const union v6addr *)&ip6->daddr, &dst4);

	return ipv6_to_ipv4(ctx, src4, dst4);
}

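/* Mode stored in the CB_NAT_46X64 metadata slot, presumably set by an
 * earlier program in the tail-call chain: XLATE requests the 46x64
 * translation above, ROUTE requests forwarding without translation.
 */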
#define NAT46x64_MODE_XLATE	1
#define NAT46x64_MODE_ROUTE	2

static __always_inline bool nat46x64_cb_route(struct __ctx_buff *ctx)
{
	return ctx_load_meta(ctx, CB_NAT_46X64) == NAT46x64_MODE_ROUTE;
}

static __always_inline bool nat46x64_cb_xlate(struct __ctx_buff *ctx)
{
	return ctx_load_meta(ctx, CB_NAT_46X64) == NAT46x64_MODE_XLATE;
}