github.com/datadog/cilium@v1.6.12/bpf/lib/nat46.h (about)

     1  /*
     2   *  Copyright (C) 2016-2017 Authors of Cilium
     3   *
     4   *  This program is free software; you can redistribute it and/or modify
     5   *  it under the terms of the GNU General Public License as published by
     6   *  the Free Software Foundation; either version 2 of the License, or
     7   *  (at your option) any later version.
     8   *
     9   *  This program is distributed in the hope that it will be useful,
    10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12   *  GNU General Public License for more details.
    13   *
    14   *  You should have received a copy of the GNU General Public License
    15   *  along with this program; if not, write to the Free Software
    16   *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    17   */
    18  #ifndef __LIB_NAT46__
    19  #define __LIB_NAT46__
    20  
    21  #include <linux/ip.h>
    22  #include <linux/icmp.h>
    23  #include <linux/icmpv6.h>
    24  #include "common.h"
    25  #include "ipv4.h"
    26  #include "ipv6.h"
    27  #include "eth.h"
    28  #include "dbg.h"
    29  
    30  #if defined ENABLE_NAT46 && \
    31      (!defined ENABLE_IPV4 || !defined ENABLE_IPV6 || \
    32       !defined CONNTRACK || !defined ENABLE_HOST_REDIRECT)
    33  #error "ENABLE_NAT46 requisite options are not configured, see lib/nat46.h."
    34  #endif
    35  
    36  static inline int get_csum_offset(__u8 protocol)
    37  {
    38  	int csum_off;
    39  
    40  	switch (protocol) {
    41  	case IPPROTO_TCP:
    42  		csum_off = TCP_CSUM_OFF;
    43  		break;
    44  	case IPPROTO_UDP:
    45  		csum_off = UDP_CSUM_OFF;
    46  		break;
    47  	case IPPROTO_ICMP:
    48  		csum_off = (offsetof(struct icmphdr, checksum));
    49  		break;
    50  	case IPPROTO_ICMPV6:
    51  		csum_off = (offsetof(struct icmp6hdr, icmp6_cksum));
    52  		break;
    53  	default:
    54  		return DROP_UNKNOWN_L4;
    55  	}
    56  
    57  	return csum_off;
    58  }
    59  
    60  static inline int icmp4_to_icmp6(struct __sk_buff *skb, int nh_off)
    61  {
    62  	struct icmphdr icmp4;
    63  	struct icmp6hdr icmp6 = {};
    64  
    65  	if (skb_load_bytes(skb, nh_off, &icmp4, sizeof(icmp4)) < 0)
    66  		return DROP_INVALID;
    67  	else
    68  		icmp6.icmp6_cksum = icmp4.checksum;
    69  
    70  	switch(icmp4.type) {
    71  	case ICMP_ECHO:
    72  		icmp6.icmp6_type = ICMPV6_ECHO_REQUEST;
    73  		icmp6.icmp6_identifier = icmp4.un.echo.id;
    74  		icmp6.icmp6_sequence = icmp4.un.echo.sequence;
    75  		break;
    76  	case ICMP_ECHOREPLY:
    77  		icmp6.icmp6_type = ICMPV6_ECHO_REPLY;
    78  		icmp6.icmp6_identifier = icmp4.un.echo.id;
    79  		icmp6.icmp6_sequence = icmp4.un.echo.sequence;
    80  		break;
    81  	case ICMP_DEST_UNREACH:
    82  		icmp6.icmp6_type = ICMPV6_DEST_UNREACH;
    83  		switch(icmp4.code) {
    84  		case ICMP_NET_UNREACH:
    85  		case ICMP_HOST_UNREACH:
    86  			icmp6.icmp6_code = ICMPV6_NOROUTE;
    87  			break;
    88  		case ICMP_PROT_UNREACH:
    89  			icmp6.icmp6_type = ICMPV6_PARAMPROB;
    90  			icmp6.icmp6_code = ICMPV6_UNK_NEXTHDR;
    91  			icmp6.icmp6_pointer = 6;
    92  			break;
    93  		case ICMP_PORT_UNREACH:
    94  			icmp6.icmp6_code = ICMPV6_PORT_UNREACH;
    95  			break;
    96  		case ICMP_FRAG_NEEDED:
    97  			icmp6.icmp6_type = ICMPV6_PKT_TOOBIG;
    98  			icmp6.icmp6_code = 0;
    99  			/* FIXME */
   100  			if (icmp4.un.frag.mtu)
   101  				icmp6.icmp6_mtu = bpf_htonl(bpf_ntohs(icmp4.un.frag.mtu));
   102  			else
   103  				icmp6.icmp6_mtu = bpf_htonl(1500);
   104  			break;
   105  		case ICMP_SR_FAILED:
   106  			icmp6.icmp6_code = ICMPV6_NOROUTE;
   107  			break;
   108  		case ICMP_NET_UNKNOWN:
   109  		case ICMP_HOST_UNKNOWN:
   110  		case ICMP_HOST_ISOLATED:
   111  		case ICMP_NET_UNR_TOS:
   112  		case ICMP_HOST_UNR_TOS:
   113  			icmp6.icmp6_code = 0;
   114  			break;
   115  		case ICMP_NET_ANO:
   116  		case ICMP_HOST_ANO:
   117  		case ICMP_PKT_FILTERED:
   118  			icmp6.icmp6_code = ICMPV6_ADM_PROHIBITED;
   119  			break;
   120  		default:
   121  			return DROP_UNKNOWN_ICMP_CODE;
   122  		}
   123  		break;
   124  	case ICMP_TIME_EXCEEDED:
   125  		icmp6.icmp6_type = ICMPV6_TIME_EXCEED;
   126  		break;
   127  	case ICMP_PARAMETERPROB:
   128  		icmp6.icmp6_type = ICMPV6_PARAMPROB;
   129  		/* FIXME */
   130  		icmp6.icmp6_pointer = 6;
   131  		break;
   132  	default:
   133  		return DROP_UNKNOWN_ICMP_TYPE;
   134  	}
   135  
   136  	if (skb_store_bytes(skb, nh_off, &icmp6, sizeof(icmp6), 0) < 0)
   137  		return DROP_WRITE_ERROR;
   138  
   139  	icmp4.checksum = 0;
   140  	icmp6.icmp6_cksum = 0;
   141  	return csum_diff(&icmp4, sizeof(icmp4), &icmp6, sizeof(icmp6), 0);
   142  }
   143  
   144  static inline int icmp6_to_icmp4(struct __sk_buff *skb, int nh_off)
   145  {
   146  	struct icmphdr icmp4 = {};
   147  	struct icmp6hdr icmp6;
   148  
   149  	if (skb_load_bytes(skb, nh_off, &icmp6, sizeof(icmp6)) < 0)
   150  		return DROP_INVALID;
   151  	else
   152  		icmp4.checksum = icmp6.icmp6_cksum;
   153  
   154  	switch(icmp6.icmp6_type) {
   155  	case ICMPV6_ECHO_REQUEST:
   156  		icmp4.type = ICMP_ECHO;
   157  		icmp4.un.echo.id = icmp6.icmp6_identifier;
   158  		icmp4.un.echo.sequence = icmp6.icmp6_sequence;
   159  		break;
   160  	case ICMPV6_ECHO_REPLY:
   161  		icmp4.type = ICMP_ECHOREPLY;
   162  		icmp4.un.echo.id = icmp6.icmp6_identifier;
   163  		icmp4.un.echo.sequence = icmp6.icmp6_sequence;
   164  		break;
   165  	case ICMPV6_DEST_UNREACH:
   166  		icmp4.type = ICMP_DEST_UNREACH;
   167  		switch(icmp6.icmp6_code) {
   168  		case ICMPV6_NOROUTE:
   169  		case ICMPV6_NOT_NEIGHBOUR:
   170  		case ICMPV6_ADDR_UNREACH:
   171  			icmp4.code = ICMP_HOST_UNREACH;
   172  			break;
   173  		case ICMPV6_ADM_PROHIBITED:
   174  			icmp4.code = ICMP_HOST_ANO;
   175  			break;
   176  		case ICMPV6_PORT_UNREACH:
   177  			icmp4.code = ICMP_PORT_UNREACH;
   178  			break;
   179  		default:
   180  			return DROP_UNKNOWN_ICMP6_CODE;
   181  		}
   182  	case ICMPV6_PKT_TOOBIG:
   183  		icmp4.type = ICMP_DEST_UNREACH;
   184  		icmp4.code = ICMP_FRAG_NEEDED;
   185  		/* FIXME */
   186  		if (icmp6.icmp6_mtu)
   187  			icmp4.un.frag.mtu = bpf_htons(bpf_ntohl(icmp6.icmp6_mtu));
   188  		else
   189  			icmp4.un.frag.mtu = bpf_htons(1500);
   190  		break;
   191  	case ICMPV6_TIME_EXCEED:
   192  		icmp4.type = ICMP_TIME_EXCEEDED;
   193  		icmp4.code = icmp6.icmp6_code;
   194  		break;
   195  	case ICMPV6_PARAMPROB:
   196  		switch(icmp6.icmp6_code) {
   197  		case ICMPV6_HDR_FIELD:
   198  			icmp4.type = ICMP_PARAMETERPROB;
   199  			icmp4.code = 0;
   200  			break;
   201  		case ICMPV6_UNK_NEXTHDR:
   202  			icmp4.type = ICMP_DEST_UNREACH;
   203  			icmp4.code = ICMP_PROT_UNREACH;
   204  			break;
   205  		default:
   206  			return DROP_UNKNOWN_ICMP6_CODE;
   207  		}
   208  	default:
   209  		return DROP_UNKNOWN_ICMP6_TYPE;
   210  	}
   211  
   212  	if (skb_store_bytes(skb, nh_off, &icmp4, sizeof(icmp4), 0) < 0)
   213  		return DROP_WRITE_ERROR;
   214  
   215  	icmp4.checksum = 0;
   216  	icmp6.icmp6_cksum = 0;
   217  	return csum_diff(&icmp6, sizeof(icmp6), &icmp4, sizeof(icmp4), 0);
   218  }
   219  
   220  static inline int ipv6_prefix_match(struct in6_addr *addr,
   221  				    union v6addr *v6prefix)
   222  {
   223  	if (addr->in6_u.u6_addr32[0] == v6prefix->p1 &&
   224  	    addr->in6_u.u6_addr32[1] == v6prefix->p2 &&
   225  	    addr->in6_u.u6_addr32[2] == v6prefix->p3)
   226  		return 1;
   227  	else
   228  		return 0;
   229  }
   230  
   231  /*
   232   * ipv4 to ipv6 stateless nat
   233   * (s4,d4) -> (s6,d6)
   234   * s6 = nat46_prefix<s4>
   235   * d6 = nat46_prefix<d4> or v6_dst if non null
   236   */
   237  static inline int ipv4_to_ipv6(struct __sk_buff *skb, struct iphdr *ip4,
   238  			       int nh_off, union v6addr *v6_dst)
   239  {
   240  	struct ipv6hdr v6 = {};
   241  	struct iphdr v4;
   242  	int csum_off;
   243  	__be32 csum;
   244  	__be16 v4hdr_len;
   245  	__be16 protocol = bpf_htons(ETH_P_IPV6);
   246  	__u64 csum_flags = BPF_F_PSEUDO_HDR;
   247  	union v6addr nat46_prefix = NAT46_PREFIX;
   248  	
   249  	if (skb_load_bytes(skb, nh_off, &v4, sizeof(v4)) < 0)
   250  		return DROP_INVALID;
   251  
   252  	if (ipv4_hdrlen(ip4) != sizeof(v4))
   253  		return DROP_INVALID_EXTHDR;
   254  
   255  	/* build v6 header */
   256  	v6.version = 0x6;
   257  	v6.saddr.in6_u.u6_addr32[0] = nat46_prefix.p1;
   258  	v6.saddr.in6_u.u6_addr32[1] = nat46_prefix.p2;
   259  	v6.saddr.in6_u.u6_addr32[2] = nat46_prefix.p3;
   260  	v6.saddr.in6_u.u6_addr32[3] = v4.saddr;
   261  
   262  	if (v6_dst) {
   263  		v6.daddr.in6_u.u6_addr32[0] = v6_dst->p1;
   264  		v6.daddr.in6_u.u6_addr32[1] = v6_dst->p2;
   265  		v6.daddr.in6_u.u6_addr32[2] = v6_dst->p3;
   266  		v6.daddr.in6_u.u6_addr32[3] = v6_dst->p4;
   267  	} else {
   268  		v6.daddr.in6_u.u6_addr32[0] = nat46_prefix.p1;
   269  		v6.daddr.in6_u.u6_addr32[1] = nat46_prefix.p2;
   270  		v6.daddr.in6_u.u6_addr32[2] = nat46_prefix.p3;
   271  		v6.daddr.in6_u.u6_addr32[3] = bpf_htonl((bpf_ntohl(nat46_prefix.p4) & 0xFFFF0000) |
   272  							(bpf_ntohl(v4.daddr) & 0xFFFF));
   273  	}
   274  
   275  	if (v4.protocol == IPPROTO_ICMP)
   276  		v6.nexthdr = IPPROTO_ICMPV6;
   277  	else
   278  		v6.nexthdr = v4.protocol;
   279  	v6.hop_limit = v4.ttl;
   280  	v4hdr_len = (v4.ihl << 2);
   281  	v6.payload_len = bpf_htons(bpf_ntohs(v4.tot_len) - v4hdr_len);
   282  
   283  	if (skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0) < 0) {
   284  #ifdef DEBUG_NAT46
   285  		printk("v46 NAT: skb_modify failed\n");
   286  #endif
   287  		return DROP_WRITE_ERROR;
   288  	}
   289  
   290  	if (skb_store_bytes(skb, nh_off, &v6, sizeof(v6), 0) < 0 ||
   291  	    skb_store_bytes(skb, nh_off - 2, &protocol, 2, 0) < 0)
   292  		return DROP_WRITE_ERROR;
   293  
   294  	if (v4.protocol == IPPROTO_ICMP) {
   295  		csum = icmp4_to_icmp6(skb, nh_off + sizeof(v6));
   296  		csum = ipv6_pseudohdr_checksum(&v6, IPPROTO_ICMPV6,
   297  					       bpf_ntohs(v6.payload_len), csum);
   298  	} else {
   299  		csum = 0;
   300  		csum = csum_diff(&v4.saddr, 4, &v6.saddr, 16, csum);
   301  		csum = csum_diff(&v4.daddr, 4, &v6.daddr, 16, csum);
   302  		if (v4.protocol == IPPROTO_UDP)
   303  			csum_flags |= BPF_F_MARK_MANGLED_0;
   304  	}
   305  
   306  	/* 
   307  	 * get checksum from inner header tcp / udp / icmp
   308  	 * undo ipv4 pseudohdr checksum and
   309  	 * add  ipv6 pseudohdr checksum
   310  	 */
   311  	csum_off = get_csum_offset(v6.nexthdr);
   312  	if (csum_off < 0)
   313  		return csum_off;
   314  	else
   315  		csum_off += sizeof(struct ipv6hdr);
   316  
   317  	if (l4_csum_replace(skb, nh_off + csum_off, 0, csum, csum_flags) < 0)
   318  		return DROP_CSUM_L4;
   319  
   320  #ifdef DEBUG_NAT46
   321  	printk("v46 NAT: nh_off %d, csum_off %d\n", nh_off, csum_off);
   322  #endif
   323  	return 0;
   324  }
   325  
   326  /*
   327   * ipv6 to ipv4 stateless nat
   328   * (s6,d6) -> (s4,d4)
   329   * s4 = <ipv4-range>.<lxc-id>
   330   * d4 = d6[96 .. 127]
   331   */
   332  static inline int ipv6_to_ipv4(struct __sk_buff *skb, int nh_off, __be32 saddr)
   333  {
   334  	struct ipv6hdr v6;
   335  	struct iphdr v4 = {};
   336  	int csum_off;
   337  	__be32 csum = 0;
   338  	__be16 protocol = bpf_htons(ETH_P_IP);
   339  	__u64 csum_flags = BPF_F_PSEUDO_HDR;
   340  
   341  	if (skb_load_bytes(skb, nh_off, &v6, sizeof(v6)) < 0)
   342  		return DROP_INVALID;
   343  
   344  	/* Drop frames which carry extensions headers */
   345  	if (ipv6_hdrlen(skb, nh_off, &v6.nexthdr) != sizeof(v6))
   346  		return DROP_INVALID_EXTHDR;
   347  
   348  	/* build v4 header */
   349  	v4.ihl = 0x5;
   350  	v4.version = 0x4;
   351  	v4.saddr = saddr;
   352  	v4.daddr = v6.daddr.in6_u.u6_addr32[3];
   353  	if (v6.nexthdr == IPPROTO_ICMPV6)
   354  		v4.protocol = IPPROTO_ICMP;
   355  	else
   356  		v4.protocol = v6.nexthdr;
   357  	v4.ttl = v6.hop_limit;
   358  	v4.tot_len = bpf_htons(bpf_ntohs(v6.payload_len) + sizeof(v4));
   359  	csum_off = offsetof(struct iphdr, check);
   360  	csum = csum_diff(NULL, 0, &v4, sizeof(v4), csum);
   361  
   362  	if (skb_change_proto(skb, bpf_htons(ETH_P_IP), 0) < 0) {
   363  #ifdef DEBUG_NAT46
   364  		printk("v46 NAT: skb_modify failed\n");
   365  #endif
   366  		return DROP_WRITE_ERROR;
   367  	}
   368  
   369  	if (skb_store_bytes(skb, nh_off, &v4, sizeof(v4), 0) < 0 ||
   370  	    skb_store_bytes(skb, nh_off - 2, &protocol, 2, 0) < 0)
   371  		return DROP_WRITE_ERROR;
   372  
   373  	if (l3_csum_replace(skb, nh_off + csum_off, 0, csum, 0) < 0)
   374  		return DROP_CSUM_L3;
   375  
   376  	if (v6.nexthdr == IPPROTO_ICMPV6) {
   377  		__be32 csum1 = 0;
   378  		csum = icmp6_to_icmp4(skb, nh_off + sizeof(v4));
   379  		csum1 = ipv6_pseudohdr_checksum(&v6, IPPROTO_ICMPV6,
   380  						bpf_ntohs(v6.payload_len), 0);
   381  		csum = csum - csum1;
   382  	} else {
   383  		csum = 0;
   384  		csum = csum_diff(&v6.saddr, 16, &v4.saddr, 4, csum);
   385  		csum = csum_diff(&v6.daddr, 16, &v4.daddr, 4, csum);
   386  		if (v4.protocol == IPPROTO_UDP)
   387  			csum_flags |= BPF_F_MARK_MANGLED_0;
   388  	}
   389  	/* 
   390  	 * get checksum from inner header tcp / udp / icmp
   391  	 * undo ipv6 pseudohdr checksum and
   392  	 * add  ipv4 pseudohdr checksum
   393  	 */
   394  	csum_off = get_csum_offset(v4.protocol);
   395  	if (csum_off < 0)
   396  		return csum_off;
   397  	else
   398  		csum_off += sizeof(struct iphdr);
   399  
   400  	if (l4_csum_replace(skb, nh_off + csum_off, 0, csum, csum_flags) < 0)
   401  		return DROP_CSUM_L4;
   402  
   403  #ifdef DEBUG_NAT46
   404  	printk("v64 NAT: nh_off %d, csum_off %d\n", nh_off, csum_off);
   405  #endif
   406  
   407  	return 0;
   408  }
   409  #endif /* __LIB_NAT46__ */