github.com/datadog/cilium@v1.6.12/bpf/lib/nat46.h (about) 1 /* 2 * Copyright (C) 2016-2017 Authors of Cilium 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #ifndef __LIB_NAT46__ 19 #define __LIB_NAT46__ 20 21 #include <linux/ip.h> 22 #include <linux/icmp.h> 23 #include <linux/icmpv6.h> 24 #include "common.h" 25 #include "ipv4.h" 26 #include "ipv6.h" 27 #include "eth.h" 28 #include "dbg.h" 29 30 #if defined ENABLE_NAT46 && \ 31 (!defined ENABLE_IPV4 || !defined ENABLE_IPV6 || \ 32 !defined CONNTRACK || !defined ENABLE_HOST_REDIRECT) 33 #error "ENABLE_NAT46 requisite options are not configured, see lib/nat46.h." 34 #endif 35 36 static inline int get_csum_offset(__u8 protocol) 37 { 38 int csum_off; 39 40 switch (protocol) { 41 case IPPROTO_TCP: 42 csum_off = TCP_CSUM_OFF; 43 break; 44 case IPPROTO_UDP: 45 csum_off = UDP_CSUM_OFF; 46 break; 47 case IPPROTO_ICMP: 48 csum_off = (offsetof(struct icmphdr, checksum)); 49 break; 50 case IPPROTO_ICMPV6: 51 csum_off = (offsetof(struct icmp6hdr, icmp6_cksum)); 52 break; 53 default: 54 return DROP_UNKNOWN_L4; 55 } 56 57 return csum_off; 58 } 59 60 static inline int icmp4_to_icmp6(struct __sk_buff *skb, int nh_off) 61 { 62 struct icmphdr icmp4; 63 struct icmp6hdr icmp6 = {}; 64 65 if (skb_load_bytes(skb, nh_off, &icmp4, sizeof(icmp4)) < 0) 66 return DROP_INVALID; 67 else 68 icmp6.icmp6_cksum = icmp4.checksum; 69 70 switch(icmp4.type) { 71 case ICMP_ECHO: 72 icmp6.icmp6_type = ICMPV6_ECHO_REQUEST; 73 icmp6.icmp6_identifier = icmp4.un.echo.id; 74 icmp6.icmp6_sequence = icmp4.un.echo.sequence; 75 break; 76 case ICMP_ECHOREPLY: 77 icmp6.icmp6_type = ICMPV6_ECHO_REPLY; 78 icmp6.icmp6_identifier = icmp4.un.echo.id; 79 icmp6.icmp6_sequence = icmp4.un.echo.sequence; 80 break; 81 case ICMP_DEST_UNREACH: 82 icmp6.icmp6_type = ICMPV6_DEST_UNREACH; 83 switch(icmp4.code) { 84 case ICMP_NET_UNREACH: 85 case ICMP_HOST_UNREACH: 86 icmp6.icmp6_code = ICMPV6_NOROUTE; 87 break; 88 case ICMP_PROT_UNREACH: 89 icmp6.icmp6_type = ICMPV6_PARAMPROB; 90 icmp6.icmp6_code = ICMPV6_UNK_NEXTHDR; 91 icmp6.icmp6_pointer = 6; 92 break; 93 case ICMP_PORT_UNREACH: 94 icmp6.icmp6_code = ICMPV6_PORT_UNREACH; 95 break; 96 case ICMP_FRAG_NEEDED: 97 icmp6.icmp6_type = ICMPV6_PKT_TOOBIG; 98 icmp6.icmp6_code = 0; 99 /* FIXME */ 100 if (icmp4.un.frag.mtu) 101 icmp6.icmp6_mtu = bpf_htonl(bpf_ntohs(icmp4.un.frag.mtu)); 102 else 103 icmp6.icmp6_mtu = bpf_htonl(1500); 104 break; 105 case ICMP_SR_FAILED: 106 icmp6.icmp6_code = ICMPV6_NOROUTE; 107 break; 108 case ICMP_NET_UNKNOWN: 109 case ICMP_HOST_UNKNOWN: 110 case ICMP_HOST_ISOLATED: 111 case ICMP_NET_UNR_TOS: 112 case ICMP_HOST_UNR_TOS: 113 icmp6.icmp6_code = 0; 114 break; 115 case ICMP_NET_ANO: 116 case ICMP_HOST_ANO: 117 case ICMP_PKT_FILTERED: 118 icmp6.icmp6_code = ICMPV6_ADM_PROHIBITED; 119 break; 120 default: 121 return DROP_UNKNOWN_ICMP_CODE; 122 } 123 break; 124 case ICMP_TIME_EXCEEDED: 125 icmp6.icmp6_type = ICMPV6_TIME_EXCEED; 126 break; 127 case ICMP_PARAMETERPROB: 128 icmp6.icmp6_type = ICMPV6_PARAMPROB; 129 /* FIXME */ 130 icmp6.icmp6_pointer = 6; 131 break; 132 default: 133 return DROP_UNKNOWN_ICMP_TYPE; 134 } 135 136 if (skb_store_bytes(skb, nh_off, &icmp6, sizeof(icmp6), 0) < 0) 137 return DROP_WRITE_ERROR; 138 139 icmp4.checksum = 0; 140 icmp6.icmp6_cksum = 0; 141 return csum_diff(&icmp4, sizeof(icmp4), &icmp6, sizeof(icmp6), 0); 142 } 143 144 static inline int icmp6_to_icmp4(struct __sk_buff *skb, int nh_off) 145 { 146 struct icmphdr icmp4 = {}; 147 struct icmp6hdr icmp6; 148 149 if (skb_load_bytes(skb, nh_off, &icmp6, sizeof(icmp6)) < 0) 150 return DROP_INVALID; 151 else 152 icmp4.checksum = icmp6.icmp6_cksum; 153 154 switch(icmp6.icmp6_type) { 155 case ICMPV6_ECHO_REQUEST: 156 icmp4.type = ICMP_ECHO; 157 icmp4.un.echo.id = icmp6.icmp6_identifier; 158 icmp4.un.echo.sequence = icmp6.icmp6_sequence; 159 break; 160 case ICMPV6_ECHO_REPLY: 161 icmp4.type = ICMP_ECHOREPLY; 162 icmp4.un.echo.id = icmp6.icmp6_identifier; 163 icmp4.un.echo.sequence = icmp6.icmp6_sequence; 164 break; 165 case ICMPV6_DEST_UNREACH: 166 icmp4.type = ICMP_DEST_UNREACH; 167 switch(icmp6.icmp6_code) { 168 case ICMPV6_NOROUTE: 169 case ICMPV6_NOT_NEIGHBOUR: 170 case ICMPV6_ADDR_UNREACH: 171 icmp4.code = ICMP_HOST_UNREACH; 172 break; 173 case ICMPV6_ADM_PROHIBITED: 174 icmp4.code = ICMP_HOST_ANO; 175 break; 176 case ICMPV6_PORT_UNREACH: 177 icmp4.code = ICMP_PORT_UNREACH; 178 break; 179 default: 180 return DROP_UNKNOWN_ICMP6_CODE; 181 } 182 case ICMPV6_PKT_TOOBIG: 183 icmp4.type = ICMP_DEST_UNREACH; 184 icmp4.code = ICMP_FRAG_NEEDED; 185 /* FIXME */ 186 if (icmp6.icmp6_mtu) 187 icmp4.un.frag.mtu = bpf_htons(bpf_ntohl(icmp6.icmp6_mtu)); 188 else 189 icmp4.un.frag.mtu = bpf_htons(1500); 190 break; 191 case ICMPV6_TIME_EXCEED: 192 icmp4.type = ICMP_TIME_EXCEEDED; 193 icmp4.code = icmp6.icmp6_code; 194 break; 195 case ICMPV6_PARAMPROB: 196 switch(icmp6.icmp6_code) { 197 case ICMPV6_HDR_FIELD: 198 icmp4.type = ICMP_PARAMETERPROB; 199 icmp4.code = 0; 200 break; 201 case ICMPV6_UNK_NEXTHDR: 202 icmp4.type = ICMP_DEST_UNREACH; 203 icmp4.code = ICMP_PROT_UNREACH; 204 break; 205 default: 206 return DROP_UNKNOWN_ICMP6_CODE; 207 } 208 default: 209 return DROP_UNKNOWN_ICMP6_TYPE; 210 } 211 212 if (skb_store_bytes(skb, nh_off, &icmp4, sizeof(icmp4), 0) < 0) 213 return DROP_WRITE_ERROR; 214 215 icmp4.checksum = 0; 216 icmp6.icmp6_cksum = 0; 217 return csum_diff(&icmp6, sizeof(icmp6), &icmp4, sizeof(icmp4), 0); 218 } 219 220 static inline int ipv6_prefix_match(struct in6_addr *addr, 221 union v6addr *v6prefix) 222 { 223 if (addr->in6_u.u6_addr32[0] == v6prefix->p1 && 224 addr->in6_u.u6_addr32[1] == v6prefix->p2 && 225 addr->in6_u.u6_addr32[2] == v6prefix->p3) 226 return 1; 227 else 228 return 0; 229 } 230 231 /* 232 * ipv4 to ipv6 stateless nat 233 * (s4,d4) -> (s6,d6) 234 * s6 = nat46_prefix<s4> 235 * d6 = nat46_prefix<d4> or v6_dst if non null 236 */ 237 static inline int ipv4_to_ipv6(struct __sk_buff *skb, struct iphdr *ip4, 238 int nh_off, union v6addr *v6_dst) 239 { 240 struct ipv6hdr v6 = {}; 241 struct iphdr v4; 242 int csum_off; 243 __be32 csum; 244 __be16 v4hdr_len; 245 __be16 protocol = bpf_htons(ETH_P_IPV6); 246 __u64 csum_flags = BPF_F_PSEUDO_HDR; 247 union v6addr nat46_prefix = NAT46_PREFIX; 248 249 if (skb_load_bytes(skb, nh_off, &v4, sizeof(v4)) < 0) 250 return DROP_INVALID; 251 252 if (ipv4_hdrlen(ip4) != sizeof(v4)) 253 return DROP_INVALID_EXTHDR; 254 255 /* build v6 header */ 256 v6.version = 0x6; 257 v6.saddr.in6_u.u6_addr32[0] = nat46_prefix.p1; 258 v6.saddr.in6_u.u6_addr32[1] = nat46_prefix.p2; 259 v6.saddr.in6_u.u6_addr32[2] = nat46_prefix.p3; 260 v6.saddr.in6_u.u6_addr32[3] = v4.saddr; 261 262 if (v6_dst) { 263 v6.daddr.in6_u.u6_addr32[0] = v6_dst->p1; 264 v6.daddr.in6_u.u6_addr32[1] = v6_dst->p2; 265 v6.daddr.in6_u.u6_addr32[2] = v6_dst->p3; 266 v6.daddr.in6_u.u6_addr32[3] = v6_dst->p4; 267 } else { 268 v6.daddr.in6_u.u6_addr32[0] = nat46_prefix.p1; 269 v6.daddr.in6_u.u6_addr32[1] = nat46_prefix.p2; 270 v6.daddr.in6_u.u6_addr32[2] = nat46_prefix.p3; 271 v6.daddr.in6_u.u6_addr32[3] = bpf_htonl((bpf_ntohl(nat46_prefix.p4) & 0xFFFF0000) | 272 (bpf_ntohl(v4.daddr) & 0xFFFF)); 273 } 274 275 if (v4.protocol == IPPROTO_ICMP) 276 v6.nexthdr = IPPROTO_ICMPV6; 277 else 278 v6.nexthdr = v4.protocol; 279 v6.hop_limit = v4.ttl; 280 v4hdr_len = (v4.ihl << 2); 281 v6.payload_len = bpf_htons(bpf_ntohs(v4.tot_len) - v4hdr_len); 282 283 if (skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0) < 0) { 284 #ifdef DEBUG_NAT46 285 printk("v46 NAT: skb_modify failed\n"); 286 #endif 287 return DROP_WRITE_ERROR; 288 } 289 290 if (skb_store_bytes(skb, nh_off, &v6, sizeof(v6), 0) < 0 || 291 skb_store_bytes(skb, nh_off - 2, &protocol, 2, 0) < 0) 292 return DROP_WRITE_ERROR; 293 294 if (v4.protocol == IPPROTO_ICMP) { 295 csum = icmp4_to_icmp6(skb, nh_off + sizeof(v6)); 296 csum = ipv6_pseudohdr_checksum(&v6, IPPROTO_ICMPV6, 297 bpf_ntohs(v6.payload_len), csum); 298 } else { 299 csum = 0; 300 csum = csum_diff(&v4.saddr, 4, &v6.saddr, 16, csum); 301 csum = csum_diff(&v4.daddr, 4, &v6.daddr, 16, csum); 302 if (v4.protocol == IPPROTO_UDP) 303 csum_flags |= BPF_F_MARK_MANGLED_0; 304 } 305 306 /* 307 * get checksum from inner header tcp / udp / icmp 308 * undo ipv4 pseudohdr checksum and 309 * add ipv6 pseudohdr checksum 310 */ 311 csum_off = get_csum_offset(v6.nexthdr); 312 if (csum_off < 0) 313 return csum_off; 314 else 315 csum_off += sizeof(struct ipv6hdr); 316 317 if (l4_csum_replace(skb, nh_off + csum_off, 0, csum, csum_flags) < 0) 318 return DROP_CSUM_L4; 319 320 #ifdef DEBUG_NAT46 321 printk("v46 NAT: nh_off %d, csum_off %d\n", nh_off, csum_off); 322 #endif 323 return 0; 324 } 325 326 /* 327 * ipv6 to ipv4 stateless nat 328 * (s6,d6) -> (s4,d4) 329 * s4 = <ipv4-range>.<lxc-id> 330 * d4 = d6[96 .. 127] 331 */ 332 static inline int ipv6_to_ipv4(struct __sk_buff *skb, int nh_off, __be32 saddr) 333 { 334 struct ipv6hdr v6; 335 struct iphdr v4 = {}; 336 int csum_off; 337 __be32 csum = 0; 338 __be16 protocol = bpf_htons(ETH_P_IP); 339 __u64 csum_flags = BPF_F_PSEUDO_HDR; 340 341 if (skb_load_bytes(skb, nh_off, &v6, sizeof(v6)) < 0) 342 return DROP_INVALID; 343 344 /* Drop frames which carry extensions headers */ 345 if (ipv6_hdrlen(skb, nh_off, &v6.nexthdr) != sizeof(v6)) 346 return DROP_INVALID_EXTHDR; 347 348 /* build v4 header */ 349 v4.ihl = 0x5; 350 v4.version = 0x4; 351 v4.saddr = saddr; 352 v4.daddr = v6.daddr.in6_u.u6_addr32[3]; 353 if (v6.nexthdr == IPPROTO_ICMPV6) 354 v4.protocol = IPPROTO_ICMP; 355 else 356 v4.protocol = v6.nexthdr; 357 v4.ttl = v6.hop_limit; 358 v4.tot_len = bpf_htons(bpf_ntohs(v6.payload_len) + sizeof(v4)); 359 csum_off = offsetof(struct iphdr, check); 360 csum = csum_diff(NULL, 0, &v4, sizeof(v4), csum); 361 362 if (skb_change_proto(skb, bpf_htons(ETH_P_IP), 0) < 0) { 363 #ifdef DEBUG_NAT46 364 printk("v46 NAT: skb_modify failed\n"); 365 #endif 366 return DROP_WRITE_ERROR; 367 } 368 369 if (skb_store_bytes(skb, nh_off, &v4, sizeof(v4), 0) < 0 || 370 skb_store_bytes(skb, nh_off - 2, &protocol, 2, 0) < 0) 371 return DROP_WRITE_ERROR; 372 373 if (l3_csum_replace(skb, nh_off + csum_off, 0, csum, 0) < 0) 374 return DROP_CSUM_L3; 375 376 if (v6.nexthdr == IPPROTO_ICMPV6) { 377 __be32 csum1 = 0; 378 csum = icmp6_to_icmp4(skb, nh_off + sizeof(v4)); 379 csum1 = ipv6_pseudohdr_checksum(&v6, IPPROTO_ICMPV6, 380 bpf_ntohs(v6.payload_len), 0); 381 csum = csum - csum1; 382 } else { 383 csum = 0; 384 csum = csum_diff(&v6.saddr, 16, &v4.saddr, 4, csum); 385 csum = csum_diff(&v6.daddr, 16, &v4.daddr, 4, csum); 386 if (v4.protocol == IPPROTO_UDP) 387 csum_flags |= BPF_F_MARK_MANGLED_0; 388 } 389 /* 390 * get checksum from inner header tcp / udp / icmp 391 * undo ipv6 pseudohdr checksum and 392 * add ipv4 pseudohdr checksum 393 */ 394 csum_off = get_csum_offset(v4.protocol); 395 if (csum_off < 0) 396 return csum_off; 397 else 398 csum_off += sizeof(struct iphdr); 399 400 if (l4_csum_replace(skb, nh_off + csum_off, 0, csum, csum_flags) < 0) 401 return DROP_CSUM_L4; 402 403 #ifdef DEBUG_NAT46 404 printk("v64 NAT: nh_off %d, csum_off %d\n", nh_off, csum_off); 405 #endif 406 407 return 0; 408 } 409 #endif /* __LIB_NAT46__ */