github.com/fafucoder/cilium@v1.6.11/bpf/bpf_netdev.c (about) 1 /* 2 * Copyright (C) 2016-2019 Authors of Cilium 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include <node_config.h> 19 #include <netdev_config.h> 20 21 /* These are configuartion options which have a default value in their 22 * respective header files and must thus be defined beforehand: 23 * 24 * Pass unknown ICMPv6 NS to stack */ 25 #define ACTION_UNKNOWN_ICMP6_NS TC_ACT_OK 26 27 /* Include policy_can_access_ingress() */ 28 #define REQUIRES_CAN_ACCESS 29 30 #include <bpf/api.h> 31 32 #include <stdint.h> 33 #include <stdio.h> 34 35 #include "lib/utils.h" 36 #include "lib/common.h" 37 #include "lib/arp.h" 38 #include "lib/maps.h" 39 #include "lib/ipv6.h" 40 #include "lib/ipv4.h" 41 #include "lib/icmp6.h" 42 #include "lib/eth.h" 43 #include "lib/dbg.h" 44 #include "lib/trace.h" 45 #include "lib/l3.h" 46 #include "lib/l4.h" 47 #include "lib/policy.h" 48 #include "lib/drop.h" 49 #include "lib/encap.h" 50 #include "lib/nat.h" 51 #include "lib/lb.h" 52 #include "lib/nodeport.h" 53 54 #if defined FROM_HOST && (defined ENABLE_IPV4 || defined ENABLE_IPV6) 55 static inline int rewrite_dmac_to_host(struct __sk_buff *skb, __u32 src_identity) 56 { 57 /* When attached to cilium_host, we rewrite the DMAC to the mac of 58 * cilium_host (peer) to ensure the packet is being considered to be 59 * addressed to the host (PACKET_HOST) */ 60 union macaddr cilium_net_mac = CILIUM_NET_MAC; 61 62 /* Rewrite to destination MAC of cilium_net (remote peer) */ 63 if (eth_store_daddr(skb, (__u8 *) &cilium_net_mac.addr, 0) < 0) 64 return send_drop_notify_error(skb, src_identity, DROP_WRITE_ERROR, TC_ACT_OK, METRIC_INGRESS); 65 66 return TC_ACT_OK; 67 } 68 #endif 69 70 #if defined ENABLE_IPV4 || defined ENABLE_IPV6 71 static inline __u32 finalize_sec_ctx(__u32 secctx, __u32 src_identity) 72 { 73 #ifdef ENABLE_SECCTX_FROM_IPCACHE 74 /* If we could not derive the secctx from the packet itself but 75 * from the ipcache instead, then use the ipcache identity. E.g. 76 * used in ipvlan master device's datapath on ingress. 77 */ 78 if (secctx == WORLD_ID && !identity_is_reserved(src_identity)) 79 secctx = src_identity; 80 #endif /* ENABLE_SECCTX_FROM_IPCACHE */ 81 return secctx; 82 } 83 #endif 84 85 #ifdef ENABLE_IPV6 86 static inline __u32 derive_sec_ctx(struct __sk_buff *skb, const union v6addr *node_ip, 87 struct ipv6hdr *ip6) 88 { 89 #ifdef FIXED_SRC_SECCTX 90 return FIXED_SRC_SECCTX; 91 #else 92 if (ipv6_match_prefix_64((union v6addr *) &ip6->saddr, node_ip)) { 93 /* Read initial 4 bytes of header and then extract flowlabel */ 94 __u32 *tmp = (__u32 *) ip6; 95 return bpf_ntohl(*tmp & IPV6_FLOWLABEL_MASK); 96 } 97 98 return WORLD_ID; 99 #endif 100 } 101 102 static inline int handle_ipv6(struct __sk_buff *skb, __u32 src_identity) 103 { 104 struct remote_endpoint_info *info = NULL; 105 union v6addr node_ip = { }; 106 void *data, *data_end; 107 struct ipv6hdr *ip6; 108 union v6addr *dst; 109 int l4_off, l3_off = ETH_HLEN, hdrlen; 110 struct endpoint_info *ep; 111 __u8 nexthdr; 112 __u32 secctx; 113 114 if (!revalidate_data(skb, &data, &data_end, &ip6)) 115 return DROP_INVALID; 116 117 #ifdef ENABLE_NODEPORT 118 if (!bpf_skip_nodeport(skb)) { 119 int ret = nodeport_lb6(skb, src_identity); 120 if (ret < 0) 121 return ret; 122 } 123 #if defined(ENCAP_IFINDEX) || defined(NO_REDIRECT) 124 /* See IPv4 case for NO_REDIRECT comments */ 125 return TC_ACT_OK; 126 #endif /* ENCAP_IFINDEX || NO_REDIRECT */ 127 /* Verifier workaround: modified ctx access. */ 128 if (!revalidate_data(skb, &data, &data_end, &ip6)) 129 return DROP_INVALID; 130 #endif /* ENABLE_NODEPORT */ 131 132 nexthdr = ip6->nexthdr; 133 hdrlen = ipv6_hdrlen(skb, l3_off, &nexthdr); 134 if (hdrlen < 0) 135 return hdrlen; 136 137 l4_off = l3_off + hdrlen; 138 139 #ifdef HANDLE_NS 140 if (unlikely(nexthdr == IPPROTO_ICMPV6)) { 141 int ret = icmp6_handle(skb, ETH_HLEN, ip6, METRIC_INGRESS); 142 if (IS_ERR(ret)) 143 return ret; 144 } 145 #endif 146 147 BPF_V6(node_ip, ROUTER_IP); 148 secctx = derive_sec_ctx(skb, &node_ip, ip6); 149 150 /* Packets from the proxy will already have a real identity. */ 151 if (identity_is_reserved(src_identity)) { 152 union v6addr *src = (union v6addr *) &ip6->saddr; 153 info = ipcache_lookup6(&IPCACHE_MAP, src, V6_CACHE_KEY_LEN); 154 if (info != NULL) { 155 __u32 sec_label = info->sec_label; 156 if (sec_label) 157 src_identity = info->sec_label; 158 } 159 cilium_dbg(skb, info ? DBG_IP_ID_MAP_SUCCEED6 : DBG_IP_ID_MAP_FAILED6, 160 ((__u32 *) src)[3], src_identity); 161 } 162 163 secctx = finalize_sec_ctx(secctx, src_identity); 164 #ifdef FROM_HOST 165 if (1) { 166 int ret; 167 168 secctx = src_identity; 169 170 /* If we are attached to cilium_host at egress, this will 171 * rewrite the destination mac address to the MAC of cilium_net */ 172 ret = rewrite_dmac_to_host(skb, secctx); 173 /* DIRECT PACKET READ INVALID */ 174 if (IS_ERR(ret)) 175 return ret; 176 } 177 178 if (!revalidate_data(skb, &data, &data_end, &ip6)) 179 return DROP_INVALID; 180 #endif 181 182 /* Lookup IPv4 address in list of local endpoints */ 183 if ((ep = lookup_ip6_endpoint(ip6)) != NULL) { 184 /* Let through packets to the node-ip so they are 185 * processed by the local ip stack */ 186 if (ep->flags & ENDPOINT_F_HOST) 187 return TC_ACT_OK; 188 189 return ipv6_local_delivery(skb, l3_off, l4_off, secctx, ip6, nexthdr, ep, METRIC_INGRESS); 190 } 191 192 #ifdef ENCAP_IFINDEX 193 dst = (union v6addr *) &ip6->daddr; 194 info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN); 195 if (info != NULL && info->tunnel_endpoint != 0) { 196 int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint, 197 info->key, 198 secctx, TRACE_PAYLOAD_LEN); 199 200 /* If IPSEC is needed recirc through ingress to use xfrm stack 201 * and then result will routed back through bpf_netdev on egress 202 * but with encrypt marks. 203 */ 204 if (ret == IPSEC_ENDPOINT) 205 return TC_ACT_OK; 206 else 207 return ret; 208 } else { 209 struct endpoint_key key = {}; 210 int ret; 211 212 /* IPv6 lookup key: daddr/96 */ 213 dst = (union v6addr *) &ip6->daddr; 214 key.ip6.p1 = dst->p1; 215 key.ip6.p2 = dst->p2; 216 key.ip6.p3 = dst->p3; 217 key.ip6.p4 = 0; 218 key.family = ENDPOINT_KEY_IPV6; 219 220 ret = encap_and_redirect_netdev(skb, &key, secctx, TRACE_PAYLOAD_LEN); 221 if (ret == IPSEC_ENDPOINT) 222 return TC_ACT_OK; 223 else if (ret != DROP_NO_TUNNEL_ENDPOINT) 224 return ret; 225 } 226 #endif 227 228 dst = (union v6addr *) &ip6->daddr; 229 info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN); 230 #ifdef FROM_HOST 231 if (info == NULL || info->sec_label == WORLD_ID) { 232 /* See IPv4 comment. */ 233 return DROP_UNROUTABLE; 234 } 235 #endif 236 #ifdef ENABLE_IPSEC 237 if (info && info->key && info->tunnel_endpoint) { 238 __u8 key = get_min_encrypt_key(info->key); 239 240 set_encrypt_key_cb(skb, key); 241 #ifdef IP_POOLS 242 set_encrypt_dip(skb, info->tunnel_endpoint); 243 #else 244 set_identity_cb(skb, secctx); 245 #endif 246 } 247 #endif 248 return TC_ACT_OK; 249 } 250 251 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_LXC) int tail_handle_ipv6(struct __sk_buff *skb) 252 { 253 __u32 proxy_identity = skb->cb[CB_SRC_IDENTITY]; 254 int ret; 255 256 skb->cb[CB_SRC_IDENTITY] = 0; 257 ret = handle_ipv6(skb, proxy_identity); 258 if (IS_ERR(ret)) 259 return send_drop_notify_error(skb, proxy_identity, ret, TC_ACT_SHOT, METRIC_INGRESS); 260 261 return ret; 262 } 263 #endif /* ENABLE_IPV6 */ 264 265 #ifdef ENABLE_IPV4 266 static inline __u32 derive_ipv4_sec_ctx(struct __sk_buff *skb, struct iphdr *ip4) 267 { 268 #ifdef FIXED_SRC_SECCTX 269 return FIXED_SRC_SECCTX; 270 #else 271 return WORLD_ID; 272 #endif 273 } 274 275 static inline int handle_ipv4(struct __sk_buff *skb, __u32 src_identity) 276 { 277 struct remote_endpoint_info *info = NULL; 278 struct ipv4_ct_tuple tuple = {}; 279 struct endpoint_info *ep; 280 void *data, *data_end; 281 struct iphdr *ip4; 282 int l4_off; 283 __u32 secctx; 284 285 if (!revalidate_data(skb, &data, &data_end, &ip4)) 286 return DROP_INVALID; 287 288 #ifdef ENABLE_NODEPORT 289 if (!bpf_skip_nodeport(skb)) { 290 int ret = nodeport_lb4(skb, src_identity); 291 if (ret < 0) 292 return ret; 293 } 294 #if defined(ENCAP_IFINDEX) || defined(NO_REDIRECT) 295 /* We cannot redirect a packet to a local endpoint in the direct 296 * routing mode, as the redirect bypasses nf_conntrack table. 297 * This makes a second reply from the endpoint to be MASQUERADEd or 298 * to be DROPed by k8s's "--ctstate INVALID -j DROP" depending via 299 * which interface it was inputed. */ 300 return TC_ACT_OK; 301 #endif /* ENCAP_IFINDEX || NO_REDIRECT */ 302 /* Verifier workaround: modified ctx access. */ 303 if (!revalidate_data(skb, &data, &data_end, &ip4)) 304 return DROP_INVALID; 305 #endif /* ENABLE_NODEPORT */ 306 307 l4_off = ETH_HLEN + ipv4_hdrlen(ip4); 308 secctx = derive_ipv4_sec_ctx(skb, ip4); 309 tuple.nexthdr = ip4->protocol; 310 311 /* Packets from the proxy will already have a real identity. */ 312 if (identity_is_reserved(src_identity)) { 313 info = ipcache_lookup4(&IPCACHE_MAP, ip4->saddr, V4_CACHE_KEY_LEN); 314 if (info != NULL) { 315 __u32 sec_label = info->sec_label; 316 if (sec_label) { 317 /* When SNAT is enabled on traffic ingressing 318 * into Cilium, all traffic from the world will 319 * have a source IP of the host. It will only 320 * actually be from the host if "src_identity" 321 * (passed into this function) reports the src 322 * as the host. So we can ignore the ipcache 323 * if it reports the source as HOST_ID. 324 */ 325 #ifndef ENABLE_EXTRA_HOST_DEV 326 if (sec_label != HOST_ID) 327 #endif 328 src_identity = sec_label; 329 } 330 } 331 cilium_dbg(skb, info ? DBG_IP_ID_MAP_SUCCEED4 : DBG_IP_ID_MAP_FAILED4, 332 ip4->saddr, src_identity); 333 } 334 335 secctx = finalize_sec_ctx(secctx, src_identity); 336 #ifdef FROM_HOST 337 if (1) { 338 int ret; 339 340 secctx = src_identity; 341 342 /* If we are attached to cilium_host at egress, this will 343 * rewrite the destination mac address to the MAC of cilium_net */ 344 ret = rewrite_dmac_to_host(skb, secctx); 345 /* DIRECT PACKET READ INVALID */ 346 if (IS_ERR(ret)) 347 return ret; 348 } 349 350 if (!revalidate_data(skb, &data, &data_end, &ip4)) 351 return DROP_INVALID; 352 #endif 353 354 /* Lookup IPv4 address in list of local endpoints and host IPs */ 355 if ((ep = lookup_ip4_endpoint(ip4)) != NULL) { 356 /* Let through packets to the node-ip so they are 357 * processed by the local ip stack */ 358 if (ep->flags & ENDPOINT_F_HOST) 359 #ifdef HOST_REDIRECT_TO_INGRESS 360 /* This is required for L7 proxy to send packets to the host. */ 361 return redirect(HOST_IFINDEX, BPF_F_INGRESS); 362 #else 363 return TC_ACT_OK; 364 #endif 365 366 return ipv4_local_delivery(skb, ETH_HLEN, l4_off, secctx, ip4, ep, METRIC_INGRESS); 367 } 368 369 #ifdef ENCAP_IFINDEX 370 info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN); 371 if (info != NULL && info->tunnel_endpoint != 0) { 372 int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint, 373 info->key, 374 secctx, TRACE_PAYLOAD_LEN); 375 376 if (ret == IPSEC_ENDPOINT) 377 return TC_ACT_OK; 378 else 379 return ret; 380 } else { 381 /* IPv4 lookup key: daddr & IPV4_MASK */ 382 struct endpoint_key key = {}; 383 int ret; 384 385 key.ip4 = ip4->daddr & IPV4_MASK; 386 key.family = ENDPOINT_KEY_IPV4; 387 388 cilium_dbg(skb, DBG_NETDEV_ENCAP4, key.ip4, secctx); 389 ret = encap_and_redirect_netdev(skb, &key, secctx, TRACE_PAYLOAD_LEN); 390 if (ret == IPSEC_ENDPOINT) 391 return TC_ACT_OK; 392 else if (ret != DROP_NO_TUNNEL_ENDPOINT) 393 return ret; 394 } 395 #endif 396 397 #ifdef HOST_REDIRECT_TO_INGRESS 398 return redirect(HOST_IFINDEX, BPF_F_INGRESS); 399 #else 400 401 info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN); 402 #ifdef FROM_HOST 403 if (info == NULL || info->sec_label == WORLD_ID) { 404 /* We have received a packet for which no ipcache entry exists, 405 * we do not know what to do with this packet, drop it. 406 * 407 * The info == NULL test is soley to satisfy verifier requirements 408 * as in Cilium case we'll always hit the 0.0.0.0/32 catch-all 409 * entry. Therefore we need to test for WORLD_ID. It is clearly 410 * wrong to route a skb to cilium_host for which we don't know 411 * anything about it as otherwise we'll run into a routing loop. 412 */ 413 return DROP_UNROUTABLE; 414 } 415 #endif 416 #ifdef ENABLE_IPSEC 417 if (info && info->key && info->tunnel_endpoint) { 418 __u8 key = get_min_encrypt_key(info->key); 419 420 set_encrypt_key_cb(skb, key); 421 #ifdef IP_POOLS 422 set_encrypt_dip(skb, info->tunnel_endpoint); 423 #else 424 set_identity_cb(skb, secctx); 425 #endif 426 } 427 #endif 428 return TC_ACT_OK; 429 #endif 430 } 431 432 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_LXC) int tail_handle_ipv4(struct __sk_buff *skb) 433 { 434 __u32 proxy_identity = skb->cb[CB_SRC_IDENTITY]; 435 int ret; 436 437 skb->cb[CB_SRC_IDENTITY] = 0; 438 ret = handle_ipv4(skb, proxy_identity); 439 if (IS_ERR(ret)) 440 return send_drop_notify_error(skb, proxy_identity, ret, TC_ACT_SHOT, METRIC_INGRESS); 441 442 return ret; 443 } 444 445 #endif /* ENABLE_IPV4 */ 446 447 #ifdef ENABLE_IPSEC 448 #ifndef ENCAP_IFINDEX 449 static __always_inline int do_netdev_encrypt_pools(struct __sk_buff *skb) 450 { 451 int ret = 0; 452 #ifdef IP_POOLS 453 __u32 tunnel_endpoint = 0; 454 void *data, *data_end; 455 __u32 tunnel_source = IPV4_ENCRYPT_IFACE; 456 struct iphdr *iphdr; 457 __be32 sum; 458 459 tunnel_endpoint = skb->cb[4]; 460 skb->mark = 0; 461 462 if (!revalidate_data(skb, &data, &data_end, &iphdr)) { 463 ret = DROP_INVALID; 464 goto drop_err; 465 } 466 467 /* When IP_POOLS is enabled ip addresses are not 468 * assigned on a per node basis so lacking node 469 * affinity we can not use IP address to assign the 470 * destination IP. Instead rewrite it here from cb[]. 471 */ 472 sum = csum_diff(&iphdr->daddr, 4, &tunnel_endpoint, 4, 0); 473 if (skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr), 474 &tunnel_endpoint, 4, 0) < 0) { 475 ret = DROP_WRITE_ERROR; 476 goto drop_err; 477 } 478 if (l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), 479 0, sum, 0) < 0) { 480 ret = DROP_CSUM_L3; 481 goto drop_err; 482 } 483 484 if (!revalidate_data(skb, &data, &data_end, &iphdr)) { 485 ret = DROP_INVALID; 486 goto drop_err; 487 } 488 489 sum = csum_diff(&iphdr->saddr, 4, &tunnel_source, 4, 0); 490 if (skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, saddr), 491 &tunnel_source, 4, 0) < 0) { 492 ret = DROP_WRITE_ERROR; 493 goto drop_err; 494 } 495 if (l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), 496 0, sum, 0) < 0) { 497 ret = DROP_CSUM_L3; 498 goto drop_err; 499 } 500 drop_err: 501 #endif // IP_POOLS 502 return ret; 503 } 504 505 static __always_inline int do_netdev_encrypt_fib(struct __sk_buff *skb, int *encrypt_iface) 506 { 507 int ret = 0; 508 509 #ifdef HAVE_FIB_LOOKUP 510 struct bpf_fib_lookup fib_params = {}; 511 void *data, *data_end; 512 struct iphdr *iphdr; 513 __be32 sum; 514 int err; 515 516 if (!revalidate_data(skb, &data, &data_end, &iphdr)) { 517 ret = DROP_INVALID; 518 goto drop_err_fib; 519 } 520 521 fib_params.family = AF_INET; 522 fib_params.ifindex = *encrypt_iface; 523 524 fib_params.ipv4_src = iphdr->saddr; 525 fib_params.ipv4_dst = iphdr->daddr; 526 527 err = fib_lookup(skb, &fib_params, sizeof(fib_params), 528 BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT); 529 if (err != 0) { 530 ret = DROP_NO_FIB; 531 goto drop_err_fib; 532 } 533 if (eth_store_daddr(skb, fib_params.dmac, 0) < 0) { 534 ret = DROP_WRITE_ERROR; 535 goto drop_err_fib; 536 } 537 if (eth_store_saddr(skb, fib_params.smac, 0) < 0) { 538 ret = DROP_WRITE_ERROR; 539 goto drop_err_fib; 540 } 541 *encrypt_iface = fib_params.ifindex; 542 drop_err_fib: 543 #endif /* HAVE_FIB_LOOKUP */ 544 return ret; 545 } 546 547 static __always_inline int do_netdev_encrypt(struct __sk_buff *skb) 548 { 549 int encrypt_iface; 550 int ret = 0; 551 552 #ifdef ENCRYPT_NODE 553 encrypt_iface = ENCRYPT_IFACE; 554 #endif 555 556 ret = do_netdev_encrypt_pools(skb); 557 if (ret) 558 return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS); 559 560 ret = do_netdev_encrypt_fib(skb, &encrypt_iface); 561 if (ret) 562 return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS); 563 564 bpf_clear_cb(skb); 565 #ifdef ENCRYPT_NODE 566 return redirect(encrypt_iface, 0); 567 #else 568 return TC_ACT_OK; 569 #endif 570 } 571 572 #else /* ENCAP_IFINDEX */ 573 static __always_inline int do_netdev_encrypt_encap(struct __sk_buff *skb) 574 { 575 __u32 seclabel, tunnel_endpoint = 0; 576 577 seclabel = get_identity(skb); 578 tunnel_endpoint = skb->cb[4]; 579 skb->mark = 0; 580 581 bpf_clear_cb(skb); 582 return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, TRACE_PAYLOAD_LEN); 583 } 584 585 static __always_inline int do_netdev_encrypt(struct __sk_buff *skb) 586 { 587 return do_netdev_encrypt_encap(skb); 588 } 589 #endif /* ENCAP_IFINDEX */ 590 #endif /* ENABLE_IPSEC */ 591 592 static __always_inline int do_netdev(struct __sk_buff *skb, __u16 proto) 593 { 594 __u32 identity = 0; 595 int ret; 596 597 #ifdef ENABLE_IPSEC 598 if (1) { 599 __u32 magic = skb->mark & MARK_MAGIC_HOST_MASK; 600 601 if (magic == MARK_MAGIC_ENCRYPT) 602 return do_netdev_encrypt(skb); 603 } 604 #endif 605 bpf_clear_cb(skb); 606 bpf_clear_nodeport(skb); 607 608 #ifdef FROM_HOST 609 if (1) { 610 611 #ifdef HOST_REDIRECT_TO_INGRESS 612 if (proto == bpf_htons(ETH_P_ARP)) { 613 union macaddr mac = HOST_IFINDEX_MAC; 614 return arp_respond(skb, &mac, BPF_F_INGRESS); 615 } 616 #endif 617 618 int trace = TRACE_FROM_HOST; 619 bool from_proxy; 620 621 from_proxy = inherit_identity_from_host(skb, &identity); 622 if (from_proxy) 623 trace = TRACE_FROM_PROXY; 624 send_trace_notify(skb, trace, identity, 0, 0, 625 skb->ingress_ifindex, 0, TRACE_PAYLOAD_LEN); 626 } 627 #else 628 send_trace_notify(skb, TRACE_FROM_STACK, 0, 0, 0, skb->ingress_ifindex, 629 0, TRACE_PAYLOAD_LEN); 630 #endif 631 632 switch (proto) { 633 #ifdef ENABLE_IPV6 634 case bpf_htons(ETH_P_IPV6): 635 skb->cb[CB_SRC_IDENTITY] = identity; 636 ep_tail_call(skb, CILIUM_CALL_IPV6_FROM_LXC); 637 /* See comment below for IPv4. */ 638 return send_drop_notify_error(skb, identity, DROP_MISSED_TAIL_CALL, 639 TC_ACT_OK, METRIC_INGRESS); 640 #endif 641 642 #ifdef ENABLE_IPV4 643 case bpf_htons(ETH_P_IP): 644 skb->cb[CB_SRC_IDENTITY] = identity; 645 ep_tail_call(skb, CILIUM_CALL_IPV4_FROM_LXC); 646 /* We are not returning an error here to always allow traffic to 647 * the stack in case maps have become unavailable. 648 * 649 * Note: Since drop notification requires a tail call as well, 650 * this notification is unlikely to succeed. */ 651 return send_drop_notify_error(skb, identity, DROP_MISSED_TAIL_CALL, 652 TC_ACT_OK, METRIC_INGRESS); 653 #endif 654 655 default: 656 /* Pass unknown traffic to the stack */ 657 ret = TC_ACT_OK; 658 } 659 660 return ret; 661 } 662 663 __section("from-netdev") 664 int from_netdev(struct __sk_buff *skb) 665 { 666 int ret = ret; 667 __u16 proto; 668 669 if (!validate_ethertype(skb, &proto)) 670 /* Pass unknown traffic to the stack */ 671 return TC_ACT_OK; 672 673 #ifdef ENABLE_MASQUERADE 674 cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_PRE, skb->ifindex); 675 ret = snat_process(skb, BPF_PKT_DIR); 676 if (ret != TC_ACT_OK) { 677 return ret; 678 } 679 cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_POST, skb->ifindex); 680 #endif /* ENABLE_MASQUERADE */ 681 682 return do_netdev(skb, proto); 683 } 684 685 __section("to-netdev") 686 int to_netdev(struct __sk_buff *skb) 687 { 688 /* Cannot compile the section out entriely, test/bpf/verifier-test.sh 689 * workaround. 690 */ 691 int ret = TC_ACT_OK; 692 #if defined(ENABLE_NODEPORT) || defined(ENABLE_MASQUERADE) 693 #ifdef ENABLE_NODEPORT 694 if ((skb->mark & MARK_MAGIC_SNAT_DONE) == MARK_MAGIC_SNAT_DONE) 695 return TC_ACT_OK; 696 ret = nodeport_nat_fwd(skb, false); 697 if (IS_ERR(ret)) 698 return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS); 699 #else 700 __u16 proto; 701 if (!validate_ethertype(skb, &proto)) 702 /* Pass unknown traffic to the stack */ 703 return TC_ACT_OK; 704 cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_PRE, skb->ifindex); 705 ret = snat_process(skb, BPF_PKT_DIR); 706 if (!ret) 707 cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_POST, skb->ifindex); 708 #endif /* ENABLE_NODEPORT */ 709 #endif /* ENABLE_NODEPORT || ENABLE_MASQUERADE */ 710 return ret; 711 } 712 713 BPF_LICENSE("GPL");