github.com/fafucoder/cilium@v1.6.11/bpf/bpf_netdev.c (about)

     1  /*
     2   *  Copyright (C) 2016-2019 Authors of Cilium
     3   *
     4   *  This program is free software; you can redistribute it and/or modify
     5   *  it under the terms of the GNU General Public License as published by
     6   *  the Free Software Foundation; either version 2 of the License, or
     7   *  (at your option) any later version.
     8   *
     9   *  This program is distributed in the hope that it will be useful,
    10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12   *  GNU General Public License for more details.
    13   *
    14   *  You should have received a copy of the GNU General Public License
    15   *  along with this program; if not, write to the Free Software
    16   *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    17   */
    18  #include <node_config.h>
    19  #include <netdev_config.h>
    20  
/* These are configuration options which have a default value in their
    22   * respective header files and must thus be defined beforehand:
    23   *
    24   * Pass unknown ICMPv6 NS to stack */
    25  #define ACTION_UNKNOWN_ICMP6_NS TC_ACT_OK
    26  
    27  /* Include policy_can_access_ingress() */
    28  #define REQUIRES_CAN_ACCESS
    29  
    30  #include <bpf/api.h>
    31  
    32  #include <stdint.h>
    33  #include <stdio.h>
    34  
    35  #include "lib/utils.h"
    36  #include "lib/common.h"
    37  #include "lib/arp.h"
    38  #include "lib/maps.h"
    39  #include "lib/ipv6.h"
    40  #include "lib/ipv4.h"
    41  #include "lib/icmp6.h"
    42  #include "lib/eth.h"
    43  #include "lib/dbg.h"
    44  #include "lib/trace.h"
    45  #include "lib/l3.h"
    46  #include "lib/l4.h"
    47  #include "lib/policy.h"
    48  #include "lib/drop.h"
    49  #include "lib/encap.h"
    50  #include "lib/nat.h"
    51  #include "lib/lb.h"
    52  #include "lib/nodeport.h"
    53  
    54  #if defined FROM_HOST && (defined ENABLE_IPV4 || defined ENABLE_IPV6)
    55  static inline int rewrite_dmac_to_host(struct __sk_buff *skb, __u32 src_identity)
    56  {
    57  	/* When attached to cilium_host, we rewrite the DMAC to the mac of
    58  	 * cilium_host (peer) to ensure the packet is being considered to be
    59  	 * addressed to the host (PACKET_HOST) */
    60  	union macaddr cilium_net_mac = CILIUM_NET_MAC;
    61  
    62  	/* Rewrite to destination MAC of cilium_net (remote peer) */
    63  	if (eth_store_daddr(skb, (__u8 *) &cilium_net_mac.addr, 0) < 0)
    64  		return send_drop_notify_error(skb, src_identity, DROP_WRITE_ERROR, TC_ACT_OK, METRIC_INGRESS);
    65  
    66  	return TC_ACT_OK;
    67  }
    68  #endif
    69  
    70  #if defined ENABLE_IPV4 || defined ENABLE_IPV6
    71  static inline __u32 finalize_sec_ctx(__u32 secctx, __u32 src_identity)
    72  {
    73  #ifdef ENABLE_SECCTX_FROM_IPCACHE
    74  	/* If we could not derive the secctx from the packet itself but
    75  	 * from the ipcache instead, then use the ipcache identity. E.g.
    76  	 * used in ipvlan master device's datapath on ingress.
    77  	 */
    78  	if (secctx == WORLD_ID && !identity_is_reserved(src_identity))
    79  		secctx = src_identity;
    80  #endif /* ENABLE_SECCTX_FROM_IPCACHE */
    81  	return secctx;
    82  }
    83  #endif
    84  
    85  #ifdef ENABLE_IPV6
/* Derive the source security context of an IPv6 packet.
 *
 * If the source address shares the node's /64 prefix, the identity is
 * carried in the IPv6 flowlabel and is extracted from the first word of
 * the header. Otherwise the packet is attributed to WORLD_ID. When
 * FIXED_SRC_SECCTX is defined, that fixed value is returned instead.
 *
 * @skb:     packet context (unused here, kept for a uniform signature)
 * @node_ip: this node's router IP, used for the prefix match
 * @ip6:     pointer to the validated IPv6 header
 */
static inline __u32 derive_sec_ctx(struct __sk_buff *skb, const union v6addr *node_ip,
				   struct ipv6hdr *ip6)
{
#ifdef FIXED_SRC_SECCTX
	return FIXED_SRC_SECCTX;
#else
	if (ipv6_match_prefix_64((union v6addr *) &ip6->saddr, node_ip)) {
		/* Read initial 4 bytes of header and then extract flowlabel.
		 * The mask is applied before the byte swap, so
		 * IPV6_FLOWLABEL_MASK is expected to be in network byte
		 * order. */
		__u32 *tmp = (__u32 *) ip6;
		return bpf_ntohl(*tmp & IPV6_FLOWLABEL_MASK);
	}

	return WORLD_ID;
#endif
}
   101  
/* Ingress handling for IPv6 packets arriving on the netdev.
 *
 * Order of operations: NodePort LB (optional), ICMPv6 NS handling
 * (optional), source identity resolution (packet / ipcache / caller),
 * local endpoint delivery, tunnel encapsulation, and finally pass to the
 * stack (optionally marking the packet for IPsec encryption).
 *
 * @skb:          packet context
 * @src_identity: identity handed in by the caller (e.g. from the proxy);
 *                reserved identities may be overridden via the ipcache
 *
 * Returns a TC_ACT_* verdict or a negative DROP_* error.
 */
static inline int handle_ipv6(struct __sk_buff *skb, __u32 src_identity)
{
	struct remote_endpoint_info *info = NULL;
	union v6addr node_ip = { };
	void *data, *data_end;
	struct ipv6hdr *ip6;
	union v6addr *dst;
	int l4_off, l3_off = ETH_HLEN, hdrlen;
	struct endpoint_info *ep;
	__u8 nexthdr;
	__u32 secctx;

	if (!revalidate_data(skb, &data, &data_end, &ip6))
		return DROP_INVALID;

#ifdef ENABLE_NODEPORT
	/* Run the NodePort load balancer unless an earlier pass flagged the
	 * packet to skip it. */
	if (!bpf_skip_nodeport(skb)) {
		int ret = nodeport_lb6(skb, src_identity);
		if (ret < 0)
			return ret;
	}
#if defined(ENCAP_IFINDEX) || defined(NO_REDIRECT)
	/* See IPv4 case for NO_REDIRECT comments */
	return TC_ACT_OK;
#endif /* ENCAP_IFINDEX || NO_REDIRECT */
	/* Verifier workaround: modified ctx access. */
	if (!revalidate_data(skb, &data, &data_end, &ip6))
		return DROP_INVALID;
#endif /* ENABLE_NODEPORT */

	nexthdr = ip6->nexthdr;
	/* ipv6_hdrlen() walks extension headers and updates nexthdr to the
	 * final L4 protocol; a negative result is a DROP_* error. */
	hdrlen = ipv6_hdrlen(skb, l3_off, &nexthdr);
	if (hdrlen < 0)
		return hdrlen;

	l4_off = l3_off + hdrlen;

#ifdef HANDLE_NS
	/* Handle ICMPv6 (e.g. neighbour solicitations); unknown NS messages
	 * fall through per ACTION_UNKNOWN_ICMP6_NS. */
	if (unlikely(nexthdr == IPPROTO_ICMPV6)) {
		int ret = icmp6_handle(skb, ETH_HLEN, ip6, METRIC_INGRESS);
		if (IS_ERR(ret))
			return ret;
	}
#endif

	BPF_V6(node_ip, ROUTER_IP);
	secctx = derive_sec_ctx(skb, &node_ip, ip6);

	/* Packets from the proxy will already have a real identity. */
	if (identity_is_reserved(src_identity)) {
		union v6addr *src = (union v6addr *) &ip6->saddr;
		info = ipcache_lookup6(&IPCACHE_MAP, src, V6_CACHE_KEY_LEN);
		if (info != NULL) {
			__u32 sec_label = info->sec_label;
			if (sec_label)
				src_identity = info->sec_label;
		}
		cilium_dbg(skb, info ? DBG_IP_ID_MAP_SUCCEED6 : DBG_IP_ID_MAP_FAILED6,
			   ((__u32 *) src)[3], src_identity);
	}

	secctx = finalize_sec_ctx(secctx, src_identity);
#ifdef FROM_HOST
	if (1) {
		int ret;

		secctx = src_identity;

		/* If we are attached to cilium_host at egress, this will
		 * rewrite the destination mac address to the MAC of cilium_net */
		ret = rewrite_dmac_to_host(skb, secctx);
		/* DIRECT PACKET READ INVALID */
		if (IS_ERR(ret))
			return ret;
	}

	/* The MAC rewrite above invalidated direct packet access. */
	if (!revalidate_data(skb, &data, &data_end, &ip6))
		return DROP_INVALID;
#endif

	/* Lookup IPv6 address in list of local endpoints */
	if ((ep = lookup_ip6_endpoint(ip6)) != NULL) {
		/* Let through packets to the node-ip so they are
		 * processed by the local ip stack */
		if (ep->flags & ENDPOINT_F_HOST)
			return TC_ACT_OK;

		return ipv6_local_delivery(skb, l3_off, l4_off, secctx, ip6, nexthdr, ep, METRIC_INGRESS);
	}

#ifdef ENCAP_IFINDEX
	/* Remote destination known by the ipcache: encapsulate directly to
	 * the owning node. */
	dst = (union v6addr *) &ip6->daddr;
	info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN);
	if (info != NULL && info->tunnel_endpoint != 0) {
		int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint,
							 info->key,
							 secctx, TRACE_PAYLOAD_LEN);

		/* If IPSEC is needed recirc through ingress to use xfrm stack
		 * and then result will routed back through bpf_netdev on egress
		 * but with encrypt marks.
		 */
		if (ret == IPSEC_ENDPOINT)
			return TC_ACT_OK;
		else
			return ret;
	} else {
		struct endpoint_key key = {};
		int ret;

		/* IPv6 lookup key: daddr/96 */
		dst = (union v6addr *) &ip6->daddr;
		key.ip6.p1 = dst->p1;
		key.ip6.p2 = dst->p2;
		key.ip6.p3 = dst->p3;
		key.ip6.p4 = 0;
		key.family = ENDPOINT_KEY_IPV6;

		/* Fall back to the tunnel endpoint map; only a missing tunnel
		 * endpoint lets the packet continue to the stack below. */
		ret = encap_and_redirect_netdev(skb, &key, secctx, TRACE_PAYLOAD_LEN);
		if (ret == IPSEC_ENDPOINT)
			return TC_ACT_OK;
		else if (ret != DROP_NO_TUNNEL_ENDPOINT)
			return ret;
	}
#endif

	dst = (union v6addr *) &ip6->daddr;
	info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN);
#ifdef FROM_HOST
	if (info == NULL || info->sec_label == WORLD_ID) {
		/* See IPv4 comment. */
		return DROP_UNROUTABLE;
	}
#endif
#ifdef ENABLE_IPSEC
	/* Mark the packet so the xfrm stack encrypts it on the way out. */
	if (info && info->key && info->tunnel_endpoint) {
		__u8 key = get_min_encrypt_key(info->key);

		set_encrypt_key_cb(skb, key);
#ifdef IP_POOLS
		set_encrypt_dip(skb, info->tunnel_endpoint);
#else
		set_identity_cb(skb, secctx);
#endif
	}
#endif
	return TC_ACT_OK;
}
   250  
   251  __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_LXC) int tail_handle_ipv6(struct __sk_buff *skb)
   252  {
   253  	__u32 proxy_identity = skb->cb[CB_SRC_IDENTITY];
   254  	int ret;
   255  
   256  	skb->cb[CB_SRC_IDENTITY] = 0;
   257  	ret = handle_ipv6(skb, proxy_identity);
   258  	if (IS_ERR(ret))
   259  		return send_drop_notify_error(skb, proxy_identity, ret, TC_ACT_SHOT, METRIC_INGRESS);
   260  
   261  	return ret;
   262  }
   263  #endif /* ENABLE_IPV6 */
   264  
   265  #ifdef ENABLE_IPV4
/* Derive the source security context of an IPv4 packet.
 *
 * Unlike IPv6 there is no flowlabel to carry an identity, so this always
 * resolves to WORLD_ID (or the compile-time FIXED_SRC_SECCTX override);
 * the real identity is refined later via the ipcache.
 *
 * @skb: packet context (unused, kept for a uniform signature)
 * @ip4: validated IPv4 header (unused)
 */
static inline __u32 derive_ipv4_sec_ctx(struct __sk_buff *skb, struct iphdr *ip4)
{
#ifdef FIXED_SRC_SECCTX
	return FIXED_SRC_SECCTX;
#else
	return WORLD_ID;
#endif
}
   274  
/* Ingress handling for IPv4 packets arriving on the netdev.
 *
 * Mirrors handle_ipv6(): NodePort LB (optional), source identity
 * resolution, local endpoint delivery, tunnel encapsulation, and finally
 * pass to the stack (optionally marking the packet for IPsec).
 *
 * @skb:          packet context
 * @src_identity: identity handed in by the caller (e.g. from the proxy);
 *                reserved identities may be overridden via the ipcache
 *
 * Returns a TC_ACT_* verdict or a negative DROP_* error.
 */
static inline int handle_ipv4(struct __sk_buff *skb, __u32 src_identity)
{
	struct remote_endpoint_info *info = NULL;
	/* Only tuple.nexthdr is populated below; presumably kept for
	 * verifier/layout reasons — confirm before removing. */
	struct ipv4_ct_tuple tuple = {};
	struct endpoint_info *ep;
	void *data, *data_end;
	struct iphdr *ip4;
	int l4_off;
	__u32 secctx;

	if (!revalidate_data(skb, &data, &data_end, &ip4))
		return DROP_INVALID;

#ifdef ENABLE_NODEPORT
	/* Run the NodePort load balancer unless an earlier pass flagged the
	 * packet to skip it. */
	if (!bpf_skip_nodeport(skb)) {
		int ret = nodeport_lb4(skb, src_identity);
		if (ret < 0)
			return ret;
	}
#if defined(ENCAP_IFINDEX) || defined(NO_REDIRECT)
	/* We cannot redirect a packet to a local endpoint in the direct
	 * routing mode, as the redirect bypasses nf_conntrack table.
	 * This makes a second reply from the endpoint to be MASQUERADEd or
	 * to be DROPed by k8s's "--ctstate INVALID -j DROP" depending on
	 * which interface it was input. */
	return TC_ACT_OK;
#endif /* ENCAP_IFINDEX || NO_REDIRECT */
	/* Verifier workaround: modified ctx access. */
	if (!revalidate_data(skb, &data, &data_end, &ip4))
		return DROP_INVALID;
#endif /* ENABLE_NODEPORT */

	l4_off = ETH_HLEN + ipv4_hdrlen(ip4);
	secctx = derive_ipv4_sec_ctx(skb, ip4);
	tuple.nexthdr = ip4->protocol;

	/* Packets from the proxy will already have a real identity. */
	if (identity_is_reserved(src_identity)) {
		info = ipcache_lookup4(&IPCACHE_MAP, ip4->saddr, V4_CACHE_KEY_LEN);
		if (info != NULL) {
			__u32 sec_label = info->sec_label;
			if (sec_label) {
				/* When SNAT is enabled on traffic ingressing
				 * into Cilium, all traffic from the world will
				 * have a source IP of the host. It will only
				 * actually be from the host if "src_identity"
				 * (passed into this function) reports the src
				 * as the host. So we can ignore the ipcache
				 * if it reports the source as HOST_ID.
				 */
#ifndef ENABLE_EXTRA_HOST_DEV
				if (sec_label != HOST_ID)
#endif
					src_identity = sec_label;
			}
		}
		cilium_dbg(skb, info ? DBG_IP_ID_MAP_SUCCEED4 : DBG_IP_ID_MAP_FAILED4,
			   ip4->saddr, src_identity);
	}

	secctx = finalize_sec_ctx(secctx, src_identity);
#ifdef FROM_HOST
	if (1) {
		int ret;

		secctx = src_identity;

		/* If we are attached to cilium_host at egress, this will
		 * rewrite the destination mac address to the MAC of cilium_net */
		ret = rewrite_dmac_to_host(skb, secctx);
		/* DIRECT PACKET READ INVALID */
		if (IS_ERR(ret))
			return ret;
	}

	/* The MAC rewrite above invalidated direct packet access. */
	if (!revalidate_data(skb, &data, &data_end, &ip4))
		return DROP_INVALID;
#endif

	/* Lookup IPv4 address in list of local endpoints and host IPs */
	if ((ep = lookup_ip4_endpoint(ip4)) != NULL) {
		/* Let through packets to the node-ip so they are
		 * processed by the local ip stack */
		if (ep->flags & ENDPOINT_F_HOST)
#ifdef HOST_REDIRECT_TO_INGRESS
			/* This is required for L7 proxy to send packets to the host. */
			return redirect(HOST_IFINDEX, BPF_F_INGRESS);
#else
			return TC_ACT_OK;
#endif

		return ipv4_local_delivery(skb, ETH_HLEN, l4_off, secctx, ip4, ep, METRIC_INGRESS);
	}

#ifdef ENCAP_IFINDEX
	/* Remote destination known by the ipcache: encapsulate directly to
	 * the owning node. */
	info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN);
	if (info != NULL && info->tunnel_endpoint != 0) {
		int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint,
							 info->key,
							 secctx, TRACE_PAYLOAD_LEN);

		/* IPSEC_ENDPOINT: recirculate through ingress so the xfrm
		 * stack can encrypt; see the IPv6 path for details. */
		if (ret == IPSEC_ENDPOINT)
			return TC_ACT_OK;
		else
			return ret;
	} else {
		/* IPv4 lookup key: daddr & IPV4_MASK */
		struct endpoint_key key = {};
		int ret;

		key.ip4 = ip4->daddr & IPV4_MASK;
		key.family = ENDPOINT_KEY_IPV4;

		/* Fall back to the tunnel endpoint map; only a missing tunnel
		 * endpoint lets the packet continue below. */
		cilium_dbg(skb, DBG_NETDEV_ENCAP4, key.ip4, secctx);
		ret = encap_and_redirect_netdev(skb, &key, secctx, TRACE_PAYLOAD_LEN);
		if (ret == IPSEC_ENDPOINT)
			return TC_ACT_OK;
		else if (ret != DROP_NO_TUNNEL_ENDPOINT)
			return ret;
	}
#endif

#ifdef HOST_REDIRECT_TO_INGRESS
	return redirect(HOST_IFINDEX, BPF_F_INGRESS);
#else

	info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN);
#ifdef FROM_HOST
	if (info == NULL || info->sec_label == WORLD_ID) {
		/* We have received a packet for which no ipcache entry exists,
		 * we do not know what to do with this packet, drop it.
		 *
		 * The info == NULL test is solely to satisfy verifier requirements
		 * as in Cilium case we'll always hit the 0.0.0.0/32 catch-all
		 * entry. Therefore we need to test for WORLD_ID. It is clearly
		 * wrong to route a skb to cilium_host for which we don't know
		 * anything about it as otherwise we'll run into a routing loop.
		 */
		return DROP_UNROUTABLE;
	}
#endif
#ifdef ENABLE_IPSEC
	/* Mark the packet so the xfrm stack encrypts it on the way out. */
	if (info && info->key && info->tunnel_endpoint) {
		__u8 key = get_min_encrypt_key(info->key);

		set_encrypt_key_cb(skb, key);
#ifdef IP_POOLS
		set_encrypt_dip(skb, info->tunnel_endpoint);
#else
		set_identity_cb(skb, secctx);
#endif
	}
#endif
	return TC_ACT_OK;
#endif
}
   431  
   432  __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_LXC) int tail_handle_ipv4(struct __sk_buff *skb)
   433  {
   434  	__u32 proxy_identity = skb->cb[CB_SRC_IDENTITY];
   435  	int ret;
   436  
   437  	skb->cb[CB_SRC_IDENTITY] = 0;
   438  	ret = handle_ipv4(skb, proxy_identity);
   439  	if (IS_ERR(ret))
   440  		return send_drop_notify_error(skb, proxy_identity, ret, TC_ACT_SHOT, METRIC_INGRESS);
   441  
   442  	return ret;
   443  }
   444  
   445  #endif /* ENABLE_IPV4 */
   446  
   447  #ifdef ENABLE_IPSEC
   448  #ifndef ENCAP_IFINDEX
/* Rewrite the outer IPv4 source/destination of a to-be-encrypted packet
 * when IP_POOLS is enabled.
 *
 * With IP_POOLS, addresses have no per-node affinity, so the tunnel
 * destination is taken from skb->cb[4] (stashed by an earlier program)
 * and the source is forced to IPV4_ENCRYPT_IFACE. The IPv4 checksum is
 * patched incrementally for each rewrite.
 *
 * Returns 0 on success or a negative DROP_* error. Note: on success
 * control falls through the drop_err label with ret still 0.
 * Without IP_POOLS this is a no-op returning 0.
 */
static __always_inline int do_netdev_encrypt_pools(struct __sk_buff *skb)
{
	int ret = 0;
#ifdef IP_POOLS
	__u32 tunnel_endpoint = 0;
	void *data, *data_end;
	__u32 tunnel_source = IPV4_ENCRYPT_IFACE;
	struct iphdr *iphdr;
	__be32 sum;

	tunnel_endpoint = skb->cb[4];
	skb->mark = 0;

	if (!revalidate_data(skb, &data, &data_end, &iphdr)) {
		ret = DROP_INVALID;
		goto drop_err;
	}

	/* When IP_POOLS is enabled ip addresses are not
	 * assigned on a per node basis so lacking node
	 * affinity we can not use IP address to assign the
	 * destination IP. Instead rewrite it here from cb[].
	 */
	sum = csum_diff(&iphdr->daddr, 4, &tunnel_endpoint, 4, 0);
	if (skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr),
	    &tunnel_endpoint, 4, 0) < 0) {
		ret = DROP_WRITE_ERROR;
		goto drop_err;
	}
	if (l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check),
	    0, sum, 0) < 0) {
		ret = DROP_CSUM_L3;
		goto drop_err;
	}

	/* The store above invalidated direct packet access. */
	if (!revalidate_data(skb, &data, &data_end, &iphdr)) {
		ret = DROP_INVALID;
		goto drop_err;
	}

	sum = csum_diff(&iphdr->saddr, 4, &tunnel_source, 4, 0);
	if (skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, saddr),
	    &tunnel_source, 4, 0) < 0) {
		ret = DROP_WRITE_ERROR;
		goto drop_err;
	}
	if (l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check),
	    0, sum, 0) < 0) {
		ret = DROP_CSUM_L3;
		goto drop_err;
	}
drop_err:
#endif // IP_POOLS
	return ret;
}
   504  
   505  static __always_inline int do_netdev_encrypt_fib(struct __sk_buff *skb, int *encrypt_iface)
   506  {
   507  	int ret = 0;
   508  
   509  #ifdef HAVE_FIB_LOOKUP
   510  	struct bpf_fib_lookup fib_params = {};
   511  	void *data, *data_end;
   512  	struct iphdr *iphdr;
   513  	__be32 sum;
   514  	int err;
   515  
   516  	if (!revalidate_data(skb, &data, &data_end, &iphdr)) {
   517  		ret = DROP_INVALID;
   518  		goto drop_err_fib;
   519  	}
   520  
   521  	fib_params.family = AF_INET;
   522  	fib_params.ifindex = *encrypt_iface;
   523  
   524  	fib_params.ipv4_src = iphdr->saddr;
   525  	fib_params.ipv4_dst = iphdr->daddr;
   526  
   527  	err = fib_lookup(skb, &fib_params, sizeof(fib_params),
   528  		    BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT);
   529  	if (err != 0) {
   530  		ret = DROP_NO_FIB;
   531  		goto drop_err_fib;
   532  	}
   533  	if (eth_store_daddr(skb, fib_params.dmac, 0) < 0) {
   534  		ret = DROP_WRITE_ERROR;
   535  		goto drop_err_fib;
   536  	}
   537  	if (eth_store_saddr(skb, fib_params.smac, 0) < 0) {
   538  		ret = DROP_WRITE_ERROR;
   539  		goto drop_err_fib;
   540  	}
   541  	*encrypt_iface = fib_params.ifindex;
   542  drop_err_fib:
   543  #endif /* HAVE_FIB_LOOKUP */
   544  	return ret;
   545  }
   546  
   547  static __always_inline int do_netdev_encrypt(struct __sk_buff *skb)
   548  {
   549  	int encrypt_iface;
   550  	int ret = 0;
   551  
   552  #ifdef ENCRYPT_NODE
   553  	encrypt_iface = ENCRYPT_IFACE;
   554  #endif
   555  
   556  	ret = do_netdev_encrypt_pools(skb);
   557  	if (ret)
   558  		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS);
   559  
   560  	ret = do_netdev_encrypt_fib(skb, &encrypt_iface);
   561  	if (ret)
   562  		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS);
   563  
   564  	bpf_clear_cb(skb);
   565  #ifdef ENCRYPT_NODE
   566  	return redirect(encrypt_iface, 0);
   567  #else
   568  	return TC_ACT_OK;
   569  #endif
   570  }
   571  
   572  #else /* ENCAP_IFINDEX */
   573  static __always_inline int do_netdev_encrypt_encap(struct __sk_buff *skb)
   574  {
   575  	__u32 seclabel, tunnel_endpoint = 0;
   576  
   577  	seclabel = get_identity(skb);
   578  	tunnel_endpoint = skb->cb[4];
   579  	skb->mark = 0;
   580  
   581  	bpf_clear_cb(skb);
   582  	return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, TRACE_PAYLOAD_LEN);
   583  }
   584  
/* With ENCAP_IFINDEX defined, encryption is handled entirely via the
 * tunnel encapsulation path. */
static __always_inline int do_netdev_encrypt(struct __sk_buff *skb)
{
	return do_netdev_encrypt_encap(skb);
}
   589  #endif /* ENCAP_IFINDEX */
   590  #endif /* ENABLE_IPSEC */
   591  
/* Common netdev ingress logic shared by the section entry points.
 *
 * Diverts encryption-marked packets, clears per-packet cb/nodeport
 * state, emits a trace notification, and tail-calls into the
 * protocol-specific handler based on the ethertype.
 *
 * @skb:   packet context
 * @proto: ethertype in network byte order (as validated by the caller)
 *
 * Returns a TC_ACT_* verdict; unknown ethertypes pass to the stack.
 */
static __always_inline int do_netdev(struct __sk_buff *skb, __u16 proto)
{
	__u32 identity = 0;
	int ret;

#ifdef ENABLE_IPSEC
	/* Packets carrying the encrypt magic in skb->mark are diverted to
	 * the encryption path before any other processing. */
	if (1) {
		__u32 magic = skb->mark & MARK_MAGIC_HOST_MASK;

		if (magic == MARK_MAGIC_ENCRYPT)
			return do_netdev_encrypt(skb);
	}
#endif
	bpf_clear_cb(skb);
	bpf_clear_nodeport(skb);

#ifdef FROM_HOST
	if (1) {

#ifdef HOST_REDIRECT_TO_INGRESS
	/* Answer ARP requests on behalf of the host interface. */
	if (proto == bpf_htons(ETH_P_ARP)) {
		union macaddr mac = HOST_IFINDEX_MAC;
		return arp_respond(skb, &mac, BPF_F_INGRESS);
	}
#endif

		int trace = TRACE_FROM_HOST;
		bool from_proxy;

		/* Recover the identity the host/proxy encoded in skb->mark. */
		from_proxy = inherit_identity_from_host(skb, &identity);
		if (from_proxy)
			trace = TRACE_FROM_PROXY;
		send_trace_notify(skb, trace, identity, 0, 0,
				  skb->ingress_ifindex, 0, TRACE_PAYLOAD_LEN);
	}
#else
	send_trace_notify(skb, TRACE_FROM_STACK, 0, 0, 0, skb->ingress_ifindex,
			  0, TRACE_PAYLOAD_LEN);
#endif

	switch (proto) {
#ifdef ENABLE_IPV6
	case bpf_htons(ETH_P_IPV6):
		skb->cb[CB_SRC_IDENTITY] = identity;
		ep_tail_call(skb, CILIUM_CALL_IPV6_FROM_LXC);
		/* See comment below for IPv4. */
		return send_drop_notify_error(skb, identity, DROP_MISSED_TAIL_CALL,
					      TC_ACT_OK, METRIC_INGRESS);
#endif

#ifdef ENABLE_IPV4
	case bpf_htons(ETH_P_IP):
		skb->cb[CB_SRC_IDENTITY] = identity;
		ep_tail_call(skb, CILIUM_CALL_IPV4_FROM_LXC);
		/* We are not returning an error here to always allow traffic to
		 * the stack in case maps have become unavailable.
		 *
		 * Note: Since drop notification requires a tail call as well,
		 * this notification is unlikely to succeed. */
		return send_drop_notify_error(skb, identity, DROP_MISSED_TAIL_CALL,
		                              TC_ACT_OK, METRIC_INGRESS);
#endif

	default:
		/* Pass unknown traffic to the stack */
		ret = TC_ACT_OK;
	}

	return ret;
}
   662  
   663  __section("from-netdev")
   664  int from_netdev(struct __sk_buff *skb)
   665  {
   666  	int ret = ret;
   667  	__u16 proto;
   668  
   669  	if (!validate_ethertype(skb, &proto))
   670  		/* Pass unknown traffic to the stack */
   671  		return TC_ACT_OK;
   672  
   673  #ifdef ENABLE_MASQUERADE
   674  	cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_PRE, skb->ifindex);
   675  	ret = snat_process(skb, BPF_PKT_DIR);
   676  	if (ret != TC_ACT_OK) {
   677  		return ret;
   678  	}
   679  	cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_POST, skb->ifindex);
   680  #endif /* ENABLE_MASQUERADE */
   681  
   682  	return do_netdev(skb, proto);
   683  }
   684  
/* Entry point for packets leaving via the network device.
 *
 * With ENABLE_NODEPORT: performs reverse NAT for NodePort traffic,
 * short-circuiting if SNAT was already applied (mark check). With only
 * ENABLE_MASQUERADE: runs the SNAT engine. Otherwise passes everything
 * to the stack.
 */
__section("to-netdev")
int to_netdev(struct __sk_buff *skb)
{
	/* Cannot compile the section out entirely, test/bpf/verifier-test.sh
	 * workaround.
	 */
	int ret = TC_ACT_OK;
#if defined(ENABLE_NODEPORT) || defined(ENABLE_MASQUERADE)
#ifdef ENABLE_NODEPORT
	/* Skip packets that already went through SNAT on a previous pass. */
	if ((skb->mark & MARK_MAGIC_SNAT_DONE) == MARK_MAGIC_SNAT_DONE)
		return TC_ACT_OK;
	ret = nodeport_nat_fwd(skb, false);
	if (IS_ERR(ret))
		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS);
#else
	__u16 proto;
	if (!validate_ethertype(skb, &proto))
		/* Pass unknown traffic to the stack */
		return TC_ACT_OK;
	cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_PRE, skb->ifindex);
	ret = snat_process(skb, BPF_PKT_DIR);
	if (!ret)
		cilium_dbg_capture(skb, DBG_CAPTURE_SNAT_POST, skb->ifindex);
#endif /* ENABLE_NODEPORT */
#endif /* ENABLE_NODEPORT || ENABLE_MASQUERADE */
	return ret;
}
   712  
/* Declare the program license; required for GPL-only BPF helpers. */
BPF_LICENSE("GPL");