github.com/fafucoder/cilium@v1.6.11/bpf/bpf_overlay.c (about)

     1  /*
     2   *  Copyright (C) 2016-2019 Authors of Cilium
     3   *
     4   *  This program is free software; you can redistribute it and/or modify
     5   *  it under the terms of the GNU General Public License as published by
     6   *  the Free Software Foundation; either version 2 of the License, or
     7   *  (at your option) any later version.
     8   *
     9   *  This program is distributed in the hope that it will be useful,
    10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12   *  GNU General Public License for more details.
    13   *
    14   *  You should have received a copy of the GNU General Public License
    15   *  along with this program; if not, write to the Free Software
    16   *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    17   */
    18  #include <node_config.h>
    19  #include <netdev_config.h>
    20  
    21  #include <bpf/api.h>
    22  
    23  #include <stdint.h>
    24  #include <stdio.h>
    25  
    26  #include <linux/if_packet.h>
    27  
    28  #include "lib/tailcall.h"
    29  #include "lib/utils.h"
    30  #include "lib/common.h"
    31  #include "lib/maps.h"
    32  #include "lib/ipv6.h"
    33  #include "lib/eth.h"
    34  #include "lib/dbg.h"
    35  #include "lib/trace.h"
    36  #include "lib/l3.h"
    37  #include "lib/drop.h"
    38  #include "lib/policy.h"
    39  #include "lib/nodeport.h"
    40  
    41  #ifdef ENABLE_IPV6
    42  static inline int handle_ipv6(struct __sk_buff *skb, __u32 *identity)
    43  {
    44  	int ret, l4_off, l3_off = ETH_HLEN, hdrlen;
    45  	void *data_end, *data;
    46  	struct ipv6hdr *ip6;
    47  	struct bpf_tunnel_key key = {};
    48  	struct endpoint_info *ep;
    49  	bool decrypted;
    50  
    51  	/* verifier workaround (dereference of modified ctx ptr) */
    52  	if (!revalidate_data_first(skb, &data, &data_end, &ip6))
    53  		return DROP_INVALID;
    54  #ifdef ENABLE_NODEPORT
    55  	if (!bpf_skip_nodeport(skb)) {
    56  		int ret = nodeport_lb6(skb, *identity);
    57  		if (ret < 0)
    58  			return ret;
    59  	}
    60  #endif
    61  	ret = encap_remap_v6_host_address(skb, false);
    62  	if (unlikely(ret < 0))
    63  		return ret;
    64  
    65  	if (!revalidate_data(skb, &data, &data_end, &ip6))
    66  		return DROP_INVALID;
    67  
    68  	decrypted = ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT);
    69  	if (decrypted) {
    70  		*identity = get_identity(skb);
    71  	} else {
    72  		if (unlikely(skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0))
    73  			return DROP_NO_TUNNEL_KEY;
    74  		*identity = key.tunnel_id;
    75  	}
    76  
    77  	cilium_dbg(skb, DBG_DECAP, key.tunnel_id, key.tunnel_label);
    78  
    79  #ifdef ENABLE_IPSEC
    80  	if (!decrypted) {
    81  		/* IPSec is not currently enforce (feature coming soon)
    82  		 * so for now just handle normally
    83  		 */
    84  		if (ip6->nexthdr != IPPROTO_ESP) {
    85  			update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT);
    86  			goto not_esp;
    87  		}
    88  
    89  		/* Decrypt "key" is determined by SPI */
    90  		skb->mark = MARK_MAGIC_DECRYPT;
    91  		set_identity(skb, key.tunnel_id);
    92  		/* To IPSec stack on cilium_vxlan we are going to pass
    93  		 * this up the stack but eth_type_trans has already labeled
    94  		 * this as an OTHERHOST type packet. To avoid being dropped
    95  		 * by IP stack before IPSec can be processed mark as a HOST
    96  		 * packet.
    97  		 */
    98  		skb_change_type(skb, PACKET_HOST);
    99  		return TC_ACT_OK;
   100  	} else {
   101  		key.tunnel_id = get_identity(skb);
   102  		skb->mark = 0;
   103  	}
   104  not_esp:
   105  #endif
   106  
   107  	/* Lookup IPv6 address in list of local endpoints */
   108  	if ((ep = lookup_ip6_endpoint(ip6)) != NULL) {
   109  		/* Let through packets to the node-ip so they are
   110  		 * processed by the local ip stack */
   111  		if (ep->flags & ENDPOINT_F_HOST)
   112  			goto to_host;
   113  
   114  		__u8 nexthdr = ip6->nexthdr;
   115  		hdrlen = ipv6_hdrlen(skb, l3_off, &nexthdr);
   116  		if (hdrlen < 0)
   117  			return hdrlen;
   118  
   119  		l4_off = l3_off + hdrlen;
   120  		return ipv6_local_delivery(skb, l3_off, l4_off, key.tunnel_id, ip6, nexthdr, ep, METRIC_INGRESS);
   121  	}
   122  
   123  to_host:
   124  #ifdef HOST_IFINDEX
   125  	if (1) {
   126  		union macaddr host_mac = HOST_IFINDEX_MAC;
   127  		union macaddr router_mac = NODE_MAC;
   128  		int ret;
   129  
   130  		ret = ipv6_l3(skb, ETH_HLEN, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, METRIC_INGRESS);
   131  		if (ret != TC_ACT_OK)
   132  			return ret;
   133  
   134  		cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX);
   135  		return redirect(HOST_IFINDEX, 0);
   136  	}
   137  #else
   138  	return TC_ACT_OK;
   139  #endif
   140  }
   141  
   142  __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_LXC) int tail_handle_ipv6(struct __sk_buff *skb)
   143  {
   144  	__u32 src_identity = 0;
   145  	int ret = handle_ipv6(skb, &src_identity);
   146  
   147  	if (IS_ERR(ret))
   148  		return send_drop_notify_error(skb, src_identity, ret, TC_ACT_SHOT, METRIC_INGRESS);
   149  
   150  	return ret;
   151  }
   152  #endif /* ENABLE_IPV6 */
   153  
   154  #ifdef ENABLE_IPV4
   155  static inline int handle_ipv4(struct __sk_buff *skb, __u32 *identity)
   156  {
   157  	void *data_end, *data;
   158  	struct iphdr *ip4;
   159  	struct endpoint_info *ep;
   160  	struct bpf_tunnel_key key = {};
   161  	bool decrypted;
   162  	int l4_off;
   163  
   164  	/* verifier workaround (dereference of modified ctx ptr) */
   165  	if (!revalidate_data_first(skb, &data, &data_end, &ip4))
   166  		return DROP_INVALID;
   167  #ifdef ENABLE_NODEPORT
   168  	if (!bpf_skip_nodeport(skb)) {
   169  		int ret = nodeport_lb4(skb, *identity);
   170  		if (ret < 0)
   171  			return ret;
   172  	}
   173  #endif
   174  	if (!revalidate_data(skb, &data, &data_end, &ip4))
   175  		return DROP_INVALID;
   176  
   177  	decrypted = ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT);
   178  	/* If packets are decrypted the key has already been pushed into metadata. */
   179  	if (decrypted) {
   180  		*identity = get_identity(skb);
   181  	} else {
   182  		if (unlikely(skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0))
   183  			return DROP_NO_TUNNEL_KEY;
   184  		*identity = key.tunnel_id;
   185  	}
   186  
   187  	l4_off = ETH_HLEN + ipv4_hdrlen(ip4);
   188  #ifdef ENABLE_IPSEC
   189  	if (!decrypted) {
   190  		/* IPSec is not currently enforce (feature coming soon)
   191  		 * so for now just handle normally
   192  		 */
   193  		if (ip4->protocol != IPPROTO_ESP) {
   194  			update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT);
   195  			goto not_esp;
   196  		}
   197  
   198  		skb->mark = MARK_MAGIC_DECRYPT;
   199  		set_identity(skb, key.tunnel_id);
   200  		/* To IPSec stack on cilium_vxlan we are going to pass
   201  		 * this up the stack but eth_type_trans has already labeled
   202  		 * this as an OTHERHOST type packet. To avoid being dropped
   203  		 * by IP stack before IPSec can be processed mark as a HOST
   204  		 * packet.
   205  		 */
   206  		skb_change_type(skb, PACKET_HOST);
   207  		return TC_ACT_OK;
   208  	} else {
   209  		key.tunnel_id = get_identity(skb);
   210  		skb->mark = 0;
   211  	}
   212  not_esp:
   213  #endif
   214  
   215  	/* Lookup IPv4 address in list of local endpoints */
   216  	if ((ep = lookup_ip4_endpoint(ip4)) != NULL) {
   217  		/* Let through packets to the node-ip so they are
   218  		 * processed by the local ip stack */
   219  		if (ep->flags & ENDPOINT_F_HOST)
   220  			goto to_host;
   221  
   222  		return ipv4_local_delivery(skb, ETH_HLEN, l4_off, key.tunnel_id, ip4, ep, METRIC_INGRESS);
   223  	}
   224  
   225  to_host:
   226  #ifdef HOST_IFINDEX
   227  	if (1) {
   228  		union macaddr host_mac = HOST_IFINDEX_MAC;
   229  		union macaddr router_mac = NODE_MAC;
   230  		int ret;
   231  
   232  		ret = ipv4_l3(skb, ETH_HLEN, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, ip4);
   233  		if (ret != TC_ACT_OK)
   234  			return ret;
   235  
   236  		cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX);
   237  		return redirect(HOST_IFINDEX, 0);
   238  	}
   239  #else
   240  	return TC_ACT_OK;
   241  #endif
   242  }
   243  
   244  __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_LXC) int tail_handle_ipv4(struct __sk_buff *skb)
   245  {
   246  	__u32 src_identity = 0;
   247  	int ret = handle_ipv4(skb, &src_identity);
   248  
   249  	if (IS_ERR(ret))
   250  		return send_drop_notify_error(skb, src_identity, ret, TC_ACT_SHOT, METRIC_INGRESS);
   251  
   252  	return ret;
   253  }
   254  #endif /* ENABLE_IPV4 */
   255  
   256  __section("from-overlay")
   257  int from_overlay(struct __sk_buff *skb)
   258  {
   259  	__u16 proto;
   260  	int ret;
   261  
   262  	bpf_clear_cb(skb);
   263  	bpf_clear_nodeport(skb);
   264  
   265  	if (!validate_ethertype(skb, &proto)) {
   266  		/* Pass unknown traffic to the stack */
   267  		ret = TC_ACT_OK;
   268  		goto out;
   269  	}
   270  
   271  #ifdef ENABLE_IPSEC
   272  	if ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT) {
   273  		send_trace_notify(skb, TRACE_FROM_OVERLAY, get_identity(skb), 0, 0,
   274  				  skb->ingress_ifindex,
   275  				  TRACE_REASON_ENCRYPTED, TRACE_PAYLOAD_LEN);
   276  	} else
   277  #endif
   278  	{
   279  		send_trace_notify(skb, TRACE_FROM_OVERLAY, 0, 0, 0,
   280  				  skb->ingress_ifindex, 0, TRACE_PAYLOAD_LEN);
   281  	}
   282  
   283  	switch (proto) {
   284  	case bpf_htons(ETH_P_IPV6):
   285  #ifdef ENABLE_IPV6
   286  		ep_tail_call(skb, CILIUM_CALL_IPV6_FROM_LXC);
   287  		ret = DROP_MISSED_TAIL_CALL;
   288  #else
   289  		ret = DROP_UNKNOWN_L3;
   290  #endif
   291  		break;
   292  
   293  	case bpf_htons(ETH_P_IP):
   294  #ifdef ENABLE_IPV4
   295  		ep_tail_call(skb, CILIUM_CALL_IPV4_FROM_LXC);
   296  		ret = DROP_MISSED_TAIL_CALL;
   297  #else
   298  		ret = DROP_UNKNOWN_L3;
   299  #endif
   300  		break;
   301  
   302  	default:
   303  		/* Pass unknown traffic to the stack */
   304  		ret = TC_ACT_OK;
   305  	}
   306  out:
   307  	if (IS_ERR(ret))
   308  		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS);
   309  	return ret;
   310  }
   311  
   312  #ifdef ENABLE_NODEPORT
   313  declare_tailcall_if(is_defined(ENABLE_IPV6), CILIUM_CALL_ENCAP_NODEPORT_NAT)
   314  int tail_handle_nat_fwd(struct __sk_buff *skb)
   315  {
   316  	int ret;
   317  
   318  	if ((skb->mark & MARK_MAGIC_SNAT_DONE) == MARK_MAGIC_SNAT_DONE)
   319  		return TC_ACT_OK;
   320  	ret = nodeport_nat_fwd(skb, true);
   321  	if (IS_ERR(ret))
   322  		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS);
   323  	return ret;
   324  }
   325  #endif
   326  
   327  __section("to-overlay")
   328  int to_overlay(struct __sk_buff *skb)
   329  {
        	/*
        	 * Program placed in section "to-overlay".
        	 *
        	 * Calls encap_remap_v6_host_address(skb, true) first; a
        	 * negative result skips the NodePort step and goes straight
        	 * to the error reporting at 'out'.
        	 */
   330  	int ret = encap_remap_v6_host_address(skb, true);
   331  	if (unlikely(ret < 0))
   332  		goto out;
   333  #ifdef ENABLE_NODEPORT
        	/*
        	 * Hand the packet to tail_handle_nat_fwd().
        	 * NOTE(review): invoke_tailcall_if() is defined outside this
        	 * file; it presumably either tail-calls
        	 * CILIUM_CALL_ENCAP_NODEPORT_NAT or calls the function
        	 * directly, and may rely on the local names 'skb' and 'ret'
        	 * -- confirm before renaming or restructuring.
        	 */
   334  	invoke_tailcall_if(is_defined(ENABLE_IPV6),
   335  			   CILIUM_CALL_ENCAP_NODEPORT_NAT, tail_handle_nat_fwd);
   336  #endif
   337  out:
        	/* Error verdicts (per IS_ERR) are reported as egress drops. */
   338  	if (IS_ERR(ret))
   339  		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS);
   340  	return ret;
   341  }
   342  
   343  BPF_LICENSE("GPL"); /* declares the license string "GPL" for this object via the BPF_LICENSE macro (defined outside this file) */