github.com/fafucoder/cilium@v1.6.11/bpf/lib/encap.h

/*
 *  Copyright (C) 2016-2018 Authors of Cilium
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#ifndef __LIB_ENCAP_H_
#define __LIB_ENCAP_H_

#include "common.h"
#include "dbg.h"

#ifdef ENCAP_IFINDEX
#ifdef ENABLE_IPSEC
static inline int __inline__
encap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key,
			 __u32 seclabel)
{
	/* Traffic from the local host in tunnel mode will be passed to
	 * cilium_host. In the non-IPSec case, traffic with a non-local dst
	 * is then redirected to the tunnel device. In the IPSec case,
	 * however, we still need to traverse the xfrm path. The mark +
	 * cb[4] hints will not survive a veth pair xmit to ingress,
	 * so encap_and_redirect_ipsec below will not work here.
	 * Instead pass hints via cb[0], cb[4] (cb is not cleared
	 * by dev_forward_skb) and catch the hints with the bpf_ipsec prog,
	 * which will populate mark/cb as expected by xfrm and the 2nd
	 * traversal into bpf_netdev. Remember we can't use cb[0-3]
	 * in both cases because the xfrm layer would overwrite them. We
	 * use cb[4] here so it doesn't need to be reset by bpf_ipsec.
	 */
	skb->cb[0] = or_encrypt_key(key);
	skb->cb[1] = seclabel;
	skb->cb[4] = tunnel_endpoint;
	return IPSEC_ENDPOINT;
}

static inline int __inline__
encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key,
			 __u32 seclabel)
{
	/* IPSec is performed by the stack on any packets with the
	 * MARK_MAGIC_ENCRYPT bit set. During the process though we
	 * lose the lxc context (seclabel and tunnel endpoint). The
	 * tunnel endpoint can be looked up from daddr but the sec
	 * label is stashed in the mark and extracted in bpf_netdev
	 * to send skb onto tunnel for encap.
	 */
	set_encrypt_key(skb, key);
	set_identity(skb, seclabel);
	skb->cb[4] = tunnel_endpoint;
	return IPSEC_ENDPOINT;
}
#endif

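/* encap_remap_v6_host_address() is a no-op unless ENABLE_ENCAP_HOST_REMAP is
 * set. With it set, IPv6 packets carrying the host's external address are
 * rewritten: on egress the source address is replaced by the router address,
 * on ingress the router address in the destination is mapped back to the
 * host address. The L4 checksum is fixed up accordingly.
 */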
static inline int __inline__
encap_remap_v6_host_address(struct __sk_buff *skb, const bool egress)
{
#ifdef ENABLE_ENCAP_HOST_REMAP
	struct csum_offset csum = {};
	union v6addr host_ip;
	void *data, *data_end;
	struct ipv6hdr *ip6;
	union v6addr *which;
	__u32 off, noff;
	__u8 nexthdr;
	__u16 proto;
	__be32 sum;
	int ret;

	validate_ethertype(skb, &proto);
	if (proto != bpf_htons(ETH_P_IPV6))
		return 0;
	if (!revalidate_data(skb, &data, &data_end, &ip6))
		return DROP_INVALID;
	/* For requests routed via tunnel with external v6 node IP
	 * we need to remap their source address to the router address
	 * as otherwise replies are not routed via tunnel but public
	 * address instead.
	 */
	if (egress) {
		BPF_V6(host_ip, HOST_IP);
		which = (union v6addr *)&ip6->saddr;
	} else {
		BPF_V6(host_ip, ROUTER_IP);
		which = (union v6addr *)&ip6->daddr;
	}
	if (ipv6_addrcmp(which, &host_ip))
		return 0;
	nexthdr = ip6->nexthdr;
	ret = ipv6_hdrlen(skb, ETH_HLEN, &nexthdr);
	if (ret < 0)
		return ret;
	off = ((void *)ip6 - data) + ret;
	if (egress) {
		BPF_V6(host_ip, ROUTER_IP);
		noff = ETH_HLEN + offsetof(struct ipv6hdr, saddr);
	} else {
		BPF_V6(host_ip, HOST_IP);
		noff = ETH_HLEN + offsetof(struct ipv6hdr, daddr);
	}
	sum = csum_diff(which, 16, &host_ip, 16, 0);
	csum_l4_offset_and_flags(nexthdr, &csum);
	if (skb_store_bytes(skb, noff, &host_ip, 16, 0) < 0)
		return DROP_WRITE_ERROR;
	if (csum.offset &&
	    csum_l4_replace(skb, off, &csum, 0, sum, BPF_F_PSEUDO_HDR) < 0)
		return DROP_CSUM_L4;
#endif /* ENABLE_ENCAP_HOST_REMAP */
	return 0;
}

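/* __encap_with_nodeid() stores the tunnel metadata for the encap device:
 * the security identity as tunnel ID and the remote node IPv4 address as
 * tunnel destination. It only prepares the skb and emits a trace
 * notification; the redirect itself is left to the caller.
 */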
static inline int __inline__
__encap_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint,
		    __u32 seclabel, __u32 monitor)
{
	struct bpf_tunnel_key key = {};
	__u32 node_id;
	int ret;

	node_id = bpf_htonl(tunnel_endpoint);
	key.tunnel_id = seclabel;
	key.remote_ipv4 = node_id;

	cilium_dbg(skb, DBG_ENCAP, node_id, seclabel);

	ret = skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
	if (unlikely(ret < 0))
		return DROP_WRITE_ERROR;

	send_trace_notify(skb, TRACE_TO_OVERLAY, seclabel, 0, 0, ENCAP_IFINDEX,
			  0, monitor);
	return 0;
}

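/* __encap_and_redirect_with_nodeid() sets the tunnel key via
 * __encap_with_nodeid() and then redirects the skb to the encap device.
 */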
static inline int __inline__
__encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint,
				 __u32 seclabel, __u32 monitor)
{
	int ret = __encap_with_nodeid(skb, tunnel_endpoint, seclabel, monitor);
	if (ret != 0)
		return ret;
	return redirect(ENCAP_IFINDEX, 0);
}

/* encap_and_redirect_with_nodeid returns IPSEC_ENDPOINT after the skb
 * metadata has been set up, when IPSec is enabled; the caller should then
 * pass the skb to the stack. Otherwise it returns TC_ACT_REDIRECT on a
 * successful redirect to the tunnel device. On error it returns TC_ACT_SHOT,
 * DROP_NO_TUNNEL_ENDPOINT or DROP_WRITE_ERROR.
 */
static inline int __inline__
encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint,
			       __u8 key, __u32 seclabel, __u32 monitor)
{
#ifdef ENABLE_IPSEC
	if (key)
		return encap_and_redirect_nomark_ipsec(skb, tunnel_endpoint, key, seclabel);
#endif
	return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor);
}
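
/* Illustrative caller sketch (not part of this header; the drop handling and
 * surrounding control flow are hypothetical):
 *
 *	int ret = encap_and_redirect_with_nodeid(skb, tunnel_endpoint,
 *						 encrypt_key, seclabel, monitor);
 *	if (ret == IPSEC_ENDPOINT)
 *		return TC_ACT_OK;	// hand skb to the stack for IPSec/xfrm
 *	if (ret < 0)
 *		goto drop;		// DROP_* codes are negative
 *	return ret;			// TC_ACT_REDIRECT
 */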

/* encap_and_redirect_lxc decides, based on ENABLE_IPSEC and whether an
 * encryption key is available, which path to take. With IPSec enabled and a
 * key present, the IPSec branch configures the metadata expected by the
 * IPSec kernel stack. Otherwise the packet is redirected to the output
 * tunnel device and the skb will not be seen by the IP stack.
 *
 * Returns IPSEC_ENDPOINT when the skb needs to be handed to the IP stack for
 * IPSec handling, TC_ACT_SHOT, DROP_NO_TUNNEL_ENDPOINT or DROP_WRITE_ERROR on
 * error, and TC_ACT_REDIRECT on successful redirect.
 */
static inline int __inline__
encap_and_redirect_lxc(struct __sk_buff *skb, __u32 tunnel_endpoint,
		       __u8 encrypt_key, struct endpoint_key *key,
		       __u32 seclabel, __u32 monitor)
{
	struct endpoint_key *tunnel;

	if (tunnel_endpoint) {
#ifdef ENABLE_IPSEC
		if (encrypt_key)
			return encap_and_redirect_ipsec(skb, tunnel_endpoint,
							encrypt_key, seclabel);
#endif
		return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint,
							seclabel, monitor);
	}

	if ((tunnel = map_lookup_elem(&TUNNEL_MAP, key)) == NULL)
		return DROP_NO_TUNNEL_ENDPOINT;

#ifdef ENABLE_IPSEC
	if (tunnel->key) {
		__u8 min_encrypt_key = get_min_encrypt_key(tunnel->key);

		return encap_and_redirect_ipsec(skb, tunnel->ip4,
						min_encrypt_key,
						seclabel);
	}
#endif
	return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor);
}

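/* encap_and_redirect_netdev is the variant used on the netdev/host path: the
 * tunnel endpoint is always resolved via a TUNNEL_MAP lookup and, with IPSec
 * enabled, the nomark variant is used (see the comment on
 * encap_and_redirect_nomark_ipsec above).
 */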
static inline int __inline__
encap_and_redirect_netdev(struct __sk_buff *skb, struct endpoint_key *k,
			  __u32 seclabel, __u32 monitor)
{
	struct endpoint_key *tunnel;

	if ((tunnel = map_lookup_elem(&TUNNEL_MAP, k)) == NULL)
		return DROP_NO_TUNNEL_ENDPOINT;

#ifdef ENABLE_IPSEC
	if (tunnel->key) {
		__u8 key = get_min_encrypt_key(tunnel->key);

		return encap_and_redirect_nomark_ipsec(skb, tunnel->ip4,
						       key, seclabel);
	}
#endif
	return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor);
}
#endif /* ENCAP_IFINDEX */
#endif /* __LIB_ENCAP_H_ */