github.com/fafucoder/cilium@v1.6.11/bpf/lib/encap.h (about) 1 /* 2 * Copyright (C) 2016-2018 Authors of Cilium 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #ifndef __LIB_ENCAP_H_ 19 #define __LIB_ENCAP_H_ 20 21 #include "common.h" 22 #include "dbg.h" 23 24 #ifdef ENCAP_IFINDEX 25 #ifdef ENABLE_IPSEC 26 static inline int __inline__ 27 enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, 28 __u32 seclabel) 29 { 30 /* Traffic from local host in tunnel mode will be passed to 31 * cilium_host. In non-IPSec case traffic with non-local dst 32 * will then be redirected to tunnel device. In IPSec case 33 * though we need to traverse xfrm path still. The mark + 34 * cb[4] hints will not survive a veth pair xmit to ingress 35 * however so below encap_and_redirect_ipsec will not work. 36 * Instead pass hints via cb[0], cb[4] (cb is not cleared 37 * by dev_skb_forward) and catch hints with bpf_ipsec prog 38 * that will populate mark/cb as expected by xfrm and 2nd 39 * traversal into bpf_netdev. Remember we can't use cb[0-3] 40 * in both cases because xfrm layer would overwrite them. We 41 * use cb[4] here so it doesn't need to be reset by bpf_ipsec. 42 */ 43 skb->cb[0] = or_encrypt_key(key); 44 skb->cb[1] = seclabel; 45 skb->cb[4] = tunnel_endpoint; 46 return IPSEC_ENDPOINT; 47 } 48 49 static inline int __inline__ 50 encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, 51 __u32 seclabel) 52 { 53 /* IPSec is performed by the stack on any packets with the 54 * MARK_MAGIC_ENCRYPT bit set. During the process though we 55 * lose the lxc context (seclabel and tunnel endpoint). The 56 * tunnel endpoint can be looked up from daddr but the sec 57 * label is stashed in the mark and extracted in bpf_netdev 58 * to send skb onto tunnel for encap. 59 */ 60 set_encrypt_key(skb, key); 61 set_identity(skb, seclabel); 62 skb->cb[4] = tunnel_endpoint; 63 return IPSEC_ENDPOINT; 64 } 65 #endif 66 67 static inline int __inline__ 68 encap_remap_v6_host_address(struct __sk_buff *skb, const bool egress) 69 { 70 #ifdef ENABLE_ENCAP_HOST_REMAP 71 struct csum_offset csum = {}; 72 union v6addr host_ip; 73 void *data, *data_end; 74 struct ipv6hdr *ip6; 75 union v6addr *which; 76 __u32 off, noff; 77 __u8 nexthdr; 78 __u16 proto; 79 __be32 sum; 80 int ret; 81 82 validate_ethertype(skb, &proto); 83 if (proto != bpf_htons(ETH_P_IPV6)) 84 return 0; 85 if (!revalidate_data(skb, &data, &data_end, &ip6)) 86 return DROP_INVALID; 87 /* For requests routed via tunnel with external v6 node IP 88 * we need to remap their source address to the router address 89 * as otherwise replies are not routed via tunnel but public 90 * address instead. 91 */ 92 if (egress) { 93 BPF_V6(host_ip, HOST_IP); 94 which = (union v6addr *)&ip6->saddr; 95 } else { 96 BPF_V6(host_ip, ROUTER_IP); 97 which = (union v6addr *)&ip6->daddr; 98 } 99 if (ipv6_addrcmp(which, &host_ip)) 100 return 0; 101 nexthdr = ip6->nexthdr; 102 ret = ipv6_hdrlen(skb, ETH_HLEN, &nexthdr); 103 if (ret < 0) 104 return ret; 105 off = ((void *)ip6 - data) + ret; 106 if (egress) { 107 BPF_V6(host_ip, ROUTER_IP); 108 noff = ETH_HLEN + offsetof(struct ipv6hdr, saddr); 109 } else { 110 BPF_V6(host_ip, HOST_IP); 111 noff = ETH_HLEN + offsetof(struct ipv6hdr, daddr); 112 } 113 sum = csum_diff(which, 16, &host_ip, 16, 0); 114 csum_l4_offset_and_flags(nexthdr, &csum); 115 if (skb_store_bytes(skb, noff, &host_ip, 16, 0) < 0) 116 return DROP_WRITE_ERROR; 117 if (csum.offset && 118 csum_l4_replace(skb, off, &csum, 0, sum, BPF_F_PSEUDO_HDR) < 0) 119 return DROP_CSUM_L4; 120 #endif /* ENABLE_ENCAP_HOST_REMAP */ 121 return 0; 122 } 123 124 static inline int __inline__ 125 __encap_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, 126 __u32 seclabel, __u32 monitor) 127 { 128 struct bpf_tunnel_key key = {}; 129 __u32 node_id; 130 int ret; 131 132 node_id = bpf_htonl(tunnel_endpoint); 133 key.tunnel_id = seclabel; 134 key.remote_ipv4 = node_id; 135 136 cilium_dbg(skb, DBG_ENCAP, node_id, seclabel); 137 138 ret = skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); 139 if (unlikely(ret < 0)) 140 return DROP_WRITE_ERROR; 141 142 send_trace_notify(skb, TRACE_TO_OVERLAY, seclabel, 0, 0, ENCAP_IFINDEX, 143 0, monitor); 144 return 0; 145 } 146 147 static inline int __inline__ 148 __encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, 149 __u32 seclabel, __u32 monitor) 150 { 151 int ret = __encap_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); 152 if (ret != 0) 153 return ret; 154 return redirect(ENCAP_IFINDEX, 0); 155 } 156 157 /* encap_and_redirect_with_nodeid returns IPSEC_ENDPOINT after skb meta-data is 158 * set when IPSec is enabled. Caller should pass the skb to the stack at this 159 * point. Otherwise returns TC_ACT_REDIRECT on successful redirect to tunnel 160 * device. On error returns TC_ACT_SHOT, DROP_NO_TUNNEL_ENDPOINT or 161 * DROP_WRITE_ERROR. 162 */ 163 static inline int __inline__ 164 encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, 165 __u8 key, __u32 seclabel, __u32 monitor) 166 { 167 #ifdef ENABLE_IPSEC 168 if (key) 169 return enacap_and_redirect_nomark_ipsec(skb, tunnel_endpoint, key, seclabel); 170 #endif 171 return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); 172 } 173 174 /* encap_and_redirect based on ENABLE_IPSEC flag and from_host bool will decide 175 * which version of code to call. With IPSec enabled and from_host set use the 176 * IPSec branch which configures metadata for IPSec kernel stack. Otherwise 177 * packet is redirected to output tunnel device and skb will not be seen by 178 * IP stack. 179 * 180 * Returns IPSEC_ENDPOINT when skb needs to be handed to IP stack for IPSec 181 * handling, TC_ACT_SHOT, DROP_NO_TUNNEL_ENDPOINT or DROP_WRITE_ERROR on error, 182 * and finally on successful redirect returns TC_ACT_REDIRECT. 183 */ 184 static inline int __inline__ 185 encap_and_redirect_lxc(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 encrypt_key, struct endpoint_key *key, __u32 seclabel, __u32 monitor) 186 { 187 struct endpoint_key *tunnel; 188 189 if (tunnel_endpoint) { 190 #ifdef ENABLE_IPSEC 191 if (encrypt_key) 192 return encap_and_redirect_ipsec(skb, tunnel_endpoint, encrypt_key, seclabel); 193 #endif 194 return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); 195 } 196 197 if ((tunnel = map_lookup_elem(&TUNNEL_MAP, key)) == NULL) { 198 return DROP_NO_TUNNEL_ENDPOINT; 199 } 200 201 #ifdef ENABLE_IPSEC 202 if (tunnel->key) { 203 __u8 min_encrypt_key = get_min_encrypt_key(tunnel->key); 204 205 return encap_and_redirect_ipsec(skb, tunnel->ip4, 206 min_encrypt_key, 207 seclabel); 208 } 209 #endif 210 return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor); 211 } 212 213 static inline int __inline__ 214 encap_and_redirect_netdev(struct __sk_buff *skb, struct endpoint_key *k, __u32 seclabel, __u32 monitor) 215 { 216 struct endpoint_key *tunnel; 217 218 if ((tunnel = map_lookup_elem(&TUNNEL_MAP, k)) == NULL) { 219 return DROP_NO_TUNNEL_ENDPOINT; 220 } 221 222 #ifdef ENABLE_IPSEC 223 if (tunnel->key) { 224 __u8 key = get_min_encrypt_key(tunnel->key); 225 226 return enacap_and_redirect_nomark_ipsec(skb, tunnel->ip4, 227 key, seclabel); 228 } 229 #endif 230 return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor); 231 } 232 #endif /* ENCAP_IFINDEX */ 233 #endif /* __LIB_ENCAP_H_ */