github.com/datadog/cilium@v1.6.12/bpf/bpf_overlay.c (about) 1 /* 2 * Copyright (C) 2016-2019 Authors of Cilium 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include <node_config.h> 19 #include <netdev_config.h> 20 21 #include <bpf/api.h> 22 23 #include <stdint.h> 24 #include <stdio.h> 25 26 #include <linux/if_packet.h> 27 28 #include "lib/tailcall.h" 29 #include "lib/utils.h" 30 #include "lib/common.h" 31 #include "lib/maps.h" 32 #include "lib/ipv6.h" 33 #include "lib/eth.h" 34 #include "lib/dbg.h" 35 #include "lib/trace.h" 36 #include "lib/l3.h" 37 #include "lib/drop.h" 38 #include "lib/policy.h" 39 #include "lib/nodeport.h" 40 41 #ifdef ENABLE_IPV6 42 static inline int handle_ipv6(struct __sk_buff *skb, __u32 *identity) 43 { 44 int ret, l4_off, l3_off = ETH_HLEN, hdrlen; 45 void *data_end, *data; 46 struct ipv6hdr *ip6; 47 struct bpf_tunnel_key key = {}; 48 struct endpoint_info *ep; 49 bool decrypted; 50 51 /* verifier workaround (dereference of modified ctx ptr) */ 52 if (!revalidate_data_first(skb, &data, &data_end, &ip6)) 53 return DROP_INVALID; 54 #ifdef ENABLE_NODEPORT 55 if (!bpf_skip_nodeport(skb)) { 56 int ret = nodeport_lb6(skb, *identity); 57 if (ret < 0) 58 return ret; 59 } 60 #endif 61 ret = encap_remap_v6_host_address(skb, false); 62 if (unlikely(ret < 0)) 63 return ret; 64 65 if (!revalidate_data(skb, &data, &data_end, &ip6)) 66 return DROP_INVALID; 67 68 decrypted = ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT); 69 if (decrypted) { 70 *identity = get_identity(skb); 71 } else { 72 if (unlikely(skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0)) 73 return DROP_NO_TUNNEL_KEY; 74 *identity = key.tunnel_id; 75 } 76 77 cilium_dbg(skb, DBG_DECAP, key.tunnel_id, key.tunnel_label); 78 79 #ifdef ENABLE_IPSEC 80 if (!decrypted) { 81 /* IPSec is not currently enforce (feature coming soon) 82 * so for now just handle normally 83 */ 84 if (ip6->nexthdr != IPPROTO_ESP) { 85 update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT); 86 goto not_esp; 87 } 88 89 /* Decrypt "key" is determined by SPI */ 90 skb->mark = MARK_MAGIC_DECRYPT; 91 set_identity(skb, key.tunnel_id); 92 /* To IPSec stack on cilium_vxlan we are going to pass 93 * this up the stack but eth_type_trans has already labeled 94 * this as an OTHERHOST type packet. To avoid being dropped 95 * by IP stack before IPSec can be processed mark as a HOST 96 * packet. 97 */ 98 skb_change_type(skb, PACKET_HOST); 99 return TC_ACT_OK; 100 } else { 101 key.tunnel_id = get_identity(skb); 102 skb->mark = 0; 103 } 104 not_esp: 105 #endif 106 107 /* Lookup IPv6 address in list of local endpoints */ 108 if ((ep = lookup_ip6_endpoint(ip6)) != NULL) { 109 /* Let through packets to the node-ip so they are 110 * processed by the local ip stack */ 111 if (ep->flags & ENDPOINT_F_HOST) 112 goto to_host; 113 114 __u8 nexthdr = ip6->nexthdr; 115 hdrlen = ipv6_hdrlen(skb, l3_off, &nexthdr); 116 if (hdrlen < 0) 117 return hdrlen; 118 119 l4_off = l3_off + hdrlen; 120 return ipv6_local_delivery(skb, l3_off, l4_off, key.tunnel_id, ip6, nexthdr, ep, METRIC_INGRESS); 121 } 122 123 to_host: 124 #ifdef HOST_IFINDEX 125 if (1) { 126 union macaddr host_mac = HOST_IFINDEX_MAC; 127 union macaddr router_mac = NODE_MAC; 128 int ret; 129 130 ret = ipv6_l3(skb, ETH_HLEN, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, METRIC_INGRESS); 131 if (ret != TC_ACT_OK) 132 return ret; 133 134 cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX); 135 return redirect(HOST_IFINDEX, 0); 136 } 137 #else 138 return TC_ACT_OK; 139 #endif 140 } 141 142 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_LXC) int tail_handle_ipv6(struct __sk_buff *skb) 143 { 144 __u32 src_identity = 0; 145 int ret = handle_ipv6(skb, &src_identity); 146 147 if (IS_ERR(ret)) 148 return send_drop_notify_error(skb, src_identity, ret, TC_ACT_SHOT, METRIC_INGRESS); 149 150 return ret; 151 } 152 #endif /* ENABLE_IPV6 */ 153 154 #ifdef ENABLE_IPV4 155 static inline int handle_ipv4(struct __sk_buff *skb, __u32 *identity) 156 { 157 void *data_end, *data; 158 struct iphdr *ip4; 159 struct endpoint_info *ep; 160 struct bpf_tunnel_key key = {}; 161 bool decrypted; 162 int l4_off; 163 164 /* verifier workaround (dereference of modified ctx ptr) */ 165 if (!revalidate_data_first(skb, &data, &data_end, &ip4)) 166 return DROP_INVALID; 167 #ifdef ENABLE_NODEPORT 168 if (!bpf_skip_nodeport(skb)) { 169 int ret = nodeport_lb4(skb, *identity); 170 if (ret < 0) 171 return ret; 172 } 173 #endif 174 if (!revalidate_data(skb, &data, &data_end, &ip4)) 175 return DROP_INVALID; 176 177 decrypted = ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT); 178 /* If packets are decrypted the key has already been pushed into metadata. */ 179 if (decrypted) { 180 *identity = get_identity(skb); 181 } else { 182 if (unlikely(skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0)) 183 return DROP_NO_TUNNEL_KEY; 184 *identity = key.tunnel_id; 185 } 186 187 l4_off = ETH_HLEN + ipv4_hdrlen(ip4); 188 #ifdef ENABLE_IPSEC 189 if (!decrypted) { 190 /* IPSec is not currently enforce (feature coming soon) 191 * so for now just handle normally 192 */ 193 if (ip4->protocol != IPPROTO_ESP) { 194 update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT); 195 goto not_esp; 196 } 197 198 skb->mark = MARK_MAGIC_DECRYPT; 199 set_identity(skb, key.tunnel_id); 200 /* To IPSec stack on cilium_vxlan we are going to pass 201 * this up the stack but eth_type_trans has already labeled 202 * this as an OTHERHOST type packet. To avoid being dropped 203 * by IP stack before IPSec can be processed mark as a HOST 204 * packet. 205 */ 206 skb_change_type(skb, PACKET_HOST); 207 return TC_ACT_OK; 208 } else { 209 key.tunnel_id = get_identity(skb); 210 skb->mark = 0; 211 } 212 not_esp: 213 #endif 214 215 /* Lookup IPv4 address in list of local endpoints */ 216 if ((ep = lookup_ip4_endpoint(ip4)) != NULL) { 217 /* Let through packets to the node-ip so they are 218 * processed by the local ip stack */ 219 if (ep->flags & ENDPOINT_F_HOST) 220 goto to_host; 221 222 return ipv4_local_delivery(skb, ETH_HLEN, l4_off, key.tunnel_id, ip4, ep, METRIC_INGRESS); 223 } 224 225 to_host: 226 #ifdef HOST_IFINDEX 227 if (1) { 228 union macaddr host_mac = HOST_IFINDEX_MAC; 229 union macaddr router_mac = NODE_MAC; 230 int ret; 231 232 ret = ipv4_l3(skb, ETH_HLEN, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, ip4); 233 if (ret != TC_ACT_OK) 234 return ret; 235 236 cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX); 237 return redirect(HOST_IFINDEX, 0); 238 } 239 #else 240 return TC_ACT_OK; 241 #endif 242 } 243 244 __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_LXC) int tail_handle_ipv4(struct __sk_buff *skb) 245 { 246 __u32 src_identity = 0; 247 int ret = handle_ipv4(skb, &src_identity); 248 249 if (IS_ERR(ret)) 250 return send_drop_notify_error(skb, src_identity, ret, TC_ACT_SHOT, METRIC_INGRESS); 251 252 return ret; 253 } 254 #endif /* ENABLE_IPV4 */ 255 256 __section("from-overlay") 257 int from_overlay(struct __sk_buff *skb) 258 { 259 __u16 proto; 260 int ret; 261 262 bpf_clear_cb(skb); 263 bpf_clear_nodeport(skb); 264 265 if (!validate_ethertype(skb, &proto)) { 266 /* Pass unknown traffic to the stack */ 267 ret = TC_ACT_OK; 268 goto out; 269 } 270 271 #ifdef ENABLE_IPSEC 272 if ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT) { 273 send_trace_notify(skb, TRACE_FROM_OVERLAY, get_identity(skb), 0, 0, 274 skb->ingress_ifindex, 275 TRACE_REASON_ENCRYPTED, TRACE_PAYLOAD_LEN); 276 } else 277 #endif 278 { 279 send_trace_notify(skb, TRACE_FROM_OVERLAY, 0, 0, 0, 280 skb->ingress_ifindex, 0, TRACE_PAYLOAD_LEN); 281 } 282 283 switch (proto) { 284 case bpf_htons(ETH_P_IPV6): 285 #ifdef ENABLE_IPV6 286 ep_tail_call(skb, CILIUM_CALL_IPV6_FROM_LXC); 287 ret = DROP_MISSED_TAIL_CALL; 288 #else 289 ret = DROP_UNKNOWN_L3; 290 #endif 291 break; 292 293 case bpf_htons(ETH_P_IP): 294 #ifdef ENABLE_IPV4 295 ep_tail_call(skb, CILIUM_CALL_IPV4_FROM_LXC); 296 ret = DROP_MISSED_TAIL_CALL; 297 #else 298 ret = DROP_UNKNOWN_L3; 299 #endif 300 break; 301 302 default: 303 /* Pass unknown traffic to the stack */ 304 ret = TC_ACT_OK; 305 } 306 out: 307 if (IS_ERR(ret)) 308 return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS); 309 return ret; 310 } 311 312 #ifdef ENABLE_NODEPORT 313 declare_tailcall_if(is_defined(ENABLE_IPV6), CILIUM_CALL_ENCAP_NODEPORT_NAT) 314 int tail_handle_nat_fwd(struct __sk_buff *skb) 315 { 316 int ret; 317 318 if ((skb->mark & MARK_MAGIC_SNAT_DONE) == MARK_MAGIC_SNAT_DONE) 319 return TC_ACT_OK; 320 ret = nodeport_nat_fwd(skb, true); 321 if (IS_ERR(ret)) 322 return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS); 323 return ret; 324 } 325 #endif 326 327 __section("to-overlay") 328 int to_overlay(struct __sk_buff *skb) 329 { 330 int ret = encap_remap_v6_host_address(skb, true); 331 if (unlikely(ret < 0)) 332 goto out; 333 #ifdef ENABLE_NODEPORT 334 invoke_tailcall_if(is_defined(ENABLE_IPV6), 335 CILIUM_CALL_ENCAP_NODEPORT_NAT, tail_handle_nat_fwd); 336 #endif 337 out: 338 if (IS_ERR(ret)) 339 return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS); 340 return ret; 341 } 342 343 BPF_LICENSE("GPL");