github.com/cilium/cilium@v1.16.2/bpf/lib/l3.h

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#pragma once

#include "common.h"
#include "ipv6.h"
#include "ipv4.h"
#include "eps.h"
#include "eth.h"
#include "dbg.h"
#include "l4.h"
#include "icmp6.h"
#include "csum.h"

/*
 * When host routing is enabled, we need to check policies at the source, as in
 * this case the skb is delivered directly to the pod's namespace and the
 * ingress policy (the cil_to_container BPF program) is bypassed.
 */
#if defined(ENABLE_ENDPOINT_ROUTES) && defined(ENABLE_HOST_ROUTING)
# ifndef FORCE_LOCAL_POLICY_EVAL_AT_SOURCE
#  define FORCE_LOCAL_POLICY_EVAL_AT_SOURCE
# endif
#endif

#ifdef ENABLE_IPV6
static __always_inline int ipv6_l3(struct __ctx_buff *ctx, int l3_off,
				   const __u8 *smac, const __u8 *dmac,
				   __u8 __maybe_unused direction)
{
	int ret;

	ret = ipv6_dec_hoplimit(ctx, l3_off);
	if (IS_ERR(ret)) {
#ifndef SKIP_ICMPV6_HOPLIMIT_HANDLING
		if (ret == DROP_TTL_EXCEEDED)
			return icmp6_send_time_exceeded(ctx, l3_off, direction);
#endif
		return ret;
	}

	if (smac && eth_store_saddr(ctx, smac, 0) < 0)
		return DROP_WRITE_ERROR;
	if (dmac && eth_store_daddr(ctx, dmac, 0) < 0)
		return DROP_WRITE_ERROR;

	return CTX_ACT_OK;
}
#endif /* ENABLE_IPV6 */

static __always_inline int ipv4_l3(struct __ctx_buff *ctx, int l3_off,
				   const __u8 *smac, const __u8 *dmac,
				   struct iphdr *ip4)
{
	int ret;

	ret = ipv4_dec_ttl(ctx, l3_off, ip4);
	/* FIXME: Send ICMP TTL */
	if (IS_ERR(ret))
		return ret;

	if (smac && eth_store_saddr(ctx, smac, 0) < 0)
		return DROP_WRITE_ERROR;
	if (dmac && eth_store_daddr(ctx, dmac, 0) < 0)
		return DROP_WRITE_ERROR;

	return CTX_ACT_OK;
}

#ifndef SKIP_POLICY_MAP
static __always_inline int
l3_local_delivery(struct __ctx_buff *ctx, __u32 seclabel,
		  __u32 magic __maybe_unused,
		  const struct endpoint_info *ep __maybe_unused,
		  __u8 direction __maybe_unused,
		  bool from_host __maybe_unused,
		  bool from_tunnel __maybe_unused, __u32 cluster_id __maybe_unused)
{
#ifdef LOCAL_DELIVERY_METRICS
	/*
	 * Special LXC case for updating egress forwarding metrics.
	 * Note that the packet could still be dropped, but it would show up
	 * as an ingress drop counter in metrics.
	 */
	update_metrics(ctx_full_len(ctx), direction, REASON_FORWARDED);
#endif

#if defined(USE_BPF_PROG_FOR_INGRESS_POLICY) && \
	!defined(FORCE_LOCAL_POLICY_EVAL_AT_SOURCE)
	set_identity_mark(ctx, seclabel, magic);

# if !defined(ENABLE_NODEPORT)
	/* In tunneling mode, we execute this code to send the packet from
	 * cilium_vxlan to lxc*. If we're using kube-proxy, we don't want to use
	 * redirect() because that would bypass conntrack and the reverse DNAT.
	 * Thus, we send packets to the stack, but since they have the wrong
	 * Ethernet addresses, we need to mark them as PACKET_HOST or the kernel
	 * will drop them.
	 */
	if (from_tunnel) {
		ctx_change_type(ctx, PACKET_HOST);
		return CTX_ACT_OK;
	}
# endif /* !ENABLE_NODEPORT */

	return redirect_ep(ctx, ep->ifindex, from_host, from_tunnel);
#else

	/* Jumps to the destination pod's BPF program to enforce ingress policies. */
	ctx_store_meta(ctx, CB_SRC_LABEL, seclabel);
	/* With v1.17+, the actual ifindex is unused and this can be just a
	 * "needs redirect" boolean flag:
	 */
	ctx_store_meta(ctx, CB_IFINDEX, ep->ifindex);
	ctx_store_meta(ctx, CB_FROM_HOST, from_host ? 1 : 0);
	ctx_store_meta(ctx, CB_FROM_TUNNEL, from_tunnel ? 1 : 0);
	ctx_store_meta(ctx, CB_CLUSTER_ID_INGRESS, cluster_id);

	return tail_call_policy(ctx, ep->lxc_id);
#endif
}

#ifdef ENABLE_IPV6
/* Performs IPv6 L2/L3 handling and delivers the packet to the destination pod
 * on the same node, either via the stack or via a redirect call.
 * Depending on the configuration, it may also enforce ingress policies for the
 * destination pod via a tail call.
 */
static __always_inline int ipv6_local_delivery(struct __ctx_buff *ctx, int l3_off,
					       __u32 seclabel, __u32 magic,
					       const struct endpoint_info *ep,
					       __u8 direction, bool from_host,
					       bool from_tunnel)
{
	mac_t router_mac = ep->node_mac;
	mac_t lxc_mac = ep->mac;
	int ret;

	cilium_dbg(ctx, DBG_LOCAL_DELIVERY, ep->lxc_id, seclabel);

	ret = ipv6_l3(ctx, l3_off, (__u8 *)&router_mac, (__u8 *)&lxc_mac, direction);
	if (ret != CTX_ACT_OK)
		return ret;

	return l3_local_delivery(ctx, seclabel, magic, ep, direction, from_host,
				 from_tunnel, 0);
}
#endif /* ENABLE_IPV6 */

/* Performs IPv4 L2/L3 handling and delivers the packet to the destination pod
 * on the same node, either via the stack or via a redirect call.
 * Depending on the configuration, it may also enforce ingress policies for the
 * destination pod via a tail call.
 */
static __always_inline int ipv4_local_delivery(struct __ctx_buff *ctx, int l3_off,
					       __u32 seclabel, __u32 magic,
					       struct iphdr *ip4,
					       const struct endpoint_info *ep,
					       __u8 direction, bool from_host,
					       bool from_tunnel, __u32 cluster_id)
{
	mac_t router_mac = ep->node_mac;
	mac_t lxc_mac = ep->mac;
	int ret;

	cilium_dbg(ctx, DBG_LOCAL_DELIVERY, ep->lxc_id, seclabel);

	ret = ipv4_l3(ctx, l3_off, (__u8 *)&router_mac, (__u8 *)&lxc_mac, ip4);
	if (ret != CTX_ACT_OK)
		return ret;

	return l3_local_delivery(ctx, seclabel, magic, ep, direction, from_host,
				 from_tunnel, cluster_id);
}
#endif /* SKIP_POLICY_MAP */
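
/*
 * Editor's usage sketch (not part of upstream l3.h): a minimal example of how
 * a datapath program could hand an IPv4 packet destined to a local pod over to
 * ipv4_local_delivery(), matching the signature defined above.
 * lookup_ip4_endpoint() is assumed to come from eps.h (included above);
 * SECLABEL_IPV4, MARK_MAGIC_IDENTITY and METRIC_EGRESS are assumed to be
 * provided by the including program (as in bpf_lxc.c). Treat every name here
 * as an assumption, not a copy of the upstream call site; the guard macro
 * L3_USAGE_SKETCH is hypothetical and never defined.
 */
#ifdef L3_USAGE_SKETCH
static __always_inline int
example_deliver_ipv4_local(struct __ctx_buff *ctx, struct iphdr *ip4)
{
	const struct endpoint_info *ep;

	/* Does the destination IP belong to an endpoint on this node? */
	ep = lookup_ip4_endpoint(ip4);
	if (!ep)
		return CTX_ACT_OK; /* not local; let the caller route it */

	/* Rewrite the L2 addresses, decrement the TTL and deliver locally.
	 * Depending on the configuration this either redirects to the pod's
	 * device or tail-calls into the pod's ingress policy program.
	 */
	return ipv4_local_delivery(ctx, ETH_HLEN, SECLABEL_IPV4,
				   MARK_MAGIC_IDENTITY, ip4, ep,
				   METRIC_EGRESS, false, false, 0);
}
#endif /* L3_USAGE_SKETCH */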