github.com/cilium/cilium@v1.16.2/bpf/lib/l3.h (about)

     1  /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
     2  /* Copyright Authors of Cilium */
     3  
     4  #pragma once
     5  
     6  #include "common.h"
     7  #include "ipv6.h"
     8  #include "ipv4.h"
     9  #include "eps.h"
    10  #include "eth.h"
    11  #include "dbg.h"
    12  #include "l4.h"
    13  #include "icmp6.h"
    14  #include "csum.h"
    15  
/*
 * When host routing is enabled together with per-endpoint routes, policies
 * must be checked at the source: in this case the skb is delivered directly
 * to the pod's namespace and the ingress policy (the cil_to_container BPF
 * program) is bypassed.
 */
    21  #if defined(ENABLE_ENDPOINT_ROUTES) && defined(ENABLE_HOST_ROUTING)
    22  #  ifndef FORCE_LOCAL_POLICY_EVAL_AT_SOURCE
    23  #  define FORCE_LOCAL_POLICY_EVAL_AT_SOURCE
    24  #  endif
    25  #endif
    26  
    27  #ifdef ENABLE_IPV6
    28  static __always_inline int ipv6_l3(struct __ctx_buff *ctx, int l3_off,
    29  				   const __u8 *smac, const __u8 *dmac,
    30  				   __u8 __maybe_unused direction)
    31  {
    32  	int ret;
    33  
    34  	ret = ipv6_dec_hoplimit(ctx, l3_off);
    35  	if (IS_ERR(ret)) {
    36  #ifndef SKIP_ICMPV6_HOPLIMIT_HANDLING
    37  		if (ret == DROP_TTL_EXCEEDED)
    38  			return icmp6_send_time_exceeded(ctx, l3_off, direction);
    39  #endif
    40  		return ret;
    41  	}
    42  
    43  	if (smac && eth_store_saddr(ctx, smac, 0) < 0)
    44  		return DROP_WRITE_ERROR;
    45  	if (dmac && eth_store_daddr(ctx, dmac, 0) < 0)
    46  		return DROP_WRITE_ERROR;
    47  
    48  	return CTX_ACT_OK;
    49  }
    50  #endif /* ENABLE_IPV6 */
    51  
    52  static __always_inline int ipv4_l3(struct __ctx_buff *ctx, int l3_off,
    53  				   const __u8 *smac, const __u8 *dmac,
    54  				   struct iphdr *ip4)
    55  {
    56  	int ret;
    57  
    58  	ret = ipv4_dec_ttl(ctx, l3_off, ip4);
    59  	/* FIXME: Send ICMP TTL */
    60  	if (IS_ERR(ret))
    61  		return ret;
    62  
    63  	if (smac && eth_store_saddr(ctx, smac, 0) < 0)
    64  		return DROP_WRITE_ERROR;
    65  	if (dmac && eth_store_daddr(ctx, dmac, 0) < 0)
    66  		return DROP_WRITE_ERROR;
    67  
    68  	return CTX_ACT_OK;
    69  }
    70  
    71  #ifndef SKIP_POLICY_MAP
    72  static __always_inline int
    73  l3_local_delivery(struct __ctx_buff *ctx, __u32 seclabel,
    74  		  __u32 magic __maybe_unused,
    75  		  const struct endpoint_info *ep __maybe_unused,
    76  		  __u8 direction __maybe_unused,
    77  		  bool from_host __maybe_unused,
    78  		  bool from_tunnel __maybe_unused, __u32 cluster_id __maybe_unused)
    79  {
    80  #ifdef LOCAL_DELIVERY_METRICS
    81  	/*
    82  	 * Special LXC case for updating egress forwarding metrics.
    83  	 * Note that the packet could still be dropped but it would show up
    84  	 * as an ingress drop counter in metrics.
    85  	 */
    86  	update_metrics(ctx_full_len(ctx), direction, REASON_FORWARDED);
    87  #endif
    88  
    89  #if defined(USE_BPF_PROG_FOR_INGRESS_POLICY) && \
    90  	!defined(FORCE_LOCAL_POLICY_EVAL_AT_SOURCE)
    91  	set_identity_mark(ctx, seclabel, magic);
    92  
    93  # if !defined(ENABLE_NODEPORT)
    94  	/* In tunneling mode, we execute this code to send the packet from
    95  	 * cilium_vxlan to lxc*. If we're using kube-proxy, we don't want to use
    96  	 * redirect() because that would bypass conntrack and the reverse DNAT.
    97  	 * Thus, we send packets to the stack, but since they have the wrong
    98  	 * Ethernet addresses, we need to mark them as PACKET_HOST or the kernel
    99  	 * will drop them.
   100  	 */
   101  	if (from_tunnel) {
   102  		ctx_change_type(ctx, PACKET_HOST);
   103  		return CTX_ACT_OK;
   104  	}
   105  # endif /* !ENABLE_NODEPORT */
   106  
   107  	return redirect_ep(ctx, ep->ifindex, from_host, from_tunnel);
   108  #else
   109  
   110  	/* Jumps to destination pod's BPF program to enforce ingress policies. */
   111  	ctx_store_meta(ctx, CB_SRC_LABEL, seclabel);
   112  	/* With v1.17+, the actual ifindex is unused and this can be just a
   113  	 * "needs redirect" boolean flag:
   114  	 */
   115  	ctx_store_meta(ctx, CB_IFINDEX, ep->ifindex);
   116  	ctx_store_meta(ctx, CB_FROM_HOST, from_host ? 1 : 0);
   117  	ctx_store_meta(ctx, CB_FROM_TUNNEL, from_tunnel ? 1 : 0);
   118  	ctx_store_meta(ctx, CB_CLUSTER_ID_INGRESS, cluster_id);
   119  
   120  	return tail_call_policy(ctx, ep->lxc_id);
   121  #endif
   122  }
   123  
   124  #ifdef ENABLE_IPV6
   125  /* Performs IPv6 L2/L3 handling and delivers the packet to the destination pod
   126   * on the same node, either via the stack or via a redirect call.
   127   * Depending on the configuration, it may also enforce ingress policies for the
   128   * destination pod via a tail call.
   129   */
   130  static __always_inline int ipv6_local_delivery(struct __ctx_buff *ctx, int l3_off,
   131  					       __u32 seclabel, __u32 magic,
   132  					       const struct endpoint_info *ep,
   133  					       __u8 direction, bool from_host,
   134  					       bool from_tunnel)
   135  {
   136  	mac_t router_mac = ep->node_mac;
   137  	mac_t lxc_mac = ep->mac;
   138  	int ret;
   139  
   140  	cilium_dbg(ctx, DBG_LOCAL_DELIVERY, ep->lxc_id, seclabel);
   141  
   142  	ret = ipv6_l3(ctx, l3_off, (__u8 *)&router_mac, (__u8 *)&lxc_mac, direction);
   143  	if (ret != CTX_ACT_OK)
   144  		return ret;
   145  
   146  	return l3_local_delivery(ctx, seclabel, magic, ep, direction, from_host,
   147  				 from_tunnel, 0);
   148  }
   149  #endif /* ENABLE_IPV6 */
   150  
   151  /* Performs IPv4 L2/L3 handling and delivers the packet to the destination pod
   152   * on the same node, either via the stack or via a redirect call.
   153   * Depending on the configuration, it may also enforce ingress policies for the
   154   * destination pod via a tail call.
   155   */
   156  static __always_inline int ipv4_local_delivery(struct __ctx_buff *ctx, int l3_off,
   157  					       __u32 seclabel, __u32 magic,
   158  					       struct iphdr *ip4,
   159  					       const struct endpoint_info *ep,
   160  					       __u8 direction, bool from_host,
   161  					       bool from_tunnel, __u32 cluster_id)
   162  {
   163  	mac_t router_mac = ep->node_mac;
   164  	mac_t lxc_mac = ep->mac;
   165  	int ret;
   166  
   167  	cilium_dbg(ctx, DBG_LOCAL_DELIVERY, ep->lxc_id, seclabel);
   168  
   169  	ret = ipv4_l3(ctx, l3_off, (__u8 *) &router_mac, (__u8 *) &lxc_mac, ip4);
   170  	if (ret != CTX_ACT_OK)
   171  		return ret;
   172  
   173  	return l3_local_delivery(ctx, seclabel, magic, ep, direction, from_host,
   174  				 from_tunnel, cluster_id);
   175  }
   176  #endif /* SKIP_POLICY_MAP */