github.com/cilium/cilium@v1.16.2/bpf/lib/policy.h (about)

     1  /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
     2  /* Copyright Authors of Cilium */
     3  
     4  #pragma once
     5  
     6  #include <linux/icmp.h>
     7  
     8  #include "drop.h"
     9  #include "dbg.h"
    10  #include "eps.h"
    11  #include "maps.h"
    12  
    13  static __always_inline int
    14  __account_and_check(struct __ctx_buff *ctx __maybe_unused, struct policy_entry *policy,
    15  		    __s8 *ext_err, __u16 *proxy_port)
    16  {
    17  #ifdef POLICY_ACCOUNTING
    18  	/* FIXME: Use per cpu counters */
    19  	__sync_fetch_and_add(&policy->packets, 1);
    20  	__sync_fetch_and_add(&policy->bytes, ctx_full_len(ctx));
    21  #endif
    22  
    23  	if (unlikely(policy->deny))
    24  		return DROP_POLICY_DENY;
    25  
    26  	*proxy_port = policy->proxy_port;
    27  	if (unlikely(policy->auth_type)) {
    28  		if (ext_err)
    29  			*ext_err = (__s8)policy->auth_type;
    30  		return DROP_POLICY_AUTH_REQUIRED;
    31  	}
    32  	return CTX_ACT_OK;
    33  }
    34  
    35  static __always_inline int
    36  __policy_can_access(const void *map, struct __ctx_buff *ctx, __u32 local_id,
    37  		    __u32 remote_id, __u16 ethertype __maybe_unused, __u16 dport,
    38  		    __u8 proto, int off __maybe_unused, int dir,
    39  		    bool is_untracked_fragment, __u8 *match_type, __s8 *ext_err,
    40  		    __u16 *proxy_port)
    41  {
    42  	struct policy_entry *policy;
    43  	struct policy_entry *l4policy;
    44  	struct policy_key key = {
    45  		.lpm_key = { POLICY_FULL_PREFIX, {} }, /* always look up with unwildcarded data */
    46  		.sec_label = remote_id,
    47  		.egress = !dir,
    48  		.pad = 0,
    49  		.protocol = proto,
    50  		.dport = dport,
    51  	};
    52  
    53  #if defined(ALLOW_ICMP_FRAG_NEEDED) || defined(ENABLE_ICMP_RULE)
    54  	switch (ethertype) {
    55  	case ETH_P_IP:
    56  		if (proto == IPPROTO_ICMP) {
    57  			struct icmphdr icmphdr __align_stack_8;
    58  
    59  			if (ctx_load_bytes(ctx, off, &icmphdr, sizeof(icmphdr)) < 0)
    60  				return DROP_INVALID;
    61  
    62  # if defined(ALLOW_ICMP_FRAG_NEEDED)
    63  			if (icmphdr.type == ICMP_DEST_UNREACH &&
    64  			    icmphdr.code == ICMP_FRAG_NEEDED) {
    65  				*proxy_port = 0;
    66  				return CTX_ACT_OK;
    67  			}
    68  # endif
    69  
    70  # if defined(ENABLE_ICMP_RULE)
    71  			key.dport = bpf_u8_to_be16(icmphdr.type);
    72  # endif
    73  		}
    74  		break;
    75  	case ETH_P_IPV6:
    76  # if defined(ENABLE_ICMP_RULE)
    77  		if (proto == IPPROTO_ICMPV6) {
    78  			__u8 icmp_type;
    79  
    80  			if (ctx_load_bytes(ctx, off, &icmp_type, sizeof(icmp_type)) < 0)
    81  				return DROP_INVALID;
    82  
    83  			key.dport = bpf_u8_to_be16(icmp_type);
    84  		}
    85  # endif
    86  		break;
    87  	default:
    88  		break;
    89  	}
    90  #endif /* ALLOW_ICMP_FRAG_NEEDED || ENABLE_ICMP_RULE */
    91  
    92  	/* Policy match precedence:
    93  	 * 1. id/proto/port  (L3/L4)
    94  	 * 2. ANY/proto/port (L4-only)
    95  	 * 3. id/proto/ANY   (L3-proto)
    96  	 * 4. ANY/proto/ANY  (Proto-only)
    97  	 * 5. id/ANY/ANY     (L3-only)
    98  	 * 6. ANY/ANY/ANY    (All)
    99  	 */
   100  
   101  	/* Start with L3/L4 lookup.
   102  	 * LPM precedence order with L3:
   103  	 * 1. id/proto/port
   104  	 * 3. id/proto/ANY (check L4-only match first)
   105  	 * 5. id/ANY/ANY   (check proto match first)
   106  	 *
   107  	 * Note: Untracked fragments always have zero ports in the tuple so they can
   108  	 * only match entries that have fully wildcarded ports.
   109  	 */
   110  	policy = map_lookup_elem(map, &key);
   111  
   112  	/* This is a full L3/L4 match if port is not wildcarded,
   113  	 * need to check for L4-only policy first if it is.
   114  	 */
   115  	if (likely(policy && !policy->wildcard_dport)) {
   116  		cilium_dbg3(ctx, DBG_L4_CREATE, remote_id, local_id,
   117  			    dport << 16 | proto);
   118  		*match_type = POLICY_MATCH_L3_L4;		/* 1. id/proto/port */
   119  		goto check_policy;
   120  	}
   121  
   122  	/* L4-only lookup. */
   123  	key.sec_label = 0;
   124  	/* LPM precedence order without L3:
   125  	 * 2. ANY/proto/port
   126  	 * 4. ANY/proto/ANY
   127  	 * 6. ANY/ANY/ANY   == allow-all as L3 is zeroed in this lookup,
   128  	 *                     defer this until L3 match has been ruled out below.
   129  	 *
   130  	 * Untracked fragments always have zero ports in the tuple so they can
   131  	 * only match entries that have fully wildcarded ports.
   132  	 */
   133  	l4policy = map_lookup_elem(map, &key);
   134  
   135  	if (likely(l4policy && !l4policy->wildcard_dport)) {
   136  		*match_type = POLICY_MATCH_L4_ONLY;		/* 2. ANY/proto/port */
   137  		goto check_l4_policy;
   138  	}
   139  
   140  	if (likely(policy && !policy->wildcard_protocol)) {
   141  		*match_type = POLICY_MATCH_L3_PROTO;		/* 3. id/proto/ANY */
   142  		goto check_policy;
   143  	}
   144  
   145  	if (likely(l4policy && !l4policy->wildcard_protocol)) {
   146  		*match_type = POLICY_MATCH_PROTO_ONLY;		/* 4. ANY/proto/ANY */
   147  		goto check_l4_policy;
   148  	}
   149  
   150  	if (likely(policy)) {
   151  		*match_type = POLICY_MATCH_L3_ONLY;		/* 5. id/ANY/ANY */
   152  		goto check_policy;
   153  	}
   154  
   155  	if (likely(l4policy)) {
   156  		*match_type = POLICY_MATCH_ALL;			/* 6. ANY/ANY/ANY */
   157  		goto check_l4_policy;
   158  	}
   159  
   160  	/* TODO: Consider skipping policy lookup in this case? */
   161  	if (ctx_load_meta(ctx, CB_POLICY)) {
   162  		*proxy_port = 0;
   163  		return CTX_ACT_OK;
   164  	}
   165  
   166  	if (is_untracked_fragment)
   167  		return DROP_FRAG_NOSUPPORT;
   168  
   169  	return DROP_POLICY;
   170  
   171  check_policy:
   172  	return __account_and_check(ctx, policy, ext_err, proxy_port);
   173  
   174  check_l4_policy:
   175  	return __account_and_check(ctx, l4policy, ext_err, proxy_port);
   176  }
   177  
   178  /**
   179   * Determine whether the policy allows this traffic on ingress.
   180   * @arg ctx		Packet to allow or deny
   181   * @arg map		Policy map
   182   * @arg src_id		Source security identity for this packet
   183   * @arg dst_id		Destination security identity for this packet
   184   * @arg ethertype	Ethertype of this packet
   185   * @arg dport		Destination port of this packet
   186   * @arg proto		L3 Protocol of this packet
   187   * @arg l4_off		Offset to L4 header of this packet
   188   * @arg is_untracked_fragment	True if packet is a TCP/UDP datagram fragment
   189   *				AND IPv4 fragment tracking is disabled
   190   * @arg match_type		Pointer to store layers used for policy match
   191   * @arg ext_err		Pointer to store extended error information if this packet isn't allowed
   192   *
   193   * Returns:
   194   *   - Positive integer indicating the proxy_port to handle this traffic
   195   *   - CTX_ACT_OK if the policy allows this traffic based only on labels/L3/L4
   196   *   - Negative error code if the packet should be dropped
   197   */
   198  static __always_inline int
   199  policy_can_ingress(struct __ctx_buff *ctx, const void *map, __u32 src_id, __u32 dst_id,
   200  		   __u16 ethertype, __u16 dport, __u8 proto, int l4_off,
   201  		   bool is_untracked_fragment, __u8 *match_type, __u8 *audited,
   202  		   __s8 *ext_err, __u16 *proxy_port)
   203  {
   204  	int ret;
   205  
   206  	ret = __policy_can_access(map, ctx, dst_id, src_id, ethertype, dport,
   207  				  proto, l4_off, CT_INGRESS, is_untracked_fragment,
   208  				  match_type, ext_err, proxy_port);
   209  	if (ret >= CTX_ACT_OK)
   210  		return ret;
   211  
   212  	cilium_dbg(ctx, DBG_POLICY_DENIED, src_id, dst_id);
   213  
   214  	*audited = 0;
   215  #ifdef POLICY_AUDIT_MODE
   216  	if (IS_ERR(ret)) {
   217  		ret = CTX_ACT_OK;
   218  		*audited = 1;
   219  	}
   220  #endif
   221  
   222  	return ret;
   223  }
   224  
   225  static __always_inline int policy_can_ingress6(struct __ctx_buff *ctx, const void *map,
   226  					       const struct ipv6_ct_tuple *tuple,
   227  					       int l4_off,  __u32 src_id, __u32 dst_id,
   228  					       __u8 *match_type, __u8 *audited,
   229  					       __s8 *ext_err, __u16 *proxy_port)
   230  {
   231  	return policy_can_ingress(ctx, map, src_id, dst_id, ETH_P_IPV6, tuple->dport,
   232  				 tuple->nexthdr, l4_off, false, match_type, audited,
   233  				 ext_err, proxy_port);
   234  }
   235  
   236  static __always_inline int policy_can_ingress4(struct __ctx_buff *ctx,
   237  		const void *map,
   238  					       const struct ipv4_ct_tuple *tuple,
   239  					       int l4_off, bool is_untracked_fragment,
   240  					       __u32 src_id, __u32 dst_id,
   241  					       __u8 *match_type, __u8 *audited,
   242  					       __s8 *ext_err, __u16 *proxy_port)
   243  {
   244  	return policy_can_ingress(ctx, map, src_id, dst_id, ETH_P_IP, tuple->dport,
   245  				 tuple->nexthdr, l4_off, is_untracked_fragment,
   246  				 match_type, audited, ext_err, proxy_port);
   247  }
   248  
   249  #ifdef HAVE_ENCAP
   250  static __always_inline bool is_encap(__u16 dport, __u8 proto)
   251  {
   252  	return proto == IPPROTO_UDP && dport == bpf_htons(TUNNEL_PORT);
   253  }
   254  #endif
   255  
   256  static __always_inline int
   257  policy_can_egress(struct __ctx_buff *ctx, const void *map, __u32 src_id, __u32 dst_id,
   258  		  __u16 ethertype, __u16 dport, __u8 proto, int l4_off, __u8 *match_type,
   259  		  __u8 *audited, __s8 *ext_err, __u16 *proxy_port)
   260  {
   261  	int ret;
   262  
   263  #ifdef HAVE_ENCAP
   264  	if (src_id != HOST_ID && is_encap(dport, proto))
   265  		return DROP_ENCAP_PROHIBITED;
   266  #endif
   267  	ret = __policy_can_access(map, ctx, src_id, dst_id, ethertype, dport,
   268  				  proto, l4_off, CT_EGRESS, false, match_type,
   269  				  ext_err, proxy_port);
   270  	if (ret >= 0)
   271  		return ret;
   272  	cilium_dbg(ctx, DBG_POLICY_DENIED, src_id, dst_id);
   273  	*audited = 0;
   274  #ifdef POLICY_AUDIT_MODE
   275  	if (IS_ERR(ret)) {
   276  		ret = CTX_ACT_OK;
   277  		*audited = 1;
   278  	}
   279  #endif
   280  	return ret;
   281  }
   282  
   283  static __always_inline int policy_can_egress6(struct __ctx_buff *ctx, const void *map,
   284  					      const struct ipv6_ct_tuple *tuple,
   285  					      int l4_off, __u32 src_id, __u32 dst_id,
   286  					      __u8 *match_type, __u8 *audited, __s8 *ext_err,
   287  					      __u16 *proxy_port)
   288  {
   289  	return policy_can_egress(ctx, map, src_id, dst_id, ETH_P_IPV6, tuple->dport,
   290  				 tuple->nexthdr, l4_off, match_type, audited,
   291  				 ext_err, proxy_port);
   292  }
   293  
   294  static __always_inline int policy_can_egress4(struct __ctx_buff *ctx, const void *map,
   295  					      const struct ipv4_ct_tuple *tuple,
   296  					      int l4_off, __u32 src_id, __u32 dst_id,
   297  					      __u8 *match_type, __u8 *audited, __s8 *ext_err,
   298  					      __u16 *proxy_port)
   299  {
   300  	return policy_can_egress(ctx, map, src_id, dst_id, ETH_P_IP, tuple->dport,
   301  				 tuple->nexthdr, l4_off, match_type, audited,
   302  				 ext_err, proxy_port);
   303  }
   304  
   305  /**
   306   * Mark ctx to skip policy enforcement
   307   * @arg ctx	packet
   308   *
   309   * Will cause the packet to ignore the policy enforcement verdict for allow rules and
   310   * be considered accepted despite of the policy outcome. Has no effect on deny rules.
   311   */
   312  static __always_inline void policy_mark_skip(struct __ctx_buff *ctx)
   313  {
   314  	ctx_store_meta(ctx, CB_POLICY, 1);
   315  }
   316  
   317  static __always_inline void policy_clear_mark(struct __ctx_buff *ctx)
   318  {
   319  	ctx_store_meta(ctx, CB_POLICY, 0);
   320  }