github.com/cilium/cilium@v1.16.2/bpf/lib/host_firewall.h

     1  /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
     2  /* Copyright Authors of Cilium */
     3  
     4  #pragma once
     5  
     6  /* Only compile in if host firewall is enabled and file is included from
     7   * bpf_host.
     8   */
     9  #if defined(ENABLE_HOST_FIREWALL) && defined(IS_BPF_HOST)
    10  
    11  #include "auth.h"
    12  #include "policy.h"
    13  #include "policy_log.h"
    14  #include "trace.h"
    15  
    16  # ifdef ENABLE_IPV6
    17  #  ifndef ENABLE_MASQUERADE_IPV6
    18  static __always_inline int
    19  ipv6_whitelist_snated_egress_connections(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple,
    20  					 enum ct_status ct_ret, __s8 *ext_err)
    21  {
    22  	/* If kube-proxy is in use (no BPF-based masquerading), packets from
    23  	 * pods may be SNATed. The response packet will therefore have a host
    24  	 * IP as the destination IP.
    25  	 * To avoid enforcing host policies for response packets to pods, we
    26  	 * need to create a CT entry for the forward, SNATed packet from the
    27  	 * pod. Response packets will thus match this CT entry and bypass host
    28  	 * policies.
    29  	 * We know the packet is a SNATed packet if the srcid from ipcache is
    30  	 * HOST_ID, but the actual srcid (derived from the packet mark) isn't.
    31  	 */
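         	/* Note: the entry is created without a ct_state (NULL below), so it
         	 * carries no policy metadata; it only exists so that reply packets
         	 * match in the conntrack lookup and bypass the host policy check.
         	 */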
    32  	if (ct_ret == CT_NEW) {
    33  		int ret = ct_create6(get_ct_map6(tuple), &CT_MAP_ANY6,
    34  				     tuple, ctx, CT_EGRESS, NULL, ext_err);
    35  		if (unlikely(ret < 0))
    36  			return ret;
    37  	}
    38  
    39  	return CTX_ACT_OK;
    40  }
    41  #  endif /* ENABLE_MASQUERADE_IPV6 */
    42  
    43  static __always_inline bool
    44  ipv6_host_policy_egress_lookup(struct __ctx_buff *ctx, __u32 src_sec_identity,
    45  			       __u32 ipcache_srcid, struct ipv6hdr *ip6,
    46  			       struct ct_buffer6 *ct_buffer)
    47  {
    48  	struct ipv6_ct_tuple *tuple = &ct_buffer->tuple;
    49  	int l3_off = ETH_HLEN, hdrlen;
    50  
    51  	/* Further action is needed in two cases:
    52  	 * 1. Packets from host IPs: need to enforce host policies.
    53  	 * 2. SNATed packets from pods: need to create a CT entry to skip
    54  	 *    applying host policies to reply packets
    55  	 *    (see ipv6_whitelist_snated_egress_connections).
    56  	 */
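         	/* Concretely: with BPF masquerading (ENABLE_MASQUERADE_IPV6) only
         	 * case 1 applies, so anything but src_sec_identity == HOST_ID bails
         	 * out here; without it, ipcache_srcid == HOST_ID also proceeds
         	 * (case 2).
         	 */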
    57  	if (src_sec_identity != HOST_ID &&
    58  	    (is_defined(ENABLE_MASQUERADE_IPV6) || ipcache_srcid != HOST_ID))
    59  		return false;
    60  
    61  	/* Lookup connection in conntrack map. */
    62  	tuple->nexthdr = ip6->nexthdr;
    63  	ipv6_addr_copy(&tuple->saddr, (union v6addr *)&ip6->saddr);
    64  	ipv6_addr_copy(&tuple->daddr, (union v6addr *)&ip6->daddr);
    65  	hdrlen = ipv6_hdrlen(ctx, &tuple->nexthdr);
    66  	if (hdrlen < 0) {
    67  		ct_buffer->ret = hdrlen;
    68  		return true;
    69  	}
    70  	ct_buffer->l4_off = l3_off + hdrlen;
    71  	ct_buffer->ret = ct_lookup6(get_ct_map6(tuple), tuple, ctx, ct_buffer->l4_off,
    72  				    CT_EGRESS, NULL, &ct_buffer->monitor);
    73  	return true;
    74  }
    75  
    76  static __always_inline int
    77  __ipv6_host_policy_egress(struct __ctx_buff *ctx, bool is_host_id __maybe_unused,
    78  			  struct ipv6hdr *ip6, struct ct_buffer6 *ct_buffer,
    79  			  struct trace_ctx *trace, __s8 *ext_err)
    80  {
    81  	struct ipv6_ct_tuple *tuple = &ct_buffer->tuple;
    82  	__u32 tunnel_endpoint = 0;
    83  	int ret = ct_buffer->ret;
    84  	int verdict;
    85  	__u8 policy_match_type = POLICY_MATCH_NONE;
    86  	__u8 audited = 0;
    87  	__u8 auth_type = 0;
    88  	struct remote_endpoint_info *info;
    89  	__u32 dst_sec_identity = 0;
    90  	__u16 proxy_port = 0;
    91  
    92  	trace->monitor = ct_buffer->monitor;
    93  	trace->reason = (enum trace_reason)ret;
    94  
    95  #  ifndef ENABLE_MASQUERADE_IPV6
    96  	if (!is_host_id)
     97  		/* Checked in ipv6_host_policy_egress_lookup: ipcache_srcid == HOST_ID. */
    98  		return ipv6_whitelist_snated_egress_connections(ctx, tuple, (enum ct_status)ret,
    99  							   ext_err);
   100  #  endif /* ENABLE_MASQUERADE_IPV6 */
   101  
   102  	/* Retrieve destination identity. */
   103  	info = lookup_ip6_remote_endpoint((union v6addr *)&ip6->daddr, 0);
   104  	if (info && info->sec_identity) {
   105  		dst_sec_identity = info->sec_identity;
   106  		tunnel_endpoint = info->tunnel_endpoint;
   107  	}
   108  	cilium_dbg(ctx, info ? DBG_IP_ID_MAP_SUCCEED6 : DBG_IP_ID_MAP_FAILED6,
   109  		   ip6->daddr.s6_addr32[3], dst_sec_identity);
   110  
   111  	/* Reply traffic and related are allowed regardless of policy verdict. */
   112  	if (ret == CT_REPLY || ret == CT_RELATED)
   113  		return CTX_ACT_OK;
   114  
   115  	/* Perform policy lookup. */
   116  	verdict = policy_can_egress6(ctx, &POLICY_MAP, tuple, ct_buffer->l4_off, HOST_ID,
   117  				     dst_sec_identity, &policy_match_type, &audited, ext_err,
   118  				     &proxy_port);
   119  	if (verdict == DROP_POLICY_AUTH_REQUIRED) {
   120  		auth_type = (__u8)*ext_err;
   121  		verdict = auth_lookup(ctx, HOST_ID, dst_sec_identity, tunnel_endpoint, auth_type);
   122  	}
   123  
   124  	/* Only create CT entry for accepted connections */
   125  	if (ret == CT_NEW && verdict == CTX_ACT_OK) {
   126  		struct ct_state ct_state_new = {};
   127  
   128  		ct_state_new.src_sec_id = HOST_ID;
   129  		ct_state_new.proxy_redirect = proxy_port > 0;
   130  
   131  		/* ext_err may contain a value from __policy_can_access, and
   132  		 * ct_create6 overwrites it only if it returns an error itself.
   133  		 * As the error from __policy_can_access is dropped in that
   134  		 * case, it's OK to return ext_err from ct_create6 along with
   135  		 * its error code.
   136  		 */
   137  		ret = ct_create6(get_ct_map6(tuple), &CT_MAP_ANY6, tuple,
   138  				 ctx, CT_EGRESS, &ct_state_new, ext_err);
   139  		if (IS_ERR(ret))
   140  			return ret;
   141  	}
   142  
   143  	/* Emit verdict if drop or if allow for CT_NEW. */
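         	/* Note: for accepted CT_NEW connections, ret was overwritten by
         	 * ct_create6() above and no longer holds the ct_lookup6() status.
         	 */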
   144  	if (verdict != CTX_ACT_OK || ret != CT_ESTABLISHED)
   145  		send_policy_verdict_notify(ctx, dst_sec_identity, tuple->dport,
   146  					   tuple->nexthdr, POLICY_EGRESS, 1,
   147  					   verdict, proxy_port, policy_match_type, audited,
   148  					   auth_type);
   149  	return verdict;
   150  }
   151  
   152  static __always_inline int
   153  ipv6_host_policy_egress(struct __ctx_buff *ctx, __u32 src_id,
   154  			__u32 ipcache_srcid, struct ipv6hdr *ip6,
   155  			struct trace_ctx *trace, __s8 *ext_err)
   156  {
   157  	struct ct_buffer6 ct_buffer = {};
   158  
   159  	if (!ipv6_host_policy_egress_lookup(ctx, src_id, ipcache_srcid, ip6, &ct_buffer))
   160  		return CTX_ACT_OK;
   161  	if (ct_buffer.ret < 0)
   162  		return ct_buffer.ret;
   163  
   164  	return __ipv6_host_policy_egress(ctx, src_id == HOST_ID,
   165  					ip6, &ct_buffer, trace, ext_err);
   166  }
   167  
   168  static __always_inline bool
   169  ipv6_host_policy_ingress_lookup(struct __ctx_buff *ctx, struct ipv6hdr *ip6,
   170  				struct ct_buffer6 *ct_buffer)
   171  {
   172  	__u32 dst_sec_identity = WORLD_IPV6_ID;
   173  	struct remote_endpoint_info *info;
   174  	struct ipv6_ct_tuple *tuple = &ct_buffer->tuple;
   175  	int hdrlen;
   176  
   177  	/* Retrieve destination identity. */
   178  	ipv6_addr_copy(&tuple->daddr, (union v6addr *)&ip6->daddr);
   179  	info = lookup_ip6_remote_endpoint(&tuple->daddr, 0);
   180  	if (info && info->sec_identity)
   181  		dst_sec_identity = info->sec_identity;
   182  	cilium_dbg(ctx, info ? DBG_IP_ID_MAP_SUCCEED6 : DBG_IP_ID_MAP_FAILED6,
   183  		   tuple->daddr.p4, dst_sec_identity);
   184  
   185  	/* Only enforce host policies for packets to host IPs. */
   186  	if (dst_sec_identity != HOST_ID)
   187  		return false;
   188  
   189  	/* Lookup connection in conntrack map. */
   190  	tuple->nexthdr = ip6->nexthdr;
   191  	ipv6_addr_copy(&tuple->saddr, (union v6addr *)&ip6->saddr);
   192  	hdrlen = ipv6_hdrlen(ctx, &tuple->nexthdr);
   193  	if (hdrlen < 0) {
   194  		ct_buffer->ret = hdrlen;
   195  		return true;
   196  	}
   197  	ct_buffer->l4_off = ETH_HLEN + hdrlen;
   198  	ct_buffer->ret = ct_lookup6(get_ct_map6(tuple), tuple, ctx, ct_buffer->l4_off,
   199  				    CT_INGRESS, NULL, &ct_buffer->monitor);
   200  
   201  	return true;
   202  }
   203  
   204  static __always_inline int
   205  __ipv6_host_policy_ingress(struct __ctx_buff *ctx, struct ipv6hdr *ip6,
   206  			   struct ct_buffer6 *ct_buffer, __u32 *src_sec_identity,
   207  			   struct trace_ctx *trace, __s8 *ext_err)
   208  {
   209  	struct ipv6_ct_tuple *tuple = &ct_buffer->tuple;
   210  	__u32 tunnel_endpoint = 0;
   211  	int ret = ct_buffer->ret;
   212  	int verdict = CTX_ACT_OK;
   213  	__u8 policy_match_type = POLICY_MATCH_NONE;
   214  	__u8 audited = 0;
   215  	__u8 auth_type = 0;
   216  	struct remote_endpoint_info *info;
   217  	__u16 proxy_port = 0;
   218  
   219  	trace->monitor = ct_buffer->monitor;
   220  	trace->reason = (enum trace_reason)ret;
   221  
   222  	/* Retrieve source identity. */
   223  	info = lookup_ip6_remote_endpoint((union v6addr *)&ip6->saddr, 0);
   224  	if (info && info->sec_identity) {
   225  		*src_sec_identity = info->sec_identity;
   226  		tunnel_endpoint = info->tunnel_endpoint;
   227  	}
   228  	cilium_dbg(ctx, info ? DBG_IP_ID_MAP_SUCCEED6 : DBG_IP_ID_MAP_FAILED6,
   229  		   ip6->saddr.s6_addr32[3], *src_sec_identity);
   230  
   231  	/* Reply traffic and related are allowed regardless of policy verdict. */
   232  	if (ret == CT_REPLY || ret == CT_RELATED)
   233  		goto out;
   234  
   235  	/* Perform policy lookup */
   236  	verdict = policy_can_ingress6(ctx, &POLICY_MAP, tuple, ct_buffer->l4_off,
   237  				      *src_sec_identity, HOST_ID, &policy_match_type, &audited,
   238  				      ext_err, &proxy_port);
   239  	if (verdict == DROP_POLICY_AUTH_REQUIRED) {
   240  		auth_type = (__u8)*ext_err;
   241  		verdict = auth_lookup(ctx, HOST_ID, *src_sec_identity, tunnel_endpoint, auth_type);
   242  	}
   243  
   244  	/* Only create CT entry for accepted connections */
   245  	if (ret == CT_NEW && verdict == CTX_ACT_OK) {
   246  		struct ct_state ct_state_new = {};
   247  
   248  		/* Create new entry for connection in conntrack map. */
   249  		ct_state_new.src_sec_id = *src_sec_identity;
   250  		ct_state_new.proxy_redirect = proxy_port > 0;
   251  
   252  		/* ext_err may contain a value from __policy_can_access, and
   253  		 * ct_create6 overwrites it only if it returns an error itself.
   254  		 * As the error from __policy_can_access is dropped in that
   255  		 * case, it's OK to return ext_err from ct_create6 along with
   256  		 * its error code.
   257  		 */
   258  		ret = ct_create6(get_ct_map6(tuple), &CT_MAP_ANY6, tuple,
   259  				 ctx, CT_INGRESS, &ct_state_new, ext_err);
   260  		if (IS_ERR(ret))
   261  			return ret;
   262  	}
   263  
   264  	/* Emit verdict if drop or if allow for CT_NEW. */
   265  	if (verdict != CTX_ACT_OK || ret != CT_ESTABLISHED)
   266  		send_policy_verdict_notify(ctx, *src_sec_identity, tuple->dport,
   267  					   tuple->nexthdr, POLICY_INGRESS, 1,
   268  					   verdict, proxy_port, policy_match_type, audited,
   269  					   auth_type);
   270  out:
   271  	/* This change is necessary for packets redirected from the lxc device to
   272  	 * the host device.
   273  	 */
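         	/* PACKET_HOST marks the packet as addressed to the local host, so
         	 * the stack delivers it locally after the redirect.
         	 */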
   274  	ctx_change_type(ctx, PACKET_HOST);
   275  	return verdict;
   276  }
   277  
   278  static __always_inline int
   279  ipv6_host_policy_ingress(struct __ctx_buff *ctx, __u32 *src_sec_identity,
   280  			 struct trace_ctx *trace, __s8 *ext_err)
   281  {
   282  	struct ct_buffer6 ct_buffer = {};
   283  	void *data, *data_end;
   284  	struct ipv6hdr *ip6;
   285  
   286  	if (!revalidate_data(ctx, &data, &data_end, &ip6))
   287  		return DROP_INVALID;
   288  
   289  	if (!ipv6_host_policy_ingress_lookup(ctx, ip6, &ct_buffer))
   290  		return CTX_ACT_OK;
   291  	if (ct_buffer.ret < 0)
   292  		return ct_buffer.ret;
   293  
   294  	return __ipv6_host_policy_ingress(ctx, ip6, &ct_buffer, src_sec_identity, trace, ext_err);
   295  }
   296  # endif /* ENABLE_IPV6 */
   297  
   298  # ifdef ENABLE_IPV4
   299  #  ifndef ENABLE_MASQUERADE_IPV4
   300  static __always_inline int
   301  ipv4_whitelist_snated_egress_connections(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple,
   302  					 enum ct_status ct_ret, __s8 *ext_err)
   303  {
   304  	/* If kube-proxy is in use (no BPF-based masquerading), packets from
   305  	 * pods may be SNATed. The response packet will therefore have a host
   306  	 * IP as the destination IP.
   307  	 * To avoid enforcing host policies for response packets to pods, we
   308  	 * need to create a CT entry for the forward, SNATed packet from the
   309  	 * pod. Response packets will thus match this CT entry and bypass host
   310  	 * policies.
   311  	 * We know the packet is a SNATed packet if the srcid from ipcache is
   312  	 * HOST_ID, but the actual srcid (derived from the packet mark) isn't.
   313  	 */
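         	/* As in the IPv6 case, the entry is created without a ct_state
         	 * (NULL below): it only lets reply packets match and bypass host
         	 * policies.
         	 */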
   314  	if (ct_ret == CT_NEW) {
   315  		int ret = ct_create4(get_ct_map4(tuple), &CT_MAP_ANY4,
   316  				     tuple, ctx, CT_EGRESS, NULL, ext_err);
   317  		if (unlikely(ret < 0))
   318  			return ret;
   319  	}
   320  
   321  	return CTX_ACT_OK;
   322  }
   323  #  endif /* ENABLE_MASQUERADE_IPV4 */
   324  
   325  static __always_inline bool
   326  ipv4_host_policy_egress_lookup(struct __ctx_buff *ctx, __u32 src_sec_identity,
   327  			       __u32 ipcache_srcid, struct iphdr *ip4,
   328  			       struct ct_buffer4 *ct_buffer)
   329  {
   330  	struct ipv4_ct_tuple *tuple = &ct_buffer->tuple;
   331  	int l3_off = ETH_HLEN;
   332  
   333  	/* Further action is needed in two cases:
   334  	 * 1. Packets from host IPs: need to enforce host policies.
   335  	 * 2. SNATed packets from pods: need to create a CT entry to skip
   336  	 *    applying host policies to reply packets.
   337  	 */
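         	/* With BPF masquerading (ENABLE_MASQUERADE_IPV4) only case 1
         	 * applies; without it, ipcache_srcid == HOST_ID also proceeds
         	 * (case 2).
         	 */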
   338  	if (src_sec_identity != HOST_ID &&
   339  	    (is_defined(ENABLE_MASQUERADE_IPV4) || ipcache_srcid != HOST_ID))
   340  		return false;
   341  
   342  	/* Lookup connection in conntrack map. */
   343  	tuple->nexthdr = ip4->protocol;
   344  	tuple->daddr = ip4->daddr;
   345  	tuple->saddr = ip4->saddr;
   346  	ct_buffer->l4_off = l3_off + ipv4_hdrlen(ip4);
   347  	ct_buffer->ret = ct_lookup4(get_ct_map4(tuple), tuple, ctx, ip4, ct_buffer->l4_off,
   348  				    CT_EGRESS, NULL, &ct_buffer->monitor);
   349  	return true;
   350  }
   351  
   352  static __always_inline int
   353  __ipv4_host_policy_egress(struct __ctx_buff *ctx, bool is_host_id __maybe_unused,
   354  			  struct iphdr *ip4, struct ct_buffer4 *ct_buffer,
   355  			  struct trace_ctx *trace, __s8 *ext_err)
   356  {
   357  	struct ipv4_ct_tuple *tuple = &ct_buffer->tuple;
   358  	__u32 tunnel_endpoint = 0;
   359  	int ret = ct_buffer->ret;
   360  	int verdict;
   361  	__u8 policy_match_type = POLICY_MATCH_NONE;
   362  	__u8 audited = 0;
   363  	__u8 auth_type = 0;
   364  	struct remote_endpoint_info *info;
   365  	__u32 dst_sec_identity = 0;
   366  	__u16 proxy_port = 0;
   367  
   368  	trace->monitor = ct_buffer->monitor;
   369  	trace->reason = (enum trace_reason)ret;
   370  
   371  #  ifndef ENABLE_MASQUERADE_IPV4
   372  	if (!is_host_id)
   373  		/* Checked in ipv4_host_policy_egress_lookup: ipcache_srcid == HOST_ID. */
   374  		return ipv4_whitelist_snated_egress_connections(ctx, tuple, (enum ct_status)ret,
   375  							   ext_err);
   376  #  endif /* ENABLE_MASQUERADE_IPV4 */
   377  
   378  	/* Retrieve destination identity. */
   379  	info = lookup_ip4_remote_endpoint(ip4->daddr, 0);
   380  	if (info && info->sec_identity) {
   381  		dst_sec_identity = info->sec_identity;
   382  		tunnel_endpoint = info->tunnel_endpoint;
   383  	}
   384  	cilium_dbg(ctx, info ? DBG_IP_ID_MAP_SUCCEED4 : DBG_IP_ID_MAP_FAILED4,
   385  		   ip4->daddr, dst_sec_identity);
   386  
   387  	/* Reply traffic and related are allowed regardless of policy verdict. */
   388  	if (ret == CT_REPLY || ret == CT_RELATED)
   389  		return CTX_ACT_OK;
   390  
   391  	/* Perform policy lookup. */
   392  	verdict = policy_can_egress4(ctx, &POLICY_MAP, tuple, ct_buffer->l4_off, HOST_ID,
   393  				     dst_sec_identity, &policy_match_type,
   394  				     &audited, ext_err, &proxy_port);
   395  	if (verdict == DROP_POLICY_AUTH_REQUIRED) {
   396  		auth_type = (__u8)*ext_err;
   397  		verdict = auth_lookup(ctx, HOST_ID, dst_sec_identity, tunnel_endpoint, auth_type);
   398  	}
   399  
   400  	/* Only create CT entry for accepted connections */
   401  	if (ret == CT_NEW && verdict == CTX_ACT_OK) {
   402  		struct ct_state ct_state_new = {};
   403  
   404  		ct_state_new.src_sec_id = HOST_ID;
   405  		ct_state_new.proxy_redirect = proxy_port > 0;
   406  
    407  		/* ext_err may contain a value from __policy_can_access, and
   408  		 * ct_create4 overwrites it only if it returns an error itself.
   409  		 * As the error from __policy_can_access is dropped in that
   410  		 * case, it's OK to return ext_err from ct_create4 along with
   411  		 * its error code.
   412  		 */
   413  		ret = ct_create4(get_ct_map4(tuple), &CT_MAP_ANY4, tuple,
   414  				 ctx, CT_EGRESS, &ct_state_new, ext_err);
   415  		if (IS_ERR(ret))
   416  			return ret;
   417  	}
   418  
   419  	/* Emit verdict if drop or if allow for CT_NEW. */
   420  	if (verdict != CTX_ACT_OK || ret != CT_ESTABLISHED)
   421  		send_policy_verdict_notify(ctx, dst_sec_identity, tuple->dport,
   422  					   tuple->nexthdr, POLICY_EGRESS, 0,
   423  					   verdict, proxy_port, policy_match_type, audited,
   424  					   auth_type);
   425  	return verdict;
   426  }
   427  
   428  static __always_inline int
   429  ipv4_host_policy_egress(struct __ctx_buff *ctx, __u32 src_id,
   430  			__u32 ipcache_srcid, struct iphdr *ip4,
   431  			struct trace_ctx *trace, __s8 *ext_err)
   432  {
   433  	struct ct_buffer4 ct_buffer = {};
   434  
   435  	if (!ipv4_host_policy_egress_lookup(ctx, src_id, ipcache_srcid, ip4, &ct_buffer))
   436  		return CTX_ACT_OK;
   437  	if (ct_buffer.ret < 0)
   438  		return ct_buffer.ret;
   439  
   440  	return __ipv4_host_policy_egress(ctx, src_id == HOST_ID, ip4, &ct_buffer, trace, ext_err);
   441  }
   442  
   443  static __always_inline bool
   444  ipv4_host_policy_ingress_lookup(struct __ctx_buff *ctx, struct iphdr *ip4,
   445  				struct ct_buffer4 *ct_buffer)
   446  {
   447  	__u32 dst_sec_identity = WORLD_IPV4_ID;
   448  	struct remote_endpoint_info *info;
   449  	struct ipv4_ct_tuple *tuple = &ct_buffer->tuple;
   450  	int l3_off = ETH_HLEN;
   451  
   452  	/* Retrieve destination identity. */
   453  	info = lookup_ip4_remote_endpoint(ip4->daddr, 0);
   454  	if (info && info->sec_identity)
   455  		dst_sec_identity = info->sec_identity;
   456  	cilium_dbg(ctx, info ? DBG_IP_ID_MAP_SUCCEED4 : DBG_IP_ID_MAP_FAILED4,
   457  		   ip4->daddr, dst_sec_identity);
   458  
   459  	/* Only enforce host policies for packets to host IPs. */
   460  	if (dst_sec_identity != HOST_ID)
   461  		return false;
   462  
   463  	/* Lookup connection in conntrack map. */
   464  	tuple->nexthdr = ip4->protocol;
   465  	tuple->daddr = ip4->daddr;
   466  	tuple->saddr = ip4->saddr;
   467  	ct_buffer->l4_off = l3_off + ipv4_hdrlen(ip4);
   468  	ct_buffer->ret = ct_lookup4(get_ct_map4(tuple), tuple, ctx, ip4, ct_buffer->l4_off,
   469  				    CT_INGRESS, NULL, &ct_buffer->monitor);
   470  
   471  	return true;
   472  }
   473  
   474  static __always_inline int
   475  __ipv4_host_policy_ingress(struct __ctx_buff *ctx, struct iphdr *ip4,
   476  			   struct ct_buffer4 *ct_buffer, __u32 *src_sec_identity,
   477  			   struct trace_ctx *trace, __s8 *ext_err)
   478  {
   479  	struct ipv4_ct_tuple *tuple = &ct_buffer->tuple;
   480  	__u32 tunnel_endpoint = 0;
   481  	int ret = ct_buffer->ret;
   482  	int verdict = CTX_ACT_OK;
   483  	__u8 policy_match_type = POLICY_MATCH_NONE;
   484  	__u8 audited = 0;
   485  	__u8 auth_type = 0;
   486  	struct remote_endpoint_info *info;
   487  	bool is_untracked_fragment = false;
   488  	__u16 proxy_port = 0;
   489  
   490  	trace->monitor = ct_buffer->monitor;
   491  	trace->reason = (enum trace_reason)ret;
   492  
   493  	/* Retrieve source identity. */
   494  	info = lookup_ip4_remote_endpoint(ip4->saddr, 0);
   495  	if (info && info->sec_identity) {
   496  		*src_sec_identity = info->sec_identity;
   497  		tunnel_endpoint = info->tunnel_endpoint;
   498  	}
   499  	cilium_dbg(ctx, info ? DBG_IP_ID_MAP_SUCCEED4 : DBG_IP_ID_MAP_FAILED4,
   500  		   ip4->saddr, *src_sec_identity);
   501  
   502  	/* Reply traffic and related are allowed regardless of policy verdict. */
   503  	if (ret == CT_REPLY || ret == CT_RELATED)
   504  		goto out;
   505  
   506  #  ifndef ENABLE_IPV4_FRAGMENTS
   507  	/* Indicate that this is a datagram fragment for which we cannot
   508  	 * retrieve L4 ports. Do not set flag if we support fragmentation.
   509  	 */
   510  	is_untracked_fragment = ipv4_is_fragment(ip4);
   511  #  endif
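         	/* The flag is passed to policy_can_ingress4() below so that it
         	 * knows the L4 ports in the tuple could not be read from this
         	 * fragment.
         	 */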
   512  
   513  	/* Perform policy lookup */
   514  	verdict = policy_can_ingress4(ctx, &POLICY_MAP, tuple, ct_buffer->l4_off,
   515  				      is_untracked_fragment, *src_sec_identity, HOST_ID,
   516  				      &policy_match_type, &audited, ext_err, &proxy_port);
   517  	if (verdict == DROP_POLICY_AUTH_REQUIRED) {
   518  		auth_type = (__u8)*ext_err;
   519  		verdict = auth_lookup(ctx, HOST_ID, *src_sec_identity, tunnel_endpoint, auth_type);
   520  	}
   521  
   522  	/* Only create CT entry for accepted connections */
   523  	if (ret == CT_NEW && verdict == CTX_ACT_OK) {
   524  		struct ct_state ct_state_new = {};
   525  
   526  		/* Create new entry for connection in conntrack map. */
   527  		ct_state_new.src_sec_id = *src_sec_identity;
   528  		ct_state_new.proxy_redirect = proxy_port > 0;
   529  
   530  		/* ext_err may contain a value from __policy_can_access, and
   531  		 * ct_create4 overwrites it only if it returns an error itself.
   532  		 * As the error from __policy_can_access is dropped in that
   533  		 * case, it's OK to return ext_err from ct_create4 along with
   534  		 * its error code.
   535  		 */
   536  		ret = ct_create4(get_ct_map4(tuple), &CT_MAP_ANY4, tuple,
   537  				 ctx, CT_INGRESS, &ct_state_new, ext_err);
   538  		if (IS_ERR(ret))
   539  			return ret;
   540  	}
   541  
   542  	/* Emit verdict if drop or if allow for CT_NEW. */
   543  	if (verdict != CTX_ACT_OK || ret != CT_ESTABLISHED)
   544  		send_policy_verdict_notify(ctx, *src_sec_identity, tuple->dport,
   545  					   tuple->nexthdr, POLICY_INGRESS, 0,
   546  					   verdict, proxy_port, policy_match_type, audited,
   547  					   auth_type);
   548  out:
   549  	/* This change is necessary for packets redirected from the lxc device to
   550  	 * the host device.
   551  	 */
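         	/* See the IPv6 path above: PACKET_HOST marks the packet as locally
         	 * destined.
         	 */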
   552  	ctx_change_type(ctx, PACKET_HOST);
   553  	return verdict;
   554  }
   555  
   556  static __always_inline int
   557  ipv4_host_policy_ingress(struct __ctx_buff *ctx, __u32 *src_sec_identity,
   558  			 struct trace_ctx *trace, __s8 *ext_err)
   559  {
   560  	struct ct_buffer4 ct_buffer = {};
   561  	void *data, *data_end;
   562  	struct iphdr *ip4;
   563  
   564  	if (!revalidate_data(ctx, &data, &data_end, &ip4))
   565  		return DROP_INVALID;
   566  
   567  	if (!ipv4_host_policy_ingress_lookup(ctx, ip4, &ct_buffer))
   568  		return CTX_ACT_OK;
   569  	if (ct_buffer.ret < 0)
   570  		return ct_buffer.ret;
   571  
   572  	return __ipv4_host_policy_ingress(ctx, ip4, &ct_buffer, src_sec_identity, trace, ext_err);
   573  }
   574  # endif /* ENABLE_IPV4 */
   575  #endif /* ENABLE_HOST_FIREWALL && IS_BPF_HOST */