github.com/cilium/cilium@v1.16.2/bpf/lib/overloadable_skb.h

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#pragma once

#include "lib/common.h"
#include "linux/ip.h"
#include "lib/clustermesh.h"

static __always_inline __maybe_unused void
bpf_clear_meta(struct __sk_buff *ctx)
{
	__u32 zero = 0;

	WRITE_ONCE(ctx->cb[0], zero);
	WRITE_ONCE(ctx->cb[1], zero);
	WRITE_ONCE(ctx->cb[2], zero);
	WRITE_ONCE(ctx->cb[3], zero);
	WRITE_ONCE(ctx->cb[4], zero);

	/* This needs to be cleared mainly for tcx. */
	WRITE_ONCE(ctx->tc_classid, zero);
}

/**
 * get_identity - returns source identity from the mark field
 *
 * Identity stored in the mark is rearranged to place identity in the most
 * significant bits and cluster_id in the least significant bits, separated by 8
 * bits that are used for other options. When retrieving identity from the mark,
 * we need to rearrange it back to the original format.
 *
 * Example mark containing identity, where I is a bit for identity, C is a bit
 * for cluster_id, and X is a bit that should not be touched by this function:
 * IIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 *
 * This function should return an identity that looks like the following:
 * CCCCCCCC IIIIIIII IIIIIIII
 *
 * The agent flag 'max-connected-clusters' can affect the allocation of bits
 * for identity and cluster_id in the mark (see comment in set_identity_mark).
 */
static __always_inline __maybe_unused int
get_identity(const struct __sk_buff *ctx)
{
	__u32 cluster_id_lower = ctx->mark & CLUSTER_ID_LOWER_MASK;
	__u32 cluster_id_upper = (ctx->mark & get_cluster_id_upper_mask()) >> (8 + IDENTITY_LEN);
	__u32 identity = (ctx->mark >> 16) & IDENTITY_MAX;

	return (cluster_id_lower | cluster_id_upper) << IDENTITY_LEN | identity;
}
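
/* Worked example (illustrative, assuming the default max-connected-clusters=255,
 * i.e. IDENTITY_LEN == 16 and an empty upper cluster_id mask): for a mark of
 * 0x1234AB05, the identity bits are (mark >> 16) & IDENTITY_MAX == 0x1234, the
 * option byte 0xAB (the X bits) is ignored, and the cluster_id is
 * mark & CLUSTER_ID_LOWER_MASK == 0x05, so get_identity() returns
 * 0x05 << 16 | 0x1234 == 0x00051234.
 */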

/**
 * get_epid - returns source endpoint identity from the mark field
 */
static __always_inline __maybe_unused __u32
get_epid(const struct __sk_buff *ctx)
{
	return ctx->mark >> 16;
}

/**
 * set_identity_mark - pushes the 24-bit identity into the ctx mark value.
 *
 * Identity in the mark looks like the following, where I is a bit for
 * identity, C is a bit for cluster_id, and X is a bit that should not be
 * touched by this function:
 * IIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 *
 * With the agent flag 'max-connected-clusters', it is possible to extend the
 * cluster_id range by sacrificing some bits of the identity. When this is set
 * to a value other than the default 255, the most significant bits are taken
 * from identity and used for the most significant bits of cluster_id.
 *
 * An agent with 'max-connected-clusters=512' would set identity in the mark
 * like the following:
 * CIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 */
static __always_inline __maybe_unused void
set_identity_mark(struct __sk_buff *ctx, __u32 identity, __u32 magic)
{
	__u32 cluster_id = (identity >> IDENTITY_LEN) & CLUSTER_ID_MAX;
	__u32 cluster_id_lower = cluster_id & 0xFF;
	__u32 cluster_id_upper = ((cluster_id & 0xFFFFFF00) << (8 + IDENTITY_LEN));

	ctx->mark |= magic;
	ctx->mark &= MARK_MAGIC_KEY_MASK;
	ctx->mark |= (identity & IDENTITY_MAX) << 16 | cluster_id_lower | cluster_id_upper;
}
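
/* Hypothetical round-trip sketch (not part of the upstream header): writing an
 * identity with set_identity_mark() and reading it back with get_identity() is
 * expected to be lossless for any 24-bit identity, independent of the
 * 'max-connected-clusters' setting. The helper name is made up, and it assumes
 * a zero-initialized struct __sk_buff is acceptable as scratch storage in the
 * caller's context.
 */
static __always_inline __maybe_unused bool
identity_mark_roundtrip_example(__u32 identity, __u32 magic)
{
	struct __sk_buff ctx = {};

	set_identity_mark(&ctx, identity, magic);
	return get_identity(&ctx) == (int)identity;
}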

static __always_inline __maybe_unused void
set_identity_meta(struct __sk_buff *ctx, __u32 identity)
{
	ctx->cb[CB_ENCRYPT_IDENTITY] = identity;
}

/**
 * set_encrypt_key_mark - pushes the 8-bit key, 16-bit node ID, and encryption
 * marker into the ctx mark value.
 */
static __always_inline __maybe_unused void
set_encrypt_key_mark(struct __sk_buff *ctx, __u8 key, __u32 node_id)
{
	ctx->mark = or_encrypt_key(key) | node_id << 16;
}

static __always_inline __maybe_unused void
set_encrypt_key_meta(struct __sk_buff *ctx, __u8 key, __u32 node_id)
{
	ctx->cb[CB_ENCRYPT_MAGIC] = or_encrypt_key(key) | node_id << 16;
}

/**
 * ctx_set_cluster_id_mark - sets the cluster_id mark.
 */
static __always_inline __maybe_unused void
ctx_set_cluster_id_mark(struct __sk_buff *ctx, __u32 cluster_id)
{
	__u32 cluster_id_lower = (cluster_id & 0xFF);
	__u32 cluster_id_upper = ((cluster_id & 0xFFFFFF00) << (8 + IDENTITY_LEN));

	ctx->mark |= cluster_id_lower | cluster_id_upper | MARK_MAGIC_CLUSTER_ID;
}
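
/* Illustrative example, assuming 'max-connected-clusters=512' and therefore
 * IDENTITY_LEN == 15: for cluster_id 0x17F, the lower byte 0x7F lands in mark
 * bits 0-7, while the upper bit 0x100 is shifted by (8 + IDENTITY_LEN) == 23
 * positions into mark bit 31, matching the CIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 * layout described for set_identity_mark() above.
 */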

static __always_inline __maybe_unused __u32
ctx_get_cluster_id_mark(struct __sk_buff *ctx)
{
	__u32 ret = 0;
	__u32 cluster_id_lower = ctx->mark & CLUSTER_ID_LOWER_MASK;
	__u32 cluster_id_upper = (ctx->mark & get_cluster_id_upper_mask()) >> (8 + IDENTITY_LEN);

	if ((ctx->mark & MARK_MAGIC_CLUSTER_ID) != MARK_MAGIC_CLUSTER_ID)
		return ret;

	ret = (cluster_id_upper | cluster_id_lower) & CLUSTER_ID_MAX;
	ctx->mark &= ~(__u32)(MARK_MAGIC_CLUSTER_ID | get_mark_magic_cluster_id_mask());

	return ret;
}

static __always_inline __maybe_unused int
redirect_self(const struct __sk_buff *ctx)
{
	/* Looping back the packet into the originating netns. We xmit into the
	 * host's veth device such that we end up on ingress in the peer.
	 */
	return ctx_redirect(ctx, ctx->ifindex, 0);
}

static __always_inline __maybe_unused bool
neigh_resolver_available(void)
{
	return is_defined(HAVE_FIB_NEIGH);
}

static __always_inline __maybe_unused void
ctx_skip_nodeport_clear(struct __sk_buff *ctx __maybe_unused)
{
#ifdef ENABLE_NODEPORT
	ctx->tc_index &= ~TC_INDEX_F_SKIP_NODEPORT;
#endif
}

static __always_inline __maybe_unused void
ctx_skip_nodeport_set(struct __sk_buff *ctx __maybe_unused)
{
#ifdef ENABLE_NODEPORT
	ctx->tc_index |= TC_INDEX_F_SKIP_NODEPORT;
#endif
}

static __always_inline __maybe_unused bool
ctx_skip_nodeport(struct __sk_buff *ctx __maybe_unused)
{
#ifdef ENABLE_NODEPORT
	volatile __u32 tc_index = ctx->tc_index;

	ctx->tc_index &= ~TC_INDEX_F_SKIP_NODEPORT;
	return tc_index & TC_INDEX_F_SKIP_NODEPORT;
#else
	return true;
#endif
}

#ifdef ENABLE_HOST_FIREWALL
static __always_inline void
ctx_skip_host_fw_set(struct __sk_buff *ctx)
{
	ctx->tc_index |= TC_INDEX_F_SKIP_HOST_FIREWALL;
}

static __always_inline bool
ctx_skip_host_fw(struct __sk_buff *ctx)
{
	volatile __u32 tc_index = ctx->tc_index;

	ctx->tc_index &= ~TC_INDEX_F_SKIP_HOST_FIREWALL;
	return tc_index & TC_INDEX_F_SKIP_HOST_FIREWALL;
}
#endif /* ENABLE_HOST_FIREWALL */

static __always_inline __maybe_unused __u32 ctx_get_xfer(struct __sk_buff *ctx,
							 __u32 off)
{
	__u32 *data_meta = ctx_data_meta(ctx);
	void *data = ctx_data(ctx);

	return !ctx_no_room(data_meta + off + 1, data) ? data_meta[off] : 0;
}

static __always_inline __maybe_unused void
ctx_set_xfer(struct __sk_buff *ctx __maybe_unused, __u32 meta __maybe_unused)
{
	/* Only possible from XDP -> SKB. */
}

static __always_inline __maybe_unused void
ctx_move_xfer(struct __sk_buff *ctx __maybe_unused)
{
	/* Only possible from XDP -> SKB. */
}

static __always_inline __maybe_unused int
ctx_change_head(struct __sk_buff *ctx, __u32 head_room, __u64 flags)
{
	return skb_change_head(ctx, head_room, flags);
}

static __always_inline void ctx_snat_done_set(struct __sk_buff *ctx)
{
	ctx->mark &= ~MARK_MAGIC_HOST_MASK;
	ctx->mark |= MARK_MAGIC_SNAT_DONE;
}

static __always_inline bool ctx_snat_done(const struct __sk_buff *ctx)
{
	return (ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_SNAT_DONE;
}

static __always_inline bool ctx_is_overlay(const struct __sk_buff *ctx)
{
	if (!is_defined(HAVE_ENCAP))
		return false;

	return (ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_OVERLAY;
}

#ifdef ENABLE_EGRESS_GATEWAY_COMMON
static __always_inline void ctx_egw_done_set(struct __sk_buff *ctx)
{
	ctx->mark &= ~MARK_MAGIC_HOST_MASK;
	ctx->mark |= MARK_MAGIC_EGW_DONE;
}

static __always_inline bool ctx_egw_done(const struct __sk_buff *ctx)
{
	return (ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_EGW_DONE;
}
#endif /* ENABLE_EGRESS_GATEWAY_COMMON */

#ifdef HAVE_ENCAP
static __always_inline __maybe_unused int
ctx_set_encap_info(struct __sk_buff *ctx, __u32 src_ip,
		   __be16 src_port __maybe_unused, __u32 node_id,
		   __u32 seclabel, __u32 vni __maybe_unused,
		   void *opt, __u32 opt_len)
{
	struct bpf_tunnel_key key = {};
	__u32 key_size = TUNNEL_KEY_WITHOUT_SRC_IP;
	int ret;

#ifdef ENABLE_VTEP
	if (vni != NOT_VTEP_DST)
		key.tunnel_id = get_tunnel_id(vni);
	else
#endif /* ENABLE_VTEP */
		key.tunnel_id = get_tunnel_id(seclabel);

	if (src_ip != 0) {
		key.local_ipv4 = bpf_ntohl(src_ip);
		key_size = sizeof(key);
	}
	key.remote_ipv4 = node_id;
	key.tunnel_ttl = IPDEFTTL;

	ret = ctx_set_tunnel_key(ctx, &key, key_size, BPF_F_ZERO_CSUM_TX);
	if (unlikely(ret < 0))
		return DROP_WRITE_ERROR;

	if (opt && opt_len > 0) {
		ret = ctx_set_tunnel_opt(ctx, opt, opt_len);
		if (unlikely(ret < 0))
			return DROP_WRITE_ERROR;
	}

	return CTX_ACT_REDIRECT;
}
#endif /* HAVE_ENCAP */