github.com/cilium/cilium@v1.16.2/bpf/lib/overloadable_skb.h

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#pragma once

#include "lib/common.h"
#include "linux/ip.h"
#include "lib/clustermesh.h"


static __always_inline __maybe_unused void
bpf_clear_meta(struct __sk_buff *ctx)
{
	__u32 zero = 0;

	WRITE_ONCE(ctx->cb[0], zero);
	WRITE_ONCE(ctx->cb[1], zero);
	WRITE_ONCE(ctx->cb[2], zero);
	WRITE_ONCE(ctx->cb[3], zero);
	WRITE_ONCE(ctx->cb[4], zero);

	/* This needs to be cleared mainly for tcx. */
	WRITE_ONCE(ctx->tc_classid, zero);
}

/**
 * get_identity - returns source identity from the mark field
 *
 * Identity stored in the mark is rearranged to place identity in the most
 * significant bits and cluster_id in the least significant bits, separated by 8
 * bits that are used for other options. When retrieving identity from the mark,
 * we need to rearrange it back to the original format.
 *
 * Example mark containing identity, where I is a bit for identity, C is a bit
 * for cluster_id, and X is a bit that should not be touched by this function:
 * IIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 *
 * This function should return an identity that looks like the following:
 * CCCCCCCC IIIIIIII IIIIIIII
 *
 * The agent flag 'max-connected-clusters' can affect the allocation of bits
 * for identity and cluster_id in the mark (see comment in set_identity_mark).
 */
static __always_inline __maybe_unused int
get_identity(const struct __sk_buff *ctx)
{
	__u32 cluster_id_lower = ctx->mark & CLUSTER_ID_LOWER_MASK;
	__u32 cluster_id_upper = (ctx->mark & get_cluster_id_upper_mask()) >> (8 + IDENTITY_LEN);
	__u32 identity = (ctx->mark >> 16) & IDENTITY_MAX;

	return (cluster_id_lower | cluster_id_upper) << IDENTITY_LEN | identity;
}
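
/* Illustrative worked example for get_identity(), assuming the default
 * 'max-connected-clusters=255' layout (IDENTITY_LEN == 16, the upper
 * cluster_id mask is empty, CLUSTER_ID_LOWER_MASK == 0xFF); the exact
 * constants are defined in lib/clustermesh.h:
 *
 *   ctx->mark        = 0xABCD0F02               IIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 *   identity         = (0xABCD0F02 >> 16) & 0xFFFF = 0xABCD
 *   cluster_id_lower = 0xABCD0F02 & 0xFF           = 0x02
 *   return value     = 0x02 << 16 | 0xABCD         = 0x02ABCD
 *
 * i.e. cluster_id 2 and identity 0xABCD, with the magic byte 0x0F left
 * untouched in the mark.
 */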

/**
 * get_epid - returns source endpoint identity from the mark field
 */
static __always_inline __maybe_unused __u32
get_epid(const struct __sk_buff *ctx)
{
	return ctx->mark >> 16;
}

/**
 * set_identity_mark - pushes 24 bit identity into ctx mark value.
 *
 * Identity in the mark looks like the following, where I is a bit for
 * identity, C is a bit for cluster_id, and X is a bit that should not be
 * touched by this function:
 * IIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 *
 * With the agent flag 'max-connected-clusters', it is possible to extend the
 * cluster_id range by sacrificing some bits of the identity. When this is set
 * to a value other than the default 255, the most significant bits are taken
 * from identity and used for the most significant bits of cluster_id.
 *
 * An agent with 'max-connected-clusters=512' would set identity in the mark
 * like the following:
 * CIIIIIII IIIIIIII XXXXXXXX CCCCCCCC
 */
static __always_inline __maybe_unused void
set_identity_mark(struct __sk_buff *ctx, __u32 identity, __u32 magic)
{
	__u32 cluster_id = (identity >> IDENTITY_LEN) & CLUSTER_ID_MAX;
	__u32 cluster_id_lower = cluster_id & 0xFF;
	__u32 cluster_id_upper = ((cluster_id & 0xFFFFFF00) << (8 + IDENTITY_LEN));

	ctx->mark |= magic;
	ctx->mark &= MARK_MAGIC_KEY_MASK;
	ctx->mark |= (identity & IDENTITY_MAX) << 16 | cluster_id_lower | cluster_id_upper;
}
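
/* Illustrative worked example for set_identity_mark(), assuming
 * 'max-connected-clusters=512' (CLUSTER_ID_LEN == 9, IDENTITY_LEN == 15,
 * IDENTITY_MAX == 0x7FFF, CLUSTER_ID_MAX == 0x1FF); the exact constants are
 * defined in lib/clustermesh.h. Encoding the 24 bit identity 0x801234, i.e.
 * cluster_id 0x100 (256) and identity 0x1234:
 *
 *   cluster_id       = (0x801234 >> 15) & 0x1FF         = 0x100
 *   cluster_id_lower = 0x100 & 0xFF                     = 0x00
 *   cluster_id_upper = (0x100 & 0xFFFFFF00) << (8 + 15) = 0x80000000
 *   ctx->mark        = magic | 0x1234 << 16 | 0x00 | 0x80000000
 *                    = 0x92340000 | magic
 *
 * which matches the CIIIIIII IIIIIIII XXXXXXXX CCCCCCCC layout above, with
 * the ninth cluster_id bit carried in the most significant bit of the mark.
 * The intermediate 'ctx->mark &= MARK_MAGIC_KEY_MASK' step keeps only the
 * magic/key byte, dropping any stale identity or cluster_id bits first.
 */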

static __always_inline __maybe_unused void
set_identity_meta(struct __sk_buff *ctx, __u32 identity)
{
	ctx->cb[CB_ENCRYPT_IDENTITY] = identity;
}

/**
 * set_encrypt_key_mark - pushes 8 bit key, 16 bit node ID, and encryption marker into ctx mark value.
 */
static __always_inline __maybe_unused void
set_encrypt_key_mark(struct __sk_buff *ctx, __u8 key, __u32 node_id)
{
	ctx->mark = or_encrypt_key(key) | node_id << 16;
}

static __always_inline __maybe_unused void
set_encrypt_key_meta(struct __sk_buff *ctx, __u8 key, __u32 node_id)
{
	ctx->cb[CB_ENCRYPT_MAGIC] = or_encrypt_key(key) | node_id << 16;
}

/**
 * ctx_set_cluster_id_mark - sets the cluster_id mark.
 */
static __always_inline __maybe_unused void
ctx_set_cluster_id_mark(struct __sk_buff *ctx, __u32 cluster_id)
{
	__u32 cluster_id_lower = (cluster_id & 0xFF);
	__u32 cluster_id_upper = ((cluster_id & 0xFFFFFF00) << (8 + IDENTITY_LEN));

	ctx->mark |= cluster_id_lower | cluster_id_upper | MARK_MAGIC_CLUSTER_ID;
}

static __always_inline __maybe_unused __u32
ctx_get_cluster_id_mark(struct __sk_buff *ctx)
{
	__u32 ret = 0;
	__u32 cluster_id_lower = ctx->mark & CLUSTER_ID_LOWER_MASK;
	__u32 cluster_id_upper = (ctx->mark & get_cluster_id_upper_mask()) >> (8 + IDENTITY_LEN);

	if ((ctx->mark & MARK_MAGIC_CLUSTER_ID) != MARK_MAGIC_CLUSTER_ID)
		return ret;

	ret = (cluster_id_upper | cluster_id_lower) & CLUSTER_ID_MAX;
	ctx->mark &= ~(__u32)(MARK_MAGIC_CLUSTER_ID | get_mark_magic_cluster_id_mask());

	return ret;
}
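
/* Illustrative round trip for the two cluster_id helpers above, assuming the
 * default 'max-connected-clusters=255' layout (only the lower 8 cluster_id
 * bits are used, so get_cluster_id_upper_mask() is empty):
 *
 *   ctx_set_cluster_id_mark(ctx, 3);
 *       ctx->mark |= 0x03 | MARK_MAGIC_CLUSTER_ID;
 *
 *   ctx_get_cluster_id_mark(ctx);
 *       returns 3, and clears both the MARK_MAGIC_CLUSTER_ID magic and
 *       (via get_mark_magic_cluster_id_mask()) the cluster_id bits from
 *       ctx->mark, so a second call returns 0.
 */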

static __always_inline __maybe_unused int
redirect_self(const struct __sk_buff *ctx)
{
	/* Looping back the packet into the originating netns. We xmit into the
	 * host's veth device such that we end up on ingress in the peer.
	 */
	return ctx_redirect(ctx, ctx->ifindex, 0);
}

static __always_inline __maybe_unused bool
neigh_resolver_available(void)
{
	return is_defined(HAVE_FIB_NEIGH);
}

static __always_inline __maybe_unused void
ctx_skip_nodeport_clear(struct __sk_buff *ctx __maybe_unused)
{
#ifdef ENABLE_NODEPORT
	ctx->tc_index &= ~TC_INDEX_F_SKIP_NODEPORT;
#endif
}

static __always_inline __maybe_unused void
ctx_skip_nodeport_set(struct __sk_buff *ctx __maybe_unused)
{
#ifdef ENABLE_NODEPORT
	ctx->tc_index |= TC_INDEX_F_SKIP_NODEPORT;
#endif
}

static __always_inline __maybe_unused bool
ctx_skip_nodeport(struct __sk_buff *ctx __maybe_unused)
{
#ifdef ENABLE_NODEPORT
	volatile __u32 tc_index = ctx->tc_index;
	ctx->tc_index &= ~TC_INDEX_F_SKIP_NODEPORT;
	return tc_index & TC_INDEX_F_SKIP_NODEPORT;
#else
	return true;
#endif
}

#ifdef ENABLE_HOST_FIREWALL
static __always_inline void
ctx_skip_host_fw_set(struct __sk_buff *ctx)
{
	ctx->tc_index |= TC_INDEX_F_SKIP_HOST_FIREWALL;
}

static __always_inline bool
ctx_skip_host_fw(struct __sk_buff *ctx)
{
	volatile __u32 tc_index = ctx->tc_index;

	ctx->tc_index &= ~TC_INDEX_F_SKIP_HOST_FIREWALL;
	return tc_index & TC_INDEX_F_SKIP_HOST_FIREWALL;
}
#endif /* ENABLE_HOST_FIREWALL */

static __always_inline __maybe_unused __u32 ctx_get_xfer(struct __sk_buff *ctx,
							 __u32 off)
{
	__u32 *data_meta = ctx_data_meta(ctx);
	void *data = ctx_data(ctx);

	return !ctx_no_room(data_meta + off + 1, data) ? data_meta[off] : 0;
}

static __always_inline __maybe_unused void
ctx_set_xfer(struct __sk_buff *ctx __maybe_unused, __u32 meta __maybe_unused)
{
	/* Only possible from XDP -> SKB. */
}

static __always_inline __maybe_unused void
ctx_move_xfer(struct __sk_buff *ctx __maybe_unused)
{
	/* Only possible from XDP -> SKB. */
}

static __always_inline __maybe_unused int
ctx_change_head(struct __sk_buff *ctx, __u32 head_room, __u64 flags)
{
	return skb_change_head(ctx, head_room, flags);
}

static __always_inline void ctx_snat_done_set(struct __sk_buff *ctx)
{
	ctx->mark &= ~MARK_MAGIC_HOST_MASK;
	ctx->mark |= MARK_MAGIC_SNAT_DONE;
}

static __always_inline bool ctx_snat_done(const struct __sk_buff *ctx)
{
	return (ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_SNAT_DONE;
}

static __always_inline bool ctx_is_overlay(const struct __sk_buff *ctx)
{
	if (!is_defined(HAVE_ENCAP))
		return false;

	return (ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_OVERLAY;
}

#ifdef ENABLE_EGRESS_GATEWAY_COMMON
static __always_inline void ctx_egw_done_set(struct __sk_buff *ctx)
{
	ctx->mark &= ~MARK_MAGIC_HOST_MASK;
	ctx->mark |= MARK_MAGIC_EGW_DONE;
}

static __always_inline bool ctx_egw_done(const struct __sk_buff *ctx)
{
	return (ctx->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_EGW_DONE;
}
#endif /* ENABLE_EGRESS_GATEWAY_COMMON */

#ifdef HAVE_ENCAP
static __always_inline __maybe_unused int
ctx_set_encap_info(struct __sk_buff *ctx, __u32 src_ip,
		   __be16 src_port __maybe_unused, __u32 node_id,
		   __u32 seclabel, __u32 vni __maybe_unused,
		   void *opt, __u32 opt_len)
{
	struct bpf_tunnel_key key = {};
	__u32 key_size = TUNNEL_KEY_WITHOUT_SRC_IP;
	int ret;

#ifdef ENABLE_VTEP
	if (vni != NOT_VTEP_DST)
		key.tunnel_id = get_tunnel_id(vni);
	else
#endif /* ENABLE_VTEP */
		key.tunnel_id = get_tunnel_id(seclabel);

	if (src_ip != 0) {
		key.local_ipv4 = bpf_ntohl(src_ip);
		key_size = sizeof(key);
	}
	key.remote_ipv4 = node_id;
	key.tunnel_ttl = IPDEFTTL;

	ret = ctx_set_tunnel_key(ctx, &key, key_size, BPF_F_ZERO_CSUM_TX);
	if (unlikely(ret < 0))
		return DROP_WRITE_ERROR;

	if (opt && opt_len > 0) {
		ret = ctx_set_tunnel_opt(ctx, opt, opt_len);
		if (unlikely(ret < 0))
			return DROP_WRITE_ERROR;
	}

	return CTX_ACT_REDIRECT;
}
#endif /* HAVE_ENCAP */
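
/* Minimal usage sketch for ctx_set_encap_info() above, not taken from the
 * datapath itself; the variable names and argument values are illustrative
 * only. A caller encapsulating a packet toward a remote node would roughly:
 *
 *	ret = ctx_set_encap_info(ctx, 0, 0, tunnel_endpoint, seclabel,
 *				 NOT_VTEP_DST, NULL, 0);
 *	if (ret == CTX_ACT_REDIRECT)
 *		return ctx_redirect(ctx, ENCAP_IFINDEX, 0);
 *	return ret;
 *
 * i.e. the helper only fills in the tunnel key (and optional tunnel options)
 * and reports CTX_ACT_REDIRECT; redirecting to the tunnel device and handling
 * DROP_WRITE_ERROR is left to the caller.
 */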