github.com/cilium/cilium@v1.16.2/bpf/lib/pcap.h (about) 1 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 /* Copyright Authors of Cilium */ 3 4 #pragma once 5 6 #include <bpf/ctx/ctx.h> 7 #include <bpf/api.h> 8 9 #ifdef ENABLE_CAPTURE 10 #include "common.h" 11 #include "time_cache.h" 12 #include "lb.h" 13 14 struct pcap_timeval { 15 __u32 tv_sec; 16 __u32 tv_usec; 17 }; 18 19 struct pcap_timeoff { 20 __u64 tv_boot; 21 }; 22 23 struct pcap_pkthdr { 24 union { 25 /* User space needs to perform inline conversion from 26 * boot offset to time of day before writing out to 27 * an external file. 28 */ 29 struct pcap_timeval ts; 30 struct pcap_timeoff to; 31 }; 32 __u32 caplen; 33 __u32 len; 34 }; 35 36 struct capture_msg { 37 /* The hash is reserved and always zero for allowing different 38 * header extensions in future. 39 */ 40 NOTIFY_COMMON_HDR 41 /* The pcap hdr must be the last member so that the placement 42 * inside the perf RB is linear: pcap hdr + packet payload. 43 */ 44 struct pcap_pkthdr hdr; 45 }; 46 47 static __always_inline void cilium_capture(struct __ctx_buff *ctx, 48 const __u8 subtype, 49 const __u16 rule_id, 50 const __u64 tstamp, 51 __u64 __cap_len) 52 { 53 __u64 ctx_len = ctx_full_len(ctx); 54 __u64 cap_len = (!__cap_len || ctx_len < __cap_len) ? 55 ctx_len : __cap_len; 56 /* rule_id is the demuxer for the target pcap file when there are 57 * multiple capturing rules present. 58 */ 59 struct capture_msg msg = { 60 .type = CILIUM_NOTIFY_CAPTURE, 61 .subtype = subtype, 62 .source = rule_id, 63 .hdr = { 64 .to = { 65 .tv_boot = tstamp, 66 }, 67 .caplen = cap_len, 68 .len = ctx_len, 69 }, 70 }; 71 72 ctx_event_output(ctx, &EVENTS_MAP, (cap_len << 32) | BPF_F_CURRENT_CPU, 73 &msg, sizeof(msg)); 74 } 75 76 static __always_inline void __cilium_capture_in(struct __ctx_buff *ctx, 77 __u16 rule_id, __u32 cap_len) 78 { 79 /* For later pcap file generation, we export boot time to the RB 80 * such that user space can later reconstruct a real time of day 81 * timestamp in-place. 82 */ 83 cilium_capture(ctx, CAPTURE_INGRESS, rule_id, 84 bpf_ktime_cache_set(boot_ns), cap_len); 85 } 86 87 static __always_inline void __cilium_capture_out(struct __ctx_buff *ctx, 88 __u16 rule_id, __u32 cap_len) 89 { 90 cilium_capture(ctx, CAPTURE_EGRESS, rule_id, 91 bpf_ktime_cache_get(), cap_len); 92 } 93 94 /* The capture_enabled integer ({0,1}) is enabled/disabled via BPF based ELF 95 * templating. Meaning, when disabled, the verifier's dead code elimination 96 * will ensure that there is no overhead when the facility is not used. The 97 * below is a fallback definition for when the templating var is not defined. 98 */ 99 #ifndef capture_enabled 100 # define capture_enabled (ctx_is_xdp()) 101 #endif /* capture_enabled */ 102 103 struct capture_cache { 104 bool rule_seen; 105 __u16 rule_id; 106 __u16 cap_len; 107 }; 108 109 struct { 110 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 111 __type(key, __u32); 112 __type(value, struct capture_cache); 113 __uint(pinning, LIBBPF_PIN_BY_NAME); 114 __uint(max_entries, 1); 115 } cilium_capture_cache __section_maps_btf; 116 117 struct capture_rule { 118 __u16 rule_id; 119 __u16 reserved; 120 __u32 cap_len; 121 }; 122 123 /* 5-tuple wildcard key / mask. */ 124 struct capture4_wcard { 125 __be32 saddr; /* masking: prefix */ 126 __be32 daddr; /* masking: prefix */ 127 __be16 sport; /* masking: 0 or 0xffff */ 128 __be16 dport; /* masking: 0 or 0xffff */ 129 __u8 nexthdr; /* masking: 0 or 0xff */ 130 __u8 smask; /* prefix len: saddr */ 131 __u8 dmask; /* prefix len: daddr */ 132 __u8 flags; /* reserved: 0 */ 133 }; 134 135 /* 5-tuple wildcard key / mask. */ 136 struct capture6_wcard { 137 union v6addr saddr; /* masking: prefix */ 138 union v6addr daddr; /* masking: prefix */ 139 __be16 sport; /* masking: 0 or 0xffff */ 140 __be16 dport; /* masking: 0 or 0xffff */ 141 __u8 nexthdr; /* masking: 0 or 0xff */ 142 __u8 smask; /* prefix len: saddr */ 143 __u8 dmask; /* prefix len: daddr */ 144 __u8 flags; /* reserved: 0 */ 145 }; 146 147 #ifdef ENABLE_IPV4 148 struct { 149 __uint(type, BPF_MAP_TYPE_HASH); 150 __type(key, struct capture4_wcard); 151 __type(value, struct capture_rule); 152 __uint(pinning, LIBBPF_PIN_BY_NAME); 153 __uint(max_entries, CAPTURE4_SIZE); 154 __uint(map_flags, BPF_F_NO_PREALLOC); 155 } CAPTURE4_RULES __section_maps_btf; 156 157 static __always_inline void 158 cilium_capture4_masked_key(const struct capture4_wcard *orig, 159 const struct capture4_wcard *mask, 160 struct capture4_wcard *out) 161 { 162 out->daddr = orig->daddr & mask->daddr; 163 out->saddr = orig->saddr & mask->saddr; 164 out->dport = orig->dport & mask->dport; 165 out->sport = orig->sport & mask->sport; 166 out->nexthdr = orig->nexthdr & mask->nexthdr; 167 out->dmask = mask->dmask; 168 out->smask = mask->smask; 169 } 170 171 /* The agent is generating and emitting the PREFIX_MASKS4 and regenerating 172 * if a mask was added or removed. The cilium_capture4_rules can have n 173 * entries with m different PREFIX_MASKS4 where n >> m. Lookup performance 174 * depends mainly on m. Below is a fallback / example definition mainly for 175 * compile testing given agent typically emits this instead. Ordering of 176 * masks from agent side can f.e. be based on # of 1s from high to low. 177 */ 178 #ifndef PREFIX_MASKS4 179 # define PREFIX_MASKS4 \ 180 { \ 181 /* rule_id 1: \ 182 * srcIP/32, dstIP/32, dport, nexthdr \ 183 */ \ 184 .daddr = 0xffffffff, \ 185 .dmask = 32, \ 186 .saddr = 0xffffffff, \ 187 .smask = 32, \ 188 .dport = 0xffff, \ 189 .sport = 0, \ 190 .nexthdr = 0xff, \ 191 }, { \ 192 /* rule_id 2 (1st mask): \ 193 * srcIP/32 or dstIP/32 \ 194 */ \ 195 .daddr = 0xffffffff, \ 196 .dmask = 32, \ 197 .saddr = 0, \ 198 .smask = 0, \ 199 .dport = 0, \ 200 .sport = 0, \ 201 .nexthdr = 0, \ 202 }, { \ 203 /* rule_id 2 (2nd mask): \ 204 * srcIP/32 or dstIP/32 \ 205 */ \ 206 .daddr = 0, \ 207 .dmask = 0, \ 208 .saddr = 0xffffffff, \ 209 .smask = 32, \ 210 .dport = 0, \ 211 .sport = 0, \ 212 .nexthdr = 0, \ 213 }, 214 #endif /* PREFIX_MASKS4 */ 215 216 static __always_inline struct capture_rule * 217 cilium_capture4_classify_wcard(struct __ctx_buff *ctx) 218 { 219 struct capture4_wcard prefix_masks[] = { PREFIX_MASKS4 }; 220 struct capture4_wcard okey, lkey; 221 struct capture_rule *match; 222 void *data, *data_end; 223 struct iphdr *ip4; 224 int i; 225 const int size = sizeof(prefix_masks) / 226 sizeof(prefix_masks[0]); 227 228 if (!revalidate_data(ctx, &data, &data_end, &ip4)) 229 return NULL; 230 231 okey.daddr = ip4->daddr; 232 okey.dmask = 32; 233 okey.saddr = ip4->saddr; 234 okey.smask = 32; 235 okey.nexthdr = ip4->protocol; 236 237 if (ip4->protocol != IPPROTO_TCP && 238 ip4->protocol != IPPROTO_UDP) 239 return NULL; 240 if (l4_load_ports(ctx, ETH_HLEN + ipv4_hdrlen(ip4), &okey.sport) < 0) 241 return NULL; 242 243 okey.flags = 0; 244 lkey.flags = 0; 245 246 _Pragma("unroll") 247 for (i = 0; i < size; i++) { 248 cilium_capture4_masked_key(&okey, &prefix_masks[i], &lkey); 249 match = map_lookup_elem(&CAPTURE4_RULES, &lkey); 250 if (match) 251 return match; 252 } 253 254 return NULL; 255 } 256 #endif /* ENABLE_IPV4 */ 257 258 #ifdef ENABLE_IPV6 259 struct { 260 __uint(type, BPF_MAP_TYPE_HASH); 261 __type(key, struct capture6_wcard); 262 __type(value, struct capture_rule); 263 __uint(pinning, LIBBPF_PIN_BY_NAME); 264 __uint(max_entries, CAPTURE6_SIZE); 265 __uint(map_flags, BPF_F_NO_PREALLOC); 266 } CAPTURE6_RULES __section_maps_btf; 267 268 static __always_inline void 269 cilium_capture6_masked_key(const struct capture6_wcard *orig, 270 const struct capture6_wcard *mask, 271 struct capture6_wcard *out) 272 { 273 out->daddr.d1 = orig->daddr.d1 & mask->daddr.d1; 274 out->daddr.d2 = orig->daddr.d2 & mask->daddr.d2; 275 out->saddr.d1 = orig->saddr.d1 & mask->saddr.d1; 276 out->saddr.d2 = orig->saddr.d2 & mask->saddr.d2; 277 out->dport = orig->dport & mask->dport; 278 out->sport = orig->sport & mask->sport; 279 out->nexthdr = orig->nexthdr & mask->nexthdr; 280 out->dmask = mask->dmask; 281 out->smask = mask->smask; 282 } 283 284 /* The agent is generating and emitting the PREFIX_MASKS6 and regenerating 285 * if a mask was added or removed. Example for compile testing: 286 */ 287 #ifndef PREFIX_MASKS6 288 # define PREFIX_MASKS6 \ 289 { \ 290 /* rule_id 1: \ 291 * srcIP/128, dstIP/128, dport, nexthdr \ 292 */ \ 293 .daddr = { \ 294 .d1 = 0xffffffff, \ 295 .d2 = 0xffffffff, \ 296 }, \ 297 .dmask = 128, \ 298 .saddr = { \ 299 .d1 = 0xffffffff, \ 300 .d2 = 0xffffffff, \ 301 }, \ 302 .smask = 128, \ 303 .dport = 0xffff, \ 304 .sport = 0, \ 305 .nexthdr = 0xff, \ 306 }, { \ 307 /* rule_id 2 (1st mask): \ 308 * srcIP/128 or dstIP/128 \ 309 */ \ 310 .daddr = { \ 311 .d1 = 0xffffffff, \ 312 .d2 = 0xffffffff, \ 313 }, \ 314 .dmask = 128, \ 315 .saddr = {}, \ 316 .smask = 0, \ 317 .dport = 0, \ 318 .sport = 0, \ 319 .nexthdr = 0, \ 320 }, { \ 321 /* rule_id 2 (2nd mask): \ 322 * srcIP/128 or dstIP/128 \ 323 */ \ 324 .daddr = {}, \ 325 .dmask = 0, \ 326 .saddr = { \ 327 .d1 = 0xffffffff, \ 328 .d2 = 0xffffffff, \ 329 }, \ 330 .smask = 128, \ 331 .dport = 0, \ 332 .sport = 0, \ 333 .nexthdr = 0, \ 334 }, 335 #endif /* PREFIX_MASKS6 */ 336 337 static __always_inline struct capture_rule * 338 cilium_capture6_classify_wcard(struct __ctx_buff *ctx) 339 { 340 struct capture6_wcard prefix_masks[] = { PREFIX_MASKS6 }; 341 struct capture6_wcard okey, lkey; 342 struct capture_rule *match; 343 void *data, *data_end; 344 struct ipv6hdr *ip6; 345 int i, ret, l3_off = ETH_HLEN; 346 const int size = sizeof(prefix_masks) / 347 sizeof(prefix_masks[0]); 348 349 if (!revalidate_data(ctx, &data, &data_end, &ip6)) 350 return NULL; 351 352 ipv6_addr_copy(&okey.daddr, (union v6addr *)&ip6->daddr); 353 okey.dmask = 128; 354 ipv6_addr_copy(&okey.saddr, (union v6addr *)&ip6->saddr); 355 okey.smask = 128; 356 okey.nexthdr = ip6->nexthdr; 357 358 ret = ipv6_hdrlen(ctx, &okey.nexthdr); 359 if (ret < 0) 360 return NULL; 361 if (okey.nexthdr != IPPROTO_TCP && 362 okey.nexthdr != IPPROTO_UDP) 363 return NULL; 364 if (l4_load_ports(ctx, l3_off + ret, &okey.sport) < 0) 365 return NULL; 366 367 okey.flags = 0; 368 lkey.flags = 0; 369 370 _Pragma("unroll") 371 for (i = 0; i < size; i++) { 372 cilium_capture6_masked_key(&okey, &prefix_masks[i], &lkey); 373 match = map_lookup_elem(&CAPTURE6_RULES, &lkey); 374 if (match) 375 return match; 376 } 377 378 return NULL; 379 } 380 #endif /* ENABLE_IPV6 */ 381 382 static __always_inline struct capture_rule * 383 cilium_capture_classify_wcard(struct __ctx_buff *ctx) 384 { 385 struct capture_rule *ret = NULL; 386 __u16 proto; 387 388 if (!validate_ethertype(ctx, &proto)) 389 return ret; 390 switch (proto) { 391 #ifdef ENABLE_IPV4 392 case bpf_htons(ETH_P_IP): 393 ret = cilium_capture4_classify_wcard(ctx); 394 break; 395 #endif 396 #ifdef ENABLE_IPV6 397 case bpf_htons(ETH_P_IPV6): 398 ret = cilium_capture6_classify_wcard(ctx); 399 break; 400 #endif 401 default: 402 break; 403 } 404 return ret; 405 } 406 407 static __always_inline bool 408 cilium_capture_candidate(struct __ctx_buff *ctx __maybe_unused, 409 __u16 *rule_id __maybe_unused, 410 __u16 *cap_len __maybe_unused) 411 { 412 if (capture_enabled) { 413 struct capture_cache *c; 414 struct capture_rule *r; 415 __u32 zero = 0; 416 417 c = map_lookup_elem(&cilium_capture_cache, &zero); 418 if (always_succeeds(c)) { 419 r = cilium_capture_classify_wcard(ctx); 420 c->rule_seen = r; 421 if (r) { 422 c->cap_len = *cap_len = (__u16)r->cap_len; 423 c->rule_id = *rule_id = r->rule_id; 424 return true; 425 } 426 } 427 } 428 return false; 429 } 430 431 static __always_inline bool 432 cilium_capture_cached(struct __ctx_buff *ctx __maybe_unused, 433 __u16 *rule_id __maybe_unused, 434 __u32 *cap_len __maybe_unused) 435 { 436 if (capture_enabled) { 437 struct capture_cache *c; 438 __u32 zero = 0; 439 440 /* Avoid full classification a 2nd time due to i) overhead but 441 * also since ii) we might have pushed an encap header in front 442 * where we don't want to dissect everything again. 443 */ 444 c = map_lookup_elem(&cilium_capture_cache, &zero); 445 if (always_succeeds(c) && c->rule_seen) { 446 *cap_len = c->cap_len; 447 *rule_id = c->rule_id; 448 return true; 449 } 450 } 451 return false; 452 } 453 454 static __always_inline void 455 cilium_capture_in(struct __ctx_buff *ctx __maybe_unused) 456 { 457 __u16 cap_len; 458 __u16 rule_id; 459 460 if (cilium_capture_candidate(ctx, &rule_id, &cap_len)) 461 __cilium_capture_in(ctx, rule_id, cap_len); 462 } 463 464 static __always_inline void 465 cilium_capture_out(struct __ctx_buff *ctx __maybe_unused) 466 { 467 __u32 cap_len; 468 __u16 rule_id; 469 470 /* cilium_capture_out() is always paired with cilium_capture_in(), so 471 * we can rely on previous cached result on whether to push the pkt 472 * to the RB or not. 473 */ 474 if (cilium_capture_cached(ctx, &rule_id, &cap_len)) 475 __cilium_capture_out(ctx, rule_id, cap_len); 476 } 477 478 #else /* ENABLE_CAPTURE */ 479 480 static __always_inline void 481 cilium_capture_in(struct __ctx_buff *ctx __maybe_unused) 482 { 483 } 484 485 static __always_inline void 486 cilium_capture_out(struct __ctx_buff *ctx __maybe_unused) 487 { 488 } 489 490 #endif /* ENABLE_CAPTURE */