github.com/cilium/cilium@v1.16.2/bpf/lib/pcap.h

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#pragma once

#include <bpf/ctx/ctx.h>
#include <bpf/api.h>

#ifdef ENABLE_CAPTURE
#include "common.h"
#include "time_cache.h"
#include "lb.h"

struct pcap_timeval {
	__u32 tv_sec;
	__u32 tv_usec;
};

struct pcap_timeoff {
	__u64 tv_boot;
};

struct pcap_pkthdr {
	union {
		/* User space needs to perform inline conversion from
		 * boot offset to time of day before writing out to
		 * an external file.
		 */
		struct pcap_timeval ts;
		struct pcap_timeoff to;
	};
	__u32 caplen;
	__u32 len;
};
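
/* Illustrative sketch only, not part of the datapath: one way user space
 * could turn the exported tv_boot (CLOCK_BOOTTIME nanoseconds) into the
 * pcap_timeval written to the pcap file, using the realtime/boottime offset
 * sampled at read time. The helper name and the CAPTURE_DOC_EXAMPLE guard
 * are hypothetical; in Cilium this conversion happens in the agent.
 */
#ifdef CAPTURE_DOC_EXAMPLE
#include <time.h>

#define PCAP_NSEC_PER_SEC 1000000000ULL

static void pcap_boot_to_timeval(__u64 tv_boot, struct pcap_timeval *tv)
{
	struct timespec real, boot;
	__u64 off, abs_ns;

	/* Offset between wallclock and boottime, sampled now. */
	clock_gettime(CLOCK_REALTIME, &real);
	clock_gettime(CLOCK_BOOTTIME, &boot);
	off = ((__u64)real.tv_sec * PCAP_NSEC_PER_SEC + real.tv_nsec) -
	      ((__u64)boot.tv_sec * PCAP_NSEC_PER_SEC + boot.tv_nsec);

	abs_ns = tv_boot + off;
	tv->tv_sec  = abs_ns / PCAP_NSEC_PER_SEC;
	tv->tv_usec = (abs_ns % PCAP_NSEC_PER_SEC) / 1000ULL;
}
#endif /* CAPTURE_DOC_EXAMPLE */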

struct capture_msg {
	/* The hash is reserved and always zero to allow for different
	 * header extensions in the future.
	 */
	NOTIFY_COMMON_HDR
	/* The pcap hdr must be the last member so that the placement
	 * inside the perf RB is linear: pcap hdr + packet payload.
	 */
	struct pcap_pkthdr hdr;
};

static __always_inline void cilium_capture(struct __ctx_buff *ctx,
					   const __u8 subtype,
					   const __u16 rule_id,
					   const __u64 tstamp,
					   __u64 __cap_len)
{
	__u64 ctx_len = ctx_full_len(ctx);
	__u64 cap_len = (!__cap_len || ctx_len < __cap_len) ?
			ctx_len : __cap_len;
	/* rule_id is the demuxer for the target pcap file when there are
	 * multiple capturing rules present.
	 */
	struct capture_msg msg = {
		.type    = CILIUM_NOTIFY_CAPTURE,
		.subtype = subtype,
		.source  = rule_id,
		.hdr     = {
			.to	= {
				.tv_boot = tstamp,
			},
			.caplen	= cap_len,
			.len	= ctx_len,
		},
	};

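	/* The upper 32 bits of the flags word tell the kernel how many bytes
	 * of packet payload to append right after msg in the event; the lower
	 * bits (BPF_F_CURRENT_CPU) select the perf ring of the current CPU.
	 */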
	ctx_event_output(ctx, &EVENTS_MAP, (cap_len << 32) | BPF_F_CURRENT_CPU,
			 &msg, sizeof(msg));
}

static __always_inline void __cilium_capture_in(struct __ctx_buff *ctx,
						__u16 rule_id, __u32 cap_len)
{
	/* For later pcap file generation, we export boot time to the RB
	 * such that user space can later reconstruct a real time of day
	 * timestamp in-place.
	 */
	cilium_capture(ctx, CAPTURE_INGRESS, rule_id,
		       bpf_ktime_cache_set(boot_ns), cap_len);
}

static __always_inline void __cilium_capture_out(struct __ctx_buff *ctx,
						 __u16 rule_id, __u32 cap_len)
{
	cilium_capture(ctx, CAPTURE_EGRESS, rule_id,
		       bpf_ktime_cache_get(), cap_len);
}

/* The capture_enabled integer ({0,1}) is set via BPF-based ELF templating.
 * When the facility is disabled, the verifier's dead code elimination thus
 * ensures that it adds no overhead. The definition below is a fallback for
 * when the templating variable is not defined.
 */
#ifndef capture_enabled
# define capture_enabled (ctx_is_xdp())
#endif /* capture_enabled */
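
/* When templated by the agent, the variable is typically emitted as a plain
 * constant (for example, #define capture_enabled 1); a constant 0 is what
 * lets the capture paths below be eliminated entirely. The exact emitted
 * form is an assumption here, only the {0,1} contract above is fixed.
 */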

struct capture_cache {
	bool  rule_seen;
	__u16 rule_id;
	__u16 cap_len;
};

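/* Single-slot per-CPU scratch map carrying the ingress classification result
 * so that the egress path can reuse it via cilium_capture_cached() below
 * instead of classifying the packet a second time.
 */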
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, __u32);
	__type(value, struct capture_cache);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, 1);
} cilium_capture_cache __section_maps_btf;

struct capture_rule {
	__u16 rule_id;
	__u16 reserved;
	__u32 cap_len;
};

/* 5-tuple wildcard key / mask. */
struct capture4_wcard {
	__be32 saddr;   /* masking: prefix */
	__be32 daddr;   /* masking: prefix */
	__be16 sport;   /* masking: 0 or 0xffff */
	__be16 dport;   /* masking: 0 or 0xffff */
	__u8   nexthdr; /* masking: 0 or 0xff */
	__u8   smask;   /* prefix len: saddr */
	__u8   dmask;   /* prefix len: daddr */
	__u8   flags;   /* reserved: 0 */
};

/* 5-tuple wildcard key / mask. */
struct capture6_wcard {
	union v6addr saddr; /* masking: prefix */
	union v6addr daddr; /* masking: prefix */
	__be16 sport;       /* masking: 0 or 0xffff */
	__be16 dport;       /* masking: 0 or 0xffff */
	__u8   nexthdr;     /* masking: 0 or 0xff */
	__u8   smask;       /* prefix len: saddr */
	__u8   dmask;       /* prefix len: daddr */
	__u8   flags;       /* reserved: 0 */
};

#ifdef ENABLE_IPV4
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct capture4_wcard);
	__type(value, struct capture_rule);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CAPTURE4_SIZE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CAPTURE4_RULES __section_maps_btf;

static __always_inline void
cilium_capture4_masked_key(const struct capture4_wcard *orig,
			   const struct capture4_wcard *mask,
			   struct capture4_wcard *out)
{
	out->daddr = orig->daddr & mask->daddr;
	out->saddr = orig->saddr & mask->saddr;
	out->dport = orig->dport & mask->dport;
	out->sport = orig->sport & mask->sport;
	out->nexthdr = orig->nexthdr & mask->nexthdr;
	out->dmask = mask->dmask;
	out->smask = mask->smask;
}
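
/* A minimal sketch, guarded out of the build: how a mask narrower than a
 * full /32 could be expressed, here matching only a /24 source prefix plus
 * the L4 protocol. The helper name, the CAPTURE_DOC_EXAMPLE guard and the
 * values are illustrative; real masks are emitted via PREFIX_MASKS4 below.
 */
#ifdef CAPTURE_DOC_EXAMPLE
static __always_inline void
cilium_capture4_example_masked_key(const struct capture4_wcard *okey,
				   struct capture4_wcard *lkey)
{
	const struct capture4_wcard ex_mask = {
		.saddr   = bpf_htonl(0xffffff00),	/* upper 24 bits set */
		.smask   = 24,
		.nexthdr = 0xff,
		/* all remaining fields stay 0, i.e. wildcarded */
	};

	/* Clears everything in lkey except the /24 part of saddr and the
	 * nexthdr, which is the exact form a matching CAPTURE4_RULES key
	 * must have been inserted with.
	 */
	cilium_capture4_masked_key(okey, &ex_mask, lkey);
}
#endif /* CAPTURE_DOC_EXAMPLE */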

/* The agent generates and emits PREFIX_MASKS4 and regenerates it whenever a
 * mask is added or removed. The cilium_capture4_rules map can have n entries
 * with m different PREFIX_MASKS4 where n >> m, and lookup performance mainly
 * depends on m. Below is a fallback / example definition, mainly for compile
 * testing, given that the agent typically emits this instead. The agent can
 * order the masks e.g. by the number of set bits, from high to low.
 */
#ifndef PREFIX_MASKS4
# define PREFIX_MASKS4					\
	{						\
		/* rule_id 1:				\
		 *  srcIP/32, dstIP/32, dport, nexthdr	\
		 */					\
		.daddr   = 0xffffffff,			\
		.dmask   = 32,				\
		.saddr   = 0xffffffff,			\
		.smask   = 32,				\
		.dport   = 0xffff,			\
		.sport   = 0,				\
		.nexthdr = 0xff,			\
	}, {						\
		/* rule_id 2 (1st mask):		\
		 *  srcIP/32 or dstIP/32		\
		 */					\
		.daddr   = 0xffffffff,			\
		.dmask   = 32,				\
		.saddr   = 0,				\
		.smask   = 0,				\
		.dport   = 0,				\
		.sport   = 0,				\
		.nexthdr = 0,				\
	}, {						\
		/* rule_id 2 (2nd mask):		\
		 *  srcIP/32 or dstIP/32		\
		 */					\
		.daddr   = 0,				\
		.dmask   = 0,				\
		.saddr   = 0xffffffff,			\
		.smask   = 32,				\
		.dport   = 0,				\
		.sport   = 0,				\
		.nexthdr = 0,				\
	},
#endif /* PREFIX_MASKS4 */
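
/* For illustration only: a CAPTURE4_RULES entry that would pair with the
 * first fallback mask above. The agent installs such entries from user
 * space; the addresses and rule parameters below are made up.
 *
 *	key   = { .saddr = <srcIP>, .smask = 32, .daddr = <dstIP>,
 *		  .dmask = 32, .dport = bpf_htons(53), .nexthdr = IPPROTO_UDP }
 *	value = { .rule_id = 1, .cap_len = 128 }
 *
 * A packet is captured under this rule only if masking its 5-tuple with one
 * of the PREFIX_MASKS4 entries yields exactly this key.
 */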

static __always_inline struct capture_rule *
cilium_capture4_classify_wcard(struct __ctx_buff *ctx)
{
	struct capture4_wcard prefix_masks[] = { PREFIX_MASKS4 };
	struct capture4_wcard okey, lkey;
	struct capture_rule *match;
	void *data, *data_end;
	struct iphdr *ip4;
	int i;
	const int size = sizeof(prefix_masks) /
			 sizeof(prefix_masks[0]);

	if (!revalidate_data(ctx, &data, &data_end, &ip4))
		return NULL;

	okey.daddr = ip4->daddr;
	okey.dmask = 32;
	okey.saddr = ip4->saddr;
	okey.smask = 32;
	okey.nexthdr = ip4->protocol;

	if (ip4->protocol != IPPROTO_TCP &&
	    ip4->protocol != IPPROTO_UDP)
		return NULL;
	if (l4_load_ports(ctx, ETH_HLEN + ipv4_hdrlen(ip4), &okey.sport) < 0)
		return NULL;

	okey.flags = 0;
	lkey.flags = 0;

_Pragma("unroll")
	for (i = 0; i < size; i++) {
		cilium_capture4_masked_key(&okey, &prefix_masks[i], &lkey);
		match = map_lookup_elem(&CAPTURE4_RULES, &lkey);
		if (match)
			return match;
	}

	return NULL;
}
#endif /* ENABLE_IPV4 */

#ifdef ENABLE_IPV6
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct capture6_wcard);
	__type(value, struct capture_rule);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, CAPTURE6_SIZE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} CAPTURE6_RULES __section_maps_btf;

static __always_inline void
cilium_capture6_masked_key(const struct capture6_wcard *orig,
			   const struct capture6_wcard *mask,
			   struct capture6_wcard *out)
{
	out->daddr.d1 = orig->daddr.d1 & mask->daddr.d1;
	out->daddr.d2 = orig->daddr.d2 & mask->daddr.d2;
	out->saddr.d1 = orig->saddr.d1 & mask->saddr.d1;
	out->saddr.d2 = orig->saddr.d2 & mask->saddr.d2;
	out->dport = orig->dport & mask->dport;
	out->sport = orig->sport & mask->sport;
	out->nexthdr = orig->nexthdr & mask->nexthdr;
	out->dmask = mask->dmask;
	out->smask = mask->smask;
}

/* The agent generates and emits PREFIX_MASKS6 and regenerates it whenever a
 * mask is added or removed. Example for compile testing:
 */
#ifndef PREFIX_MASKS6
# define PREFIX_MASKS6					 \
	{						 \
		/* rule_id 1:				 \
		 *  srcIP/128, dstIP/128, dport, nexthdr \
		 */					 \
		.daddr = {				 \
			.d1 = 0xffffffff,		 \
			.d2 = 0xffffffff,		 \
		},					 \
		.dmask    = 128,			 \
		.saddr = {				 \
			.d1 = 0xffffffff,		 \
			.d2 = 0xffffffff,		 \
		},					 \
		.smask    = 128,			 \
		.dport    = 0xffff,			 \
		.sport    = 0,				 \
		.nexthdr  = 0xff,			 \
	}, {						 \
		/* rule_id 2 (1st mask):		 \
		 *  srcIP/128 or dstIP/128		 \
		 */					 \
		.daddr = {				 \
			.d1 = 0xffffffff,		 \
			.d2 = 0xffffffff,		 \
		},					 \
		.dmask    = 128,			 \
		.saddr    = {},				 \
		.smask    = 0,				 \
		.dport    = 0,				 \
		.sport    = 0,				 \
		.nexthdr  = 0,				 \
	}, {						 \
		/* rule_id 2 (2nd mask):		 \
		 *  srcIP/128 or dstIP/128		 \
		 */					 \
		.daddr    = {},				 \
		.dmask    = 0,				 \
		.saddr = {				 \
			.d1 = 0xffffffff,		 \
			.d2 = 0xffffffff,		 \
		},					 \
		.smask    = 128,			 \
		.dport    = 0,				 \
		.sport    = 0,				 \
		.nexthdr  = 0,				 \
	},
#endif /* PREFIX_MASKS6 */

static __always_inline struct capture_rule *
cilium_capture6_classify_wcard(struct __ctx_buff *ctx)
{
	struct capture6_wcard prefix_masks[] = { PREFIX_MASKS6 };
	struct capture6_wcard okey, lkey;
	struct capture_rule *match;
	void *data, *data_end;
	struct ipv6hdr *ip6;
	int i, ret, l3_off = ETH_HLEN;
	const int size = sizeof(prefix_masks) /
			 sizeof(prefix_masks[0]);

	if (!revalidate_data(ctx, &data, &data_end, &ip6))
		return NULL;

	ipv6_addr_copy(&okey.daddr, (union v6addr *)&ip6->daddr);
	okey.dmask = 128;
	ipv6_addr_copy(&okey.saddr, (union v6addr *)&ip6->saddr);
	okey.smask = 128;
	okey.nexthdr = ip6->nexthdr;

	ret = ipv6_hdrlen(ctx, &okey.nexthdr);
	if (ret < 0)
		return NULL;
	if (okey.nexthdr != IPPROTO_TCP &&
	    okey.nexthdr != IPPROTO_UDP)
		return NULL;
	if (l4_load_ports(ctx, l3_off + ret, &okey.sport) < 0)
		return NULL;

	okey.flags = 0;
	lkey.flags = 0;

_Pragma("unroll")
	for (i = 0; i < size; i++) {
		cilium_capture6_masked_key(&okey, &prefix_masks[i], &lkey);
		match = map_lookup_elem(&CAPTURE6_RULES, &lkey);
		if (match)
			return match;
	}

	return NULL;
}
#endif /* ENABLE_IPV6 */

static __always_inline struct capture_rule *
cilium_capture_classify_wcard(struct __ctx_buff *ctx)
{
	struct capture_rule *ret = NULL;
	__u16 proto;

	if (!validate_ethertype(ctx, &proto))
		return ret;
	switch (proto) {
#ifdef ENABLE_IPV4
	case bpf_htons(ETH_P_IP):
		ret = cilium_capture4_classify_wcard(ctx);
		break;
#endif
#ifdef ENABLE_IPV6
	case bpf_htons(ETH_P_IPV6):
		ret = cilium_capture6_classify_wcard(ctx);
		break;
#endif
	default:
		break;
	}
	return ret;
}

static __always_inline bool
cilium_capture_candidate(struct __ctx_buff *ctx __maybe_unused,
			 __u16 *rule_id __maybe_unused,
			 __u16 *cap_len __maybe_unused)
{
	if (capture_enabled) {
		struct capture_cache *c;
		struct capture_rule *r;
		__u32 zero = 0;

		c = map_lookup_elem(&cilium_capture_cache, &zero);
		if (always_succeeds(c)) {
			r = cilium_capture_classify_wcard(ctx);
			c->rule_seen = r;
			if (r) {
				c->cap_len = *cap_len = (__u16)r->cap_len;
				c->rule_id = *rule_id = r->rule_id;
				return true;
			}
		}
	}
	return false;
}

static __always_inline bool
cilium_capture_cached(struct __ctx_buff *ctx __maybe_unused,
		      __u16 *rule_id __maybe_unused,
		      __u32 *cap_len __maybe_unused)
{
	if (capture_enabled) {
		struct capture_cache *c;
		__u32 zero = 0;

		/* Avoid running the full classification a 2nd time: i) it adds
		 * overhead, and ii) an encap header may have been pushed in
		 * front by now, in which case we don't want to dissect
		 * everything again.
		 */
		c = map_lookup_elem(&cilium_capture_cache, &zero);
		if (always_succeeds(c) && c->rule_seen) {
			*cap_len = c->cap_len;
			*rule_id = c->rule_id;
			return true;
		}
	}
	return false;
}

static __always_inline void
cilium_capture_in(struct __ctx_buff *ctx __maybe_unused)
{
	__u16 cap_len;
	__u16 rule_id;

	if (cilium_capture_candidate(ctx, &rule_id, &cap_len))
		__cilium_capture_in(ctx, rule_id, cap_len);
}

static __always_inline void
cilium_capture_out(struct __ctx_buff *ctx __maybe_unused)
{
	__u32 cap_len;
	__u16 rule_id;

	/* cilium_capture_out() is always paired with cilium_capture_in(), so
	 * we can rely on the previously cached result to decide whether to
	 * push the packet to the RB or not.
	 */
	if (cilium_capture_cached(ctx, &rule_id, &cap_len))
		__cilium_capture_out(ctx, rule_id, cap_len);
}
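
/* A minimal, hypothetical usage sketch (guarded out of the build, not an
 * existing Cilium program): the datapath calls cilium_capture_in() once on
 * packet entry and cilium_capture_out() once before transmission, so the
 * classification cached at ingress drives the egress capture decision.
 */
#ifdef CAPTURE_DOC_EXAMPLE
__section("tc")
int example_capture_prog(struct __ctx_buff *ctx)
{
	cilium_capture_in(ctx);

	/* ... regular packet processing, possibly pushing encap headers ... */

	cilium_capture_out(ctx);
	return CTX_ACT_OK;
}
#endif /* CAPTURE_DOC_EXAMPLE */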

#else /* ENABLE_CAPTURE */

static __always_inline void
cilium_capture_in(struct __ctx_buff *ctx __maybe_unused)
{
}

static __always_inline void
cilium_capture_out(struct __ctx_buff *ctx __maybe_unused)
{
}

#endif /* ENABLE_CAPTURE */