github.com/fafucoder/cilium@v1.6.11/bpf/lib/common.h (about)

     1  /*
     2   *  Copyright (C) 2016-2019 Authors of Cilium
     3   *
     4   *  This program is free software; you can redistribute it and/or modify
     5   *  it under the terms of the GNU General Public License as published by
     6   *  the Free Software Foundation; either version 2 of the License, or
     7   *  (at your option) any later version.
     8   *
     9   *  This program is distributed in the hope that it will be useful,
    10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12   *  GNU General Public License for more details.
    13   *
    14   *  You should have received a copy of the GNU General Public License
    15   *  along with this program; if not, write to the Free Software
    16   *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    17   */
    18  #ifndef __LIB_COMMON_H_
    19  #define __LIB_COMMON_H_
    20  
    21  #include <bpf_features.h>
    22  #include <bpf/api.h>
    23  #include <linux/if_ether.h>
    24  #include <linux/ipv6.h>
    25  #include <linux/in.h>
    26  #include <stdint.h>
    27  #include <stdbool.h>
    28  
    29  // FIXME: GH-3239 LRU logic is not handling timeouts gracefully enough
    30  // #ifndef HAVE_LRU_MAP_TYPE
    31  // #define NEEDS_TIMEOUT 1
    32  // #endif
    33  #define NEEDS_TIMEOUT 1
    34  
    35  #ifndef AF_INET
    36  #define AF_INET 2
    37  #endif
    38  
    39  #ifndef AF_INET6
    40  #define AF_INET6 10
    41  #endif
    42  
    43  #ifndef EVENT_SOURCE
    44  #define EVENT_SOURCE 0
    45  #endif
    46  
    47  #define PORT_UDP_VXLAN 4789
    48  #define PORT_UDP_GENEVE 6081
    49  #define PORT_UDP_VXLAN_LINUX 8472
    50  
    51  #ifdef PREALLOCATE_MAPS
    52  #define CONDITIONAL_PREALLOC 0
    53  #else
    54  #define CONDITIONAL_PREALLOC BPF_F_NO_PREALLOC
    55  #endif
    56  
    57  /* TODO: ipsec v6 tunnel datapath still needs separate fixing */
    58  #ifndef ENABLE_IPSEC
    59  # ifdef ENABLE_IPV6
    60  #  define ENABLE_ENCAP_HOST_REMAP 1
    61  # endif
    62  #endif
    63  
    64  #define __inline__ __attribute__((always_inline))
    65  #ifndef __always_inline
    66  #define __always_inline inline __inline__
    67  #endif
    68  
    69  #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
    70  
    71  /* These are shared with test/bpf/check-complexity.sh, when modifying any of
    72   * the below, that script should also be updated. */
    73  #define CILIUM_CALL_DROP_NOTIFY			1
    74  #define CILIUM_CALL_ERROR_NOTIFY		2
    75  #define CILIUM_CALL_SEND_ICMP6_ECHO_REPLY	3
    76  #define CILIUM_CALL_HANDLE_ICMP6_NS		4
    77  #define CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED	5
    78  #define CILIUM_CALL_ARP				6
    79  #define CILIUM_CALL_IPV4_FROM_LXC		7
    80  #define CILIUM_CALL_NAT64			8
    81  #define CILIUM_CALL_NAT46			9
    82  #define CILIUM_CALL_IPV6_FROM_LXC		10
    83  #define CILIUM_CALL_IPV4_TO_LXC_POLICY_ONLY	11
    84  #define CILIUM_CALL_IPV6_TO_LXC_POLICY_ONLY	12
    85  #define CILIUM_CALL_IPV4_TO_ENDPOINT		13
    86  #define CILIUM_CALL_IPV6_TO_ENDPOINT		14
    87  #define CILIUM_CALL_IPV4_NODEPORT_NAT		15
    88  #define CILIUM_CALL_IPV6_NODEPORT_NAT		16
    89  #define CILIUM_CALL_IPV4_NODEPORT_REVNAT	17
    90  #define CILIUM_CALL_IPV6_NODEPORT_REVNAT	18
    91  #define CILIUM_CALL_ENCAP_NODEPORT_NAT		19
    92  #define CILIUM_CALL_SIZE			20
    93  
    94  typedef __u64 mac_t;
    95  
/* 128-bit IPv6 address. All three views alias the same 16 bytes:
 * four 32-bit words (p1..p4), two 64-bit words (d1/d2), or raw bytes. */
union v6addr {
        struct {
                __u32 p1;
                __u32 p2;
                __u32 p3;
                __u32 p4;
        };
	struct {
		__u64 d1;
		__u64 d2;
	};
        __u8 addr[16];
} __attribute__((packed));
   109  
/* validate_ethertype() checks that the skb holds a complete Ethernet header
 * and stores the ethertype into *proto, exactly as read from the frame
 * (i.e. network byte order). Returns false when the packet is too short
 * for an L2 header or the frame is not Ethernet II (ethertype below
 * ETH_P_802_3_MIN). */
static inline bool validate_ethertype(struct __sk_buff *skb, __u16 *proto)
{
	void *data = (void *) (long) skb->data;
	void *data_end = (void *) (long) skb->data_end;

	/* Bounds check the verifier requires before any packet data access. */
	if (data + ETH_HLEN > data_end)
		return false;

	struct ethhdr *eth = data;
	*proto = eth->h_proto;

	if (bpf_ntohs(*proto) < ETH_P_802_3_MIN)
		return false; // non-Ethernet II unsupported

	return true;
}
   126  
/* __revalidate_data() refreshes data/data_end from the skb and verifies
 * that ETH_HLEN + l3_len bytes are within bounds, optionally pulling in
 * that many bytes first (pull == true). On success *data_/*data_end_ are
 * set and *l3 points just past the L2 header.
 * Returns false when the packet is too short.
 * NOTE: do not reorder or simplify — the exact statement sequence below
 * exists to satisfy the BPF verifier. */
static inline bool __revalidate_data(struct __sk_buff *skb, void **data_,
				     void **data_end_, void **l3,
				     const size_t l3_len, const bool pull)
{
	const size_t tot_len = ETH_HLEN + l3_len;
	void *data_end;
	void *data;

	/* Verifier workaround, do this unconditionally: invalid size of register spill. */
	if (pull)
		skb_pull_data(skb, tot_len);
	data_end = (void *)(long)skb->data_end;
	data = (void *)(long)skb->data;
	if (data + tot_len > data_end)
		return false;

	/* Verifier workaround: pointer arithmetic on pkt_end prohibited. */
	*data_ = data;
	*data_end_ = data_end;

	*l3 = data + ETH_HLEN;
	return true;
}
   150  
   151  /* revalidate_data_first() initializes the provided pointers from the skb and
   152   * ensures that the data is pulled in for access. Should be used the first
   153   * time that the skb data is accessed, subsequent calls can be made to
   154   * revalidate_data() which is cheaper.
   155   * Returns true if 'skb' is long enough for an IP header of the provided type,
   156   * false otherwise. */
   157  #define revalidate_data_first(skb, data, data_end, ip)			\
   158  	__revalidate_data(skb, data, data_end, (void **)ip, sizeof(**ip), true)
   159  
   160  /* revalidate_data() initializes the provided pointers from the skb.
   161   * Returns true if 'skb' is long enough for an IP header of the provided type,
   162   * false otherwise. */
   163  #define revalidate_data(skb, data, data_end, ip)			\
   164  	__revalidate_data(skb, data, data_end, (void **)ip, sizeof(**ip), false)
   165  
   166  /* Macros for working with L3 cilium defined IPV6 addresses */
   167  #define BPF_V6(dst, ...)	BPF_V6_1(dst, fetch_ipv6(__VA_ARGS__))
   168  #define BPF_V6_1(dst, ...)	BPF_V6_4(dst, __VA_ARGS__)
   169  #define BPF_V6_4(dst, a1, a2, a3, a4)		\
   170  	({					\
   171  		dst.p1 = a1;			\
   172  		dst.p2 = a2;			\
   173  		dst.p3 = a3;			\
   174  		dst.p4 = a4;			\
   175  	})
   176  
   177  #define ENDPOINT_KEY_IPV4 1
   178  #define ENDPOINT_KEY_IPV6 2
   179  
   180  /* Structure representing an IPv4 or IPv6 address, being used for:
   181   *  - key as endpoints map
   182   *  - key for tunnel endpoint map
   183   *  - value for tunnel endpoint map
   184   */
struct endpoint_key {
	union {
		struct {
			__u32		ip4;	/* valid when family == ENDPOINT_KEY_IPV4 */
			__u32		pad1;
			__u32		pad2;
			__u32		pad3;
		};
		union v6addr	ip6;	/* valid when family == ENDPOINT_KEY_IPV6 */
	};
	__u8 family;	/* ENDPOINT_KEY_IPV4 or ENDPOINT_KEY_IPV6 */
	__u8 key;
	__u16 pad5;
} __attribute__((packed));
   199  
   200  #define ENDPOINT_F_HOST		1 /* Special endpoint representing local host */
   201  
/* Value of endpoint map, describing a local endpoint. */
struct endpoint_info {
	__u32		ifindex;	/* interface index of the endpoint device */
	__u16		unused; /* used to be sec_label, no longer used */
	__u16           lxc_id;		/* local endpoint id */
	__u32		flags;		/* ENDPOINT_F_* flags (e.g. ENDPOINT_F_HOST) */
	mac_t		mac;
	mac_t		node_mac;
	__u32		pad[4];
};
   212  
/* Info about an endpoint reachable via a remote node. */
struct remote_endpoint_info {
	__u32		sec_label;		/* security identity of the endpoint */
	__u32		tunnel_endpoint;	/* node IP to tunnel towards */
	__u8		key;			/* encryption key index, see MAX_KEY_INDEX */
};
   218  
/* Key of the per-endpoint policy map. */
struct policy_key {
	__u32		sec_label;	/* peer security identity */
	__u16		dport;		/* destination port, 0 wildcards */
	__u8		protocol;	/* L4 protocol, 0 wildcards */
	__u8		egress:1,	/* 1 for egress policy, 0 for ingress */
			pad:7;
};
   226  
/* Value of the per-endpoint policy map. */
struct policy_entry {
	__be16		proxy_port;	/* non-zero: redirect to this proxy port */
	__u16		pad[3];
	__u64		packets;	/* stats accounted against this rule */
	__u64		bytes;
};
   233  
/* Key of the metrics map; a reason > 0 is a drop reason equal to -(DROP_*),
 * see the drop code definitions below. */
struct metrics_key {
    __u8      reason;     // 0: forwarded, >0: dropped (= -(DROP_*))
    __u8      dir:2,      // METRIC_INGRESS (1) or METRIC_EGRESS (2)
              pad:6;
    __u16     reserved[3]; // reserved for future extension
};
   240  
   241  
/* Value of the metrics map: packet and byte counters. */
struct metrics_value {
     __u64	count;
     __u64	bytes;
};
   246  
   247  
/* Notification message types, carried in the NOTIFY_COMMON_HDR 'type'
 * field below. */
enum {
	CILIUM_NOTIFY_UNSPEC,
	CILIUM_NOTIFY_DROP,
	CILIUM_NOTIFY_DBG_MSG,
	CILIUM_NOTIFY_DBG_CAPTURE,
	CILIUM_NOTIFY_TRACE,
};
   255  
   256  #define NOTIFY_COMMON_HDR \
   257  	__u8		type; \
   258  	__u8		subtype; \
   259  	__u16		source; \
   260  	__u32		hash;
   261  
   262  #ifndef TRACE_PAYLOAD_LEN
   263  #define TRACE_PAYLOAD_LEN 128ULL
   264  #endif
   265  
   266  #ifndef BPF_F_PSEUDO_HDR
   267  # define BPF_F_PSEUDO_HDR                (1ULL << 4)
   268  #endif
   269  
   270  #define IS_ERR(x) (unlikely((x < 0) || (x == TC_ACT_SHOT)))
   271  
   272  /* Cilium IPSec code to indicate packet needs to be handled
   273   * by IPSec stack. Maps to TC_ACT_OK.
   274   */
   275  #define IPSEC_ENDPOINT TC_ACT_OK
   276  
   277  /* Return value to indicate that proxy redirection is required */
   278  #define POLICY_ACT_PROXY_REDIRECT (1 << 16)
   279  
   280  /* Cilium error codes, must NOT overlap with TC return codes.
   281   * These also serve as drop reasons for metrics,
   282   * where reason > 0 corresponds to -(DROP_*)
   283   */
   284  #define DROP_INVALID_SMAC	-130 /* unused */
   285  #define DROP_INVALID_DMAC	-131 /* unused */
   286  #define DROP_INVALID_SIP	-132
   287  #define DROP_POLICY		-133
   288  #define DROP_INVALID		-134
   289  #define DROP_CT_INVALID_HDR	-135
   290  #define DROP_CT_MISSING_ACK	-136 /* unused */
   291  #define DROP_CT_UNKNOWN_PROTO	-137
   292  #define DROP_CT_CANT_CREATE_	-138 /* unused */
   293  #define DROP_UNKNOWN_L3		-139
   294  #define DROP_MISSED_TAIL_CALL	-140
   295  #define DROP_WRITE_ERROR	-141
   296  #define DROP_UNKNOWN_L4		-142
   297  #define DROP_UNKNOWN_ICMP_CODE	-143
   298  #define DROP_UNKNOWN_ICMP_TYPE	-144
   299  #define DROP_UNKNOWN_ICMP6_CODE	-145
   300  #define DROP_UNKNOWN_ICMP6_TYPE	-146
   301  #define DROP_NO_TUNNEL_KEY	-147
   302  #define DROP_NO_TUNNEL_OPT_	-148 /* unused */
   303  #define DROP_INVALID_GENEVE_	-149 /* unused */
   304  #define DROP_UNKNOWN_TARGET	-150
   305  #define DROP_UNROUTABLE		-151
   306  #define DROP_NO_LXC		-152 /* unused */
   307  #define DROP_CSUM_L3		-153
   308  #define DROP_CSUM_L4		-154
   309  #define DROP_CT_CREATE_FAILED	-155
   310  #define DROP_INVALID_EXTHDR	-156
   311  #define DROP_FRAG_NOSUPPORT	-157
   312  #define DROP_NO_SERVICE		-158
   313  #define DROP_POLICY_L4		-159 /* unused */
   314  #define DROP_NO_TUNNEL_ENDPOINT -160
   315  #define DROP_PROXYMAP_CREATE_FAILED_	-161 /* unused */
   316  #define DROP_POLICY_CIDR		-162 /* unused */
   317  #define DROP_UNKNOWN_CT			-163
   318  #define DROP_HOST_UNREACHABLE		-164
   319  #define DROP_NO_CONFIG		-165
   320  #define DROP_UNSUPPORTED_L2		-166
   321  #define DROP_NAT_NO_MAPPING	-167
   322  #define DROP_NAT_UNSUPP_PROTO	-168
   323  #define DROP_NO_FIB		-169
   324  #define DROP_ENCAP_PROHIBITED	-170
   325  #define DROP_INVALID_IDENTITY	-171
   326  #define DROP_UNKNOWN_SENDER	-172
   327  #define DROP_NAT_NOT_NEEDED	-173 /* Mapped as drop code, though drop not necessary. */
   328  
   329  #define NAT_PUNT_TO_STACK	DROP_NAT_NOT_NEEDED
   330  
   331  /* Cilium metrics reasons for forwarding packets and other stats.
   332   * If reason is larger than below then this is a drop reason and
   333   * value corresponds to -(DROP_*), see above.
   334   */
   335  #define REASON_FORWARDED	0
   336  #define REASON_PLAINTEXT	3
   337  #define REASON_DECRYPT		4
   338  #define REASON_LB_NO_SLAVE	5
   339  #define REASON_LB_NO_BACKEND	6
   340  #define REASON_LB_REVNAT_UPDATE	7
   341  #define REASON_LB_REVNAT_STALE	8
   342  
   343  /* Cilium metrics direction for dropping/forwarding packet */
   344  #define METRIC_INGRESS  1
   345  #define METRIC_EGRESS   2
   346  
   347  /* Magic skb->mark identifies packets origination and encryption status.
   348   *
 *  The upper 16 bits plus lower 8 bits (e.g. mask 0xFFFF00FF) contain the
 *  packet's security identity. The lower/upper halves are swapped to recover
   351   * the identity.
   352   *
 *  The 4 bits at 0x0F00 provide
   354   *  - the magic marker values which indicate whether the packet is coming from
   355   *    an ingress or egress proxy, a local process and its current encryption
   356   *    status.
   357   *
   358   * The 4 bits at 0xF000 provide
   359   *  - the key index to use for encryption when multiple keys are in-flight.
   360   *    In the IPsec case this becomes the SPI on the wire.
   361   */
   362  #define MARK_MAGIC_HOST_MASK		0x0F00
   363  #define MARK_MAGIC_PROXY_INGRESS	0x0A00
   364  #define MARK_MAGIC_PROXY_EGRESS		0x0B00
   365  #define MARK_MAGIC_HOST			0x0C00
   366  #define MARK_MAGIC_DECRYPT		0x0D00
   367  #define MARK_MAGIC_ENCRYPT		0x0E00
   368  #define MARK_MAGIC_IDENTITY		0x0F00 /* mark carries identity */
   369  #define MARK_MAGIC_TO_PROXY		0x0200
   370  
   371  #define MARK_MAGIC_KEY_ID		0xF000
   372  #define MARK_MAGIC_KEY_MASK		0xFF00
   373  
   374  /* IPSec cannot be configured with NodePort BPF today, hence non-conflicting
   375   * overlap with MARK_MAGIC_KEY_ID.
   376   */
   377  #define MARK_MAGIC_SNAT_DONE		0x1500
   378  
   379  /**
   380   * get_identity - returns source identity from the mark field
   381   */
   382  static inline int __inline__ get_identity(struct __sk_buff *skb)
   383  {
   384  	return ((skb->mark & 0xFF) << 16) | skb->mark >> 16;
   385  }
   386  
/* set_encrypt_dip() stashes the destination node IP used for encryption
 * into skb->cb[4]. */
static inline void __inline__ set_encrypt_dip(struct __sk_buff *skb, __u32 ip_endpoint)
{
	skb->cb[4] = ip_endpoint;
}
   391  
   392  /**
   393   * set_identity - pushes 24 bit identity into skb mark value.
   394   */
   395  static inline void __inline__ set_identity(struct __sk_buff *skb, __u32 identity)
   396  {
   397  	skb->mark = skb->mark & MARK_MAGIC_KEY_MASK;
   398  	skb->mark |= ((identity & 0xFFFF) << 16) | ((identity & 0xFF0000) >> 16);
   399  }
   400  
/* set_identity_cb() stores the identity in skb->cb[1] (CB_IFINDEX slot)
 * instead of the mark. */
static inline void __inline__ set_identity_cb(struct __sk_buff *skb, __u32 identity)
{
	skb->cb[1] = identity;
}
   405  
   406  /* We cap key index at 4 bits because mark value is used to map skb to key */
   407  #define MAX_KEY_INDEX 15
   408  
/* encrypt_key is the index into the encrypt map */
struct encrypt_key {
	__u32 ctx;	/* lookup context/index */
} __attribute__((packed));
   413  
/* encrypt_config is the current encryption context on the node */
struct encrypt_config {
	__u8 encrypt_key;	/* active key index, capped by MAX_KEY_INDEX */
} __attribute__((packed));
   418  
   419  /**
   420   * or_encrypt_key - mask and shift key into encryption format
   421   */
   422  static inline __u32 __inline__ or_encrypt_key(__u8 key)
   423  {
   424  	return (((__u32)key & 0x0F) << 12) | MARK_MAGIC_ENCRYPT;
   425  }
   426  
/**
 * set_encrypt_key - pushes 8 bit key and encryption marker into skb mark value.
 *
 * Note: overwrites the whole mark, see or_encrypt_key().
 */
static inline void __inline__ set_encrypt_key(struct __sk_buff *skb, __u8 key)
{
	skb->mark = or_encrypt_key(key);
}
   434  
/* Same as set_encrypt_key(), but stores the value in skb->cb[0]
 * (CB_SRC_LABEL slot) instead of the mark. */
static inline void __inline__ set_encrypt_key_cb(struct __sk_buff *skb, __u8 key)
{
	skb->cb[0] = or_encrypt_key(key);
}
   439  
   440  /*
   441   * skb->tc_index uses
   442   *
   443   * cilium_host @egress
   444   *   bpf_host -> bpf_lxc
   445   */
   446  #define TC_INDEX_F_SKIP_INGRESS_PROXY	1
   447  #define TC_INDEX_F_SKIP_EGRESS_PROXY	2
   448  #define TC_INDEX_F_SKIP_NODEPORT	4
   449  #define TC_INDEX_F_SKIP_RECIRCULATION	8
   450  
/* skb->cb[] usage: */
enum {
	CB_SRC_LABEL,		/* cb[0] */
	CB_IFINDEX,		/* cb[1] */
	CB_POLICY,		/* cb[2] */
	CB_NAT46_STATE,		/* cb[3] */
#define CB_NAT		CB_NAT46_STATE	/* Alias, non-overlapping */
	CB_CT_STATE,		/* cb[4] */
};
   460  
/* State values for NAT46, stored in skb->cb[CB_NAT46_STATE]. */
enum {
	NAT46_CLEAR,	/* no translation pending */
	NAT64,		/* translate IPv6 -> IPv4 */
	NAT46,		/* translate IPv4 -> IPv6 */
};
   467  
   468  #define TUPLE_F_OUT		0	/* Outgoing flow */
   469  #define TUPLE_F_IN		1	/* Incoming flow */
   470  #define TUPLE_F_RELATED		2	/* Flow represents related packets */
   471  #define TUPLE_F_SERVICE		4	/* Flow represents service/slave map */
   472  
   473  #define CT_EGRESS 0
   474  #define CT_INGRESS 1
   475  #define CT_SERVICE 2
   476  
   477  #ifdef ENABLE_NODEPORT
   478  #define NAT_MIN_EGRESS		NODEPORT_PORT_MIN
   479  #else
   480  #define NAT_MIN_EGRESS		EPHERMERAL_MIN
   481  #endif
   482  
/* Connection tracking lookup results. */
enum {
	CT_NEW,
	CT_ESTABLISHED,
	CT_REPLY,
	CT_RELATED,
};
   489  
/* IPv6 connection tuple (addresses, ports, protocol, TUPLE_F_* flags). */
struct ipv6_ct_tuple {
	/* Address fields are reversed, i.e.,
	 * these field names are correct for reply direction traffic. */
	union v6addr	daddr;
	union v6addr	saddr;
	/* The order of dport+sport must not be changed!
	 * These field names are correct for original direction traffic. */
	__be16		dport;
	__be16		sport;
	__u8		nexthdr;
	__u8		flags;
} __attribute__((packed));
   502  
/* IPv4 connection tuple; layout mirrors ipv6_ct_tuple. */
struct ipv4_ct_tuple {
	/* Address fields are reversed, i.e.,
	 * these field names are correct for reply direction traffic. */
	__be32		daddr;
	__be32		saddr;
	/* The order of dport+sport must not be changed!
	 * These field names are correct for original direction traffic. */
	__be16		dport;
	__be16		sport;
	__u8		nexthdr;
	__u8		flags;
} __attribute__((packed));
   515  
/* Connection tracking entry (value of the CT maps). */
struct ct_entry {
	__u64 rx_packets;
	__u64 rx_bytes;
	__u64 tx_packets;
	__u64 tx_bytes;
	__u32 lifetime;		/* entry expiry; NOTE(review): units/epoch defined by CT GC — confirm */
	__u16 rx_closing:1,
	      tx_closing:1,
	      nat46:1,
	      lb_loopback:1,
	      seen_non_syn:1,
	      node_port:1,
	      proxy_redirect:1, // Connection is redirected to a proxy
	      reserved:9;
	__u16 rev_nat_index;
	__u16 backend_id; /* Populated only in v1.6+ BPF code. */

	/* *x_flags_seen represents the OR of all TCP flags seen for the
	 * transmit/receive direction of this entry. */
	__u8  tx_flags_seen;
	__u8  rx_flags_seen;

	__u32 src_sec_id; /* Used from userspace proxies, do not change offset! */

	/* last_*x_report is a timestamp of the last time a monitor
	 * notification was sent for the transmit/receive direction. */
	__u32 last_tx_report;
	__u32 last_rx_report;
};
   545  
/* Legacy IPv6 service key (superseded by lb6_key_v2). */
struct lb6_key {
        union v6addr address;
        __be16 dport;		/* L4 port filter, if unset, all ports apply */
	__u16 slave;		/* Backend iterator, 0 indicates the master service */
} __attribute__((packed));
   551  
/* Legacy IPv6 service value (superseded by lb6_service_v2). */
struct lb6_service {
	union v6addr target;	/* backend IPv6 address */
	__be16 port;
	__u16 count;
	__u16 rev_nat_index;
	__u16 weight;
} __attribute__((packed));
   559  
/* IPv6 counterpart of lb4_key_v2, see below. */
struct lb6_key_v2 {
	union v6addr address;	/* Service virtual IPv6 address */
	__be16 dport;		/* L4 port filter, if unset, all ports apply */
	__u16 slave;		/* Backend iterator, 0 indicates the master service */
	__u8 proto;		/* L4 protocol, currently not used (set to 0) */
	__u8 pad[3];
};
   567  
/* See lb4_service_v2 comments */
struct lb6_service_v2 {
	__u32 backend_id;	/* Backend ID in lb6_backends */
	__u16 count;
	__u16 rev_nat_index;
	__u16 weight;
	__u16 pad;
};
   576  
/* See lb4_backend comments */
struct lb6_backend {
	union v6addr address;
	__be16 port;
	__u8 proto;
	__u8 pad;
};
   584  
/* Reverse NAT mapping for IPv6: original service address/port. */
struct lb6_reverse_nat {
	union v6addr address;
	__be16 port;
} __attribute__((packed));
   589  
/* Key of the IPv4 services map. */
struct lb4_key_v2 {
	__be32 address;		/* Service virtual IPv4 address */
	__be16 dport;		/* L4 port filter, if unset, all ports apply */
	__u16 slave;		/* Backend iterator, 0 indicates the master service */
	__u8 proto;		/* L4 protocol, currently not used (set to 0) */
	__u8 pad[3];
};
   597  
/* Value of the IPv4 services map. */
struct lb4_service_v2 {
	__u32 backend_id;	/* Backend ID in lb4_backends */
	/* For the master service, count denotes number of service endpoints.
	 * For service endpoints, zero. (Previously, legacy service ID)
	 */
	__u16 count;
	__u16 rev_nat_index;	/* Reverse NAT ID in lb4_reverse_nat */
	__u16 weight;		/* Currently not used */
	__u16 pad;
};
   608  
/* Value of the IPv4 backends map, keyed by backend_id. */
struct lb4_backend {
	__be32 address;		/* Service endpoint IPv4 address */
	__be16 port;		/* L4 port filter */
	__u8 proto;		/* L4 protocol, currently not used (set to 0) */
	__u8 pad;
};
   615  
/* Reverse NAT mapping for IPv4: original service address/port. */
struct lb4_reverse_nat {
	__be32 address;
	__be16 port;
} __attribute__((packed));
   620  
// LB_RR_MAX_SEQ generated by daemon in node_config.h
struct lb_sequence {
	__u16 count;		/* number of valid entries in idx[] */
	__u16 idx[LB_RR_MAX_SEQ];
};
   626  
/* In-flight conntrack state passed between CT lookup and users. */
struct ct_state {
	__u16 rev_nat_index;
	__u16 loopback:1,
	      node_port:1,
	      proxy_redirect:1, // Connection is redirected to a proxy
	      reserved:13;
	__be16 orig_dport;	/* destination port before any translation */
	__be32 addr;
	__be32 svc_addr;
	__u32 src_sec_id;
	__u16 unused;
	__u16 backend_id;	/* Backend ID in lb4_backends */
};
   640  
/* ep_config corresponds to the EndpointConfig object in pkg/maps/configmap. */
struct ep_config {
	__u32 flags; /* enum ep_cfg_flag */
	__be32 ipv4Addr;
	union v6addr ipv6Addr;
	mac_t node_mac;
	__u16 lxc_id;
	__be16 lxc_id_nb;	/* lxc_id in network byte order */
	__u32 identity;
	__be32 identity_nb;	/* identity in network byte order */
	__u32 pad;
} __attribute__((packed));
   653  
/**
 * relax_verifier is a dummy helper call to introduce a pruning checkpoint to
 * help relax the verifier to avoid reaching complexity limits on older
 * kernels.
 */
static inline void relax_verifier(void)
{
	/* The csum_diff() call has no effect; it only creates a state
	 * pruning point for the verifier. */
	int foo = 0;
	csum_diff(0, 0, &foo, 1, 0);
}
   664  
/* redirect_self() loops the packet back into the originating netns. */
static inline int redirect_self(struct __sk_buff *skb)
{
	/* Looping back the packet into the originating netns. In
	 * case of veth, it's xmit'ing into the hosts' veth device
	 * such that we end up on ingress in the peer. For ipvlan
	 * slave it's redirect to ingress as we are attached on the
	 * slave in netns already.
	 */
#ifdef ENABLE_HOST_REDIRECT
	/* veth: transmit on our own device; arrives at the peer. */
	return redirect(skb->ifindex, 0);
#else
	/* ipvlan: already in the target netns, redirect to our ingress. */
	return redirect(skb->ifindex, BPF_F_INGRESS);
#endif
}
   679  
/* redirect_peer() forwards to the given ifindex when the datapath
 * supports redirection; otherwise returns TC_ACT_OK so the stack
 * performs the forwarding (e.g. ipvlan). */
static inline int redirect_peer(int ifindex, uint32_t flags)
{
	/* If our datapath has proper redirect support, we make use
	 * of it here, otherwise we terminate tc processing by letting
	 * stack handle forwarding e.g. in ipvlan case.
	 */
#ifdef ENABLE_HOST_REDIRECT
	return redirect(ifindex, flags);
#else
	return TC_ACT_OK;
#endif /* ENABLE_HOST_REDIRECT */
}
   692  
   693  #endif