github.com/fafucoder/cilium@v1.6.11/bpf/lib/common.h (about) 1 /* 2 * Copyright (C) 2016-2019 Authors of Cilium 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #ifndef __LIB_COMMON_H_ 19 #define __LIB_COMMON_H_ 20 21 #include <bpf_features.h> 22 #include <bpf/api.h> 23 #include <linux/if_ether.h> 24 #include <linux/ipv6.h> 25 #include <linux/in.h> 26 #include <stdint.h> 27 #include <stdbool.h> 28 29 // FIXME: GH-3239 LRU logic is not handling timeouts gracefully enough 30 // #ifndef HAVE_LRU_MAP_TYPE 31 // #define NEEDS_TIMEOUT 1 32 // #endif 33 #define NEEDS_TIMEOUT 1 34 35 #ifndef AF_INET 36 #define AF_INET 2 37 #endif 38 39 #ifndef AF_INET6 40 #define AF_INET6 10 41 #endif 42 43 #ifndef EVENT_SOURCE 44 #define EVENT_SOURCE 0 45 #endif 46 47 #define PORT_UDP_VXLAN 4789 48 #define PORT_UDP_GENEVE 6081 49 #define PORT_UDP_VXLAN_LINUX 8472 50 51 #ifdef PREALLOCATE_MAPS 52 #define CONDITIONAL_PREALLOC 0 53 #else 54 #define CONDITIONAL_PREALLOC BPF_F_NO_PREALLOC 55 #endif 56 57 /* TODO: ipsec v6 tunnel datapath still needs separate fixing */ 58 #ifndef ENABLE_IPSEC 59 # ifdef ENABLE_IPV6 60 # define ENABLE_ENCAP_HOST_REMAP 1 61 # endif 62 #endif 63 64 #define __inline__ __attribute__((always_inline)) 65 #ifndef __always_inline 66 #define __always_inline inline __inline__ 67 #endif 68 69 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) 70 71 /* These are shared with test/bpf/check-complexity.sh, when modifying any of 72 * the below, that script should also be updated. */ 73 #define CILIUM_CALL_DROP_NOTIFY 1 74 #define CILIUM_CALL_ERROR_NOTIFY 2 75 #define CILIUM_CALL_SEND_ICMP6_ECHO_REPLY 3 76 #define CILIUM_CALL_HANDLE_ICMP6_NS 4 77 #define CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED 5 78 #define CILIUM_CALL_ARP 6 79 #define CILIUM_CALL_IPV4_FROM_LXC 7 80 #define CILIUM_CALL_NAT64 8 81 #define CILIUM_CALL_NAT46 9 82 #define CILIUM_CALL_IPV6_FROM_LXC 10 83 #define CILIUM_CALL_IPV4_TO_LXC_POLICY_ONLY 11 84 #define CILIUM_CALL_IPV6_TO_LXC_POLICY_ONLY 12 85 #define CILIUM_CALL_IPV4_TO_ENDPOINT 13 86 #define CILIUM_CALL_IPV6_TO_ENDPOINT 14 87 #define CILIUM_CALL_IPV4_NODEPORT_NAT 15 88 #define CILIUM_CALL_IPV6_NODEPORT_NAT 16 89 #define CILIUM_CALL_IPV4_NODEPORT_REVNAT 17 90 #define CILIUM_CALL_IPV6_NODEPORT_REVNAT 18 91 #define CILIUM_CALL_ENCAP_NODEPORT_NAT 19 92 #define CILIUM_CALL_SIZE 20 93 94 typedef __u64 mac_t; 95 96 union v6addr { 97 struct { 98 __u32 p1; 99 __u32 p2; 100 __u32 p3; 101 __u32 p4; 102 }; 103 struct { 104 __u64 d1; 105 __u64 d2; 106 }; 107 __u8 addr[16]; 108 } __attribute__((packed)); 109 110 static inline bool validate_ethertype(struct __sk_buff *skb, __u16 *proto) 111 { 112 void *data = (void *) (long) skb->data; 113 void *data_end = (void *) (long) skb->data_end; 114 115 if (data + ETH_HLEN > data_end) 116 return false; 117 118 struct ethhdr *eth = data; 119 *proto = eth->h_proto; 120 121 if (bpf_ntohs(*proto) < ETH_P_802_3_MIN) 122 return false; // non-Ethernet II unsupported 123 124 return true; 125 } 126 127 static inline bool __revalidate_data(struct __sk_buff *skb, void **data_, 128 void **data_end_, void **l3, 129 const size_t l3_len, const bool pull) 130 { 131 const size_t tot_len = ETH_HLEN + l3_len; 132 void *data_end; 133 void *data; 134 135 /* Verifier workaround, do this unconditionally: invalid size of register spill. */ 136 if (pull) 137 skb_pull_data(skb, tot_len); 138 data_end = (void *)(long)skb->data_end; 139 data = (void *)(long)skb->data; 140 if (data + tot_len > data_end) 141 return false; 142 143 /* Verifier workaround: pointer arithmetic on pkt_end prohibited. */ 144 *data_ = data; 145 *data_end_ = data_end; 146 147 *l3 = data + ETH_HLEN; 148 return true; 149 } 150 151 /* revalidate_data_first() initializes the provided pointers from the skb and 152 * ensures that the data is pulled in for access. Should be used the first 153 * time that the skb data is accessed, subsequent calls can be made to 154 * revalidate_data() which is cheaper. 155 * Returns true if 'skb' is long enough for an IP header of the provided type, 156 * false otherwise. */ 157 #define revalidate_data_first(skb, data, data_end, ip) \ 158 __revalidate_data(skb, data, data_end, (void **)ip, sizeof(**ip), true) 159 160 /* revalidate_data() initializes the provided pointers from the skb. 161 * Returns true if 'skb' is long enough for an IP header of the provided type, 162 * false otherwise. */ 163 #define revalidate_data(skb, data, data_end, ip) \ 164 __revalidate_data(skb, data, data_end, (void **)ip, sizeof(**ip), false) 165 166 /* Macros for working with L3 cilium defined IPV6 addresses */ 167 #define BPF_V6(dst, ...) BPF_V6_1(dst, fetch_ipv6(__VA_ARGS__)) 168 #define BPF_V6_1(dst, ...) BPF_V6_4(dst, __VA_ARGS__) 169 #define BPF_V6_4(dst, a1, a2, a3, a4) \ 170 ({ \ 171 dst.p1 = a1; \ 172 dst.p2 = a2; \ 173 dst.p3 = a3; \ 174 dst.p4 = a4; \ 175 }) 176 177 #define ENDPOINT_KEY_IPV4 1 178 #define ENDPOINT_KEY_IPV6 2 179 180 /* Structure representing an IPv4 or IPv6 address, being used for: 181 * - key as endpoints map 182 * - key for tunnel endpoint map 183 * - value for tunnel endpoint map 184 */ 185 struct endpoint_key { 186 union { 187 struct { 188 __u32 ip4; 189 __u32 pad1; 190 __u32 pad2; 191 __u32 pad3; 192 }; 193 union v6addr ip6; 194 }; 195 __u8 family; 196 __u8 key; 197 __u16 pad5; 198 } __attribute__((packed)); 199 200 #define ENDPOINT_F_HOST 1 /* Special endpoint representing local host */ 201 202 /* Value of endpoint map */ 203 struct endpoint_info { 204 __u32 ifindex; 205 __u16 unused; /* used to be sec_label, no longer used */ 206 __u16 lxc_id; 207 __u32 flags; 208 mac_t mac; 209 mac_t node_mac; 210 __u32 pad[4]; 211 }; 212 213 struct remote_endpoint_info { 214 __u32 sec_label; 215 __u32 tunnel_endpoint; 216 __u8 key; 217 }; 218 219 struct policy_key { 220 __u32 sec_label; 221 __u16 dport; 222 __u8 protocol; 223 __u8 egress:1, 224 pad:7; 225 }; 226 227 struct policy_entry { 228 __be16 proxy_port; 229 __u16 pad[3]; 230 __u64 packets; 231 __u64 bytes; 232 }; 233 234 struct metrics_key { 235 __u8 reason; //0: forwarded, >0 dropped 236 __u8 dir:2, //1: ingress 2: egress 237 pad:6; 238 __u16 reserved[3]; // reserved for future extension 239 }; 240 241 242 struct metrics_value { 243 __u64 count; 244 __u64 bytes; 245 }; 246 247 248 enum { 249 CILIUM_NOTIFY_UNSPEC, 250 CILIUM_NOTIFY_DROP, 251 CILIUM_NOTIFY_DBG_MSG, 252 CILIUM_NOTIFY_DBG_CAPTURE, 253 CILIUM_NOTIFY_TRACE, 254 }; 255 256 #define NOTIFY_COMMON_HDR \ 257 __u8 type; \ 258 __u8 subtype; \ 259 __u16 source; \ 260 __u32 hash; 261 262 #ifndef TRACE_PAYLOAD_LEN 263 #define TRACE_PAYLOAD_LEN 128ULL 264 #endif 265 266 #ifndef BPF_F_PSEUDO_HDR 267 # define BPF_F_PSEUDO_HDR (1ULL << 4) 268 #endif 269 270 #define IS_ERR(x) (unlikely((x < 0) || (x == TC_ACT_SHOT))) 271 272 /* Cilium IPSec code to indicate packet needs to be handled 273 * by IPSec stack. Maps to TC_ACT_OK. 274 */ 275 #define IPSEC_ENDPOINT TC_ACT_OK 276 277 /* Return value to indicate that proxy redirection is required */ 278 #define POLICY_ACT_PROXY_REDIRECT (1 << 16) 279 280 /* Cilium error codes, must NOT overlap with TC return codes. 281 * These also serve as drop reasons for metrics, 282 * where reason > 0 corresponds to -(DROP_*) 283 */ 284 #define DROP_INVALID_SMAC -130 /* unused */ 285 #define DROP_INVALID_DMAC -131 /* unused */ 286 #define DROP_INVALID_SIP -132 287 #define DROP_POLICY -133 288 #define DROP_INVALID -134 289 #define DROP_CT_INVALID_HDR -135 290 #define DROP_CT_MISSING_ACK -136 /* unused */ 291 #define DROP_CT_UNKNOWN_PROTO -137 292 #define DROP_CT_CANT_CREATE_ -138 /* unused */ 293 #define DROP_UNKNOWN_L3 -139 294 #define DROP_MISSED_TAIL_CALL -140 295 #define DROP_WRITE_ERROR -141 296 #define DROP_UNKNOWN_L4 -142 297 #define DROP_UNKNOWN_ICMP_CODE -143 298 #define DROP_UNKNOWN_ICMP_TYPE -144 299 #define DROP_UNKNOWN_ICMP6_CODE -145 300 #define DROP_UNKNOWN_ICMP6_TYPE -146 301 #define DROP_NO_TUNNEL_KEY -147 302 #define DROP_NO_TUNNEL_OPT_ -148 /* unused */ 303 #define DROP_INVALID_GENEVE_ -149 /* unused */ 304 #define DROP_UNKNOWN_TARGET -150 305 #define DROP_UNROUTABLE -151 306 #define DROP_NO_LXC -152 /* unused */ 307 #define DROP_CSUM_L3 -153 308 #define DROP_CSUM_L4 -154 309 #define DROP_CT_CREATE_FAILED -155 310 #define DROP_INVALID_EXTHDR -156 311 #define DROP_FRAG_NOSUPPORT -157 312 #define DROP_NO_SERVICE -158 313 #define DROP_POLICY_L4 -159 /* unused */ 314 #define DROP_NO_TUNNEL_ENDPOINT -160 315 #define DROP_PROXYMAP_CREATE_FAILED_ -161 /* unused */ 316 #define DROP_POLICY_CIDR -162 /* unused */ 317 #define DROP_UNKNOWN_CT -163 318 #define DROP_HOST_UNREACHABLE -164 319 #define DROP_NO_CONFIG -165 320 #define DROP_UNSUPPORTED_L2 -166 321 #define DROP_NAT_NO_MAPPING -167 322 #define DROP_NAT_UNSUPP_PROTO -168 323 #define DROP_NO_FIB -169 324 #define DROP_ENCAP_PROHIBITED -170 325 #define DROP_INVALID_IDENTITY -171 326 #define DROP_UNKNOWN_SENDER -172 327 #define DROP_NAT_NOT_NEEDED -173 /* Mapped as drop code, though drop not necessary. */ 328 329 #define NAT_PUNT_TO_STACK DROP_NAT_NOT_NEEDED 330 331 /* Cilium metrics reasons for forwarding packets and other stats. 332 * If reason is larger than below then this is a drop reason and 333 * value corresponds to -(DROP_*), see above. 334 */ 335 #define REASON_FORWARDED 0 336 #define REASON_PLAINTEXT 3 337 #define REASON_DECRYPT 4 338 #define REASON_LB_NO_SLAVE 5 339 #define REASON_LB_NO_BACKEND 6 340 #define REASON_LB_REVNAT_UPDATE 7 341 #define REASON_LB_REVNAT_STALE 8 342 343 /* Cilium metrics direction for dropping/forwarding packet */ 344 #define METRIC_INGRESS 1 345 #define METRIC_EGRESS 2 346 347 /* Magic skb->mark identifies packets origination and encryption status. 348 * 349 * The upper 16 bits plus lower 8 bits (e.g. mask 0XFFFF00FF) contain the 350 * packets security identity. The lower/upper halves are swapped to recover 351 * the identity. 352 * 353 * The 4 bits at 0X0F00 provide 354 * - the magic marker values which indicate whether the packet is coming from 355 * an ingress or egress proxy, a local process and its current encryption 356 * status. 357 * 358 * The 4 bits at 0xF000 provide 359 * - the key index to use for encryption when multiple keys are in-flight. 360 * In the IPsec case this becomes the SPI on the wire. 361 */ 362 #define MARK_MAGIC_HOST_MASK 0x0F00 363 #define MARK_MAGIC_PROXY_INGRESS 0x0A00 364 #define MARK_MAGIC_PROXY_EGRESS 0x0B00 365 #define MARK_MAGIC_HOST 0x0C00 366 #define MARK_MAGIC_DECRYPT 0x0D00 367 #define MARK_MAGIC_ENCRYPT 0x0E00 368 #define MARK_MAGIC_IDENTITY 0x0F00 /* mark carries identity */ 369 #define MARK_MAGIC_TO_PROXY 0x0200 370 371 #define MARK_MAGIC_KEY_ID 0xF000 372 #define MARK_MAGIC_KEY_MASK 0xFF00 373 374 /* IPSec cannot be configured with NodePort BPF today, hence non-conflicting 375 * overlap with MARK_MAGIC_KEY_ID. 376 */ 377 #define MARK_MAGIC_SNAT_DONE 0x1500 378 379 /** 380 * get_identity - returns source identity from the mark field 381 */ 382 static inline int __inline__ get_identity(struct __sk_buff *skb) 383 { 384 return ((skb->mark & 0xFF) << 16) | skb->mark >> 16; 385 } 386 387 static inline void __inline__ set_encrypt_dip(struct __sk_buff *skb, __u32 ip_endpoint) 388 { 389 skb->cb[4] = ip_endpoint; 390 } 391 392 /** 393 * set_identity - pushes 24 bit identity into skb mark value. 394 */ 395 static inline void __inline__ set_identity(struct __sk_buff *skb, __u32 identity) 396 { 397 skb->mark = skb->mark & MARK_MAGIC_KEY_MASK; 398 skb->mark |= ((identity & 0xFFFF) << 16) | ((identity & 0xFF0000) >> 16); 399 } 400 401 static inline void __inline__ set_identity_cb(struct __sk_buff *skb, __u32 identity) 402 { 403 skb->cb[1] = identity; 404 } 405 406 /* We cap key index at 4 bits because mark value is used to map skb to key */ 407 #define MAX_KEY_INDEX 15 408 409 /* encrypt_key is the index into the encrypt map */ 410 struct encrypt_key { 411 __u32 ctx; 412 } __attribute__((packed)); 413 414 /* encrypt_config is the current encryption context on the node */ 415 struct encrypt_config { 416 __u8 encrypt_key; 417 } __attribute__((packed)); 418 419 /** 420 * or_encrypt_key - mask and shift key into encryption format 421 */ 422 static inline __u32 __inline__ or_encrypt_key(__u8 key) 423 { 424 return (((__u32)key & 0x0F) << 12) | MARK_MAGIC_ENCRYPT; 425 } 426 427 /** 428 * set_encrypt_key - pushes 8 bit key and encryption marker into skb mark value. 429 */ 430 static inline void __inline__ set_encrypt_key(struct __sk_buff *skb, __u8 key) 431 { 432 skb->mark = or_encrypt_key(key); 433 } 434 435 static inline void __inline__ set_encrypt_key_cb(struct __sk_buff *skb, __u8 key) 436 { 437 skb->cb[0] = or_encrypt_key(key); 438 } 439 440 /* 441 * skb->tc_index uses 442 * 443 * cilium_host @egress 444 * bpf_host -> bpf_lxc 445 */ 446 #define TC_INDEX_F_SKIP_INGRESS_PROXY 1 447 #define TC_INDEX_F_SKIP_EGRESS_PROXY 2 448 #define TC_INDEX_F_SKIP_NODEPORT 4 449 #define TC_INDEX_F_SKIP_RECIRCULATION 8 450 451 /* skb->cb[] usage: */ 452 enum { 453 CB_SRC_LABEL, 454 CB_IFINDEX, 455 CB_POLICY, 456 CB_NAT46_STATE, 457 #define CB_NAT CB_NAT46_STATE /* Alias, non-overlapping */ 458 CB_CT_STATE, 459 }; 460 461 /* State values for NAT46 */ 462 enum { 463 NAT46_CLEAR, 464 NAT64, 465 NAT46, 466 }; 467 468 #define TUPLE_F_OUT 0 /* Outgoing flow */ 469 #define TUPLE_F_IN 1 /* Incoming flow */ 470 #define TUPLE_F_RELATED 2 /* Flow represents related packets */ 471 #define TUPLE_F_SERVICE 4 /* Flow represents service/slave map */ 472 473 #define CT_EGRESS 0 474 #define CT_INGRESS 1 475 #define CT_SERVICE 2 476 477 #ifdef ENABLE_NODEPORT 478 #define NAT_MIN_EGRESS NODEPORT_PORT_MIN 479 #else 480 #define NAT_MIN_EGRESS EPHERMERAL_MIN 481 #endif 482 483 enum { 484 CT_NEW, 485 CT_ESTABLISHED, 486 CT_REPLY, 487 CT_RELATED, 488 }; 489 490 struct ipv6_ct_tuple { 491 /* Address fields are reversed, i.e., 492 * these field names are correct for reply direction traffic. */ 493 union v6addr daddr; 494 union v6addr saddr; 495 /* The order of dport+sport must not be changed! 496 * These field names are correct for original direction traffic. */ 497 __be16 dport; 498 __be16 sport; 499 __u8 nexthdr; 500 __u8 flags; 501 } __attribute__((packed)); 502 503 struct ipv4_ct_tuple { 504 /* Address fields are reversed, i.e., 505 * these field names are correct for reply direction traffic. */ 506 __be32 daddr; 507 __be32 saddr; 508 /* The order of dport+sport must not be changed! 509 * These field names are correct for original direction traffic. */ 510 __be16 dport; 511 __be16 sport; 512 __u8 nexthdr; 513 __u8 flags; 514 } __attribute__((packed)); 515 516 struct ct_entry { 517 __u64 rx_packets; 518 __u64 rx_bytes; 519 __u64 tx_packets; 520 __u64 tx_bytes; 521 __u32 lifetime; 522 __u16 rx_closing:1, 523 tx_closing:1, 524 nat46:1, 525 lb_loopback:1, 526 seen_non_syn:1, 527 node_port:1, 528 proxy_redirect:1, // Connection is redirected to a proxy 529 reserved:9; 530 __u16 rev_nat_index; 531 __u16 backend_id; /* Populated only in v1.6+ BPF code. */ 532 533 /* *x_flags_seen represents the OR of all TCP flags seen for the 534 * transmit/receive direction of this entry. */ 535 __u8 tx_flags_seen; 536 __u8 rx_flags_seen; 537 538 __u32 src_sec_id; /* Used from userspace proxies, do not change offset! */ 539 540 /* last_*x_report is a timestamp of the last time a monitor 541 * notification was sent for the transmit/receive direction. */ 542 __u32 last_tx_report; 543 __u32 last_rx_report; 544 }; 545 546 struct lb6_key { 547 union v6addr address; 548 __be16 dport; /* L4 port filter, if unset, all ports apply */ 549 __u16 slave; /* Backend iterator, 0 indicates the master service */ 550 } __attribute__((packed)); 551 552 struct lb6_service { 553 union v6addr target; 554 __be16 port; 555 __u16 count; 556 __u16 rev_nat_index; 557 __u16 weight; 558 } __attribute__((packed)); 559 560 struct lb6_key_v2 { 561 union v6addr address; /* Service virtual IPv6 address */ 562 __be16 dport; /* L4 port filter, if unset, all ports apply */ 563 __u16 slave; /* Backend iterator, 0 indicates the master service */ 564 __u8 proto; /* L4 protocol, currently not used (set to 0) */ 565 __u8 pad[3]; 566 }; 567 568 /* See lb4_service_v2 comments */ 569 struct lb6_service_v2 { 570 __u32 backend_id; 571 __u16 count; 572 __u16 rev_nat_index; 573 __u16 weight; 574 __u16 pad; 575 }; 576 577 /* See lb4_backend comments */ 578 struct lb6_backend { 579 union v6addr address; 580 __be16 port; 581 __u8 proto; 582 __u8 pad; 583 }; 584 585 struct lb6_reverse_nat { 586 union v6addr address; 587 __be16 port; 588 } __attribute__((packed)); 589 590 struct lb4_key_v2 { 591 __be32 address; /* Service virtual IPv4 address */ 592 __be16 dport; /* L4 port filter, if unset, all ports apply */ 593 __u16 slave; /* Backend iterator, 0 indicates the master service */ 594 __u8 proto; /* L4 protocol, currently not used (set to 0) */ 595 __u8 pad[3]; 596 }; 597 598 struct lb4_service_v2 { 599 __u32 backend_id; /* Backend ID in lb4_backends */ 600 /* For the master service, count denotes number of service endpoints. 601 * For service endpoints, zero. (Previously, legacy service ID) 602 */ 603 __u16 count; 604 __u16 rev_nat_index; /* Reverse NAT ID in lb4_reverse_nat */ 605 __u16 weight; /* Currently not used */ 606 __u16 pad; 607 }; 608 609 struct lb4_backend { 610 __be32 address; /* Service endpoint IPv4 address */ 611 __be16 port; /* L4 port filter */ 612 __u8 proto; /* L4 protocol, currently not used (set to 0) */ 613 __u8 pad; 614 }; 615 616 struct lb4_reverse_nat { 617 __be32 address; 618 __be16 port; 619 } __attribute__((packed)); 620 621 // LB_RR_MAX_SEQ generated by daemon in node_config.h 622 struct lb_sequence { 623 __u16 count; 624 __u16 idx[LB_RR_MAX_SEQ]; 625 }; 626 627 struct ct_state { 628 __u16 rev_nat_index; 629 __u16 loopback:1, 630 node_port:1, 631 proxy_redirect:1, // Connection is redirected to a proxy 632 reserved:13; 633 __be16 orig_dport; 634 __be32 addr; 635 __be32 svc_addr; 636 __u32 src_sec_id; 637 __u16 unused; 638 __u16 backend_id; /* Backend ID in lb4_backends */ 639 }; 640 641 /* ep_config corresponds to the EndpointConfig object in pkg/maps/configmap. */ 642 struct ep_config { 643 __u32 flags; /* enum ep_cfg_flag */ 644 __be32 ipv4Addr; 645 union v6addr ipv6Addr; 646 mac_t node_mac; 647 __u16 lxc_id; 648 __be16 lxc_id_nb; 649 __u32 identity; 650 __be32 identity_nb; 651 __u32 pad; 652 } __attribute__((packed)); 653 654 /** 655 * relax_verifier is a dummy helper call to introduce a pruning checkpoing to 656 * help relax the verifier to avoid reaching complexity limits on older 657 * kernels. 658 */ 659 static inline void relax_verifier(void) 660 { 661 int foo = 0; 662 csum_diff(0, 0, &foo, 1, 0); 663 } 664 665 static inline int redirect_self(struct __sk_buff *skb) 666 { 667 /* Looping back the packet into the originating netns. In 668 * case of veth, it's xmit'ing into the hosts' veth device 669 * such that we end up on ingress in the peer. For ipvlan 670 * slave it's redirect to ingress as we are attached on the 671 * slave in netns already. 672 */ 673 #ifdef ENABLE_HOST_REDIRECT 674 return redirect(skb->ifindex, 0); 675 #else 676 return redirect(skb->ifindex, BPF_F_INGRESS); 677 #endif 678 } 679 680 static inline int redirect_peer(int ifindex, uint32_t flags) 681 { 682 /* If our datapath has proper redirect support, we make use 683 * of it here, otherwise we terminate tc processing by letting 684 * stack handle forwarding e.g. in ipvlan case. 685 */ 686 #ifdef ENABLE_HOST_REDIRECT 687 return redirect(ifindex, flags); 688 #else 689 return TC_ACT_OK; 690 #endif /* ENABLE_HOST_REDIRECT */ 691 } 692 693 #endif