github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/drivers/overlay/bpf_linux_test.go (about)

     1  package overlay
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"net"
     9  	"net/netip"
    10  	"testing"
    11  	"time"
    12  
    13  	"golang.org/x/net/bpf"
    14  	"golang.org/x/net/ipv4"
    15  	"golang.org/x/sys/unix"
    16  )
    17  
    18  func TestVNIMatchBPF(t *testing.T) {
    19  	// The BPF filter program under test uses Linux extensions which are not
    20  	// emulated by any user-space BPF interpreters. It is also classic BPF,
    21  	// which cannot be tested in-kernel using the bpf(BPF_PROG_RUN) syscall.
    22  	// The best we can do without actually programming it into an iptables
    23  	// rule and end-to-end testing it is to attach it as a socket filter to
    24  	// a raw socket and test which loopback packets make it through.
    25  	//
    26  	// Modern kernels transpile cBPF programs into eBPF for execution, so a
    27  	// possible future direction would be to extract the transpiler and
    28  	// convert the program under test to eBPF so it could be loaded and run
    29  	// using the bpf(2) syscall.
    30  	// https://elixir.bootlin.com/linux/v6.2/source/net/core/filter.c#L559
    31  	// Though the effort would be better spent on adding nftables support to
    32  	// libnetwork so this whole BPF program could be replaced with a native
    33  	// nftables '@th' match expression.
    34  	//
    35  	// The filter could be manually e2e-tested for both IPv4 and IPv6 by
    36  	// programming ip[6]tables rules which log matching packets and sending
    37  	// test packets loopback using netcat. All the necessary information
    38  	// (bytecode and an acceptable test vector) is logged by this test.
    39  	//
    40  	//     $ sudo ip6tables -A INPUT -p udp -s ::1 -d ::1 -m bpf \
    41  	//         --bytecode "${bpf_program_under_test}" \
    42  	//         -j LOG --log-prefix '[IPv6 VNI match]:'
    43  	//     $ <<<"${udp_payload_hexdump}" xxd -r -p | nc -u -6 localhost 30000
    44  	//     $ sudo dmesg
    45  
    46  	loopback := net.IPv4(127, 0, 0, 1)
    47  
    48  	// Reserve an ephemeral UDP port for loopback testing.
    49  	// Binding to a TUN device would be more hermetic, but is much more effort to set up.
    50  	reservation, err := net.ListenUDP("udp", &net.UDPAddr{IP: loopback, Port: 0})
    51  	if err != nil {
    52  		t.Fatal(err)
    53  	}
    54  	defer reservation.Close()
    55  	daddr := reservation.LocalAddr().(*net.UDPAddr).AddrPort()
    56  
    57  	sender, err := net.DialUDP("udp", nil, reservation.LocalAddr().(*net.UDPAddr))
    58  	if err != nil {
    59  		t.Fatal(err)
    60  	}
    61  	defer sender.Close()
    62  	saddr := sender.LocalAddr().(*net.UDPAddr).AddrPort()
    63  
    64  	// There doesn't seem to be a way to receive the entire Layer-3 IPv6
    65  	// packet including the fixed IP header using the portable raw sockets
    66  	// API. That can only be done from an AF_PACKET socket, and it is
    67  	// unclear whether 'ld poff' would behave the same in a BPF program
    68  	// attached to such a socket as in an xt_bpf match.
    69  	c, err := net.ListenIP("ip4:udp", &net.IPAddr{IP: loopback})
    70  	if err != nil {
    71  		if errors.Is(err, unix.EPERM) {
    72  			t.Skip("test requires CAP_NET_RAW")
    73  		}
    74  		t.Fatal(err)
    75  	}
    76  	defer c.Close()
    77  
    78  	pc := ipv4.NewPacketConn(c)
    79  
    80  	testvectors := []uint32{
    81  		0,
    82  		1,
    83  		0x08,
    84  		42,
    85  		0x80,
    86  		0xfe,
    87  		0xff,
    88  		0x100,
    89  		0xfff,  // 4095
    90  		0x1000, // 4096
    91  		0x1001,
    92  		0x10000,
    93  		0xfffffe,
    94  		0xffffff, // Max VNI
    95  	}
    96  	for _, vni := range []uint32{1, 42, 0x100, 0x1000, 0xfffffe, 0xffffff} {
    97  		t.Run(fmt.Sprintf("vni=%d", vni), func(t *testing.T) {
    98  			setBPF(t, pc, vniMatchBPF(vni))
    99  
   100  			for _, v := range testvectors {
   101  				pkt := appendVXLANHeader(nil, v)
   102  				pkt = append(pkt, []byte{0xde, 0xad, 0xbe, 0xef}...)
   103  				if _, err := sender.Write(pkt); err != nil {
   104  					t.Fatal(err)
   105  				}
   106  
   107  				rpkt, ok := readUDPPacketFromRawSocket(t, pc, saddr, daddr)
   108  				// Sanity check: the only packets readUDPPacketFromRawSocket
   109  				// should return are ones we sent.
   110  				if ok && !bytes.Equal(pkt, rpkt) {
   111  					t.Fatalf("received unexpected packet: % x", rpkt)
   112  				}
   113  				if ok != (v == vni) {
   114  					t.Errorf("unexpected packet tagged with vni=%d (got %v, want %v)", v, ok, v == vni)
   115  				}
   116  			}
   117  		})
   118  	}
   119  }
   120  
   121  func appendVXLANHeader(b []byte, vni uint32) []byte {
   122  	// https://tools.ietf.org/html/rfc7348#section-5
   123  	b = append(b, []byte{0x08, 0x00, 0x00, 0x00}...)
   124  	return binary.BigEndian.AppendUint32(b, vni<<8)
   125  }
   126  
   127  func setBPF(t *testing.T, c *ipv4.PacketConn, fprog []bpf.RawInstruction) {
   128  	// https://natanyellin.com/posts/ebpf-filtering-done-right/
   129  	blockall, _ := bpf.Assemble([]bpf.Instruction{bpf.RetConstant{Val: 0}})
   130  	if err := c.SetBPF(blockall); err != nil {
   131  		t.Fatal(err)
   132  	}
   133  	ms := make([]ipv4.Message, 100)
   134  	for {
   135  		n, err := c.ReadBatch(ms, unix.MSG_DONTWAIT)
   136  		if err != nil {
   137  			if errors.Is(err, unix.EAGAIN) {
   138  				break
   139  			}
   140  			t.Fatal(err)
   141  		}
   142  		if n == 0 {
   143  			break
   144  		}
   145  	}
   146  
   147  	t.Logf("setting socket filter: %v", marshalXTBPF(fprog))
   148  	if err := c.SetBPF(fprog); err != nil {
   149  		t.Fatal(err)
   150  	}
   151  }
   152  
   153  // readUDPPacketFromRawSocket reads raw IP packets from pc until a UDP packet
   154  // which matches the (src, dst) 4-tuple is found or the receive buffer is empty,
   155  // and returns the payload of the UDP packet.
   156  func readUDPPacketFromRawSocket(t *testing.T, pc *ipv4.PacketConn, src, dst netip.AddrPort) ([]byte, bool) {
   157  	t.Helper()
   158  
   159  	ms := []ipv4.Message{
   160  		{Buffers: [][]byte{make([]byte, 1500)}},
   161  	}
   162  
   163  	// Set a time limit to prevent an infinite loop if there is a lot of
   164  	// loopback traffic being captured which prevents the buffer from
   165  	// emptying.
   166  	deadline := time.Now().Add(1 * time.Second)
   167  	for time.Now().Before(deadline) {
   168  		n, err := pc.ReadBatch(ms, unix.MSG_DONTWAIT)
   169  		if err != nil {
   170  			if !errors.Is(err, unix.EAGAIN) {
   171  				t.Fatal(err)
   172  			}
   173  			break
   174  		}
   175  		if n == 0 {
   176  			break
   177  		}
   178  		pkt := ms[0].Buffers[0][:ms[0].N]
   179  		psrc, pdst, payload, ok := parseUDP(pkt)
   180  		// Discard captured packets which belong to other unrelated flows.
   181  		if !ok || psrc != src || pdst != dst {
   182  			t.Logf("discarding packet:\n% x", pkt)
   183  			continue
   184  		}
   185  		t.Logf("received packet (%v -> %v):\n% x", psrc, pdst, payload)
   186  		// While not strictly required, copy payload into a new
   187  		// slice which does not share a backing array with pkt
   188  		// so the IP and UDP headers can be garbage collected.
   189  		return append([]byte(nil), payload...), true
   190  	}
   191  	return nil, false
   192  }
   193  
   194  func parseIPv4(b []byte) (src, dst netip.Addr, protocol byte, payload []byte, ok bool) {
   195  	if len(b) < 20 {
   196  		return netip.Addr{}, netip.Addr{}, 0, nil, false
   197  	}
   198  	hlen := int(b[0]&0x0f) * 4
   199  	if hlen < 20 {
   200  		return netip.Addr{}, netip.Addr{}, 0, nil, false
   201  	}
   202  	src, _ = netip.AddrFromSlice(b[12:16])
   203  	dst, _ = netip.AddrFromSlice(b[16:20])
   204  	protocol = b[9]
   205  	payload = b[hlen:]
   206  	return src, dst, protocol, payload, true
   207  }
   208  
   209  // parseUDP parses the IP and UDP headers from the raw Layer-3 packet data in b.
   210  func parseUDP(b []byte) (src, dst netip.AddrPort, payload []byte, ok bool) {
   211  	srcip, dstip, protocol, ippayload, ok := parseIPv4(b)
   212  	if !ok {
   213  		return netip.AddrPort{}, netip.AddrPort{}, nil, false
   214  	}
   215  	if protocol != 17 {
   216  		return netip.AddrPort{}, netip.AddrPort{}, nil, false
   217  	}
   218  	if len(ippayload) < 8 {
   219  		return netip.AddrPort{}, netip.AddrPort{}, nil, false
   220  	}
   221  	sport := binary.BigEndian.Uint16(ippayload[0:2])
   222  	dport := binary.BigEndian.Uint16(ippayload[2:4])
   223  	src = netip.AddrPortFrom(srcip, sport)
   224  	dst = netip.AddrPortFrom(dstip, dport)
   225  	payload = ippayload[8:]
   226  	return src, dst, payload, true
   227  }