github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/drivers/overlay/bpf_linux_test.go

package overlay

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"net"
	"net/netip"
	"testing"
	"time"

	"golang.org/x/net/bpf"
	"golang.org/x/net/ipv4"
	"golang.org/x/sys/unix"
)

func TestVNIMatchBPF(t *testing.T) {
	// The BPF filter program under test uses Linux extensions which are not
	// emulated by any user-space BPF interpreters. It is also classic BPF,
	// which cannot be tested in-kernel using the bpf(BPF_PROG_RUN) syscall.
	// The best we can do without actually programming it into an iptables
	// rule and end-to-end testing it is to attach it as a socket filter to
	// a raw socket and test which loopback packets make it through.
	//
	// Modern kernels transpile cBPF programs into eBPF for execution, so a
	// possible future direction would be to extract the transpiler and
	// convert the program under test to eBPF so it could be loaded and run
	// using the bpf(2) syscall.
	// https://elixir.bootlin.com/linux/v6.2/source/net/core/filter.c#L559
	// Though the effort would be better spent on adding nftables support to
	// libnetwork so this whole BPF program could be replaced with a native
	// nftables '@th' match expression.
	//
	// The filter could be manually e2e-tested for both IPv4 and IPv6 by
	// programming ip[6]tables rules which log matching packets and sending
	// test packets loopback using netcat. All the necessary information
	// (bytecode and an acceptable test vector) is logged by this test.
	//
	//   $ sudo ip6tables -A INPUT -p udp -s ::1 -d ::1 -m bpf \
	//       --bytecode "${bpf_program_under_test}" \
	//       -j LOG --log-prefix '[IPv6 VNI match]:'
	//   $ <<<"${udp_payload_hexdump}" xxd -r -p | nc -u -6 localhost 30000
	//   $ sudo dmesg

	loopback := net.IPv4(127, 0, 0, 1)

	// Reserve an ephemeral UDP port for loopback testing.
	// Binding to a TUN device would be more hermetic, but is much more effort to set up.
	reservation, err := net.ListenUDP("udp", &net.UDPAddr{IP: loopback, Port: 0})
	if err != nil {
		t.Fatal(err)
	}
	defer reservation.Close()
	daddr := reservation.LocalAddr().(*net.UDPAddr).AddrPort()

	sender, err := net.DialUDP("udp", nil, reservation.LocalAddr().(*net.UDPAddr))
	if err != nil {
		t.Fatal(err)
	}
	defer sender.Close()
	saddr := sender.LocalAddr().(*net.UDPAddr).AddrPort()

	// There doesn't seem to be a way to receive the entire Layer-3 IPv6
	// packet including the fixed IP header using the portable raw sockets
	// API. That can only be done from an AF_PACKET socket, and it is
	// unclear whether 'ld poff' would behave the same in a BPF program
	// attached to such a socket as in an xt_bpf match.
	c, err := net.ListenIP("ip4:udp", &net.IPAddr{IP: loopback})
	if err != nil {
		if errors.Is(err, unix.EPERM) {
			t.Skip("test requires CAP_NET_RAW")
		}
		t.Fatal(err)
	}
	defer c.Close()

	pc := ipv4.NewPacketConn(c)

	testvectors := []uint32{
		0,
		1,
		0x08,
		42,
		0x80,
		0xfe,
		0xff,
		0x100,
		0xfff,  // 4095
		0x1000, // 4096
		0x1001,
		0x10000,
		0xfffffe,
		0xffffff, // Max VNI
	}
	for _, vni := range []uint32{1, 42, 0x100, 0x1000, 0xfffffe, 0xffffff} {
		t.Run(fmt.Sprintf("vni=%d", vni), func(t *testing.T) {
			setBPF(t, pc, vniMatchBPF(vni))

			for _, v := range testvectors {
				pkt := appendVXLANHeader(nil, v)
				pkt = append(pkt, []byte{0xde, 0xad, 0xbe, 0xef}...)
				if _, err := sender.Write(pkt); err != nil {
					t.Fatal(err)
				}

				rpkt, ok := readUDPPacketFromRawSocket(t, pc, saddr, daddr)
				// Sanity check: the only packets readUDPPacketFromRawSocket
				// should return are ones we sent.
				if ok && !bytes.Equal(pkt, rpkt) {
					t.Fatalf("received unexpected packet: % x", rpkt)
				}
				if ok != (v == vni) {
					t.Errorf("unexpected packet tagged with vni=%d (got %v, want %v)", v, ok, v == vni)
				}
			}
		})
	}
}

func appendVXLANHeader(b []byte, vni uint32) []byte {
	// https://tools.ietf.org/html/rfc7348#section-5
	// Header layout: a flags byte with the I flag (0x08) set, 24 reserved
	// bits, then the 24-bit VNI in the high-order bytes of the second word
	// with a final reserved byte.
	b = append(b, []byte{0x08, 0x00, 0x00, 0x00}...)
	return binary.BigEndian.AppendUint32(b, vni<<8)
}

func setBPF(t *testing.T, c *ipv4.PacketConn, fprog []bpf.RawInstruction) {
	// Install a filter which drops everything, then drain any packets which
	// were queued before it was attached, so the real filter only ever sees
	// packets which arrived while it was in place.
	// https://natanyellin.com/posts/ebpf-filtering-done-right/
	blockall, _ := bpf.Assemble([]bpf.Instruction{bpf.RetConstant{Val: 0}})
	if err := c.SetBPF(blockall); err != nil {
		t.Fatal(err)
	}
	ms := make([]ipv4.Message, 100)
	for {
		n, err := c.ReadBatch(ms, unix.MSG_DONTWAIT)
		if err != nil {
			if errors.Is(err, unix.EAGAIN) {
				break
			}
			t.Fatal(err)
		}
		if n == 0 {
			break
		}
	}

	t.Logf("setting socket filter: %v", marshalXTBPF(fprog))
	if err := c.SetBPF(fprog); err != nil {
		t.Fatal(err)
	}
}

// readUDPPacketFromRawSocket reads raw IP packets from pc until a UDP packet
// which matches the (src, dst) 4-tuple is found or the receive buffer is empty,
// and returns the payload of the UDP packet.
func readUDPPacketFromRawSocket(t *testing.T, pc *ipv4.PacketConn, src, dst netip.AddrPort) ([]byte, bool) {
	t.Helper()

	ms := []ipv4.Message{
		{Buffers: [][]byte{make([]byte, 1500)}},
	}

	// Set a time limit to prevent an infinite loop if there is a lot of
	// loopback traffic being captured which prevents the buffer from
	// emptying.
	deadline := time.Now().Add(1 * time.Second)
	for time.Now().Before(deadline) {
		n, err := pc.ReadBatch(ms, unix.MSG_DONTWAIT)
		if err != nil {
			if !errors.Is(err, unix.EAGAIN) {
				t.Fatal(err)
			}
			break
		}
		if n == 0 {
			break
		}
		pkt := ms[0].Buffers[0][:ms[0].N]
		psrc, pdst, payload, ok := parseUDP(pkt)
		// Discard captured packets which belong to other unrelated flows.
		if !ok || psrc != src || pdst != dst {
			t.Logf("discarding packet:\n% x", pkt)
			continue
		}
		t.Logf("received packet (%v -> %v):\n% x", psrc, pdst, payload)
		// While not strictly required, copy payload into a new
		// slice which does not share a backing array with pkt
		// so the IP and UDP headers can be garbage collected.
		return append([]byte(nil), payload...), true
	}
	return nil, false
}

// parseIPv4 parses the IPv4 header from the raw Layer-3 packet data in b.
func parseIPv4(b []byte) (src, dst netip.Addr, protocol byte, payload []byte, ok bool) {
	if len(b) < 20 {
		return netip.Addr{}, netip.Addr{}, 0, nil, false
	}
	hlen := int(b[0]&0x0f) * 4
	// Reject malformed headers, including ones whose stated length extends
	// past the end of the captured data.
	if hlen < 20 || hlen > len(b) {
		return netip.Addr{}, netip.Addr{}, 0, nil, false
	}
	src, _ = netip.AddrFromSlice(b[12:16])
	dst, _ = netip.AddrFromSlice(b[16:20])
	protocol = b[9]
	payload = b[hlen:]
	return src, dst, protocol, payload, true
}

// parseUDP parses the IP and UDP headers from the raw Layer-3 packet data in b.
func parseUDP(b []byte) (src, dst netip.AddrPort, payload []byte, ok bool) {
	srcip, dstip, protocol, ippayload, ok := parseIPv4(b)
	if !ok {
		return netip.AddrPort{}, netip.AddrPort{}, nil, false
	}
	if protocol != 17 { // Not UDP.
		return netip.AddrPort{}, netip.AddrPort{}, nil, false
	}
	if len(ippayload) < 8 {
		return netip.AddrPort{}, netip.AddrPort{}, nil, false
	}
	sport := binary.BigEndian.Uint16(ippayload[0:2])
	dport := binary.BigEndian.Uint16(ippayload[2:4])
	src = netip.AddrPortFrom(srcip, sport)
	dst = netip.AddrPortFrom(dstip, dport)
	payload = ippayload[8:]
	return src, dst, payload, true
}
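
// vniFromVXLANHeader is an illustrative sketch, not part of the original
// file: the inverse of appendVXLANHeader. It recovers the 24-bit VNI from
// the second 32-bit word of a VXLAN header, which is the value the test
// above expects the filter to accept exactly when it equals the configured
// VNI.
func vniFromVXLANHeader(b []byte) (vni uint32, ok bool) {
	// Require the full 8-byte header and the I flag, which marks the VNI as valid.
	if len(b) < 8 || b[0]&0x08 == 0 {
		return 0, false
	}
	// Bytes 4-6 carry the VNI; the final byte is reserved.
	return binary.BigEndian.Uint32(b[4:8]) >> 8, true
}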