gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/benchmarks/tcp/tcp_proxy.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Binary tcp_proxy is a simple TCP proxy. 16 package main 17 18 import ( 19 "encoding/gob" 20 "flag" 21 "fmt" 22 "io" 23 "log" 24 "math/rand" 25 "net" 26 "os" 27 "os/signal" 28 "regexp" 29 "runtime" 30 "runtime/pprof" 31 "runtime/trace" 32 "strconv" 33 "time" 34 35 "golang.org/x/sys/unix" 36 "gvisor.dev/gvisor/pkg/tcpip" 37 "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" 38 "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" 39 "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" 40 "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" 41 "gvisor.dev/gvisor/pkg/tcpip/network/arp" 42 "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" 43 "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" 44 "gvisor.dev/gvisor/pkg/tcpip/stack" 45 "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" 46 "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" 47 "gvisor.dev/gvisor/pkg/tcpip/transport/udp" 48 ) 49 50 var ( 51 port = flag.Int("port", 0, "bind port (all addresses)") 52 forward = flag.String("forward", "", "forwarding target") 53 client = flag.Bool("client", false, "use netstack for listen") 54 server = flag.Bool("server", false, "use netstack for dial") 55 56 // Netstack-specific options. 57 mtu = flag.Int("mtu", 1280, "mtu for network stack") 58 addr = flag.String("addr", "", "address for tap-based netstack") 59 mask = flag.Int("mask", 8, "mask size for address") 60 iface = flag.String("iface", "", "network interface name to bind for netstack") 61 sack = flag.Bool("sack", false, "enable SACK support for netstack") 62 rack = flag.Bool("rack", true, "enable RACK in TCP") 63 moderateRecvBuf = flag.Bool("moderate_recv_buf", true, "enable TCP Receive Buffer Auto-tuning") 64 cubic = flag.Bool("cubic", false, "enable use of CUBIC congestion control for netstack") 65 gso = flag.Int("gso", 0, "GSO maximum size") 66 swgso = flag.Bool("swgso", false, "gVisor-level GSO") 67 gro = flag.Bool("gro", false, "gVisor-level GRO") 68 clientTCPProbeFile = flag.String("client_tcp_probe_file", "", "if specified, installs a tcp probe to dump endpoint state to the specified file.") 69 serverTCPProbeFile = flag.String("server_tcp_probe_file", "", "if specified, installs a tcp probe to dump endpoint state to the specified file.") 70 cpuprofile = flag.String("cpuprofile", "", "write cpu profile to the specified file.") 71 memprofile = flag.String("memprofile", "", "write memory profile to the specified file.") 72 blockprofile = flag.String("blockprofile", "", "write a goroutine blocking profile to the specified file.") 73 mutexprofile = flag.String("mutexprofile", "", "write a mutex profile to the specified file.") 74 traceprofile = flag.String("traceprofile", "", "write a 5s trace of the benchmark to the specified file.") 75 useIpv6 = flag.Bool("ipv6", false, "use ipv6 instead of ipv4.") 76 sniff = flag.Bool("sniff", false, "log sniffed packets") 77 useXDP = flag.Bool("xdp", false, "use AF_XDP as a link enpoint instead of fdbased") 78 ) 79 80 type impl interface { 81 dial(address string) (net.Conn, error) 82 listen(port int) (net.Listener, error) 83 printStats() 84 } 85 86 type netImpl struct{} 87 88 func (netImpl) dial(address string) (net.Conn, error) { 89 return net.Dial("tcp", address) 90 } 91 92 func (netImpl) listen(port int) (net.Listener, error) { 93 return net.Listen("tcp", fmt.Sprintf(":%d", port)) 94 } 95 96 func (netImpl) printStats() { 97 } 98 99 const ( 100 nicID = 1 // Fixed. 101 bufSize = 4 << 20 // 4MB. 102 ) 103 104 type netstackImpl struct { 105 s *stack.Stack 106 addr tcpip.Address 107 mode string 108 } 109 110 func setupNetwork(ifaceName string, numChannels int) (fds []int, err error) { 111 // Get all interfaces in the namespace. 112 ifaces, err := net.Interfaces() 113 if err != nil { 114 return nil, fmt.Errorf("querying interfaces: %v", err) 115 } 116 117 for _, iface := range ifaces { 118 if iface.Name != ifaceName { 119 continue 120 } 121 // Create the socket. 122 const protocol = 0x0300 // htons(ETH_P_ALL) 123 fds := make([]int, numChannels) 124 for i := range fds { 125 fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, protocol) 126 if err != nil { 127 return nil, fmt.Errorf("unable to create raw socket: %v", err) 128 } 129 130 // Bind to the appropriate device. 131 ll := unix.SockaddrLinklayer{ 132 Protocol: protocol, 133 Ifindex: iface.Index, 134 Pkttype: unix.PACKET_HOST, 135 } 136 if err := unix.Bind(fd, &ll); err != nil { 137 return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err) 138 } 139 140 // RAW Sockets by default have a very small SO_RCVBUF of 256KB, 141 // up it to at least 4MB to reduce packet drops. 142 if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, bufSize); err != nil { 143 return nil, fmt.Errorf("setsockopt(..., SO_RCVBUF, %v,..) = %v", bufSize, err) 144 } 145 146 if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, bufSize); err != nil { 147 return nil, fmt.Errorf("setsockopt(..., SO_SNDBUF, %v,..) = %v", bufSize, err) 148 } 149 150 if !*swgso && *gso != 0 { 151 if err := unix.SetsockoptInt(fd, unix.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { 152 return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) 153 } 154 } 155 fds[i] = fd 156 } 157 return fds, nil 158 } 159 return nil, fmt.Errorf("failed to find interface: %v", ifaceName) 160 } 161 162 func newNetstackImpl(mode string) (impl, error) { 163 // Parse details. 164 var parsedAddr tcpip.Address 165 if *useIpv6 { 166 parsedAddr = tcpip.AddrFrom16Slice(net.ParseIP(*addr).To16()) 167 } else { 168 parsedAddr = tcpip.AddrFrom4Slice(net.ParseIP(*addr).To4()) 169 } 170 parsedBytes := parsedAddr.AsSlice() 171 var parsedDest tcpip.Address // Filled in below. 172 var parsedMask tcpip.AddressMask // Filled in below. 173 var parsedDest6 tcpip.Address // Filled in below. 174 var parsedMask6 tcpip.AddressMask // Filled in below. 175 switch *mask { 176 case 8: 177 parsedDest = tcpip.AddrFrom4([4]byte{parsedBytes[0], 0, 0, 0}) 178 parsedMask = tcpip.MaskFromBytes([]byte{0xff, 0, 0, 0}) 179 parsedDest6 = tcpip.AddrFrom16Slice(append([]byte{parsedBytes[0]}, make([]byte, 15)...)) 180 parsedMask6 = tcpip.MaskFromBytes(append([]byte{0xff}, make([]byte, 15)...)) 181 case 16: 182 parsedDest = tcpip.AddrFrom4([4]byte{parsedBytes[0], parsedBytes[1], 0, 0}) 183 parsedMask = tcpip.MaskFromBytes([]byte{0xff, 0xff, 0, 0}) 184 parsedDest6 = tcpip.AddrFrom16Slice(append([]byte{parsedBytes[0], parsedBytes[1]}, make([]byte, 14)...)) 185 parsedMask6 = tcpip.MaskFromBytes(append([]byte{0xff, 0xff}, make([]byte, 14)...)) 186 case 24: 187 parsedDest = tcpip.AddrFrom4([4]byte{parsedBytes[0], parsedBytes[1], parsedBytes[2], 0}) 188 parsedMask = tcpip.MaskFromBytes([]byte{0xff, 0xff, 0xff, 0}) 189 parsedDest6 = tcpip.AddrFrom16Slice(append([]byte{parsedBytes[0], parsedBytes[1], parsedBytes[2]}, make([]byte, 13)...)) 190 parsedMask6 = tcpip.MaskFromBytes(append([]byte{0xff, 0xff, 0xff}, make([]byte, 13)...)) 191 default: 192 // This is just laziness; we don't expect a different mask. 193 return nil, fmt.Errorf("mask %d not supported", mask) 194 } 195 196 // Create a new network stack. 197 netProtos := []stack.NetworkProtocolFactory{ipv6.NewProtocol, ipv4.NewProtocol, arp.NewProtocol} 198 transProtos := []stack.TransportProtocolFactory{tcp.NewProtocol, udp.NewProtocol, icmp.NewProtocol4, icmp.NewProtocol6} 199 s := stack.New(stack.Options{ 200 NetworkProtocols: netProtos, 201 TransportProtocols: transProtos, 202 }) 203 204 // Generate a new mac for the eth device. 205 mac := make(net.HardwareAddr, 6) 206 rand.Read(mac) // Fill with random data. 207 mac[0] &^= 0x1 // Clear multicast bit. 208 mac[0] |= 0x2 // Set local assignment bit (IEEE802). 209 var ep stack.LinkEndpoint 210 var err error 211 if *useXDP { 212 ep, err = newXDPEndpoint(*iface, mac) 213 } else { 214 var fds []int 215 fds, err = setupNetwork(*iface, runtime.GOMAXPROCS(0)) 216 if err != nil { 217 return nil, err 218 } 219 ep, err = fdbased.New(&fdbased.Options{ 220 FDs: fds, 221 MTU: uint32(*mtu), 222 EthernetHeader: true, 223 Address: tcpip.LinkAddress(mac), 224 // Enable checksum generation as we need to generate valid 225 // checksums for the veth device to deliver our packets to the 226 // peer. But we do want to disable checksum verification as veth 227 // devices do perform GRO and the linux host kernel may not 228 // regenerate valid checksums after GRO. 229 TXChecksumOffload: false, 230 RXChecksumOffload: true, 231 PacketDispatchMode: fdbased.RecvMMsg, 232 // PacketDispatchMode: fdbased.PacketMMap, 233 GSOMaxSize: uint32(*gso), 234 GVisorGSOEnabled: *swgso, 235 GRO: *gro, 236 }) 237 } 238 if err != nil { 239 return nil, fmt.Errorf("failed to create endpoint: %v", err) 240 } 241 242 if *sniff { 243 ep = sniffer.New(ep) 244 } 245 246 qDisc := fifo.New(ep, runtime.GOMAXPROCS(0), 1000) 247 opts := stack.NICOptions{QDisc: qDisc} 248 if err := s.CreateNICWithOptions(nicID, ep, opts); err != nil { 249 return nil, fmt.Errorf("error creating NIC %q: %v", *iface, err) 250 } 251 proto := ipv4.ProtocolNumber 252 if *useIpv6 { 253 proto = ipv6.ProtocolNumber 254 } 255 protocolAddr := tcpip.ProtocolAddress{ 256 Protocol: proto, 257 AddressWithPrefix: parsedAddr.WithPrefix(), 258 } 259 if err := s.AddProtocolAddress(nicID, protocolAddr, stack.AddressProperties{}); err != nil { 260 return nil, fmt.Errorf("error adding IP address %+v to %q: %s", protocolAddr, *iface, err) 261 } 262 263 subnet4, err := tcpip.NewSubnet(parsedDest, parsedMask) 264 if err != nil { 265 return nil, fmt.Errorf("tcpip.Subnet(%s, %s): %s", parsedDest, parsedMask, err) 266 } 267 subnet6, err := tcpip.NewSubnet(parsedDest6, parsedMask6) 268 if err != nil { 269 return nil, fmt.Errorf("tcpip.Subnet(%s, %s): %s", parsedDest, parsedMask, err) 270 } 271 272 // Add default route; we only support 273 s.SetRouteTable([]tcpip.Route{ 274 { 275 Destination: subnet4, 276 NIC: nicID, 277 }, 278 { 279 Destination: subnet6, 280 NIC: nicID, 281 }, 282 }) 283 284 // Set protocol options. 285 { 286 opt := tcpip.TCPSACKEnabled(*sack) 287 if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { 288 return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) 289 } 290 } 291 292 // RACK is enabled by default in netstack. 293 if !*rack { 294 opt := tcpip.TCPRecovery(0) 295 if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { 296 return nil, fmt.Errorf("disabling RACK failed: %v", err) 297 } 298 } 299 300 // Enable Receive Buffer Auto-Tuning. 301 { 302 opt := tcpip.TCPModerateReceiveBufferOption(*moderateRecvBuf) 303 if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { 304 return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err) 305 } 306 } 307 308 // Set Congestion Control to cubic if requested. 309 if *cubic { 310 opt := tcpip.CongestionControlOption("cubic") 311 if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { 312 return nil, fmt.Errorf("SetTransportProtocolOption(%d, &%T(%s)): %s", tcp.ProtocolNumber, opt, opt, err) 313 } 314 } 315 316 return netstackImpl{ 317 s: s, 318 addr: parsedAddr, 319 mode: mode, 320 }, nil 321 } 322 323 func (n netstackImpl) dial(address string) (net.Conn, error) { 324 host, port, err := net.SplitHostPort(address) 325 if err != nil { 326 return nil, err 327 } 328 if host == "" { 329 // A host must be provided for the dial. 330 return nil, fmt.Errorf("no host provided") 331 } 332 portNumber, err := strconv.Atoi(port) 333 if err != nil { 334 return nil, err 335 } 336 addr := tcpip.FullAddress{ 337 NIC: nicID, 338 Addr: tcpip.AddrFromSlice(net.ParseIP(host)), 339 Port: uint16(portNumber), 340 } 341 proto := ipv4.ProtocolNumber 342 if *useIpv6 { 343 proto = ipv6.ProtocolNumber 344 } 345 conn, err := gonet.DialTCP(n.s, addr, proto) 346 if err != nil { 347 return nil, err 348 } 349 return conn, nil 350 } 351 352 func (n netstackImpl) listen(port int) (net.Listener, error) { 353 addr := tcpip.FullAddress{ 354 NIC: nicID, 355 Port: uint16(port), 356 } 357 proto := ipv4.ProtocolNumber 358 if *useIpv6 { 359 proto = ipv6.ProtocolNumber 360 } 361 listener, err := gonet.ListenTCP(n.s, addr, proto) 362 if err != nil { 363 return nil, err 364 } 365 return listener, nil 366 } 367 368 var zeroFieldsRegexp = regexp.MustCompile(`\s*[a-zA-Z0-9]*:0`) 369 370 func (n netstackImpl) printStats() { 371 // Don't show zero fields. 372 stats := zeroFieldsRegexp.ReplaceAllString(fmt.Sprintf("%+v", n.s.Stats()), "") 373 log.Printf("netstack %s Stats: %+v\n", n.mode, stats) 374 } 375 376 // installProbe installs a TCP Probe function that will dump endpoint 377 // state to the specified file. It also returns a close func() that 378 // can be used to close the probeFile. 379 func (n netstackImpl) installProbe(probeFileName string) (close func()) { 380 // Install Probe to dump out end point state. 381 probeFile, err := os.Create(probeFileName) 382 if err != nil { 383 log.Fatalf("failed to create tcp_probe file %s: %v", probeFileName, err) 384 } 385 probeEncoder := gob.NewEncoder(probeFile) 386 // Install a TCP Probe. 387 n.s.AddTCPProbe(func(state *stack.TCPEndpointState) { 388 probeEncoder.Encode(state) 389 }) 390 return func() { probeFile.Close() } 391 } 392 393 func main() { 394 flag.Parse() 395 if *port == 0 { 396 log.Fatalf("no port provided") 397 } 398 if *forward == "" { 399 log.Fatalf("no forward provided") 400 } 401 // Seed the random number generator to ensure that we are given MAC addresses that don't 402 // for the case of the client and server stack. 403 rand.Seed(time.Now().UTC().UnixNano()) 404 405 if *cpuprofile != "" { 406 f, err := os.Create(*cpuprofile) 407 if err != nil { 408 log.Fatal("could not create CPU profile: ", err) 409 } 410 defer func() { 411 if err := f.Close(); err != nil { 412 log.Print("error closing CPU profile: ", err) 413 } 414 }() 415 if err := pprof.StartCPUProfile(f); err != nil { 416 log.Fatal("could not start CPU profile: ", err) 417 } 418 defer pprof.StopCPUProfile() 419 } 420 421 if *traceprofile != "" { 422 f, err := os.Create(*traceprofile) 423 if err != nil { 424 log.Fatal("could not create trace profile: ", err) 425 } 426 defer func() { 427 if err := f.Close(); err != nil { 428 log.Print("error closing trace profile: ", err) 429 } 430 }() 431 go func() { 432 // Delay tracing to give workload sometime to start. 433 time.Sleep(2 * time.Second) 434 if err := trace.Start(f); err != nil { 435 log.Fatal("could not start Go trace:", err) 436 } 437 defer trace.Stop() 438 <-time.After(5 * time.Second) 439 }() 440 } 441 442 if *mutexprofile != "" { 443 runtime.SetMutexProfileFraction(100) 444 } 445 446 if *blockprofile != "" { 447 runtime.SetBlockProfileRate(1000) 448 } 449 450 var ( 451 in impl 452 out impl 453 err error 454 ) 455 if *server { 456 in, err = newNetstackImpl("server") 457 if *serverTCPProbeFile != "" { 458 defer in.(netstackImpl).installProbe(*serverTCPProbeFile)() 459 } 460 461 } else { 462 in = netImpl{} 463 } 464 if err != nil { 465 log.Fatalf("netstack error: %v", err) 466 } 467 if *client { 468 out, err = newNetstackImpl("client") 469 if *clientTCPProbeFile != "" { 470 defer out.(netstackImpl).installProbe(*clientTCPProbeFile)() 471 } 472 } else { 473 out = netImpl{} 474 } 475 if err != nil { 476 log.Fatalf("netstack error: %v", err) 477 } 478 479 // Dial forward before binding. 480 var next net.Conn 481 for { 482 next, err = out.dial(*forward) 483 if err == nil { 484 break 485 } 486 time.Sleep(50 * time.Millisecond) 487 log.Printf("connect failed retrying: %v", err) 488 } 489 490 // Bind once to the server socket. 491 listener, err := in.listen(*port) 492 if err != nil { 493 // Should not happen, everything must be bound by this time 494 // this proxy is started. 495 log.Fatalf("unable to listen: %v", err) 496 } 497 log.Printf("client=%v, server=%v, ready.", *client, *server) 498 499 sigs := make(chan os.Signal, 1) 500 signal.Notify(sigs, unix.SIGTERM) 501 502 // Accept connections and proxy data between them. 503 go func() { 504 for { 505 // Forward all connections. 506 inConn, err := listener.Accept() 507 if err != nil { 508 // This should not happen; we are listening 509 // successfully. Exhausted all available FDs? 510 log.Fatalf("accept error: %v", err) 511 } 512 log.Printf("incoming connection established.") 513 514 // Copy both ways. We wrap everything in another 515 // Reader/Writer to prevent optimizations that 516 // otherwise call splice() to move data between 517 // sockets. That penalizes netstack, but isn't relevant 518 // to real use cases where only one end of netstack is 519 // attached to a socket. 520 go io.Copy(io.MultiWriter(inConn), io.MultiReader(next)) 521 go io.Copy(io.MultiWriter(next), io.MultiReader(inConn)) 522 523 // Print stats every second. 524 go func() { 525 t := time.NewTicker(time.Second) 526 defer t.Stop() 527 for { 528 <-t.C 529 in.printStats() 530 out.printStats() 531 } 532 }() 533 534 for { 535 // Dial again. 536 next, err = out.dial(*forward) 537 if err == nil { 538 break 539 } 540 } 541 } 542 }() 543 544 // Wait for the SIGTERM notifying us to stop. 545 <-sigs 546 547 if *memprofile != "" { 548 f, err := os.Create(*memprofile) 549 if err != nil { 550 log.Fatal("could not create memory profile: ", err) 551 } 552 defer func() { 553 if err := f.Close(); err != nil { 554 log.Print("error closing memory profile: ", err) 555 } 556 }() 557 runtime.GC() // get up-to-date statistics 558 if err := pprof.WriteHeapProfile(f); err != nil { 559 log.Fatalf("Unable to write heap profile: %v", err) 560 } 561 } 562 if *blockprofile != "" { 563 f, err := os.Create(*blockprofile) 564 if err != nil { 565 log.Fatal("could not create block profile: ", err) 566 } 567 defer func() { 568 if err := f.Close(); err != nil { 569 log.Print("error closing block profile: ", err) 570 } 571 }() 572 if err := pprof.Lookup("block").WriteTo(f, 0); err != nil { 573 log.Fatalf("Unable to write block profile: %v", err) 574 } 575 } 576 if *mutexprofile != "" { 577 f, err := os.Create(*mutexprofile) 578 if err != nil { 579 log.Fatal("could not create mutex profile: ", err) 580 } 581 defer func() { 582 if err := f.Close(); err != nil { 583 log.Print("error closing mutex profile: ", err) 584 } 585 }() 586 if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil { 587 log.Fatalf("Unable to write mutex profile: %v", err) 588 } 589 } 590 }