github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/runsc/boot/network.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package boot
    16  
    17  import (
    18  	"fmt"
    19  	"net"
    20  	"os"
    21  	"runtime"
    22  	"strings"
    23  	"time"
    24  
    25  	"golang.org/x/sys/unix"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/hostos"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/log"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/ethernet"
    30  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/fdbased"
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/loopback"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/packetsocket"
    33  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/qdisc/fifo"
    34  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/sniffer"
    35  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/link/xdp"
    36  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/network/ipv4"
    37  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/network/ipv6"
    38  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/stack"
    39  	"github.com/nicocha30/gvisor-ligolo/pkg/urpc"
    40  	"github.com/nicocha30/gvisor-ligolo/runsc/config"
    41  )
    42  
    43  var (
    44  	// DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and
    45  	// "::1/8" on "lo" interface.
    46  	DefaultLoopbackLink = LoopbackLink{
    47  		Name: "lo",
    48  		Addresses: []IPWithPrefix{
    49  			{Address: net.IP("\x7f\x00\x00\x01"), PrefixLen: 8},
    50  			{Address: net.IPv6loopback, PrefixLen: 128},
    51  		},
    52  		Routes: []Route{
    53  			{
    54  				Destination: net.IPNet{
    55  					IP:   net.IPv4(0x7f, 0, 0, 0),
    56  					Mask: net.IPv4Mask(0xff, 0, 0, 0),
    57  				},
    58  			},
    59  			{
    60  				Destination: net.IPNet{
    61  					IP:   net.IPv6loopback,
    62  					Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
    63  				},
    64  			},
    65  		},
    66  	}
    67  )
    68  
// Network exposes methods that can be used to configure a network stack.
//
// Stack is the network stack the methods operate on: CreateLinksAndRoutes
// creates NICs on it, adds their addresses and static neighbors, and installs
// the route table.
type Network struct {
	Stack *stack.Stack
}
    73  
// Route represents a route in the network stack.
//
// Destination is the subnet (address and mask) the route applies to and
// Gateway is copied into the Gateway field of the resulting tcpip.Route; see
// toTcpipRoute. A Route whose Destination IP, Destination mask and Gateway are
// all nil is treated as unset; see Empty.
type Route struct {
	Destination net.IPNet
	Gateway     net.IP
}
    79  
// DefaultRoute represents a catch all route to the default gateway.
//
// Name is the name of the interface the route is bound to. When Route is set,
// CreateLinksAndRoutes looks Name up among the links it just created and
// fails with an "invalid interface name" error if there is no such link.
type DefaultRoute struct {
	Route Route
	Name  string
}
    85  
// Neighbor is a static neighbor entry for a link: it associates the network
// address IP with the link-layer address HardwareAddr. CreateLinksAndRoutes
// passes each entry to Stack.AddStaticNeighbor for the NIC of the link that
// lists it.
type Neighbor struct {
	IP           net.IP
	HardwareAddr net.HardwareAddr
}
    90  
// FDBasedLink configures an fd-based link.
//
// CreateLinksAndRoutes uses the fields as follows:
//   - Name names the NIC and is the key default routes refer to.
//   - Addresses are added to the NIC; Routes are added to the route table.
//   - MTU, LinkAddress, GSOMaxSize, GvisorGSOEnabled, TXChecksumOffload and
//     RXChecksumOffload are forwarded to fdbased.Options. An Ethernet header
//     is enabled only when LinkAddress is non-empty.
//   - QDisc selects the queueing discipline placed on the NIC.
//   - GvisorGROTimeout is passed as the NIC's GROTimeout option.
//   - Neighbors are registered as static neighbor entries on the NIC.
//
// NOTE(review): InterfaceIndex is not read by the fd-based code path in this
// file; presumably it is consumed elsewhere.
type FDBasedLink struct {
	Name              string
	InterfaceIndex    int
	MTU               int
	Addresses         []IPWithPrefix
	Routes            []Route
	GSOMaxSize        uint32
	GvisorGSOEnabled  bool
	GvisorGROTimeout  time.Duration
	TXChecksumOffload bool
	RXChecksumOffload bool
	LinkAddress       net.HardwareAddr
	QDisc             config.QueueingDiscipline
	Neighbors         []Neighbor

	// NumChannels controls how many underlying FDs are to be used to
	// create this endpoint.
	NumChannels int
}
   111  
// XDPLink configures an XDP link.
//
// CreateLinksAndRoutes uses the fields as follows:
//   - Name names the NIC and is the key default routes refer to.
//   - Addresses are added to the NIC; Routes are added to the route table.
//   - LinkAddress, InterfaceIndex, TXChecksumOffload and RXChecksumOffload
//     are forwarded to xdp.Options.
//   - QDisc selects the queueing discipline placed on the NIC.
//   - GvisorGROTimeout is passed as the NIC's GROTimeout option.
//   - Neighbors are registered as static neighbor entries on the NIC.
//
// NOTE(review): MTU is not read by the XDP code path in this file, and
// NumChannels is only logged there (an XDP link always consumes a fixed set of
// four FDs); confirm whether other code relies on them.
type XDPLink struct {
	Name              string
	InterfaceIndex    int
	MTU               int
	Addresses         []IPWithPrefix
	Routes            []Route
	TXChecksumOffload bool
	RXChecksumOffload bool
	LinkAddress       net.HardwareAddr
	QDisc             config.QueueingDiscipline
	Neighbors         []Neighbor
	GvisorGROTimeout  time.Duration

	// NumChannels controls how many underlying FDs are to be used to
	// create this endpoint.
	NumChannels int
}
   130  
// LoopbackLink configures a loopback link.
//
// Name names the NIC, Addresses are added to it, Routes are added to the
// route table and GvisorGROTimeout is passed as the NIC's GROTimeout option.
type LoopbackLink struct {
	Name             string
	Addresses        []IPWithPrefix
	Routes           []Route
	GvisorGROTimeout time.Duration
}
   138  
// CreateLinksAndRoutesArgs are arguments to CreateLinksAndRoutes.
type CreateLinksAndRoutesArgs struct {
	// FilePayload contains the fds associated with the links. The number of
	// fd's must equal the sum of the NumChannels field of the FDBasedLink
	// entries below, plus four if an XDP link is present, plus one if PCAP
	// is set; CreateLinksAndRoutes rejects any other count.
	urpc.FilePayload

	// LoopbackLinks, FDBasedLinks and XDPLinks describe the links to
	// create. FDBasedLinks and XDPLinks are mutually exclusive, and at most
	// one XDP link is accepted.
	LoopbackLinks []LoopbackLink
	FDBasedLinks  []FDBasedLink
	XDPLinks      []XDPLink

	// Defaultv4Gateway and Defaultv6Gateway are optional default routes;
	// each is skipped when its Route is empty.
	Defaultv4Gateway DefaultRoute
	Defaultv6Gateway DefaultRoute

	// PCAP indicates that FilePayload also contains a PCAP log file.
	PCAP bool
}
   156  
   157  // IPWithPrefix is an address with its subnet prefix length.
   158  type IPWithPrefix struct {
   159  	// Address is a network address.
   160  	Address net.IP
   161  
   162  	// PrefixLen is the subnet prefix length.
   163  	PrefixLen int
   164  }
   165  
   166  func (ip IPWithPrefix) String() string {
   167  	return fmt.Sprintf("%s/%d", ip.Address, ip.PrefixLen)
   168  }
   169  
   170  // Empty returns true if route hasn't been set.
   171  func (r *Route) Empty() bool {
   172  	return r.Destination.IP == nil && r.Destination.Mask == nil && r.Gateway == nil
   173  }
   174  
   175  func (r *Route) toTcpipRoute(id tcpip.NICID) (tcpip.Route, error) {
   176  	subnet, err := tcpip.NewSubnet(ipToAddress(r.Destination.IP), ipMaskToAddressMask(r.Destination.Mask))
   177  	if err != nil {
   178  		return tcpip.Route{}, err
   179  	}
   180  	return tcpip.Route{
   181  		Destination: subnet,
   182  		Gateway:     ipToAddress(r.Gateway),
   183  		NIC:         id,
   184  	}, nil
   185  }
   186  
   187  // CreateLinksAndRoutes creates links and routes in a network stack.  It should
   188  // only be called once.
   189  func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct{}) error {
   190  	if len(args.FDBasedLinks) > 0 && len(args.XDPLinks) > 0 {
   191  		return fmt.Errorf("received both fdbased and XDP links, but only one can be used at a time")
   192  	}
   193  	wantFDs := 0
   194  	for _, l := range args.FDBasedLinks {
   195  		wantFDs += l.NumChannels
   196  	}
   197  	if len(args.XDPLinks) > 0 {
   198  		wantFDs += 4
   199  	}
   200  	if args.PCAP {
   201  		wantFDs++
   202  	}
   203  	if got := len(args.FilePayload.Files); got != wantFDs {
   204  		return fmt.Errorf("args.FilePayload.Files has %d FDs but we need %d entries based on FDBasedLinks, XDPLinks, and PCAP", got, wantFDs)
   205  	}
   206  
   207  	var nicID tcpip.NICID
   208  	nicids := make(map[string]tcpip.NICID)
   209  
   210  	// Collect routes from all links.
   211  	var routes []tcpip.Route
   212  
   213  	// Loopback normally appear before other interfaces.
   214  	for _, link := range args.LoopbackLinks {
   215  		nicID++
   216  		nicids[link.Name] = nicID
   217  
   218  		linkEP := packetsocket.New(ethernet.New(loopback.New()))
   219  
   220  		log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses)
   221  		opts := stack.NICOptions{
   222  			Name:       link.Name,
   223  			GROTimeout: link.GvisorGROTimeout,
   224  		}
   225  		if err := n.createNICWithAddrs(nicID, linkEP, opts, link.Addresses); err != nil {
   226  			return err
   227  		}
   228  
   229  		// Collect the routes from this link.
   230  		for _, r := range link.Routes {
   231  			route, err := r.toTcpipRoute(nicID)
   232  			if err != nil {
   233  				return err
   234  			}
   235  			routes = append(routes, route)
   236  		}
   237  	}
   238  
   239  	// Setup fdbased or XDP links.
   240  	if len(args.FDBasedLinks) > 0 {
   241  		// Choose a dispatch mode.
   242  		dispatchMode := fdbased.RecvMMsg
   243  		version, err := hostos.KernelVersion()
   244  		if err != nil {
   245  			return err
   246  		}
   247  		if version.AtLeast(5, 6) {
   248  			dispatchMode = fdbased.PacketMMap
   249  		} else {
   250  			log.Infof("Host kernel version < 5.6, falling back to RecvMMsg dispatch")
   251  		}
   252  
   253  		fdOffset := 0
   254  		for _, link := range args.FDBasedLinks {
   255  			nicID++
   256  			nicids[link.Name] = nicID
   257  
   258  			FDs := make([]int, 0, link.NumChannels)
   259  			for j := 0; j < link.NumChannels; j++ {
   260  				// Copy the underlying FD.
   261  				oldFD := args.FilePayload.Files[fdOffset].Fd()
   262  				newFD, err := unix.Dup(int(oldFD))
   263  				if err != nil {
   264  					return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
   265  				}
   266  				FDs = append(FDs, newFD)
   267  				fdOffset++
   268  			}
   269  
   270  			mac := tcpip.LinkAddress(link.LinkAddress)
   271  			log.Infof("gso max size is: %d", link.GSOMaxSize)
   272  
   273  			linkEP, err := fdbased.New(&fdbased.Options{
   274  				FDs:                FDs,
   275  				MTU:                uint32(link.MTU),
   276  				EthernetHeader:     mac != "",
   277  				Address:            mac,
   278  				PacketDispatchMode: dispatchMode,
   279  				GSOMaxSize:         link.GSOMaxSize,
   280  				GvisorGSOEnabled:   link.GvisorGSOEnabled,
   281  				TXChecksumOffload:  link.TXChecksumOffload,
   282  				RXChecksumOffload:  link.RXChecksumOffload,
   283  			})
   284  			if err != nil {
   285  				return err
   286  			}
   287  
   288  			// Wrap linkEP in a sniffer to enable packet logging.
   289  			sniffEP := sniffer.New(packetsocket.New(linkEP))
   290  
   291  			var qDisc stack.QueueingDiscipline
   292  			switch link.QDisc {
   293  			case config.QDiscNone:
   294  			case config.QDiscFIFO:
   295  				log.Infof("Enabling FIFO QDisc on %q", link.Name)
   296  				qDisc = fifo.New(sniffEP, runtime.GOMAXPROCS(0), 1000)
   297  			}
   298  
   299  			log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
   300  			opts := stack.NICOptions{
   301  				Name:       link.Name,
   302  				QDisc:      qDisc,
   303  				GROTimeout: link.GvisorGROTimeout,
   304  			}
   305  			if err := n.createNICWithAddrs(nicID, sniffEP, opts, link.Addresses); err != nil {
   306  				return err
   307  			}
   308  
   309  			// Collect the routes from this link.
   310  			for _, r := range link.Routes {
   311  				route, err := r.toTcpipRoute(nicID)
   312  				if err != nil {
   313  					return err
   314  				}
   315  				routes = append(routes, route)
   316  			}
   317  
   318  			for _, neigh := range link.Neighbors {
   319  				proto, tcpipAddr := ipToAddressAndProto(neigh.IP)
   320  				n.Stack.AddStaticNeighbor(nicID, proto, tcpipAddr, tcpip.LinkAddress(neigh.HardwareAddr))
   321  			}
   322  		}
   323  	} else if len(args.XDPLinks) > 0 {
   324  		if nlinks := len(args.XDPLinks); nlinks > 1 {
   325  			return fmt.Errorf("XDP only supports one link device, but got %d", nlinks)
   326  		}
   327  		link := args.XDPLinks[0]
   328  		nicID++
   329  		nicids[link.Name] = nicID
   330  
   331  		// Get the AF_XDP socket.
   332  		fdOffset := 0
   333  		oldFD := args.FilePayload.Files[fdOffset].Fd()
   334  		fd, err := unix.Dup(int(oldFD))
   335  		if err != nil {
   336  			return fmt.Errorf("failed to dup AF_XDP fd %v: %v", oldFD, err)
   337  		}
   338  		fdOffset++
   339  
   340  		// The parent process sends several other FDs in order
   341  		// to keep them open and alive. These are for BPF
   342  		// programs and maps that, if closed, will break the
   343  		// dispatcher.
   344  		for _, fdName := range []string{"program-fd", "sockmap-fd", "link-fd"} {
   345  			oldFD := args.FilePayload.Files[fdOffset].Fd()
   346  			if _, err := unix.Dup(int(oldFD)); err != nil {
   347  				return fmt.Errorf("failed to dup %s with FD %d: %v", fdName, oldFD, err)
   348  			}
   349  			fdOffset++
   350  		}
   351  
   352  		mac := tcpip.LinkAddress(link.LinkAddress)
   353  		linkEP, err := xdp.New(&xdp.Options{
   354  			FD:                fd,
   355  			Address:           mac,
   356  			TXChecksumOffload: link.TXChecksumOffload,
   357  			RXChecksumOffload: link.RXChecksumOffload,
   358  			InterfaceIndex:    link.InterfaceIndex,
   359  		})
   360  		if err != nil {
   361  			return err
   362  		}
   363  
   364  		// Wrap linkEP in a sniffer to enable packet logging.
   365  		var sniffEP stack.LinkEndpoint
   366  		if args.PCAP {
   367  			newFD, err := unix.Dup(int(args.FilePayload.Files[fdOffset].Fd()))
   368  			if err != nil {
   369  				return fmt.Errorf("failed to dup pcap FD: %v", err)
   370  			}
   371  			const packetTruncateSize = 4096
   372  			sniffEP, err = sniffer.NewWithWriter(packetsocket.New(linkEP), os.NewFile(uintptr(newFD), "pcap-file"), packetTruncateSize)
   373  			if err != nil {
   374  				return fmt.Errorf("failed to create PCAP logger: %v", err)
   375  			}
   376  			fdOffset++
   377  		} else {
   378  			sniffEP = sniffer.New(packetsocket.New(linkEP))
   379  		}
   380  
   381  		var qDisc stack.QueueingDiscipline
   382  		switch link.QDisc {
   383  		case config.QDiscNone:
   384  		case config.QDiscFIFO:
   385  			log.Infof("Enabling FIFO QDisc on %q", link.Name)
   386  			qDisc = fifo.New(sniffEP, runtime.GOMAXPROCS(0), 1000)
   387  		}
   388  
   389  		log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
   390  		opts := stack.NICOptions{
   391  			Name:       link.Name,
   392  			QDisc:      qDisc,
   393  			GROTimeout: link.GvisorGROTimeout,
   394  		}
   395  		if err := n.createNICWithAddrs(nicID, sniffEP, opts, link.Addresses); err != nil {
   396  			return err
   397  		}
   398  
   399  		// Collect the routes from this link.
   400  		for _, r := range link.Routes {
   401  			route, err := r.toTcpipRoute(nicID)
   402  			if err != nil {
   403  				return err
   404  			}
   405  			routes = append(routes, route)
   406  		}
   407  
   408  		for _, neigh := range link.Neighbors {
   409  			proto, tcpipAddr := ipToAddressAndProto(neigh.IP)
   410  			n.Stack.AddStaticNeighbor(nicID, proto, tcpipAddr, tcpip.LinkAddress(neigh.HardwareAddr))
   411  		}
   412  	}
   413  
   414  	if !args.Defaultv4Gateway.Route.Empty() {
   415  		nicID, ok := nicids[args.Defaultv4Gateway.Name]
   416  		if !ok {
   417  			return fmt.Errorf("invalid interface name %q for default route", args.Defaultv4Gateway.Name)
   418  		}
   419  		route, err := args.Defaultv4Gateway.Route.toTcpipRoute(nicID)
   420  		if err != nil {
   421  			return err
   422  		}
   423  		routes = append(routes, route)
   424  	}
   425  
   426  	if !args.Defaultv6Gateway.Route.Empty() {
   427  		nicID, ok := nicids[args.Defaultv6Gateway.Name]
   428  		if !ok {
   429  			return fmt.Errorf("invalid interface name %q for default route", args.Defaultv6Gateway.Name)
   430  		}
   431  		route, err := args.Defaultv6Gateway.Route.toTcpipRoute(nicID)
   432  		if err != nil {
   433  			return err
   434  		}
   435  		routes = append(routes, route)
   436  	}
   437  
   438  	log.Infof("Setting routes %+v", routes)
   439  	n.Stack.SetRouteTable(routes)
   440  	return nil
   441  }
   442  
   443  // createNICWithAddrs creates a NIC in the network stack and adds the given
   444  // addresses.
   445  func (n *Network) createNICWithAddrs(id tcpip.NICID, ep stack.LinkEndpoint, opts stack.NICOptions, addrs []IPWithPrefix) error {
   446  	if err := n.Stack.CreateNICWithOptions(id, ep, opts); err != nil {
   447  		return fmt.Errorf("CreateNICWithOptions(%d, _, %+v) failed: %v", id, opts, err)
   448  	}
   449  
   450  	for _, addr := range addrs {
   451  		proto, tcpipAddr := ipToAddressAndProto(addr.Address)
   452  		protocolAddr := tcpip.ProtocolAddress{
   453  			Protocol: proto,
   454  			AddressWithPrefix: tcpip.AddressWithPrefix{
   455  				Address:   tcpipAddr,
   456  				PrefixLen: addr.PrefixLen,
   457  			},
   458  		}
   459  		if err := n.Stack.AddProtocolAddress(id, protocolAddr, stack.AddressProperties{}); err != nil {
   460  			return fmt.Errorf("AddProtocolAddress(%d, %+v, {}) failed: %s", id, protocolAddr, err)
   461  		}
   462  	}
   463  	return nil
   464  }
   465  
   466  // ipToAddressAndProto converts IP to tcpip.Address and a protocol number.
   467  //
   468  // Note: don't use 'len(ip)' to determine IP version because length is always 16.
   469  func ipToAddressAndProto(ip net.IP) (tcpip.NetworkProtocolNumber, tcpip.Address) {
   470  	if i4 := ip.To4(); i4 != nil {
   471  		return ipv4.ProtocolNumber, tcpip.AddrFromSlice(i4)
   472  	}
   473  	return ipv6.ProtocolNumber, tcpip.AddrFromSlice(ip)
   474  }
   475  
   476  // ipToAddress converts IP to tcpip.Address, ignoring the protocol.
   477  func ipToAddress(ip net.IP) tcpip.Address {
   478  	_, addr := ipToAddressAndProto(ip)
   479  	return addr
   480  }
   481  
   482  // ipMaskToAddressMask converts IPMask to tcpip.AddressMask, ignoring the
   483  // protocol.
   484  func ipMaskToAddressMask(ipMask net.IPMask) tcpip.AddressMask {
   485  	addr := ipToAddress(net.IP(ipMask))
   486  	return tcpip.MaskFromBytes(addr.AsSlice())
   487  }