github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/hostinet/stack.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hostinet
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  	"io"
    21  	"io/ioutil"
    22  	"os"
    23  	"reflect"
    24  	"strconv"
    25  	"strings"
    26  
    27  	"syscall"
    28  
    29  	"golang.org/x/sys/unix"
    30  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    31  	"github.com/SagerNet/gvisor/pkg/context"
    32  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    33  	"github.com/SagerNet/gvisor/pkg/log"
    34  	"github.com/SagerNet/gvisor/pkg/marshal/primitive"
    35  	"github.com/SagerNet/gvisor/pkg/sentry/inet"
    36  	"github.com/SagerNet/gvisor/pkg/syserr"
    37  	"github.com/SagerNet/gvisor/pkg/tcpip"
    38  	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
    39  	"github.com/SagerNet/gvisor/pkg/usermem"
    40  )
    41  
    42  var defaultRecvBufSize = inet.TCPBufferSize{
    43  	Min:     4096,
    44  	Default: 87380,
    45  	Max:     6291456,
    46  }
    47  
    48  var defaultSendBufSize = inet.TCPBufferSize{
    49  	Min:     4096,
    50  	Default: 16384,
    51  	Max:     4194304,
    52  }
    53  
    54  // Stack implements inet.Stack for host sockets.
    55  type Stack struct {
    56  	// Stack is immutable.
    57  	interfaces     map[int32]inet.Interface
    58  	interfaceAddrs map[int32][]inet.InterfaceAddr
    59  	routes         []inet.Route
    60  	supportsIPv6   bool
    61  	tcpRecovery    inet.TCPLossRecovery
    62  	tcpRecvBufSize inet.TCPBufferSize
    63  	tcpSendBufSize inet.TCPBufferSize
    64  	tcpSACKEnabled bool
    65  	netDevFile     *os.File
    66  	netSNMPFile    *os.File
    67  }
    68  
    69  // NewStack returns an empty Stack containing no configuration.
    70  func NewStack() *Stack {
    71  	return &Stack{
    72  		interfaces:     make(map[int32]inet.Interface),
    73  		interfaceAddrs: make(map[int32][]inet.InterfaceAddr),
    74  	}
    75  }
    76  
    77  // Configure sets up the stack using the current state of the host network.
    78  func (s *Stack) Configure() error {
    79  	if err := addHostInterfaces(s); err != nil {
    80  		return err
    81  	}
    82  
    83  	if err := addHostRoutes(s); err != nil {
    84  		return err
    85  	}
    86  
    87  	if _, err := os.Stat("/proc/net/if_inet6"); err == nil {
    88  		s.supportsIPv6 = true
    89  	}
    90  
    91  	s.tcpRecvBufSize = defaultRecvBufSize
    92  	if tcpRMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_rmem"); err == nil {
    93  		s.tcpRecvBufSize = tcpRMem
    94  	} else {
    95  		log.Warningf("Failed to read TCP receive buffer size, using default values")
    96  	}
    97  
    98  	s.tcpSendBufSize = defaultSendBufSize
    99  	if tcpWMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_wmem"); err == nil {
   100  		s.tcpSendBufSize = tcpWMem
   101  	} else {
   102  		log.Warningf("Failed to read TCP send buffer size, using default values")
   103  	}
   104  
   105  	// SACK is important for performance and even compatibility, assume it's
   106  	// enabled if we can't find the actual value.
   107  	s.tcpSACKEnabled = true
   108  	if sack, err := ioutil.ReadFile("/proc/sys/net/ipv4/tcp_sack"); err == nil {
   109  		s.tcpSACKEnabled = strings.TrimSpace(string(sack)) != "0"
   110  	} else {
   111  		log.Warningf("Failed to read if TCP SACK if enabled, setting to true")
   112  	}
   113  
   114  	if f, err := os.Open("/proc/net/dev"); err != nil {
   115  		log.Warningf("Failed to open /proc/net/dev: %v", err)
   116  	} else {
   117  		s.netDevFile = f
   118  	}
   119  
   120  	if f, err := os.Open("/proc/net/snmp"); err != nil {
   121  		log.Warningf("Failed to open /proc/net/snmp: %v", err)
   122  	} else {
   123  		s.netSNMPFile = f
   124  	}
   125  
   126  	return nil
   127  }
   128  
   129  // ExtractHostInterfaces will populate an interface map and
   130  // interfaceAddrs map with the results of the equivalent
   131  // netlink messages.
   132  func ExtractHostInterfaces(links []syscall.NetlinkMessage, addrs []syscall.NetlinkMessage, interfaces map[int32]inet.Interface, interfaceAddrs map[int32][]inet.InterfaceAddr) error {
   133  	for _, link := range links {
   134  		if link.Header.Type != unix.RTM_NEWLINK {
   135  			continue
   136  		}
   137  		if len(link.Data) < unix.SizeofIfInfomsg {
   138  			return fmt.Errorf("RTM_GETLINK returned RTM_NEWLINK message with invalid data length (%d bytes, expected at least %d bytes)", len(link.Data), unix.SizeofIfInfomsg)
   139  		}
   140  		var ifinfo linux.InterfaceInfoMessage
   141  		ifinfo.UnmarshalUnsafe(link.Data[:ifinfo.SizeBytes()])
   142  		inetIF := inet.Interface{
   143  			DeviceType: ifinfo.Type,
   144  			Flags:      ifinfo.Flags,
   145  		}
   146  		// Not clearly documented: syscall.ParseNetlinkRouteAttr will check the
   147  		// syscall.NetlinkMessage.Header.Type and skip the struct ifinfomsg
   148  		// accordingly.
   149  		attrs, err := syscall.ParseNetlinkRouteAttr(&link)
   150  		if err != nil {
   151  			return fmt.Errorf("RTM_GETLINK returned RTM_NEWLINK message with invalid rtattrs: %v", err)
   152  		}
   153  		for _, attr := range attrs {
   154  			switch attr.Attr.Type {
   155  			case unix.IFLA_ADDRESS:
   156  				inetIF.Addr = attr.Value
   157  			case unix.IFLA_IFNAME:
   158  				inetIF.Name = string(attr.Value[:len(attr.Value)-1])
   159  			}
   160  		}
   161  		interfaces[ifinfo.Index] = inetIF
   162  	}
   163  
   164  	for _, addr := range addrs {
   165  		if addr.Header.Type != unix.RTM_NEWADDR {
   166  			continue
   167  		}
   168  		if len(addr.Data) < unix.SizeofIfAddrmsg {
   169  			return fmt.Errorf("RTM_GETADDR returned RTM_NEWADDR message with invalid data length (%d bytes, expected at least %d bytes)", len(addr.Data), unix.SizeofIfAddrmsg)
   170  		}
   171  		var ifaddr linux.InterfaceAddrMessage
   172  		ifaddr.UnmarshalUnsafe(addr.Data[:ifaddr.SizeBytes()])
   173  		inetAddr := inet.InterfaceAddr{
   174  			Family:    ifaddr.Family,
   175  			PrefixLen: ifaddr.PrefixLen,
   176  			Flags:     ifaddr.Flags,
   177  		}
   178  		attrs, err := syscall.ParseNetlinkRouteAttr(&addr)
   179  		if err != nil {
   180  			return fmt.Errorf("RTM_GETADDR returned RTM_NEWADDR message with invalid rtattrs: %v", err)
   181  		}
   182  		for _, attr := range attrs {
   183  			switch attr.Attr.Type {
   184  			case unix.IFA_ADDRESS:
   185  				inetAddr.Addr = attr.Value
   186  			}
   187  		}
   188  		interfaceAddrs[int32(ifaddr.Index)] = append(interfaceAddrs[int32(ifaddr.Index)], inetAddr)
   189  	}
   190  
   191  	return nil
   192  }
   193  
   194  // ExtractHostRoutes populates the given routes slice with the data from the
   195  // host route table.
   196  func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error) {
   197  	var routes []inet.Route
   198  	for _, routeMsg := range routeMsgs {
   199  		if routeMsg.Header.Type != unix.RTM_NEWROUTE {
   200  			continue
   201  		}
   202  
   203  		var ifRoute linux.RouteMessage
   204  		ifRoute.UnmarshalUnsafe(routeMsg.Data[:ifRoute.SizeBytes()])
   205  		inetRoute := inet.Route{
   206  			Family:   ifRoute.Family,
   207  			DstLen:   ifRoute.DstLen,
   208  			SrcLen:   ifRoute.SrcLen,
   209  			TOS:      ifRoute.TOS,
   210  			Table:    ifRoute.Table,
   211  			Protocol: ifRoute.Protocol,
   212  			Scope:    ifRoute.Scope,
   213  			Type:     ifRoute.Type,
   214  			Flags:    ifRoute.Flags,
   215  		}
   216  
   217  		// Not clearly documented: syscall.ParseNetlinkRouteAttr will check the
   218  		// syscall.NetlinkMessage.Header.Type and skip the struct rtmsg
   219  		// accordingly.
   220  		attrs, err := syscall.ParseNetlinkRouteAttr(&routeMsg)
   221  		if err != nil {
   222  			return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid rtattrs: %v", err)
   223  		}
   224  
   225  		for _, attr := range attrs {
   226  			switch attr.Attr.Type {
   227  			case unix.RTA_DST:
   228  				inetRoute.DstAddr = attr.Value
   229  			case unix.RTA_SRC:
   230  				inetRoute.SrcAddr = attr.Value
   231  			case unix.RTA_GATEWAY:
   232  				inetRoute.GatewayAddr = attr.Value
   233  			case unix.RTA_OIF:
   234  				expected := int(binary.Size(inetRoute.OutputInterface))
   235  				if len(attr.Value) != expected {
   236  					return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected)
   237  				}
   238  				var outputIF primitive.Int32
   239  				outputIF.UnmarshalUnsafe(attr.Value)
   240  				inetRoute.OutputInterface = int32(outputIF)
   241  			}
   242  		}
   243  
   244  		routes = append(routes, inetRoute)
   245  	}
   246  
   247  	return routes, nil
   248  }
   249  
   250  func addHostInterfaces(s *Stack) error {
   251  	links, err := doNetlinkRouteRequest(unix.RTM_GETLINK)
   252  	if err != nil {
   253  		return fmt.Errorf("RTM_GETLINK failed: %v", err)
   254  	}
   255  
   256  	addrs, err := doNetlinkRouteRequest(unix.RTM_GETADDR)
   257  	if err != nil {
   258  		return fmt.Errorf("RTM_GETADDR failed: %v", err)
   259  	}
   260  
   261  	return ExtractHostInterfaces(links, addrs, s.interfaces, s.interfaceAddrs)
   262  }
   263  
   264  func addHostRoutes(s *Stack) error {
   265  	routes, err := doNetlinkRouteRequest(unix.RTM_GETROUTE)
   266  	if err != nil {
   267  		return fmt.Errorf("RTM_GETROUTE failed: %v", err)
   268  	}
   269  
   270  	s.routes, err = ExtractHostRoutes(routes)
   271  	if err != nil {
   272  		return err
   273  	}
   274  
   275  	return nil
   276  }
   277  
   278  func doNetlinkRouteRequest(req int) ([]syscall.NetlinkMessage, error) {
   279  	data, err := syscall.NetlinkRIB(req, syscall.AF_UNSPEC)
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  	return syscall.ParseNetlinkMessage(data)
   284  }
   285  
   286  func readTCPBufferSizeFile(filename string) (inet.TCPBufferSize, error) {
   287  	contents, err := ioutil.ReadFile(filename)
   288  	if err != nil {
   289  		return inet.TCPBufferSize{}, fmt.Errorf("failed to read %s: %v", filename, err)
   290  	}
   291  	ioseq := usermem.BytesIOSequence(contents)
   292  	fields := make([]int32, 3)
   293  	if n, err := usermem.CopyInt32StringsInVec(context.Background(), ioseq.IO, ioseq.Addrs, fields, ioseq.Opts); n != ioseq.NumBytes() || err != nil {
   294  		return inet.TCPBufferSize{}, fmt.Errorf("failed to parse %s (%q): got %v after %d/%d bytes", filename, contents, err, n, ioseq.NumBytes())
   295  	}
   296  	return inet.TCPBufferSize{
   297  		Min:     int(fields[0]),
   298  		Default: int(fields[1]),
   299  		Max:     int(fields[2]),
   300  	}, nil
   301  }
   302  
   303  // Interfaces implements inet.Stack.Interfaces.
   304  func (s *Stack) Interfaces() map[int32]inet.Interface {
   305  	interfaces := make(map[int32]inet.Interface)
   306  	for k, v := range s.interfaces {
   307  		interfaces[k] = v
   308  	}
   309  	return interfaces
   310  }
   311  
   312  // InterfaceAddrs implements inet.Stack.InterfaceAddrs.
   313  func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr {
   314  	addrs := make(map[int32][]inet.InterfaceAddr)
   315  	for k, v := range s.interfaceAddrs {
   316  		addrs[k] = append([]inet.InterfaceAddr(nil), v...)
   317  	}
   318  	return addrs
   319  }
   320  
   321  // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr.
   322  func (s *Stack) AddInterfaceAddr(int32, inet.InterfaceAddr) error {
   323  	return linuxerr.EACCES
   324  }
   325  
   326  // RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr.
   327  func (s *Stack) RemoveInterfaceAddr(int32, inet.InterfaceAddr) error {
   328  	return linuxerr.EACCES
   329  }
   330  
   331  // SupportsIPv6 implements inet.Stack.SupportsIPv6.
   332  func (s *Stack) SupportsIPv6() bool {
   333  	return s.supportsIPv6
   334  }
   335  
   336  // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
   337  func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
   338  	return s.tcpRecvBufSize, nil
   339  }
   340  
   341  // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize.
   342  func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
   343  	return linuxerr.EACCES
   344  }
   345  
   346  // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize.
   347  func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
   348  	return s.tcpSendBufSize, nil
   349  }
   350  
   351  // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize.
   352  func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
   353  	return linuxerr.EACCES
   354  }
   355  
   356  // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled.
   357  func (s *Stack) TCPSACKEnabled() (bool, error) {
   358  	return s.tcpSACKEnabled, nil
   359  }
   360  
   361  // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled.
   362  func (s *Stack) SetTCPSACKEnabled(bool) error {
   363  	return linuxerr.EACCES
   364  }
   365  
   366  // TCPRecovery implements inet.Stack.TCPRecovery.
   367  func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) {
   368  	return s.tcpRecovery, nil
   369  }
   370  
   371  // SetTCPRecovery implements inet.Stack.SetTCPRecovery.
   372  func (s *Stack) SetTCPRecovery(inet.TCPLossRecovery) error {
   373  	return linuxerr.EACCES
   374  }
   375  
   376  // getLine reads one line from proc file, with specified prefix.
   377  // The last argument, withHeader, specifies if it contains line header.
   378  func getLine(f *os.File, prefix string, withHeader bool) string {
   379  	data := make([]byte, 4096)
   380  
   381  	if _, err := f.Seek(0, 0); err != nil {
   382  		return ""
   383  	}
   384  
   385  	if _, err := io.ReadFull(f, data); err != io.ErrUnexpectedEOF {
   386  		return ""
   387  	}
   388  
   389  	prefix = prefix + ":"
   390  	lines := strings.Split(string(data), "\n")
   391  	for _, l := range lines {
   392  		l = strings.TrimSpace(l)
   393  		if strings.HasPrefix(l, prefix) {
   394  			if withHeader {
   395  				withHeader = false
   396  				continue
   397  			}
   398  			return l
   399  		}
   400  	}
   401  	return ""
   402  }
   403  
   404  func toSlice(i interface{}) []uint64 {
   405  	v := reflect.Indirect(reflect.ValueOf(i))
   406  	return v.Slice(0, v.Len()).Interface().([]uint64)
   407  }
   408  
   409  // Statistics implements inet.Stack.Statistics.
   410  func (s *Stack) Statistics(stat interface{}, arg string) error {
   411  	var (
   412  		snmpTCP   bool
   413  		rawLine   string
   414  		sliceStat []uint64
   415  	)
   416  
   417  	switch stat.(type) {
   418  	case *inet.StatDev:
   419  		if s.netDevFile == nil {
   420  			return fmt.Errorf("/proc/net/dev is not opened for hostinet")
   421  		}
   422  		rawLine = getLine(s.netDevFile, arg, false /* with no header */)
   423  	case *inet.StatSNMPIP, *inet.StatSNMPICMP, *inet.StatSNMPICMPMSG, *inet.StatSNMPTCP, *inet.StatSNMPUDP, *inet.StatSNMPUDPLite:
   424  		if s.netSNMPFile == nil {
   425  			return fmt.Errorf("/proc/net/snmp is not opened for hostinet")
   426  		}
   427  		rawLine = getLine(s.netSNMPFile, arg, true)
   428  	default:
   429  		return syserr.ErrEndpointOperation.ToError()
   430  	}
   431  
   432  	if rawLine == "" {
   433  		return fmt.Errorf("failed to get raw line")
   434  	}
   435  
   436  	parts := strings.SplitN(rawLine, ":", 2)
   437  	if len(parts) != 2 {
   438  		return fmt.Errorf("failed to get prefix from: %q", rawLine)
   439  	}
   440  
   441  	sliceStat = toSlice(stat)
   442  	fields := strings.Fields(strings.TrimSpace(parts[1]))
   443  	if len(fields) != len(sliceStat) {
   444  		return fmt.Errorf("failed to parse fields: %q", rawLine)
   445  	}
   446  	if _, ok := stat.(*inet.StatSNMPTCP); ok {
   447  		snmpTCP = true
   448  	}
   449  	for i := 0; i < len(sliceStat); i++ {
   450  		var err error
   451  		if snmpTCP && i == 3 {
   452  			var tmp int64
   453  			// MaxConn field is signed, RFC 2012.
   454  			tmp, err = strconv.ParseInt(fields[i], 10, 64)
   455  			sliceStat[i] = uint64(tmp) // Convert back to int before use.
   456  		} else {
   457  			sliceStat[i], err = strconv.ParseUint(fields[i], 10, 64)
   458  		}
   459  		if err != nil {
   460  			return fmt.Errorf("failed to parse field %d from: %q, %v", i, rawLine, err)
   461  		}
   462  	}
   463  
   464  	return nil
   465  }
   466  
   467  // RouteTable implements inet.Stack.RouteTable.
   468  func (s *Stack) RouteTable() []inet.Route {
   469  	return append([]inet.Route(nil), s.routes...)
   470  }
   471  
   472  // Resume implements inet.Stack.Resume.
   473  func (s *Stack) Resume() {}
   474  
   475  // RegisteredEndpoints implements inet.Stack.RegisteredEndpoints.
   476  func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil }
   477  
   478  // CleanupEndpoints implements inet.Stack.CleanupEndpoints.
   479  func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil }
   480  
   481  // RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
   482  func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
   483  
   484  // SetForwarding implements inet.Stack.SetForwarding.
   485  func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error {
   486  	return linuxerr.EACCES
   487  }
   488  
   489  // PortRange implements inet.Stack.PortRange.
   490  func (*Stack) PortRange() (uint16, uint16) {
   491  	// Use the default Linux values per net/ipv4/af_inet.c:inet_init_net().
   492  	return 32768, 28232
   493  }
   494  
   495  // SetPortRange implements inet.Stack.SetPortRange.
   496  func (*Stack) SetPortRange(start uint16, end uint16) error {
   497  	return linuxerr.EACCES
   498  }