gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/socket/hostinet/stack.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hostinet
    16  
    17  import (
    18  	"fmt"
    19  	"io"
    20  	"io/ioutil"
    21  	"os"
    22  	"reflect"
    23  	"strconv"
    24  	"strings"
    25  
    26  	"gvisor.dev/gvisor/pkg/abi/linux"
    27  	"gvisor.dev/gvisor/pkg/context"
    28  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    29  	"gvisor.dev/gvisor/pkg/log"
    30  	"gvisor.dev/gvisor/pkg/sentry/inet"
    31  	"gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg"
    32  	"gvisor.dev/gvisor/pkg/syserr"
    33  	"gvisor.dev/gvisor/pkg/tcpip"
    34  	"gvisor.dev/gvisor/pkg/tcpip/stack"
    35  	"gvisor.dev/gvisor/pkg/usermem"
    36  )
    37  
    38  var defaultRecvBufSize = inet.TCPBufferSize{
    39  	Min:     4096,
    40  	Default: 87380,
    41  	Max:     6291456,
    42  }
    43  
    44  var defaultSendBufSize = inet.TCPBufferSize{
    45  	Min:     4096,
    46  	Default: 16384,
    47  	Max:     4194304,
    48  }
    49  
    50  // Stack implements inet.Stack for host sockets.
    51  type Stack struct {
    52  	// Stack is immutable.
    53  	supportsIPv6   bool
    54  	tcpRecovery    inet.TCPLossRecovery
    55  	tcpRecvBufSize inet.TCPBufferSize
    56  	tcpSendBufSize inet.TCPBufferSize
    57  	tcpSACKEnabled bool
    58  	netDevFile     *os.File
    59  	netSNMPFile    *os.File
    60  	// allowedSocketTypes is the list of allowed socket types
    61  	allowedSocketTypes []AllowedSocketType
    62  }
    63  
    64  // Destroy implements inet.Stack.Destroy.
    65  func (*Stack) Destroy() {
    66  }
    67  
    68  // NewStack returns an empty Stack containing no configuration.
    69  func NewStack() *Stack {
    70  	return &Stack{}
    71  }
    72  
    73  // Configure sets up the stack using the current state of the host network.
    74  func (s *Stack) Configure(allowRawSockets bool) error {
    75  	if _, err := os.Stat("/proc/net/if_inet6"); err == nil {
    76  		s.supportsIPv6 = true
    77  	}
    78  
    79  	s.tcpRecvBufSize = defaultRecvBufSize
    80  	if tcpRMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_rmem"); err == nil {
    81  		s.tcpRecvBufSize = tcpRMem
    82  	} else {
    83  		log.Warningf("Failed to read TCP receive buffer size, using default values")
    84  	}
    85  
    86  	s.tcpSendBufSize = defaultSendBufSize
    87  	if tcpWMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_wmem"); err == nil {
    88  		s.tcpSendBufSize = tcpWMem
    89  	} else {
    90  		log.Warningf("Failed to read TCP send buffer size, using default values")
    91  	}
    92  
    93  	// SACK is important for performance and even compatibility, assume it's
    94  	// enabled if we can't find the actual value.
    95  	s.tcpSACKEnabled = true
    96  	if sack, err := ioutil.ReadFile("/proc/sys/net/ipv4/tcp_sack"); err == nil {
    97  		s.tcpSACKEnabled = strings.TrimSpace(string(sack)) != "0"
    98  	} else {
    99  		log.Warningf("Failed to read if TCP SACK if enabled, setting to true")
   100  	}
   101  
   102  	if f, err := os.Open("/proc/net/dev"); err != nil {
   103  		log.Warningf("Failed to open /proc/net/dev: %v", err)
   104  	} else {
   105  		s.netDevFile = f
   106  	}
   107  
   108  	if f, err := os.Open("/proc/net/snmp"); err != nil {
   109  		log.Warningf("Failed to open /proc/net/snmp: %v", err)
   110  	} else {
   111  		s.netSNMPFile = f
   112  	}
   113  
   114  	s.allowedSocketTypes = AllowedSocketTypes
   115  	if allowRawSockets {
   116  		s.allowedSocketTypes = append(s.allowedSocketTypes, AllowedRawSocketTypes...)
   117  	}
   118  
   119  	return nil
   120  }
   121  
   122  func readTCPBufferSizeFile(filename string) (inet.TCPBufferSize, error) {
   123  	contents, err := ioutil.ReadFile(filename)
   124  	if err != nil {
   125  		return inet.TCPBufferSize{}, fmt.Errorf("failed to read %s: %v", filename, err)
   126  	}
   127  	ioseq := usermem.BytesIOSequence(contents)
   128  	fields := make([]int32, 3)
   129  	if n, err := usermem.CopyInt32StringsInVec(context.Background(), ioseq.IO, ioseq.Addrs, fields, ioseq.Opts); n != ioseq.NumBytes() || err != nil {
   130  		return inet.TCPBufferSize{}, fmt.Errorf("failed to parse %s (%q): got %v after %d/%d bytes", filename, contents, err, n, ioseq.NumBytes())
   131  	}
   132  	return inet.TCPBufferSize{
   133  		Min:     int(fields[0]),
   134  		Default: int(fields[1]),
   135  		Max:     int(fields[2]),
   136  	}, nil
   137  }
   138  
   139  // Interfaces implements inet.Stack.Interfaces.
   140  func (s *Stack) Interfaces() map[int32]inet.Interface {
   141  	ifs, err := getInterfaces()
   142  	if err != nil {
   143  		log.Warningf("could not get host interface: %v", err)
   144  		return nil
   145  	}
   146  
   147  	// query interface features for each of the host interfaces.
   148  	if err := queryInterfaceFeatures(ifs); err != nil {
   149  		log.Warningf("could not query host interfaces: %v", err)
   150  		return nil
   151  	}
   152  	return ifs
   153  }
   154  
   155  // RemoveInterface implements inet.Stack.RemoveInterface.
   156  func (*Stack) RemoveInterface(idx int32) error {
   157  	return removeInterface(idx)
   158  }
   159  
   160  // InterfaceAddrs implements inet.Stack.InterfaceAddrs.
   161  func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr {
   162  	addrs, err := getInterfaceAddrs()
   163  	if err != nil {
   164  		log.Warningf("failed to get host interface addresses: %v", err)
   165  		return nil
   166  	}
   167  	return addrs
   168  }
   169  
   170  // SetInterface implements inet.Stack.SetInterface.
   171  func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Error {
   172  	var ifinfomsg linux.InterfaceInfoMessage
   173  	attrs, ok := msg.GetData(&ifinfomsg)
   174  	if !ok {
   175  		return syserr.ErrInvalidArgument
   176  	}
   177  	for !attrs.Empty() {
   178  		// The index is unspecified, search by the interface name.
   179  		ahdr, value, rest, ok := attrs.ParseFirst()
   180  		if !ok {
   181  			return syserr.ErrInvalidArgument
   182  		}
   183  		attrs = rest
   184  		switch ahdr.Type {
   185  		case linux.IFLA_IFNAME:
   186  			if len(value) < 1 {
   187  				return syserr.ErrInvalidArgument
   188  			}
   189  			if ifinfomsg.Index != 0 {
   190  				// Device name changing isn't supported yet.
   191  				return syserr.ErrNotSupported
   192  			}
   193  			ifname := string(value[:len(value)-1])
   194  			for idx, ifa := range s.Interfaces() {
   195  				if ifname == ifa.Name {
   196  					ifinfomsg.Index = idx
   197  					break
   198  				}
   199  			}
   200  		default:
   201  			ctx.Warningf("unexpected attribute: %x", ahdr.Type)
   202  			return syserr.ErrNotSupported
   203  		}
   204  	}
   205  	if ifinfomsg.Index == 0 {
   206  		return syserr.ErrNoDevice
   207  	}
   208  
   209  	flags := msg.Header().Flags
   210  	if flags&(linux.NLM_F_EXCL|linux.NLM_F_REPLACE) != 0 {
   211  		return syserr.ErrExists
   212  	}
   213  
   214  	if ifinfomsg.Flags != 0 || ifinfomsg.Change != 0 {
   215  		if ifinfomsg.Change & ^uint32(linux.IFF_UP) != 0 {
   216  			ctx.Warningf("Unsupported ifi_change flags: %x", ifinfomsg.Change)
   217  			return syserr.ErrInvalidArgument
   218  		}
   219  		if ifinfomsg.Flags & ^uint32(linux.IFF_UP) != 0 {
   220  			ctx.Warningf("Unsupported ifi_flags: %x", ifinfomsg.Change)
   221  			return syserr.ErrInvalidArgument
   222  		}
   223  		// Netstack interfaces are always up.
   224  	}
   225  	return nil
   226  }
   227  
   228  // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr.
   229  func (*Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
   230  	return addInterfaceAddr(idx, addr)
   231  }
   232  
   233  // RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr.
   234  func (*Stack) RemoveInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
   235  	return removeInterfaceAddr(idx, addr)
   236  }
   237  
   238  // SupportsIPv6 implements inet.Stack.SupportsIPv6.
   239  func (s *Stack) SupportsIPv6() bool {
   240  	return s.supportsIPv6
   241  }
   242  
   243  // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
   244  func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
   245  	return s.tcpRecvBufSize, nil
   246  }
   247  
   248  // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize.
   249  func (*Stack) SetTCPReceiveBufferSize(inet.TCPBufferSize) error {
   250  	return linuxerr.EACCES
   251  }
   252  
   253  // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize.
   254  func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
   255  	return s.tcpSendBufSize, nil
   256  }
   257  
   258  // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize.
   259  func (*Stack) SetTCPSendBufferSize(inet.TCPBufferSize) error {
   260  	return linuxerr.EACCES
   261  }
   262  
   263  // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled.
   264  func (s *Stack) TCPSACKEnabled() (bool, error) {
   265  	return s.tcpSACKEnabled, nil
   266  }
   267  
   268  // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled.
   269  func (*Stack) SetTCPSACKEnabled(bool) error {
   270  	return linuxerr.EACCES
   271  }
   272  
   273  // TCPRecovery implements inet.Stack.TCPRecovery.
   274  func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) {
   275  	return s.tcpRecovery, nil
   276  }
   277  
   278  // SetTCPRecovery implements inet.Stack.SetTCPRecovery.
   279  func (*Stack) SetTCPRecovery(inet.TCPLossRecovery) error {
   280  	return linuxerr.EACCES
   281  }
   282  
   283  // getLine reads one line from proc file, with specified prefix.
   284  // The last argument, withHeader, specifies if it contains line header.
   285  func getLine(f *os.File, prefix string, withHeader bool) string {
   286  	data := make([]byte, 4096)
   287  
   288  	if _, err := f.Seek(0, 0); err != nil {
   289  		return ""
   290  	}
   291  
   292  	if _, err := io.ReadFull(f, data); err != io.ErrUnexpectedEOF {
   293  		return ""
   294  	}
   295  
   296  	prefix = prefix + ":"
   297  	lines := strings.Split(string(data), "\n")
   298  	for _, l := range lines {
   299  		l = strings.TrimSpace(l)
   300  		if strings.HasPrefix(l, prefix) {
   301  			if withHeader {
   302  				withHeader = false
   303  				continue
   304  			}
   305  			return l
   306  		}
   307  	}
   308  	return ""
   309  }
   310  
   311  func toSlice(i any) []uint64 {
   312  	v := reflect.Indirect(reflect.ValueOf(i))
   313  	return v.Slice(0, v.Len()).Interface().([]uint64)
   314  }
   315  
   316  // Statistics implements inet.Stack.Statistics.
   317  func (s *Stack) Statistics(stat any, arg string) error {
   318  	var (
   319  		snmpTCP   bool
   320  		rawLine   string
   321  		sliceStat []uint64
   322  	)
   323  
   324  	switch stat.(type) {
   325  	case *inet.StatDev:
   326  		if s.netDevFile == nil {
   327  			return fmt.Errorf("/proc/net/dev is not opened for hostinet")
   328  		}
   329  		rawLine = getLine(s.netDevFile, arg, false /* with no header */)
   330  	case *inet.StatSNMPIP, *inet.StatSNMPICMP, *inet.StatSNMPICMPMSG, *inet.StatSNMPTCP, *inet.StatSNMPUDP, *inet.StatSNMPUDPLite:
   331  		if s.netSNMPFile == nil {
   332  			return fmt.Errorf("/proc/net/snmp is not opened for hostinet")
   333  		}
   334  		rawLine = getLine(s.netSNMPFile, arg, true)
   335  	default:
   336  		return syserr.ErrEndpointOperation.ToError()
   337  	}
   338  
   339  	if rawLine == "" {
   340  		return fmt.Errorf("failed to get raw line")
   341  	}
   342  
   343  	parts := strings.SplitN(rawLine, ":", 2)
   344  	if len(parts) != 2 {
   345  		return fmt.Errorf("failed to get prefix from: %q", rawLine)
   346  	}
   347  
   348  	sliceStat = toSlice(stat)
   349  	fields := strings.Fields(strings.TrimSpace(parts[1]))
   350  	if len(fields) != len(sliceStat) {
   351  		return fmt.Errorf("failed to parse fields: %q", rawLine)
   352  	}
   353  	if _, ok := stat.(*inet.StatSNMPTCP); ok {
   354  		snmpTCP = true
   355  	}
   356  	for i := 0; i < len(sliceStat); i++ {
   357  		var err error
   358  		if snmpTCP && i == 3 {
   359  			var tmp int64
   360  			// MaxConn field is signed, RFC 2012.
   361  			tmp, err = strconv.ParseInt(fields[i], 10, 64)
   362  			sliceStat[i] = uint64(tmp) // Convert back to int before use.
   363  		} else {
   364  			sliceStat[i], err = strconv.ParseUint(fields[i], 10, 64)
   365  		}
   366  		if err != nil {
   367  			return fmt.Errorf("failed to parse field %d from: %q, %v", i, rawLine, err)
   368  		}
   369  	}
   370  
   371  	return nil
   372  }
   373  
   374  // RouteTable implements inet.Stack.RouteTable.
   375  func (s *Stack) RouteTable() []inet.Route {
   376  	routes, err := getRoutes()
   377  	if err != nil {
   378  		log.Warningf("failed to get routes: %v", err)
   379  		return nil
   380  	}
   381  	// Prepend empty route.
   382  	return append([]inet.Route(nil), routes...)
   383  }
   384  
   385  // Pause implements inet.Stack.Pause.
   386  func (*Stack) Pause() {}
   387  
   388  // Restore implements inet.Stack.Restore.
   389  func (*Stack) Restore() {}
   390  
   391  // Resume implements inet.Stack.Resume.
   392  func (*Stack) Resume() {}
   393  
   394  // RegisteredEndpoints implements inet.Stack.RegisteredEndpoints.
   395  func (*Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil }
   396  
   397  // CleanupEndpoints implements inet.Stack.CleanupEndpoints.
   398  func (*Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil }
   399  
   400  // RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
   401  func (*Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
   402  
   403  // SetForwarding implements inet.Stack.SetForwarding.
   404  func (*Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error {
   405  	return linuxerr.EACCES
   406  }
   407  
   408  // PortRange implements inet.Stack.PortRange.
   409  func (*Stack) PortRange() (uint16, uint16) {
   410  	// Use the default Linux values per net/ipv4/af_inet.c:inet_init_net().
   411  	return 32768, 60999
   412  }
   413  
   414  // SetPortRange implements inet.Stack.SetPortRange.
   415  func (*Stack) SetPortRange(uint16, uint16) error {
   416  	return linuxerr.EACCES
   417  }