github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/proc/sys_net.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"fmt"
    19  	"io"
    20  	"math"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/hostarch"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fs/proc/device"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/fs/ramfs"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/inet"
    31  	"github.com/SagerNet/gvisor/pkg/sync"
    32  	"github.com/SagerNet/gvisor/pkg/tcpip/network/ipv4"
    33  	"github.com/SagerNet/gvisor/pkg/usermem"
    34  	"github.com/SagerNet/gvisor/pkg/waiter"
    35  )
    36  
    37  // LINT.IfChange
    38  
    39  type tcpMemDir int
    40  
    41  const (
    42  	tcpRMem tcpMemDir = iota
    43  	tcpWMem
    44  )
    45  
    46  // tcpMemInode is used to read/write the size of netstack tcp buffers.
    47  //
    48  // TODO(b/121381035): If we have multiple proc mounts, concurrent writes can
    49  // leave netstack and the proc files in an inconsistent state. Since we set the
    50  // buffer size from these proc files on restore, we may also race and end up in
    51  // an inconsistent state on restore.
    52  //
    53  // +stateify savable
    54  type tcpMemInode struct {
    55  	fsutil.SimpleFileInode
    56  	dir tcpMemDir
    57  	s   inet.Stack `state:"wait"`
    58  
    59  	// size stores the tcp buffer size during save, and sets the buffer
    60  	// size in netstack in restore. We must save/restore this here, since
    61  	// a netstack instance is created on restore.
    62  	size inet.TCPBufferSize
    63  
    64  	// mu protects against concurrent reads/writes to files based on this
    65  	// inode.
    66  	mu sync.Mutex `state:"nosave"`
    67  }
    68  
    69  var _ fs.InodeOperations = (*tcpMemInode)(nil)
    70  
    71  func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir tcpMemDir) *fs.Inode {
    72  	tm := &tcpMemInode{
    73  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
    74  		s:               s,
    75  		dir:             dir,
    76  	}
    77  	sattr := fs.StableAttr{
    78  		DeviceID:  device.ProcDevice.DeviceID(),
    79  		InodeID:   device.ProcDevice.NextIno(),
    80  		BlockSize: hostarch.PageSize,
    81  		Type:      fs.SpecialFile,
    82  	}
    83  	return fs.NewInode(ctx, tm, msrc, sattr)
    84  }
    85  
    86  // Truncate implements fs.InodeOperations.Truncate.
    87  func (*tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error {
    88  	return nil
    89  }
    90  
    91  // GetFile implements fs.InodeOperations.GetFile.
    92  func (t *tcpMemInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
    93  	flags.Pread = true
    94  	return fs.NewFile(ctx, dirent, flags, &tcpMemFile{tcpMemInode: t}), nil
    95  }
    96  
    97  // +stateify savable
    98  type tcpMemFile struct {
    99  	fsutil.FileGenericSeek          `state:"nosave"`
   100  	fsutil.FileNoIoctl              `state:"nosave"`
   101  	fsutil.FileNoMMap               `state:"nosave"`
   102  	fsutil.FileNoSplice             `state:"nosave"`
   103  	fsutil.FileNoopRelease          `state:"nosave"`
   104  	fsutil.FileNoopFlush            `state:"nosave"`
   105  	fsutil.FileNoopFsync            `state:"nosave"`
   106  	fsutil.FileNotDirReaddir        `state:"nosave"`
   107  	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
   108  	waiter.AlwaysReady              `state:"nosave"`
   109  
   110  	tcpMemInode *tcpMemInode
   111  }
   112  
   113  var _ fs.FileOperations = (*tcpMemFile)(nil)
   114  
   115  // Read implements fs.FileOperations.Read.
   116  func (f *tcpMemFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   117  	if offset != 0 {
   118  		return 0, io.EOF
   119  	}
   120  	f.tcpMemInode.mu.Lock()
   121  	defer f.tcpMemInode.mu.Unlock()
   122  
   123  	size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s)
   124  	if err != nil {
   125  		return 0, err
   126  	}
   127  	s := fmt.Sprintf("%d\t%d\t%d\n", size.Min, size.Default, size.Max)
   128  	n, err := dst.CopyOut(ctx, []byte(s))
   129  	return int64(n), err
   130  }
   131  
   132  // Write implements fs.FileOperations.Write.
   133  func (f *tcpMemFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   134  	if src.NumBytes() == 0 {
   135  		return 0, nil
   136  	}
   137  	f.tcpMemInode.mu.Lock()
   138  	defer f.tcpMemInode.mu.Unlock()
   139  
   140  	src = src.TakeFirst(hostarch.PageSize - 1)
   141  	size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s)
   142  	if err != nil {
   143  		return 0, err
   144  	}
   145  	buf := []int32{int32(size.Min), int32(size.Default), int32(size.Max)}
   146  	n, cperr := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, buf, src.Opts)
   147  	newSize := inet.TCPBufferSize{
   148  		Min:     int(buf[0]),
   149  		Default: int(buf[1]),
   150  		Max:     int(buf[2]),
   151  	}
   152  	if err := writeSize(f.tcpMemInode.dir, f.tcpMemInode.s, newSize); err != nil {
   153  		return n, err
   154  	}
   155  	return n, cperr
   156  }
   157  
   158  func readSize(dirType tcpMemDir, s inet.Stack) (inet.TCPBufferSize, error) {
   159  	switch dirType {
   160  	case tcpRMem:
   161  		return s.TCPReceiveBufferSize()
   162  	case tcpWMem:
   163  		return s.TCPSendBufferSize()
   164  	default:
   165  		panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType))
   166  	}
   167  }
   168  
   169  func writeSize(dirType tcpMemDir, s inet.Stack, size inet.TCPBufferSize) error {
   170  	switch dirType {
   171  	case tcpRMem:
   172  		return s.SetTCPReceiveBufferSize(size)
   173  	case tcpWMem:
   174  		return s.SetTCPSendBufferSize(size)
   175  	default:
   176  		panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType))
   177  	}
   178  }
   179  
   180  // +stateify savable
   181  type tcpSack struct {
   182  	fsutil.SimpleFileInode
   183  
   184  	stack   inet.Stack `state:"wait"`
   185  	enabled *bool
   186  }
   187  
   188  func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
   189  	ts := &tcpSack{
   190  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
   191  		stack:           s,
   192  	}
   193  	sattr := fs.StableAttr{
   194  		DeviceID:  device.ProcDevice.DeviceID(),
   195  		InodeID:   device.ProcDevice.NextIno(),
   196  		BlockSize: hostarch.PageSize,
   197  		Type:      fs.SpecialFile,
   198  	}
   199  	return fs.NewInode(ctx, ts, msrc, sattr)
   200  }
   201  
   202  // Truncate implements fs.InodeOperations.Truncate.
   203  func (*tcpSack) Truncate(context.Context, *fs.Inode, int64) error {
   204  	return nil
   205  }
   206  
   207  // GetFile implements fs.InodeOperations.GetFile.
   208  func (s *tcpSack) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   209  	flags.Pread = true
   210  	flags.Pwrite = true
   211  	return fs.NewFile(ctx, dirent, flags, &tcpSackFile{
   212  		tcpSack: s,
   213  		stack:   s.stack,
   214  	}), nil
   215  }
   216  
   217  // +stateify savable
   218  type tcpSackFile struct {
   219  	fsutil.FileGenericSeek          `state:"nosave"`
   220  	fsutil.FileNoIoctl              `state:"nosave"`
   221  	fsutil.FileNoMMap               `state:"nosave"`
   222  	fsutil.FileNoSplice             `state:"nosave"`
   223  	fsutil.FileNoopRelease          `state:"nosave"`
   224  	fsutil.FileNoopFlush            `state:"nosave"`
   225  	fsutil.FileNoopFsync            `state:"nosave"`
   226  	fsutil.FileNotDirReaddir        `state:"nosave"`
   227  	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
   228  	waiter.AlwaysReady              `state:"nosave"`
   229  
   230  	tcpSack *tcpSack
   231  
   232  	stack inet.Stack `state:"wait"`
   233  }
   234  
   235  // Read implements fs.FileOperations.Read.
   236  func (f *tcpSackFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   237  	if offset != 0 {
   238  		return 0, io.EOF
   239  	}
   240  
   241  	if f.tcpSack.enabled == nil {
   242  		sack, err := f.stack.TCPSACKEnabled()
   243  		if err != nil {
   244  			return 0, err
   245  		}
   246  		f.tcpSack.enabled = &sack
   247  	}
   248  
   249  	val := "0\n"
   250  	if *f.tcpSack.enabled {
   251  		// Technically, this is not quite compatible with Linux. Linux
   252  		// stores these as an integer, so if you write "2" into
   253  		// tcp_sack, you should get 2 back. Tough luck.
   254  		val = "1\n"
   255  	}
   256  	n, err := dst.CopyOut(ctx, []byte(val))
   257  	return int64(n), err
   258  }
   259  
   260  // Write implements fs.FileOperations.Write.
   261  func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   262  	if src.NumBytes() == 0 {
   263  		return 0, nil
   264  	}
   265  
   266  	// Only consider size of one memory page for input for performance reasons.
   267  	// We are only reading if it's zero or not anyway.
   268  	src = src.TakeFirst(hostarch.PageSize - 1)
   269  
   270  	var v int32
   271  	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
   272  	if err != nil {
   273  		return n, err
   274  	}
   275  	if f.tcpSack.enabled == nil {
   276  		f.tcpSack.enabled = new(bool)
   277  	}
   278  	*f.tcpSack.enabled = v != 0
   279  	return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled)
   280  }
   281  
   282  // +stateify savable
   283  type tcpRecovery struct {
   284  	fsutil.SimpleFileInode
   285  
   286  	stack    inet.Stack `state:"wait"`
   287  	recovery inet.TCPLossRecovery
   288  }
   289  
   290  func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
   291  	ts := &tcpRecovery{
   292  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
   293  		stack:           s,
   294  	}
   295  	sattr := fs.StableAttr{
   296  		DeviceID:  device.ProcDevice.DeviceID(),
   297  		InodeID:   device.ProcDevice.NextIno(),
   298  		BlockSize: hostarch.PageSize,
   299  		Type:      fs.SpecialFile,
   300  	}
   301  	return fs.NewInode(ctx, ts, msrc, sattr)
   302  }
   303  
   304  // Truncate implements fs.InodeOperations.Truncate.
   305  func (*tcpRecovery) Truncate(context.Context, *fs.Inode, int64) error {
   306  	return nil
   307  }
   308  
   309  // GetFile implements fs.InodeOperations.GetFile.
   310  func (r *tcpRecovery) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   311  	flags.Pread = true
   312  	flags.Pwrite = true
   313  	return fs.NewFile(ctx, dirent, flags, &tcpRecoveryFile{
   314  		tcpRecovery: r,
   315  		stack:       r.stack,
   316  	}), nil
   317  }
   318  
   319  // +stateify savable
   320  type tcpRecoveryFile struct {
   321  	fsutil.FileGenericSeek          `state:"nosave"`
   322  	fsutil.FileNoIoctl              `state:"nosave"`
   323  	fsutil.FileNoMMap               `state:"nosave"`
   324  	fsutil.FileNoSplice             `state:"nosave"`
   325  	fsutil.FileNoopRelease          `state:"nosave"`
   326  	fsutil.FileNoopFlush            `state:"nosave"`
   327  	fsutil.FileNoopFsync            `state:"nosave"`
   328  	fsutil.FileNotDirReaddir        `state:"nosave"`
   329  	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
   330  	waiter.AlwaysReady              `state:"nosave"`
   331  
   332  	tcpRecovery *tcpRecovery
   333  
   334  	stack inet.Stack `state:"wait"`
   335  }
   336  
   337  // Read implements fs.FileOperations.Read.
   338  func (f *tcpRecoveryFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   339  	if offset != 0 {
   340  		return 0, io.EOF
   341  	}
   342  
   343  	recovery, err := f.stack.TCPRecovery()
   344  	if err != nil {
   345  		return 0, err
   346  	}
   347  	f.tcpRecovery.recovery = recovery
   348  	s := fmt.Sprintf("%d\n", f.tcpRecovery.recovery)
   349  	n, err := dst.CopyOut(ctx, []byte(s))
   350  	return int64(n), err
   351  }
   352  
   353  // Write implements fs.FileOperations.Write.
   354  func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   355  	if src.NumBytes() == 0 {
   356  		return 0, nil
   357  	}
   358  	src = src.TakeFirst(hostarch.PageSize - 1)
   359  
   360  	var v int32
   361  	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
   362  	if err != nil {
   363  		return 0, err
   364  	}
   365  	f.tcpRecovery.recovery = inet.TCPLossRecovery(v)
   366  	if err := f.tcpRecovery.stack.SetTCPRecovery(f.tcpRecovery.recovery); err != nil {
   367  		return 0, err
   368  	}
   369  	return n, nil
   370  }
   371  
   372  func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
   373  	// The following files are simple stubs until they are implemented in
   374  	// netstack, most of these files are configuration related. We use the
   375  	// value closest to the actual netstack behavior or any empty file,
   376  	// all of these files will have mode 0444 (read-only for all users).
   377  	contents := map[string]*fs.Inode{
   378  		"default_qdisc": newStaticProcInode(ctx, msrc, []byte("pfifo_fast")),
   379  		"message_burst": newStaticProcInode(ctx, msrc, []byte("10")),
   380  		"message_cost":  newStaticProcInode(ctx, msrc, []byte("5")),
   381  		"optmem_max":    newStaticProcInode(ctx, msrc, []byte("0")),
   382  		"rmem_default":  newStaticProcInode(ctx, msrc, []byte("212992")),
   383  		"rmem_max":      newStaticProcInode(ctx, msrc, []byte("212992")),
   384  		"somaxconn":     newStaticProcInode(ctx, msrc, []byte("128")),
   385  		"wmem_default":  newStaticProcInode(ctx, msrc, []byte("212992")),
   386  		"wmem_max":      newStaticProcInode(ctx, msrc, []byte("212992")),
   387  	}
   388  
   389  	d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
   390  	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
   391  }
   392  
   393  // ipForwarding implements fs.InodeOperations.
   394  //
   395  // ipForwarding is used to enable/disable packet forwarding of netstack.
   396  //
   397  // +stateify savable
   398  type ipForwarding struct {
   399  	fsutil.SimpleFileInode
   400  
   401  	stack inet.Stack `state:"wait"`
   402  
   403  	// enabled stores the IPv4 forwarding state on save.
   404  	// We must save/restore this here, since a netstack instance
   405  	// is created on restore.
   406  	enabled bool
   407  }
   408  
   409  func newIPForwardingInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
   410  	ipf := &ipForwarding{
   411  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
   412  		stack:           s,
   413  	}
   414  	sattr := fs.StableAttr{
   415  		DeviceID:  device.ProcDevice.DeviceID(),
   416  		InodeID:   device.ProcDevice.NextIno(),
   417  		BlockSize: hostarch.PageSize,
   418  		Type:      fs.SpecialFile,
   419  	}
   420  	return fs.NewInode(ctx, ipf, msrc, sattr)
   421  }
   422  
   423  // Truncate implements fs.InodeOperations.Truncate. Truncate is called when
   424  // O_TRUNC is specified for any kind of existing Dirent but is not called via
   425  // (f)truncate for proc files.
   426  func (*ipForwarding) Truncate(context.Context, *fs.Inode, int64) error {
   427  	return nil
   428  }
   429  
   430  // +stateify savable
   431  type ipForwardingFile struct {
   432  	fsutil.FileGenericSeek          `state:"nosave"`
   433  	fsutil.FileNoIoctl              `state:"nosave"`
   434  	fsutil.FileNoMMap               `state:"nosave"`
   435  	fsutil.FileNoSplice             `state:"nosave"`
   436  	fsutil.FileNoopFlush            `state:"nosave"`
   437  	fsutil.FileNoopFsync            `state:"nosave"`
   438  	fsutil.FileNoopRelease          `state:"nosave"`
   439  	fsutil.FileNotDirReaddir        `state:"nosave"`
   440  	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
   441  	waiter.AlwaysReady              `state:"nosave"`
   442  
   443  	ipf *ipForwarding
   444  
   445  	stack inet.Stack `state:"wait"`
   446  }
   447  
   448  // GetFile implements fs.InodeOperations.GetFile.
   449  func (ipf *ipForwarding) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   450  	flags.Pread = true
   451  	flags.Pwrite = true
   452  	return fs.NewFile(ctx, dirent, flags, &ipForwardingFile{
   453  		stack: ipf.stack,
   454  		ipf:   ipf,
   455  	}), nil
   456  }
   457  
   458  // Read implements fs.FileOperations.Read.
   459  func (f *ipForwardingFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   460  	if offset != 0 {
   461  		return 0, io.EOF
   462  	}
   463  
   464  	val := "0\n"
   465  	if f.ipf.enabled {
   466  		// Technically, this is not quite compatible with Linux. Linux
   467  		// stores these as an integer, so if you write "2" into
   468  		// ip_forward, you should get 2 back.
   469  		val = "1\n"
   470  	}
   471  	n, err := dst.CopyOut(ctx, []byte(val))
   472  	return int64(n), err
   473  }
   474  
   475  // Write implements fs.FileOperations.Write.
   476  //
   477  // Offset is ignored, multiple writes are not supported.
   478  func (f *ipForwardingFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   479  	if src.NumBytes() == 0 {
   480  		return 0, nil
   481  	}
   482  
   483  	// Only consider size of one memory page for input for performance reasons.
   484  	// We are only reading if it's zero or not anyway.
   485  	src = src.TakeFirst(hostarch.PageSize - 1)
   486  
   487  	var v int32
   488  	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
   489  	if err != nil {
   490  		return n, err
   491  	}
   492  	f.ipf.enabled = v != 0
   493  	return n, f.stack.SetForwarding(ipv4.ProtocolNumber, f.ipf.enabled)
   494  }
   495  
   496  // portRangeInode implements fs.InodeOperations. It provides and allows
   497  // modification of the range of ephemeral ports that IPv4 and IPv6 sockets
   498  // choose from.
   499  //
   500  // +stateify savable
   501  type portRangeInode struct {
   502  	fsutil.SimpleFileInode
   503  
   504  	stack inet.Stack `state:"wait"`
   505  
   506  	// start and end store the port range. We must save/restore this here,
   507  	// since a netstack instance is created on restore.
   508  	start *uint16
   509  	end   *uint16
   510  }
   511  
   512  func newPortRangeInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
   513  	ipf := &portRangeInode{
   514  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
   515  		stack:           s,
   516  	}
   517  	sattr := fs.StableAttr{
   518  		DeviceID:  device.ProcDevice.DeviceID(),
   519  		InodeID:   device.ProcDevice.NextIno(),
   520  		BlockSize: hostarch.PageSize,
   521  		Type:      fs.SpecialFile,
   522  	}
   523  	return fs.NewInode(ctx, ipf, msrc, sattr)
   524  }
   525  
   526  // Truncate implements fs.InodeOperations.Truncate. Truncate is called when
   527  // O_TRUNC is specified for any kind of existing Dirent but is not called via
   528  // (f)truncate for proc files.
   529  func (*portRangeInode) Truncate(context.Context, *fs.Inode, int64) error {
   530  	return nil
   531  }
   532  
   533  // +stateify savable
   534  type portRangeFile struct {
   535  	fsutil.FileGenericSeek          `state:"nosave"`
   536  	fsutil.FileNoIoctl              `state:"nosave"`
   537  	fsutil.FileNoMMap               `state:"nosave"`
   538  	fsutil.FileNoSplice             `state:"nosave"`
   539  	fsutil.FileNoopFlush            `state:"nosave"`
   540  	fsutil.FileNoopFsync            `state:"nosave"`
   541  	fsutil.FileNoopRelease          `state:"nosave"`
   542  	fsutil.FileNotDirReaddir        `state:"nosave"`
   543  	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
   544  	waiter.AlwaysReady              `state:"nosave"`
   545  
   546  	inode *portRangeInode
   547  }
   548  
   549  // GetFile implements fs.InodeOperations.GetFile.
   550  func (in *portRangeInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   551  	flags.Pread = true
   552  	flags.Pwrite = true
   553  	return fs.NewFile(ctx, dirent, flags, &portRangeFile{
   554  		inode: in,
   555  	}), nil
   556  }
   557  
   558  // Read implements fs.FileOperations.Read.
   559  func (pf *portRangeFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   560  	if offset != 0 {
   561  		return 0, io.EOF
   562  	}
   563  
   564  	if pf.inode.start == nil {
   565  		start, end := pf.inode.stack.PortRange()
   566  		pf.inode.start = &start
   567  		pf.inode.end = &end
   568  	}
   569  
   570  	contents := fmt.Sprintf("%d %d\n", *pf.inode.start, *pf.inode.end)
   571  	n, err := dst.CopyOut(ctx, []byte(contents))
   572  	return int64(n), err
   573  }
   574  
   575  // Write implements fs.FileOperations.Write.
   576  //
   577  // Offset is ignored, multiple writes are not supported.
   578  func (pf *portRangeFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   579  	if src.NumBytes() == 0 {
   580  		return 0, nil
   581  	}
   582  
   583  	// Only consider size of one memory page for input for performance
   584  	// reasons.
   585  	src = src.TakeFirst(hostarch.PageSize - 1)
   586  
   587  	ports := make([]int32, 2)
   588  	n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts)
   589  	if err != nil {
   590  		return 0, err
   591  	}
   592  
   593  	// Port numbers must be uint16s.
   594  	if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 {
   595  		return 0, linuxerr.EINVAL
   596  	}
   597  
   598  	if err := pf.inode.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil {
   599  		return 0, err
   600  	}
   601  	if pf.inode.start == nil {
   602  		pf.inode.start = new(uint16)
   603  		pf.inode.end = new(uint16)
   604  	}
   605  	*pf.inode.start = uint16(ports[0])
   606  	*pf.inode.end = uint16(ports[1])
   607  	return n, nil
   608  }
   609  
   610  func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
   611  	contents := map[string]*fs.Inode{
   612  		// Add tcp_sack.
   613  		"tcp_sack": newTCPSackInode(ctx, msrc, s),
   614  
   615  		// Add ip_forward.
   616  		"ip_forward": newIPForwardingInode(ctx, msrc, s),
   617  
   618  		// Allow for configurable ephemeral port ranges. Note that this
   619  		// controls ports for both IPv4 and IPv6 sockets.
   620  		"ip_local_port_range": newPortRangeInode(ctx, msrc, s),
   621  
   622  		// The following files are simple stubs until they are
   623  		// implemented in netstack, most of these files are
   624  		// configuration related. We use the value closest to the
   625  		// actual netstack behavior or any empty file, all of these
   626  		// files will have mode 0444 (read-only for all users).
   627  		"ip_local_reserved_ports": newStaticProcInode(ctx, msrc, []byte("")),
   628  		"ipfrag_time":             newStaticProcInode(ctx, msrc, []byte("30")),
   629  		"ip_nonlocal_bind":        newStaticProcInode(ctx, msrc, []byte("0")),
   630  		"ip_no_pmtu_disc":         newStaticProcInode(ctx, msrc, []byte("1")),
   631  
   632  		// tcp_allowed_congestion_control tell the user what they are
   633  		// able to do as an unprivledged process so we leave it empty.
   634  		"tcp_allowed_congestion_control":   newStaticProcInode(ctx, msrc, []byte("")),
   635  		"tcp_available_congestion_control": newStaticProcInode(ctx, msrc, []byte("reno")),
   636  		"tcp_congestion_control":           newStaticProcInode(ctx, msrc, []byte("reno")),
   637  
   638  		// Many of the following stub files are features netstack
   639  		// doesn't support. The unsupported features return "0" to
   640  		// indicate they are disabled.
   641  		"tcp_base_mss":              newStaticProcInode(ctx, msrc, []byte("1280")),
   642  		"tcp_dsack":                 newStaticProcInode(ctx, msrc, []byte("0")),
   643  		"tcp_early_retrans":         newStaticProcInode(ctx, msrc, []byte("0")),
   644  		"tcp_fack":                  newStaticProcInode(ctx, msrc, []byte("0")),
   645  		"tcp_fastopen":              newStaticProcInode(ctx, msrc, []byte("0")),
   646  		"tcp_fastopen_key":          newStaticProcInode(ctx, msrc, []byte("")),
   647  		"tcp_invalid_ratelimit":     newStaticProcInode(ctx, msrc, []byte("0")),
   648  		"tcp_keepalive_intvl":       newStaticProcInode(ctx, msrc, []byte("0")),
   649  		"tcp_keepalive_probes":      newStaticProcInode(ctx, msrc, []byte("0")),
   650  		"tcp_keepalive_time":        newStaticProcInode(ctx, msrc, []byte("7200")),
   651  		"tcp_mtu_probing":           newStaticProcInode(ctx, msrc, []byte("0")),
   652  		"tcp_no_metrics_save":       newStaticProcInode(ctx, msrc, []byte("1")),
   653  		"tcp_probe_interval":        newStaticProcInode(ctx, msrc, []byte("0")),
   654  		"tcp_probe_threshold":       newStaticProcInode(ctx, msrc, []byte("0")),
   655  		"tcp_retries1":              newStaticProcInode(ctx, msrc, []byte("3")),
   656  		"tcp_retries2":              newStaticProcInode(ctx, msrc, []byte("15")),
   657  		"tcp_rfc1337":               newStaticProcInode(ctx, msrc, []byte("1")),
   658  		"tcp_slow_start_after_idle": newStaticProcInode(ctx, msrc, []byte("1")),
   659  		"tcp_synack_retries":        newStaticProcInode(ctx, msrc, []byte("5")),
   660  		"tcp_syn_retries":           newStaticProcInode(ctx, msrc, []byte("3")),
   661  		"tcp_timestamps":            newStaticProcInode(ctx, msrc, []byte("1")),
   662  	}
   663  
   664  	// Add tcp_rmem.
   665  	if _, err := s.TCPReceiveBufferSize(); err == nil {
   666  		contents["tcp_rmem"] = newTCPMemInode(ctx, msrc, s, tcpRMem)
   667  	}
   668  
   669  	// Add tcp_wmem.
   670  	if _, err := s.TCPSendBufferSize(); err == nil {
   671  		contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem)
   672  	}
   673  
   674  	// Add tcp_recovery.
   675  	if _, err := s.TCPRecovery(); err == nil {
   676  		contents["tcp_recovery"] = newTCPRecoveryInode(ctx, msrc, s)
   677  	}
   678  
   679  	d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
   680  	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
   681  }
   682  
   683  func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
   684  	var contents map[string]*fs.Inode
   685  	// TODO(github.com/SagerNet/issue/1833): Support for using the network stack in the
   686  	// network namespace of the calling process.
   687  	if s := p.k.RootNetworkNamespace().Stack(); s != nil {
   688  		contents = map[string]*fs.Inode{
   689  			"ipv4": p.newSysNetIPv4Dir(ctx, msrc, s),
   690  			"core": p.newSysNetCore(ctx, msrc, s),
   691  		}
   692  	}
   693  	d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
   694  	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
   695  }
   696  
   697  // LINT.ThenChange(../../fsimpl/proc/tasks_sys.go)