github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/proc/task_net.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"reflect"
    22  	"time"
    23  
    24  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    25  	"github.com/SagerNet/gvisor/pkg/context"
    26  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    27  	"github.com/SagerNet/gvisor/pkg/hostarch"
    28  	"github.com/SagerNet/gvisor/pkg/log"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/inet"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    32  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    33  	"github.com/SagerNet/gvisor/pkg/sentry/socket"
    34  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix"
    35  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    36  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    37  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    38  )
    39  
    40  func (fs *filesystem) newTaskNetDir(ctx context.Context, task *kernel.Task) kernfs.Inode {
    41  	k := task.Kernel()
    42  	pidns := task.PIDNamespace()
    43  	root := auth.NewRootCredentials(pidns.UserNamespace())
    44  
    45  	var contents map[string]kernfs.Inode
    46  	if stack := task.NetworkNamespace().Stack(); stack != nil {
    47  		const (
    48  			arp       = "IP address       HW type     Flags       HW address            Mask     Device\n"
    49  			netlink   = "sk       Eth Pid    Groups   Rmem     Wmem     Dump     Locks     Drops     Inode\n"
    50  			packet    = "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n"
    51  			protocols = "protocol  size sockets  memory press maxhdr  slab module     cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"
    52  			ptype     = "Type Device      Function\n"
    53  			upd6      = "  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode\n"
    54  		)
    55  		psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond))
    56  
    57  		// TODO(github.com/SagerNet/issue/1833): Make sure file contents reflect the task
    58  		// network namespace.
    59  		contents = map[string]kernfs.Inode{
    60  			"dev":  fs.newInode(ctx, root, 0444, &netDevData{stack: stack}),
    61  			"snmp": fs.newInode(ctx, root, 0444, &netSnmpData{stack: stack}),
    62  
    63  			// The following files are simple stubs until they are implemented in
    64  			// netstack, if the file contains a header the stub is just the header
    65  			// otherwise it is an empty file.
    66  			"arp":       fs.newInode(ctx, root, 0444, newStaticFile(arp)),
    67  			"netlink":   fs.newInode(ctx, root, 0444, newStaticFile(netlink)),
    68  			"netstat":   fs.newInode(ctx, root, 0444, &netStatData{}),
    69  			"packet":    fs.newInode(ctx, root, 0444, newStaticFile(packet)),
    70  			"protocols": fs.newInode(ctx, root, 0444, newStaticFile(protocols)),
    71  
    72  			// Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
    73  			// high res timer ticks per sec (ClockGetres returns 1ns resolution).
    74  			"psched": fs.newInode(ctx, root, 0444, newStaticFile(psched)),
    75  			"ptype":  fs.newInode(ctx, root, 0444, newStaticFile(ptype)),
    76  			"route":  fs.newInode(ctx, root, 0444, &netRouteData{stack: stack}),
    77  			"tcp":    fs.newInode(ctx, root, 0444, &netTCPData{kernel: k}),
    78  			"udp":    fs.newInode(ctx, root, 0444, &netUDPData{kernel: k}),
    79  			"unix":   fs.newInode(ctx, root, 0444, &netUnixData{kernel: k}),
    80  		}
    81  
    82  		if stack.SupportsIPv6() {
    83  			contents["if_inet6"] = fs.newInode(ctx, root, 0444, &ifinet6{stack: stack})
    84  			contents["ipv6_route"] = fs.newInode(ctx, root, 0444, newStaticFile(""))
    85  			contents["tcp6"] = fs.newInode(ctx, root, 0444, &netTCP6Data{kernel: k})
    86  			contents["udp6"] = fs.newInode(ctx, root, 0444, newStaticFile(upd6))
    87  		}
    88  	}
    89  
    90  	return fs.newTaskOwnedDir(ctx, task, fs.NextIno(), 0555, contents)
    91  }
    92  
// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
//
// +stateify savable
type ifinet6 struct {
	kernfs.DynamicBytesFile

	// stack supplies the interfaces and interface addresses that are listed
	// in the generated file.
	stack inet.Stack
}

// Compile-time check that *ifinet6 satisfies dynamicInode.
var _ dynamicInode = (*ifinet6)(nil)
   103  
   104  func (n *ifinet6) contents() []string {
   105  	var lines []string
   106  	nics := n.stack.Interfaces()
   107  	for id, naddrs := range n.stack.InterfaceAddrs() {
   108  		nic, ok := nics[id]
   109  		if !ok {
   110  			// NIC was added after NICNames was called. We'll just ignore it.
   111  			continue
   112  		}
   113  
   114  		for _, a := range naddrs {
   115  			// IPv6 only.
   116  			if a.Family != linux.AF_INET6 {
   117  				continue
   118  			}
   119  
   120  			// Fields:
   121  			// IPv6 address displayed in 32 hexadecimal chars without colons
   122  			// Netlink device number (interface index) in hexadecimal (use nic id)
   123  			// Prefix length in hexadecimal
   124  			// Scope value (use 0)
   125  			// Interface flags
   126  			// Device name
   127  			lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
   128  		}
   129  	}
   130  	return lines
   131  }
   132  
   133  // Generate implements vfs.DynamicBytesSource.Generate.
   134  func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
   135  	for _, l := range n.contents() {
   136  		buf.WriteString(l)
   137  	}
   138  	return nil
   139  }
   140  
// netDevData implements vfs.DynamicBytesSource for /proc/net/dev.
//
// +stateify savable
type netDevData struct {
	kernfs.DynamicBytesFile

	// stack supplies the interface list and the per-interface statistics
	// printed in the generated file.
	stack inet.Stack
}

// Compile-time check that *netDevData satisfies dynamicInode.
var _ dynamicInode = (*netDevData)(nil)
   151  
   152  // Generate implements vfs.DynamicBytesSource.Generate.
   153  func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   154  	interfaces := n.stack.Interfaces()
   155  	buf.WriteString("Inter-|   Receive                                                |  Transmit\n")
   156  	buf.WriteString(" face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n")
   157  
   158  	for _, i := range interfaces {
   159  		// Implements the same format as
   160  		// net/core/net-procfs.c:dev_seq_printf_stats.
   161  		var stats inet.StatDev
   162  		if err := n.stack.Statistics(&stats, i.Name); err != nil {
   163  			log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
   164  			continue
   165  		}
   166  		fmt.Fprintf(
   167  			buf,
   168  			"%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
   169  			i.Name,
   170  			// Received
   171  			stats[0], // bytes
   172  			stats[1], // packets
   173  			stats[2], // errors
   174  			stats[3], // dropped
   175  			stats[4], // fifo
   176  			stats[5], // frame
   177  			stats[6], // compressed
   178  			stats[7], // multicast
   179  			// Transmitted
   180  			stats[8],  // bytes
   181  			stats[9],  // packets
   182  			stats[10], // errors
   183  			stats[11], // dropped
   184  			stats[12], // fifo
   185  			stats[13], // frame
   186  			stats[14], // compressed
   187  			stats[15], // multicast
   188  		)
   189  	}
   190  
   191  	return nil
   192  }
   193  
// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix.
//
// +stateify savable
type netUnixData struct {
	kernfs.DynamicBytesFile

	// kernel provides the socket table that is scanned for unix sockets.
	kernel *kernel.Kernel
}

// Compile-time check that *netUnixData satisfies dynamicInode.
var _ dynamicInode = (*netUnixData)(nil)
   204  
   205  // Generate implements vfs.DynamicBytesSource.Generate.
   206  func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   207  	buf.WriteString("Num       RefCount Protocol Flags    Type St Inode Path\n")
   208  	for _, se := range n.kernel.ListSockets() {
   209  		s := se.SockVFS2
   210  		if !s.TryIncRef() {
   211  			// Racing with socket destruction, this is ok.
   212  			continue
   213  		}
   214  		if family, _, _ := s.Impl().(socket.SocketVFS2).Type(); family != linux.AF_UNIX {
   215  			s.DecRef(ctx)
   216  			// Not a unix socket.
   217  			continue
   218  		}
   219  		sops := s.Impl().(*unix.SocketVFS2)
   220  
   221  		addr, err := sops.Endpoint().GetLocalAddress()
   222  		if err != nil {
   223  			log.Warningf("Failed to retrieve socket name from %+v: %v", s, err)
   224  			addr.Addr = "<unknown>"
   225  		}
   226  
   227  		sockFlags := 0
   228  		if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
   229  			if ce.Listening() {
   230  				// For unix domain sockets, linux reports a single flag
   231  				// value if the socket is listening, of __SO_ACCEPTCON.
   232  				sockFlags = linux.SO_ACCEPTCON
   233  			}
   234  		}
   235  
   236  		// Get inode number.
   237  		var ino uint64
   238  		stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_INO})
   239  		if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
   240  			log.Warningf("Failed to retrieve ino for socket file: %v", statErr)
   241  		} else {
   242  			ino = stat.Ino
   243  		}
   244  
   245  		// In the socket entry below, the value for the 'Num' field requires
   246  		// some consideration. Linux prints the address to the struct
   247  		// unix_sock representing a socket in the kernel, but may redact the
   248  		// value for unprivileged users depending on the kptr_restrict
   249  		// sysctl.
   250  		//
   251  		// One use for this field is to allow a privileged user to
   252  		// introspect into the kernel memory to determine information about
   253  		// a socket not available through procfs, such as the socket's peer.
   254  		//
   255  		// In gvisor, returning a pointer to our internal structures would
   256  		// be pointless, as it wouldn't match the memory layout for struct
   257  		// unix_sock, making introspection difficult. We could populate a
   258  		// struct unix_sock with the appropriate data, but even that
   259  		// requires consideration for which kernel version to emulate, as
   260  		// the definition of this struct changes over time.
   261  		//
   262  		// For now, we always redact this pointer.
   263  		fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d",
   264  			(*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
   265  			s.ReadRefs()-1,                // RefCount, don't count our own ref.
   266  			0,                             // Protocol, always 0 for UDS.
   267  			sockFlags,                     // Flags.
   268  			sops.Endpoint().Type(),        // Type.
   269  			sops.State(),                  // State.
   270  			ino,                           // Inode.
   271  		)
   272  
   273  		// Path
   274  		if len(addr.Addr) != 0 {
   275  			if addr.Addr[0] == 0 {
   276  				// Abstract path.
   277  				fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
   278  			} else {
   279  				fmt.Fprintf(buf, " %s", string(addr.Addr))
   280  			}
   281  		}
   282  		fmt.Fprintf(buf, "\n")
   283  
   284  		s.DecRef(ctx)
   285  	}
   286  	return nil
   287  }
   288  
   289  func networkToHost16(n uint16) uint16 {
   290  	// n is in network byte order, so is big-endian. The most-significant byte
   291  	// should be stored in the lower address.
   292  	//
   293  	// We manually inline binary.BigEndian.Uint16() because Go does not support
   294  	// non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to
   295  	// binary.BigEndian.Uint16() require a read of binary.BigEndian and an
   296  	// interface method call, defeating inlining.
   297  	buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
   298  	return hostarch.ByteOrder.Uint16(buf[:])
   299  }
   300  
   301  func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
   302  	switch family {
   303  	case linux.AF_INET:
   304  		var a linux.SockAddrInet
   305  		if i != nil {
   306  			a = *i.(*linux.SockAddrInet)
   307  		}
   308  
   309  		// linux.SockAddrInet.Port is stored in the network byte order and is
   310  		// printed like a number in host byte order. Note that all numbers in host
   311  		// byte order are printed with the most-significant byte first when
   312  		// formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux.
   313  		port := networkToHost16(a.Port)
   314  
   315  		// linux.SockAddrInet.Addr is stored as a byte slice in big-endian order
   316  		// (i.e. most-significant byte in index 0). Linux represents this as a
   317  		// __be32 which is a typedef for an unsigned int, and is printed with
   318  		// %X. This means that for a little-endian machine, Linux prints the
   319  		// least-significant byte of the address first. To emulate this, we first
   320  		// invert the byte order for the address using hostarch.ByteOrder.Uint32,
   321  		// which makes it have the equivalent encoding to a __be32 on a little
   322  		// endian machine. Note that this operation is a no-op on a big endian
   323  		// machine. Then similar to Linux, we format it with %X, which will print
   324  		// the most-significant byte of the __be32 address first, which is now
   325  		// actually the least-significant byte of the original address in
   326  		// linux.SockAddrInet.Addr on little endian machines, due to the conversion.
   327  		addr := hostarch.ByteOrder.Uint32(a.Addr[:])
   328  
   329  		fmt.Fprintf(w, "%08X:%04X ", addr, port)
   330  	case linux.AF_INET6:
   331  		var a linux.SockAddrInet6
   332  		if i != nil {
   333  			a = *i.(*linux.SockAddrInet6)
   334  		}
   335  
   336  		port := networkToHost16(a.Port)
   337  		addr0 := hostarch.ByteOrder.Uint32(a.Addr[0:4])
   338  		addr1 := hostarch.ByteOrder.Uint32(a.Addr[4:8])
   339  		addr2 := hostarch.ByteOrder.Uint32(a.Addr[8:12])
   340  		addr3 := hostarch.ByteOrder.Uint32(a.Addr[12:16])
   341  		fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
   342  	}
   343  }
   344  
   345  func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error {
   346  	// t may be nil here if our caller is not part of a task goroutine. This can
   347  	// happen for example if we're here for "sentryctl cat". When t is nil,
   348  	// degrade gracefully and retrieve what we can.
   349  	t := kernel.TaskFromContext(ctx)
   350  
   351  	for _, se := range k.ListSockets() {
   352  		s := se.SockVFS2
   353  		if !s.TryIncRef() {
   354  			// Racing with socket destruction, this is ok.
   355  			continue
   356  		}
   357  		sops, ok := s.Impl().(socket.SocketVFS2)
   358  		if !ok {
   359  			panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s))
   360  		}
   361  		if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
   362  			s.DecRef(ctx)
   363  			// Not tcp4 sockets.
   364  			continue
   365  		}
   366  
   367  		// Linux's documentation for the fields below can be found at
   368  		// https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
   369  		// For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
   370  		// Note that the header doesn't contain labels for all the fields.
   371  
   372  		// Field: sl; entry number.
   373  		fmt.Fprintf(buf, "%4d: ", se.ID)
   374  
   375  		// Field: local_adddress.
   376  		var localAddr linux.SockAddr
   377  		if t != nil {
   378  			if local, _, err := sops.GetSockName(t); err == nil {
   379  				localAddr = local
   380  			}
   381  		}
   382  		writeInetAddr(buf, family, localAddr)
   383  
   384  		// Field: rem_address.
   385  		var remoteAddr linux.SockAddr
   386  		if t != nil {
   387  			if remote, _, err := sops.GetPeerName(t); err == nil {
   388  				remoteAddr = remote
   389  			}
   390  		}
   391  		writeInetAddr(buf, family, remoteAddr)
   392  
   393  		// Field: state; socket state.
   394  		fmt.Fprintf(buf, "%02X ", sops.State())
   395  
   396  		// Field: tx_queue, rx_queue; number of packets in the transmit and
   397  		// receive queue. Unimplemented.
   398  		fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
   399  
   400  		// Field: tr, tm->when; timer active state and number of jiffies
   401  		// until timer expires. Unimplemented.
   402  		fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
   403  
   404  		// Field: retrnsmt; number of unrecovered RTO timeouts.
   405  		// Unimplemented.
   406  		fmt.Fprintf(buf, "%08X ", 0)
   407  
   408  		stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO})
   409  
   410  		// Field: uid.
   411  		if statErr != nil || stat.Mask&linux.STATX_UID == 0 {
   412  			log.Warningf("Failed to retrieve uid for socket file: %v", statErr)
   413  			fmt.Fprintf(buf, "%5d ", 0)
   414  		} else {
   415  			creds := auth.CredentialsFromContext(ctx)
   416  			fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow()))
   417  		}
   418  
   419  		// Field: timeout; number of unanswered 0-window probes.
   420  		// Unimplemented.
   421  		fmt.Fprintf(buf, "%8d ", 0)
   422  
   423  		// Field: inode.
   424  		if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
   425  			log.Warningf("Failed to retrieve inode for socket file: %v", statErr)
   426  			fmt.Fprintf(buf, "%8d ", 0)
   427  		} else {
   428  			fmt.Fprintf(buf, "%8d ", stat.Ino)
   429  		}
   430  
   431  		// Field: refcount. Don't count the ref we obtain while deferencing
   432  		// the weakref to this socket.
   433  		fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
   434  
   435  		// Field: Socket struct address. Redacted due to the same reason as
   436  		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
   437  		fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
   438  
   439  		// Field: retransmit timeout. Unimplemented.
   440  		fmt.Fprintf(buf, "%d ", 0)
   441  
   442  		// Field: predicted tick of soft clock (delayed ACK control data).
   443  		// Unimplemented.
   444  		fmt.Fprintf(buf, "%d ", 0)
   445  
   446  		// Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
   447  		fmt.Fprintf(buf, "%d ", 0)
   448  
   449  		// Field: sending congestion window, Unimplemented.
   450  		fmt.Fprintf(buf, "%d ", 0)
   451  
   452  		// Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
   453  		// Unimplemented, report as large threshold.
   454  		fmt.Fprintf(buf, "%d", -1)
   455  
   456  		fmt.Fprintf(buf, "\n")
   457  
   458  		s.DecRef(ctx)
   459  	}
   460  
   461  	return nil
   462  }
   463  
// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp.
//
// +stateify savable
type netTCPData struct {
	kernfs.DynamicBytesFile

	// kernel provides the socket table that is scanned for AF_INET stream
	// sockets.
	kernel *kernel.Kernel
}

// Compile-time check that *netTCPData satisfies dynamicInode.
var _ dynamicInode = (*netTCPData)(nil)
   474  
   475  func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   476  	buf.WriteString("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode                                                     \n")
   477  	return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET)
   478  }
   479  
// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6.
//
// +stateify savable
type netTCP6Data struct {
	kernfs.DynamicBytesFile

	// kernel provides the socket table that is scanned for AF_INET6 stream
	// sockets.
	kernel *kernel.Kernel
}

// Compile-time check that *netTCP6Data satisfies dynamicInode.
var _ dynamicInode = (*netTCP6Data)(nil)
   490  
   491  func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error {
   492  	buf.WriteString("  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode\n")
   493  	return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6)
   494  }
   495  
// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp.
//
// +stateify savable
type netUDPData struct {
	kernfs.DynamicBytesFile

	// kernel provides the socket table that is scanned for AF_INET datagram
	// sockets.
	kernel *kernel.Kernel
}

// Compile-time check that *netUDPData satisfies dynamicInode.
var _ dynamicInode = (*netUDPData)(nil)
   506  
   507  // Generate implements vfs.DynamicBytesSource.Generate.
   508  func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   509  	// t may be nil here if our caller is not part of a task goroutine. This can
   510  	// happen for example if we're here for "sentryctl cat". When t is nil,
   511  	// degrade gracefully and retrieve what we can.
   512  	t := kernel.TaskFromContext(ctx)
   513  
   514  	buf.WriteString("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops             \n")
   515  
   516  	for _, se := range d.kernel.ListSockets() {
   517  		s := se.SockVFS2
   518  		if !s.TryIncRef() {
   519  			// Racing with socket destruction, this is ok.
   520  			continue
   521  		}
   522  		sops, ok := s.Impl().(socket.SocketVFS2)
   523  		if !ok {
   524  			panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s))
   525  		}
   526  		if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM {
   527  			s.DecRef(ctx)
   528  			// Not udp4 socket.
   529  			continue
   530  		}
   531  
   532  		// For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock().
   533  
   534  		// Field: sl; entry number.
   535  		fmt.Fprintf(buf, "%5d: ", se.ID)
   536  
   537  		// Field: local_adddress.
   538  		var localAddr linux.SockAddrInet
   539  		if t != nil {
   540  			if local, _, err := sops.GetSockName(t); err == nil {
   541  				localAddr = *local.(*linux.SockAddrInet)
   542  			}
   543  		}
   544  		writeInetAddr(buf, linux.AF_INET, &localAddr)
   545  
   546  		// Field: rem_address.
   547  		var remoteAddr linux.SockAddrInet
   548  		if t != nil {
   549  			if remote, _, err := sops.GetPeerName(t); err == nil {
   550  				remoteAddr = *remote.(*linux.SockAddrInet)
   551  			}
   552  		}
   553  		writeInetAddr(buf, linux.AF_INET, &remoteAddr)
   554  
   555  		// Field: state; socket state.
   556  		fmt.Fprintf(buf, "%02X ", sops.State())
   557  
   558  		// Field: tx_queue, rx_queue; number of packets in the transmit and
   559  		// receive queue. Unimplemented.
   560  		fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
   561  
   562  		// Field: tr, tm->when. Always 0 for UDP.
   563  		fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
   564  
   565  		// Field: retrnsmt. Always 0 for UDP.
   566  		fmt.Fprintf(buf, "%08X ", 0)
   567  
   568  		stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO})
   569  
   570  		// Field: uid.
   571  		if statErr != nil || stat.Mask&linux.STATX_UID == 0 {
   572  			log.Warningf("Failed to retrieve uid for socket file: %v", statErr)
   573  			fmt.Fprintf(buf, "%5d ", 0)
   574  		} else {
   575  			creds := auth.CredentialsFromContext(ctx)
   576  			fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow()))
   577  		}
   578  
   579  		// Field: timeout. Always 0 for UDP.
   580  		fmt.Fprintf(buf, "%8d ", 0)
   581  
   582  		// Field: inode.
   583  		if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
   584  			log.Warningf("Failed to retrieve inode for socket file: %v", statErr)
   585  			fmt.Fprintf(buf, "%8d ", 0)
   586  		} else {
   587  			fmt.Fprintf(buf, "%8d ", stat.Ino)
   588  		}
   589  
   590  		// Field: ref; reference count on the socket inode. Don't count the ref
   591  		// we obtain while deferencing the weakref to this socket.
   592  		fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
   593  
   594  		// Field: Socket struct address. Redacted due to the same reason as
   595  		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
   596  		fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
   597  
   598  		// Field: drops; number of dropped packets. Unimplemented.
   599  		fmt.Fprintf(buf, "%d", 0)
   600  
   601  		fmt.Fprintf(buf, "\n")
   602  
   603  		s.DecRef(ctx)
   604  	}
   605  	return nil
   606  }
   607  
// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp.
//
// +stateify savable
type netSnmpData struct {
	kernfs.DynamicBytesFile

	// stack supplies the SNMP statistics printed in the generated file.
	stack inet.Stack
}

// Compile-time check that *netSnmpData satisfies dynamicInode.
var _ dynamicInode = (*netSnmpData)(nil)
   618  
// snmpLine describes one section of /proc/net/snmp.
//
// +stateify savable
type snmpLine struct {
	// prefix is the section name printed at the start of each line of the
	// section. It is also the argument passed to inet.Stack.Statistics when
	// the section's values are fetched.
	prefix string

	// header is the space-separated list of column names for the section.
	header string
}

// snmp lists the sections of /proc/net/snmp in output order. The entries are
// matched by index with the statistics types listed in netSnmpData.Generate,
// so the two lists must be kept in the same order.
var snmp = []snmpLine{
	{
		prefix: "Ip",
		header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates",
	},
	{
		prefix: "Icmp",
		header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps",
	},
	{
		// No header: netSnmpData.Generate has no statistics type for this
		// section and prints only the bare prefix lines.
		prefix: "IcmpMsg",
	},
	{
		prefix: "Tcp",
		header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors",
	},
	{
		prefix: "Udp",
		header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
	},
	{
		prefix: "UdpLite",
		header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
	},
}
   650  
   651  func toSlice(a interface{}) []uint64 {
   652  	v := reflect.Indirect(reflect.ValueOf(a))
   653  	return v.Slice(0, v.Len()).Interface().([]uint64)
   654  }
   655  
   656  func sprintSlice(s []uint64) string {
   657  	if len(s) == 0 {
   658  		return ""
   659  	}
   660  	r := fmt.Sprint(s)
   661  	return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice.
   662  }
   663  
   664  // Generate implements vfs.DynamicBytesSource.Generate.
   665  func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   666  	types := []interface{}{
   667  		&inet.StatSNMPIP{},
   668  		&inet.StatSNMPICMP{},
   669  		nil, // TODO(github.com/SagerNet/issue/628): Support IcmpMsg stats.
   670  		&inet.StatSNMPTCP{},
   671  		&inet.StatSNMPUDP{},
   672  		&inet.StatSNMPUDPLite{},
   673  	}
   674  	for i, stat := range types {
   675  		line := snmp[i]
   676  		if stat == nil {
   677  			fmt.Fprintf(buf, "%s:\n", line.prefix)
   678  			fmt.Fprintf(buf, "%s:\n", line.prefix)
   679  			continue
   680  		}
   681  		if err := d.stack.Statistics(stat, line.prefix); err != nil {
   682  			if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
   683  				log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
   684  			} else {
   685  				log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
   686  			}
   687  		}
   688  
   689  		fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header)
   690  
   691  		if line.prefix == "Tcp" {
   692  			tcp := stat.(*inet.StatSNMPTCP)
   693  			// "Tcp" needs special processing because MaxConn is signed. RFC 2012.
   694  			fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:]))
   695  		} else {
   696  			fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat)))
   697  		}
   698  	}
   699  	return nil
   700  }
   701  
// netRouteData implements vfs.DynamicBytesSource for /proc/net/route.
//
// +stateify savable
type netRouteData struct {
	kernfs.DynamicBytesFile

	// stack supplies the route table and the interface list used to build
	// the generated file.
	stack inet.Stack
}

// Compile-time check that *netRouteData satisfies dynamicInode.
var _ dynamicInode = (*netRouteData)(nil)
   712  
   713  // Generate implements vfs.DynamicBytesSource.Generate.
   714  // See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
   715  func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   716  	fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT")
   717  
   718  	interfaces := d.stack.Interfaces()
   719  	for _, rt := range d.stack.RouteTable() {
   720  		// /proc/net/route only includes ipv4 routes.
   721  		if rt.Family != linux.AF_INET {
   722  			continue
   723  		}
   724  
   725  		// /proc/net/route does not include broadcast or multicast routes.
   726  		if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST {
   727  			continue
   728  		}
   729  
   730  		iface, ok := interfaces[rt.OutputInterface]
   731  		if !ok || iface.Name == "lo" {
   732  			continue
   733  		}
   734  
   735  		var (
   736  			gw     uint32
   737  			prefix uint32
   738  			flags  = linux.RTF_UP
   739  		)
   740  		if len(rt.GatewayAddr) == header.IPv4AddressSize {
   741  			flags |= linux.RTF_GATEWAY
   742  			gw = hostarch.ByteOrder.Uint32(rt.GatewayAddr)
   743  		}
   744  		if len(rt.DstAddr) == header.IPv4AddressSize {
   745  			prefix = hostarch.ByteOrder.Uint32(rt.DstAddr)
   746  		}
   747  		l := fmt.Sprintf(
   748  			"%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
   749  			iface.Name,
   750  			prefix,
   751  			gw,
   752  			flags,
   753  			0, // RefCnt.
   754  			0, // Use.
   755  			0, // Metric.
   756  			(uint32(1)<<rt.DstLen)-1,
   757  			0, // MTU.
   758  			0, // Window.
   759  			0, // RTT.
   760  		)
   761  		fmt.Fprintf(buf, "%-127s\n", l)
   762  	}
   763  	return nil
   764  }
   765  
// netStatData implements vfs.DynamicBytesSource for /proc/net/netstat.
//
// +stateify savable
type netStatData struct {
	kernfs.DynamicBytesFile

	// stack is not read by Generate, which only emits a fixed header line;
	// newTaskNetDir constructs netStatData without setting it.
	stack inet.Stack
}

// Compile-time check that *netStatData satisfies dynamicInode.
var _ dynamicInode = (*netStatData)(nil)
   776  
   777  // Generate implements vfs.DynamicBytesSource.Generate.
   778  // See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
   779  func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   780  	buf.WriteString("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed " +
   781  		"EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps " +
   782  		"LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive " +
   783  		"PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost " +
   784  		"ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog " +
   785  		"TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser " +
   786  		"TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging " +
   787  		"TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo " +
   788  		"TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit " +
   789  		"TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans " +
   790  		"TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes " +
   791  		"TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail " +
   792  		"TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent " +
   793  		"TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose " +
   794  		"TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed " +
   795  		"TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld " +
   796  		"TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected " +
   797  		"TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback " +
   798  		"TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter " +
   799  		"TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail " +
   800  		"TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK " +
   801  		"TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail " +
   802  		"TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow " +
   803  		"TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets " +
   804  		"TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv " +
   805  		"TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect " +
   806  		"TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " +
   807  		"TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " +
   808  		"TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " +
   809  		"TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")
   810  	return nil
   811  }