github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/proc/task_net.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"reflect"
    22  	"time"
    23  
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/log"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/inet"
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    33  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/socket"
    34  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/socket/unix"
    35  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/socket/unix/transport"
    36  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    37  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header"
    38  )
    39  
    40  func (fs *filesystem) newTaskNetDir(ctx context.Context, task *kernel.Task) kernfs.Inode {
    41  	k := task.Kernel()
    42  	pidns := task.PIDNamespace()
    43  	root := auth.NewRootCredentials(pidns.UserNamespace())
    44  
    45  	var contents map[string]kernfs.Inode
    46  	if stack := task.NetworkNamespace().Stack(); stack != nil {
    47  		const (
    48  			arp       = "IP address       HW type     Flags       HW address            Mask     Device\n"
    49  			netlink   = "sk       Eth Pid    Groups   Rmem     Wmem     Dump     Locks     Drops     Inode\n"
    50  			packet    = "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n"
    51  			protocols = "protocol  size sockets  memory press maxhdr  slab module     cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"
    52  			ptype     = "Type Device      Function\n"
    53  			upd6      = "  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode\n"
    54  		)
    55  		psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond))
    56  
    57  		// TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task
    58  		// network namespace.
    59  		contents = map[string]kernfs.Inode{
    60  			"dev":  fs.newInode(ctx, root, 0444, &netDevData{stack: stack}),
    61  			"snmp": fs.newInode(ctx, root, 0444, &netSnmpData{stack: stack}),
    62  
    63  			// The following files are simple stubs until they are implemented in
    64  			// netstack, if the file contains a header the stub is just the header
    65  			// otherwise it is an empty file.
    66  			"arp":       fs.newInode(ctx, root, 0444, newStaticFile(arp)),
    67  			"netlink":   fs.newInode(ctx, root, 0444, newStaticFile(netlink)),
    68  			"netstat":   fs.newInode(ctx, root, 0444, &netStatData{}),
    69  			"packet":    fs.newInode(ctx, root, 0444, newStaticFile(packet)),
    70  			"protocols": fs.newInode(ctx, root, 0444, newStaticFile(protocols)),
    71  
    72  			// Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
    73  			// high res timer ticks per sec (ClockGetres returns 1ns resolution).
    74  			"psched": fs.newInode(ctx, root, 0444, newStaticFile(psched)),
    75  			"ptype":  fs.newInode(ctx, root, 0444, newStaticFile(ptype)),
    76  			"route":  fs.newInode(ctx, root, 0444, &netRouteData{stack: stack}),
    77  			"tcp":    fs.newInode(ctx, root, 0444, &netTCPData{kernel: k}),
    78  			"udp":    fs.newInode(ctx, root, 0444, &netUDPData{kernel: k}),
    79  			"unix":   fs.newInode(ctx, root, 0444, &netUnixData{kernel: k}),
    80  		}
    81  
    82  		if stack.SupportsIPv6() {
    83  			contents["if_inet6"] = fs.newInode(ctx, root, 0444, &ifinet6{stack: stack})
    84  			contents["ipv6_route"] = fs.newInode(ctx, root, 0444, newStaticFile(""))
    85  			contents["tcp6"] = fs.newInode(ctx, root, 0444, &netTCP6Data{kernel: k})
    86  			contents["udp6"] = fs.newInode(ctx, root, 0444, newStaticFile(upd6))
    87  		}
    88  	}
    89  
    90  	return fs.newTaskOwnedDir(ctx, task, fs.NextIno(), 0555, contents)
    91  }
    92  
    93  // ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
    94  //
    95  // +stateify savable
    96  type ifinet6 struct {
    97  	kernfs.DynamicBytesFile
    98  
    99  	stack inet.Stack
   100  }
   101  
   102  var _ dynamicInode = (*ifinet6)(nil)
   103  
   104  func (n *ifinet6) contents() []string {
   105  	var lines []string
   106  	nics := n.stack.Interfaces()
   107  	for id, naddrs := range n.stack.InterfaceAddrs() {
   108  		nic, ok := nics[id]
   109  		if !ok {
   110  			// NIC was added after NICNames was called. We'll just ignore it.
   111  			continue
   112  		}
   113  
   114  		for _, a := range naddrs {
   115  			// IPv6 only.
   116  			if a.Family != linux.AF_INET6 {
   117  				continue
   118  			}
   119  
   120  			// Fields:
   121  			// IPv6 address displayed in 32 hexadecimal chars without colons
   122  			// Netlink device number (interface index) in hexadecimal (use nic id)
   123  			// Prefix length in hexadecimal
   124  			// Scope value (use 0)
   125  			// Interface flags
   126  			// Device name
   127  			lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
   128  		}
   129  	}
   130  	return lines
   131  }
   132  
   133  // Generate implements vfs.DynamicBytesSource.Generate.
   134  func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
   135  	for _, l := range n.contents() {
   136  		buf.WriteString(l)
   137  	}
   138  	return nil
   139  }
   140  
   141  // netDevData implements vfs.DynamicBytesSource for /proc/net/dev.
   142  //
   143  // +stateify savable
   144  type netDevData struct {
   145  	kernfs.DynamicBytesFile
   146  
   147  	stack inet.Stack
   148  }
   149  
   150  var _ dynamicInode = (*netDevData)(nil)
   151  
   152  // Generate implements vfs.DynamicBytesSource.Generate.
   153  func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   154  	interfaces := n.stack.Interfaces()
   155  	buf.WriteString("Inter-|   Receive                                                |  Transmit\n")
   156  	buf.WriteString(" face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n")
   157  
   158  	for _, i := range interfaces {
   159  		// Implements the same format as
   160  		// net/core/net-procfs.c:dev_seq_printf_stats.
   161  		var stats inet.StatDev
   162  		if err := n.stack.Statistics(&stats, i.Name); err != nil {
   163  			log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
   164  			continue
   165  		}
   166  		fmt.Fprintf(
   167  			buf,
   168  			"%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
   169  			i.Name,
   170  			// Received
   171  			stats[0], // bytes
   172  			stats[1], // packets
   173  			stats[2], // errors
   174  			stats[3], // dropped
   175  			stats[4], // fifo
   176  			stats[5], // frame
   177  			stats[6], // compressed
   178  			stats[7], // multicast
   179  			// Transmitted
   180  			stats[8],  // bytes
   181  			stats[9],  // packets
   182  			stats[10], // errors
   183  			stats[11], // dropped
   184  			stats[12], // fifo
   185  			stats[13], // frame
   186  			stats[14], // compressed
   187  			stats[15], // multicast
   188  		)
   189  	}
   190  
   191  	return nil
   192  }
   193  
   194  // netUnixData implements vfs.DynamicBytesSource for /proc/net/unix.
   195  //
   196  // +stateify savable
   197  type netUnixData struct {
   198  	kernfs.DynamicBytesFile
   199  
   200  	kernel *kernel.Kernel
   201  }
   202  
   203  var _ dynamicInode = (*netUnixData)(nil)
   204  
   205  // Generate implements vfs.DynamicBytesSource.Generate.
   206  func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   207  	buf.WriteString("Num       RefCount Protocol Flags    Type St Inode Path\n")
   208  	for _, se := range n.kernel.ListSockets() {
   209  		s := se.Sock
   210  		if !s.TryIncRef() {
   211  			// Racing with socket destruction, this is ok.
   212  			continue
   213  		}
   214  		if family, _, _ := s.Impl().(socket.Socket).Type(); family != linux.AF_UNIX {
   215  			s.DecRef(ctx)
   216  			// Not a unix socket.
   217  			continue
   218  		}
   219  		sops := s.Impl().(*unix.Socket)
   220  
   221  		addr, err := sops.Endpoint().GetLocalAddress()
   222  		if err != nil {
   223  			log.Warningf("Failed to retrieve socket name from %+v: %v", s, err)
   224  			addr.Addr = "<unknown>"
   225  		}
   226  
   227  		sockFlags := 0
   228  		if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
   229  			ce.Lock()
   230  			if ce.ListeningLocked() {
   231  				// For unix domain sockets, linux reports a single flag
   232  				// value if the socket is listening, of __SO_ACCEPTCON.
   233  				sockFlags = linux.SO_ACCEPTCON
   234  			}
   235  			ce.Unlock()
   236  		}
   237  
   238  		// Get inode number.
   239  		var ino uint64
   240  		stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_INO})
   241  		if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
   242  			log.Warningf("Failed to retrieve ino for socket file: %v", statErr)
   243  		} else {
   244  			ino = stat.Ino
   245  		}
   246  
   247  		// In the socket entry below, the value for the 'Num' field requires
   248  		// some consideration. Linux prints the address to the struct
   249  		// unix_sock representing a socket in the kernel, but may redact the
   250  		// value for unprivileged users depending on the kptr_restrict
   251  		// sysctl.
   252  		//
   253  		// One use for this field is to allow a privileged user to
   254  		// introspect into the kernel memory to determine information about
   255  		// a socket not available through procfs, such as the socket's peer.
   256  		//
   257  		// In gvisor, returning a pointer to our internal structures would
   258  		// be pointless, as it wouldn't match the memory layout for struct
   259  		// unix_sock, making introspection difficult. We could populate a
   260  		// struct unix_sock with the appropriate data, but even that
   261  		// requires consideration for which kernel version to emulate, as
   262  		// the definition of this struct changes over time.
   263  		//
   264  		// For now, we always redact this pointer.
   265  		fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d",
   266  			(*unix.Socket)(nil),    // Num, pointer to kernel socket struct.
   267  			s.ReadRefs()-1,         // RefCount, don't count our own ref.
   268  			0,                      // Protocol, always 0 for UDS.
   269  			sockFlags,              // Flags.
   270  			sops.Endpoint().Type(), // Type.
   271  			sops.State(),           // State.
   272  			ino,                    // Inode.
   273  		)
   274  
   275  		// Path
   276  		if len(addr.Addr) != 0 {
   277  			if addr.Addr[0] == 0 {
   278  				// Abstract path.
   279  				fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
   280  			} else {
   281  				fmt.Fprintf(buf, " %s", string(addr.Addr))
   282  			}
   283  		}
   284  		fmt.Fprintf(buf, "\n")
   285  
   286  		s.DecRef(ctx)
   287  	}
   288  	return nil
   289  }
   290  
   291  func networkToHost16(n uint16) uint16 {
   292  	// n is in network byte order, so is big-endian. The most-significant byte
   293  	// should be stored in the lower address.
   294  	//
   295  	// We manually inline binary.BigEndian.Uint16() because Go does not support
   296  	// non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to
   297  	// binary.BigEndian.Uint16() require a read of binary.BigEndian and an
   298  	// interface method call, defeating inlining.
   299  	buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
   300  	return hostarch.ByteOrder.Uint16(buf[:])
   301  }
   302  
   303  func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
   304  	switch family {
   305  	case linux.AF_INET:
   306  		var a linux.SockAddrInet
   307  		if i != nil {
   308  			a = *i.(*linux.SockAddrInet)
   309  		}
   310  
   311  		// linux.SockAddrInet.Port is stored in the network byte order and is
   312  		// printed like a number in host byte order. Note that all numbers in host
   313  		// byte order are printed with the most-significant byte first when
   314  		// formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux.
   315  		port := networkToHost16(a.Port)
   316  
   317  		// linux.SockAddrInet.Addr is stored as a byte slice in big-endian order
   318  		// (i.e. most-significant byte in index 0). Linux represents this as a
   319  		// __be32 which is a typedef for an unsigned int, and is printed with
   320  		// %X. This means that for a little-endian machine, Linux prints the
   321  		// least-significant byte of the address first. To emulate this, we first
   322  		// invert the byte order for the address using hostarch.ByteOrder.Uint32,
   323  		// which makes it have the equivalent encoding to a __be32 on a little
   324  		// endian machine. Note that this operation is a no-op on a big endian
   325  		// machine. Then similar to Linux, we format it with %X, which will print
   326  		// the most-significant byte of the __be32 address first, which is now
   327  		// actually the least-significant byte of the original address in
   328  		// linux.SockAddrInet.Addr on little endian machines, due to the conversion.
   329  		addr := hostarch.ByteOrder.Uint32(a.Addr[:])
   330  
   331  		fmt.Fprintf(w, "%08X:%04X ", addr, port)
   332  	case linux.AF_INET6:
   333  		var a linux.SockAddrInet6
   334  		if i != nil {
   335  			a = *i.(*linux.SockAddrInet6)
   336  		}
   337  
   338  		port := networkToHost16(a.Port)
   339  		addr0 := hostarch.ByteOrder.Uint32(a.Addr[0:4])
   340  		addr1 := hostarch.ByteOrder.Uint32(a.Addr[4:8])
   341  		addr2 := hostarch.ByteOrder.Uint32(a.Addr[8:12])
   342  		addr3 := hostarch.ByteOrder.Uint32(a.Addr[12:16])
   343  		fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
   344  	}
   345  }
   346  
   347  func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error {
   348  	// t may be nil here if our caller is not part of a task goroutine. This can
   349  	// happen for example if we're here for "sentryctl cat". When t is nil,
   350  	// degrade gracefully and retrieve what we can.
   351  	t := kernel.TaskFromContext(ctx)
   352  
   353  	for _, se := range k.ListSockets() {
   354  		s := se.Sock
   355  		if !s.TryIncRef() {
   356  			// Racing with socket destruction, this is ok.
   357  			continue
   358  		}
   359  		sops, ok := s.Impl().(socket.Socket)
   360  		if !ok {
   361  			panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s))
   362  		}
   363  		if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
   364  			s.DecRef(ctx)
   365  			// Not tcp4 sockets.
   366  			continue
   367  		}
   368  
   369  		// Linux's documentation for the fields below can be found at
   370  		// https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
   371  		// For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
   372  		// Note that the header doesn't contain labels for all the fields.
   373  
   374  		// Field: sl; entry number.
   375  		fmt.Fprintf(buf, "%4d: ", se.ID)
   376  
   377  		// Field: local_adddress.
   378  		var localAddr linux.SockAddr
   379  		if t != nil {
   380  			if local, _, err := sops.GetSockName(t); err == nil {
   381  				localAddr = local
   382  			}
   383  		}
   384  		writeInetAddr(buf, family, localAddr)
   385  
   386  		// Field: rem_address.
   387  		var remoteAddr linux.SockAddr
   388  		if t != nil {
   389  			if remote, _, err := sops.GetPeerName(t); err == nil {
   390  				remoteAddr = remote
   391  			}
   392  		}
   393  		writeInetAddr(buf, family, remoteAddr)
   394  
   395  		// Field: state; socket state.
   396  		fmt.Fprintf(buf, "%02X ", sops.State())
   397  
   398  		// Field: tx_queue, rx_queue; number of packets in the transmit and
   399  		// receive queue. Unimplemented.
   400  		fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
   401  
   402  		// Field: tr, tm->when; timer active state and number of jiffies
   403  		// until timer expires. Unimplemented.
   404  		fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
   405  
   406  		// Field: retrnsmt; number of unrecovered RTO timeouts.
   407  		// Unimplemented.
   408  		fmt.Fprintf(buf, "%08X ", 0)
   409  
   410  		stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO})
   411  
   412  		// Field: uid.
   413  		if statErr != nil || stat.Mask&linux.STATX_UID == 0 {
   414  			log.Warningf("Failed to retrieve uid for socket file: %v", statErr)
   415  			fmt.Fprintf(buf, "%5d ", 0)
   416  		} else {
   417  			creds := auth.CredentialsFromContext(ctx)
   418  			fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow()))
   419  		}
   420  
   421  		// Field: timeout; number of unanswered 0-window probes.
   422  		// Unimplemented.
   423  		fmt.Fprintf(buf, "%8d ", 0)
   424  
   425  		// Field: inode.
   426  		if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
   427  			log.Warningf("Failed to retrieve inode for socket file: %v", statErr)
   428  			fmt.Fprintf(buf, "%8d ", 0)
   429  		} else {
   430  			fmt.Fprintf(buf, "%8d ", stat.Ino)
   431  		}
   432  
   433  		// Field: refcount. Don't count the ref we obtain while deferencing
   434  		// the weakref to this socket.
   435  		fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
   436  
   437  		// Field: Socket struct address. Redacted due to the same reason as
   438  		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
   439  		fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
   440  
   441  		// Field: retransmit timeout. Unimplemented.
   442  		fmt.Fprintf(buf, "%d ", 0)
   443  
   444  		// Field: predicted tick of soft clock (delayed ACK control data).
   445  		// Unimplemented.
   446  		fmt.Fprintf(buf, "%d ", 0)
   447  
   448  		// Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
   449  		fmt.Fprintf(buf, "%d ", 0)
   450  
   451  		// Field: sending congestion window, Unimplemented.
   452  		fmt.Fprintf(buf, "%d ", 0)
   453  
   454  		// Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
   455  		// Unimplemented, report as large threshold.
   456  		fmt.Fprintf(buf, "%d", -1)
   457  
   458  		fmt.Fprintf(buf, "\n")
   459  
   460  		s.DecRef(ctx)
   461  	}
   462  
   463  	return nil
   464  }
   465  
   466  // netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp.
   467  //
   468  // +stateify savable
   469  type netTCPData struct {
   470  	kernfs.DynamicBytesFile
   471  
   472  	kernel *kernel.Kernel
   473  }
   474  
   475  var _ dynamicInode = (*netTCPData)(nil)
   476  
   477  func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   478  	buf.WriteString("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode                                                     \n")
   479  	return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET)
   480  }
   481  
   482  // netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6.
   483  //
   484  // +stateify savable
   485  type netTCP6Data struct {
   486  	kernfs.DynamicBytesFile
   487  
   488  	kernel *kernel.Kernel
   489  }
   490  
   491  var _ dynamicInode = (*netTCP6Data)(nil)
   492  
   493  func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error {
   494  	buf.WriteString("  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode\n")
   495  	return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6)
   496  }
   497  
   498  // netUDPData implements vfs.DynamicBytesSource for /proc/net/udp.
   499  //
   500  // +stateify savable
   501  type netUDPData struct {
   502  	kernfs.DynamicBytesFile
   503  
   504  	kernel *kernel.Kernel
   505  }
   506  
   507  var _ dynamicInode = (*netUDPData)(nil)
   508  
   509  // Generate implements vfs.DynamicBytesSource.Generate.
   510  func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   511  	// t may be nil here if our caller is not part of a task goroutine. This can
   512  	// happen for example if we're here for "sentryctl cat". When t is nil,
   513  	// degrade gracefully and retrieve what we can.
   514  	t := kernel.TaskFromContext(ctx)
   515  
   516  	buf.WriteString("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops             \n")
   517  
   518  	for _, se := range d.kernel.ListSockets() {
   519  		s := se.Sock
   520  		if !s.TryIncRef() {
   521  			// Racing with socket destruction, this is ok.
   522  			continue
   523  		}
   524  		sops, ok := s.Impl().(socket.Socket)
   525  		if !ok {
   526  			panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s))
   527  		}
   528  		if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM {
   529  			s.DecRef(ctx)
   530  			// Not udp4 socket.
   531  			continue
   532  		}
   533  
   534  		// For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock().
   535  
   536  		// Field: sl; entry number.
   537  		fmt.Fprintf(buf, "%5d: ", se.ID)
   538  
   539  		// Field: local_adddress.
   540  		var localAddr linux.SockAddrInet
   541  		if t != nil {
   542  			if local, _, err := sops.GetSockName(t); err == nil {
   543  				localAddr = *local.(*linux.SockAddrInet)
   544  			}
   545  		}
   546  		writeInetAddr(buf, linux.AF_INET, &localAddr)
   547  
   548  		// Field: rem_address.
   549  		var remoteAddr linux.SockAddrInet
   550  		if t != nil {
   551  			if remote, _, err := sops.GetPeerName(t); err == nil {
   552  				remoteAddr = *remote.(*linux.SockAddrInet)
   553  			}
   554  		}
   555  		writeInetAddr(buf, linux.AF_INET, &remoteAddr)
   556  
   557  		// Field: state; socket state.
   558  		fmt.Fprintf(buf, "%02X ", sops.State())
   559  
   560  		// Field: tx_queue, rx_queue; number of packets in the transmit and
   561  		// receive queue. Unimplemented.
   562  		fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
   563  
   564  		// Field: tr, tm->when. Always 0 for UDP.
   565  		fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
   566  
   567  		// Field: retrnsmt. Always 0 for UDP.
   568  		fmt.Fprintf(buf, "%08X ", 0)
   569  
   570  		stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO})
   571  
   572  		// Field: uid.
   573  		if statErr != nil || stat.Mask&linux.STATX_UID == 0 {
   574  			log.Warningf("Failed to retrieve uid for socket file: %v", statErr)
   575  			fmt.Fprintf(buf, "%5d ", 0)
   576  		} else {
   577  			creds := auth.CredentialsFromContext(ctx)
   578  			fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow()))
   579  		}
   580  
   581  		// Field: timeout. Always 0 for UDP.
   582  		fmt.Fprintf(buf, "%8d ", 0)
   583  
   584  		// Field: inode.
   585  		if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
   586  			log.Warningf("Failed to retrieve inode for socket file: %v", statErr)
   587  			fmt.Fprintf(buf, "%8d ", 0)
   588  		} else {
   589  			fmt.Fprintf(buf, "%8d ", stat.Ino)
   590  		}
   591  
   592  		// Field: ref; reference count on the socket inode. Don't count the ref
   593  		// we obtain while deferencing the weakref to this socket.
   594  		fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
   595  
   596  		// Field: Socket struct address. Redacted due to the same reason as
   597  		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
   598  		fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
   599  
   600  		// Field: drops; number of dropped packets. Unimplemented.
   601  		fmt.Fprintf(buf, "%d", 0)
   602  
   603  		fmt.Fprintf(buf, "\n")
   604  
   605  		s.DecRef(ctx)
   606  	}
   607  	return nil
   608  }
   609  
   610  // netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp.
   611  //
   612  // +stateify savable
   613  type netSnmpData struct {
   614  	kernfs.DynamicBytesFile
   615  
   616  	stack inet.Stack
   617  }
   618  
   619  var _ dynamicInode = (*netSnmpData)(nil)
   620  
   621  // +stateify savable
   622  type snmpLine struct {
   623  	prefix string
   624  	header string
   625  }
   626  
   627  var snmp = []snmpLine{
   628  	{
   629  		prefix: "Ip",
   630  		header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates",
   631  	},
   632  	{
   633  		prefix: "Icmp",
   634  		header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps",
   635  	},
   636  	{
   637  		prefix: "IcmpMsg",
   638  	},
   639  	{
   640  		prefix: "Tcp",
   641  		header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors",
   642  	},
   643  	{
   644  		prefix: "Udp",
   645  		header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
   646  	},
   647  	{
   648  		prefix: "UdpLite",
   649  		header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
   650  	},
   651  }
   652  
   653  func toSlice(a any) []uint64 {
   654  	v := reflect.Indirect(reflect.ValueOf(a))
   655  	return v.Slice(0, v.Len()).Interface().([]uint64)
   656  }
   657  
   658  func sprintSlice(s []uint64) string {
   659  	if len(s) == 0 {
   660  		return ""
   661  	}
   662  	r := fmt.Sprint(s)
   663  	return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice.
   664  }
   665  
   666  // Generate implements vfs.DynamicBytesSource.Generate.
   667  func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   668  	types := []any{
   669  		&inet.StatSNMPIP{},
   670  		&inet.StatSNMPICMP{},
   671  		nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats.
   672  		&inet.StatSNMPTCP{},
   673  		&inet.StatSNMPUDP{},
   674  		&inet.StatSNMPUDPLite{},
   675  	}
   676  	for i, stat := range types {
   677  		line := snmp[i]
   678  		if stat == nil {
   679  			fmt.Fprintf(buf, "%s:\n", line.prefix)
   680  			fmt.Fprintf(buf, "%s:\n", line.prefix)
   681  			continue
   682  		}
   683  		if err := d.stack.Statistics(stat, line.prefix); err != nil {
   684  			if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
   685  				log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
   686  			} else {
   687  				log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
   688  			}
   689  		}
   690  
   691  		fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header)
   692  
   693  		if line.prefix == "Tcp" {
   694  			tcp := stat.(*inet.StatSNMPTCP)
   695  			// "Tcp" needs special processing because MaxConn is signed. RFC 2012.
   696  			fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:]))
   697  		} else {
   698  			fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat)))
   699  		}
   700  	}
   701  	return nil
   702  }
   703  
   704  // netRouteData implements vfs.DynamicBytesSource for /proc/net/route.
   705  //
   706  // +stateify savable
   707  type netRouteData struct {
   708  	kernfs.DynamicBytesFile
   709  
   710  	stack inet.Stack
   711  }
   712  
   713  var _ dynamicInode = (*netRouteData)(nil)
   714  
   715  // Generate implements vfs.DynamicBytesSource.Generate.
   716  // See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
   717  func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   718  	fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT")
   719  
   720  	interfaces := d.stack.Interfaces()
   721  	for _, rt := range d.stack.RouteTable() {
   722  		// /proc/net/route only includes ipv4 routes.
   723  		if rt.Family != linux.AF_INET {
   724  			continue
   725  		}
   726  
   727  		// /proc/net/route does not include broadcast or multicast routes.
   728  		if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST {
   729  			continue
   730  		}
   731  
   732  		iface, ok := interfaces[rt.OutputInterface]
   733  		if !ok || iface.Name == "lo" {
   734  			continue
   735  		}
   736  
   737  		var (
   738  			gw     uint32
   739  			prefix uint32
   740  			flags  = linux.RTF_UP
   741  		)
   742  		if len(rt.GatewayAddr) == header.IPv4AddressSize {
   743  			flags |= linux.RTF_GATEWAY
   744  			gw = hostarch.ByteOrder.Uint32(rt.GatewayAddr)
   745  		}
   746  		if len(rt.DstAddr) == header.IPv4AddressSize {
   747  			prefix = hostarch.ByteOrder.Uint32(rt.DstAddr)
   748  		}
   749  		l := fmt.Sprintf(
   750  			"%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
   751  			iface.Name,
   752  			prefix,
   753  			gw,
   754  			flags,
   755  			0, // RefCnt.
   756  			0, // Use.
   757  			0, // Metric.
   758  			(uint32(1)<<rt.DstLen)-1,
   759  			0, // MTU.
   760  			0, // Window.
   761  			0, // RTT.
   762  		)
   763  		fmt.Fprintf(buf, "%-127s\n", l)
   764  	}
   765  	return nil
   766  }
   767  
   768  // netStatData implements vfs.DynamicBytesSource for /proc/net/netstat.
   769  //
   770  // +stateify savable
   771  type netStatData struct {
   772  	kernfs.DynamicBytesFile
   773  
   774  	stack inet.Stack
   775  }
   776  
   777  var _ dynamicInode = (*netStatData)(nil)
   778  
   779  // Generate implements vfs.DynamicBytesSource.Generate.
   780  // See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
   781  func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   782  	buf.WriteString("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed " +
   783  		"EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps " +
   784  		"LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive " +
   785  		"PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost " +
   786  		"ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog " +
   787  		"TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser " +
   788  		"TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging " +
   789  		"TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo " +
   790  		"TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit " +
   791  		"TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans " +
   792  		"TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes " +
   793  		"TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail " +
   794  		"TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent " +
   795  		"TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose " +
   796  		"TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed " +
   797  		"TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld " +
   798  		"TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected " +
   799  		"TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback " +
   800  		"TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter " +
   801  		"TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail " +
   802  		"TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK " +
   803  		"TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail " +
   804  		"TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow " +
   805  		"TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets " +
   806  		"TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv " +
   807  		"TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect " +
   808  		"TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " +
   809  		"TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " +
   810  		"TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " +
   811  		"TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")
   812  	return nil
   813  }