github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/socket.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package socket provides the interfaces that need to be provided by socket
    16  // implementations and providers, as well as per family demultiplexing of socket
    17  // creation.
    18  package socket
    19  
    20  import (
    21  	"bytes"
    22  	"fmt"
    23  	"sync/atomic"
    24  
    25  	"golang.org/x/sys/unix"
    26  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    27  	"github.com/SagerNet/gvisor/pkg/context"
    28  	"github.com/SagerNet/gvisor/pkg/hostarch"
    29  	"github.com/SagerNet/gvisor/pkg/marshal"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/device"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    32  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    33  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    34  	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
    35  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    36  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    37  	"github.com/SagerNet/gvisor/pkg/syserr"
    38  	"github.com/SagerNet/gvisor/pkg/tcpip"
    39  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    40  	"github.com/SagerNet/gvisor/pkg/usermem"
    41  )
    42  
// ControlMessages represents the union of unix control messages and tcpip
// control messages: Unix carries the transport-level (unix socket) control
// messages and IP carries the IP control messages in their Linux form.
type ControlMessages struct {
	Unix transport.ControlMessages
	IP   IPControlMessages
}
    49  
    50  // packetInfoToLinux converts IPPacketInfo from tcpip format to Linux format.
    51  func packetInfoToLinux(packetInfo tcpip.IPPacketInfo) linux.ControlMessageIPPacketInfo {
    52  	var p linux.ControlMessageIPPacketInfo
    53  	p.NIC = int32(packetInfo.NIC)
    54  	copy(p.LocalAddr[:], []byte(packetInfo.LocalAddr))
    55  	copy(p.DestinationAddr[:], []byte(packetInfo.DestinationAddr))
    56  	return p
    57  }
    58  
    59  // errOriginToLinux maps tcpip socket origin to Linux socket origin constants.
    60  func errOriginToLinux(origin tcpip.SockErrOrigin) uint8 {
    61  	switch origin {
    62  	case tcpip.SockExtErrorOriginNone:
    63  		return linux.SO_EE_ORIGIN_NONE
    64  	case tcpip.SockExtErrorOriginLocal:
    65  		return linux.SO_EE_ORIGIN_LOCAL
    66  	case tcpip.SockExtErrorOriginICMP:
    67  		return linux.SO_EE_ORIGIN_ICMP
    68  	case tcpip.SockExtErrorOriginICMP6:
    69  		return linux.SO_EE_ORIGIN_ICMP6
    70  	default:
    71  		panic(fmt.Sprintf("unknown socket origin: %d", origin))
    72  	}
    73  }
    74  
    75  // sockErrCmsgToLinux converts SockError control message from tcpip format to
    76  // Linux format.
    77  func sockErrCmsgToLinux(sockErr *tcpip.SockError) linux.SockErrCMsg {
    78  	if sockErr == nil {
    79  		return nil
    80  	}
    81  
    82  	ee := linux.SockExtendedErr{
    83  		Errno:  uint32(syserr.TranslateNetstackError(sockErr.Err).ToLinux()),
    84  		Origin: errOriginToLinux(sockErr.Cause.Origin()),
    85  		Type:   sockErr.Cause.Type(),
    86  		Code:   sockErr.Cause.Code(),
    87  		Info:   sockErr.Cause.Info(),
    88  	}
    89  
    90  	switch sockErr.NetProto {
    91  	case header.IPv4ProtocolNumber:
    92  		errMsg := &linux.SockErrCMsgIPv4{SockExtendedErr: ee}
    93  		if len(sockErr.Offender.Addr) > 0 {
    94  			addr, _ := ConvertAddress(linux.AF_INET, sockErr.Offender)
    95  			errMsg.Offender = *addr.(*linux.SockAddrInet)
    96  		}
    97  		return errMsg
    98  	case header.IPv6ProtocolNumber:
    99  		errMsg := &linux.SockErrCMsgIPv6{SockExtendedErr: ee}
   100  		if len(sockErr.Offender.Addr) > 0 {
   101  			addr, _ := ConvertAddress(linux.AF_INET6, sockErr.Offender)
   102  			errMsg.Offender = *addr.(*linux.SockAddrInet6)
   103  		}
   104  		return errMsg
   105  	default:
   106  		panic(fmt.Sprintf("invalid net proto for creating SockErrCMsg: %d", sockErr.NetProto))
   107  	}
   108  }
   109  
   110  // NewIPControlMessages converts the tcpip ControlMessgaes (which does not
   111  // have Linux specific format) to Linux format.
   112  func NewIPControlMessages(family int, cmgs tcpip.ControlMessages) IPControlMessages {
   113  	var orgDstAddr linux.SockAddr
   114  	if cmgs.HasOriginalDstAddress {
   115  		orgDstAddr, _ = ConvertAddress(family, cmgs.OriginalDstAddress)
   116  	}
   117  	return IPControlMessages{
   118  		HasTimestamp:       cmgs.HasTimestamp,
   119  		Timestamp:          cmgs.Timestamp,
   120  		HasInq:             cmgs.HasInq,
   121  		Inq:                cmgs.Inq,
   122  		HasTOS:             cmgs.HasTOS,
   123  		TOS:                cmgs.TOS,
   124  		HasTClass:          cmgs.HasTClass,
   125  		TClass:             cmgs.TClass,
   126  		HasIPPacketInfo:    cmgs.HasIPPacketInfo,
   127  		PacketInfo:         packetInfoToLinux(cmgs.PacketInfo),
   128  		OriginalDstAddress: orgDstAddr,
   129  		SockErr:            sockErrCmsgToLinux(cmgs.SockErr),
   130  	}
   131  }
   132  
// IPControlMessages contains socket control messages for IP sockets.
// This can contain Linux specific structures unlike tcpip.ControlMessages.
//
// +stateify savable
type IPControlMessages struct {
	// HasTimestamp indicates whether Timestamp is valid/set.
	HasTimestamp bool

	// Timestamp is the time (in ns) that the last packet used to create
	// the read data was received.
	Timestamp int64

	// HasInq indicates whether Inq is valid/set.
	HasInq bool

	// Inq is the number of bytes ready to be received.
	Inq int32

	// HasTOS indicates whether TOS is valid/set.
	HasTOS bool

	// TOS is the IPv4 type of service of the associated packet.
	TOS uint8

	// HasTClass indicates whether TClass is valid/set.
	HasTClass bool

	// TClass is the IPv6 traffic class of the associated packet.
	TClass uint32

	// HasIPPacketInfo indicates whether PacketInfo is set.
	HasIPPacketInfo bool

	// PacketInfo holds interface and address data on an incoming packet.
	PacketInfo linux.ControlMessageIPPacketInfo

	// OriginalDstAddress holds the original destination address
	// and port of the incoming packet.
	OriginalDstAddress linux.SockAddr

	// SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE).
	SockErr linux.SockErrCMsg
}
   176  
   177  // Release releases Unix domain socket credentials and rights.
   178  func (c *ControlMessages) Release(ctx context.Context) {
   179  	c.Unix.Release(ctx)
   180  }
   181  
// Socket is an interface combining fs.FileOperations and SocketOps,
// representing a VFS1 socket file.
type Socket interface {
	// fs.FileOperations provides the file side of the socket.
	fs.FileOperations
	// SocketOps provides the socket syscall entry points.
	SocketOps
}
   188  
// SocketVFS2 is an interface combining vfs.FileDescriptionImpl and SocketOps,
// representing a VFS2 socket file.
type SocketVFS2 interface {
	// vfs.FileDescriptionImpl provides the file description side of the
	// socket.
	vfs.FileDescriptionImpl
	// SocketOps provides the socket syscall entry points.
	SocketOps
}
   195  
// SocketOps is the interface containing socket syscalls used by the syscall
// layer to redirect them to the appropriate implementation.
//
// It is implemented by both Socket and SocketVFS2.
type SocketOps interface {
	// Connect implements the connect(2) linux syscall.
	Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error

	// Accept implements the accept4(2) linux syscall.
	// Returns the fd, the peer address, the real peer address length and an
	// error.
	//
	// NOTE(review): presumably the peer address and its length are only set
	// when peerRequested is true — confirm against implementations.
	Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error)

	// Bind implements the bind(2) linux syscall.
	Bind(t *kernel.Task, sockaddr []byte) *syserr.Error

	// Listen implements the listen(2) linux syscall.
	Listen(t *kernel.Task, backlog int) *syserr.Error

	// Shutdown implements the shutdown(2) linux syscall.
	Shutdown(t *kernel.Task, how int) *syserr.Error

	// GetSockOpt implements the getsockopt(2) linux syscall.
	GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error)

	// SetSockOpt implements the setsockopt(2) linux syscall.
	SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error

	// GetSockName implements the getsockname(2) linux syscall.
	//
	// addrLen is the address length to be returned to the application, not
	// necessarily the actual length of the address.
	GetSockName(t *kernel.Task) (addr linux.SockAddr, addrLen uint32, err *syserr.Error)

	// GetPeerName implements the getpeername(2) linux syscall.
	//
	// addrLen is the address length to be returned to the application, not
	// necessarily the actual length of the address.
	GetPeerName(t *kernel.Task) (addr linux.SockAddr, addrLen uint32, err *syserr.Error)

	// RecvMsg implements the recvmsg(2) linux syscall.
	//
	// senderAddrLen is the address length to be returned to the application,
	// not necessarily the actual length of the address.
	//
	// flags control how RecvMsg should be completed. msgFlags indicate how
	// the RecvMsg call was completed. Note that control message truncation
	// may still be required even if the MSG_CTRUNC bit is not set in
	// msgFlags. In that case, the caller should set MSG_CTRUNC appropriately.
	//
	// If err != nil, the recv was not successful.
	RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages ControlMessages, err *syserr.Error)

	// SendMsg implements the sendmsg(2) linux syscall. SendMsg does not take
	// ownership of the ControlMessage on error.
	//
	// If n > 0, err will either be nil or an error from t.Block.
	SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages ControlMessages) (n int, err *syserr.Error)

	// SetRecvTimeout sets the timeout (in ns) for recv operations. Zero means
	// no timeout, and negative means DONTWAIT.
	SetRecvTimeout(nanoseconds int64)

	// RecvTimeout gets the current timeout (in ns) for recv operations. Zero
	// means no timeout, and negative means DONTWAIT.
	RecvTimeout() int64

	// SetSendTimeout sets the timeout (in ns) for send operations. Zero means
	// no timeout, and negative means DONTWAIT.
	SetSendTimeout(nanoseconds int64)

	// SendTimeout gets the current timeout (in ns) for send operations. Zero
	// means no timeout, and negative means DONTWAIT.
	SendTimeout() int64

	// State returns the current state of the socket, as represented by Linux in
	// procfs. The returned state value is protocol-specific.
	State() uint32

	// Type returns the family, socket type and protocol of the socket.
	Type() (family int, skType linux.SockType, protocol int)
}
   278  
// Provider is the interface implemented by providers of sockets for specific
// address families (e.g., AF_INET).
type Provider interface {
	// Socket creates a new socket.
	//
	// If both a nil file _and_ a nil error are returned, it means that the
	// protocol is not supported. A non-nil error should only be returned
	// if the protocol is supported, but an error occurs during creation.
	Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *syserr.Error)

	// Pair creates a pair of connected sockets.
	//
	// See Socket for error information.
	Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error)
}
   294  
   295  // families holds a map of all known address families and their providers.
   296  var families = make(map[int][]Provider)
   297  
   298  // RegisterProvider registers the provider of a given address family so that
   299  // sockets of that type can be created via socket() and/or socketpair()
   300  // syscalls.
   301  //
   302  // This should only be called during the initialization of the address family.
   303  func RegisterProvider(family int, provider Provider) {
   304  	families[family] = append(families[family], provider)
   305  }
   306  
   307  // New creates a new socket with the given family, type and protocol.
   308  func New(t *kernel.Task, family int, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) {
   309  	for _, p := range families[family] {
   310  		s, err := p.Socket(t, stype, protocol)
   311  		if err != nil {
   312  			return nil, err
   313  		}
   314  		if s != nil {
   315  			t.Kernel().RecordSocket(s)
   316  			return s, nil
   317  		}
   318  	}
   319  
   320  	return nil, syserr.ErrAddressFamilyNotSupported
   321  }
   322  
   323  // Pair creates a new connected socket pair with the given family, type and
   324  // protocol.
   325  func Pair(t *kernel.Task, family int, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) {
   326  	providers, ok := families[family]
   327  	if !ok {
   328  		return nil, nil, syserr.ErrAddressFamilyNotSupported
   329  	}
   330  
   331  	for _, p := range providers {
   332  		s1, s2, err := p.Pair(t, stype, protocol)
   333  		if err != nil {
   334  			return nil, nil, err
   335  		}
   336  		if s1 != nil && s2 != nil {
   337  			k := t.Kernel()
   338  			k.RecordSocket(s1)
   339  			k.RecordSocket(s2)
   340  			return s1, s2, nil
   341  		}
   342  	}
   343  
   344  	return nil, nil, syserr.ErrSocketNotSupported
   345  }
   346  
   347  // NewDirent returns a sockfs fs.Dirent that resides on device d.
   348  func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent {
   349  	ino := d.NextIno()
   350  	iops := &fsutil.SimpleFileInode{
   351  		InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{
   352  			User: fs.PermMask{Read: true, Write: true},
   353  		}, linux.SOCKFS_MAGIC),
   354  	}
   355  	inode := fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{
   356  		Type:      fs.Socket,
   357  		DeviceID:  d.DeviceID(),
   358  		InodeID:   ino,
   359  		BlockSize: hostarch.PageSize,
   360  	})
   361  
   362  	// Dirent name matches net/socket.c:sockfs_dname.
   363  	return fs.NewDirent(ctx, inode, fmt.Sprintf("socket:[%d]", ino))
   364  }
   365  
// ProviderVFS2 is the vfs2 interface implemented by providers of sockets for
// specific address families (e.g., AF_INET).
type ProviderVFS2 interface {
	// Socket creates a new socket.
	//
	// If both a nil file description _and_ a nil error are returned, it means
	// that the protocol is not supported. A non-nil error should only be
	// returned if the protocol is supported, but an error occurs during
	// creation.
	Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error)

	// Pair creates a pair of connected sockets.
	//
	// See Socket for error information.
	Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error)
}
   381  
   382  // familiesVFS2 holds a map of all known address families and their providers.
   383  var familiesVFS2 = make(map[int][]ProviderVFS2)
   384  
   385  // RegisterProviderVFS2 registers the provider of a given address family so that
   386  // sockets of that type can be created via socket() and/or socketpair()
   387  // syscalls.
   388  //
   389  // This should only be called during the initialization of the address family.
   390  func RegisterProviderVFS2(family int, provider ProviderVFS2) {
   391  	familiesVFS2[family] = append(familiesVFS2[family], provider)
   392  }
   393  
   394  // NewVFS2 creates a new socket with the given family, type and protocol.
   395  func NewVFS2(t *kernel.Task, family int, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) {
   396  	for _, p := range familiesVFS2[family] {
   397  		s, err := p.Socket(t, stype, protocol)
   398  		if err != nil {
   399  			return nil, err
   400  		}
   401  		if s != nil {
   402  			t.Kernel().RecordSocketVFS2(s)
   403  			return s, nil
   404  		}
   405  	}
   406  
   407  	return nil, syserr.ErrAddressFamilyNotSupported
   408  }
   409  
   410  // PairVFS2 creates a new connected socket pair with the given family, type and
   411  // protocol.
   412  func PairVFS2(t *kernel.Task, family int, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) {
   413  	providers, ok := familiesVFS2[family]
   414  	if !ok {
   415  		return nil, nil, syserr.ErrAddressFamilyNotSupported
   416  	}
   417  
   418  	for _, p := range providers {
   419  		s1, s2, err := p.Pair(t, stype, protocol)
   420  		if err != nil {
   421  			return nil, nil, err
   422  		}
   423  		if s1 != nil && s2 != nil {
   424  			k := t.Kernel()
   425  			k.RecordSocketVFS2(s1)
   426  			k.RecordSocketVFS2(s2)
   427  			return s1, s2, nil
   428  		}
   429  	}
   430  
   431  	return nil, nil, syserr.ErrSocketNotSupported
   432  }
   433  
   434  // SendReceiveTimeout stores timeouts for send and receive calls.
   435  //
   436  // It is meant to be embedded into Socket implementations to help satisfy the
   437  // interface.
   438  //
   439  // Care must be taken when copying SendReceiveTimeout as it contains atomic
   440  // variables.
   441  //
   442  // +stateify savable
   443  type SendReceiveTimeout struct {
   444  	// send is length of the send timeout in nanoseconds.
   445  	//
   446  	// send must be accessed atomically.
   447  	send int64
   448  
   449  	// recv is length of the receive timeout in nanoseconds.
   450  	//
   451  	// recv must be accessed atomically.
   452  	recv int64
   453  }
   454  
   455  // SetRecvTimeout implements Socket.SetRecvTimeout.
   456  func (to *SendReceiveTimeout) SetRecvTimeout(nanoseconds int64) {
   457  	atomic.StoreInt64(&to.recv, nanoseconds)
   458  }
   459  
   460  // RecvTimeout implements Socket.RecvTimeout.
   461  func (to *SendReceiveTimeout) RecvTimeout() int64 {
   462  	return atomic.LoadInt64(&to.recv)
   463  }
   464  
   465  // SetSendTimeout implements Socket.SetSendTimeout.
   466  func (to *SendReceiveTimeout) SetSendTimeout(nanoseconds int64) {
   467  	atomic.StoreInt64(&to.send, nanoseconds)
   468  }
   469  
   470  // SendTimeout implements Socket.SendTimeout.
   471  func (to *SendReceiveTimeout) SendTimeout() int64 {
   472  	return atomic.LoadInt64(&to.send)
   473  }
   474  
   475  // GetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid.
   476  // It contains names that are valid for GetSockOpt when level is SOL_SOCKET.
   477  func GetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) {
   478  	switch name {
   479  	case linux.SO_ACCEPTCONN,
   480  		linux.SO_BPF_EXTENSIONS,
   481  		linux.SO_COOKIE,
   482  		linux.SO_DOMAIN,
   483  		linux.SO_ERROR,
   484  		linux.SO_GET_FILTER,
   485  		linux.SO_INCOMING_NAPI_ID,
   486  		linux.SO_MEMINFO,
   487  		linux.SO_PEERCRED,
   488  		linux.SO_PEERGROUPS,
   489  		linux.SO_PEERNAME,
   490  		linux.SO_PEERSEC,
   491  		linux.SO_PROTOCOL,
   492  		linux.SO_SNDLOWAT,
   493  		linux.SO_TYPE:
   494  
   495  		t.Kernel().EmitUnimplementedEvent(t)
   496  
   497  	default:
   498  		emitUnimplementedEvent(t, name)
   499  	}
   500  }
   501  
   502  // SetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid.
   503  // It contains names that are valid for SetSockOpt when level is SOL_SOCKET.
   504  func SetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) {
   505  	switch name {
   506  	case linux.SO_ATTACH_BPF,
   507  		linux.SO_ATTACH_FILTER,
   508  		linux.SO_ATTACH_REUSEPORT_CBPF,
   509  		linux.SO_ATTACH_REUSEPORT_EBPF,
   510  		linux.SO_CNX_ADVICE,
   511  		linux.SO_DETACH_FILTER,
   512  		linux.SO_RCVBUFFORCE,
   513  		linux.SO_SNDBUFFORCE:
   514  
   515  		t.Kernel().EmitUnimplementedEvent(t)
   516  
   517  	default:
   518  		emitUnimplementedEvent(t, name)
   519  	}
   520  }
   521  
   522  // emitUnimplementedEvent emits unimplemented event if name is valid. It
   523  // contains names that are common between Get and SetSocketOpt when level is
   524  // SOL_SOCKET.
   525  func emitUnimplementedEvent(t *kernel.Task, name int) {
   526  	switch name {
   527  	case linux.SO_BINDTODEVICE,
   528  		linux.SO_BROADCAST,
   529  		linux.SO_BSDCOMPAT,
   530  		linux.SO_BUSY_POLL,
   531  		linux.SO_DEBUG,
   532  		linux.SO_DONTROUTE,
   533  		linux.SO_INCOMING_CPU,
   534  		linux.SO_KEEPALIVE,
   535  		linux.SO_LINGER,
   536  		linux.SO_LOCK_FILTER,
   537  		linux.SO_MARK,
   538  		linux.SO_MAX_PACING_RATE,
   539  		linux.SO_NOFCS,
   540  		linux.SO_OOBINLINE,
   541  		linux.SO_PASSCRED,
   542  		linux.SO_PASSSEC,
   543  		linux.SO_PEEK_OFF,
   544  		linux.SO_PRIORITY,
   545  		linux.SO_RCVBUF,
   546  		linux.SO_RCVLOWAT,
   547  		linux.SO_RCVTIMEO,
   548  		linux.SO_REUSEADDR,
   549  		linux.SO_REUSEPORT,
   550  		linux.SO_RXQ_OVFL,
   551  		linux.SO_SELECT_ERR_QUEUE,
   552  		linux.SO_SNDBUF,
   553  		linux.SO_SNDTIMEO,
   554  		linux.SO_TIMESTAMP,
   555  		linux.SO_TIMESTAMPING,
   556  		linux.SO_TIMESTAMPNS,
   557  		linux.SO_TXTIME,
   558  		linux.SO_WIFI_STATUS,
   559  		linux.SO_ZEROCOPY:
   560  
   561  		t.Kernel().EmitUnimplementedEvent(t)
   562  	}
   563  }
   564  
   565  // UnmarshalSockAddr unmarshals memory representing a struct sockaddr to one of
   566  // the ABI socket address types.
   567  //
   568  // Precondition: data must be long enough to represent a socket address of the
   569  // given family.
   570  func UnmarshalSockAddr(family int, data []byte) linux.SockAddr {
   571  	switch family {
   572  	case unix.AF_INET:
   573  		var addr linux.SockAddrInet
   574  		addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
   575  		return &addr
   576  	case unix.AF_INET6:
   577  		var addr linux.SockAddrInet6
   578  		addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
   579  		return &addr
   580  	case unix.AF_UNIX:
   581  		var addr linux.SockAddrUnix
   582  		addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
   583  		return &addr
   584  	case unix.AF_NETLINK:
   585  		var addr linux.SockAddrNetlink
   586  		addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
   587  		return &addr
   588  	default:
   589  		panic(fmt.Sprintf("Unsupported socket family %v", family))
   590  	}
   591  }
   592  
   593  var sockAddrLinkSize = (&linux.SockAddrLink{}).SizeBytes()
   594  var sockAddrInetSize = (&linux.SockAddrInet{}).SizeBytes()
   595  var sockAddrInet6Size = (&linux.SockAddrInet6{}).SizeBytes()
   596  
   597  // Ntohs converts a 16-bit number from network byte order to host byte order. It
   598  // assumes that the host is little endian.
   599  func Ntohs(v uint16) uint16 {
   600  	return v<<8 | v>>8
   601  }
   602  
   603  // Htons converts a 16-bit number from host byte order to network byte order. It
   604  // assumes that the host is little endian.
   605  func Htons(v uint16) uint16 {
   606  	return Ntohs(v)
   607  }
   608  
   609  // isLinkLocal determines if the given IPv6 address is link-local. This is the
   610  // case when it has the fe80::/10 prefix. This check is used to determine when
   611  // the NICID is relevant for a given IPv6 address.
   612  func isLinkLocal(addr tcpip.Address) bool {
   613  	return len(addr) >= 2 && addr[0] == 0xfe && addr[1]&0xc0 == 0x80
   614  }
   615  
   616  // ConvertAddress converts the given address to a native format.
   617  func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) {
   618  	switch family {
   619  	case linux.AF_UNIX:
   620  		var out linux.SockAddrUnix
   621  		out.Family = linux.AF_UNIX
   622  		l := len([]byte(addr.Addr))
   623  		for i := 0; i < l; i++ {
   624  			out.Path[i] = int8(addr.Addr[i])
   625  		}
   626  
   627  		// Linux returns the used length of the address struct (including the
   628  		// null terminator) for filesystem paths. The Family field is 2 bytes.
   629  		// It is sometimes allowed to exclude the null terminator if the
   630  		// address length is the max. Abstract and empty paths always return
   631  		// the full exact length.
   632  		if l == 0 || out.Path[0] == 0 || l == len(out.Path) {
   633  			return &out, uint32(2 + l)
   634  		}
   635  		return &out, uint32(3 + l)
   636  
   637  	case linux.AF_INET:
   638  		var out linux.SockAddrInet
   639  		copy(out.Addr[:], addr.Addr)
   640  		out.Family = linux.AF_INET
   641  		out.Port = Htons(addr.Port)
   642  		return &out, uint32(sockAddrInetSize)
   643  
   644  	case linux.AF_INET6:
   645  		var out linux.SockAddrInet6
   646  		if len(addr.Addr) == header.IPv4AddressSize {
   647  			// Copy address in v4-mapped format.
   648  			copy(out.Addr[12:], addr.Addr)
   649  			out.Addr[10] = 0xff
   650  			out.Addr[11] = 0xff
   651  		} else {
   652  			copy(out.Addr[:], addr.Addr)
   653  		}
   654  		out.Family = linux.AF_INET6
   655  		out.Port = Htons(addr.Port)
   656  		if isLinkLocal(addr.Addr) {
   657  			out.Scope_id = uint32(addr.NIC)
   658  		}
   659  		return &out, uint32(sockAddrInet6Size)
   660  
   661  	case linux.AF_PACKET:
   662  		var out linux.SockAddrLink
   663  		out.Family = linux.AF_PACKET
   664  		out.InterfaceIndex = int32(addr.NIC)
   665  		out.HardwareAddrLen = header.EthernetAddressSize
   666  		copy(out.HardwareAddr[:], addr.Addr)
   667  		return &out, uint32(sockAddrLinkSize)
   668  
   669  	default:
   670  		return nil, 0
   671  	}
   672  }
   673  
   674  // BytesToIPAddress converts an IPv4 or IPv6 address from the user to the
   675  // netstack representation taking any addresses into account.
   676  func BytesToIPAddress(addr []byte) tcpip.Address {
   677  	if bytes.Equal(addr, make([]byte, 4)) || bytes.Equal(addr, make([]byte, 16)) {
   678  		return ""
   679  	}
   680  	return tcpip.Address(addr)
   681  }
   682  
   683  // AddressAndFamily reads an sockaddr struct from the given address and
   684  // converts it to the FullAddress format. It supports AF_UNIX, AF_INET,
   685  // AF_INET6, and AF_PACKET addresses.
   686  //
   687  // AddressAndFamily returns an address and its family.
   688  func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
   689  	// Make sure we have at least 2 bytes for the address family.
   690  	if len(addr) < 2 {
   691  		return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument
   692  	}
   693  
   694  	// Get the rest of the fields based on the address family.
   695  	switch family := hostarch.ByteOrder.Uint16(addr); family {
   696  	case linux.AF_UNIX:
   697  		path := addr[2:]
   698  		if len(path) > linux.UnixPathMax {
   699  			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
   700  		}
   701  		// Drop the terminating NUL (if one exists) and everything after
   702  		// it for filesystem (non-abstract) addresses.
   703  		if len(path) > 0 && path[0] != 0 {
   704  			if n := bytes.IndexByte(path[1:], 0); n >= 0 {
   705  				path = path[:n+1]
   706  			}
   707  		}
   708  		return tcpip.FullAddress{
   709  			Addr: tcpip.Address(path),
   710  		}, family, nil
   711  
   712  	case linux.AF_INET:
   713  		var a linux.SockAddrInet
   714  		if len(addr) < sockAddrInetSize {
   715  			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
   716  		}
   717  		a.UnmarshalUnsafe(addr[:sockAddrInetSize])
   718  
   719  		out := tcpip.FullAddress{
   720  			Addr: BytesToIPAddress(a.Addr[:]),
   721  			Port: Ntohs(a.Port),
   722  		}
   723  		return out, family, nil
   724  
   725  	case linux.AF_INET6:
   726  		var a linux.SockAddrInet6
   727  		if len(addr) < sockAddrInet6Size {
   728  			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
   729  		}
   730  		a.UnmarshalUnsafe(addr[:sockAddrInet6Size])
   731  
   732  		out := tcpip.FullAddress{
   733  			Addr: BytesToIPAddress(a.Addr[:]),
   734  			Port: Ntohs(a.Port),
   735  		}
   736  		if isLinkLocal(out.Addr) {
   737  			out.NIC = tcpip.NICID(a.Scope_id)
   738  		}
   739  		return out, family, nil
   740  
   741  	case linux.AF_PACKET:
   742  		var a linux.SockAddrLink
   743  		if len(addr) < sockAddrLinkSize {
   744  			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
   745  		}
   746  		a.UnmarshalUnsafe(addr[:sockAddrLinkSize])
   747  		if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize {
   748  			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
   749  		}
   750  
   751  		return tcpip.FullAddress{
   752  			NIC:  tcpip.NICID(a.InterfaceIndex),
   753  			Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]),
   754  		}, family, nil
   755  
   756  	case linux.AF_UNSPEC:
   757  		return tcpip.FullAddress{}, family, nil
   758  
   759  	default:
   760  		return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported
   761  	}
   762  }