github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/abi/linux/socket.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/marshal"
    19  )
    20  
    21  // Address families (AF_*), from linux/socket.h, numbered contiguously from 0 to 40.
    22  const (
    23  	AF_UNSPEC     = 0
    24  	AF_UNIX       = 1
    25  	AF_INET       = 2
    26  	AF_AX25       = 3
    27  	AF_IPX        = 4
    28  	AF_APPLETALK  = 5
    29  	AF_NETROM     = 6
    30  	AF_BRIDGE     = 7
    31  	AF_ATMPVC     = 8
    32  	AF_X25        = 9
    33  	AF_INET6      = 10
    34  	AF_ROSE       = 11
    35  	AF_DECnet     = 12
    36  	AF_NETBEUI    = 13
    37  	AF_SECURITY   = 14
    38  	AF_KEY        = 15
    39  	AF_NETLINK    = 16
    40  	AF_PACKET     = 17
    41  	AF_ASH        = 18
    42  	AF_ECONET     = 19
    43  	AF_ATMSVC     = 20
    44  	AF_RDS        = 21
    45  	AF_SNA        = 22
    46  	AF_IRDA       = 23
    47  	AF_PPPOX      = 24
    48  	AF_WANPIPE    = 25
    49  	AF_LLC        = 26
    50  	AF_IB         = 27
    51  	AF_MPLS       = 28
    52  	AF_CAN        = 29
    53  	AF_TIPC       = 30
    54  	AF_BLUETOOTH  = 31
    55  	AF_IUCV       = 32
    56  	AF_RXRPC      = 33
    57  	AF_ISDN       = 34
    58  	AF_PHONET     = 35
    59  	AF_IEEE802154 = 36
    60  	AF_CAIF       = 37
    61  	AF_ALG        = 38
    62  	AF_NFC        = 39
    63  	AF_VSOCK      = 40
    64  )
    65  
    66  // sendmsg(2)/recvmsg(2) flags (MSG_*), from linux/socket.h. Every value is a single bit; two names are aliases.
    67  const (
    68  	MSG_OOB              = 0x1
    69  	MSG_PEEK             = 0x2
    70  	MSG_DONTROUTE        = 0x4
    71  	MSG_TRYHARD          = 0x4 // Same value as MSG_DONTROUTE.
    72  	MSG_CTRUNC           = 0x8
    73  	MSG_PROBE            = 0x10
    74  	MSG_TRUNC            = 0x20
    75  	MSG_DONTWAIT         = 0x40
    76  	MSG_EOR              = 0x80
    77  	MSG_WAITALL          = 0x100
    78  	MSG_FIN              = 0x200
    79  	MSG_EOF              = MSG_FIN // Alias of MSG_FIN (0x200).
    80  	MSG_SYN              = 0x400
    81  	MSG_CONFIRM          = 0x800
    82  	MSG_RST              = 0x1000
    83  	MSG_ERRQUEUE         = 0x2000
    84  	MSG_NOSIGNAL         = 0x4000
    85  	MSG_MORE             = 0x8000
    86  	MSG_WAITFORONE       = 0x10000
    87  	MSG_SENDPAGE_NOTLAST = 0x20000
    88  	MSG_ZEROCOPY         = 0x4000000
    89  	MSG_FASTOPEN         = 0x20000000
    90  	MSG_CMSG_CLOEXEC     = 0x40000000
    91  )
    92  
    93  // Socket option levels (SOL_*) used when setting/getting socket options, from socket.h.
    94  const (
    95  	SOL_IP      = 0
    96  	SOL_SOCKET  = 1
    97  	SOL_TCP     = 6
    98  	SOL_UDP     = 17
    99  	SOL_IPV6    = 41
   100  	SOL_ICMPV6  = 58
   101  	SOL_RAW     = 255
   102  	SOL_PACKET  = 263
   103  	SOL_NETLINK = 270
   104  )
   105  
   106  // A SockType is a type (as opposed to family) of sockets. Its underlying type
   107  // is int, and its defined values are the SOCK_* constants declared below.
   108  type SockType int
   109  
   110  // Socket types (typed SockType), from linux/net.h. Values 7 through 9 are not defined here.
   111  const (
   112  	SOCK_STREAM    SockType = 1
   113  	SOCK_DGRAM     SockType = 2
   114  	SOCK_RAW       SockType = 3
   115  	SOCK_RDM       SockType = 4
   116  	SOCK_SEQPACKET SockType = 5
   117  	SOCK_DCCP      SockType = 6
   118  	SOCK_PACKET    SockType = 10
   119  )
   120  
   121  // SOCK_TYPE_MASK covers all of the above socket types (it selects the low four
   122  // bits). The remaining bits are flags. From linux/net.h.
   123  const SOCK_TYPE_MASK = 0xf
   124  
   125  // socket(2)/socketpair(2)/accept4(2) flags, from linux/net.h. Each reuses the value of the like-named O_* constant.
   126  const (
   127  	SOCK_CLOEXEC  = O_CLOEXEC
   128  	SOCK_NONBLOCK = O_NONBLOCK
   129  )
   130  
   131  // Values for the "how" argument of shutdown(2) (SHUT_*), from <linux/net.h>.
   132  const (
   133  	SHUT_RD   = 0
   134  	SHUT_WR   = 1
   135  	SHUT_RDWR = 2
   136  )
   137  
   138  // Packet types (PACKET_*), from <linux/if_packet.h>.
   139  const (
   140  	PACKET_HOST      = 0 // To us
   141  	PACKET_BROADCAST = 1 // To all
   142  	PACKET_MULTICAST = 2 // To group
   143  	PACKET_OTHERHOST = 3 // To someone else
   144  	PACKET_OUTGOING  = 4 // Outgoing of any type
   145  )
   146  
   147  // Socket options (SO_*), from socket.h. Values 22-24, 54 and 58 are not defined here.
   148  const (
   149  	SO_DEBUG                 = 1
   150  	SO_REUSEADDR             = 2
   151  	SO_TYPE                  = 3
   152  	SO_ERROR                 = 4
   153  	SO_DONTROUTE             = 5
   154  	SO_BROADCAST             = 6
   155  	SO_SNDBUF                = 7
   156  	SO_RCVBUF                = 8
   157  	SO_KEEPALIVE             = 9
   158  	SO_OOBINLINE             = 10
   159  	SO_NO_CHECK              = 11
   160  	SO_PRIORITY              = 12
   161  	SO_LINGER                = 13
   162  	SO_BSDCOMPAT             = 14
   163  	SO_REUSEPORT             = 15
   164  	SO_PASSCRED              = 16
   165  	SO_PEERCRED              = 17
   166  	SO_RCVLOWAT              = 18
   167  	SO_SNDLOWAT              = 19
   168  	SO_RCVTIMEO              = 20
   169  	SO_SNDTIMEO              = 21
   170  	SO_BINDTODEVICE          = 25
   171  	SO_ATTACH_FILTER         = 26
   172  	SO_DETACH_FILTER         = 27
   173  	SO_GET_FILTER            = SO_ATTACH_FILTER // Alias: same value (26) as SO_ATTACH_FILTER.
   174  	SO_PEERNAME              = 28
   175  	SO_TIMESTAMP             = 29
   176  	SO_ACCEPTCONN            = 30
   177  	SO_PEERSEC               = 31
   178  	SO_SNDBUFFORCE           = 32
   179  	SO_RCVBUFFORCE           = 33
   180  	SO_PASSSEC               = 34
   181  	SO_TIMESTAMPNS           = 35
   182  	SO_MARK                  = 36
   183  	SO_TIMESTAMPING          = 37
   184  	SO_PROTOCOL              = 38
   185  	SO_DOMAIN                = 39
   186  	SO_RXQ_OVFL              = 40
   187  	SO_WIFI_STATUS           = 41
   188  	SO_PEEK_OFF              = 42
   189  	SO_NOFCS                 = 43
   190  	SO_LOCK_FILTER           = 44
   191  	SO_SELECT_ERR_QUEUE      = 45
   192  	SO_BUSY_POLL             = 46
   193  	SO_MAX_PACING_RATE       = 47
   194  	SO_BPF_EXTENSIONS        = 48
   195  	SO_INCOMING_CPU          = 49
   196  	SO_ATTACH_BPF            = 50
   197  	SO_ATTACH_REUSEPORT_CBPF = 51
   198  	SO_ATTACH_REUSEPORT_EBPF = 52
   199  	SO_CNX_ADVICE            = 53
   200  	SO_MEMINFO               = 55
   201  	SO_INCOMING_NAPI_ID      = 56
   202  	SO_COOKIE                = 57
   203  	SO_PEERGROUPS            = 59
   204  	SO_ZEROCOPY              = 60
   205  	SO_TXTIME                = 61
   206  )
   207  
   208  // Socket states (SS_*), mirroring enum socket_state from uapi/linux/net.h.
   209  const (
   210  	SS_FREE          = 0 // Not allocated.
   211  	SS_UNCONNECTED   = 1 // Unconnected to any socket.
   212  	SS_CONNECTING    = 2 // In process of connecting.
   213  	SS_CONNECTED     = 3 // Connected to socket.
   214  	SS_DISCONNECTING = 4 // In process of disconnecting.
   215  )
   216  
   217  // TCP protocol states, from include/net/tcp_states.h. All are typed uint32 and numbered 1 through 12 via iota.
   218  const (
   219  	TCP_ESTABLISHED uint32 = iota + 1 // 1; each following state is one greater.
   220  	TCP_SYN_SENT
   221  	TCP_SYN_RECV
   222  	TCP_FIN_WAIT1
   223  	TCP_FIN_WAIT2
   224  	TCP_TIME_WAIT
   225  	TCP_CLOSE
   226  	TCP_CLOSE_WAIT
   227  	TCP_LAST_ACK
   228  	TCP_LISTEN
   229  	TCP_CLOSING
   230  	TCP_NEW_SYN_RECV // 12.
   231  )
   232  
   233  // SockAddrMax is the maximum size of a struct sockaddr, from
   234  // uapi/linux/socket.h. The largest address struct declared in this file, SockAddrUnix, has 110 bytes of fields.
   235  const SockAddrMax = 128
   236  
   237  // InetAddr is struct in_addr, from uapi/linux/in.h, represented as a 4-byte array.
   238  //
   239  // +marshal
   240  type InetAddr [4]byte
   241  
   242  // SockAddrInet is struct sockaddr_in, from uapi/linux/in.h. Its fields, including padding, total 16 bytes.
   243  //
   244  // +marshal
   245  type SockAddrInet struct {
   246  	Family uint16   // Address family. NOTE(review): presumably AF_INET; confirm against callers.
   247  	Port   uint16   // Port number. NOTE(review): byte order is not stated here; confirm against callers.
   248  	Addr   InetAddr // 4-byte address.
   249  	_      [8]uint8 // pad to sizeof(struct sockaddr).
   250  }
   251  
   252  // Inet6MulticastRequest is struct ipv6_mreq, from uapi/linux/in6.h: a 16-byte address followed by an int32 index.
   253  //
   254  // +marshal
   255  type Inet6MulticastRequest struct {
   256  	MulticastAddr  Inet6Addr // Multicast address (16 bytes).
   257  	InterfaceIndex int32     // Index of the interface.
   258  }
   259  
   260  // InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h: two consecutive 4-byte addresses.
   261  //
   262  // +marshal
   263  type InetMulticastRequest struct {
   264  	MulticastAddr InetAddr // Multicast address (4 bytes).
   265  	InterfaceAddr InetAddr // Address of the interface (4 bytes).
   266  }
   267  
   268  // InetMulticastRequestWithNIC is struct ip_mreqn, from uapi/linux/in.h: an InetMulticastRequest plus an interface index.
   269  //
   270  // +marshal
   271  type InetMulticastRequestWithNIC struct {
   272  	InetMulticastRequest // Embedded; contributes MulticastAddr and InterfaceAddr.
   273  	InterfaceIndex       int32 // Index of the interface.
   274  }
   275  
   276  // Inet6Addr is struct in6_addr, from uapi/linux/in6.h, represented as a 16-byte array.
   277  //
   278  // +marshal
   279  type Inet6Addr [16]byte
   280  
   281  // SockAddrInet6 is struct sockaddr_in6, from uapi/linux/in6.h. Its fields total 28 bytes.
   282  //
   283  // +marshal
   284  type SockAddrInet6 struct {
   285  	Family   uint16   // Address family. NOTE(review): presumably AF_INET6; confirm against callers.
   286  	Port     uint16   // Port number. NOTE(review): byte order is not stated here; confirm against callers.
   287  	Flowinfo uint32   // Flow information.
   288  	Addr     [16]byte // 16-byte address, declared as a raw array rather than as Inet6Addr.
   289  	Scope_id uint32   // Scope ID.
   290  }
   291  
   292  // SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h. Its fields total 20 bytes.
   293  //
   294  // +marshal
   295  type SockAddrLink struct {
   296  	Family          uint16  // Address family. NOTE(review): presumably AF_PACKET; confirm against callers.
   297  	Protocol        uint16  // Protocol. NOTE(review): byte order is not stated here; confirm against callers.
   298  	InterfaceIndex  int32   // Index of the interface.
   299  	ARPHardwareType uint16  // ARP hardware type.
   300  	PacketType      byte    // Packet type. NOTE(review): presumably one of the PACKET_* constants; confirm.
   301  	HardwareAddrLen byte    // Hardware address length. NOTE(review): presumably the count of HardwareAddr bytes in use; confirm.
   302  	HardwareAddr    [8]byte // Hardware address buffer (8 bytes).
   303  }
   304  
   305  // UnixPathMax is the maximum length of the path in an AF_UNIX socket. It is the length of the SockAddrUnix.Path array.
   306  //
   307  // From uapi/linux/un.h.
   308  const UnixPathMax = 108
   309  
   310  // SockAddrUnix is struct sockaddr_un, from uapi/linux/un.h. Its fields total 110 bytes.
   311  //
   312  // +marshal
   313  type SockAddrUnix struct {
   314  	Family uint16            // Address family. NOTE(review): presumably AF_UNIX; confirm against callers.
   315  	Path   [UnixPathMax]int8 // Path storage; elements are int8 rather than byte.
   316  }
   317  
   318  // SockAddr represents a union of valid socket address types. This is logically
   319  // equivalent to struct sockaddr. SockAddr ensures that a well-defined set of
   320  // types can be used as socket addresses; each must also be marshal.Marshallable.
   321  type SockAddr interface {
   322  	marshal.Marshallable
   323  
   324  	// implementsSockAddr exists purely to allow a type to indicate that it
   325  	// implements this interface. This method is a no-op and shouldn't be called.
   326  	implementsSockAddr()
   327  }
   328  
   329  func (s *SockAddrInet) implementsSockAddr()    {} // No-op marker: *SockAddrInet is a SockAddr.
   330  func (s *SockAddrInet6) implementsSockAddr()   {} // No-op marker: *SockAddrInet6 is a SockAddr.
   331  func (s *SockAddrLink) implementsSockAddr()    {} // No-op marker: *SockAddrLink is a SockAddr.
   332  func (s *SockAddrUnix) implementsSockAddr()    {} // No-op marker: *SockAddrUnix is a SockAddr.
   333  func (s *SockAddrNetlink) implementsSockAddr() {} // No-op marker; SockAddrNetlink is declared outside this section.
   334  
   335  // Linger is struct linger, from include/linux/socket.h. Its two int32 fields total 8 bytes (see SizeOfLinger).
   336  //
   337  // +marshal
   338  type Linger struct {
   339  	OnOff  int32 // NOTE(review): presumably nonzero enables lingering; confirm against callers.
   340  	Linger int32 // NOTE(review): presumably the linger time; units are not stated here, confirm.
   341  }
   342  
   343  // SizeOfLinger is the binary size of a Linger struct: two int32 fields, 8 bytes.
   344  const SizeOfLinger = 8
   345  
   346  // TCPInfo is a collection of TCP statistics.
   347  //
   348  // From uapi/linux/tcp.h. Newer versions of Linux continue to add new fields to
   349  // the end of this struct or within existing unused space, so its size grows
   350  // over time. The current iteration is based on linux v4.17. New versions are
   351  // always backwards compatible.
   352  //
   353  // +marshal
   354  type TCPInfo struct {
   355  	// State is the state of the connection. NOTE(review): presumably a TCP_* state value; confirm.
   356  	State uint8
   357  
   358  	// CaState is the congestion control state.
   359  	CaState uint8
   360  
   361  	// Retransmits is the number of retransmissions triggered by RTO.
   362  	Retransmits uint8
   363  
   364  	// Probes is the number of unanswered zero window probes.
   365  	Probes uint8
   366  
   367  	// Backoff indicates exponential backoff.
   368  	Backoff uint8
   369  
   370  	// Options indicates the options enabled for the connection.
   371  	Options uint8
   372  
   373  	// WindowScale is the combination of snd_wscale (first 4 bits) and
   374  	// rcv_wscale (second 4 bits).
   375  	WindowScale uint8
   376  
   377  	// DeliveryRateAppLimited is a boolean and only the first bit is
   378  	// meaningful.
   379  	DeliveryRateAppLimited uint8
   380  
   381  	// RTO is the retransmission timeout.
   382  	RTO uint32
   383  
   384  	// ATO is the acknowledgement timeout interval.
   385  	ATO uint32
   386  
   387  	// SndMss is the send maximum segment size.
   388  	SndMss uint32
   389  
   390  	// RcvMss is the receive maximum segment size.
   391  	RcvMss uint32
   392  
   393  	// Unacked is the number of packets sent but not acknowledged.
   394  	Unacked uint32
   395  
   396  	// Sacked is the number of packets which are selectively acknowledged.
   397  	Sacked uint32
   398  
   399  	// Lost is the number of packets marked as lost.
   400  	Lost uint32
   401  
   402  	// Retrans is the number of retransmitted packets.
   403  	Retrans uint32
   404  
   405  	// Fackets is not used and is always zero.
   406  	Fackets uint32
   407  
   408  	// Times. NOTE(review): units and reference point are not stated here; confirm against uapi/linux/tcp.h.
   409  	LastDataSent uint32
   410  	LastAckSent  uint32
   411  	LastDataRecv uint32
   412  	LastAckRecv  uint32
   413  
   414  	// Metrics. NOTE(review): units are not stated here; confirm against uapi/linux/tcp.h.
   415  	PMTU        uint32
   416  	RcvSsthresh uint32
   417  	RTT         uint32
   418  	RTTVar      uint32
   419  	SndSsthresh uint32
   420  	SndCwnd     uint32
   421  	Advmss      uint32
   422  	Reordering  uint32
   423  
   424  	// RcvRTT is the receiver round trip time.
   425  	RcvRTT uint32
   426  
   427  	// RcvSpace is the current buffer space available for receiving data.
   428  	RcvSpace uint32
   429  
   430  	// TotalRetrans is the total number of retransmits seen since the start
   431  	// of the connection.
   432  	TotalRetrans uint32
   433  
   434  	// PacingRate is the pacing rate in bytes per second.
   435  	PacingRate uint64
   436  
   437  	// MaxPacingRate is the maximum pacing rate.
   438  	MaxPacingRate uint64
   439  
   440  	// BytesAcked is RFC4898 tcpEStatsAppHCThruOctetsAcked.
   441  	BytesAcked uint64
   442  
   443  	// BytesReceived is RFC4898 tcpEStatsAppHCThruOctetsReceived.
   444  	BytesReceived uint64
   445  
   446  	// SegsOut is RFC4898 tcpEStatsPerfSegsOut.
   447  	SegsOut uint32
   448  
   449  	// SegsIn is RFC4898 tcpEStatsPerfSegsIn.
   450  	SegsIn uint32
   451  
   452  	// NotSentBytes is the amount of bytes in the write queue that are not
   453  	// yet sent.
   454  	NotSentBytes uint32
   455  
   456  	// MinRTT is the minimum round trip time seen in the connection.
   457  	MinRTT uint32
   458  
   459  	// DataSegsIn is RFC4898 tcpEStatsDataSegsIn.
   460  	DataSegsIn uint32
   461  
   462  	// DataSegsOut is RFC4898 tcpEStatsDataSegsOut.
   463  	DataSegsOut uint32
   464  
   465  	// DeliveryRate is the most recent delivery rate in bytes per second.
   466  	DeliveryRate uint64
   467  
   468  	// BusyTime is the time in microseconds busy sending data.
   469  	BusyTime uint64
   470  
   471  	// RwndLimited is the time in microseconds limited by receive window.
   472  	RwndLimited uint64
   473  
   474  	// SndBufLimited is the time in microseconds limited by send buffer.
   475  	SndBufLimited uint64
   476  
   477  	// Delivered is the total data packets delivered including retransmits.
   478  	Delivered uint32
   479  
   480  	// DeliveredCE is the total ECE marked data packets delivered including
   481  	// retransmits.
   482  	DeliveredCE uint32
   483  
   484  	// BytesSent is RFC4898 tcpEStatsPerfHCDataOctetsOut.
   485  	BytesSent uint64
   486  
   487  	// BytesRetrans is RFC4898 tcpEStatsPerfOctetsRetrans.
   488  	BytesRetrans uint64
   489  
   490  	// DSACKDups is RFC4898 tcpEStatsStackDSACKDups.
   491  	DSACKDups uint32
   492  
   493  	// ReordSeen is the number of reordering events seen since the start of
   494  	// the connection.
   495  	ReordSeen uint32
   496  }
   497  
   498  // SizeOfTCPInfo is the binary size of a TCPInfo struct. It is a var because it is set by calling SizeBytes on a nil *TCPInfo.
   499  var SizeOfTCPInfo = (*TCPInfo)(nil).SizeBytes()
   500  
   501  // Control message types (SCM_*), from linux/socket.h. Note they are listed here out of numeric order.
   502  const (
   503  	SCM_CREDENTIALS = 0x2
   504  	SCM_RIGHTS      = 0x1
   505  )
   506  
   507  // A ControlMessageHeader is the header for a socket control message.
   508  //
   509  // ControlMessageHeader represents struct cmsghdr from linux/socket.h. Its fields total 16 bytes.
   510  //
   511  // +marshal
   512  type ControlMessageHeader struct {
   513  	Length uint64 // Message length. NOTE(review): presumably includes this header as well as the data; confirm.
   514  	Level  int32  // NOTE(review): presumably a SOL_* level; confirm against callers.
   515  	Type   int32  // NOTE(review): presumably an SCM_* or protocol-specific type; confirm against callers.
   516  }
   517  
   518  // SizeOfControlMessageHeader is the binary size of a ControlMessageHeader
   519  // struct. It is a var because it is set by calling SizeBytes on a nil pointer.
   520  var SizeOfControlMessageHeader = (*ControlMessageHeader)(nil).SizeBytes()
   521  
   522  // A ControlMessageCredentials is an SCM_CREDENTIALS socket control message.
   523  //
   524  // ControlMessageCredentials represents struct ucred from linux/socket.h. Its fields total 12 bytes.
   525  //
   526  // +marshal
   527  type ControlMessageCredentials struct {
   528  	PID int32  // Process ID (signed, unlike UID and GID).
   529  	UID uint32 // User ID.
   530  	GID uint32 // Group ID.
   531  }
   532  
   533  // A ControlMessageIPPacketInfo is an IP_PKTINFO socket control message.
   534  //
   535  // ControlMessageIPPacketInfo represents struct in_pktinfo from linux/in.h. Its fields total 12 bytes.
   536  //
   537  // +marshal
   538  // +stateify savable
   539  type ControlMessageIPPacketInfo struct {
   540  	NIC             int32    // NOTE(review): presumably the interface index; confirm against callers.
   541  	LocalAddr       InetAddr // Local address (4 bytes).
   542  	DestinationAddr InetAddr // Destination address (4 bytes).
   543  }
   544  
   545  // SizeOfControlMessageCredentials is the binary size of a
   546  // ControlMessageCredentials struct. It is a var because it is set by calling SizeBytes on a nil pointer.
   547  var SizeOfControlMessageCredentials = (*ControlMessageCredentials)(nil).SizeBytes()
   548  
   549  // A ControlMessageRights is an SCM_RIGHTS socket control message, represented as a slice of int32 values.
   550  type ControlMessageRights []int32
   551  
   552  // SizeOfControlMessageRight is the size in bytes of a single element (an
   553  // int32) in ControlMessageRights.
   554  const SizeOfControlMessageRight = 4
   555  
   556  // SizeOfControlMessageInq is the size of a TCP_INQ control message (presumably in bytes, like the other SizeOf* values here).
   557  const SizeOfControlMessageInq = 4
   558  
   559  // SizeOfControlMessageTOS is the size of an IP_TOS control message (presumably in bytes, like the other SizeOf* values here).
   560  const SizeOfControlMessageTOS = 1
   561  
   562  // SizeOfControlMessageTClass is the size of an IPV6_TCLASS control message (presumably in bytes, like the other SizeOf* values here).
   563  const SizeOfControlMessageTClass = 4
   564  
   565  // SizeOfControlMessageIPPacketInfo is the size of an IP_PKTINFO
   566  // control message: the 12 bytes of fields in ControlMessageIPPacketInfo.
   567  const SizeOfControlMessageIPPacketInfo = 12
   568  
   569  // SCM_MAX_FD is the maximum number of file descriptors (FDs) accepted in a
   570  // single sendmsg call. From net/scm.h.
   571  const SCM_MAX_FD = 253
   572  
   573  // SO_ACCEPTCON is defined as __SO_ACCEPTCON in
   574  // include/uapi/linux/net.h, which represents a listening socket
   575  // state. Its value is bit 16 (0x10000). Note that this is distinct
   576  // from SO_ACCEPTCONN (30), which is a socket option for querying
   577  // whether a socket is in a listening state.
   578  const SO_ACCEPTCON = 1 << 16