gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/abi/linux/socket.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"gvisor.dev/gvisor/pkg/marshal"
    19  )
    20  
    21  // Address families, from linux/socket.h.
    22  const (
    23  	AF_UNSPEC     = 0
    24  	AF_UNIX       = 1
    25  	AF_INET       = 2
    26  	AF_AX25       = 3
    27  	AF_IPX        = 4
    28  	AF_APPLETALK  = 5
    29  	AF_NETROM     = 6
    30  	AF_BRIDGE     = 7
    31  	AF_ATMPVC     = 8
    32  	AF_X25        = 9
    33  	AF_INET6      = 10
    34  	AF_ROSE       = 11
    35  	AF_DECnet     = 12
    36  	AF_NETBEUI    = 13
    37  	AF_SECURITY   = 14
    38  	AF_KEY        = 15
    39  	AF_NETLINK    = 16
    40  	AF_PACKET     = 17
    41  	AF_ASH        = 18
    42  	AF_ECONET     = 19
    43  	AF_ATMSVC     = 20
    44  	AF_RDS        = 21
    45  	AF_SNA        = 22
    46  	AF_IRDA       = 23
    47  	AF_PPPOX      = 24
    48  	AF_WANPIPE    = 25
    49  	AF_LLC        = 26
    50  	AF_IB         = 27
    51  	AF_MPLS       = 28
    52  	AF_CAN        = 29
    53  	AF_TIPC       = 30
    54  	AF_BLUETOOTH  = 31
    55  	AF_IUCV       = 32
    56  	AF_RXRPC      = 33
    57  	AF_ISDN       = 34
    58  	AF_PHONET     = 35
    59  	AF_IEEE802154 = 36
    60  	AF_CAIF       = 37
    61  	AF_ALG        = 38
    62  	AF_NFC        = 39
    63  	AF_VSOCK      = 40
    64  )
    65  
    66  // sendmsg(2)/recvmsg(2) flags, from linux/socket.h.
    67  const (
    68  	MSG_OOB              = 0x1
    69  	MSG_PEEK             = 0x2
    70  	MSG_DONTROUTE        = 0x4
    71  	MSG_TRYHARD          = 0x4
    72  	MSG_CTRUNC           = 0x8
    73  	MSG_PROBE            = 0x10
    74  	MSG_TRUNC            = 0x20
    75  	MSG_DONTWAIT         = 0x40
    76  	MSG_EOR              = 0x80
    77  	MSG_WAITALL          = 0x100
    78  	MSG_FIN              = 0x200
    79  	MSG_EOF              = MSG_FIN
    80  	MSG_SYN              = 0x400
    81  	MSG_CONFIRM          = 0x800
    82  	MSG_RST              = 0x1000
    83  	MSG_ERRQUEUE         = 0x2000
    84  	MSG_NOSIGNAL         = 0x4000
    85  	MSG_MORE             = 0x8000
    86  	MSG_WAITFORONE       = 0x10000
    87  	MSG_SENDPAGE_NOTLAST = 0x20000
    88  	MSG_ZEROCOPY         = 0x4000000
    89  	MSG_FASTOPEN         = 0x20000000
    90  	MSG_CMSG_CLOEXEC     = 0x40000000
    91  )
    92  
    93  // Set/get socket option levels, from socket.h.
    94  const (
    95  	SOL_IP      = 0
    96  	SOL_SOCKET  = 1
    97  	SOL_TCP     = 6
    98  	SOL_UDP     = 17
    99  	SOL_IPV6    = 41
   100  	SOL_ICMPV6  = 58
   101  	SOL_RAW     = 255
   102  	SOL_PACKET  = 263
   103  	SOL_NETLINK = 270
   104  )
   105  
   106  // A SockType is a type (as opposed to family) of sockets. These are enumerated
   107  // below as SOCK_* constants.
   108  type SockType int
   109  
   110  // Socket types, from linux/net.h.
   111  const (
   112  	SOCK_STREAM    SockType = 1
   113  	SOCK_DGRAM     SockType = 2
   114  	SOCK_RAW       SockType = 3
   115  	SOCK_RDM       SockType = 4
   116  	SOCK_SEQPACKET SockType = 5
   117  	SOCK_DCCP      SockType = 6
   118  	SOCK_PACKET    SockType = 10
   119  )
   120  
   121  // SOCK_TYPE_MASK covers all of the above socket types. The remaining bits are
   122  // flags. From linux/net.h.
   123  const SOCK_TYPE_MASK = 0xf
   124  
   125  // socket(2)/socketpair(2)/accept4(2) flags, from linux/net.h.
   126  const (
   127  	SOCK_CLOEXEC  = O_CLOEXEC
   128  	SOCK_NONBLOCK = O_NONBLOCK
   129  )
   130  
   131  // shutdown(2) how commands, from <linux/net.h>.
   132  const (
   133  	SHUT_RD   = 0
   134  	SHUT_WR   = 1
   135  	SHUT_RDWR = 2
   136  )
   137  
   138  // Packet types from <linux/if_packet.h>
   139  const (
   140  	PACKET_HOST      = 0 // To us
   141  	PACKET_BROADCAST = 1 // To all
   142  	PACKET_MULTICAST = 2 // To group
   143  	PACKET_OTHERHOST = 3 // To someone else
   144  	PACKET_OUTGOING  = 4 // Outgoing of any type
   145  )
   146  
   147  // Socket options from socket.h.
   148  const (
   149  	SO_DEBUG                 = 1
   150  	SO_REUSEADDR             = 2
   151  	SO_TYPE                  = 3
   152  	SO_ERROR                 = 4
   153  	SO_DONTROUTE             = 5
   154  	SO_BROADCAST             = 6
   155  	SO_SNDBUF                = 7
   156  	SO_RCVBUF                = 8
   157  	SO_KEEPALIVE             = 9
   158  	SO_OOBINLINE             = 10
   159  	SO_NO_CHECK              = 11
   160  	SO_PRIORITY              = 12
   161  	SO_LINGER                = 13
   162  	SO_BSDCOMPAT             = 14
   163  	SO_REUSEPORT             = 15
   164  	SO_PASSCRED              = 16
   165  	SO_PEERCRED              = 17
   166  	SO_RCVLOWAT              = 18
   167  	SO_SNDLOWAT              = 19
   168  	SO_RCVTIMEO              = 20
   169  	SO_SNDTIMEO              = 21
   170  	SO_BINDTODEVICE          = 25
   171  	SO_ATTACH_FILTER         = 26
   172  	SO_DETACH_FILTER         = 27
   173  	SO_GET_FILTER            = SO_ATTACH_FILTER
   174  	SO_PEERNAME              = 28
   175  	SO_TIMESTAMP             = 29
   176  	SO_ACCEPTCONN            = 30
   177  	SO_PEERSEC               = 31
   178  	SO_SNDBUFFORCE           = 32
   179  	SO_RCVBUFFORCE           = 33
   180  	SO_PASSSEC               = 34
   181  	SO_TIMESTAMPNS           = 35
   182  	SO_MARK                  = 36
   183  	SO_TIMESTAMPING          = 37
   184  	SO_PROTOCOL              = 38
   185  	SO_DOMAIN                = 39
   186  	SO_RXQ_OVFL              = 40
   187  	SO_WIFI_STATUS           = 41
   188  	SO_PEEK_OFF              = 42
   189  	SO_NOFCS                 = 43
   190  	SO_LOCK_FILTER           = 44
   191  	SO_SELECT_ERR_QUEUE      = 45
   192  	SO_BUSY_POLL             = 46
   193  	SO_MAX_PACING_RATE       = 47
   194  	SO_BPF_EXTENSIONS        = 48
   195  	SO_INCOMING_CPU          = 49
   196  	SO_ATTACH_BPF            = 50
   197  	SO_ATTACH_REUSEPORT_CBPF = 51
   198  	SO_ATTACH_REUSEPORT_EBPF = 52
   199  	SO_CNX_ADVICE            = 53
   200  	SO_MEMINFO               = 55
   201  	SO_INCOMING_NAPI_ID      = 56
   202  	SO_COOKIE                = 57
   203  	SO_PEERGROUPS            = 59
   204  	SO_ZEROCOPY              = 60
   205  	SO_TXTIME                = 61
   206  )
   207  
   208  // enum socket_state, from uapi/linux/net.h.
   209  const (
   210  	SS_FREE          = 0 // Not allocated.
   211  	SS_UNCONNECTED   = 1 // Unconnected to any socket.
   212  	SS_CONNECTING    = 2 // In process of connecting.
   213  	SS_CONNECTED     = 3 // Connected to socket.
   214  	SS_DISCONNECTING = 4 // In process of disconnecting.
   215  )
   216  
   217  // TCP protocol states, from include/net/tcp_states.h.
   218  const (
   219  	TCP_ESTABLISHED uint32 = iota + 1
   220  	TCP_SYN_SENT
   221  	TCP_SYN_RECV
   222  	TCP_FIN_WAIT1
   223  	TCP_FIN_WAIT2
   224  	TCP_TIME_WAIT
   225  	TCP_CLOSE
   226  	TCP_CLOSE_WAIT
   227  	TCP_LAST_ACK
   228  	TCP_LISTEN
   229  	TCP_CLOSING
   230  	TCP_NEW_SYN_RECV
   231  )
   232  
   233  // SockAddrMax is the maximum size of a struct sockaddr, from
   234  // uapi/linux/socket.h.
   235  const SockAddrMax = 128
   236  
   237  // InetAddr is struct in_addr, from uapi/linux/in.h.
   238  //
   239  // +marshal
   240  type InetAddr [4]byte
   241  
   242  // SizeOfInetAddr is the size of InetAddr.
   243  var SizeOfInetAddr = uint32((*InetAddr)(nil).SizeBytes())
   244  
   245  // SockAddrInet is struct sockaddr_in, from uapi/linux/in.h.
   246  //
   247  // +marshal
   248  type SockAddrInet struct {
   249  	Family uint16
   250  	Port   uint16
   251  	Addr   InetAddr
   252  	_      [8]uint8 // pad to sizeof(struct sockaddr).
   253  }
   254  
   255  // Inet6MulticastRequest is struct ipv6_mreq, from uapi/linux/in6.h.
   256  //
   257  // +marshal
   258  type Inet6MulticastRequest struct {
   259  	MulticastAddr  Inet6Addr
   260  	InterfaceIndex int32
   261  }
   262  
   263  // InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h.
   264  //
   265  // +marshal
   266  type InetMulticastRequest struct {
   267  	MulticastAddr InetAddr
   268  	InterfaceAddr InetAddr
   269  }
   270  
   271  // InetMulticastRequestWithNIC is struct ip_mreqn, from uapi/linux/in.h.
   272  //
   273  // +marshal
   274  type InetMulticastRequestWithNIC struct {
   275  	InetMulticastRequest
   276  	InterfaceIndex int32
   277  }
   278  
   279  // Inet6Addr is struct in6_addr, from uapi/linux/in6.h.
   280  //
   281  // +marshal
   282  type Inet6Addr [16]byte
   283  
   284  // SockAddrInet6 is struct sockaddr_in6, from uapi/linux/in6.h.
   285  //
   286  // +marshal
   287  type SockAddrInet6 struct {
   288  	Family   uint16
   289  	Port     uint16
   290  	Flowinfo uint32
   291  	Addr     [16]byte
   292  	Scope_id uint32
   293  }
   294  
   295  // SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h.
   296  //
   297  // +marshal
   298  type SockAddrLink struct {
   299  	Family          uint16
   300  	Protocol        uint16
   301  	InterfaceIndex  int32
   302  	ARPHardwareType uint16
   303  	PacketType      byte
   304  	HardwareAddrLen byte
   305  	HardwareAddr    [8]byte
   306  }
   307  
   308  // UnixPathMax is the maximum length of the path in an AF_UNIX socket.
   309  //
   310  // From uapi/linux/un.h.
   311  const UnixPathMax = 108
   312  
   313  // SockAddrUnix is struct sockaddr_un, from uapi/linux/un.h.
   314  //
   315  // +marshal
   316  type SockAddrUnix struct {
   317  	Family uint16
   318  	Path   [UnixPathMax]int8
   319  }
   320  
   321  // SockAddr represents a union of valid socket address types. This is logically
   322  // equivalent to struct sockaddr. SockAddr ensures that a well-defined set of
   323  // types can be used as socket addresses.
   324  type SockAddr interface {
   325  	marshal.Marshallable
   326  
   327  	// implementsSockAddr exists purely to allow a type to indicate that they
   328  	// implement this interface. This method is a no-op and shouldn't be called.
   329  	implementsSockAddr()
   330  }
   331  
   332  func (s *SockAddrInet) implementsSockAddr()    {}
   333  func (s *SockAddrInet6) implementsSockAddr()   {}
   334  func (s *SockAddrLink) implementsSockAddr()    {}
   335  func (s *SockAddrUnix) implementsSockAddr()    {}
   336  func (s *SockAddrNetlink) implementsSockAddr() {}
   337  
   338  // Linger is struct linger, from include/linux/socket.h.
   339  //
   340  // +marshal
   341  type Linger struct {
   342  	OnOff  int32
   343  	Linger int32
   344  }
   345  
   346  // SizeOfLinger is the binary size of a Linger struct.
   347  const SizeOfLinger = 8
   348  
   349  // TCPInfo is a collection of TCP statistics.
   350  //
   351  // From uapi/linux/tcp.h. Newer versions of Linux continue to add new fields to
   352  // the end of this struct or within existing unused space, so its size grows
   353  // over time. The current iteration is based on linux v4.17. New versions are
   354  // always backwards compatible.
   355  //
   356  // +marshal
   357  type TCPInfo struct {
   358  	// State is the state of the connection.
   359  	State uint8
   360  
   361  	// CaState is the congestion control state.
   362  	CaState uint8
   363  
   364  	// Retransmits is the number of retransmissions triggered by RTO.
   365  	Retransmits uint8
   366  
   367  	// Probes is the number of unanswered zero window probes.
   368  	Probes uint8
   369  
   370  	// BackOff indicates exponential backoff.
   371  	Backoff uint8
   372  
   373  	// Options indicates the options enabled for the connection.
   374  	Options uint8
   375  
   376  	// WindowScale is the combination of snd_wscale (first 4 bits) and
   377  	// rcv_wscale (second 4 bits)
   378  	WindowScale uint8
   379  
   380  	// DeliveryRateAppLimited is a boolean and only the first bit is
   381  	// meaningful.
   382  	DeliveryRateAppLimited uint8
   383  
   384  	// RTO is the retransmission timeout.
   385  	RTO uint32
   386  
   387  	// ATO is the acknowledgement timeout interval.
   388  	ATO uint32
   389  
   390  	// SndMss is the send maximum segment size.
   391  	SndMss uint32
   392  
   393  	// RcvMss is the receive maximum segment size.
   394  	RcvMss uint32
   395  
   396  	// Unacked is the number of packets sent but not acknowledged.
   397  	Unacked uint32
   398  
   399  	// Sacked is the number of packets which are selectively acknowledged.
   400  	Sacked uint32
   401  
   402  	// Lost is the number of packets marked as lost.
   403  	Lost uint32
   404  
   405  	// Retrans is the number of retransmitted packets.
   406  	Retrans uint32
   407  
   408  	// Fackets is not used and is always zero.
   409  	Fackets uint32
   410  
   411  	// Times.
   412  	LastDataSent uint32
   413  	LastAckSent  uint32
   414  	LastDataRecv uint32
   415  	LastAckRecv  uint32
   416  
   417  	// Metrics.
   418  	PMTU        uint32
   419  	RcvSsthresh uint32
   420  	RTT         uint32
   421  	RTTVar      uint32
   422  	SndSsthresh uint32
   423  	SndCwnd     uint32
   424  	Advmss      uint32
   425  	Reordering  uint32
   426  
   427  	// RcvRTT is the receiver round trip time.
   428  	RcvRTT uint32
   429  
   430  	// RcvSpace is the current buffer space available for receiving data.
   431  	RcvSpace uint32
   432  
   433  	// TotalRetrans is the total number of retransmits seen since the start
   434  	// of the connection.
   435  	TotalRetrans uint32
   436  
   437  	// PacingRate is the pacing rate in bytes per second.
   438  	PacingRate uint64
   439  
   440  	// MaxPacingRate is the maximum pacing rate.
   441  	MaxPacingRate uint64
   442  
   443  	// BytesAcked is RFC4898 tcpEStatsAppHCThruOctetsAcked.
   444  	BytesAcked uint64
   445  
   446  	// BytesReceived is RFC4898 tcpEStatsAppHCThruOctetsReceived.
   447  	BytesReceived uint64
   448  
   449  	// SegsOut is RFC4898 tcpEStatsPerfSegsOut.
   450  	SegsOut uint32
   451  
   452  	// SegsIn is RFC4898 tcpEStatsPerfSegsIn.
   453  	SegsIn uint32
   454  
   455  	// NotSentBytes is the amount of bytes in the write queue that are not
   456  	// yet sent.
   457  	NotSentBytes uint32
   458  
   459  	// MinRTT is the minimum round trip time seen in the connection.
   460  	MinRTT uint32
   461  
   462  	// DataSegsIn is RFC4898 tcpEStatsDataSegsIn.
   463  	DataSegsIn uint32
   464  
   465  	// DataSegsOut is RFC4898 tcpEStatsDataSegsOut.
   466  	DataSegsOut uint32
   467  
   468  	// DeliveryRate is the most recent delivery rate in bytes per second.
   469  	DeliveryRate uint64
   470  
   471  	// BusyTime is the time in microseconds busy sending data.
   472  	BusyTime uint64
   473  
   474  	// RwndLimited is the time in microseconds limited by receive window.
   475  	RwndLimited uint64
   476  
   477  	// SndBufLimited is the time in microseconds limited by send buffer.
   478  	SndBufLimited uint64
   479  
   480  	// Delivered is the total data packets delivered including retransmits.
   481  	Delivered uint32
   482  
   483  	// DeliveredCE is the total ECE marked data packets delivered including
   484  	// retransmits.
   485  	DeliveredCE uint32
   486  
   487  	// BytesSent is RFC4898 tcpEStatsPerfHCDataOctetsOut.
   488  	BytesSent uint64
   489  
   490  	// BytesRetrans is RFC4898 tcpEStatsPerfOctetsRetrans.
   491  	BytesRetrans uint64
   492  
   493  	// DSACKDups is RFC4898 tcpEStatsStackDSACKDups.
   494  	DSACKDups uint32
   495  
   496  	// ReordSeen is the number of reordering events seen since the start of
   497  	// the connection.
   498  	ReordSeen uint32
   499  }
   500  
   501  // SizeOfTCPInfo is the binary size of a TCPInfo struct.
   502  var SizeOfTCPInfo = (*TCPInfo)(nil).SizeBytes()
   503  
   504  // Control message types, from linux/socket.h.
   505  const (
   506  	SCM_CREDENTIALS = 0x2
   507  	SCM_RIGHTS      = 0x1
   508  )
   509  
   510  // A ControlMessageHeader is the header for a socket control message.
   511  //
   512  // ControlMessageHeader represents struct cmsghdr from linux/socket.h.
   513  //
   514  // +marshal
   515  type ControlMessageHeader struct {
   516  	Length uint64
   517  	Level  int32
   518  	Type   int32
   519  }
   520  
   521  // SizeOfControlMessageHeader is the binary size of a ControlMessageHeader
   522  // struct.
   523  var SizeOfControlMessageHeader = (*ControlMessageHeader)(nil).SizeBytes()
   524  
   525  // A ControlMessageCredentials is an SCM_CREDENTIALS socket control message.
   526  //
   527  // ControlMessageCredentials represents struct ucred from linux/socket.h.
   528  //
   529  // +marshal
   530  type ControlMessageCredentials struct {
   531  	PID int32
   532  	UID uint32
   533  	GID uint32
   534  }
   535  
   536  // A ControlMessageIPPacketInfo is IP_PKTINFO socket control message.
   537  //
   538  // ControlMessageIPPacketInfo represents struct in_pktinfo from linux/in.h.
   539  //
   540  // +marshal
   541  // +stateify savable
   542  type ControlMessageIPPacketInfo struct {
   543  	NIC             int32
   544  	LocalAddr       InetAddr
   545  	DestinationAddr InetAddr
   546  }
   547  
   548  // ControlMessageIPv6PacketInfo represents struct in6_pktinfo from linux/ipv6.h.
   549  //
   550  // +marshal
   551  // +stateify savable
   552  type ControlMessageIPv6PacketInfo struct {
   553  	Addr Inet6Addr
   554  	NIC  uint32
   555  }
   556  
   557  // SizeOfControlMessageCredentials is the binary size of a
   558  // ControlMessageCredentials struct.
   559  var SizeOfControlMessageCredentials = (*ControlMessageCredentials)(nil).SizeBytes()
   560  
   561  // SizeOfControlMessageRight is the size of a single element in
   562  // ControlMessageRights.
   563  const SizeOfControlMessageRight = 4
   564  
   565  // SizeOfControlMessageInq is the size of a TCP_INQ control message.
   566  const SizeOfControlMessageInq = 4
   567  
   568  // SizeOfControlMessageTOS is the size of an IP_TOS control message.
   569  const SizeOfControlMessageTOS = 1
   570  
   571  // SizeOfControlMessageTTL is the size of an IP_TTL control message.
   572  const SizeOfControlMessageTTL = 4
   573  
   574  // SizeOfControlMessageTClass is the size of an IPV6_TCLASS control message.
   575  const SizeOfControlMessageTClass = 4
   576  
   577  // SizeOfControlMessageHopLimit is the size of an IPV6_HOPLIMIT control message.
   578  const SizeOfControlMessageHopLimit = 4
   579  
   580  // SizeOfControlMessageIPPacketInfo is the size of an IP_PKTINFO control
   581  // message.
   582  const SizeOfControlMessageIPPacketInfo = 12
   583  
   584  // SizeOfControlMessageIPv6PacketInfo is the size of a
   585  // ControlMessageIPv6PacketInfo.
   586  const SizeOfControlMessageIPv6PacketInfo = 20
   587  
   588  // SCM_MAX_FD is the maximum number of FDs accepted in a single sendmsg call.
   589  // From net/scm.h.
   590  const SCM_MAX_FD = 253
   591  
   592  // SO_ACCEPTCON is defined as __SO_ACCEPTCON in
   593  // include/uapi/linux/net.h, which represents a listening socket
   594  // state. Note that this is distinct from SO_ACCEPTCONN, which is a
   595  // socket option for querying whether a socket is in a listening
   596  // state.
   597  const SO_ACCEPTCON = 1 << 16
   598  
   599  // ICMP6Filter represents struct icmp6_filter from linux/icmpv6.h.
   600  //
   601  // +marshal
   602  // +stateify savable
   603  type ICMP6Filter struct {
   604  	Filter [8]uint32
   605  }
   606  
   607  // SizeOfICMP6Filter is the size of ICMP6Filter struct.
   608  var SizeOfICMP6Filter = uint32((*ICMP6Filter)(nil).SizeBytes())