github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/unet/unet.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package unet provides a minimal net package based on Unix Domain Sockets.
    16  //
    17  // This does no pooling, and should only be used for a limited number of
    18  // connections in a Go process. Don't use this package for arbitrary servers.
    19  package unet
    20  
    21  import (
    22  	"errors"
    23  	"sync/atomic"
    24  
    25  	"golang.org/x/sys/unix"
    26  	"github.com/SagerNet/gvisor/pkg/sync"
    27  )
    28  
// backlog is the queue length passed to unix.Listen by ServerSocket.Listen.
const backlog = 16

// errClosing is returned by wait if the Socket is in the process of closing.
// ServerSocket.Accept translates it to unix.EBADF before returning.
var errClosing = errors.New("Socket is closing")

// errMessageTruncated indicates that data was lost because the provided buffer
// was too small.
var errMessageTruncated = errors.New("message truncated")
    38  
    39  // socketType returns the appropriate type.
    40  func socketType(packet bool) int {
    41  	if packet {
    42  		return unix.SOCK_SEQPACKET
    43  	}
    44  	return unix.SOCK_STREAM
    45  }
    46  
    47  // socket creates a new host socket.
    48  func socket(packet bool) (int, error) {
    49  	// Make a new socket.
    50  	fd, err := unix.Socket(unix.AF_UNIX, socketType(packet), 0)
    51  	if err != nil {
    52  		return 0, err
    53  	}
    54  
    55  	return fd, nil
    56  }
    57  
    58  // eventFD returns a new event FD with initial value 0.
    59  func eventFD() (int, error) {
    60  	f, _, e := unix.Syscall(unix.SYS_EVENTFD2, 0, 0, 0)
    61  	if e != 0 {
    62  		return -1, e
    63  	}
    64  	return int(f), nil
    65  }
    66  
// Socket is a connected unix domain socket.
//
// A Socket owns its FD until Close or Release is called. After that the
// stored FD is -1 and methods that need the FD report unix.EBADF.
type Socket struct {
	// gate protects use of fd. Methods enter the gate (see enterFD) before
	// using fd, and finish closes the gate to wait for them to leave.
	gate sync.Gate

	// fd is the underlying socket FD, or -1 once Close/Release has run.
	//
	// fd must be read atomically, and only remains valid if read while
	// within gate.
	fd int32

	// efd is an event FD that is signaled when the socket is closing.
	//
	// efd is immutable and remains valid until Close/Release.
	efd int

	// race is an atomic variable used to avoid triggering the race
	// detector. See comment in SocketPair below; within this file only
	// SocketPair assigns it.
	race *int32
}
    87  
    88  // NewSocket returns a socket from an existing FD.
    89  //
    90  // NewSocket takes ownership of fd.
    91  func NewSocket(fd int) (*Socket, error) {
    92  	// fd must be non-blocking for non-blocking unix.Accept in
    93  	// ServerSocket.Accept.
    94  	if err := unix.SetNonblock(fd, true); err != nil {
    95  		return nil, err
    96  	}
    97  
    98  	efd, err := eventFD()
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	return &Socket{
   104  		fd:  int32(fd),
   105  		efd: efd,
   106  	}, nil
   107  }
   108  
   109  // finish completes use of s.fd by evicting any waiters, closing the gate, and
   110  // closing the event FD.
   111  func (s *Socket) finish() error {
   112  	// Signal any blocked or future polls.
   113  	//
   114  	// N.B. eventfd writes must be 8 bytes.
   115  	if _, err := unix.Write(s.efd, []byte{1, 0, 0, 0, 0, 0, 0, 0}); err != nil {
   116  		return err
   117  	}
   118  
   119  	// Close the gate, blocking until all FD users leave.
   120  	s.gate.Close()
   121  
   122  	return unix.Close(s.efd)
   123  }
   124  
   125  // Close closes the socket.
   126  func (s *Socket) Close() error {
   127  	// Set the FD in the socket to -1, to ensure that all future calls to
   128  	// FD/Release get nothing and Close calls return immediately.
   129  	fd := int(atomic.SwapInt32(&s.fd, -1))
   130  	if fd < 0 {
   131  		// Already closed or closing.
   132  		return unix.EBADF
   133  	}
   134  
   135  	// Shutdown the socket to cancel any pending accepts.
   136  	s.shutdown(fd)
   137  
   138  	if err := s.finish(); err != nil {
   139  		return err
   140  	}
   141  
   142  	return unix.Close(fd)
   143  }
   144  
   145  // Release releases ownership of the socket FD.
   146  //
   147  // The returned FD is non-blocking.
   148  //
   149  // Any concurrent or future callers of Socket methods will receive EBADF.
   150  func (s *Socket) Release() (int, error) {
   151  	// Set the FD in the socket to -1, to ensure that all future calls to
   152  	// FD/Release get nothing and Close calls return immediately.
   153  	fd := int(atomic.SwapInt32(&s.fd, -1))
   154  	if fd < 0 {
   155  		// Already closed or closing.
   156  		return -1, unix.EBADF
   157  	}
   158  
   159  	if err := s.finish(); err != nil {
   160  		return -1, err
   161  	}
   162  
   163  	return fd, nil
   164  }
   165  
   166  // FD returns the FD for this Socket.
   167  //
   168  // The FD is non-blocking and must not be made blocking.
   169  //
   170  // N.B. os.File.Fd makes the FD blocking. Use of Release instead of FD is
   171  // strongly preferred.
   172  //
   173  // The returned FD cannot be used safely if there may be concurrent callers to
   174  // Close or Release.
   175  //
   176  // Use Release to take ownership of the FD.
   177  func (s *Socket) FD() int {
   178  	return int(atomic.LoadInt32(&s.fd))
   179  }
   180  
   181  // enterFD enters the FD gate and returns the FD value.
   182  //
   183  // If enterFD returns ok, s.gate.Leave must be called when done with the FD.
   184  // Callers may only block while within the gate using s.wait.
   185  //
   186  // The returned FD is guaranteed to remain valid until s.gate.Leave.
   187  func (s *Socket) enterFD() (int, bool) {
   188  	if !s.gate.Enter() {
   189  		return -1, false
   190  	}
   191  
   192  	fd := int(atomic.LoadInt32(&s.fd))
   193  	if fd < 0 {
   194  		s.gate.Leave()
   195  		return -1, false
   196  	}
   197  
   198  	return fd, true
   199  }
   200  
   201  // SocketPair creates a pair of connected sockets.
   202  func SocketPair(packet bool) (*Socket, *Socket, error) {
   203  	// Make a new pair.
   204  	fds, err := unix.Socketpair(unix.AF_UNIX, socketType(packet)|unix.SOCK_CLOEXEC, 0)
   205  	if err != nil {
   206  		return nil, nil, err
   207  	}
   208  
   209  	// race is an atomic variable used to avoid triggering the race
   210  	// detector. We have to fool TSAN into thinking there is a race
   211  	// variable between our two sockets. We only use SocketPair in tests
   212  	// anyway.
   213  	//
   214  	// NOTE(b/27107811): This is purely due to the fact that the raw
   215  	// syscall does not serve as a boundary for the sanitizer.
   216  	var race int32
   217  	a, err := NewSocket(fds[0])
   218  	if err != nil {
   219  		unix.Close(fds[0])
   220  		unix.Close(fds[1])
   221  		return nil, nil, err
   222  	}
   223  	a.race = &race
   224  	b, err := NewSocket(fds[1])
   225  	if err != nil {
   226  		a.Close()
   227  		unix.Close(fds[1])
   228  		return nil, nil, err
   229  	}
   230  	b.race = &race
   231  	return a, b, nil
   232  }
   233  
   234  // Connect connects to a server.
   235  func Connect(addr string, packet bool) (*Socket, error) {
   236  	fd, err := socket(packet)
   237  	if err != nil {
   238  		return nil, err
   239  	}
   240  
   241  	// Connect the socket.
   242  	usa := &unix.SockaddrUnix{Name: addr}
   243  	if err := unix.Connect(fd, usa); err != nil {
   244  		unix.Close(fd)
   245  		return nil, err
   246  	}
   247  
   248  	return NewSocket(fd)
   249  }
   250  
// ControlMessage wraps around a byte array and provides functions for parsing
// as a Unix Domain Socket control message.
//
// Its methods either size the buffer for receiving FDs (EnableFDs), read FDs
// back out of it (ExtractFDs, CloseFDs), or fill it with FDs to send
// (PackFDs).
type ControlMessage []byte
   254  
   255  // EnableFDs enables receiving FDs via control message.
   256  //
   257  // This guarantees only a MINIMUM number of FDs received. You may receive MORE
   258  // than this due to the way FDs are packed. To be specific, the number of
   259  // receivable buffers will be rounded up to the nearest even number.
   260  //
   261  // This must be called prior to ReadVec if you want to receive FDs.
   262  func (c *ControlMessage) EnableFDs(count int) {
   263  	*c = make([]byte, unix.CmsgSpace(count*4))
   264  }
   265  
   266  // ExtractFDs returns the list of FDs in the control message.
   267  //
   268  // Either this or CloseFDs should be used after EnableFDs.
   269  func (c *ControlMessage) ExtractFDs() ([]int, error) {
   270  	msgs, err := unix.ParseSocketControlMessage(*c)
   271  	if err != nil {
   272  		return nil, err
   273  	}
   274  	var fds []int
   275  	for _, msg := range msgs {
   276  		thisFds, err := unix.ParseUnixRights(&msg)
   277  		if err != nil {
   278  			// Different control message.
   279  			return nil, err
   280  		}
   281  		for _, fd := range thisFds {
   282  			if fd >= 0 {
   283  				fds = append(fds, fd)
   284  			}
   285  		}
   286  	}
   287  	return fds, nil
   288  }
   289  
   290  // CloseFDs closes the list of FDs in the control message.
   291  //
   292  // Either this or ExtractFDs should be used after EnableFDs.
   293  func (c *ControlMessage) CloseFDs() {
   294  	fds, _ := c.ExtractFDs()
   295  	for _, fd := range fds {
   296  		if fd >= 0 {
   297  			unix.Close(fd)
   298  		}
   299  	}
   300  }
   301  
   302  // PackFDs packs the given list of FDs in the control message.
   303  //
   304  // This must be used prior to WriteVec.
   305  func (c *ControlMessage) PackFDs(fds ...int) {
   306  	*c = ControlMessage(unix.UnixRights(fds...))
   307  }
   308  
   309  // UnpackFDs clears the control message.
   310  func (c *ControlMessage) UnpackFDs() {
   311  	*c = nil
   312  }
   313  
// SocketWriter wraps an individual send operation.
//
// The normal entrypoint is WriteVec. Use PackFDs on the embedded
// ControlMessage before WriteVec to send FDs along with the data.
type SocketWriter struct {
	// socket is the Socket this writer sends on.
	socket *Socket
	to       []byte
	// blocking is the mode requested from Socket.Writer.
	blocking bool
	// race is copied from the Socket; see the comment in SocketPair.
	race     *int32

	ControlMessage
}
   325  
   326  // Writer returns a writer for this socket.
   327  func (s *Socket) Writer(blocking bool) SocketWriter {
   328  	return SocketWriter{socket: s, blocking: blocking, race: s.race}
   329  }
   330  
   331  // Write implements io.Writer.Write.
   332  func (s *Socket) Write(p []byte) (int, error) {
   333  	r := s.Writer(true)
   334  	return r.WriteVec([][]byte{p})
   335  }
   336  
   337  // GetSockOpt gets the given socket option.
   338  func (s *Socket) GetSockOpt(level int, name int, b []byte) (uint32, error) {
   339  	fd, ok := s.enterFD()
   340  	if !ok {
   341  		return 0, unix.EBADF
   342  	}
   343  	defer s.gate.Leave()
   344  
   345  	return getsockopt(fd, level, name, b)
   346  }
   347  
   348  // SetSockOpt sets the given socket option.
   349  func (s *Socket) SetSockOpt(level, name int, b []byte) error {
   350  	fd, ok := s.enterFD()
   351  	if !ok {
   352  		return unix.EBADF
   353  	}
   354  	defer s.gate.Leave()
   355  
   356  	return setsockopt(fd, level, name, b)
   357  }
   358  
   359  // GetSockName returns the socket name.
   360  func (s *Socket) GetSockName() ([]byte, error) {
   361  	fd, ok := s.enterFD()
   362  	if !ok {
   363  		return nil, unix.EBADF
   364  	}
   365  	defer s.gate.Leave()
   366  
   367  	var buf []byte
   368  	l := unix.SizeofSockaddrAny
   369  
   370  	for {
   371  		// If the buffer is not large enough, allocate a new one with the hint.
   372  		buf = make([]byte, l)
   373  		l, err := getsockname(fd, buf)
   374  		if err != nil {
   375  			return nil, err
   376  		}
   377  
   378  		if l <= uint32(len(buf)) {
   379  			return buf[:l], nil
   380  		}
   381  	}
   382  }
   383  
   384  // GetPeerName returns the peer name.
   385  func (s *Socket) GetPeerName() ([]byte, error) {
   386  	fd, ok := s.enterFD()
   387  	if !ok {
   388  		return nil, unix.EBADF
   389  	}
   390  	defer s.gate.Leave()
   391  
   392  	var buf []byte
   393  	l := unix.SizeofSockaddrAny
   394  
   395  	for {
   396  		// See above.
   397  		buf = make([]byte, l)
   398  		l, err := getpeername(fd, buf)
   399  		if err != nil {
   400  			return nil, err
   401  		}
   402  
   403  		if l <= uint32(len(buf)) {
   404  			return buf[:l], nil
   405  		}
   406  	}
   407  }
   408  
   409  // GetPeerCred returns the peer's unix credentials.
   410  func (s *Socket) GetPeerCred() (*unix.Ucred, error) {
   411  	fd, ok := s.enterFD()
   412  	if !ok {
   413  		return nil, unix.EBADF
   414  	}
   415  	defer s.gate.Leave()
   416  
   417  	return unix.GetsockoptUcred(fd, unix.SOL_SOCKET, unix.SO_PEERCRED)
   418  }
   419  
// SocketReader wraps an individual receive operation.
//
// This may be used for doing vectorized reads and/or receiving additional
// control messages (e.g. FDs). The normal entrypoint is ReadVec.
//
// One of ExtractFDs or CloseFDs must be called if EnableFDs is used.
type SocketReader struct {
	// socket is the Socket this reader receives from.
	socket *Socket
	source   []byte
	// blocking is the mode requested from Socket.Reader.
	blocking bool
	// race is copied from the Socket; see the comment in SocketPair.
	race     *int32

	ControlMessage
}
   434  
   435  // Reader returns a reader for this socket.
   436  func (s *Socket) Reader(blocking bool) SocketReader {
   437  	return SocketReader{socket: s, blocking: blocking, race: s.race}
   438  }
   439  
   440  // Read implements io.Reader.Read.
   441  func (s *Socket) Read(p []byte) (int, error) {
   442  	r := s.Reader(true)
   443  	return r.ReadVec([][]byte{p})
   444  }
   445  
   446  func (s *Socket) shutdown(fd int) error {
   447  	// Shutdown the socket to cancel any pending accepts.
   448  	return unix.Shutdown(fd, unix.SHUT_RDWR)
   449  }
   450  
   451  // Shutdown closes the socket for read and write.
   452  func (s *Socket) Shutdown() error {
   453  	fd, ok := s.enterFD()
   454  	if !ok {
   455  		return unix.EBADF
   456  	}
   457  	defer s.gate.Leave()
   458  
   459  	return s.shutdown(fd)
   460  }
   461  
// ServerSocket is a bound unix domain socket.
//
// It wraps a Socket and adds Listen and Accept; Close, FD and Release
// delegate to the wrapped Socket.
type ServerSocket struct {
	// socket is the wrapped Socket holding the bound FD.
	socket *Socket
}
   466  
   467  // NewServerSocket returns a socket from an existing FD.
   468  func NewServerSocket(fd int) (*ServerSocket, error) {
   469  	s, err := NewSocket(fd)
   470  	if err != nil {
   471  		return nil, err
   472  	}
   473  	return &ServerSocket{socket: s}, nil
   474  }
   475  
   476  // Bind creates and binds a new socket.
   477  func Bind(addr string, packet bool) (*ServerSocket, error) {
   478  	fd, err := socket(packet)
   479  	if err != nil {
   480  		return nil, err
   481  	}
   482  
   483  	// Do the bind.
   484  	usa := &unix.SockaddrUnix{Name: addr}
   485  	if err := unix.Bind(fd, usa); err != nil {
   486  		unix.Close(fd)
   487  		return nil, err
   488  	}
   489  
   490  	return NewServerSocket(fd)
   491  }
   492  
   493  // BindAndListen creates, binds and listens on a new socket.
   494  func BindAndListen(addr string, packet bool) (*ServerSocket, error) {
   495  	s, err := Bind(addr, packet)
   496  	if err != nil {
   497  		return nil, err
   498  	}
   499  
   500  	// Start listening.
   501  	if err := s.Listen(); err != nil {
   502  		s.Close()
   503  		return nil, err
   504  	}
   505  
   506  	return s, nil
   507  }
   508  
   509  // Listen starts listening on the socket.
   510  func (s *ServerSocket) Listen() error {
   511  	fd, ok := s.socket.enterFD()
   512  	if !ok {
   513  		return unix.EBADF
   514  	}
   515  	defer s.socket.gate.Leave()
   516  
   517  	return unix.Listen(fd, backlog)
   518  }
   519  
   520  // Accept accepts a new connection.
   521  //
   522  // This is always blocking.
   523  //
   524  // Preconditions:
   525  // * ServerSocket is listening (Listen called).
   526  func (s *ServerSocket) Accept() (*Socket, error) {
   527  	fd, ok := s.socket.enterFD()
   528  	if !ok {
   529  		return nil, unix.EBADF
   530  	}
   531  	defer s.socket.gate.Leave()
   532  
   533  	for {
   534  		nfd, _, err := unix.Accept(fd)
   535  		switch err {
   536  		case nil:
   537  			return NewSocket(nfd)
   538  		case unix.EAGAIN:
   539  			err = s.socket.wait(false)
   540  			if err == errClosing {
   541  				err = unix.EBADF
   542  			}
   543  		}
   544  		if err != nil {
   545  			return nil, err
   546  		}
   547  	}
   548  }
   549  
   550  // Close closes the server socket.
   551  //
   552  // This must only be called once.
   553  func (s *ServerSocket) Close() error {
   554  	return s.socket.Close()
   555  }
   556  
   557  // FD returns the socket's file descriptor.
   558  //
   559  // See Socket.FD.
   560  func (s *ServerSocket) FD() int {
   561  	return s.socket.FD()
   562  }
   563  
   564  // Release releases ownership of the socket's file descriptor.
   565  //
   566  // See Socket.Release.
   567  func (s *ServerSocket) Release() (int, error) {
   568  	return s.socket.Release()
   569  }