github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/socket/unix/transport/host.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package transport
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    21  	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
    22  	"github.com/MerlinKodo/gvisor/pkg/context"
    23  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    24  	"github.com/MerlinKodo/gvisor/pkg/fdnotifier"
    25  	"github.com/MerlinKodo/gvisor/pkg/log"
    26  	"github.com/MerlinKodo/gvisor/pkg/sync"
    27  	"github.com/MerlinKodo/gvisor/pkg/syserr"
    28  	"github.com/MerlinKodo/gvisor/pkg/tcpip"
    29  	"github.com/MerlinKodo/gvisor/pkg/unet"
    30  	"github.com/MerlinKodo/gvisor/pkg/waiter"
    31  	"golang.org/x/sys/unix"
    32  )
    33  
// SCMRights implements RightsControlMessage with host FDs.
type SCMRights struct {
	// FDs holds the host file descriptors carried by this message. Release
	// closes every descriptor in the slice and then sets it to nil.
	FDs []int
}
    38  
    39  // Clone implements RightsControlMessage.Clone.
    40  func (c *SCMRights) Clone() RightsControlMessage {
    41  	// Host rights never need to be cloned.
    42  	return nil
    43  }
    44  
    45  // Release implements RightsControlMessage.Release.
    46  func (c *SCMRights) Release(ctx context.Context) {
    47  	for _, fd := range c.FDs {
    48  		unix.Close(fd)
    49  	}
    50  	c.FDs = nil
    51  }
    52  
// HostConnectedEndpoint is an implementation of ConnectedEndpoint and
// Receiver. It is backed by a host fd that was imported at sentry startup.
// This fd is shared with a hostfs inode, which retains ownership of it.
//
// HostConnectedEndpoint is saveable, since we expect that the host will
// provide the same fd upon restore.
//
// As of this writing, we only allow Unix sockets to be imported.
//
// +stateify savable
type HostConnectedEndpoint struct {
	// HostConnectedEndpointRefs provides the reference counting used by
	// Release (InitRefs, IncRef and DecRef are called on it).
	HostConnectedEndpointRefs

	// mu protects fd below.
	mu sync.RWMutex `state:"nosave"`

	// fd is the host fd backing this endpoint. destroyLocked sets it to -1.
	fd int

	// addr is the address at which this endpoint is bound.
	addr string

	// sndbuf is the size of the send buffer. It is loaded from the host
	// socket's SO_SNDBUF in initFromOptions.
	//
	// N.B. When this is smaller than the host size, we present it via
	// GetSockOpt and message splitting/rejection in SendMsg, but do not
	// prevent lots of small messages from filling the real send buffer
	// size on the host.
	sndbuf atomicbitops.Int64 `state:"nosave"`

	// stype is the type of Unix socket, as reported by the host socket's
	// SO_TYPE in initFromOptions.
	stype linux.SockType
}
    86  
    87  // init performs initialization required for creating new
    88  // HostConnectedEndpoints and for restoring them.
    89  func (c *HostConnectedEndpoint) init() *syserr.Error {
    90  	c.InitRefs()
    91  	return c.initFromOptions()
    92  }
    93  
    94  func (c *HostConnectedEndpoint) initFromOptions() *syserr.Error {
    95  	family, err := unix.GetsockoptInt(c.fd, unix.SOL_SOCKET, unix.SO_DOMAIN)
    96  	if err != nil {
    97  		return syserr.FromError(err)
    98  	}
    99  
   100  	if family != unix.AF_UNIX {
   101  		// We only allow Unix sockets.
   102  		return syserr.ErrInvalidEndpointState
   103  	}
   104  
   105  	stype, err := unix.GetsockoptInt(c.fd, unix.SOL_SOCKET, unix.SO_TYPE)
   106  	if err != nil {
   107  		return syserr.FromError(err)
   108  	}
   109  
   110  	if err := unix.SetNonblock(c.fd, true); err != nil {
   111  		return syserr.FromError(err)
   112  	}
   113  
   114  	sndbuf, err := unix.GetsockoptInt(c.fd, unix.SOL_SOCKET, unix.SO_SNDBUF)
   115  	if err != nil {
   116  		return syserr.FromError(err)
   117  	}
   118  
   119  	c.stype = linux.SockType(stype)
   120  	c.sndbuf.Store(int64(sndbuf))
   121  
   122  	return nil
   123  }
   124  
   125  // NewHostConnectedEndpoint creates a new HostConnectedEndpoint backed by a
   126  // host fd imported at sentry startup.
   127  //
   128  // The caller is responsible for calling Init(). Additionally, Release needs to
   129  // be called twice because HostConnectedEndpoint is both a Receiver and
   130  // HostConnectedEndpoint.
   131  func NewHostConnectedEndpoint(hostFD int, addr string) (*HostConnectedEndpoint, *syserr.Error) {
   132  	e := HostConnectedEndpoint{
   133  		fd:   hostFD,
   134  		addr: addr,
   135  	}
   136  
   137  	if err := e.init(); err != nil {
   138  		return nil, err
   139  	}
   140  
   141  	// HostConnectedEndpointRefs start off with a single reference. We need two.
   142  	e.IncRef()
   143  	return &e, nil
   144  }
   145  
   146  // SockType returns the underlying socket type.
   147  func (c *HostConnectedEndpoint) SockType() linux.SockType {
   148  	return c.stype
   149  }
   150  
   151  // Send implements ConnectedEndpoint.Send.
   152  func (c *HostConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMessages ControlMessages, from Address) (int64, bool, *syserr.Error) {
   153  	c.mu.RLock()
   154  	defer c.mu.RUnlock()
   155  
   156  	if !controlMessages.Empty() {
   157  		return 0, false, syserr.ErrInvalidEndpointState
   158  	}
   159  
   160  	// Since stream sockets don't preserve message boundaries, we can write
   161  	// only as much of the message as fits in the send buffer.
   162  	truncate := c.stype == linux.SOCK_STREAM
   163  
   164  	n, totalLen, err := fdWriteVec(c.fd, data, c.SendMaxQueueSize(), truncate)
   165  	if n < totalLen && err == nil {
   166  		// The host only returns a short write if it would otherwise
   167  		// block (and only for stream sockets).
   168  		err = linuxerr.EAGAIN
   169  	}
   170  	if n > 0 && !linuxerr.Equals(linuxerr.EAGAIN, err) {
   171  		// The caller may need to block to send more data, but
   172  		// otherwise there isn't anything that can be done about an
   173  		// error with a partial write.
   174  		err = nil
   175  	}
   176  
   177  	// There is no need for the callee to call SendNotify because fdWriteVec
   178  	// uses the host's sendmsg(2) and the host kernel's queue.
   179  	return n, false, syserr.FromError(err)
   180  }
   181  
   182  // SendNotify implements ConnectedEndpoint.SendNotify.
   183  func (c *HostConnectedEndpoint) SendNotify() {}
   184  
   185  // CloseSend implements ConnectedEndpoint.CloseSend.
   186  func (c *HostConnectedEndpoint) CloseSend() {
   187  	c.mu.Lock()
   188  	defer c.mu.Unlock()
   189  
   190  	if err := unix.Shutdown(c.fd, unix.SHUT_WR); err != nil {
   191  		// A well-formed UDS shutdown can't fail. See
   192  		// net/unix/af_unix.c:unix_shutdown.
   193  		panic(fmt.Sprintf("failed write shutdown on host socket %+v: %v", c, err))
   194  	}
   195  }
   196  
   197  // CloseNotify implements ConnectedEndpoint.CloseNotify.
   198  func (c *HostConnectedEndpoint) CloseNotify() {}
   199  
   200  // Writable implements ConnectedEndpoint.Writable.
   201  func (c *HostConnectedEndpoint) Writable() bool {
   202  	c.mu.RLock()
   203  	defer c.mu.RUnlock()
   204  
   205  	return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.WritableEvents)&waiter.WritableEvents != 0
   206  }
   207  
   208  // Passcred implements ConnectedEndpoint.Passcred.
   209  func (c *HostConnectedEndpoint) Passcred() bool {
   210  	// We don't support credential passing for host sockets.
   211  	return false
   212  }
   213  
   214  // GetLocalAddress implements ConnectedEndpoint.GetLocalAddress.
   215  func (c *HostConnectedEndpoint) GetLocalAddress() (Address, tcpip.Error) {
   216  	return Address{Addr: c.addr}, nil
   217  }
   218  
   219  // EventUpdate implements ConnectedEndpoint.EventUpdate.
   220  func (c *HostConnectedEndpoint) EventUpdate() error {
   221  	c.mu.RLock()
   222  	defer c.mu.RUnlock()
   223  	if c.fd != -1 {
   224  		if err := fdnotifier.UpdateFD(int32(c.fd)); err != nil {
   225  			return err
   226  		}
   227  	}
   228  	return nil
   229  }
   230  
   231  // Recv implements Receiver.Recv.
   232  func (c *HostConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, Address, bool, *syserr.Error) {
   233  	c.mu.RLock()
   234  	defer c.mu.RUnlock()
   235  
   236  	var cm unet.ControlMessage
   237  	if numRights > 0 {
   238  		cm.EnableFDs(int(numRights))
   239  	}
   240  
   241  	// N.B. Unix sockets don't have a receive buffer, the send buffer
   242  	// serves both purposes.
   243  	rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.RecvMaxQueueSize())
   244  	if rl > 0 && err != nil {
   245  		// We got some data, so all we need to do on error is return
   246  		// the data that we got. Short reads are fine, no need to
   247  		// block.
   248  		err = nil
   249  	}
   250  	if err != nil {
   251  		return 0, 0, ControlMessages{}, false, Address{}, false, syserr.FromError(err)
   252  	}
   253  
   254  	// There is no need for the callee to call RecvNotify because fdReadVec uses
   255  	// the host's recvmsg(2) and the host kernel's queue.
   256  
   257  	// Trim the control data if we received less than the full amount.
   258  	if cl < uint64(len(cm)) {
   259  		cm = cm[:cl]
   260  	}
   261  
   262  	// Avoid extra allocations in the case where there isn't any control data.
   263  	if len(cm) == 0 {
   264  		return rl, ml, ControlMessages{}, cTrunc, Address{Addr: c.addr}, false, nil
   265  	}
   266  
   267  	fds, err := cm.ExtractFDs()
   268  	if err != nil {
   269  		return 0, 0, ControlMessages{}, false, Address{}, false, syserr.FromError(err)
   270  	}
   271  
   272  	if len(fds) == 0 {
   273  		return rl, ml, ControlMessages{}, cTrunc, Address{Addr: c.addr}, false, nil
   274  	}
   275  	return rl, ml, ControlMessages{Rights: &SCMRights{fds}}, cTrunc, Address{Addr: c.addr}, false, nil
   276  }
   277  
   278  // RecvNotify implements Receiver.RecvNotify.
   279  func (c *HostConnectedEndpoint) RecvNotify() {}
   280  
   281  // CloseRecv implements Receiver.CloseRecv.
   282  func (c *HostConnectedEndpoint) CloseRecv() {
   283  	c.mu.Lock()
   284  	defer c.mu.Unlock()
   285  
   286  	if err := unix.Shutdown(c.fd, unix.SHUT_RD); err != nil {
   287  		// A well-formed UDS shutdown can't fail. See
   288  		// net/unix/af_unix.c:unix_shutdown.
   289  		panic(fmt.Sprintf("failed read shutdown on host socket %+v: %v", c, err))
   290  	}
   291  }
   292  
   293  // Readable implements Receiver.Readable.
   294  func (c *HostConnectedEndpoint) Readable() bool {
   295  	c.mu.RLock()
   296  	defer c.mu.RUnlock()
   297  
   298  	return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.ReadableEvents)&waiter.ReadableEvents != 0
   299  }
   300  
   301  // SendQueuedSize implements Receiver.SendQueuedSize.
   302  func (c *HostConnectedEndpoint) SendQueuedSize() int64 {
   303  	// TODO(gvisor.dev/issue/273): SendQueuedSize isn't supported for host
   304  	// sockets because we don't allow the sentry to call ioctl(2).
   305  	return -1
   306  }
   307  
   308  // RecvQueuedSize implements Receiver.RecvQueuedSize.
   309  func (c *HostConnectedEndpoint) RecvQueuedSize() int64 {
   310  	// TODO(gvisor.dev/issue/273): RecvQueuedSize isn't supported for host
   311  	// sockets because we don't allow the sentry to call ioctl(2).
   312  	return -1
   313  }
   314  
   315  // SendMaxQueueSize implements Receiver.SendMaxQueueSize.
   316  func (c *HostConnectedEndpoint) SendMaxQueueSize() int64 {
   317  	return c.sndbuf.Load()
   318  }
   319  
   320  // RecvMaxQueueSize implements Receiver.RecvMaxQueueSize.
   321  func (c *HostConnectedEndpoint) RecvMaxQueueSize() int64 {
   322  	// N.B. Unix sockets don't use the receive buffer. We'll claim it is
   323  	// the same size as the send buffer.
   324  	return c.sndbuf.Load()
   325  }
   326  
   327  func (c *HostConnectedEndpoint) destroyLocked() {
   328  	c.fd = -1
   329  }
   330  
   331  // Release implements ConnectedEndpoint.Release and Receiver.Release.
   332  func (c *HostConnectedEndpoint) Release(ctx context.Context) {
   333  	c.DecRef(func() {
   334  		c.mu.Lock()
   335  		c.destroyLocked()
   336  		c.mu.Unlock()
   337  	})
   338  }
   339  
   340  // CloseUnread implements ConnectedEndpoint.CloseUnread.
   341  func (c *HostConnectedEndpoint) CloseUnread() {}
   342  
   343  // SetSendBufferSize implements ConnectedEndpoint.SetSendBufferSize.
   344  func (c *HostConnectedEndpoint) SetSendBufferSize(v int64) (newSz int64) {
   345  	// gVisor does not permit setting of SO_SNDBUF for host backed unix
   346  	// domain sockets.
   347  	return c.sndbuf.Load()
   348  }
   349  
   350  // SetReceiveBufferSize implements ConnectedEndpoint.SetReceiveBufferSize.
   351  func (c *HostConnectedEndpoint) SetReceiveBufferSize(v int64) (newSz int64) {
   352  	// gVisor does not permit setting of SO_RCVBUF for host backed unix
   353  	// domain sockets. Receive buffer does not have any effect for unix
   354  	// sockets and we claim to be the same as send buffer.
   355  	return c.sndbuf.Load()
   356  }
   357  
// SCMConnectedEndpoint represents an endpoint backed by a host fd that was
// passed through a gofer Unix socket. It resembles HostConnectedEndpoint, with the
// following differences:
//   - SCMConnectedEndpoint is not saveable, because the host cannot guarantee
//     the same descriptor number across S/R.
//   - SCMConnectedEndpoint holds ownership of its fd and notification queue.
type SCMConnectedEndpoint struct {
	// HostConnectedEndpoint supplies the fd, address, locking and reference
	// counting, plus every method that is not overridden here.
	HostConnectedEndpoint

	// queue is the waiter queue that Init registers with fdnotifier for the
	// endpoint's fd.
	queue *waiter.Queue
}
   369  
   370  // Init will do the initialization required without holding other locks.
   371  func (e *SCMConnectedEndpoint) Init() error {
   372  	return fdnotifier.AddFD(int32(e.fd), e.queue)
   373  }
   374  
   375  // Release implements ConnectedEndpoint.Release and Receiver.Release.
   376  func (e *SCMConnectedEndpoint) Release(ctx context.Context) {
   377  	e.DecRef(func() {
   378  		e.mu.Lock()
   379  		fdnotifier.RemoveFD(int32(e.fd))
   380  		if err := unix.Close(e.fd); err != nil {
   381  			log.Warningf("Failed to close host fd %d: %v", err)
   382  		}
   383  		e.destroyLocked()
   384  		e.mu.Unlock()
   385  	})
   386  }
   387  
   388  // NewSCMEndpoint creates a new SCMConnectedEndpoint backed by a host fd that
   389  // was passed through a Unix socket.
   390  //
   391  // The caller is responsible for calling Init(). Additionaly, Release needs to
   392  // be called twice because ConnectedEndpoint is both a Receiver and
   393  // ConnectedEndpoint.
   394  func NewSCMEndpoint(hostFD int, queue *waiter.Queue, addr string) (*SCMConnectedEndpoint, *syserr.Error) {
   395  	e := SCMConnectedEndpoint{
   396  		HostConnectedEndpoint: HostConnectedEndpoint{
   397  			fd:   hostFD,
   398  			addr: addr,
   399  		},
   400  		queue: queue,
   401  	}
   402  
   403  	if err := e.init(); err != nil {
   404  		return nil, err
   405  	}
   406  
   407  	// e starts off with a single reference. We need two.
   408  	e.IncRef()
   409  	return &e, nil
   410  }