github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/unix/unix_vfs2.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package unix
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/context"
    20  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    21  	"github.com/SagerNet/gvisor/pkg/fspath"
    22  	"github.com/SagerNet/gvisor/pkg/hostarch"
    23  	"github.com/SagerNet/gvisor/pkg/marshal"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/sockfs"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/socket"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/socket/control"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/socket/netstack"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    32  	"github.com/SagerNet/gvisor/pkg/syserr"
    33  	"github.com/SagerNet/gvisor/pkg/syserror"
    34  	"github.com/SagerNet/gvisor/pkg/tcpip"
    35  	"github.com/SagerNet/gvisor/pkg/usermem"
    36  	"github.com/SagerNet/gvisor/pkg/waiter"
    37  )
    38  
// SocketVFS2 implements socket.SocketVFS2 (and by extension,
// vfs.FileDescriptionImpl) for Unix sockets.
//
// Instances are created by NewFileDescription, which wires the embedded
// pieces together (reference count, file locks and the VFS file description).
//
// +stateify savable
type SocketVFS2 struct {
	// vfsfd is the VFS file description for this socket. It is initialized
	// in NewFileDescription with this SocketVFS2 as its implementation.
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.DentryMetadataFileDescriptionImpl
	vfs.LockFD

	// socketVFS2Refs provides the reference count used by InitRefs and
	// DecRef.
	socketVFS2Refs
	// socketOpsCommon holds the transport endpoint (ep), the socket type
	// (stype) and the abstract-namespace binding (abstractName,
	// abstractNamespace) used by the methods in this file.
	socketOpsCommon
}
    52  
    53  var _ = socket.SocketVFS2(&SocketVFS2{})
    54  
    55  // NewSockfsFile creates a new socket file in the global sockfs mount and
    56  // returns a corresponding file description.
    57  func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
    58  	mnt := t.Kernel().SocketMount()
    59  	d := sockfs.NewDentry(t, mnt)
    60  	defer d.DecRef(t)
    61  
    62  	fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{})
    63  	if err != nil {
    64  		return nil, syserr.FromError(err)
    65  	}
    66  	return fd, nil
    67  }
    68  
    69  // NewFileDescription creates and returns a socket file description
    70  // corresponding to the given mount and dentry.
    71  func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
    72  	// You can create AF_UNIX, SOCK_RAW sockets. They're the same as
    73  	// SOCK_DGRAM and don't require CAP_NET_RAW.
    74  	if stype == linux.SOCK_RAW {
    75  		stype = linux.SOCK_DGRAM
    76  	}
    77  
    78  	sock := &SocketVFS2{
    79  		socketOpsCommon: socketOpsCommon{
    80  			ep:    ep,
    81  			stype: stype,
    82  		},
    83  	}
    84  	sock.InitRefs()
    85  	sock.LockFD.Init(locks)
    86  	vfsfd := &sock.vfsfd
    87  	if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{
    88  		DenyPRead:         true,
    89  		DenyPWrite:        true,
    90  		UseDentryMetadata: true,
    91  	}); err != nil {
    92  		return nil, err
    93  	}
    94  	return vfsfd, nil
    95  }
    96  
    97  // DecRef implements RefCounter.DecRef.
    98  func (s *SocketVFS2) DecRef(ctx context.Context) {
    99  	s.socketVFS2Refs.DecRef(func() {
   100  		kernel.KernelFromContext(ctx).DeleteSocketVFS2(&s.vfsfd)
   101  		s.ep.Close(ctx)
   102  		if s.abstractNamespace != nil {
   103  			s.abstractNamespace.Remove(s.abstractName, s)
   104  		}
   105  	})
   106  }
   107  
// Release implements vfs.FileDescriptionImpl.Release.
//
// It does not tear the socket down directly; it drops one reference via
// DecRef, and teardown happens in DecRef's cleanup callback.
func (s *SocketVFS2) Release(ctx context.Context) {
	// Release only decrements a reference on s because s may be referenced in
	// the abstract socket namespace.
	s.DecRef(ctx)
}
   114  
   115  // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
   116  // a transport.Endpoint.
   117  func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
   118  	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen)
   119  }
   120  
   121  // blockingAccept implements a blocking version of accept(2), that is, if no
   122  // connections are ready to be accept, it will block until one becomes ready.
   123  func (s *SocketVFS2) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (transport.Endpoint, *syserr.Error) {
   124  	// Register for notifications.
   125  	e, ch := waiter.NewChannelEntry(nil)
   126  	s.socketOpsCommon.EventRegister(&e, waiter.ReadableEvents)
   127  	defer s.socketOpsCommon.EventUnregister(&e)
   128  
   129  	// Try to accept the connection; if it fails, then wait until we get a
   130  	// notification.
   131  	for {
   132  		if ep, err := s.ep.Accept(peerAddr); err != syserr.ErrWouldBlock {
   133  			return ep, err
   134  		}
   135  
   136  		if err := t.Block(ch); err != nil {
   137  			return nil, syserr.FromError(err)
   138  		}
   139  	}
   140  }
   141  
   142  // Accept implements the linux syscall accept(2) for sockets backed by
   143  // a transport.Endpoint.
   144  func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
   145  	var peerAddr *tcpip.FullAddress
   146  	if peerRequested {
   147  		peerAddr = &tcpip.FullAddress{}
   148  	}
   149  	ep, err := s.ep.Accept(peerAddr)
   150  	if err != nil {
   151  		if err != syserr.ErrWouldBlock || !blocking {
   152  			return 0, nil, 0, err
   153  		}
   154  
   155  		var err *syserr.Error
   156  		ep, err = s.blockingAccept(t, peerAddr)
   157  		if err != nil {
   158  			return 0, nil, 0, err
   159  		}
   160  	}
   161  
   162  	ns, err := NewSockfsFile(t, ep, s.stype)
   163  	if err != nil {
   164  		return 0, nil, 0, err
   165  	}
   166  	defer ns.DecRef(t)
   167  
   168  	if flags&linux.SOCK_NONBLOCK != 0 {
   169  		ns.SetStatusFlags(t, t.Credentials(), linux.SOCK_NONBLOCK)
   170  	}
   171  
   172  	var addr linux.SockAddr
   173  	var addrLen uint32
   174  	if peerAddr != nil {
   175  		addr, addrLen = socket.ConvertAddress(linux.AF_UNIX, *peerAddr)
   176  	}
   177  
   178  	fd, e := t.NewFDFromVFS2(0, ns, kernel.FDFlags{
   179  		CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
   180  	})
   181  	if e != nil {
   182  		return 0, nil, 0, syserr.FromError(e)
   183  	}
   184  
   185  	t.Kernel().RecordSocketVFS2(ns)
   186  	return fd, addr, addrLen, nil
   187  }
   188  
   189  // Bind implements the linux syscall bind(2) for unix sockets.
   190  func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
   191  	p, e := extractPath(sockaddr)
   192  	if e != nil {
   193  		return e
   194  	}
   195  
   196  	bep, ok := s.ep.(transport.BoundEndpoint)
   197  	if !ok {
   198  		// This socket can't be bound.
   199  		return syserr.ErrInvalidArgument
   200  	}
   201  
   202  	return s.ep.Bind(tcpip.FullAddress{Addr: tcpip.Address(p)}, func() *syserr.Error {
   203  		// Is it abstract?
   204  		if p[0] == 0 {
   205  			if t.IsNetworkNamespaced() {
   206  				return syserr.ErrInvalidEndpointState
   207  			}
   208  			asn := t.AbstractSockets()
   209  			name := p[1:]
   210  			if err := asn.Bind(t, name, bep, s); err != nil {
   211  				// syserr.ErrPortInUse corresponds to EADDRINUSE.
   212  				return syserr.ErrPortInUse
   213  			}
   214  			s.abstractName = name
   215  			s.abstractNamespace = asn
   216  		} else {
   217  			path := fspath.Parse(p)
   218  			root := t.FSContext().RootDirectoryVFS2()
   219  			defer root.DecRef(t)
   220  			start := root
   221  			relPath := !path.Absolute
   222  			if relPath {
   223  				start = t.FSContext().WorkingDirectoryVFS2()
   224  				defer start.DecRef(t)
   225  			}
   226  			pop := vfs.PathOperation{
   227  				Root:  root,
   228  				Start: start,
   229  				Path:  path,
   230  			}
   231  			stat, err := s.vfsfd.Stat(t, vfs.StatOptions{Mask: linux.STATX_MODE})
   232  			if err != nil {
   233  				return syserr.FromError(err)
   234  			}
   235  			err = t.Kernel().VFS().MknodAt(t, t.Credentials(), &pop, &vfs.MknodOptions{
   236  				// File permissions correspond to net/unix/af_unix.c:unix_bind.
   237  				Mode:     linux.FileMode(linux.S_IFSOCK | uint(stat.Mode)&^t.FSContext().Umask()),
   238  				Endpoint: bep,
   239  			})
   240  			if linuxerr.Equals(linuxerr.EEXIST, err) {
   241  				return syserr.ErrAddressInUse
   242  			}
   243  			return syserr.FromError(err)
   244  		}
   245  
   246  		return nil
   247  	})
   248  }
   249  
   250  // Ioctl implements vfs.FileDescriptionImpl.
   251  func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
   252  	return netstack.Ioctl(ctx, s.ep, uio, args)
   253  }
   254  
// PRead implements vfs.FileDescriptionImpl.PRead.
//
// Sockets have no file offset, so this always fails with ESPIPE and reads
// nothing.
// NOTE(review): NewFileDescription sets DenyPRead, so VFS presumably rejects
// pread before reaching this method — confirm.
func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	return 0, linuxerr.ESPIPE
}
   259  
   260  // Read implements vfs.FileDescriptionImpl.
   261  func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   262  	// All flags other than RWF_NOWAIT should be ignored.
   263  	// TODO(github.com/SagerNet/issue/2601): Support RWF_NOWAIT.
   264  	if opts.Flags != 0 {
   265  		return 0, syserror.EOPNOTSUPP
   266  	}
   267  
   268  	if dst.NumBytes() == 0 {
   269  		return 0, nil
   270  	}
   271  	r := &EndpointReader{
   272  		Ctx:       ctx,
   273  		Endpoint:  s.ep,
   274  		NumRights: 0,
   275  		Peek:      false,
   276  		From:      nil,
   277  	}
   278  	n, err := dst.CopyOutFrom(ctx, r)
   279  	// Drop control messages.
   280  	r.Control.Release(ctx)
   281  	return n, err
   282  }
   283  
// PWrite implements vfs.FileDescriptionImpl.PWrite.
//
// Sockets have no file offset, so this always fails with ESPIPE and writes
// nothing.
// NOTE(review): NewFileDescription sets DenyPWrite, so VFS presumably rejects
// pwrite before reaching this method — confirm.
func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	return 0, linuxerr.ESPIPE
}
   288  
   289  // Write implements vfs.FileDescriptionImpl.
   290  func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   291  	// All flags other than RWF_NOWAIT should be ignored.
   292  	// TODO(github.com/SagerNet/issue/2601): Support RWF_NOWAIT.
   293  	if opts.Flags != 0 {
   294  		return 0, syserror.EOPNOTSUPP
   295  	}
   296  
   297  	t := kernel.TaskFromContext(ctx)
   298  	ctrl := control.New(t, s.ep, nil)
   299  
   300  	if src.NumBytes() == 0 {
   301  		nInt, err := s.ep.SendMsg(ctx, [][]byte{}, ctrl, nil)
   302  		return int64(nInt), err.ToError()
   303  	}
   304  
   305  	return src.CopyInTo(ctx, &EndpointWriter{
   306  		Ctx:      ctx,
   307  		Endpoint: s.ep,
   308  		Control:  ctrl,
   309  		To:       nil,
   310  	})
   311  }
   312  
   313  // Readiness implements waiter.Waitable.Readiness.
   314  func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask {
   315  	return s.socketOpsCommon.Readiness(mask)
   316  }
   317  
   318  // EventRegister implements waiter.Waitable.EventRegister.
   319  func (s *SocketVFS2) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
   320  	s.socketOpsCommon.EventRegister(e, mask)
   321  }
   322  
   323  // EventUnregister implements waiter.Waitable.EventUnregister.
   324  func (s *SocketVFS2) EventUnregister(e *waiter.Entry) {
   325  	s.socketOpsCommon.EventUnregister(e)
   326  }
   327  
   328  // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
   329  // a transport.Endpoint.
   330  func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
   331  	return netstack.SetSockOpt(t, s, s.ep, level, name, optVal)
   332  }
   333  
// providerVFS2 is a unix domain socket provider for VFS2.
//
// It carries no state; its Socket and Pair methods create single sockets and
// connected socket pairs respectively.
type providerVFS2 struct{}
   336  
   337  func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) {
   338  	// Check arguments.
   339  	if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ {
   340  		return nil, syserr.ErrProtocolNotSupported
   341  	}
   342  
   343  	// Create the endpoint and socket.
   344  	var ep transport.Endpoint
   345  	switch stype {
   346  	case linux.SOCK_DGRAM, linux.SOCK_RAW:
   347  		ep = transport.NewConnectionless(t)
   348  	case linux.SOCK_SEQPACKET, linux.SOCK_STREAM:
   349  		ep = transport.NewConnectioned(t, stype, t.Kernel())
   350  	default:
   351  		return nil, syserr.ErrInvalidArgument
   352  	}
   353  
   354  	f, err := NewSockfsFile(t, ep, stype)
   355  	if err != nil {
   356  		ep.Close(t)
   357  		return nil, err
   358  	}
   359  	return f, nil
   360  }
   361  
   362  // Pair creates a new pair of AF_UNIX connected sockets.
   363  func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) {
   364  	// Check arguments.
   365  	if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ {
   366  		return nil, nil, syserr.ErrProtocolNotSupported
   367  	}
   368  
   369  	switch stype {
   370  	case linux.SOCK_STREAM, linux.SOCK_DGRAM, linux.SOCK_SEQPACKET, linux.SOCK_RAW:
   371  		// Ok
   372  	default:
   373  		return nil, nil, syserr.ErrInvalidArgument
   374  	}
   375  
   376  	// Create the endpoints and sockets.
   377  	ep1, ep2 := transport.NewPair(t, stype, t.Kernel())
   378  	s1, err := NewSockfsFile(t, ep1, stype)
   379  	if err != nil {
   380  		ep1.Close(t)
   381  		ep2.Close(t)
   382  		return nil, nil, err
   383  	}
   384  	s2, err := NewSockfsFile(t, ep2, stype)
   385  	if err != nil {
   386  		s1.DecRef(t)
   387  		ep2.Close(t)
   388  		return nil, nil, err
   389  	}
   390  
   391  	return s1, s2, nil
   392  }