github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/vfs2/fd.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs2
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    20  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    21  	"github.com/SagerNet/gvisor/pkg/sentry/fs/lock"
    22  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/tmpfs"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/fasync"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe"
    26  	slinux "github.com/SagerNet/gvisor/pkg/sentry/syscalls/linux"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    28  	"github.com/SagerNet/gvisor/pkg/syserror"
    29  )
    30  
    31  // Close implements Linux syscall close(2).
    32  func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    33  	fd := args[0].Int()
    34  
    35  	// Note that Remove provides a reference on the file that we may use to
    36  	// flush. It is still active until we drop the final reference below
    37  	// (and other reference-holding operations complete).
    38  	_, file := t.FDTable().Remove(t, fd)
    39  	if file == nil {
    40  		return 0, nil, linuxerr.EBADF
    41  	}
    42  	defer file.DecRef(t)
    43  
    44  	err := file.OnClose(t)
    45  	return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, syserror.EINTR, "close", file)
    46  }
    47  
    48  // Dup implements Linux syscall dup(2).
    49  func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    50  	fd := args[0].Int()
    51  
    52  	file := t.GetFileVFS2(fd)
    53  	if file == nil {
    54  		return 0, nil, linuxerr.EBADF
    55  	}
    56  	defer file.DecRef(t)
    57  
    58  	newFD, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{})
    59  	if err != nil {
    60  		return 0, nil, linuxerr.EMFILE
    61  	}
    62  	return uintptr(newFD), nil, nil
    63  }
    64  
    65  // Dup2 implements Linux syscall dup2(2).
    66  func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    67  	oldfd := args[0].Int()
    68  	newfd := args[1].Int()
    69  
    70  	if oldfd == newfd {
    71  		// As long as oldfd is valid, dup2() does nothing and returns newfd.
    72  		file := t.GetFileVFS2(oldfd)
    73  		if file == nil {
    74  			return 0, nil, linuxerr.EBADF
    75  		}
    76  		file.DecRef(t)
    77  		return uintptr(newfd), nil, nil
    78  	}
    79  
    80  	return dup3(t, oldfd, newfd, 0)
    81  }
    82  
    83  // Dup3 implements Linux syscall dup3(2).
    84  func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    85  	oldfd := args[0].Int()
    86  	newfd := args[1].Int()
    87  	flags := args[2].Uint()
    88  
    89  	if oldfd == newfd {
    90  		return 0, nil, linuxerr.EINVAL
    91  	}
    92  
    93  	return dup3(t, oldfd, newfd, flags)
    94  }
    95  
    96  func dup3(t *kernel.Task, oldfd, newfd int32, flags uint32) (uintptr, *kernel.SyscallControl, error) {
    97  	if flags&^linux.O_CLOEXEC != 0 {
    98  		return 0, nil, linuxerr.EINVAL
    99  	}
   100  
   101  	file := t.GetFileVFS2(oldfd)
   102  	if file == nil {
   103  		return 0, nil, linuxerr.EBADF
   104  	}
   105  	defer file.DecRef(t)
   106  
   107  	err := t.NewFDAtVFS2(newfd, file, kernel.FDFlags{
   108  		CloseOnExec: flags&linux.O_CLOEXEC != 0,
   109  	})
   110  	if err != nil {
   111  		return 0, nil, err
   112  	}
   113  	return uintptr(newfd), nil, nil
   114  }
   115  
   116  // Fcntl implements linux syscall fcntl(2).
   117  func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   118  	fd := args[0].Int()
   119  	cmd := args[1].Int()
   120  
   121  	file, flags := t.FDTable().GetVFS2(fd)
   122  	if file == nil {
   123  		return 0, nil, linuxerr.EBADF
   124  	}
   125  	defer file.DecRef(t)
   126  
   127  	if file.StatusFlags()&linux.O_PATH != 0 {
   128  		switch cmd {
   129  		case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL:
   130  			// allowed
   131  		default:
   132  			return 0, nil, linuxerr.EBADF
   133  		}
   134  	}
   135  
   136  	switch cmd {
   137  	case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC:
   138  		minfd := args[2].Int()
   139  		fd, err := t.NewFDFromVFS2(minfd, file, kernel.FDFlags{
   140  			CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC,
   141  		})
   142  		if err != nil {
   143  			return 0, nil, err
   144  		}
   145  		return uintptr(fd), nil, nil
   146  	case linux.F_GETFD:
   147  		return uintptr(flags.ToLinuxFDFlags()), nil, nil
   148  	case linux.F_SETFD:
   149  		flags := args[2].Uint()
   150  		err := t.FDTable().SetFlagsVFS2(t, fd, kernel.FDFlags{
   151  			CloseOnExec: flags&linux.FD_CLOEXEC != 0,
   152  		})
   153  		return 0, nil, err
   154  	case linux.F_GETFL:
   155  		return uintptr(file.StatusFlags()), nil, nil
   156  	case linux.F_SETFL:
   157  		return 0, nil, file.SetStatusFlags(t, t.Credentials(), args[2].Uint())
   158  	case linux.F_GETOWN:
   159  		owner, hasOwner := getAsyncOwner(t, file)
   160  		if !hasOwner {
   161  			return 0, nil, nil
   162  		}
   163  		if owner.Type == linux.F_OWNER_PGRP {
   164  			return uintptr(-owner.PID), nil, nil
   165  		}
   166  		return uintptr(owner.PID), nil, nil
   167  	case linux.F_SETOWN:
   168  		who := args[2].Int()
   169  		ownerType := int32(linux.F_OWNER_PID)
   170  		if who < 0 {
   171  			// Check for overflow before flipping the sign.
   172  			if who-1 > who {
   173  				return 0, nil, linuxerr.EINVAL
   174  			}
   175  			ownerType = linux.F_OWNER_PGRP
   176  			who = -who
   177  		}
   178  		return 0, nil, setAsyncOwner(t, int(fd), file, ownerType, who)
   179  	case linux.F_GETOWN_EX:
   180  		owner, hasOwner := getAsyncOwner(t, file)
   181  		if !hasOwner {
   182  			return 0, nil, nil
   183  		}
   184  		_, err := owner.CopyOut(t, args[2].Pointer())
   185  		return 0, nil, err
   186  	case linux.F_SETOWN_EX:
   187  		var owner linux.FOwnerEx
   188  		_, err := owner.CopyIn(t, args[2].Pointer())
   189  		if err != nil {
   190  			return 0, nil, err
   191  		}
   192  		return 0, nil, setAsyncOwner(t, int(fd), file, owner.Type, owner.PID)
   193  	case linux.F_SETPIPE_SZ:
   194  		pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
   195  		if !ok {
   196  			return 0, nil, linuxerr.EBADF
   197  		}
   198  		n, err := pipefile.SetPipeSize(int64(args[2].Int()))
   199  		if err != nil {
   200  			return 0, nil, err
   201  		}
   202  		return uintptr(n), nil, nil
   203  	case linux.F_GETPIPE_SZ:
   204  		pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
   205  		if !ok {
   206  			return 0, nil, linuxerr.EBADF
   207  		}
   208  		return uintptr(pipefile.PipeSize()), nil, nil
   209  	case linux.F_GET_SEALS:
   210  		val, err := tmpfs.GetSeals(file)
   211  		return uintptr(val), nil, err
   212  	case linux.F_ADD_SEALS:
   213  		if !file.IsWritable() {
   214  			return 0, nil, linuxerr.EPERM
   215  		}
   216  		err := tmpfs.AddSeals(file, args[2].Uint())
   217  		return 0, nil, err
   218  	case linux.F_SETLK:
   219  		return 0, nil, posixLock(t, args, file, false /* blocking */)
   220  	case linux.F_SETLKW:
   221  		return 0, nil, posixLock(t, args, file, true /* blocking */)
   222  	case linux.F_GETLK:
   223  		return 0, nil, posixTestLock(t, args, file)
   224  	case linux.F_GETSIG:
   225  		a := file.AsyncHandler()
   226  		if a == nil {
   227  			// Default behavior aka SIGIO.
   228  			return 0, nil, nil
   229  		}
   230  		return uintptr(a.(*fasync.FileAsync).Signal()), nil, nil
   231  	case linux.F_SETSIG:
   232  		a := file.SetAsyncHandler(fasync.NewVFS2(int(fd))).(*fasync.FileAsync)
   233  		return 0, nil, a.SetSignal(linux.Signal(args[2].Int()))
   234  	default:
   235  		// Everything else is not yet supported.
   236  		return 0, nil, linuxerr.EINVAL
   237  	}
   238  }
   239  
   240  func getAsyncOwner(t *kernel.Task, fd *vfs.FileDescription) (ownerEx linux.FOwnerEx, hasOwner bool) {
   241  	a := fd.AsyncHandler()
   242  	if a == nil {
   243  		return linux.FOwnerEx{}, false
   244  	}
   245  
   246  	ot, otg, opg := a.(*fasync.FileAsync).Owner()
   247  	switch {
   248  	case ot != nil:
   249  		return linux.FOwnerEx{
   250  			Type: linux.F_OWNER_TID,
   251  			PID:  int32(t.PIDNamespace().IDOfTask(ot)),
   252  		}, true
   253  	case otg != nil:
   254  		return linux.FOwnerEx{
   255  			Type: linux.F_OWNER_PID,
   256  			PID:  int32(t.PIDNamespace().IDOfThreadGroup(otg)),
   257  		}, true
   258  	case opg != nil:
   259  		return linux.FOwnerEx{
   260  			Type: linux.F_OWNER_PGRP,
   261  			PID:  int32(t.PIDNamespace().IDOfProcessGroup(opg)),
   262  		}, true
   263  	default:
   264  		return linux.FOwnerEx{}, true
   265  	}
   266  }
   267  
   268  func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType, pid int32) error {
   269  	switch ownerType {
   270  	case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP:
   271  		// Acceptable type.
   272  	default:
   273  		return linuxerr.EINVAL
   274  	}
   275  
   276  	a := file.SetAsyncHandler(fasync.NewVFS2(fd)).(*fasync.FileAsync)
   277  	if pid == 0 {
   278  		a.ClearOwner()
   279  		return nil
   280  	}
   281  
   282  	switch ownerType {
   283  	case linux.F_OWNER_TID:
   284  		task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid))
   285  		if task == nil {
   286  			return syserror.ESRCH
   287  		}
   288  		a.SetOwnerTask(t, task)
   289  		return nil
   290  	case linux.F_OWNER_PID:
   291  		tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(pid))
   292  		if tg == nil {
   293  			return syserror.ESRCH
   294  		}
   295  		a.SetOwnerThreadGroup(t, tg)
   296  		return nil
   297  	case linux.F_OWNER_PGRP:
   298  		pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(pid))
   299  		if pg == nil {
   300  			return syserror.ESRCH
   301  		}
   302  		a.SetOwnerProcessGroup(t, pg)
   303  		return nil
   304  	default:
   305  		return linuxerr.EINVAL
   306  	}
   307  }
   308  
   309  func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription) error {
   310  	// Copy in the lock request.
   311  	flockAddr := args[2].Pointer()
   312  	var flock linux.Flock
   313  	if _, err := flock.CopyIn(t, flockAddr); err != nil {
   314  		return err
   315  	}
   316  	var typ lock.LockType
   317  	switch flock.Type {
   318  	case linux.F_RDLCK:
   319  		typ = lock.ReadLock
   320  	case linux.F_WRLCK:
   321  		typ = lock.WriteLock
   322  	default:
   323  		return linuxerr.EINVAL
   324  	}
   325  	r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence)
   326  	if err != nil {
   327  		return err
   328  	}
   329  
   330  	newFlock, err := file.TestPOSIX(t, t.FDTable(), typ, r)
   331  	if err != nil {
   332  		return err
   333  	}
   334  	newFlock.PID = translatePID(t.PIDNamespace().Root(), t.PIDNamespace(), newFlock.PID)
   335  	if _, err = newFlock.CopyOut(t, flockAddr); err != nil {
   336  		return err
   337  	}
   338  	return nil
   339  }
   340  
   341  // translatePID translates a pid from one namespace to another. Note that this
   342  // may race with task termination/creation, in which case the original task
   343  // corresponding to pid may no longer exist. This is used to implement the
   344  // F_GETLK fcntl, which has the same potential race in Linux as well (i.e.,
   345  // there is no synchronization between retrieving the lock PID and translating
   346  // it). See fs/locks.c:posix_lock_to_flock.
   347  func translatePID(old, new *kernel.PIDNamespace, pid int32) int32 {
   348  	return int32(new.IDOfTask(old.TaskWithID(kernel.ThreadID(pid))))
   349  }
   350  
   351  func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, blocking bool) error {
   352  	// Copy in the lock request.
   353  	flockAddr := args[2].Pointer()
   354  	var flock linux.Flock
   355  	if _, err := flock.CopyIn(t, flockAddr); err != nil {
   356  		return err
   357  	}
   358  
   359  	var blocker lock.Blocker
   360  	if blocking {
   361  		blocker = t
   362  	}
   363  
   364  	r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence)
   365  	if err != nil {
   366  		return err
   367  	}
   368  
   369  	switch flock.Type {
   370  	case linux.F_RDLCK:
   371  		if !file.IsReadable() {
   372  			return linuxerr.EBADF
   373  		}
   374  		return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker)
   375  
   376  	case linux.F_WRLCK:
   377  		if !file.IsWritable() {
   378  			return linuxerr.EBADF
   379  		}
   380  		return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker)
   381  
   382  	case linux.F_UNLCK:
   383  		return file.UnlockPOSIX(t, t.FDTable(), r)
   384  
   385  	default:
   386  		return linuxerr.EINVAL
   387  	}
   388  }
   389  
   390  // Fadvise64 implements fadvise64(2).
   391  // This implementation currently ignores the provided advice.
   392  func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   393  	fd := args[0].Int()
   394  	length := args[2].Int64()
   395  	advice := args[3].Int()
   396  
   397  	// Note: offset is allowed to be negative.
   398  	if length < 0 {
   399  		return 0, nil, linuxerr.EINVAL
   400  	}
   401  
   402  	file := t.GetFileVFS2(fd)
   403  	if file == nil {
   404  		return 0, nil, linuxerr.EBADF
   405  	}
   406  	defer file.DecRef(t)
   407  
   408  	if file.StatusFlags()&linux.O_PATH != 0 {
   409  		return 0, nil, linuxerr.EBADF
   410  	}
   411  
   412  	// If the FD refers to a pipe or FIFO, return error.
   413  	if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe {
   414  		return 0, nil, linuxerr.ESPIPE
   415  	}
   416  
   417  	switch advice {
   418  	case linux.POSIX_FADV_NORMAL:
   419  	case linux.POSIX_FADV_RANDOM:
   420  	case linux.POSIX_FADV_SEQUENTIAL:
   421  	case linux.POSIX_FADV_WILLNEED:
   422  	case linux.POSIX_FADV_DONTNEED:
   423  	case linux.POSIX_FADV_NOREUSE:
   424  	default:
   425  		return 0, nil, linuxerr.EINVAL
   426  	}
   427  
   428  	// Sure, whatever.
   429  	return 0, nil, nil
   430  }