github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/vfs2/fd.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs2 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 20 "github.com/SagerNet/gvisor/pkg/sentry/arch" 21 "github.com/SagerNet/gvisor/pkg/sentry/fs/lock" 22 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/tmpfs" 23 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 24 "github.com/SagerNet/gvisor/pkg/sentry/kernel/fasync" 25 "github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe" 26 slinux "github.com/SagerNet/gvisor/pkg/sentry/syscalls/linux" 27 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 28 "github.com/SagerNet/gvisor/pkg/syserror" 29 ) 30 31 // Close implements Linux syscall close(2). 32 func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 33 fd := args[0].Int() 34 35 // Note that Remove provides a reference on the file that we may use to 36 // flush. It is still active until we drop the final reference below 37 // (and other reference-holding operations complete). 38 _, file := t.FDTable().Remove(t, fd) 39 if file == nil { 40 return 0, nil, linuxerr.EBADF 41 } 42 defer file.DecRef(t) 43 44 err := file.OnClose(t) 45 return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, syserror.EINTR, "close", file) 46 } 47 48 // Dup implements Linux syscall dup(2). 49 func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 50 fd := args[0].Int() 51 52 file := t.GetFileVFS2(fd) 53 if file == nil { 54 return 0, nil, linuxerr.EBADF 55 } 56 defer file.DecRef(t) 57 58 newFD, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{}) 59 if err != nil { 60 return 0, nil, linuxerr.EMFILE 61 } 62 return uintptr(newFD), nil, nil 63 } 64 65 // Dup2 implements Linux syscall dup2(2). 66 func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 67 oldfd := args[0].Int() 68 newfd := args[1].Int() 69 70 if oldfd == newfd { 71 // As long as oldfd is valid, dup2() does nothing and returns newfd. 72 file := t.GetFileVFS2(oldfd) 73 if file == nil { 74 return 0, nil, linuxerr.EBADF 75 } 76 file.DecRef(t) 77 return uintptr(newfd), nil, nil 78 } 79 80 return dup3(t, oldfd, newfd, 0) 81 } 82 83 // Dup3 implements Linux syscall dup3(2). 84 func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 85 oldfd := args[0].Int() 86 newfd := args[1].Int() 87 flags := args[2].Uint() 88 89 if oldfd == newfd { 90 return 0, nil, linuxerr.EINVAL 91 } 92 93 return dup3(t, oldfd, newfd, flags) 94 } 95 96 func dup3(t *kernel.Task, oldfd, newfd int32, flags uint32) (uintptr, *kernel.SyscallControl, error) { 97 if flags&^linux.O_CLOEXEC != 0 { 98 return 0, nil, linuxerr.EINVAL 99 } 100 101 file := t.GetFileVFS2(oldfd) 102 if file == nil { 103 return 0, nil, linuxerr.EBADF 104 } 105 defer file.DecRef(t) 106 107 err := t.NewFDAtVFS2(newfd, file, kernel.FDFlags{ 108 CloseOnExec: flags&linux.O_CLOEXEC != 0, 109 }) 110 if err != nil { 111 return 0, nil, err 112 } 113 return uintptr(newfd), nil, nil 114 } 115 116 // Fcntl implements linux syscall fcntl(2). 117 func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 118 fd := args[0].Int() 119 cmd := args[1].Int() 120 121 file, flags := t.FDTable().GetVFS2(fd) 122 if file == nil { 123 return 0, nil, linuxerr.EBADF 124 } 125 defer file.DecRef(t) 126 127 if file.StatusFlags()&linux.O_PATH != 0 { 128 switch cmd { 129 case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL: 130 // allowed 131 default: 132 return 0, nil, linuxerr.EBADF 133 } 134 } 135 136 switch cmd { 137 case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: 138 minfd := args[2].Int() 139 fd, err := t.NewFDFromVFS2(minfd, file, kernel.FDFlags{ 140 CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC, 141 }) 142 if err != nil { 143 return 0, nil, err 144 } 145 return uintptr(fd), nil, nil 146 case linux.F_GETFD: 147 return uintptr(flags.ToLinuxFDFlags()), nil, nil 148 case linux.F_SETFD: 149 flags := args[2].Uint() 150 err := t.FDTable().SetFlagsVFS2(t, fd, kernel.FDFlags{ 151 CloseOnExec: flags&linux.FD_CLOEXEC != 0, 152 }) 153 return 0, nil, err 154 case linux.F_GETFL: 155 return uintptr(file.StatusFlags()), nil, nil 156 case linux.F_SETFL: 157 return 0, nil, file.SetStatusFlags(t, t.Credentials(), args[2].Uint()) 158 case linux.F_GETOWN: 159 owner, hasOwner := getAsyncOwner(t, file) 160 if !hasOwner { 161 return 0, nil, nil 162 } 163 if owner.Type == linux.F_OWNER_PGRP { 164 return uintptr(-owner.PID), nil, nil 165 } 166 return uintptr(owner.PID), nil, nil 167 case linux.F_SETOWN: 168 who := args[2].Int() 169 ownerType := int32(linux.F_OWNER_PID) 170 if who < 0 { 171 // Check for overflow before flipping the sign. 172 if who-1 > who { 173 return 0, nil, linuxerr.EINVAL 174 } 175 ownerType = linux.F_OWNER_PGRP 176 who = -who 177 } 178 return 0, nil, setAsyncOwner(t, int(fd), file, ownerType, who) 179 case linux.F_GETOWN_EX: 180 owner, hasOwner := getAsyncOwner(t, file) 181 if !hasOwner { 182 return 0, nil, nil 183 } 184 _, err := owner.CopyOut(t, args[2].Pointer()) 185 return 0, nil, err 186 case linux.F_SETOWN_EX: 187 var owner linux.FOwnerEx 188 _, err := owner.CopyIn(t, args[2].Pointer()) 189 if err != nil { 190 return 0, nil, err 191 } 192 return 0, nil, setAsyncOwner(t, int(fd), file, owner.Type, owner.PID) 193 case linux.F_SETPIPE_SZ: 194 pipefile, ok := file.Impl().(*pipe.VFSPipeFD) 195 if !ok { 196 return 0, nil, linuxerr.EBADF 197 } 198 n, err := pipefile.SetPipeSize(int64(args[2].Int())) 199 if err != nil { 200 return 0, nil, err 201 } 202 return uintptr(n), nil, nil 203 case linux.F_GETPIPE_SZ: 204 pipefile, ok := file.Impl().(*pipe.VFSPipeFD) 205 if !ok { 206 return 0, nil, linuxerr.EBADF 207 } 208 return uintptr(pipefile.PipeSize()), nil, nil 209 case linux.F_GET_SEALS: 210 val, err := tmpfs.GetSeals(file) 211 return uintptr(val), nil, err 212 case linux.F_ADD_SEALS: 213 if !file.IsWritable() { 214 return 0, nil, linuxerr.EPERM 215 } 216 err := tmpfs.AddSeals(file, args[2].Uint()) 217 return 0, nil, err 218 case linux.F_SETLK: 219 return 0, nil, posixLock(t, args, file, false /* blocking */) 220 case linux.F_SETLKW: 221 return 0, nil, posixLock(t, args, file, true /* blocking */) 222 case linux.F_GETLK: 223 return 0, nil, posixTestLock(t, args, file) 224 case linux.F_GETSIG: 225 a := file.AsyncHandler() 226 if a == nil { 227 // Default behavior aka SIGIO. 228 return 0, nil, nil 229 } 230 return uintptr(a.(*fasync.FileAsync).Signal()), nil, nil 231 case linux.F_SETSIG: 232 a := file.SetAsyncHandler(fasync.NewVFS2(int(fd))).(*fasync.FileAsync) 233 return 0, nil, a.SetSignal(linux.Signal(args[2].Int())) 234 default: 235 // Everything else is not yet supported. 236 return 0, nil, linuxerr.EINVAL 237 } 238 } 239 240 func getAsyncOwner(t *kernel.Task, fd *vfs.FileDescription) (ownerEx linux.FOwnerEx, hasOwner bool) { 241 a := fd.AsyncHandler() 242 if a == nil { 243 return linux.FOwnerEx{}, false 244 } 245 246 ot, otg, opg := a.(*fasync.FileAsync).Owner() 247 switch { 248 case ot != nil: 249 return linux.FOwnerEx{ 250 Type: linux.F_OWNER_TID, 251 PID: int32(t.PIDNamespace().IDOfTask(ot)), 252 }, true 253 case otg != nil: 254 return linux.FOwnerEx{ 255 Type: linux.F_OWNER_PID, 256 PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)), 257 }, true 258 case opg != nil: 259 return linux.FOwnerEx{ 260 Type: linux.F_OWNER_PGRP, 261 PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)), 262 }, true 263 default: 264 return linux.FOwnerEx{}, true 265 } 266 } 267 268 func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType, pid int32) error { 269 switch ownerType { 270 case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP: 271 // Acceptable type. 272 default: 273 return linuxerr.EINVAL 274 } 275 276 a := file.SetAsyncHandler(fasync.NewVFS2(fd)).(*fasync.FileAsync) 277 if pid == 0 { 278 a.ClearOwner() 279 return nil 280 } 281 282 switch ownerType { 283 case linux.F_OWNER_TID: 284 task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) 285 if task == nil { 286 return syserror.ESRCH 287 } 288 a.SetOwnerTask(t, task) 289 return nil 290 case linux.F_OWNER_PID: 291 tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(pid)) 292 if tg == nil { 293 return syserror.ESRCH 294 } 295 a.SetOwnerThreadGroup(t, tg) 296 return nil 297 case linux.F_OWNER_PGRP: 298 pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(pid)) 299 if pg == nil { 300 return syserror.ESRCH 301 } 302 a.SetOwnerProcessGroup(t, pg) 303 return nil 304 default: 305 return linuxerr.EINVAL 306 } 307 } 308 309 func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription) error { 310 // Copy in the lock request. 311 flockAddr := args[2].Pointer() 312 var flock linux.Flock 313 if _, err := flock.CopyIn(t, flockAddr); err != nil { 314 return err 315 } 316 var typ lock.LockType 317 switch flock.Type { 318 case linux.F_RDLCK: 319 typ = lock.ReadLock 320 case linux.F_WRLCK: 321 typ = lock.WriteLock 322 default: 323 return linuxerr.EINVAL 324 } 325 r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence) 326 if err != nil { 327 return err 328 } 329 330 newFlock, err := file.TestPOSIX(t, t.FDTable(), typ, r) 331 if err != nil { 332 return err 333 } 334 newFlock.PID = translatePID(t.PIDNamespace().Root(), t.PIDNamespace(), newFlock.PID) 335 if _, err = newFlock.CopyOut(t, flockAddr); err != nil { 336 return err 337 } 338 return nil 339 } 340 341 // translatePID translates a pid from one namespace to another. Note that this 342 // may race with task termination/creation, in which case the original task 343 // corresponding to pid may no longer exist. This is used to implement the 344 // F_GETLK fcntl, which has the same potential race in Linux as well (i.e., 345 // there is no synchronization between retrieving the lock PID and translating 346 // it). See fs/locks.c:posix_lock_to_flock. 347 func translatePID(old, new *kernel.PIDNamespace, pid int32) int32 { 348 return int32(new.IDOfTask(old.TaskWithID(kernel.ThreadID(pid)))) 349 } 350 351 func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, blocking bool) error { 352 // Copy in the lock request. 353 flockAddr := args[2].Pointer() 354 var flock linux.Flock 355 if _, err := flock.CopyIn(t, flockAddr); err != nil { 356 return err 357 } 358 359 var blocker lock.Blocker 360 if blocking { 361 blocker = t 362 } 363 364 r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence) 365 if err != nil { 366 return err 367 } 368 369 switch flock.Type { 370 case linux.F_RDLCK: 371 if !file.IsReadable() { 372 return linuxerr.EBADF 373 } 374 return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker) 375 376 case linux.F_WRLCK: 377 if !file.IsWritable() { 378 return linuxerr.EBADF 379 } 380 return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker) 381 382 case linux.F_UNLCK: 383 return file.UnlockPOSIX(t, t.FDTable(), r) 384 385 default: 386 return linuxerr.EINVAL 387 } 388 } 389 390 // Fadvise64 implements fadvise64(2). 391 // This implementation currently ignores the provided advice. 392 func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 393 fd := args[0].Int() 394 length := args[2].Int64() 395 advice := args[3].Int() 396 397 // Note: offset is allowed to be negative. 398 if length < 0 { 399 return 0, nil, linuxerr.EINVAL 400 } 401 402 file := t.GetFileVFS2(fd) 403 if file == nil { 404 return 0, nil, linuxerr.EBADF 405 } 406 defer file.DecRef(t) 407 408 if file.StatusFlags()&linux.O_PATH != 0 { 409 return 0, nil, linuxerr.EBADF 410 } 411 412 // If the FD refers to a pipe or FIFO, return error. 413 if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe { 414 return 0, nil, linuxerr.ESPIPE 415 } 416 417 switch advice { 418 case linux.POSIX_FADV_NORMAL: 419 case linux.POSIX_FADV_RANDOM: 420 case linux.POSIX_FADV_SEQUENTIAL: 421 case linux.POSIX_FADV_WILLNEED: 422 case linux.POSIX_FADV_DONTNEED: 423 case linux.POSIX_FADV_NOREUSE: 424 default: 425 return 0, nil, linuxerr.EINVAL 426 } 427 428 // Sure, whatever. 429 return 0, nil, nil 430 }