github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_file.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "golang.org/x/sys/unix" 19 "github.com/SagerNet/gvisor/pkg/abi/linux" 20 "github.com/SagerNet/gvisor/pkg/context" 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/hostarch" 23 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 24 "github.com/SagerNet/gvisor/pkg/sentry/arch" 25 "github.com/SagerNet/gvisor/pkg/sentry/fs" 26 "github.com/SagerNet/gvisor/pkg/sentry/fs/lock" 27 "github.com/SagerNet/gvisor/pkg/sentry/fs/tmpfs" 28 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 29 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 30 "github.com/SagerNet/gvisor/pkg/sentry/kernel/fasync" 31 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 32 "github.com/SagerNet/gvisor/pkg/sentry/limits" 33 "github.com/SagerNet/gvisor/pkg/syserror" 34 ) 35 36 // fileOpAt performs an operation on the second last component in the path. 37 func fileOpAt(t *kernel.Task, dirFD int32, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string, remainingTraversals uint) error) error { 38 // Extract the last component. 39 dir, name := fs.SplitLast(path) 40 if dir == "/" { 41 // Common case: we are accessing a file in the root. 42 root := t.FSContext().RootDirectory() 43 err := fn(root, root, name, linux.MaxSymlinkTraversals) 44 root.DecRef(t) 45 return err 46 } else if dir == "." && dirFD == linux.AT_FDCWD { 47 // Common case: we are accessing a file relative to the current 48 // working directory; skip the look-up. 49 wd := t.FSContext().WorkingDirectory() 50 root := t.FSContext().RootDirectory() 51 err := fn(root, wd, name, linux.MaxSymlinkTraversals) 52 wd.DecRef(t) 53 root.DecRef(t) 54 return err 55 } 56 57 return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error { 58 return fn(root, d, name, remainingTraversals) 59 }) 60 } 61 62 // fileOpOn performs an operation on the last entry of the path. 63 func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error) error { 64 var ( 65 d *fs.Dirent // The file. 66 wd *fs.Dirent // The working directory (if required.) 67 rel *fs.Dirent // The relative directory for search (if required.) 68 f *fs.File // The file corresponding to dirFD (if required.) 69 err error 70 ) 71 72 // Extract the working directory (maybe). 73 if len(path) > 0 && path[0] == '/' { 74 // Absolute path; rel can be nil. 75 } else if dirFD == linux.AT_FDCWD { 76 // Need to reference the working directory. 77 wd = t.FSContext().WorkingDirectory() 78 rel = wd 79 } else { 80 // Need to extract the given FD. 81 f = t.GetFile(dirFD) 82 if f == nil { 83 return linuxerr.EBADF 84 } 85 rel = f.Dirent 86 if !fs.IsDir(rel.Inode.StableAttr) { 87 f.DecRef(t) 88 return syserror.ENOTDIR 89 } 90 } 91 92 // Grab the root (always required.) 93 root := t.FSContext().RootDirectory() 94 95 // Lookup the node. 96 remainingTraversals := uint(linux.MaxSymlinkTraversals) 97 if resolve { 98 d, err = t.MountNamespace().FindInode(t, root, rel, path, &remainingTraversals) 99 } else { 100 d, err = t.MountNamespace().FindLink(t, root, rel, path, &remainingTraversals) 101 } 102 root.DecRef(t) 103 if wd != nil { 104 wd.DecRef(t) 105 } 106 if f != nil { 107 f.DecRef(t) 108 } 109 if err != nil { 110 return err 111 } 112 113 err = fn(root, d, remainingTraversals) 114 d.DecRef(t) 115 return err 116 } 117 118 // copyInPath copies a path in. 119 func copyInPath(t *kernel.Task, addr hostarch.Addr, allowEmpty bool) (path string, dirPath bool, err error) { 120 path, err = t.CopyInString(addr, linux.PATH_MAX) 121 if err != nil { 122 return "", false, err 123 } 124 if path == "" && !allowEmpty { 125 return "", false, syserror.ENOENT 126 } 127 128 // If the path ends with a /, then checks must be enforced in various 129 // ways in the different callers. We pass this back to the caller. 130 path, dirPath = fs.TrimTrailingSlashes(path) 131 132 return path, dirPath, nil 133 } 134 135 // LINT.IfChange 136 137 func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uintptr, err error) { 138 path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) 139 if err != nil { 140 return 0, err 141 } 142 143 resolve := flags&linux.O_NOFOLLOW == 0 144 err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 145 // First check a few things about the filesystem before trying to get the file 146 // reference. 147 // 148 // It's required that Check does not try to open files not that aren't backed by 149 // this dirent (e.g. pipes and sockets) because this would result in opening these 150 // files an extra time just to check permissions. 151 if err := d.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil { 152 return err 153 } 154 155 if fs.IsSymlink(d.Inode.StableAttr) && !resolve { 156 return linuxerr.ELOOP 157 } 158 159 fileFlags := linuxToFlags(flags) 160 // Linux always adds the O_LARGEFILE flag when running in 64-bit mode. 161 fileFlags.LargeFile = true 162 if fs.IsDir(d.Inode.StableAttr) { 163 // Don't allow directories to be opened writable. 164 if fileFlags.Write { 165 return syserror.EISDIR 166 } 167 } else { 168 // If O_DIRECTORY is set, but the file is not a directory, then fail. 169 if fileFlags.Directory { 170 return syserror.ENOTDIR 171 } 172 // If it's a directory, then make sure. 173 if dirPath { 174 return syserror.ENOTDIR 175 } 176 } 177 178 file, err := d.Inode.GetFile(t, d, fileFlags) 179 if err != nil { 180 return syserror.ConvertIntr(err, syserror.ERESTARTSYS) 181 } 182 defer file.DecRef(t) 183 184 // Truncate is called when O_TRUNC is specified for any kind of 185 // existing Dirent. Behavior is delegated to the entry's Truncate 186 // implementation. 187 if flags&linux.O_TRUNC != 0 { 188 if err := d.Inode.Truncate(t, d, 0); err != nil { 189 return err 190 } 191 } 192 193 // Success. 194 newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ 195 CloseOnExec: flags&linux.O_CLOEXEC != 0, 196 }) 197 if err != nil { 198 return err 199 } 200 201 // Set return result in frame. 202 fd = uintptr(newFD) 203 204 // Generate notification for opened file. 205 d.InotifyEvent(linux.IN_OPEN, 0) 206 207 return nil 208 }) 209 return fd, err // Use result in frame. 210 } 211 212 func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMode) error { 213 path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) 214 if err != nil { 215 return err 216 } 217 if dirPath { 218 return syserror.ENOENT 219 } 220 221 return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { 222 if !fs.IsDir(d.Inode.StableAttr) { 223 return syserror.ENOTDIR 224 } 225 226 // Do we have the appropriate permissions on the parent? 227 if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { 228 return err 229 } 230 231 // Attempt a creation. 232 perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask())) 233 234 switch mode.FileType() { 235 case 0: 236 // "Zero file type is equivalent to type S_IFREG." - mknod(2) 237 fallthrough 238 case linux.ModeRegular: 239 // We are not going to return the file, so the actual 240 // flags used don't matter, but they cannot be empty or 241 // Create will complain. 242 flags := fs.FileFlags{Read: true, Write: true} 243 file, err := d.Create(t, root, name, flags, perms) 244 if err != nil { 245 return err 246 } 247 file.DecRef(t) 248 return nil 249 250 case linux.ModeNamedPipe: 251 return d.CreateFifo(t, root, name, perms) 252 253 case linux.ModeSocket: 254 // While it is possible create a unix domain socket file on linux 255 // using mknod(2), in practice this is pretty useless from an 256 // application. Linux internally uses mknod() to create the socket 257 // node during bind(2), but we implement bind(2) independently. If 258 // an application explicitly creates a socket node using mknod(), 259 // you can't seem to bind() or connect() to the resulting socket. 260 // 261 // Instead of emulating this seemingly useless behaviour, we'll 262 // indicate that the filesystem doesn't support the creation of 263 // sockets. 264 return syserror.EOPNOTSUPP 265 266 case linux.ModeCharacterDevice: 267 fallthrough 268 case linux.ModeBlockDevice: 269 // TODO(b/72101894): We don't support creating block or character 270 // devices at the moment. 271 // 272 // When we start supporting block and character devices, we'll 273 // need to check for CAP_MKNOD here. 274 return linuxerr.EPERM 275 276 default: 277 // "EINVAL - mode requested creation of something other than a 278 // regular file, device special file, FIFO or socket." - mknod(2) 279 return linuxerr.EINVAL 280 } 281 }) 282 } 283 284 // Mknod implements the linux syscall mknod(2). 285 func Mknod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 286 path := args[0].Pointer() 287 mode := linux.FileMode(args[1].ModeT()) 288 // We don't need this argument until we support creation of device nodes. 289 _ = args[2].Uint() // dev 290 291 return 0, nil, mknodAt(t, linux.AT_FDCWD, path, mode) 292 } 293 294 // Mknodat implements the linux syscall mknodat(2). 295 func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 296 dirFD := args[0].Int() 297 path := args[1].Pointer() 298 mode := linux.FileMode(args[2].ModeT()) 299 // We don't need this argument until we support creation of device nodes. 300 _ = args[3].Uint() // dev 301 302 return 0, nil, mknodAt(t, dirFD, path, mode) 303 } 304 305 func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { 306 path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) 307 if err != nil { 308 return 0, err 309 } 310 if dirPath { 311 return 0, syserror.ENOENT 312 } 313 314 fileFlags := linuxToFlags(flags) 315 // Linux always adds the O_LARGEFILE flag when running in 64-bit mode. 316 fileFlags.LargeFile = true 317 318 err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, parent *fs.Dirent, name string, remainingTraversals uint) error { 319 // Resolve the name to see if it exists, and follow any 320 // symlinks along the way. We must do the symlink resolution 321 // manually because if the symlink target does not exist, we 322 // must create the target (and not the symlink itself). 323 var ( 324 found *fs.Dirent 325 err error 326 ) 327 for { 328 if !fs.IsDir(parent.Inode.StableAttr) { 329 return syserror.ENOTDIR 330 } 331 332 // Start by looking up the dirent at 'name'. 333 found, err = t.MountNamespace().FindLink(t, root, parent, name, &remainingTraversals) 334 if err != nil { 335 break 336 } 337 defer found.DecRef(t) 338 339 // We found something (possibly a symlink). If the 340 // O_EXCL flag was passed, then we can immediately 341 // return EEXIST. 342 if flags&linux.O_EXCL != 0 { 343 return syserror.EEXIST 344 } 345 346 // If we have a non-symlink, then we can proceed. 347 if !fs.IsSymlink(found.Inode.StableAttr) { 348 break 349 } 350 351 // If O_NOFOLLOW was passed, then don't try to resolve 352 // anything. 353 if flags&linux.O_NOFOLLOW != 0 { 354 return linuxerr.ELOOP 355 } 356 357 // Try to resolve the symlink directly to a Dirent. 358 var resolved *fs.Dirent 359 resolved, err = found.Inode.Getlink(t) 360 if err == nil { 361 // No more resolution necessary. 362 defer resolved.DecRef(t) 363 break 364 } 365 if err != fs.ErrResolveViaReadlink { 366 return err 367 } 368 369 // Are we able to resolve further? 370 if remainingTraversals == 0 { 371 return unix.ELOOP 372 } 373 374 // Resolve the symlink to a path via Readlink. 375 var path string 376 path, err = found.Inode.Readlink(t) 377 if err != nil { 378 break 379 } 380 remainingTraversals-- 381 382 // Get the new parent from the target path. 383 var newParent *fs.Dirent 384 newParentPath, newName := fs.SplitLast(path) 385 newParent, err = t.MountNamespace().FindInode(t, root, parent, newParentPath, &remainingTraversals) 386 if err != nil { 387 break 388 } 389 defer newParent.DecRef(t) 390 391 // Repeat the process with the parent and name of the 392 // symlink target. 393 parent = newParent 394 name = newName 395 } 396 397 var newFile *fs.File 398 switch { 399 case err == nil: 400 // Like sys_open, check for a few things about the 401 // filesystem before trying to get a reference to the 402 // fs.File. The same constraints on Check apply. 403 if err := found.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil { 404 return err 405 } 406 407 // Truncate is called when O_TRUNC is specified for any kind of 408 // existing Dirent. Behavior is delegated to the entry's Truncate 409 // implementation. 410 if flags&linux.O_TRUNC != 0 { 411 if err := found.Inode.Truncate(t, found, 0); err != nil { 412 return err 413 } 414 } 415 416 // Create a new fs.File. 417 newFile, err = found.Inode.GetFile(t, found, fileFlags) 418 if err != nil { 419 return syserror.ConvertIntr(err, syserror.ERESTARTSYS) 420 } 421 defer newFile.DecRef(t) 422 case linuxerr.Equals(linuxerr.ENOENT, err): 423 // File does not exist. Proceed with creation. 424 425 // Do we have write permissions on the parent? 426 if err := parent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { 427 return err 428 } 429 430 // Attempt a creation. 431 perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask())) 432 newFile, err = parent.Create(t, root, name, fileFlags, perms) 433 if err != nil { 434 // No luck, bail. 435 return err 436 } 437 defer newFile.DecRef(t) 438 found = newFile.Dirent 439 default: 440 return err 441 } 442 443 // Success. 444 newFD, err := t.NewFDFrom(0, newFile, kernel.FDFlags{ 445 CloseOnExec: flags&linux.O_CLOEXEC != 0, 446 }) 447 if err != nil { 448 return err 449 } 450 451 // Set result in frame. 452 fd = uintptr(newFD) 453 454 // Queue the open inotify event. The creation event is 455 // automatically queued when the dirent is found. The open 456 // events are implemented at the syscall layer so we need to 457 // manually queue one here. 458 found.InotifyEvent(linux.IN_OPEN, 0) 459 460 return nil 461 }) 462 return fd, err // Use result in frame. 463 } 464 465 // Open implements linux syscall open(2). 466 func Open(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 467 addr := args[0].Pointer() 468 flags := uint(args[1].Uint()) 469 if flags&linux.O_CREAT != 0 { 470 mode := linux.FileMode(args[2].ModeT()) 471 n, err := createAt(t, linux.AT_FDCWD, addr, flags, mode) 472 return n, nil, err 473 } 474 n, err := openAt(t, linux.AT_FDCWD, addr, flags) 475 return n, nil, err 476 } 477 478 // Openat implements linux syscall openat(2). 479 func Openat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 480 dirFD := args[0].Int() 481 addr := args[1].Pointer() 482 flags := uint(args[2].Uint()) 483 if flags&linux.O_CREAT != 0 { 484 mode := linux.FileMode(args[3].ModeT()) 485 n, err := createAt(t, dirFD, addr, flags, mode) 486 return n, nil, err 487 } 488 n, err := openAt(t, dirFD, addr, flags) 489 return n, nil, err 490 } 491 492 // Creat implements linux syscall creat(2). 493 func Creat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 494 addr := args[0].Pointer() 495 mode := linux.FileMode(args[1].ModeT()) 496 n, err := createAt(t, linux.AT_FDCWD, addr, linux.O_WRONLY|linux.O_TRUNC, mode) 497 return n, nil, err 498 } 499 500 // accessContext is a context that overrides the credentials used, but 501 // otherwise carries the same values as the embedded context. 502 // 503 // accessContext should only be used for access(2). 504 type accessContext struct { 505 context.Context 506 creds *auth.Credentials 507 } 508 509 // Value implements context.Context. 510 func (ac accessContext) Value(key interface{}) interface{} { 511 switch key { 512 case auth.CtxCredentials: 513 return ac.creds 514 default: 515 return ac.Context.Value(key) 516 } 517 } 518 519 func accessAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode uint) error { 520 const rOK = 4 521 const wOK = 2 522 const xOK = 1 523 524 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 525 if err != nil { 526 return err 527 } 528 529 // Sanity check the mode. 530 if mode&^(rOK|wOK|xOK) != 0 { 531 return linuxerr.EINVAL 532 } 533 534 return fileOpOn(t, dirFD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 535 // access(2) and faccessat(2) check permissions using real 536 // UID/GID, not effective UID/GID. 537 // 538 // "access() needs to use the real uid/gid, not the effective 539 // uid/gid. We do this by temporarily clearing all FS-related 540 // capabilities and switching the fsuid/fsgid around to the 541 // real ones." -fs/open.c:faccessat 542 creds := t.Credentials().Fork() 543 creds.EffectiveKUID = creds.RealKUID 544 creds.EffectiveKGID = creds.RealKGID 545 if creds.EffectiveKUID.In(creds.UserNamespace) == auth.RootUID { 546 creds.EffectiveCaps = creds.PermittedCaps 547 } else { 548 creds.EffectiveCaps = 0 549 } 550 551 ctx := &accessContext{ 552 Context: t, 553 creds: creds, 554 } 555 556 return d.Inode.CheckPermission(ctx, fs.PermMask{ 557 Read: mode&rOK != 0, 558 Write: mode&wOK != 0, 559 Execute: mode&xOK != 0, 560 }) 561 }) 562 } 563 564 // Access implements linux syscall access(2). 565 func Access(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 566 addr := args[0].Pointer() 567 mode := args[1].ModeT() 568 569 return 0, nil, accessAt(t, linux.AT_FDCWD, addr, mode) 570 } 571 572 // Faccessat implements linux syscall faccessat(2). 573 // 574 // Note that the faccessat() system call does not take a flags argument: 575 // "The raw faccessat() system call takes only the first three arguments. The 576 // AT_EACCESS and AT_SYMLINK_NOFOLLOW flags are actually implemented within 577 // the glibc wrapper function for faccessat(). If either of these flags is 578 // specified, then the wrapper function employs fstatat(2) to determine access 579 // permissions." - faccessat(2) 580 func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 581 dirFD := args[0].Int() 582 addr := args[1].Pointer() 583 mode := args[2].ModeT() 584 585 return 0, nil, accessAt(t, dirFD, addr, mode) 586 } 587 588 // LINT.ThenChange(vfs2/filesystem.go) 589 590 // LINT.IfChange 591 592 // Ioctl implements linux syscall ioctl(2). 593 func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 594 fd := args[0].Int() 595 request := int(args[1].Int()) 596 597 file := t.GetFile(fd) 598 if file == nil { 599 return 0, nil, linuxerr.EBADF 600 } 601 defer file.DecRef(t) 602 603 // Shared flags between file and socket. 604 switch request { 605 case linux.FIONCLEX: 606 t.FDTable().SetFlags(t, fd, kernel.FDFlags{ 607 CloseOnExec: false, 608 }) 609 return 0, nil, nil 610 case linux.FIOCLEX: 611 t.FDTable().SetFlags(t, fd, kernel.FDFlags{ 612 CloseOnExec: true, 613 }) 614 return 0, nil, nil 615 616 case linux.FIONBIO: 617 var set int32 618 if _, err := primitive.CopyInt32In(t, args[2].Pointer(), &set); err != nil { 619 return 0, nil, err 620 } 621 flags := file.Flags() 622 if set != 0 { 623 flags.NonBlocking = true 624 } else { 625 flags.NonBlocking = false 626 } 627 file.SetFlags(flags.Settable()) 628 return 0, nil, nil 629 630 case linux.FIOASYNC: 631 var set int32 632 if _, err := primitive.CopyInt32In(t, args[2].Pointer(), &set); err != nil { 633 return 0, nil, err 634 } 635 flags := file.Flags() 636 if set != 0 { 637 flags.Async = true 638 } else { 639 flags.Async = false 640 } 641 file.SetFlags(flags.Settable()) 642 return 0, nil, nil 643 644 case linux.FIOSETOWN, linux.SIOCSPGRP: 645 var set int32 646 if _, err := primitive.CopyInt32In(t, args[2].Pointer(), &set); err != nil { 647 return 0, nil, err 648 } 649 fSetOwn(t, int(fd), file, set) 650 return 0, nil, nil 651 652 case linux.FIOGETOWN, linux.SIOCGPGRP: 653 _, err := primitive.CopyInt32Out(t, args[2].Pointer(), fGetOwn(t, file)) 654 return 0, nil, err 655 656 default: 657 ret, err := file.FileOperations.Ioctl(t, file, t.MemoryManager(), args) 658 if err != nil { 659 return 0, nil, err 660 } 661 662 return ret, nil, nil 663 } 664 } 665 666 // LINT.ThenChange(vfs2/ioctl.go) 667 668 // LINT.IfChange 669 670 // Getcwd implements the linux syscall getcwd(2). 671 func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 672 addr := args[0].Pointer() 673 size := args[1].SizeT() 674 cwd := t.FSContext().WorkingDirectory() 675 defer cwd.DecRef(t) 676 root := t.FSContext().RootDirectory() 677 defer root.DecRef(t) 678 679 // Get our fullname from the root and preprend unreachable if the root was 680 // unreachable from our current dirent this is the same behavior as on linux. 681 s, reachable := cwd.FullName(root) 682 if !reachable { 683 s = "(unreachable)" + s 684 } 685 686 // Note this is >= because we need a terminator. 687 if uint(len(s)) >= size { 688 return 0, nil, syserror.ERANGE 689 } 690 691 // Copy out the path name for the node. 692 bytes, err := t.CopyOutBytes(addr, []byte(s)) 693 if err != nil { 694 return 0, nil, err 695 } 696 697 // Top it off with a terminator. 698 _, err = t.CopyOutBytes(addr+hostarch.Addr(bytes), []byte("\x00")) 699 return uintptr(bytes + 1), nil, err 700 } 701 702 // Chroot implements the linux syscall chroot(2). 703 func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 704 addr := args[0].Pointer() 705 706 if !t.HasCapability(linux.CAP_SYS_CHROOT) { 707 return 0, nil, linuxerr.EPERM 708 } 709 710 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 711 if err != nil { 712 return 0, nil, err 713 } 714 715 return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 716 // Is it a directory? 717 if !fs.IsDir(d.Inode.StableAttr) { 718 return syserror.ENOTDIR 719 } 720 721 // Does it have execute permissions? 722 if err := d.Inode.CheckPermission(t, fs.PermMask{Execute: true}); err != nil { 723 return err 724 } 725 726 t.FSContext().SetRootDirectory(t, d) 727 return nil 728 }) 729 } 730 731 // Chdir implements the linux syscall chdir(2). 732 func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 733 addr := args[0].Pointer() 734 735 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 736 if err != nil { 737 return 0, nil, err 738 } 739 740 return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 741 // Is it a directory? 742 if !fs.IsDir(d.Inode.StableAttr) { 743 return syserror.ENOTDIR 744 } 745 746 // Does it have execute permissions? 747 if err := d.Inode.CheckPermission(t, fs.PermMask{Execute: true}); err != nil { 748 return err 749 } 750 751 t.FSContext().SetWorkingDirectory(t, d) 752 return nil 753 }) 754 } 755 756 // Fchdir implements the linux syscall fchdir(2). 757 func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 758 fd := args[0].Int() 759 760 file := t.GetFile(fd) 761 if file == nil { 762 return 0, nil, linuxerr.EBADF 763 } 764 defer file.DecRef(t) 765 766 // Is it a directory? 767 if !fs.IsDir(file.Dirent.Inode.StableAttr) { 768 return 0, nil, syserror.ENOTDIR 769 } 770 771 // Does it have execute permissions? 772 if err := file.Dirent.Inode.CheckPermission(t, fs.PermMask{Execute: true}); err != nil { 773 return 0, nil, err 774 } 775 776 t.FSContext().SetWorkingDirectory(t, file.Dirent) 777 return 0, nil, nil 778 } 779 780 // LINT.ThenChange(vfs2/fscontext.go) 781 782 // LINT.IfChange 783 784 // Close implements linux syscall close(2). 785 func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 786 fd := args[0].Int() 787 788 // Note that Remove provides a reference on the file that we may use to 789 // flush. It is still active until we drop the final reference below 790 // (and other reference-holding operations complete). 791 file, _ := t.FDTable().Remove(t, fd) 792 if file == nil { 793 return 0, nil, linuxerr.EBADF 794 } 795 defer file.DecRef(t) 796 797 err := file.Flush(t) 798 return 0, nil, handleIOError(t, false /* partial */, err, syserror.EINTR, "close", file) 799 } 800 801 // Dup implements linux syscall dup(2). 802 func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 803 fd := args[0].Int() 804 805 file := t.GetFile(fd) 806 if file == nil { 807 return 0, nil, linuxerr.EBADF 808 } 809 defer file.DecRef(t) 810 811 newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{}) 812 if err != nil { 813 return 0, nil, linuxerr.EMFILE 814 } 815 return uintptr(newFD), nil, nil 816 } 817 818 // Dup2 implements linux syscall dup2(2). 819 func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 820 oldfd := args[0].Int() 821 newfd := args[1].Int() 822 823 // If oldfd is a valid file descriptor, and newfd has the same value as oldfd, 824 // then dup2() does nothing, and returns newfd. 825 if oldfd == newfd { 826 oldFile := t.GetFile(oldfd) 827 if oldFile == nil { 828 return 0, nil, linuxerr.EBADF 829 } 830 defer oldFile.DecRef(t) 831 832 return uintptr(newfd), nil, nil 833 } 834 835 // Zero out flags arg to be used by Dup3. 836 args[2].Value = 0 837 return Dup3(t, args) 838 } 839 840 // Dup3 implements linux syscall dup3(2). 841 func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 842 oldfd := args[0].Int() 843 newfd := args[1].Int() 844 flags := args[2].Uint() 845 846 if oldfd == newfd { 847 return 0, nil, linuxerr.EINVAL 848 } 849 850 oldFile := t.GetFile(oldfd) 851 if oldFile == nil { 852 return 0, nil, linuxerr.EBADF 853 } 854 defer oldFile.DecRef(t) 855 856 err := t.NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}) 857 if err != nil { 858 return 0, nil, err 859 } 860 861 return uintptr(newfd), nil, nil 862 } 863 864 func fGetOwnEx(t *kernel.Task, file *fs.File) linux.FOwnerEx { 865 ma := file.Async(nil) 866 if ma == nil { 867 return linux.FOwnerEx{} 868 } 869 a := ma.(*fasync.FileAsync) 870 ot, otg, opg := a.Owner() 871 switch { 872 case ot != nil: 873 return linux.FOwnerEx{ 874 Type: linux.F_OWNER_TID, 875 PID: int32(t.PIDNamespace().IDOfTask(ot)), 876 } 877 case otg != nil: 878 return linux.FOwnerEx{ 879 Type: linux.F_OWNER_PID, 880 PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)), 881 } 882 case opg != nil: 883 return linux.FOwnerEx{ 884 Type: linux.F_OWNER_PGRP, 885 PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)), 886 } 887 default: 888 return linux.FOwnerEx{} 889 } 890 } 891 892 func fGetOwn(t *kernel.Task, file *fs.File) int32 { 893 owner := fGetOwnEx(t, file) 894 if owner.Type == linux.F_OWNER_PGRP { 895 return -owner.PID 896 } 897 return owner.PID 898 } 899 900 // fSetOwn sets the file's owner with the semantics of F_SETOWN in Linux. 901 // 902 // If who is positive, it represents a PID. If negative, it represents a PGID. 903 // If the PID or PGID is invalid, the owner is silently unset. 904 func fSetOwn(t *kernel.Task, fd int, file *fs.File, who int32) error { 905 a := file.Async(fasync.New(fd)).(*fasync.FileAsync) 906 if who < 0 { 907 // Check for overflow before flipping the sign. 908 if who-1 > who { 909 return linuxerr.EINVAL 910 } 911 pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(-who)) 912 a.SetOwnerProcessGroup(t, pg) 913 } else { 914 tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(who)) 915 a.SetOwnerThreadGroup(t, tg) 916 } 917 return nil 918 } 919 920 // Fcntl implements linux syscall fcntl(2). 921 func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 922 fd := args[0].Int() 923 cmd := args[1].Int() 924 925 file, flags := t.FDTable().Get(fd) 926 if file == nil { 927 return 0, nil, linuxerr.EBADF 928 } 929 defer file.DecRef(t) 930 931 switch cmd { 932 case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: 933 from := args[2].Int() 934 fd, err := t.NewFDFrom(from, file, kernel.FDFlags{ 935 CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC, 936 }) 937 if err != nil { 938 return 0, nil, err 939 } 940 return uintptr(fd), nil, nil 941 case linux.F_GETFD: 942 return uintptr(flags.ToLinuxFDFlags()), nil, nil 943 case linux.F_SETFD: 944 flags := args[2].Uint() 945 err := t.FDTable().SetFlags(t, fd, kernel.FDFlags{ 946 CloseOnExec: flags&linux.FD_CLOEXEC != 0, 947 }) 948 return 0, nil, err 949 case linux.F_GETFL: 950 return uintptr(file.Flags().ToLinux()), nil, nil 951 case linux.F_SETFL: 952 flags := uint(args[2].Uint()) 953 file.SetFlags(linuxToFlags(flags).Settable()) 954 return 0, nil, nil 955 case linux.F_SETLK, linux.F_SETLKW: 956 // In Linux the file system can choose to provide lock operations for an inode. 957 // Normally pipe and socket types lack lock operations. We diverge and use a heavy 958 // hammer by only allowing locks on files and directories. 959 if !fs.IsFile(file.Dirent.Inode.StableAttr) && !fs.IsDir(file.Dirent.Inode.StableAttr) { 960 return 0, nil, linuxerr.EBADF 961 } 962 963 // Copy in the lock request. 964 flockAddr := args[2].Pointer() 965 var flock linux.Flock 966 if _, err := flock.CopyIn(t, flockAddr); err != nil { 967 return 0, nil, err 968 } 969 970 // Compute the lock whence. 971 var sw fs.SeekWhence 972 switch flock.Whence { 973 case 0: 974 sw = fs.SeekSet 975 case 1: 976 sw = fs.SeekCurrent 977 case 2: 978 sw = fs.SeekEnd 979 default: 980 return 0, nil, linuxerr.EINVAL 981 } 982 983 // Compute the lock offset. 984 var off int64 985 switch sw { 986 case fs.SeekSet: 987 off = 0 988 case fs.SeekCurrent: 989 // Note that Linux does not hold any mutexes while retrieving the file offset, 990 // see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk. 991 off = file.Offset() 992 case fs.SeekEnd: 993 uattr, err := file.Dirent.Inode.UnstableAttr(t) 994 if err != nil { 995 return 0, nil, err 996 } 997 off = uattr.Size 998 default: 999 return 0, nil, linuxerr.EINVAL 1000 } 1001 1002 // Compute the lock range. 1003 rng, err := lock.ComputeRange(flock.Start, flock.Len, off) 1004 if err != nil { 1005 return 0, nil, err 1006 } 1007 1008 // These locks don't block; execute the non-blocking operation using the inode's lock 1009 // context directly. 1010 switch flock.Type { 1011 case linux.F_RDLCK: 1012 if !file.Flags().Read { 1013 return 0, nil, linuxerr.EBADF 1014 } 1015 if cmd == linux.F_SETLK { 1016 // Non-blocking lock, provide a nil lock.Blocker. 1017 if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, nil) { 1018 return 0, nil, linuxerr.EAGAIN 1019 } 1020 } else { 1021 // Blocking lock, pass in the task to satisfy the lock.Blocker interface. 1022 if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) { 1023 return 0, nil, syserror.EINTR 1024 } 1025 } 1026 return 0, nil, nil 1027 case linux.F_WRLCK: 1028 if !file.Flags().Write { 1029 return 0, nil, linuxerr.EBADF 1030 } 1031 if cmd == linux.F_SETLK { 1032 // Non-blocking lock, provide a nil lock.Blocker. 1033 if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, nil) { 1034 return 0, nil, linuxerr.EAGAIN 1035 } 1036 } else { 1037 // Blocking lock, pass in the task to satisfy the lock.Blocker interface. 1038 if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) { 1039 return 0, nil, syserror.EINTR 1040 } 1041 } 1042 return 0, nil, nil 1043 case linux.F_UNLCK: 1044 file.Dirent.Inode.LockCtx.Posix.UnlockRegion(t.FDTable(), rng) 1045 return 0, nil, nil 1046 default: 1047 return 0, nil, linuxerr.EINVAL 1048 } 1049 case linux.F_GETOWN: 1050 return uintptr(fGetOwn(t, file)), nil, nil 1051 case linux.F_SETOWN: 1052 return 0, nil, fSetOwn(t, int(fd), file, args[2].Int()) 1053 case linux.F_GETOWN_EX: 1054 addr := args[2].Pointer() 1055 owner := fGetOwnEx(t, file) 1056 _, err := owner.CopyOut(t, addr) 1057 return 0, nil, err 1058 case linux.F_SETOWN_EX: 1059 addr := args[2].Pointer() 1060 var owner linux.FOwnerEx 1061 _, err := owner.CopyIn(t, addr) 1062 if err != nil { 1063 return 0, nil, err 1064 } 1065 a := file.Async(fasync.New(int(fd))).(*fasync.FileAsync) 1066 switch owner.Type { 1067 case linux.F_OWNER_TID: 1068 task := t.PIDNamespace().TaskWithID(kernel.ThreadID(owner.PID)) 1069 if task == nil { 1070 return 0, nil, syserror.ESRCH 1071 } 1072 a.SetOwnerTask(t, task) 1073 return 0, nil, nil 1074 case linux.F_OWNER_PID: 1075 tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(owner.PID)) 1076 if tg == nil { 1077 return 0, nil, syserror.ESRCH 1078 } 1079 a.SetOwnerThreadGroup(t, tg) 1080 return 0, nil, nil 1081 case linux.F_OWNER_PGRP: 1082 pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(owner.PID)) 1083 if pg == nil { 1084 return 0, nil, syserror.ESRCH 1085 } 1086 a.SetOwnerProcessGroup(t, pg) 1087 return 0, nil, nil 1088 default: 1089 return 0, nil, linuxerr.EINVAL 1090 } 1091 case linux.F_GET_SEALS: 1092 val, err := tmpfs.GetSeals(file.Dirent.Inode) 1093 return uintptr(val), nil, err 1094 case linux.F_ADD_SEALS: 1095 if !file.Flags().Write { 1096 return 0, nil, linuxerr.EPERM 1097 } 1098 err := tmpfs.AddSeals(file.Dirent.Inode, args[2].Uint()) 1099 return 0, nil, err 1100 case linux.F_GETPIPE_SZ: 1101 sz, ok := file.FileOperations.(fs.FifoSizer) 1102 if !ok { 1103 return 0, nil, linuxerr.EINVAL 1104 } 1105 size, err := sz.FifoSize(t, file) 1106 return uintptr(size), nil, err 1107 case linux.F_SETPIPE_SZ: 1108 sz, ok := file.FileOperations.(fs.FifoSizer) 1109 if !ok { 1110 return 0, nil, linuxerr.EINVAL 1111 } 1112 n, err := sz.SetFifoSize(int64(args[2].Int())) 1113 return uintptr(n), nil, err 1114 case linux.F_GETSIG: 1115 a := file.Async(fasync.New(int(fd))).(*fasync.FileAsync) 1116 return uintptr(a.Signal()), nil, nil 1117 case linux.F_SETSIG: 1118 a := file.Async(fasync.New(int(fd))).(*fasync.FileAsync) 1119 return 0, nil, a.SetSignal(linux.Signal(args[2].Int())) 1120 default: 1121 // Everything else is not yet supported. 1122 return 0, nil, linuxerr.EINVAL 1123 } 1124 } 1125 1126 // Fadvise64 implements linux syscall fadvise64(2). 1127 // This implementation currently ignores the provided advice. 1128 func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1129 fd := args[0].Int() 1130 length := args[2].Int64() 1131 advice := args[3].Int() 1132 1133 // Note: offset is allowed to be negative. 1134 if length < 0 { 1135 return 0, nil, linuxerr.EINVAL 1136 } 1137 1138 file := t.GetFile(fd) 1139 if file == nil { 1140 return 0, nil, linuxerr.EBADF 1141 } 1142 defer file.DecRef(t) 1143 1144 // If the FD refers to a pipe or FIFO, return error. 1145 if fs.IsPipe(file.Dirent.Inode.StableAttr) { 1146 return 0, nil, linuxerr.ESPIPE 1147 } 1148 1149 switch advice { 1150 case linux.POSIX_FADV_NORMAL: 1151 case linux.POSIX_FADV_RANDOM: 1152 case linux.POSIX_FADV_SEQUENTIAL: 1153 case linux.POSIX_FADV_WILLNEED: 1154 case linux.POSIX_FADV_DONTNEED: 1155 case linux.POSIX_FADV_NOREUSE: 1156 default: 1157 return 0, nil, linuxerr.EINVAL 1158 } 1159 1160 // Sure, whatever. 1161 return 0, nil, nil 1162 } 1163 1164 // LINT.ThenChange(vfs2/fd.go) 1165 1166 // LINT.IfChange 1167 1168 func mkdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMode) error { 1169 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 1170 if err != nil { 1171 return err 1172 } 1173 1174 return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { 1175 if !fs.IsDir(d.Inode.StableAttr) { 1176 return syserror.ENOTDIR 1177 } 1178 1179 // Does this directory exist already? 1180 remainingTraversals := uint(linux.MaxSymlinkTraversals) 1181 f, err := t.MountNamespace().FindInode(t, root, d, name, &remainingTraversals) 1182 switch { 1183 case err == nil: 1184 // The directory existed. 1185 defer f.DecRef(t) 1186 return syserror.EEXIST 1187 case linuxerr.Equals(linuxerr.EACCES, err): 1188 // Permission denied while walking to the directory. 1189 return err 1190 default: 1191 // Do we have write permissions on the parent? 1192 if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { 1193 return err 1194 } 1195 1196 // Create the directory. 1197 perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask())) 1198 return d.CreateDirectory(t, root, name, perms) 1199 } 1200 }) 1201 } 1202 1203 // Mkdir implements linux syscall mkdir(2). 1204 func Mkdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1205 addr := args[0].Pointer() 1206 mode := linux.FileMode(args[1].ModeT()) 1207 1208 return 0, nil, mkdirAt(t, linux.AT_FDCWD, addr, mode) 1209 } 1210 1211 // Mkdirat implements linux syscall mkdirat(2). 1212 func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1213 dirFD := args[0].Int() 1214 addr := args[1].Pointer() 1215 mode := linux.FileMode(args[2].ModeT()) 1216 1217 return 0, nil, mkdirAt(t, dirFD, addr, mode) 1218 } 1219 1220 func rmdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error { 1221 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 1222 if err != nil { 1223 return err 1224 } 1225 1226 // Special case: removing the root always returns EBUSY. 1227 if path == "/" { 1228 return linuxerr.EBUSY 1229 } 1230 1231 return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { 1232 if !fs.IsDir(d.Inode.StableAttr) { 1233 return syserror.ENOTDIR 1234 } 1235 1236 // Linux returns different ernos when the path ends in single 1237 // dot vs. double dots. 1238 switch name { 1239 case ".": 1240 return linuxerr.EINVAL 1241 case "..": 1242 return linuxerr.ENOTEMPTY 1243 } 1244 1245 if err := d.MayDelete(t, root, name); err != nil { 1246 return err 1247 } 1248 1249 return d.RemoveDirectory(t, root, name) 1250 }) 1251 } 1252 1253 // Rmdir implements linux syscall rmdir(2). 1254 func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1255 addr := args[0].Pointer() 1256 1257 return 0, nil, rmdirAt(t, linux.AT_FDCWD, addr) 1258 } 1259 1260 func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hostarch.Addr) error { 1261 newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */) 1262 if err != nil { 1263 return err 1264 } 1265 if dirPath { 1266 return syserror.ENOENT 1267 } 1268 1269 // The oldPath is copied in verbatim. This is because the symlink 1270 // will include all details, including trailing slashes. 1271 oldPath, err := t.CopyInString(oldAddr, linux.PATH_MAX) 1272 if err != nil { 1273 return err 1274 } 1275 if oldPath == "" { 1276 return syserror.ENOENT 1277 } 1278 1279 return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { 1280 if !fs.IsDir(d.Inode.StableAttr) { 1281 return syserror.ENOTDIR 1282 } 1283 1284 // Make sure we have write permissions on the parent directory. 1285 if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { 1286 return err 1287 } 1288 return d.CreateLink(t, root, oldPath, name) 1289 }) 1290 } 1291 1292 // Symlink implements linux syscall symlink(2). 1293 func Symlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1294 oldAddr := args[0].Pointer() 1295 newAddr := args[1].Pointer() 1296 1297 return 0, nil, symlinkAt(t, linux.AT_FDCWD, newAddr, oldAddr) 1298 } 1299 1300 // Symlinkat implements linux syscall symlinkat(2). 1301 func Symlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1302 oldAddr := args[0].Pointer() 1303 dirFD := args[1].Int() 1304 newAddr := args[2].Pointer() 1305 1306 return 0, nil, symlinkAt(t, dirFD, newAddr, oldAddr) 1307 } 1308 1309 // mayLinkAt determines whether t can create a hard link to target. 1310 // 1311 // This corresponds to Linux's fs/namei.c:may_linkat. 1312 func mayLinkAt(t *kernel.Task, target *fs.Inode) error { 1313 // Linux will impose the following restrictions on hard links only if 1314 // sysctl_protected_hardlinks is enabled. The kernel disables this 1315 // setting by default for backward compatibility (see commit 1316 // 561ec64ae67e), but also recommends that distributions enable it (and 1317 // Debian does: 1318 // https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=889098). 1319 // 1320 // gVisor currently behaves as though sysctl_protected_hardlinks is 1321 // always enabled, and thus imposes the following restrictions on hard 1322 // links. 1323 1324 if target.CheckOwnership(t) { 1325 // fs/namei.c:may_linkat: "Source inode owner (or CAP_FOWNER) 1326 // can hardlink all they like." 1327 return nil 1328 } 1329 1330 // If we are not the owner, then the file must be regular and have 1331 // Read+Write permissions. 1332 if !fs.IsRegular(target.StableAttr) { 1333 return linuxerr.EPERM 1334 } 1335 if target.CheckPermission(t, fs.PermMask{Read: true, Write: true}) != nil { 1336 return linuxerr.EPERM 1337 } 1338 1339 return nil 1340 } 1341 1342 // linkAt creates a hard link to the target specified by oldDirFD and oldAddr, 1343 // specified by newDirFD and newAddr. If resolve is true, then the symlinks 1344 // will be followed when evaluating the target. 1345 func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int32, newAddr hostarch.Addr, resolve, allowEmpty bool) error { 1346 oldPath, _, err := copyInPath(t, oldAddr, allowEmpty) 1347 if err != nil { 1348 return err 1349 } 1350 newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */) 1351 if err != nil { 1352 return err 1353 } 1354 if dirPath { 1355 return syserror.ENOENT 1356 } 1357 1358 if allowEmpty && oldPath == "" { 1359 target := t.GetFile(oldDirFD) 1360 if target == nil { 1361 return linuxerr.EBADF 1362 } 1363 defer target.DecRef(t) 1364 if err := mayLinkAt(t, target.Dirent.Inode); err != nil { 1365 return err 1366 } 1367 1368 // Resolve the target directory. 1369 return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { 1370 if !fs.IsDir(newParent.Inode.StableAttr) { 1371 return syserror.ENOTDIR 1372 } 1373 1374 // Make sure we have write permissions on the parent directory. 1375 if err := newParent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { 1376 return err 1377 } 1378 return newParent.CreateHardLink(t, root, target.Dirent, newName) 1379 }) 1380 } 1381 1382 // Resolve oldDirFD and oldAddr to a dirent. The "resolve" argument 1383 // only applies to this name. 1384 return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent, _ uint) error { 1385 if err := mayLinkAt(t, target.Inode); err != nil { 1386 return err 1387 } 1388 1389 // Next resolve newDirFD and newAddr to the parent dirent and name. 1390 return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { 1391 if !fs.IsDir(newParent.Inode.StableAttr) { 1392 return syserror.ENOTDIR 1393 } 1394 1395 // Make sure we have write permissions on the parent directory. 1396 if err := newParent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { 1397 return err 1398 } 1399 return newParent.CreateHardLink(t, root, target, newName) 1400 }) 1401 }) 1402 } 1403 1404 // Link implements linux syscall link(2). 1405 func Link(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1406 oldAddr := args[0].Pointer() 1407 newAddr := args[1].Pointer() 1408 1409 // man link(2): 1410 // POSIX.1-2001 says that link() should dereference oldpath if it is a 1411 // symbolic link. However, since kernel 2.0, Linux does not do so: if 1412 // oldpath is a symbolic link, then newpath is created as a (hard) link 1413 // to the same symbolic link file (i.e., newpath becomes a symbolic 1414 // link to the same file that oldpath refers to). 1415 resolve := false 1416 return 0, nil, linkAt(t, linux.AT_FDCWD, oldAddr, linux.AT_FDCWD, newAddr, resolve, false /* allowEmpty */) 1417 } 1418 1419 // Linkat implements linux syscall linkat(2). 1420 func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1421 oldDirFD := args[0].Int() 1422 oldAddr := args[1].Pointer() 1423 newDirFD := args[2].Int() 1424 newAddr := args[3].Pointer() 1425 1426 // man linkat(2): 1427 // By default, linkat(), does not dereference oldpath if it is a 1428 // symbolic link (like link(2)). Since Linux 2.6.18, the flag 1429 // AT_SYMLINK_FOLLOW can be specified in flags to cause oldpath to be 1430 // dereferenced if it is a symbolic link. 1431 flags := args[4].Int() 1432 1433 // Sanity check flags. 1434 if flags&^(linux.AT_SYMLINK_FOLLOW|linux.AT_EMPTY_PATH) != 0 { 1435 return 0, nil, linuxerr.EINVAL 1436 } 1437 1438 resolve := flags&linux.AT_SYMLINK_FOLLOW == linux.AT_SYMLINK_FOLLOW 1439 allowEmpty := flags&linux.AT_EMPTY_PATH == linux.AT_EMPTY_PATH 1440 1441 if allowEmpty && !t.HasCapabilityIn(linux.CAP_DAC_READ_SEARCH, t.UserNamespace().Root()) { 1442 return 0, nil, syserror.ENOENT 1443 } 1444 1445 return 0, nil, linkAt(t, oldDirFD, oldAddr, newDirFD, newAddr, resolve, allowEmpty) 1446 } 1447 1448 // LINT.ThenChange(vfs2/filesystem.go) 1449 1450 // LINT.IfChange 1451 1452 func readlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, bufAddr hostarch.Addr, size uint) (copied uintptr, err error) { 1453 path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) 1454 if err != nil { 1455 return 0, err 1456 } 1457 if dirPath { 1458 return 0, syserror.ENOENT 1459 } 1460 1461 err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 1462 // Check for Read permission. 1463 if err := d.Inode.CheckPermission(t, fs.PermMask{Read: true}); err != nil { 1464 return err 1465 } 1466 1467 s, err := d.Inode.Readlink(t) 1468 if linuxerr.Equals(linuxerr.ENOLINK, err) { 1469 return linuxerr.EINVAL 1470 } 1471 if err != nil { 1472 return err 1473 } 1474 1475 buffer := []byte(s) 1476 if uint(len(buffer)) > size { 1477 buffer = buffer[:size] 1478 } 1479 1480 n, err := t.CopyOutBytes(bufAddr, buffer) 1481 1482 // Update frame return value. 1483 copied = uintptr(n) 1484 1485 return err 1486 }) 1487 return copied, err // Return frame value. 1488 } 1489 1490 // Readlink implements linux syscall readlink(2). 1491 func Readlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1492 addr := args[0].Pointer() 1493 bufAddr := args[1].Pointer() 1494 size := args[2].SizeT() 1495 1496 n, err := readlinkAt(t, linux.AT_FDCWD, addr, bufAddr, size) 1497 return n, nil, err 1498 } 1499 1500 // Readlinkat implements linux syscall readlinkat(2). 1501 func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1502 dirFD := args[0].Int() 1503 addr := args[1].Pointer() 1504 bufAddr := args[2].Pointer() 1505 size := args[3].SizeT() 1506 1507 n, err := readlinkAt(t, dirFD, addr, bufAddr, size) 1508 return n, nil, err 1509 } 1510 1511 // LINT.ThenChange(vfs2/stat.go) 1512 1513 // LINT.IfChange 1514 1515 func unlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error { 1516 path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) 1517 if err != nil { 1518 return err 1519 } 1520 1521 return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { 1522 if !fs.IsDir(d.Inode.StableAttr) { 1523 return syserror.ENOTDIR 1524 } 1525 1526 if err := d.MayDelete(t, root, name); err != nil { 1527 return err 1528 } 1529 1530 return d.Remove(t, root, name, dirPath) 1531 }) 1532 } 1533 1534 // Unlink implements linux syscall unlink(2). 1535 func Unlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1536 addr := args[0].Pointer() 1537 return 0, nil, unlinkAt(t, linux.AT_FDCWD, addr) 1538 } 1539 1540 // Unlinkat implements linux syscall unlinkat(2). 1541 func Unlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1542 dirFD := args[0].Int() 1543 addr := args[1].Pointer() 1544 flags := args[2].Uint() 1545 if flags&linux.AT_REMOVEDIR != 0 { 1546 return 0, nil, rmdirAt(t, dirFD, addr) 1547 } 1548 return 0, nil, unlinkAt(t, dirFD, addr) 1549 } 1550 1551 // LINT.ThenChange(vfs2/filesystem.go) 1552 1553 // LINT.IfChange 1554 1555 // Truncate implements linux syscall truncate(2). 1556 func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1557 addr := args[0].Pointer() 1558 length := args[1].Int64() 1559 1560 if length < 0 { 1561 return 0, nil, linuxerr.EINVAL 1562 } 1563 1564 path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) 1565 if err != nil { 1566 return 0, nil, err 1567 } 1568 if dirPath { 1569 return 0, nil, linuxerr.EINVAL 1570 } 1571 1572 if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { 1573 t.SendSignal(&linux.SignalInfo{ 1574 Signo: int32(linux.SIGXFSZ), 1575 Code: linux.SI_USER, 1576 }) 1577 return 0, nil, linuxerr.EFBIG 1578 } 1579 1580 return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 1581 if fs.IsDir(d.Inode.StableAttr) { 1582 return syserror.EISDIR 1583 } 1584 // In contrast to open(O_TRUNC), truncate(2) is only valid for file 1585 // types. 1586 if !fs.IsFile(d.Inode.StableAttr) { 1587 return linuxerr.EINVAL 1588 } 1589 1590 // Reject truncation if the access permissions do not allow truncation. 1591 // This is different from the behavior of sys_ftruncate, see below. 1592 if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true}); err != nil { 1593 return err 1594 } 1595 1596 if err := d.Inode.Truncate(t, d, length); err != nil { 1597 return err 1598 } 1599 1600 // File length modified, generate notification. 1601 d.InotifyEvent(linux.IN_MODIFY, 0) 1602 1603 return nil 1604 }) 1605 } 1606 1607 // Ftruncate implements linux syscall ftruncate(2). 1608 func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1609 fd := args[0].Int() 1610 length := args[1].Int64() 1611 1612 file := t.GetFile(fd) 1613 if file == nil { 1614 return 0, nil, linuxerr.EBADF 1615 } 1616 defer file.DecRef(t) 1617 1618 // Reject truncation if the file flags do not permit this operation. 1619 // This is different from truncate(2) above. 1620 if !file.Flags().Write { 1621 return 0, nil, linuxerr.EINVAL 1622 } 1623 1624 // In contrast to open(O_TRUNC), truncate(2) is only valid for file 1625 // types. Note that this is different from truncate(2) above, where a 1626 // directory returns EISDIR. 1627 if !fs.IsFile(file.Dirent.Inode.StableAttr) { 1628 return 0, nil, linuxerr.EINVAL 1629 } 1630 1631 if length < 0 { 1632 return 0, nil, linuxerr.EINVAL 1633 } 1634 1635 if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { 1636 t.SendSignal(&linux.SignalInfo{ 1637 Signo: int32(linux.SIGXFSZ), 1638 Code: linux.SI_USER, 1639 }) 1640 return 0, nil, linuxerr.EFBIG 1641 } 1642 1643 if err := file.Dirent.Inode.Truncate(t, file.Dirent, length); err != nil { 1644 return 0, nil, err 1645 } 1646 1647 // File length modified, generate notification. 1648 file.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 1649 1650 return 0, nil, nil 1651 } 1652 1653 // LINT.ThenChange(vfs2/setstat.go) 1654 1655 // Umask implements linux syscall umask(2). 1656 func Umask(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1657 mask := args[0].ModeT() 1658 mask = t.FSContext().SwapUmask(mask & 0777) 1659 return uintptr(mask), nil, nil 1660 } 1661 1662 // LINT.IfChange 1663 1664 // Change ownership of a file. 1665 // 1666 // uid and gid may be -1, in which case they will not be changed. 1667 func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { 1668 owner := fs.FileOwner{ 1669 UID: auth.NoID, 1670 GID: auth.NoID, 1671 } 1672 1673 uattr, err := d.Inode.UnstableAttr(t) 1674 if err != nil { 1675 return err 1676 } 1677 1678 c := t.Credentials() 1679 hasCap := d.Inode.CheckCapability(t, linux.CAP_CHOWN) 1680 isOwner := uattr.Owner.UID == c.EffectiveKUID 1681 var clearPrivilege bool 1682 if uid.Ok() { 1683 kuid := c.UserNamespace.MapToKUID(uid) 1684 // Valid UID must be supplied if UID is to be changed. 1685 if !kuid.Ok() { 1686 return linuxerr.EINVAL 1687 } 1688 1689 // "Only a privileged process (CAP_CHOWN) may change the owner 1690 // of a file." -chown(2) 1691 // 1692 // Linux also allows chown if you own the file and are 1693 // explicitly not changing its UID. 1694 isNoop := uattr.Owner.UID == kuid 1695 if !(hasCap || (isOwner && isNoop)) { 1696 return linuxerr.EPERM 1697 } 1698 1699 // The setuid and setgid bits are cleared during a chown. 1700 if uattr.Owner.UID != kuid { 1701 clearPrivilege = true 1702 } 1703 1704 owner.UID = kuid 1705 } 1706 if gid.Ok() { 1707 kgid := c.UserNamespace.MapToKGID(gid) 1708 // Valid GID must be supplied if GID is to be changed. 1709 if !kgid.Ok() { 1710 return linuxerr.EINVAL 1711 } 1712 1713 // "The owner of a file may change the group of the file to any 1714 // group of which that owner is a member. A privileged process 1715 // (CAP_CHOWN) may change the group arbitrarily." -chown(2) 1716 isNoop := uattr.Owner.GID == kgid 1717 isMemberGroup := c.InGroup(kgid) 1718 if !(hasCap || (isOwner && (isNoop || isMemberGroup))) { 1719 return linuxerr.EPERM 1720 } 1721 1722 // The setuid and setgid bits are cleared during a chown. 1723 if uattr.Owner.GID != kgid { 1724 clearPrivilege = true 1725 } 1726 1727 owner.GID = kgid 1728 } 1729 1730 // FIXME(b/62949101): This is racy; the inode's owner may have changed in 1731 // the meantime. (Linux holds i_mutex while calling 1732 // fs/attr.c:notify_change() => inode_operations::setattr => 1733 // inode_change_ok().) 1734 if err := d.Inode.SetOwner(t, d, owner); err != nil { 1735 return err 1736 } 1737 // Clear privilege bits if needed and they are set. 1738 if clearPrivilege && uattr.Perms.HasSetUIDOrGID() && !fs.IsDir(d.Inode.StableAttr) { 1739 uattr.Perms.DropSetUIDAndMaybeGID() 1740 if !d.Inode.SetPermissions(t, d, uattr.Perms) { 1741 return linuxerr.EPERM 1742 } 1743 } 1744 1745 return nil 1746 } 1747 1748 func chownAt(t *kernel.Task, fd int32, addr hostarch.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { 1749 path, _, err := copyInPath(t, addr, allowEmpty) 1750 if err != nil { 1751 return err 1752 } 1753 1754 if path == "" { 1755 // Annoying. What's wrong with fchown? 1756 file := t.GetFile(fd) 1757 if file == nil { 1758 return linuxerr.EBADF 1759 } 1760 defer file.DecRef(t) 1761 1762 return chown(t, file.Dirent, uid, gid) 1763 } 1764 1765 return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 1766 return chown(t, d, uid, gid) 1767 }) 1768 } 1769 1770 // Chown implements linux syscall chown(2). 1771 func Chown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1772 addr := args[0].Pointer() 1773 uid := auth.UID(args[1].Uint()) 1774 gid := auth.GID(args[2].Uint()) 1775 1776 return 0, nil, chownAt(t, linux.AT_FDCWD, addr, true /* resolve */, false /* allowEmpty */, uid, gid) 1777 } 1778 1779 // Lchown implements linux syscall lchown(2). 1780 func Lchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1781 addr := args[0].Pointer() 1782 uid := auth.UID(args[1].Uint()) 1783 gid := auth.GID(args[2].Uint()) 1784 1785 return 0, nil, chownAt(t, linux.AT_FDCWD, addr, false /* resolve */, false /* allowEmpty */, uid, gid) 1786 } 1787 1788 // Fchown implements linux syscall fchown(2). 1789 func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1790 fd := args[0].Int() 1791 uid := auth.UID(args[1].Uint()) 1792 gid := auth.GID(args[2].Uint()) 1793 1794 file := t.GetFile(fd) 1795 if file == nil { 1796 return 0, nil, linuxerr.EBADF 1797 } 1798 defer file.DecRef(t) 1799 1800 return 0, nil, chown(t, file.Dirent, uid, gid) 1801 } 1802 1803 // Fchownat implements Linux syscall fchownat(2). 1804 func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1805 dirFD := args[0].Int() 1806 addr := args[1].Pointer() 1807 uid := auth.UID(args[2].Uint()) 1808 gid := auth.GID(args[3].Uint()) 1809 flags := args[4].Int() 1810 1811 if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { 1812 return 0, nil, linuxerr.EINVAL 1813 } 1814 1815 return 0, nil, chownAt(t, dirFD, addr, flags&linux.AT_SYMLINK_NOFOLLOW == 0, flags&linux.AT_EMPTY_PATH != 0, uid, gid) 1816 } 1817 1818 func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error { 1819 // Must own file to change mode. 1820 if !d.Inode.CheckOwnership(t) { 1821 return linuxerr.EPERM 1822 } 1823 1824 p := fs.FilePermsFromMode(mode) 1825 if !d.Inode.SetPermissions(t, d, p) { 1826 return linuxerr.EPERM 1827 } 1828 1829 // File attribute changed, generate notification. 1830 d.InotifyEvent(linux.IN_ATTRIB, 0) 1831 1832 return nil 1833 } 1834 1835 func chmodAt(t *kernel.Task, fd int32, addr hostarch.Addr, mode linux.FileMode) error { 1836 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 1837 if err != nil { 1838 return err 1839 } 1840 1841 return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 1842 return chmod(t, d, mode) 1843 }) 1844 } 1845 1846 // Chmod implements linux syscall chmod(2). 1847 func Chmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1848 addr := args[0].Pointer() 1849 mode := linux.FileMode(args[1].ModeT()) 1850 1851 return 0, nil, chmodAt(t, linux.AT_FDCWD, addr, mode) 1852 } 1853 1854 // Fchmod implements linux syscall fchmod(2). 1855 func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1856 fd := args[0].Int() 1857 mode := linux.FileMode(args[1].ModeT()) 1858 1859 file := t.GetFile(fd) 1860 if file == nil { 1861 return 0, nil, linuxerr.EBADF 1862 } 1863 defer file.DecRef(t) 1864 1865 return 0, nil, chmod(t, file.Dirent, mode) 1866 } 1867 1868 // Fchmodat implements linux syscall fchmodat(2). 1869 func Fchmodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1870 fd := args[0].Int() 1871 addr := args[1].Pointer() 1872 mode := linux.FileMode(args[2].ModeT()) 1873 1874 return 0, nil, chmodAt(t, fd, addr, mode) 1875 } 1876 1877 // defaultSetToSystemTimeSpec returns a TimeSpec that will set ATime and MTime 1878 // to the system time. 1879 func defaultSetToSystemTimeSpec() fs.TimeSpec { 1880 return fs.TimeSpec{ 1881 ATimeSetSystemTime: true, 1882 MTimeSetSystemTime: true, 1883 } 1884 } 1885 1886 func utimes(t *kernel.Task, dirFD int32, addr hostarch.Addr, ts fs.TimeSpec, resolve bool) error { 1887 setTimestamp := func(root *fs.Dirent, d *fs.Dirent, _ uint) error { 1888 // Does the task own the file? 1889 if !d.Inode.CheckOwnership(t) { 1890 // Trying to set a specific time? Must be owner. 1891 if (ts.ATimeOmit || !ts.ATimeSetSystemTime) && (ts.MTimeOmit || !ts.MTimeSetSystemTime) { 1892 return linuxerr.EPERM 1893 } 1894 1895 // Trying to set to current system time? Must have write access. 1896 if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true}); err != nil { 1897 return err 1898 } 1899 } 1900 1901 if err := d.Inode.SetTimestamps(t, d, ts); err != nil { 1902 return err 1903 } 1904 1905 // File attribute changed, generate notification. 1906 d.InotifyEvent(linux.IN_ATTRIB, 0) 1907 return nil 1908 } 1909 1910 // From utimes.c: 1911 // "If filename is NULL and dfd refers to an open file, then operate on 1912 // the file. Otherwise look up filename, possibly using dfd as a 1913 // starting point." 1914 if addr == 0 && dirFD != linux.AT_FDCWD { 1915 if !resolve { 1916 // Linux returns EINVAL in this case. See utimes.c. 1917 return linuxerr.EINVAL 1918 } 1919 f := t.GetFile(dirFD) 1920 if f == nil { 1921 return linuxerr.EBADF 1922 } 1923 defer f.DecRef(t) 1924 1925 root := t.FSContext().RootDirectory() 1926 defer root.DecRef(t) 1927 1928 return setTimestamp(root, f.Dirent, linux.MaxSymlinkTraversals) 1929 } 1930 1931 path, _, err := copyInPath(t, addr, false /* allowEmpty */) 1932 if err != nil { 1933 return err 1934 } 1935 1936 return fileOpOn(t, dirFD, path, resolve, setTimestamp) 1937 } 1938 1939 // Utime implements linux syscall utime(2). 1940 func Utime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1941 filenameAddr := args[0].Pointer() 1942 timesAddr := args[1].Pointer() 1943 1944 // No timesAddr argument will be interpreted as current system time. 1945 ts := defaultSetToSystemTimeSpec() 1946 if timesAddr != 0 { 1947 var times linux.Utime 1948 if _, err := times.CopyIn(t, timesAddr); err != nil { 1949 return 0, nil, err 1950 } 1951 ts = fs.TimeSpec{ 1952 ATime: ktime.FromSeconds(times.Actime), 1953 MTime: ktime.FromSeconds(times.Modtime), 1954 } 1955 } 1956 return 0, nil, utimes(t, linux.AT_FDCWD, filenameAddr, ts, true) 1957 } 1958 1959 // Utimes implements linux syscall utimes(2). 1960 func Utimes(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1961 filenameAddr := args[0].Pointer() 1962 timesAddr := args[1].Pointer() 1963 1964 // No timesAddr argument will be interpreted as current system time. 1965 ts := defaultSetToSystemTimeSpec() 1966 if timesAddr != 0 { 1967 var times [2]linux.Timeval 1968 if _, err := linux.CopyTimevalSliceIn(t, timesAddr, times[:]); err != nil { 1969 return 0, nil, err 1970 } 1971 ts = fs.TimeSpec{ 1972 ATime: ktime.FromTimeval(times[0]), 1973 MTime: ktime.FromTimeval(times[1]), 1974 } 1975 } 1976 return 0, nil, utimes(t, linux.AT_FDCWD, filenameAddr, ts, true) 1977 } 1978 1979 // timespecIsValid checks that the timespec is valid for use in utimensat. 1980 func timespecIsValid(ts linux.Timespec) bool { 1981 // Nsec must be UTIME_OMIT, UTIME_NOW, or less than 10^9. 1982 return ts.Nsec == linux.UTIME_OMIT || ts.Nsec == linux.UTIME_NOW || ts.Nsec < 1e9 1983 } 1984 1985 // Utimensat implements linux syscall utimensat(2). 1986 func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1987 dirFD := args[0].Int() 1988 pathnameAddr := args[1].Pointer() 1989 timesAddr := args[2].Pointer() 1990 flags := args[3].Int() 1991 1992 // No timesAddr argument will be interpreted as current system time. 1993 ts := defaultSetToSystemTimeSpec() 1994 if timesAddr != 0 { 1995 var times [2]linux.Timespec 1996 if _, err := linux.CopyTimespecSliceIn(t, timesAddr, times[:]); err != nil { 1997 return 0, nil, err 1998 } 1999 if !timespecIsValid(times[0]) || !timespecIsValid(times[1]) { 2000 return 0, nil, linuxerr.EINVAL 2001 } 2002 2003 // If both are UTIME_OMIT, this is a noop. 2004 if times[0].Nsec == linux.UTIME_OMIT && times[1].Nsec == linux.UTIME_OMIT { 2005 return 0, nil, nil 2006 } 2007 2008 ts = fs.TimeSpec{ 2009 ATime: ktime.FromTimespec(times[0]), 2010 ATimeOmit: times[0].Nsec == linux.UTIME_OMIT, 2011 ATimeSetSystemTime: times[0].Nsec == linux.UTIME_NOW, 2012 MTime: ktime.FromTimespec(times[1]), 2013 MTimeOmit: times[1].Nsec == linux.UTIME_OMIT, 2014 MTimeSetSystemTime: times[0].Nsec == linux.UTIME_NOW, 2015 } 2016 } 2017 return 0, nil, utimes(t, dirFD, pathnameAddr, ts, flags&linux.AT_SYMLINK_NOFOLLOW == 0) 2018 } 2019 2020 // Futimesat implements linux syscall futimesat(2). 2021 func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 2022 dirFD := args[0].Int() 2023 pathnameAddr := args[1].Pointer() 2024 timesAddr := args[2].Pointer() 2025 2026 // No timesAddr argument will be interpreted as current system time. 2027 ts := defaultSetToSystemTimeSpec() 2028 if timesAddr != 0 { 2029 var times [2]linux.Timeval 2030 if _, err := linux.CopyTimevalSliceIn(t, timesAddr, times[:]); err != nil { 2031 return 0, nil, err 2032 } 2033 if times[0].Usec >= 1e6 || times[0].Usec < 0 || 2034 times[1].Usec >= 1e6 || times[1].Usec < 0 { 2035 return 0, nil, linuxerr.EINVAL 2036 } 2037 2038 ts = fs.TimeSpec{ 2039 ATime: ktime.FromTimeval(times[0]), 2040 MTime: ktime.FromTimeval(times[1]), 2041 } 2042 } 2043 return 0, nil, utimes(t, dirFD, pathnameAddr, ts, true) 2044 } 2045 2046 // LINT.ThenChange(vfs2/setstat.go) 2047 2048 // LINT.IfChange 2049 2050 func renameAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int32, newAddr hostarch.Addr) error { 2051 newPath, _, err := copyInPath(t, newAddr, false /* allowEmpty */) 2052 if err != nil { 2053 return err 2054 } 2055 oldPath, _, err := copyInPath(t, oldAddr, false /* allowEmpty */) 2056 if err != nil { 2057 return err 2058 } 2059 2060 return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string, _ uint) error { 2061 if !fs.IsDir(oldParent.Inode.StableAttr) { 2062 return syserror.ENOTDIR 2063 } 2064 2065 // Rename rejects paths that end in ".", "..", or empty (i.e. 2066 // the root) with EBUSY. 2067 switch oldName { 2068 case "", ".", "..": 2069 return linuxerr.EBUSY 2070 } 2071 2072 return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { 2073 if !fs.IsDir(newParent.Inode.StableAttr) { 2074 return syserror.ENOTDIR 2075 } 2076 2077 // Rename rejects paths that end in ".", "..", or empty 2078 // (i.e. the root) with EBUSY. 2079 switch newName { 2080 case "", ".", "..": 2081 return linuxerr.EBUSY 2082 } 2083 2084 return fs.Rename(t, root, oldParent, oldName, newParent, newName) 2085 }) 2086 }) 2087 } 2088 2089 // Rename implements linux syscall rename(2). 2090 func Rename(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 2091 oldPathAddr := args[0].Pointer() 2092 newPathAddr := args[1].Pointer() 2093 return 0, nil, renameAt(t, linux.AT_FDCWD, oldPathAddr, linux.AT_FDCWD, newPathAddr) 2094 } 2095 2096 // Renameat implements linux syscall renameat(2). 2097 func Renameat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 2098 oldDirFD := args[0].Int() 2099 oldPathAddr := args[1].Pointer() 2100 newDirFD := args[2].Int() 2101 newPathAddr := args[3].Pointer() 2102 return 0, nil, renameAt(t, oldDirFD, oldPathAddr, newDirFD, newPathAddr) 2103 } 2104 2105 // LINT.ThenChange(vfs2/filesystem.go) 2106 2107 // Fallocate implements linux system call fallocate(2). 2108 func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 2109 fd := args[0].Int() 2110 mode := args[1].Int64() 2111 offset := args[2].Int64() 2112 length := args[3].Int64() 2113 2114 file := t.GetFile(fd) 2115 if file == nil { 2116 return 0, nil, linuxerr.EBADF 2117 } 2118 defer file.DecRef(t) 2119 2120 if offset < 0 || length <= 0 { 2121 return 0, nil, linuxerr.EINVAL 2122 } 2123 if mode != 0 { 2124 t.Kernel().EmitUnimplementedEvent(t) 2125 return 0, nil, linuxerr.ENOTSUP 2126 } 2127 if !file.Flags().Write { 2128 return 0, nil, linuxerr.EBADF 2129 } 2130 if fs.IsPipe(file.Dirent.Inode.StableAttr) { 2131 return 0, nil, linuxerr.ESPIPE 2132 } 2133 if fs.IsDir(file.Dirent.Inode.StableAttr) { 2134 return 0, nil, syserror.EISDIR 2135 } 2136 if !fs.IsRegular(file.Dirent.Inode.StableAttr) { 2137 return 0, nil, linuxerr.ENODEV 2138 } 2139 size := offset + length 2140 if size < 0 { 2141 return 0, nil, linuxerr.EFBIG 2142 } 2143 if uint64(size) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur { 2144 t.SendSignal(&linux.SignalInfo{ 2145 Signo: int32(linux.SIGXFSZ), 2146 Code: linux.SI_USER, 2147 }) 2148 return 0, nil, linuxerr.EFBIG 2149 } 2150 2151 if err := file.Dirent.Inode.Allocate(t, file.Dirent, offset, length); err != nil { 2152 return 0, nil, err 2153 } 2154 2155 // File length modified, generate notification. 2156 file.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 2157 2158 return 0, nil, nil 2159 } 2160 2161 // Flock implements linux syscall flock(2). 2162 func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 2163 fd := args[0].Int() 2164 operation := args[1].Int() 2165 2166 file := t.GetFile(fd) 2167 if file == nil { 2168 // flock(2): EBADF fd is not an open file descriptor. 2169 return 0, nil, linuxerr.EBADF 2170 } 2171 defer file.DecRef(t) 2172 2173 nonblocking := operation&linux.LOCK_NB != 0 2174 operation &^= linux.LOCK_NB 2175 2176 // A BSD style lock spans the entire file. 2177 rng := lock.LockRange{ 2178 Start: 0, 2179 End: lock.LockEOF, 2180 } 2181 2182 switch operation { 2183 case linux.LOCK_EX: 2184 if nonblocking { 2185 // Since we're nonblocking we pass a nil lock.Blocker implementation. 2186 if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, nil) { 2187 return 0, nil, linuxerr.EWOULDBLOCK 2188 } 2189 } else { 2190 // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. 2191 if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) { 2192 return 0, nil, syserror.EINTR 2193 } 2194 } 2195 case linux.LOCK_SH: 2196 if nonblocking { 2197 // Since we're nonblocking we pass a nil lock.Blocker implementation. 2198 if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, nil) { 2199 return 0, nil, linuxerr.EWOULDBLOCK 2200 } 2201 } else { 2202 // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. 2203 if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) { 2204 return 0, nil, syserror.EINTR 2205 } 2206 } 2207 case linux.LOCK_UN: 2208 file.Dirent.Inode.LockCtx.BSD.UnlockRegion(file, rng) 2209 default: 2210 // flock(2): EINVAL operation is invalid. 2211 return 0, nil, linuxerr.EINVAL 2212 } 2213 2214 return 0, nil, nil 2215 } 2216 2217 const ( 2218 memfdPrefix = "/memfd:" 2219 memfdAllFlags = uint32(linux.MFD_CLOEXEC | linux.MFD_ALLOW_SEALING) 2220 memfdMaxNameLen = linux.NAME_MAX - len(memfdPrefix) + 1 2221 ) 2222 2223 // MemfdCreate implements the linux syscall memfd_create(2). 2224 func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 2225 addr := args[0].Pointer() 2226 flags := args[1].Uint() 2227 2228 if flags&^memfdAllFlags != 0 { 2229 // Unknown bits in flags. 2230 return 0, nil, linuxerr.EINVAL 2231 } 2232 2233 allowSeals := flags&linux.MFD_ALLOW_SEALING != 0 2234 cloExec := flags&linux.MFD_CLOEXEC != 0 2235 2236 name, err := t.CopyInString(addr, unix.PathMax-len(memfdPrefix)) 2237 if err != nil { 2238 return 0, nil, err 2239 } 2240 if len(name) > memfdMaxNameLen { 2241 return 0, nil, linuxerr.EINVAL 2242 } 2243 name = memfdPrefix + name 2244 2245 inode := tmpfs.NewMemfdInode(t, allowSeals) 2246 dirent := fs.NewDirent(t, inode, name) 2247 // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with 2248 // FMODE_READ | FMODE_WRITE. 2249 file, err := inode.GetFile(t, dirent, fs.FileFlags{Read: true, Write: true}) 2250 if err != nil { 2251 return 0, nil, err 2252 } 2253 2254 defer dirent.DecRef(t) 2255 defer file.DecRef(t) 2256 2257 newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ 2258 CloseOnExec: cloExec, 2259 }) 2260 if err != nil { 2261 return 0, nil, err 2262 } 2263 2264 return uintptr(newFD), nil, nil 2265 }