github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/fsgofer/lisafs.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package fsgofer provides a lisafs server implementation which gives access 16 // to local files. 17 package fsgofer 18 19 import ( 20 "fmt" 21 "io" 22 "math" 23 "os" 24 "path" 25 "path/filepath" 26 "strconv" 27 28 "golang.org/x/sys/unix" 29 "github.com/metacubex/gvisor/pkg/abi/linux" 30 "github.com/metacubex/gvisor/pkg/atomicbitops" 31 "github.com/metacubex/gvisor/pkg/cleanup" 32 rwfd "github.com/metacubex/gvisor/pkg/fd" 33 "github.com/metacubex/gvisor/pkg/fsutil" 34 "github.com/metacubex/gvisor/pkg/lisafs" 35 "github.com/metacubex/gvisor/pkg/log" 36 "github.com/metacubex/gvisor/pkg/marshal/primitive" 37 "github.com/metacubex/gvisor/runsc/config" 38 ) 39 40 // LINT.IfChange 41 42 const ( 43 openFlags = unix.O_NOFOLLOW | unix.O_CLOEXEC 44 45 // UNIX_PATH_MAX as defined in include/uapi/linux/un.h. 46 unixPathMax = 108 47 ) 48 49 // Config sets configuration options for each attach point. 50 type Config struct { 51 // ROMount is set to true if this is a readonly mount. 52 ROMount bool 53 54 // PanicOnWrite panics on attempts to write to RO mounts. 55 PanicOnWrite bool 56 57 // HostUDS signals whether the gofer can connect to host unix domain sockets. 58 HostUDS config.HostUDS 59 60 // HostFifo signals whether the gofer can connect to host FIFOs. 61 HostFifo config.HostFifo 62 63 // DonateMountPointFD indicates whether a host FD to the mount point should 64 // be donated to the client on Mount RPC. 65 DonateMountPointFD bool 66 } 67 68 var procSelfFD *rwfd.FD 69 70 // OpenProcSelfFD opens the /proc/self/fd directory, which will be used to 71 // reopen file descriptors. 72 func OpenProcSelfFD(path string) error { 73 d, err := unix.Open(path, unix.O_RDONLY|unix.O_DIRECTORY, 0) 74 if err != nil { 75 return fmt.Errorf("error opening /proc/self/fd: %v", err) 76 } 77 procSelfFD = rwfd.New(d) 78 return nil 79 } 80 81 // LisafsServer implements lisafs.ServerImpl for fsgofer. 82 type LisafsServer struct { 83 lisafs.Server 84 config Config 85 } 86 87 var _ lisafs.ServerImpl = (*LisafsServer)(nil) 88 89 // NewLisafsServer initializes a new lisafs server for fsgofer. 90 func NewLisafsServer(config Config) *LisafsServer { 91 s := &LisafsServer{config: config} 92 s.Server.Init(s, lisafs.ServerOpts{ 93 WalkStatSupported: true, 94 SetAttrOnDeleted: true, 95 AllocateOnDeleted: true, 96 }) 97 return s 98 } 99 100 // Mount implements lisafs.ServerImpl.Mount. 101 func (s *LisafsServer) Mount(c *lisafs.Connection, mountNode *lisafs.Node) (*lisafs.ControlFD, linux.Statx, int, error) { 102 mountPath := mountNode.FilePath() 103 rootHostFD, err := tryOpen(func(flags int) (int, error) { 104 return unix.Open(mountPath, flags, 0) 105 }) 106 if err != nil { 107 return nil, linux.Statx{}, -1, err 108 } 109 cu := cleanup.Make(func() { 110 _ = unix.Close(rootHostFD) 111 }) 112 defer cu.Clean() 113 114 stat, err := fstatTo(rootHostFD) 115 if err != nil { 116 return nil, linux.Statx{}, -1, err 117 } 118 119 if err := checkSupportedFileType(uint32(stat.Mode)); err != nil { 120 log.Warningf("Mount: checkSupportedFileType() failed for file %q with mode %o: %v", mountPath, stat.Mode, err) 121 return nil, linux.Statx{}, -1, err 122 } 123 124 clientHostFD := -1 125 if s.config.DonateMountPointFD { 126 clientHostFD, err = unix.Dup(rootHostFD) 127 if err != nil { 128 return nil, linux.Statx{}, -1, err 129 } 130 } 131 cu.Release() 132 133 rootFD := &controlFDLisa{ 134 hostFD: rootHostFD, 135 writableHostFD: atomicbitops.FromInt32(-1), 136 isMountPoint: true, 137 } 138 mountNode.IncRef() // Ref is transferred to ControlFD. 139 rootFD.ControlFD.Init(c, mountNode, linux.FileMode(stat.Mode), rootFD) 140 return rootFD.FD(), stat, clientHostFD, nil 141 } 142 143 // MaxMessageSize implements lisafs.ServerImpl.MaxMessageSize. 144 func (s *LisafsServer) MaxMessageSize() uint32 { 145 return lisafs.MaxMessageSize() 146 } 147 148 // SupportedMessages implements lisafs.ServerImpl.SupportedMessages. 149 func (s *LisafsServer) SupportedMessages() []lisafs.MID { 150 // Note that Flush, FListXattr and FRemoveXattr are not supported. 151 return []lisafs.MID{ 152 lisafs.Mount, 153 lisafs.Channel, 154 lisafs.FStat, 155 lisafs.SetStat, 156 lisafs.Walk, 157 lisafs.WalkStat, 158 lisafs.OpenAt, 159 lisafs.OpenCreateAt, 160 lisafs.Close, 161 lisafs.FSync, 162 lisafs.PWrite, 163 lisafs.PRead, 164 lisafs.MkdirAt, 165 lisafs.MknodAt, 166 lisafs.SymlinkAt, 167 lisafs.LinkAt, 168 lisafs.FStatFS, 169 lisafs.FAllocate, 170 lisafs.ReadLinkAt, 171 lisafs.Connect, 172 lisafs.UnlinkAt, 173 lisafs.RenameAt, 174 lisafs.Getdents64, 175 lisafs.FGetXattr, 176 lisafs.FSetXattr, 177 lisafs.BindAt, 178 lisafs.Listen, 179 lisafs.Accept, 180 } 181 } 182 183 // controlFDLisa implements lisafs.ControlFDImpl. 184 type controlFDLisa struct { 185 lisafs.ControlFD 186 187 // hostFD is the file descriptor which can be used to make host syscalls. 188 hostFD int 189 190 // writableHostFD is the file descriptor number for a writable FD opened on 191 // the same FD as `hostFD`. It is initialized to -1, and can change in value 192 // exactly once. 193 writableHostFD atomicbitops.Int32 194 195 // isMountpoint indicates whether this FD represents the mount point for its 196 // owning connection. isMountPoint is immutable. 197 isMountPoint bool 198 } 199 200 var _ lisafs.ControlFDImpl = (*controlFDLisa)(nil) 201 202 func newControlFDLisa(hostFD int, parent *controlFDLisa, name string, mode linux.FileMode) *controlFDLisa { 203 var ( 204 childFD *controlFDLisa 205 childNode *lisafs.Node 206 parentNode = parent.Node() 207 ) 208 parentNode.WithChildrenMu(func() { 209 childNode = parentNode.LookupChildLocked(name) 210 if childNode == nil { 211 // Common case. Performance hack which is used to allocate the node and 212 // its control FD together in the heap. For a well-behaving client, there 213 // will be a 1:1 mapping between control FD and node and their lifecycle 214 // will be similar too. This will help reduce allocations and memory 215 // fragmentation. This is more cache friendly too. 216 temp := struct { 217 node lisafs.Node 218 fd controlFDLisa 219 }{} 220 childFD = &temp.fd 221 childNode = &temp.node 222 childNode.InitLocked(name, parentNode) 223 } else { 224 childNode.IncRef() 225 childFD = &controlFDLisa{} 226 } 227 }) 228 childFD.hostFD = hostFD 229 childFD.writableHostFD = atomicbitops.FromInt32(-1) 230 childFD.ControlFD.Init(parent.Conn(), childNode, mode, childFD) 231 return childFD 232 } 233 234 func (fd *controlFDLisa) getWritableFD() (int, error) { 235 if writableFD := fd.writableHostFD.Load(); writableFD != -1 { 236 return int(writableFD), nil 237 } 238 239 writableFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(fd.hostFD), (unix.O_WRONLY|openFlags)&^unix.O_NOFOLLOW, 0) 240 if err != nil { 241 return -1, err 242 } 243 if !fd.writableHostFD.CompareAndSwap(-1, int32(writableFD)) { 244 // Race detected, use the new value and clean this up. 245 unix.Close(writableFD) 246 return int(fd.writableHostFD.Load()), nil 247 } 248 return writableFD, nil 249 } 250 251 func (fd *controlFDLisa) getParentFD() (int, string, error) { 252 filePath := fd.Node().FilePath() 253 if filePath == "/" { 254 log.Warningf("getParentFD() call on the root") 255 return -1, "", unix.EINVAL 256 } 257 parent, err := unix.Open(path.Dir(filePath), openFlags|unix.O_PATH, 0) 258 return parent, path.Base(filePath), err 259 } 260 261 // FD implements lisafs.ControlFDImpl.FD. 262 func (fd *controlFDLisa) FD() *lisafs.ControlFD { 263 if fd == nil { 264 return nil 265 } 266 return &fd.ControlFD 267 } 268 269 // Close implements lisafs.ControlFDImpl.Close. 270 func (fd *controlFDLisa) Close() { 271 if fd.hostFD >= 0 { 272 _ = unix.Close(fd.hostFD) 273 fd.hostFD = -1 274 } 275 // No concurrent access is possible so no need to use atomics. 276 if fd.writableHostFD.RacyLoad() >= 0 { 277 _ = unix.Close(int(fd.writableHostFD.RacyLoad())) 278 fd.writableHostFD = atomicbitops.FromInt32(-1) 279 } 280 } 281 282 // Stat implements lisafs.ControlFDImpl.Stat. 283 func (fd *controlFDLisa) Stat() (linux.Statx, error) { 284 return fstatTo(fd.hostFD) 285 } 286 287 // SetStat implements lisafs.ControlFDImpl.SetStat. 288 func (fd *controlFDLisa) SetStat(stat lisafs.SetStatReq) (failureMask uint32, failureErr error) { 289 if stat.Mask&unix.STATX_MODE != 0 { 290 if fd.IsSocket() { 291 // fchmod(2) on socket files created via bind(2) fails. We need to 292 // fchmodat(2) it from its parent. 293 parent, sockName, err := fd.getParentFD() 294 if err == nil { 295 // Note that AT_SYMLINK_NOFOLLOW flag is not currently supported. 296 err = unix.Fchmodat(parent, sockName, stat.Mode&^unix.S_IFMT, 0 /* flags */) 297 unix.Close(parent) 298 } 299 if err != nil { 300 log.Warningf("SetStat fchmod failed on socket %q, err: %v", fd.Node().FilePath(), err) 301 failureMask |= unix.STATX_MODE 302 failureErr = err 303 } 304 } else { 305 if err := unix.Fchmod(fd.hostFD, stat.Mode&^unix.S_IFMT); err != nil { 306 log.Warningf("SetStat fchmod failed %q, err: %v", fd.Node().FilePath(), err) 307 failureMask |= unix.STATX_MODE 308 failureErr = err 309 } 310 } 311 } 312 313 if stat.Mask&unix.STATX_SIZE != 0 { 314 // ftruncate(2) requires the FD to be open for writing. 315 writableFD, err := fd.getWritableFD() 316 if err == nil { 317 err = unix.Ftruncate(writableFD, int64(stat.Size)) 318 } 319 if err != nil { 320 log.Warningf("SetStat ftruncate failed %q, err: %v", fd.Node().FilePath(), err) 321 failureMask |= unix.STATX_SIZE 322 failureErr = err 323 } 324 } 325 326 if stat.Mask&(unix.STATX_ATIME|unix.STATX_MTIME) != 0 { 327 utimes := [2]unix.Timespec{ 328 {Sec: 0, Nsec: unix.UTIME_OMIT}, 329 {Sec: 0, Nsec: unix.UTIME_OMIT}, 330 } 331 if stat.Mask&unix.STATX_ATIME != 0 { 332 utimes[0].Sec = stat.Atime.Sec 333 utimes[0].Nsec = stat.Atime.Nsec 334 } 335 if stat.Mask&unix.STATX_MTIME != 0 { 336 utimes[1].Sec = stat.Mtime.Sec 337 utimes[1].Nsec = stat.Mtime.Nsec 338 } 339 340 if fd.IsSymlink() { 341 // utimensat operates different that other syscalls. To operate on a 342 // symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty 343 // name. We need the parent FD. 344 parent, symlinkName, err := fd.getParentFD() 345 if err == nil { 346 err = fsutil.Utimensat(parent, symlinkName, utimes, unix.AT_SYMLINK_NOFOLLOW) 347 unix.Close(parent) 348 } 349 if err != nil { 350 failureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME)) 351 failureErr = err 352 } 353 } else { 354 hostFD := fd.hostFD 355 if fd.IsRegular() { 356 // For regular files, utimensat(2) requires the FD to be open for 357 // writing, see BUGS section. 358 if writableFD, err := fd.getWritableFD(); err == nil { 359 hostFD = writableFD 360 } else { 361 log.Warningf("SetStat getWritableFD failed %q, err: %v", fd.Node().FilePath(), err) 362 } 363 } 364 // Directories and regular files can operate directly on the fd 365 // using empty name. 366 err := fsutil.Utimensat(hostFD, "", utimes, 0) 367 if err != nil { 368 log.Warningf("SetStat utimens failed %q, err: %v", fd.Node().FilePath(), err) 369 failureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME)) 370 failureErr = err 371 } 372 } 373 } 374 375 if stat.Mask&(unix.STATX_UID|unix.STATX_GID) != 0 { 376 // "If the owner or group is specified as -1, then that ID is not changed" 377 // - chown(2) 378 uid := -1 379 if stat.Mask&unix.STATX_UID != 0 { 380 uid = int(stat.UID) 381 } 382 gid := -1 383 if stat.Mask&unix.STATX_GID != 0 { 384 gid = int(stat.GID) 385 } 386 if err := unix.Fchownat(fd.hostFD, "", uid, gid, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 387 log.Warningf("SetStat fchown failed %q, err: %v", fd.Node().FilePath(), err) 388 failureMask |= stat.Mask & (unix.STATX_UID | unix.STATX_GID) 389 failureErr = err 390 } 391 } 392 393 return 394 } 395 396 // Walk implements lisafs.ControlFDImpl.Walk. 397 func (fd *controlFDLisa) Walk(name string) (*lisafs.ControlFD, linux.Statx, error) { 398 childHostFD, err := tryOpen(func(flags int) (int, error) { 399 return unix.Openat(fd.hostFD, name, flags, 0) 400 }) 401 if err != nil { 402 return nil, linux.Statx{}, err 403 } 404 405 stat, err := fstatTo(childHostFD) 406 if err != nil { 407 _ = unix.Close(childHostFD) 408 return nil, linux.Statx{}, err 409 } 410 411 if err := checkSupportedFileType(uint32(stat.Mode)); err != nil { 412 _ = unix.Close(childHostFD) 413 log.Warningf("Walk: checkSupportedFileType() failed for %q with mode %o: %v", name, stat.Mode, err) 414 return nil, linux.Statx{}, err 415 } 416 417 return newControlFDLisa(childHostFD, fd, name, linux.FileMode(stat.Mode)).FD(), stat, nil 418 } 419 420 // WalkStat implements lisafs.ControlFDImpl.WalkStat. 421 func (fd *controlFDLisa) WalkStat(path lisafs.StringArray, recordStat func(linux.Statx)) error { 422 // Note that while performing the walk below, we do not have read concurrency 423 // guarantee for any descendants. So files can be created/deleted inside fd 424 // while the walk is being performed. However, this should be fine from a 425 // security perspective as we are using host FDs to walk and checking that 426 // each opened path component is not a symlink. 427 curDirFD := fd.hostFD 428 closeCurDirFD := func() { 429 if curDirFD != fd.hostFD { 430 unix.Close(curDirFD) 431 } 432 } 433 defer closeCurDirFD() 434 if len(path) > 0 && len(path[0]) == 0 { 435 // Write stat results for dirFD if the first path component is "". 436 stat, err := fstatTo(fd.hostFD) 437 if err != nil { 438 return err 439 } 440 recordStat(stat) 441 path = path[1:] 442 } 443 444 // Don't attempt walking if parent is a symlink. 445 if fd.IsSymlink() { 446 return nil 447 } 448 for _, name := range path { 449 curFD, err := unix.Openat(curDirFD, name, unix.O_PATH|openFlags, 0) 450 if err == unix.ENOENT { 451 // No more path components exist on the filesystem. Return the partial 452 // walk to the client. 453 break 454 } 455 if err != nil { 456 return err 457 } 458 closeCurDirFD() 459 curDirFD = curFD 460 461 stat, err := fstatTo(curFD) 462 if err != nil { 463 return err 464 } 465 if err := checkSupportedFileType(uint32(stat.Mode)); err != nil { 466 log.Warningf("WalkStat: checkSupportedFileType() failed for file %q with mode %o while walking path %+v: %v", name, stat.Mode, path, err) 467 return err 468 } 469 recordStat(stat) 470 471 // Symlinks terminate walk. This client gets the symlink stat result, but 472 // will have to invoke Walk again with the resolved path. 473 if stat.Mode&unix.S_IFMT == unix.S_IFLNK { 474 break 475 } 476 } 477 478 return nil 479 } 480 481 // Open implements lisafs.ControlFDImpl.Open. 482 func (fd *controlFDLisa) Open(flags uint32) (*lisafs.OpenFD, int, error) { 483 ftype := fd.FileType() 484 server := fd.Conn().ServerImpl().(*LisafsServer) 485 switch ftype { 486 case unix.S_IFIFO: 487 if !server.config.HostFifo.AllowOpen() { 488 return nil, -1, unix.EPERM 489 } 490 case unix.S_IFSOCK: 491 if !server.config.HostUDS.AllowOpen() { 492 return nil, -1, unix.EPERM 493 } 494 } 495 flags |= openFlags 496 openHostFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(fd.hostFD), int(flags)&^unix.O_NOFOLLOW, 0) 497 if err != nil { 498 return nil, -1, err 499 } 500 501 hostFDToDonate := -1 502 switch { 503 case ftype == unix.S_IFREG: 504 // Best effort to donate file to the Sentry (for performance only). 505 hostFDToDonate, _ = unix.Dup(openHostFD) 506 507 case ftype == unix.S_IFIFO, 508 ftype == unix.S_IFCHR, 509 fd.isMountPoint && fd.Conn().ServerImpl().(*LisafsServer).config.DonateMountPointFD: 510 // Character devices and pipes can block indefinitely during reads/writes, 511 // which is not allowed for gofer operations. Ensure that it donates an FD 512 // back to the caller, so it can wait on the FD when reads/writes return 513 // EWOULDBLOCK. For mount points, if DonateMountPointFD option is set, an 514 // FD must be donated. 515 var err error 516 hostFDToDonate, err = unix.Dup(openHostFD) 517 if err != nil { 518 return nil, 0, err 519 } 520 } 521 522 openFD := fd.newOpenFDLisa(openHostFD, flags) 523 return openFD.FD(), hostFDToDonate, nil 524 } 525 526 // OpenCreate implements lisafs.ControlFDImpl.OpenCreate. 527 func (fd *controlFDLisa) OpenCreate(mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string, flags uint32) (*lisafs.ControlFD, linux.Statx, *lisafs.OpenFD, int, error) { 528 createFlags := unix.O_CREAT | unix.O_EXCL | unix.O_RDONLY | unix.O_NONBLOCK | openFlags 529 childHostFD, err := unix.Openat(fd.hostFD, name, createFlags, uint32(mode&^linux.FileTypeMask)) 530 if err != nil { 531 return nil, linux.Statx{}, nil, -1, err 532 } 533 534 cu := cleanup.Make(func() { 535 // Best effort attempt to remove the file in case of failure. 536 if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { 537 log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 538 } 539 unix.Close(childHostFD) 540 }) 541 defer cu.Clean() 542 543 // Set the owners as requested by the client. 544 if err := unix.Fchownat(childHostFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 545 return nil, linux.Statx{}, nil, -1, err 546 } 547 548 // Get stat results. 549 childStat, err := fstatTo(childHostFD) 550 if err != nil { 551 return nil, linux.Statx{}, nil, -1, err 552 } 553 554 // Now open an FD to the newly created file with the flags requested by the client. 555 flags |= openFlags 556 newHostFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(childHostFD), int(flags)&^unix.O_NOFOLLOW, 0) 557 if err != nil { 558 return nil, linux.Statx{}, nil, -1, err 559 } 560 cu.Release() 561 562 childFD := newControlFDLisa(childHostFD, fd, name, linux.ModeRegular) 563 newFD := childFD.newOpenFDLisa(newHostFD, uint32(flags)) 564 565 // Donate FD because open(O_CREAT|O_EXCL) always creates a regular file. 566 // Since FD donation is a destructive operation, we should duplicate the 567 // to-be-donated FD. Eat the error if one occurs, it is better to have an FD 568 // without a host FD, than failing the Open attempt. 569 hostOpenFD := -1 570 if dupFD, err := unix.Dup(newFD.hostFD); err == nil { 571 hostOpenFD = dupFD 572 } 573 574 return childFD.FD(), childStat, newFD.FD(), hostOpenFD, nil 575 } 576 577 // Mkdir implements lisafs.ControlFDImpl.Mkdir. 578 func (fd *controlFDLisa) Mkdir(mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string) (*lisafs.ControlFD, linux.Statx, error) { 579 if err := unix.Mkdirat(fd.hostFD, name, uint32(mode&^linux.FileTypeMask)); err != nil { 580 return nil, linux.Statx{}, err 581 } 582 cu := cleanup.Make(func() { 583 // Best effort attempt to remove the dir in case of failure. 584 if err := unix.Unlinkat(fd.hostFD, name, unix.AT_REMOVEDIR); err != nil { 585 log.Warningf("error unlinking dir %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 586 } 587 }) 588 defer cu.Clean() 589 590 // Open directory to change ownership. 591 childDirFd, err := tryOpen(func(flags int) (int, error) { 592 return unix.Openat(fd.hostFD, name, flags|unix.O_DIRECTORY, 0) 593 }) 594 if err != nil { 595 return nil, linux.Statx{}, err 596 } 597 if err := unix.Fchownat(childDirFd, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 598 unix.Close(childDirFd) 599 return nil, linux.Statx{}, err 600 } 601 602 // Get stat results. 603 childDirStat, err := fstatTo(childDirFd) 604 if err != nil { 605 unix.Close(childDirFd) 606 return nil, linux.Statx{}, err 607 } 608 609 cu.Release() 610 return newControlFDLisa(childDirFd, fd, name, linux.ModeDirectory).FD(), childDirStat, nil 611 } 612 613 // Mknod implements lisafs.ControlFDImpl.Mknod. 614 func (fd *controlFDLisa) Mknod(mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string, minor uint32, major uint32) (*lisafs.ControlFD, linux.Statx, error) { 615 // From mknod(2) man page: 616 // "EPERM: [...] if the filesystem containing pathname does not support 617 // the type of node requested." 618 if mode.FileType() != linux.ModeRegular { 619 return nil, linux.Statx{}, unix.EPERM 620 } 621 622 if err := unix.Mknodat(fd.hostFD, name, uint32(mode), 0); err != nil { 623 return nil, linux.Statx{}, err 624 } 625 cu := cleanup.Make(func() { 626 // Best effort attempt to remove the file in case of failure. 627 if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { 628 log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 629 } 630 }) 631 defer cu.Clean() 632 633 // Open file to change ownership. 634 childFD, err := tryOpen(func(flags int) (int, error) { 635 return unix.Openat(fd.hostFD, name, flags, 0) 636 }) 637 if err != nil { 638 return nil, linux.Statx{}, err 639 } 640 if err := unix.Fchownat(childFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 641 unix.Close(childFD) 642 return nil, linux.Statx{}, err 643 } 644 645 // Get stat results. 646 childStat, err := fstatTo(childFD) 647 if err != nil { 648 unix.Close(childFD) 649 return nil, linux.Statx{}, err 650 } 651 cu.Release() 652 653 return newControlFDLisa(childFD, fd, name, mode).FD(), childStat, nil 654 } 655 656 // Symlink implements lisafs.ControlFDImpl.Symlink. 657 func (fd *controlFDLisa) Symlink(name string, target string, uid lisafs.UID, gid lisafs.GID) (*lisafs.ControlFD, linux.Statx, error) { 658 if err := unix.Symlinkat(target, fd.hostFD, name); err != nil { 659 return nil, linux.Statx{}, err 660 } 661 cu := cleanup.Make(func() { 662 // Best effort attempt to remove the symlink in case of failure. 663 if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { 664 log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 665 } 666 }) 667 defer cu.Clean() 668 669 // Open symlink to change ownership. 670 symlinkFD, err := unix.Openat(fd.hostFD, name, unix.O_PATH|openFlags, 0) 671 if err != nil { 672 return nil, linux.Statx{}, err 673 } 674 if err := unix.Fchownat(symlinkFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 675 unix.Close(symlinkFD) 676 return nil, linux.Statx{}, err 677 } 678 679 symlinkStat, err := fstatTo(symlinkFD) 680 if err != nil { 681 unix.Close(symlinkFD) 682 return nil, linux.Statx{}, err 683 } 684 cu.Release() 685 return newControlFDLisa(symlinkFD, fd, name, linux.ModeSymlink).FD(), symlinkStat, nil 686 } 687 688 // Link implements lisafs.ControlFDImpl.Link. 689 func (fd *controlFDLisa) Link(dir lisafs.ControlFDImpl, name string) (*lisafs.ControlFD, linux.Statx, error) { 690 // Using linkat(targetFD, "", newdirfd, name, AT_EMPTY_PATH) requires 691 // CAP_DAC_READ_SEARCH in the *root* userns. The gofer process has 692 // CAP_DAC_READ_SEARCH in its own userns. But sometimes the gofer may be 693 // running in a different userns. So we can't use AT_EMPTY_PATH. Fallback 694 // to using olddirfd to call linkat(2). 695 oldDirFD, oldName, err := fd.getParentFD() 696 if err != nil { 697 return nil, linux.Statx{}, err 698 } 699 dirFD := dir.(*controlFDLisa) 700 if err := unix.Linkat(oldDirFD, oldName, dirFD.hostFD, name, 0); err != nil { 701 return nil, linux.Statx{}, err 702 } 703 cu := cleanup.Make(func() { 704 // Best effort attempt to remove the hard link in case of failure. 705 if err := unix.Unlinkat(dirFD.hostFD, name, 0); err != nil { 706 log.Warningf("error unlinking file %q after failure: %v", path.Join(dirFD.Node().FilePath(), name), err) 707 } 708 }) 709 defer cu.Clean() 710 711 linkFD, err := tryOpen(func(flags int) (int, error) { 712 return unix.Openat(dirFD.hostFD, name, flags, 0) 713 }) 714 if err != nil { 715 return nil, linux.Statx{}, err 716 } 717 718 linkStat, err := fstatTo(linkFD) 719 if err != nil { 720 return nil, linux.Statx{}, err 721 } 722 cu.Release() 723 return newControlFDLisa(linkFD, dirFD, name, linux.FileMode(linkStat.Mode)).FD(), linkStat, nil 724 } 725 726 // StatFS implements lisafs.ControlFDImpl.StatFS. 727 func (fd *controlFDLisa) StatFS() (lisafs.StatFS, error) { 728 var s unix.Statfs_t 729 if err := unix.Fstatfs(fd.hostFD, &s); err != nil { 730 return lisafs.StatFS{}, err 731 } 732 733 return lisafs.StatFS{ 734 Type: uint64(s.Type), 735 BlockSize: s.Bsize, 736 Blocks: s.Blocks, 737 BlocksFree: s.Bfree, 738 BlocksAvailable: s.Bavail, 739 Files: s.Files, 740 FilesFree: s.Ffree, 741 NameLength: uint64(s.Namelen), 742 }, nil 743 } 744 745 // Readlink implements lisafs.ControlFDImpl.Readlink. 746 func (fd *controlFDLisa) Readlink(getLinkBuf func(uint32) []byte) (uint16, error) { 747 // This is similar to what os.Readlink does. 748 for linkLen := 128; linkLen < math.MaxUint16; linkLen *= 2 { 749 b := getLinkBuf(uint32(linkLen)) 750 n, err := unix.Readlinkat(fd.hostFD, "", b) 751 if err != nil { 752 return 0, err 753 } 754 if n < int(linkLen) { 755 return uint16(n), nil 756 } 757 } 758 return 0, unix.ENOMEM 759 } 760 761 func isSockTypeSupported(sockType uint32) bool { 762 switch sockType { 763 case unix.SOCK_STREAM, unix.SOCK_DGRAM, unix.SOCK_SEQPACKET: 764 return true 765 default: 766 log.Debugf("socket type %d is not supported", sockType) 767 return false 768 } 769 } 770 771 // Connect implements lisafs.ControlFDImpl.Connect. 772 func (fd *controlFDLisa) Connect(sockType uint32) (int, error) { 773 if !fd.Conn().ServerImpl().(*LisafsServer).config.HostUDS.AllowOpen() { 774 return -1, unix.EPERM 775 } 776 777 // TODO(gvisor.dev/issue/1003): Due to different app vs replacement 778 // mappings, the app path may have fit in the sockaddr, but we can't fit 779 // hostPath in our sockaddr. We'd need to redirect through a shorter path 780 // in order to actually connect to this socket. 781 hostPath := fd.Node().FilePath() 782 if len(hostPath) >= linux.UnixPathMax { 783 return -1, unix.EINVAL 784 } 785 786 if !isSockTypeSupported(sockType) { 787 return -1, unix.ENXIO 788 } 789 790 sock, err := unix.Socket(unix.AF_UNIX, int(sockType), 0) 791 if err != nil { 792 return -1, err 793 } 794 795 sa := unix.SockaddrUnix{Name: hostPath} 796 if err := unix.Connect(sock, &sa); err != nil { 797 unix.Close(sock) 798 return -1, err 799 } 800 return sock, nil 801 } 802 803 // BindAt implements lisafs.ControlFDImpl.BindAt. 804 func (fd *controlFDLisa) BindAt(name string, sockType uint32, mode linux.FileMode, uid lisafs.UID, gid lisafs.GID) (*lisafs.ControlFD, linux.Statx, *lisafs.BoundSocketFD, int, error) { 805 if !fd.Conn().ServerImpl().(*LisafsServer).config.HostUDS.AllowCreate() { 806 return nil, linux.Statx{}, nil, -1, unix.EPERM 807 } 808 809 // Because there is no "bindat" syscall in Linux, we must create an 810 // absolute path to the socket we are creating, 811 socketPath := filepath.Join(fd.Node().FilePath(), name) 812 813 // TODO(gvisor.dev/issue/1003): Due to different app vs replacement 814 // mappings, the app path may have fit in the sockaddr, but we can't fit 815 // hostPath in our sockaddr. We'd need to redirect through a shorter path 816 // in order to actually connect to this socket. 817 if len(socketPath) >= linux.UnixPathMax { 818 log.Warningf("BindAt called with name too long: %q (len=%d)", socketPath, len(socketPath)) 819 return nil, linux.Statx{}, nil, -1, unix.EINVAL 820 } 821 822 // Only the following types are supported. 823 if !isSockTypeSupported(sockType) { 824 return nil, linux.Statx{}, nil, -1, unix.ENXIO 825 } 826 827 // Create and bind the socket using the sockPath which may be a 828 // symlink. 829 sockFD, err := unix.Socket(unix.AF_UNIX, int(sockType), 0) 830 if err != nil { 831 return nil, linux.Statx{}, nil, -1, err 832 } 833 cu := cleanup.Make(func() { 834 _ = unix.Close(sockFD) 835 }) 836 defer cu.Clean() 837 838 // fchmod(2) has to happen *before* the bind(2). sockFD's file mode will 839 // be used in creating the filesystem-object in bind(2). 840 if err := unix.Fchmod(sockFD, uint32(mode&^linux.FileTypeMask)); err != nil { 841 return nil, linux.Statx{}, nil, -1, err 842 } 843 844 if err := unix.Bind(sockFD, &unix.SockaddrUnix{Name: socketPath}); err != nil { 845 return nil, linux.Statx{}, nil, -1, err 846 } 847 cu.Add(func() { 848 _ = unix.Unlink(socketPath) 849 }) 850 851 sockFileFD, err := tryOpen(func(flags int) (int, error) { 852 return unix.Openat(fd.hostFD, name, flags, 0) 853 }) 854 if err != nil { 855 return nil, linux.Statx{}, nil, -1, err 856 } 857 cu.Add(func() { 858 _ = unix.Close(sockFileFD) 859 }) 860 861 if err := unix.Fchownat(sockFileFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 862 return nil, linux.Statx{}, nil, -1, err 863 } 864 865 // Stat the socket. 866 sockStat, err := fstatTo(sockFileFD) 867 if err != nil { 868 return nil, linux.Statx{}, nil, -1, err 869 } 870 871 // Create an FD that will be donated to the sandbox. 872 sockFDToDonate, err := unix.Dup(sockFD) 873 if err != nil { 874 return nil, linux.Statx{}, nil, -1, err 875 } 876 cu.Release() 877 878 socketControlFD := newControlFDLisa(sockFD, fd, name, linux.ModeSocket) 879 boundSocketFD := &boundSocketFDLisa{ 880 sock: os.NewFile(uintptr(sockFD), socketPath), 881 } 882 boundSocketFD.Init(socketControlFD.FD(), boundSocketFD) 883 884 return socketControlFD.FD(), sockStat, boundSocketFD.FD(), sockFDToDonate, nil 885 } 886 887 // Unlink implements lisafs.ControlFDImpl.Unlink. 888 func (fd *controlFDLisa) Unlink(name string, flags uint32) error { 889 return unix.Unlinkat(fd.hostFD, name, int(flags)) 890 } 891 892 // RenameAt implements lisafs.ControlFDImpl.RenameAt. 893 func (fd *controlFDLisa) RenameAt(oldName string, newDir lisafs.ControlFDImpl, newName string) error { 894 return fsutil.RenameAt(fd.hostFD, oldName, newDir.(*controlFDLisa).hostFD, newName) 895 } 896 897 // Renamed implements lisafs.ControlFDImpl.Renamed. 898 func (fd *controlFDLisa) Renamed() { 899 // controlFDLisa does not have any state to update on rename. 900 } 901 902 // GetXattr implements lisafs.ControlFDImpl.GetXattr. 903 func (fd *controlFDLisa) GetXattr(name string, size uint32, getValueBuf func(uint32) []byte) (uint16, error) { 904 data := getValueBuf(size) 905 xattrSize, err := unix.Fgetxattr(fd.hostFD, name, data) 906 return uint16(xattrSize), err 907 } 908 909 // SetXattr implements lisafs.ControlFDImpl.SetXattr. 910 func (fd *controlFDLisa) SetXattr(name string, value string, flags uint32) error { 911 return unix.EOPNOTSUPP 912 } 913 914 // ListXattr implements lisafs.ControlFDImpl.ListXattr. 915 func (fd *controlFDLisa) ListXattr(size uint64) (lisafs.StringArray, error) { 916 return nil, unix.EOPNOTSUPP 917 } 918 919 // RemoveXattr implements lisafs.ControlFDImpl.RemoveXattr. 920 func (fd *controlFDLisa) RemoveXattr(name string) error { 921 return unix.EOPNOTSUPP 922 } 923 924 // openFDLisa implements lisafs.OpenFDImpl. 925 type openFDLisa struct { 926 lisafs.OpenFD 927 928 // hostFD is the host file descriptor which can be used to make syscalls. 929 hostFD int 930 } 931 932 var _ lisafs.OpenFDImpl = (*openFDLisa)(nil) 933 934 func (fd *controlFDLisa) newOpenFDLisa(hostFD int, flags uint32) *openFDLisa { 935 newFD := &openFDLisa{ 936 hostFD: hostFD, 937 } 938 newFD.OpenFD.Init(fd.FD(), flags, newFD) 939 return newFD 940 } 941 942 // FD implements lisafs.OpenFDImpl.FD. 943 func (fd *openFDLisa) FD() *lisafs.OpenFD { 944 if fd == nil { 945 return nil 946 } 947 return &fd.OpenFD 948 } 949 950 // Close implements lisafs.OpenFDImpl.Close. 951 func (fd *openFDLisa) Close() { 952 if fd.hostFD >= 0 { 953 _ = unix.Close(fd.hostFD) 954 fd.hostFD = -1 955 } 956 } 957 958 // Stat implements lisafs.OpenFDImpl.Stat. 959 func (fd *openFDLisa) Stat() (linux.Statx, error) { 960 return fstatTo(fd.hostFD) 961 } 962 963 // Sync implements lisafs.OpenFDImpl.Sync. 964 func (fd *openFDLisa) Sync() error { 965 return unix.Fsync(fd.hostFD) 966 } 967 968 // Write implements lisafs.OpenFDImpl.Write. 969 func (fd *openFDLisa) Write(buf []byte, off uint64) (uint64, error) { 970 rw := rwfd.NewReadWriter(fd.hostFD) 971 n, err := rw.WriteAt(buf, int64(off)) 972 return uint64(n), err 973 } 974 975 // Read implements lisafs.OpenFDImpl.Read. 976 func (fd *openFDLisa) Read(buf []byte, off uint64) (uint64, error) { 977 rw := rwfd.NewReadWriter(fd.hostFD) 978 n, err := rw.ReadAt(buf, int64(off)) 979 if err != nil && err != io.EOF { 980 return 0, err 981 } 982 return uint64(n), nil 983 } 984 985 // Allocate implements lisafs.OpenFDImpl.Allocate. 986 func (fd *openFDLisa) Allocate(mode, off, length uint64) error { 987 return unix.Fallocate(fd.hostFD, uint32(mode), int64(off), int64(length)) 988 } 989 990 // Flush implements lisafs.OpenFDImpl.Flush. 991 func (fd *openFDLisa) Flush() error { 992 return nil 993 } 994 995 // Getdent64 implements lisafs.OpenFDImpl.Getdent64. 996 func (fd *openFDLisa) Getdent64(count uint32, seek0 bool, recordDirent func(lisafs.Dirent64)) error { 997 if seek0 { 998 if _, err := unix.Seek(fd.hostFD, 0, 0); err != nil { 999 return err 1000 } 1001 } 1002 1003 var direntsBuf [8192]byte 1004 var bytesRead int 1005 for bytesRead < int(count) { 1006 bufEnd := len(direntsBuf) 1007 if remaining := int(count) - bytesRead; remaining < bufEnd { 1008 bufEnd = remaining 1009 } 1010 n, err := unix.Getdents(fd.hostFD, direntsBuf[:bufEnd]) 1011 if err != nil { 1012 if err == unix.EINVAL && bufEnd < fsutil.UnixDirentMaxSize { 1013 // getdents64(2) returns EINVAL is returned when the result 1014 // buffer is too small. If bufEnd is smaller than the max 1015 // size of unix.Dirent, then just break here to return all 1016 // dirents collected till now. 1017 break 1018 } 1019 return err 1020 } 1021 if n <= 0 { 1022 break 1023 } 1024 1025 fsutil.ParseDirents(direntsBuf[:n], func(ino uint64, off int64, ftype uint8, name string, reclen uint16) { 1026 dirent := lisafs.Dirent64{ 1027 Ino: primitive.Uint64(ino), 1028 Off: primitive.Uint64(off), 1029 Type: primitive.Uint8(ftype), 1030 Name: lisafs.SizedString(name), 1031 } 1032 1033 // The client also wants the device ID, which annoyingly incurs an 1034 // additional syscall per dirent. 1035 // TODO(gvisor.dev/issue/6665): Get rid of per-dirent stat. 1036 stat, err := fsutil.StatAt(fd.hostFD, name) 1037 if err != nil { 1038 log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(fd.ControlFD().FD().Node().FilePath(), name), err) 1039 return 1040 } 1041 dirent.DevMinor = primitive.Uint32(unix.Minor(stat.Dev)) 1042 dirent.DevMajor = primitive.Uint32(unix.Major(stat.Dev)) 1043 recordDirent(dirent) 1044 bytesRead += int(reclen) 1045 }) 1046 } 1047 return nil 1048 } 1049 1050 // Renamed implements lisafs.OpenFDImpl.Renamed. 1051 func (fd *openFDLisa) Renamed() { 1052 // openFDLisa does not have any state to update on rename. 1053 } 1054 1055 type boundSocketFDLisa struct { 1056 lisafs.BoundSocketFD 1057 1058 sock *os.File 1059 } 1060 1061 var _ lisafs.BoundSocketFDImpl = (*boundSocketFDLisa)(nil) 1062 1063 // Close implements lisafs.BoundSocketFD.Close. 1064 func (fd *boundSocketFDLisa) Close() { 1065 fd.sock.Close() 1066 } 1067 1068 // FD implements lisafs.BoundSocketFD.FD. 1069 func (fd *boundSocketFDLisa) FD() *lisafs.BoundSocketFD { 1070 if fd == nil { 1071 return nil 1072 } 1073 return &fd.BoundSocketFD 1074 } 1075 1076 // Listen implements lisafs.BoundSocketFD.Listen. 1077 func (fd *boundSocketFDLisa) Listen(backlog int32) error { 1078 return unix.Listen(int(fd.sock.Fd()), int(backlog)) 1079 } 1080 1081 // Listen implements lisafs.BoundSocketFD.Accept. 1082 func (fd *boundSocketFDLisa) Accept() (int, string, error) { 1083 flags := unix.O_NONBLOCK | unix.O_CLOEXEC 1084 nfd, _, err := unix.Accept4(int(fd.sock.Fd()), flags) 1085 if err != nil { 1086 return -1, "", err 1087 } 1088 // Return an empty peer address so that we don't leak the actual host 1089 // address. 1090 return nfd, "", err 1091 } 1092 1093 // tryOpen tries to open() with different modes as documented. 1094 func tryOpen(open func(int) (int, error)) (hostFD int, err error) { 1095 // Attempt to open file in the following in order: 1096 // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs. 1097 // Use non-blocking to prevent getting stuck inside open(2) for 1098 // FIFOs. This option has no effect on regular files. 1099 // 2. PATH: for symlinks, sockets. 1100 flags := []int{ 1101 unix.O_RDONLY | unix.O_NONBLOCK, 1102 unix.O_PATH, 1103 } 1104 1105 for _, flag := range flags { 1106 hostFD, err = open(flag | openFlags) 1107 if err == nil { 1108 return 1109 } 1110 1111 if e := extractErrno(err); e == unix.ENOENT { 1112 // File doesn't exist, no point in retrying. 1113 return -1, e 1114 } 1115 } 1116 return 1117 } 1118 1119 func fstatTo(hostFD int) (linux.Statx, error) { 1120 var stat unix.Stat_t 1121 if err := unix.Fstat(hostFD, &stat); err != nil { 1122 return linux.Statx{}, err 1123 } 1124 1125 return linux.Statx{ 1126 Mask: unix.STATX_TYPE | unix.STATX_MODE | unix.STATX_INO | unix.STATX_NLINK | unix.STATX_UID | unix.STATX_GID | unix.STATX_SIZE | unix.STATX_BLOCKS | unix.STATX_ATIME | unix.STATX_MTIME | unix.STATX_CTIME, 1127 Mode: uint16(stat.Mode), 1128 DevMinor: unix.Minor(stat.Dev), 1129 DevMajor: unix.Major(stat.Dev), 1130 Ino: stat.Ino, 1131 Nlink: uint32(stat.Nlink), 1132 UID: stat.Uid, 1133 GID: stat.Gid, 1134 RdevMinor: unix.Minor(stat.Rdev), 1135 RdevMajor: unix.Major(stat.Rdev), 1136 Size: uint64(stat.Size), 1137 Blksize: uint32(stat.Blksize), 1138 Blocks: uint64(stat.Blocks), 1139 Atime: linux.StatxTimestamp{ 1140 Sec: stat.Atim.Sec, 1141 Nsec: uint32(stat.Atim.Nsec), 1142 }, 1143 Mtime: linux.StatxTimestamp{ 1144 Sec: stat.Mtim.Sec, 1145 Nsec: uint32(stat.Mtim.Nsec), 1146 }, 1147 Ctime: linux.StatxTimestamp{ 1148 Sec: stat.Ctim.Sec, 1149 Nsec: uint32(stat.Ctim.Nsec), 1150 }, 1151 }, nil 1152 } 1153 1154 func checkSupportedFileType(mode uint32) error { 1155 switch mode & unix.S_IFMT { 1156 case unix.S_IFREG, unix.S_IFDIR, unix.S_IFLNK, unix.S_IFCHR, unix.S_IFSOCK, unix.S_IFIFO: 1157 return nil 1158 1159 default: 1160 return unix.EPERM 1161 } 1162 } 1163 1164 // extractErrno tries to determine the errno. 1165 func extractErrno(err error) unix.Errno { 1166 if err == nil { 1167 // This should never happen. The likely result will be that 1168 // some user gets the frustrating "error: SUCCESS" message. 1169 log.Warningf("extractErrno called with nil error!") 1170 return 0 1171 } 1172 1173 switch err { 1174 case os.ErrNotExist: 1175 return unix.ENOENT 1176 case os.ErrExist: 1177 return unix.EEXIST 1178 case os.ErrPermission: 1179 return unix.EACCES 1180 case os.ErrInvalid: 1181 return unix.EINVAL 1182 } 1183 1184 // See if it's an errno or a common wrapped error. 1185 switch e := err.(type) { 1186 case unix.Errno: 1187 return e 1188 case *os.PathError: 1189 return extractErrno(e.Err) 1190 case *os.LinkError: 1191 return extractErrno(e.Err) 1192 case *os.SyscallError: 1193 return extractErrno(e.Err) 1194 } 1195 1196 // Fall back to EIO. 1197 log.Debugf("Unknown error: %v, defaulting to EIO", err) 1198 return unix.EIO 1199 } 1200 1201 // LINT.ThenChange(../../pkg/sentry/fsimpl/gofer/directfs_dentry.go)