github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/runsc/fsgofer/lisafs.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package fsgofer provides a lisafs server implementation which gives access 16 // to local files. 17 package fsgofer 18 19 import ( 20 "fmt" 21 "io" 22 "math" 23 "os" 24 "path" 25 "path/filepath" 26 "strconv" 27 28 "golang.org/x/sys/unix" 29 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 30 "github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops" 31 "github.com/nicocha30/gvisor-ligolo/pkg/cleanup" 32 rwfd "github.com/nicocha30/gvisor-ligolo/pkg/fd" 33 "github.com/nicocha30/gvisor-ligolo/pkg/fsutil" 34 "github.com/nicocha30/gvisor-ligolo/pkg/lisafs" 35 "github.com/nicocha30/gvisor-ligolo/pkg/log" 36 "github.com/nicocha30/gvisor-ligolo/pkg/marshal/primitive" 37 "github.com/nicocha30/gvisor-ligolo/runsc/config" 38 ) 39 40 // LINT.IfChange 41 42 const ( 43 openFlags = unix.O_NOFOLLOW | unix.O_CLOEXEC 44 45 // UNIX_PATH_MAX as defined in include/uapi/linux/un.h. 46 unixPathMax = 108 47 ) 48 49 // Config sets configuration options for each attach point. 50 type Config struct { 51 // ROMount is set to true if this is a readonly mount. 52 ROMount bool 53 54 // PanicOnWrite panics on attempts to write to RO mounts. 55 PanicOnWrite bool 56 57 // HostUDS signals whether the gofer can connect to host unix domain sockets. 
58 HostUDS config.HostUDS 59 60 // HostFifo signals whether the gofer can connect to host FIFOs. 61 HostFifo config.HostFifo 62 63 // DonateMountPointFD indicates whether a host FD to the mount point should 64 // be donated to the client on Mount RPC. 65 DonateMountPointFD bool 66 } 67 68 var procSelfFD *rwfd.FD 69 70 // OpenProcSelfFD opens the /proc/self/fd directory, which will be used to 71 // reopen file descriptors. 72 func OpenProcSelfFD() error { 73 d, err := unix.Open("/proc/self/fd", unix.O_RDONLY|unix.O_DIRECTORY, 0) 74 if err != nil { 75 return fmt.Errorf("error opening /proc/self/fd: %v", err) 76 } 77 procSelfFD = rwfd.New(d) 78 return nil 79 } 80 81 // LisafsServer implements lisafs.ServerImpl for fsgofer. 82 type LisafsServer struct { 83 lisafs.Server 84 config Config 85 } 86 87 var _ lisafs.ServerImpl = (*LisafsServer)(nil) 88 89 // NewLisafsServer initializes a new lisafs server for fsgofer. 90 func NewLisafsServer(config Config) *LisafsServer { 91 s := &LisafsServer{config: config} 92 s.Server.Init(s, lisafs.ServerOpts{ 93 WalkStatSupported: true, 94 SetAttrOnDeleted: true, 95 AllocateOnDeleted: true, 96 }) 97 return s 98 } 99 100 // Mount implements lisafs.ServerImpl.Mount. 
101 func (s *LisafsServer) Mount(c *lisafs.Connection, mountNode *lisafs.Node) (*lisafs.ControlFD, linux.Statx, int, error) { 102 mountPath := mountNode.FilePath() 103 rootHostFD, err := tryOpen(func(flags int) (int, error) { 104 return unix.Open(mountPath, flags, 0) 105 }) 106 if err != nil { 107 return nil, linux.Statx{}, -1, err 108 } 109 cu := cleanup.Make(func() { 110 _ = unix.Close(rootHostFD) 111 }) 112 defer cu.Clean() 113 114 stat, err := fstatTo(rootHostFD) 115 if err != nil { 116 return nil, linux.Statx{}, -1, err 117 } 118 119 if err := checkSupportedFileType(uint32(stat.Mode)); err != nil { 120 log.Warningf("Mount: checkSupportedFileType() failed for file %q with mode %o: %v", mountPath, stat.Mode, err) 121 return nil, linux.Statx{}, -1, err 122 } 123 124 clientHostFD := -1 125 if s.config.DonateMountPointFD { 126 clientHostFD, err = unix.Dup(rootHostFD) 127 if err != nil { 128 return nil, linux.Statx{}, -1, err 129 } 130 } 131 cu.Release() 132 133 rootFD := &controlFDLisa{ 134 hostFD: rootHostFD, 135 writableHostFD: atomicbitops.FromInt32(-1), 136 isMountPoint: true, 137 } 138 mountNode.IncRef() // Ref is transferred to ControlFD. 139 rootFD.ControlFD.Init(c, mountNode, linux.FileMode(stat.Mode), rootFD) 140 return rootFD.FD(), stat, clientHostFD, nil 141 } 142 143 // MaxMessageSize implements lisafs.ServerImpl.MaxMessageSize. 144 func (s *LisafsServer) MaxMessageSize() uint32 { 145 return lisafs.MaxMessageSize() 146 } 147 148 // SupportedMessages implements lisafs.ServerImpl.SupportedMessages. 149 func (s *LisafsServer) SupportedMessages() []lisafs.MID { 150 // Note that Flush, FListXattr and FRemoveXattr are not supported. 
151 return []lisafs.MID{ 152 lisafs.Mount, 153 lisafs.Channel, 154 lisafs.FStat, 155 lisafs.SetStat, 156 lisafs.Walk, 157 lisafs.WalkStat, 158 lisafs.OpenAt, 159 lisafs.OpenCreateAt, 160 lisafs.Close, 161 lisafs.FSync, 162 lisafs.PWrite, 163 lisafs.PRead, 164 lisafs.MkdirAt, 165 lisafs.MknodAt, 166 lisafs.SymlinkAt, 167 lisafs.LinkAt, 168 lisafs.FStatFS, 169 lisafs.FAllocate, 170 lisafs.ReadLinkAt, 171 lisafs.Connect, 172 lisafs.UnlinkAt, 173 lisafs.RenameAt, 174 lisafs.Getdents64, 175 lisafs.FGetXattr, 176 lisafs.FSetXattr, 177 lisafs.BindAt, 178 lisafs.Listen, 179 lisafs.Accept, 180 } 181 } 182 183 // controlFDLisa implements lisafs.ControlFDImpl. 184 type controlFDLisa struct { 185 lisafs.ControlFD 186 187 // hostFD is the file descriptor which can be used to make host syscalls. 188 hostFD int 189 190 // writableHostFD is the file descriptor number for a writable FD opened on 191 // the same FD as `hostFD`. It is initialized to -1, and can change in value 192 // exactly once. 193 writableHostFD atomicbitops.Int32 194 195 // isMountpoint indicates whether this FD represents the mount point for its 196 // owning connection. isMountPoint is immutable. 197 isMountPoint bool 198 } 199 200 var _ lisafs.ControlFDImpl = (*controlFDLisa)(nil) 201 202 func newControlFDLisa(hostFD int, parent *controlFDLisa, name string, mode linux.FileMode) *controlFDLisa { 203 var ( 204 childFD *controlFDLisa 205 childNode *lisafs.Node 206 parentNode = parent.Node() 207 ) 208 parentNode.WithChildrenMu(func() { 209 childNode = parentNode.LookupChildLocked(name) 210 if childNode == nil { 211 // Common case. Performance hack which is used to allocate the node and 212 // its control FD together in the heap. For a well-behaving client, there 213 // will be a 1:1 mapping between control FD and node and their lifecycle 214 // will be similar too. This will help reduce allocations and memory 215 // fragmentation. This is more cache friendly too. 
216 temp := struct { 217 node lisafs.Node 218 fd controlFDLisa 219 }{} 220 childFD = &temp.fd 221 childNode = &temp.node 222 childNode.InitLocked(name, parentNode) 223 } else { 224 childNode.IncRef() 225 childFD = &controlFDLisa{} 226 } 227 }) 228 childFD.hostFD = hostFD 229 childFD.writableHostFD = atomicbitops.FromInt32(-1) 230 childFD.ControlFD.Init(parent.Conn(), childNode, mode, childFD) 231 return childFD 232 } 233 234 func (fd *controlFDLisa) getWritableFD() (int, error) { 235 if writableFD := fd.writableHostFD.Load(); writableFD != -1 { 236 return int(writableFD), nil 237 } 238 239 writableFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(fd.hostFD), (unix.O_WRONLY|openFlags)&^unix.O_NOFOLLOW, 0) 240 if err != nil { 241 return -1, err 242 } 243 if !fd.writableHostFD.CompareAndSwap(-1, int32(writableFD)) { 244 // Race detected, use the new value and clean this up. 245 unix.Close(writableFD) 246 return int(fd.writableHostFD.Load()), nil 247 } 248 return writableFD, nil 249 } 250 251 func (fd *controlFDLisa) getParentFD() (int, string, error) { 252 filePath := fd.Node().FilePath() 253 if filePath == "/" { 254 log.Warningf("getParentFD() call on the root") 255 return -1, "", unix.EINVAL 256 } 257 parent, err := unix.Open(path.Dir(filePath), openFlags|unix.O_PATH, 0) 258 return parent, path.Base(filePath), err 259 } 260 261 // FD implements lisafs.ControlFDImpl.FD. 262 func (fd *controlFDLisa) FD() *lisafs.ControlFD { 263 if fd == nil { 264 return nil 265 } 266 return &fd.ControlFD 267 } 268 269 // Close implements lisafs.ControlFDImpl.Close. 270 func (fd *controlFDLisa) Close() { 271 if fd.hostFD >= 0 { 272 _ = unix.Close(fd.hostFD) 273 fd.hostFD = -1 274 } 275 // No concurrent access is possible so no need to use atomics. 276 if fd.writableHostFD.RacyLoad() >= 0 { 277 _ = unix.Close(int(fd.writableHostFD.RacyLoad())) 278 fd.writableHostFD = atomicbitops.FromInt32(-1) 279 } 280 } 281 282 // Stat implements lisafs.ControlFDImpl.Stat. 
func (fd *controlFDLisa) Stat() (linux.Statx, error) {
	return fstatTo(fd.hostFD)
}

// SetStat implements lisafs.ControlFDImpl.SetStat.
//
// Each attribute group selected in stat.Mask (mode, size, atime/mtime,
// uid/gid) is applied independently, in that order. A failing group does not
// stop the later ones: its bits are added to failureMask and failureErr is
// overwritten, so failureErr holds the error of the last group that failed.
func (fd *controlFDLisa) SetStat(stat lisafs.SetStatReq) (failureMask uint32, failureErr error) {
	if stat.Mask&unix.STATX_MODE != 0 {
		if fd.IsSocket() {
			// fchmod(2) on socket files created via bind(2) fails. We need to
			// fchmodat(2) it from its parent.
			parent, sockName, err := fd.getParentFD()
			if err == nil {
				// Note that AT_SYMLINK_NOFOLLOW flag is not currently supported.
				err = unix.Fchmodat(parent, sockName, stat.Mode&^unix.S_IFMT, 0 /* flags */)
				unix.Close(parent)
			}
			if err != nil {
				log.Warningf("SetStat fchmod failed on socket %q, err: %v", fd.Node().FilePath(), err)
				failureMask |= unix.STATX_MODE
				failureErr = err
			}
		} else {
			if err := unix.Fchmod(fd.hostFD, stat.Mode&^unix.S_IFMT); err != nil {
				log.Warningf("SetStat fchmod failed %q, err: %v", fd.Node().FilePath(), err)
				failureMask |= unix.STATX_MODE
				failureErr = err
			}
		}
	}

	if stat.Mask&unix.STATX_SIZE != 0 {
		// ftruncate(2) requires the FD to be open for writing.
		writableFD, err := fd.getWritableFD()
		if err == nil {
			err = unix.Ftruncate(writableFD, int64(stat.Size))
		}
		if err != nil {
			log.Warningf("SetStat ftruncate failed %q, err: %v", fd.Node().FilePath(), err)
			failureMask |= unix.STATX_SIZE
			failureErr = err
		}
	}

	if stat.Mask&(unix.STATX_ATIME|unix.STATX_MTIME) != 0 {
		// Both timestamps start as UTIME_OMIT; only the ones selected in the
		// mask are filled in below.
		utimes := [2]unix.Timespec{
			{Sec: 0, Nsec: unix.UTIME_OMIT},
			{Sec: 0, Nsec: unix.UTIME_OMIT},
		}
		if stat.Mask&unix.STATX_ATIME != 0 {
			utimes[0].Sec = stat.Atime.Sec
			utimes[0].Nsec = stat.Atime.Nsec
		}
		if stat.Mask&unix.STATX_MTIME != 0 {
			utimes[1].Sec = stat.Mtime.Sec
			utimes[1].Nsec = stat.Mtime.Nsec
		}

		if fd.IsSymlink() {
			// utimensat operates differently than other syscalls. To operate on a
			// symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty
			// name. We need the parent FD.
			parent, symlinkName, err := fd.getParentFD()
			if err == nil {
				err = fsutil.Utimensat(parent, symlinkName, utimes, unix.AT_SYMLINK_NOFOLLOW)
				unix.Close(parent)
			}
			if err != nil {
				failureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME))
				failureErr = err
			}
		} else {
			hostFD := fd.hostFD
			if fd.IsRegular() {
				// For regular files, utimensat(2) requires the FD to be open for
				// writing, see BUGS section.
				if writableFD, err := fd.getWritableFD(); err == nil {
					hostFD = writableFD
				} else {
					// Fall through and attempt the call with fd.hostFD.
					log.Warningf("SetStat getWritableFD failed %q, err: %v", fd.Node().FilePath(), err)
				}
			}
			// Directories and regular files can operate directly on the fd
			// using empty name.
			err := fsutil.Utimensat(hostFD, "", utimes, 0)
			if err != nil {
				log.Warningf("SetStat utimens failed %q, err: %v", fd.Node().FilePath(), err)
				failureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME))
				failureErr = err
			}
		}
	}

	if stat.Mask&(unix.STATX_UID|unix.STATX_GID) != 0 {
		// "If the owner or group is specified as -1, then that ID is not changed"
		// - chown(2)
		uid := -1
		if stat.Mask&unix.STATX_UID != 0 {
			uid = int(stat.UID)
		}
		gid := -1
		if stat.Mask&unix.STATX_GID != 0 {
			gid = int(stat.GID)
		}
		if err := unix.Fchownat(fd.hostFD, "", uid, gid, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil {
			log.Warningf("SetStat fchown failed %q, err: %v", fd.Node().FilePath(), err)
			failureMask |= stat.Mask & (unix.STATX_UID | unix.STATX_GID)
			failureErr = err
		}
	}

	return
}

// Walk implements lisafs.ControlFDImpl.Walk.
397 func (fd *controlFDLisa) Walk(name string) (*lisafs.ControlFD, linux.Statx, error) { 398 childHostFD, err := tryOpen(func(flags int) (int, error) { 399 return unix.Openat(fd.hostFD, name, flags, 0) 400 }) 401 if err != nil { 402 return nil, linux.Statx{}, err 403 } 404 405 stat, err := fstatTo(childHostFD) 406 if err != nil { 407 _ = unix.Close(childHostFD) 408 return nil, linux.Statx{}, err 409 } 410 411 if err := checkSupportedFileType(uint32(stat.Mode)); err != nil { 412 _ = unix.Close(childHostFD) 413 log.Warningf("Walk: checkSupportedFileType() failed for %q with mode %o: %v", name, stat.Mode, err) 414 return nil, linux.Statx{}, err 415 } 416 417 return newControlFDLisa(childHostFD, fd, name, linux.FileMode(stat.Mode)).FD(), stat, nil 418 } 419 420 // WalkStat implements lisafs.ControlFDImpl.WalkStat. 421 func (fd *controlFDLisa) WalkStat(path lisafs.StringArray, recordStat func(linux.Statx)) error { 422 // Note that while performing the walk below, we do not have read concurrency 423 // guarantee for any descendants. So files can be created/deleted inside fd 424 // while the walk is being performed. However, this should be fine from a 425 // security perspective as we are using host FDs to walk and checking that 426 // each opened path component is not a symlink. 427 curDirFD := fd.hostFD 428 closeCurDirFD := func() { 429 if curDirFD != fd.hostFD { 430 unix.Close(curDirFD) 431 } 432 } 433 defer closeCurDirFD() 434 if len(path) > 0 && len(path[0]) == 0 { 435 // Write stat results for dirFD if the first path component is "". 436 stat, err := fstatTo(fd.hostFD) 437 if err != nil { 438 return err 439 } 440 recordStat(stat) 441 path = path[1:] 442 } 443 444 // Don't attempt walking if parent is a symlink. 445 if fd.IsSymlink() { 446 return nil 447 } 448 for _, name := range path { 449 curFD, err := unix.Openat(curDirFD, name, unix.O_PATH|openFlags, 0) 450 if err == unix.ENOENT { 451 // No more path components exist on the filesystem. 
Return the partial 452 // walk to the client. 453 break 454 } 455 if err != nil { 456 return err 457 } 458 closeCurDirFD() 459 curDirFD = curFD 460 461 stat, err := fstatTo(curFD) 462 if err != nil { 463 return err 464 } 465 if err := checkSupportedFileType(uint32(stat.Mode)); err != nil { 466 log.Warningf("WalkStat: checkSupportedFileType() failed for file %q with mode %o while walking path %+v: %v", name, stat.Mode, path, err) 467 return err 468 } 469 recordStat(stat) 470 471 // Symlinks terminate walk. This client gets the symlink stat result, but 472 // will have to invoke Walk again with the resolved path. 473 if stat.Mode&unix.S_IFMT == unix.S_IFLNK { 474 break 475 } 476 } 477 478 return nil 479 } 480 481 // Open implements lisafs.ControlFDImpl.Open. 482 func (fd *controlFDLisa) Open(flags uint32) (*lisafs.OpenFD, int, error) { 483 ftype := fd.FileType() 484 server := fd.Conn().ServerImpl().(*LisafsServer) 485 switch ftype { 486 case unix.S_IFIFO: 487 if !server.config.HostFifo.AllowOpen() { 488 return nil, -1, unix.EPERM 489 } 490 case unix.S_IFSOCK: 491 if !server.config.HostUDS.AllowOpen() { 492 return nil, -1, unix.EPERM 493 } 494 } 495 flags |= openFlags 496 openHostFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(fd.hostFD), int(flags)&^unix.O_NOFOLLOW, 0) 497 if err != nil { 498 return nil, -1, err 499 } 500 501 hostFDToDonate := -1 502 switch { 503 case ftype == unix.S_IFREG: 504 // Best effort to donate file to the Sentry (for performance only). 505 hostFDToDonate, _ = unix.Dup(openHostFD) 506 507 case ftype == unix.S_IFIFO, 508 ftype == unix.S_IFCHR, 509 fd.isMountPoint && fd.Conn().ServerImpl().(*LisafsServer).config.DonateMountPointFD: 510 // Character devices and pipes can block indefinitely during reads/writes, 511 // which is not allowed for gofer operations. Ensure that it donates an FD 512 // back to the caller, so it can wait on the FD when reads/writes return 513 // EWOULDBLOCK. 
For mount points, if DonateMountPointFD option is set, an 514 // FD must be donated. 515 var err error 516 hostFDToDonate, err = unix.Dup(openHostFD) 517 if err != nil { 518 return nil, 0, err 519 } 520 } 521 522 openFD := fd.newOpenFDLisa(openHostFD, flags) 523 return openFD.FD(), hostFDToDonate, nil 524 } 525 526 // OpenCreate implements lisafs.ControlFDImpl.OpenCreate. 527 func (fd *controlFDLisa) OpenCreate(mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string, flags uint32) (*lisafs.ControlFD, linux.Statx, *lisafs.OpenFD, int, error) { 528 createFlags := unix.O_CREAT | unix.O_EXCL | unix.O_RDONLY | unix.O_NONBLOCK | openFlags 529 childHostFD, err := unix.Openat(fd.hostFD, name, createFlags, uint32(mode&^linux.FileTypeMask)) 530 if err != nil { 531 return nil, linux.Statx{}, nil, -1, err 532 } 533 534 cu := cleanup.Make(func() { 535 // Best effort attempt to remove the file in case of failure. 536 if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { 537 log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 538 } 539 unix.Close(childHostFD) 540 }) 541 defer cu.Clean() 542 543 // Set the owners as requested by the client. 544 if err := unix.Fchownat(childHostFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 545 return nil, linux.Statx{}, nil, -1, err 546 } 547 548 // Get stat results. 549 childStat, err := fstatTo(childHostFD) 550 if err != nil { 551 return nil, linux.Statx{}, nil, -1, err 552 } 553 554 // Now open an FD to the newly created file with the flags requested by the client. 
555 flags |= openFlags 556 newHostFD, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(childHostFD), int(flags)&^unix.O_NOFOLLOW, 0) 557 if err != nil { 558 return nil, linux.Statx{}, nil, -1, err 559 } 560 cu.Release() 561 562 childFD := newControlFDLisa(childHostFD, fd, name, linux.ModeRegular) 563 newFD := childFD.newOpenFDLisa(newHostFD, uint32(flags)) 564 565 // Donate FD because open(O_CREAT|O_EXCL) always creates a regular file. 566 // Since FD donation is a destructive operation, we should duplicate the 567 // to-be-donated FD. Eat the error if one occurs, it is better to have an FD 568 // without a host FD, than failing the Open attempt. 569 hostOpenFD := -1 570 if dupFD, err := unix.Dup(newFD.hostFD); err == nil { 571 hostOpenFD = dupFD 572 } 573 574 return childFD.FD(), childStat, newFD.FD(), hostOpenFD, nil 575 } 576 577 // Mkdir implements lisafs.ControlFDImpl.Mkdir. 578 func (fd *controlFDLisa) Mkdir(mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string) (*lisafs.ControlFD, linux.Statx, error) { 579 if err := unix.Mkdirat(fd.hostFD, name, uint32(mode&^linux.FileTypeMask)); err != nil { 580 return nil, linux.Statx{}, err 581 } 582 cu := cleanup.Make(func() { 583 // Best effort attempt to remove the dir in case of failure. 584 if err := unix.Unlinkat(fd.hostFD, name, unix.AT_REMOVEDIR); err != nil { 585 log.Warningf("error unlinking dir %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 586 } 587 }) 588 defer cu.Clean() 589 590 // Open directory to change ownership. 591 childDirFd, err := tryOpen(func(flags int) (int, error) { 592 return unix.Openat(fd.hostFD, name, flags|unix.O_DIRECTORY, 0) 593 }) 594 if err != nil { 595 return nil, linux.Statx{}, err 596 } 597 if err := unix.Fchownat(childDirFd, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 598 unix.Close(childDirFd) 599 return nil, linux.Statx{}, err 600 } 601 602 // Get stat results. 
603 childDirStat, err := fstatTo(childDirFd) 604 if err != nil { 605 unix.Close(childDirFd) 606 return nil, linux.Statx{}, err 607 } 608 609 cu.Release() 610 return newControlFDLisa(childDirFd, fd, name, linux.ModeDirectory).FD(), childDirStat, nil 611 } 612 613 // Mknod implements lisafs.ControlFDImpl.Mknod. 614 func (fd *controlFDLisa) Mknod(mode linux.FileMode, uid lisafs.UID, gid lisafs.GID, name string, minor uint32, major uint32) (*lisafs.ControlFD, linux.Statx, error) { 615 // From mknod(2) man page: 616 // "EPERM: [...] if the filesystem containing pathname does not support 617 // the type of node requested." 618 if mode.FileType() != linux.ModeRegular { 619 return nil, linux.Statx{}, unix.EPERM 620 } 621 622 if err := unix.Mknodat(fd.hostFD, name, uint32(mode), 0); err != nil { 623 return nil, linux.Statx{}, err 624 } 625 cu := cleanup.Make(func() { 626 // Best effort attempt to remove the file in case of failure. 627 if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { 628 log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 629 } 630 }) 631 defer cu.Clean() 632 633 // Open file to change ownership. 634 childFD, err := tryOpen(func(flags int) (int, error) { 635 return unix.Openat(fd.hostFD, name, flags, 0) 636 }) 637 if err != nil { 638 return nil, linux.Statx{}, err 639 } 640 if err := unix.Fchownat(childFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 641 unix.Close(childFD) 642 return nil, linux.Statx{}, err 643 } 644 645 // Get stat results. 646 childStat, err := fstatTo(childFD) 647 if err != nil { 648 unix.Close(childFD) 649 return nil, linux.Statx{}, err 650 } 651 cu.Release() 652 653 return newControlFDLisa(childFD, fd, name, mode).FD(), childStat, nil 654 } 655 656 // Symlink implements lisafs.ControlFDImpl.Symlink. 
657 func (fd *controlFDLisa) Symlink(name string, target string, uid lisafs.UID, gid lisafs.GID) (*lisafs.ControlFD, linux.Statx, error) { 658 if err := unix.Symlinkat(target, fd.hostFD, name); err != nil { 659 return nil, linux.Statx{}, err 660 } 661 cu := cleanup.Make(func() { 662 // Best effort attempt to remove the symlink in case of failure. 663 if err := unix.Unlinkat(fd.hostFD, name, 0); err != nil { 664 log.Warningf("error unlinking file %q after failure: %v", path.Join(fd.Node().FilePath(), name), err) 665 } 666 }) 667 defer cu.Clean() 668 669 // Open symlink to change ownership. 670 symlinkFD, err := unix.Openat(fd.hostFD, name, unix.O_PATH|openFlags, 0) 671 if err != nil { 672 return nil, linux.Statx{}, err 673 } 674 if err := unix.Fchownat(symlinkFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 675 unix.Close(symlinkFD) 676 return nil, linux.Statx{}, err 677 } 678 679 symlinkStat, err := fstatTo(symlinkFD) 680 if err != nil { 681 unix.Close(symlinkFD) 682 return nil, linux.Statx{}, err 683 } 684 cu.Release() 685 return newControlFDLisa(symlinkFD, fd, name, linux.ModeSymlink).FD(), symlinkStat, nil 686 } 687 688 // Link implements lisafs.ControlFDImpl.Link. 689 func (fd *controlFDLisa) Link(dir lisafs.ControlFDImpl, name string) (*lisafs.ControlFD, linux.Statx, error) { 690 // Using linkat(targetFD, "", newdirfd, name, AT_EMPTY_PATH) requires 691 // CAP_DAC_READ_SEARCH in the *root* userns. The gofer process has 692 // CAP_DAC_READ_SEARCH in its own userns. But sometimes the gofer may be 693 // running in a different userns. So we can't use AT_EMPTY_PATH. Fallback 694 // to using olddirfd to call linkat(2). 
695 oldDirFD, oldName, err := fd.getParentFD() 696 if err != nil { 697 return nil, linux.Statx{}, err 698 } 699 dirFD := dir.(*controlFDLisa) 700 if err := unix.Linkat(oldDirFD, oldName, dirFD.hostFD, name, 0); err != nil { 701 return nil, linux.Statx{}, err 702 } 703 cu := cleanup.Make(func() { 704 // Best effort attempt to remove the hard link in case of failure. 705 if err := unix.Unlinkat(dirFD.hostFD, name, 0); err != nil { 706 log.Warningf("error unlinking file %q after failure: %v", path.Join(dirFD.Node().FilePath(), name), err) 707 } 708 }) 709 defer cu.Clean() 710 711 linkFD, err := tryOpen(func(flags int) (int, error) { 712 return unix.Openat(dirFD.hostFD, name, flags, 0) 713 }) 714 if err != nil { 715 return nil, linux.Statx{}, err 716 } 717 718 linkStat, err := fstatTo(linkFD) 719 if err != nil { 720 return nil, linux.Statx{}, err 721 } 722 cu.Release() 723 return newControlFDLisa(linkFD, dirFD, name, linux.FileMode(linkStat.Mode)).FD(), linkStat, nil 724 } 725 726 // StatFS implements lisafs.ControlFDImpl.StatFS. 727 func (fd *controlFDLisa) StatFS() (lisafs.StatFS, error) { 728 var s unix.Statfs_t 729 if err := unix.Fstatfs(fd.hostFD, &s); err != nil { 730 return lisafs.StatFS{}, err 731 } 732 733 return lisafs.StatFS{ 734 Type: uint64(s.Type), 735 BlockSize: s.Bsize, 736 Blocks: s.Blocks, 737 BlocksFree: s.Bfree, 738 BlocksAvailable: s.Bavail, 739 Files: s.Files, 740 FilesFree: s.Ffree, 741 NameLength: uint64(s.Namelen), 742 }, nil 743 } 744 745 // Readlink implements lisafs.ControlFDImpl.Readlink. 746 func (fd *controlFDLisa) Readlink(getLinkBuf func(uint32) []byte) (uint16, error) { 747 // This is similar to what os.Readlink does. 
748 for linkLen := 128; linkLen < math.MaxUint16; linkLen *= 2 { 749 b := getLinkBuf(uint32(linkLen)) 750 n, err := unix.Readlinkat(fd.hostFD, "", b) 751 if err != nil { 752 return 0, err 753 } 754 if n < int(linkLen) { 755 return uint16(n), nil 756 } 757 } 758 return 0, unix.ENOMEM 759 } 760 761 func isSockTypeSupported(sockType uint32) bool { 762 switch sockType { 763 case unix.SOCK_STREAM, unix.SOCK_DGRAM, unix.SOCK_SEQPACKET: 764 return true 765 default: 766 log.Debugf("socket type %d is not supported", sockType) 767 return false 768 } 769 } 770 771 // Connect implements lisafs.ControlFDImpl.Connect. 772 func (fd *controlFDLisa) Connect(sockType uint32) (int, error) { 773 if !fd.Conn().ServerImpl().(*LisafsServer).config.HostUDS.AllowOpen() { 774 return -1, unix.EPERM 775 } 776 777 // TODO(gvisor.dev/issue/1003): Due to different app vs replacement 778 // mappings, the app path may have fit in the sockaddr, but we can't fit 779 // hostPath in our sockaddr. We'd need to redirect through a shorter path 780 // in order to actually connect to this socket. 781 hostPath := fd.Node().FilePath() 782 if len(hostPath) >= linux.UnixPathMax { 783 return -1, unix.EINVAL 784 } 785 786 if !isSockTypeSupported(sockType) { 787 return -1, unix.ENXIO 788 } 789 790 sock, err := unix.Socket(unix.AF_UNIX, int(sockType), 0) 791 if err != nil { 792 return -1, err 793 } 794 795 sa := unix.SockaddrUnix{Name: hostPath} 796 if err := unix.Connect(sock, &sa); err != nil { 797 unix.Close(sock) 798 return -1, err 799 } 800 return sock, nil 801 } 802 803 // BindAt implements lisafs.ControlFDImpl.BindAt. 
804 func (fd *controlFDLisa) BindAt(name string, sockType uint32, mode linux.FileMode, uid lisafs.UID, gid lisafs.GID) (*lisafs.ControlFD, linux.Statx, *lisafs.BoundSocketFD, int, error) { 805 if !fd.Conn().ServerImpl().(*LisafsServer).config.HostUDS.AllowCreate() { 806 return nil, linux.Statx{}, nil, -1, unix.EPERM 807 } 808 809 // Because there is no "bindat" syscall in Linux, we must create an 810 // absolute path to the socket we are creating, 811 socketPath := filepath.Join(fd.Node().FilePath(), name) 812 813 // TODO(gvisor.dev/issue/1003): Due to different app vs replacement 814 // mappings, the app path may have fit in the sockaddr, but we can't fit 815 // hostPath in our sockaddr. We'd need to redirect through a shorter path 816 // in order to actually connect to this socket. 817 if len(socketPath) >= linux.UnixPathMax { 818 log.Warningf("BindAt called with name too long: %q (len=%d)", socketPath, len(socketPath)) 819 return nil, linux.Statx{}, nil, -1, unix.EINVAL 820 } 821 822 // Only the following types are supported. 823 if !isSockTypeSupported(sockType) { 824 return nil, linux.Statx{}, nil, -1, unix.ENXIO 825 } 826 827 // Create and bind the socket using the sockPath which may be a 828 // symlink. 829 sockFD, err := unix.Socket(unix.AF_UNIX, int(sockType), 0) 830 if err != nil { 831 return nil, linux.Statx{}, nil, -1, err 832 } 833 cu := cleanup.Make(func() { 834 _ = unix.Close(sockFD) 835 }) 836 defer cu.Clean() 837 838 // fchmod(2) has to happen *before* the bind(2). sockFD's file mode will 839 // be used in creating the filesystem-object in bind(2). 
840 if err := unix.Fchmod(sockFD, uint32(mode&^linux.FileTypeMask)); err != nil { 841 return nil, linux.Statx{}, nil, -1, err 842 } 843 844 if err := unix.Bind(sockFD, &unix.SockaddrUnix{Name: socketPath}); err != nil { 845 return nil, linux.Statx{}, nil, -1, err 846 } 847 cu.Add(func() { 848 _ = unix.Unlink(socketPath) 849 }) 850 851 sockFileFD, err := tryOpen(func(flags int) (int, error) { 852 return unix.Openat(fd.hostFD, name, flags, 0) 853 }) 854 if err != nil { 855 return nil, linux.Statx{}, nil, -1, err 856 } 857 cu.Add(func() { 858 _ = unix.Close(sockFileFD) 859 }) 860 861 if err := unix.Fchownat(sockFileFD, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW); err != nil { 862 return nil, linux.Statx{}, nil, -1, err 863 } 864 865 // Stat the socket. 866 sockStat, err := fstatTo(sockFileFD) 867 if err != nil { 868 return nil, linux.Statx{}, nil, -1, err 869 } 870 871 // Create an FD that will be donated to the sandbox. 872 sockFDToDonate, err := unix.Dup(sockFD) 873 if err != nil { 874 return nil, linux.Statx{}, nil, -1, err 875 } 876 cu.Release() 877 878 socketControlFD := newControlFDLisa(sockFD, fd, name, linux.ModeSocket) 879 boundSocketFD := &boundSocketFDLisa{ 880 sock: os.NewFile(uintptr(sockFD), socketPath), 881 } 882 boundSocketFD.Init(socketControlFD.FD(), boundSocketFD) 883 884 return socketControlFD.FD(), sockStat, boundSocketFD.FD(), sockFDToDonate, nil 885 } 886 887 // Unlink implements lisafs.ControlFDImpl.Unlink. 888 func (fd *controlFDLisa) Unlink(name string, flags uint32) error { 889 return unix.Unlinkat(fd.hostFD, name, int(flags)) 890 } 891 892 // RenameAt implements lisafs.ControlFDImpl.RenameAt. 893 func (fd *controlFDLisa) RenameAt(oldName string, newDir lisafs.ControlFDImpl, newName string) error { 894 return fsutil.RenameAt(fd.hostFD, oldName, newDir.(*controlFDLisa).hostFD, newName) 895 } 896 897 // Renamed implements lisafs.ControlFDImpl.Renamed. 
898 func (fd *controlFDLisa) Renamed() { 899 // controlFDLisa does not have any state to update on rename. 900 } 901 902 // GetXattr implements lisafs.ControlFDImpl.GetXattr. 903 func (fd *controlFDLisa) GetXattr(name string, size uint32, getValueBuf func(uint32) []byte) (uint16, error) { 904 return 0, unix.EOPNOTSUPP 905 } 906 907 // SetXattr implements lisafs.ControlFDImpl.SetXattr. 908 func (fd *controlFDLisa) SetXattr(name string, value string, flags uint32) error { 909 return unix.EOPNOTSUPP 910 } 911 912 // ListXattr implements lisafs.ControlFDImpl.ListXattr. 913 func (fd *controlFDLisa) ListXattr(size uint64) (lisafs.StringArray, error) { 914 return nil, unix.EOPNOTSUPP 915 } 916 917 // RemoveXattr implements lisafs.ControlFDImpl.RemoveXattr. 918 func (fd *controlFDLisa) RemoveXattr(name string) error { 919 return unix.EOPNOTSUPP 920 } 921 922 // openFDLisa implements lisafs.OpenFDImpl. 923 type openFDLisa struct { 924 lisafs.OpenFD 925 926 // hostFD is the host file descriptor which can be used to make syscalls. 927 hostFD int 928 } 929 930 var _ lisafs.OpenFDImpl = (*openFDLisa)(nil) 931 932 func (fd *controlFDLisa) newOpenFDLisa(hostFD int, flags uint32) *openFDLisa { 933 newFD := &openFDLisa{ 934 hostFD: hostFD, 935 } 936 newFD.OpenFD.Init(fd.FD(), flags, newFD) 937 return newFD 938 } 939 940 // FD implements lisafs.OpenFDImpl.FD. 941 func (fd *openFDLisa) FD() *lisafs.OpenFD { 942 if fd == nil { 943 return nil 944 } 945 return &fd.OpenFD 946 } 947 948 // Close implements lisafs.OpenFDImpl.Close. 949 func (fd *openFDLisa) Close() { 950 if fd.hostFD >= 0 { 951 _ = unix.Close(fd.hostFD) 952 fd.hostFD = -1 953 } 954 } 955 956 // Stat implements lisafs.OpenFDImpl.Stat. 957 func (fd *openFDLisa) Stat() (linux.Statx, error) { 958 return fstatTo(fd.hostFD) 959 } 960 961 // Sync implements lisafs.OpenFDImpl.Sync. 962 func (fd *openFDLisa) Sync() error { 963 return unix.Fsync(fd.hostFD) 964 } 965 966 // Write implements lisafs.OpenFDImpl.Write. 
967 func (fd *openFDLisa) Write(buf []byte, off uint64) (uint64, error) { 968 rw := rwfd.NewReadWriter(fd.hostFD) 969 n, err := rw.WriteAt(buf, int64(off)) 970 return uint64(n), err 971 } 972 973 // Read implements lisafs.OpenFDImpl.Read. 974 func (fd *openFDLisa) Read(buf []byte, off uint64) (uint64, error) { 975 rw := rwfd.NewReadWriter(fd.hostFD) 976 n, err := rw.ReadAt(buf, int64(off)) 977 if err != nil && err != io.EOF { 978 return 0, err 979 } 980 return uint64(n), nil 981 } 982 983 // Allocate implements lisafs.OpenFDImpl.Allocate. 984 func (fd *openFDLisa) Allocate(mode, off, length uint64) error { 985 return unix.Fallocate(fd.hostFD, uint32(mode), int64(off), int64(length)) 986 } 987 988 // Flush implements lisafs.OpenFDImpl.Flush. 989 func (fd *openFDLisa) Flush() error { 990 return nil 991 } 992 993 // Getdent64 implements lisafs.OpenFDImpl.Getdent64. 994 func (fd *openFDLisa) Getdent64(count uint32, seek0 bool, recordDirent func(lisafs.Dirent64)) error { 995 if seek0 { 996 if _, err := unix.Seek(fd.hostFD, 0, 0); err != nil { 997 return err 998 } 999 } 1000 1001 var direntsBuf [8192]byte 1002 var bytesRead int 1003 for bytesRead < int(count) { 1004 bufEnd := len(direntsBuf) 1005 if remaining := int(count) - bytesRead; remaining < bufEnd { 1006 bufEnd = remaining 1007 } 1008 n, err := unix.Getdents(fd.hostFD, direntsBuf[:bufEnd]) 1009 if err != nil { 1010 if err == unix.EINVAL && bufEnd < fsutil.UnixDirentMaxSize { 1011 // getdents64(2) returns EINVAL is returned when the result 1012 // buffer is too small. If bufEnd is smaller than the max 1013 // size of unix.Dirent, then just break here to return all 1014 // dirents collected till now. 
1015 break 1016 } 1017 return err 1018 } 1019 if n <= 0 { 1020 break 1021 } 1022 1023 fsutil.ParseDirents(direntsBuf[:n], func(ino uint64, off int64, ftype uint8, name string, reclen uint16) bool { 1024 dirent := lisafs.Dirent64{ 1025 Ino: primitive.Uint64(ino), 1026 Off: primitive.Uint64(off), 1027 Type: primitive.Uint8(ftype), 1028 Name: lisafs.SizedString(name), 1029 } 1030 1031 // The client also wants the device ID, which annoyingly incurs an 1032 // additional syscall per dirent. 1033 // TODO(gvisor.dev/issue/6665): Get rid of per-dirent stat. 1034 stat, err := fsutil.StatAt(fd.hostFD, name) 1035 if err != nil { 1036 log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(fd.ControlFD().FD().Node().FilePath(), name), err) 1037 return true 1038 } 1039 dirent.DevMinor = primitive.Uint32(unix.Minor(stat.Dev)) 1040 dirent.DevMajor = primitive.Uint32(unix.Major(stat.Dev)) 1041 recordDirent(dirent) 1042 bytesRead += int(reclen) 1043 return true 1044 }) 1045 } 1046 return nil 1047 } 1048 1049 // Renamed implements lisafs.OpenFDImpl.Renamed. 1050 func (fd *openFDLisa) Renamed() { 1051 // openFDLisa does not have any state to update on rename. 1052 } 1053 1054 type boundSocketFDLisa struct { 1055 lisafs.BoundSocketFD 1056 1057 sock *os.File 1058 } 1059 1060 var _ lisafs.BoundSocketFDImpl = (*boundSocketFDLisa)(nil) 1061 1062 // Close implements lisafs.BoundSocketFD.Close. 1063 func (fd *boundSocketFDLisa) Close() { 1064 fd.sock.Close() 1065 } 1066 1067 // FD implements lisafs.BoundSocketFD.FD. 1068 func (fd *boundSocketFDLisa) FD() *lisafs.BoundSocketFD { 1069 if fd == nil { 1070 return nil 1071 } 1072 return &fd.BoundSocketFD 1073 } 1074 1075 // Listen implements lisafs.BoundSocketFD.Listen. 1076 func (fd *boundSocketFDLisa) Listen(backlog int32) error { 1077 return unix.Listen(int(fd.sock.Fd()), int(backlog)) 1078 } 1079 1080 // Listen implements lisafs.BoundSocketFD.Accept. 
1081 func (fd *boundSocketFDLisa) Accept() (int, string, error) { 1082 flags := unix.O_NONBLOCK | unix.O_CLOEXEC 1083 nfd, _, err := unix.Accept4(int(fd.sock.Fd()), flags) 1084 if err != nil { 1085 return -1, "", err 1086 } 1087 // Return an empty peer address so that we don't leak the actual host 1088 // address. 1089 return nfd, "", err 1090 } 1091 1092 // tryOpen tries to open() with different modes as documented. 1093 func tryOpen(open func(int) (int, error)) (hostFD int, err error) { 1094 // Attempt to open file in the following in order: 1095 // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs. 1096 // Use non-blocking to prevent getting stuck inside open(2) for 1097 // FIFOs. This option has no effect on regular files. 1098 // 2. PATH: for symlinks, sockets. 1099 flags := []int{ 1100 unix.O_RDONLY | unix.O_NONBLOCK, 1101 unix.O_PATH, 1102 } 1103 1104 for _, flag := range flags { 1105 hostFD, err = open(flag | openFlags) 1106 if err == nil { 1107 return 1108 } 1109 1110 if e := extractErrno(err); e == unix.ENOENT { 1111 // File doesn't exist, no point in retrying. 
1112 return -1, e 1113 } 1114 } 1115 return 1116 } 1117 1118 func fstatTo(hostFD int) (linux.Statx, error) { 1119 var stat unix.Stat_t 1120 if err := unix.Fstat(hostFD, &stat); err != nil { 1121 return linux.Statx{}, err 1122 } 1123 1124 return linux.Statx{ 1125 Mask: unix.STATX_TYPE | unix.STATX_MODE | unix.STATX_INO | unix.STATX_NLINK | unix.STATX_UID | unix.STATX_GID | unix.STATX_SIZE | unix.STATX_BLOCKS | unix.STATX_ATIME | unix.STATX_MTIME | unix.STATX_CTIME, 1126 Mode: uint16(stat.Mode), 1127 DevMinor: unix.Minor(stat.Dev), 1128 DevMajor: unix.Major(stat.Dev), 1129 Ino: stat.Ino, 1130 Nlink: uint32(stat.Nlink), 1131 UID: stat.Uid, 1132 GID: stat.Gid, 1133 RdevMinor: unix.Minor(stat.Rdev), 1134 RdevMajor: unix.Major(stat.Rdev), 1135 Size: uint64(stat.Size), 1136 Blksize: uint32(stat.Blksize), 1137 Blocks: uint64(stat.Blocks), 1138 Atime: linux.StatxTimestamp{ 1139 Sec: stat.Atim.Sec, 1140 Nsec: uint32(stat.Atim.Nsec), 1141 }, 1142 Mtime: linux.StatxTimestamp{ 1143 Sec: stat.Mtim.Sec, 1144 Nsec: uint32(stat.Mtim.Nsec), 1145 }, 1146 Ctime: linux.StatxTimestamp{ 1147 Sec: stat.Ctim.Sec, 1148 Nsec: uint32(stat.Ctim.Nsec), 1149 }, 1150 }, nil 1151 } 1152 1153 func checkSupportedFileType(mode uint32) error { 1154 switch mode & unix.S_IFMT { 1155 case unix.S_IFREG, unix.S_IFDIR, unix.S_IFLNK, unix.S_IFCHR, unix.S_IFSOCK, unix.S_IFIFO: 1156 return nil 1157 1158 default: 1159 return unix.EPERM 1160 } 1161 } 1162 1163 // extractErrno tries to determine the errno. 1164 func extractErrno(err error) unix.Errno { 1165 if err == nil { 1166 // This should never happen. The likely result will be that 1167 // some user gets the frustrating "error: SUCCESS" message. 
1168 log.Warningf("extractErrno called with nil error!") 1169 return 0 1170 } 1171 1172 switch err { 1173 case os.ErrNotExist: 1174 return unix.ENOENT 1175 case os.ErrExist: 1176 return unix.EEXIST 1177 case os.ErrPermission: 1178 return unix.EACCES 1179 case os.ErrInvalid: 1180 return unix.EINVAL 1181 } 1182 1183 // See if it's an errno or a common wrapped error. 1184 switch e := err.(type) { 1185 case unix.Errno: 1186 return e 1187 case *os.PathError: 1188 return extractErrno(e.Err) 1189 case *os.LinkError: 1190 return extractErrno(e.Err) 1191 case *os.SyscallError: 1192 return extractErrno(e.Err) 1193 } 1194 1195 // Fall back to EIO. 1196 log.Debugf("Unknown error: %v, defaulting to EIO", err) 1197 return unix.EIO 1198 } 1199 1200 // LINT.ThenChange(../../pkg/sentry/fsimpl/gofer/directfs_dentry.go)