gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/fuse/inode.go (about)

     1  // Copyright 2022 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"fmt"
    19  	gotime "time"
    20  
    21  	"gvisor.dev/gvisor/pkg/abi/linux"
    22  
    23  	"gvisor.dev/gvisor/pkg/atomicbitops"
    24  	"gvisor.dev/gvisor/pkg/context"
    25  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    26  	"gvisor.dev/gvisor/pkg/hostarch"
    27  	"gvisor.dev/gvisor/pkg/marshal"
    28  	"gvisor.dev/gvisor/pkg/marshal/primitive"
    29  	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
    30  	"gvisor.dev/gvisor/pkg/sentry/kernel"
    31  	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
    32  	"gvisor.dev/gvisor/pkg/sentry/kernel/time"
    33  	"gvisor.dev/gvisor/pkg/sentry/vfs"
    34  	"gvisor.dev/gvisor/pkg/sync"
    35  )
    36  
// fileHandle is a file handle received from the FUSE server and cached on an
// inode until Open consumes it.
//
// +stateify savable
type fileHandle struct {
	// new is true while handle and flags hold a FUSE_CREATE result that no
	// Open call has used yet. Open resets it to false after using the handle.
	new bool
	// handle is the Fh value from the server's FUSEOpenOut.
	handle uint64
	// flags is the OpenFlag value from the server's FUSEOpenOut.
	flags uint32
}
    43  
// inode implements kernfs.Inode.
//
// +stateify savable
type inode struct {
	inodeRefs
	kernfs.InodeNotAnonymous
	kernfs.InodeNotSymlink
	kernfs.InodeWatches
	kernfs.OrderedChildren
	kernfs.CachedMappable

	// fs is the owning filesystem. fs is immutable.
	fs *filesystem

	// nodeID is a unique id which identifies the inode between userspace and
	// the sentry. generation is used to distinguish inodes in case of nodeID
	// reuse. Both are immutable.
	nodeID     uint64
	generation uint64

	// entryTime is the time at which the entry must be revalidated. Reading
	// entryTime requires either using entryTimeSeq and SeqAtomicLoadTime, or
	// that attrMu is locked. Writing entryTime requires that attrMu is locked
	// and that entryTimeSeq is in a writer critical section.
	entryTimeSeq sync.SeqCount `state:"nosave"`
	entryTime    time.Time

	// attrVersion is the version of the last attribute change. It is assigned
	// from the connection-wide attributeVersion counter.
	attrVersion atomicbitops.Uint64

	// attrTime is the time at which the attributes become invalid.
	attrTime time.Time

	// link caches the target of this symbolic link as returned by
	// FUSE_READLINK. It is empty until Readlink has fetched it.
	link string

	// fh caches the file handle returned by the server from a FUSE_CREATE request
	// so we don't have to send a separate FUSE_OPEN request.
	fh fileHandle

	// locks is handed to the LockFD of every file description opened on this
	// inode.
	locks   vfs.FileLocks
	watches vfs.Watches

	// attrMu protects the attributes of this inode.
	attrMu sync.Mutex `state:"nosave"`

	// +checklocks:attrMu
	ino atomicbitops.Uint64 // Stat data, not accessed for path walking.
	// +checklocks:attrMu
	uid atomicbitops.Uint32 // auth.KUID, but stored as raw uint32 for sync/atomic.
	// +checklocks:attrMu
	gid atomicbitops.Uint32 // auth.KGID, but stored as raw uint32 for sync/atomic.
	// +checklocks:attrMu
	mode atomicbitops.Uint32 // File type and mode.

	// Timestamps in nanoseconds from the unix epoch.
	// +checklocks:attrMu
	atime atomicbitops.Int64
	// +checklocks:attrMu
	mtime atomicbitops.Int64
	// +checklocks:attrMu
	ctime atomicbitops.Int64

	// size is the file size in bytes as last reported by the server or set
	// locally.
	// +checklocks:attrMu
	size atomicbitops.Uint64

	// nlink counts the number of hard links to this inode. It's updated and
	// accessed using atomic operations but not protected by attrMu.
	nlink atomicbitops.Uint32

	// +checklocks:attrMu
	blockSize atomicbitops.Uint32 // 0 if unknown.
}
   117  
   118  func blockerFromContext(ctx context.Context) context.Blocker {
   119  	kernelTask := kernel.TaskFromContext(ctx)
   120  	if kernelTask == nil {
   121  		return ctx
   122  	}
   123  	return kernelTask
   124  }
   125  
   126  func pidFromContext(ctx context.Context) uint32 {
   127  	kernelTask := kernel.TaskFromContext(ctx)
   128  	if kernelTask == nil {
   129  		return 0
   130  	}
   131  	return uint32(kernelTask.ThreadID())
   132  }
   133  
   134  func umaskFromContext(ctx context.Context) uint32 {
   135  	kernelTask := kernel.TaskFromContext(ctx)
   136  	umask := uint32(0)
   137  	if kernelTask != nil {
   138  		umask = uint32(kernelTask.FSContext().Umask())
   139  	}
   140  	return umask
   141  }
   142  
   143  func (i *inode) Mode() linux.FileMode {
   144  	i.attrMu.Lock()
   145  	defer i.attrMu.Unlock()
   146  	return i.filemode()
   147  }
   148  
   149  func (i *inode) UID() auth.KUID {
   150  	i.attrMu.Lock()
   151  	defer i.attrMu.Unlock()
   152  	return auth.KUID(i.uid.Load())
   153  }
   154  
   155  func (i *inode) GID() auth.KGID {
   156  	i.attrMu.Lock()
   157  	defer i.attrMu.Unlock()
   158  	return auth.KGID(i.gid.Load())
   159  }
   160  
   161  // +checklocks:i.attrMu
   162  func (i *inode) filemode() linux.FileMode {
   163  	return linux.FileMode(i.mode.Load())
   164  }
   165  
   166  // touchCMTime updates the ctime and mtime attributes to be the current time.
   167  //
   168  // +checklocks:i.attrMu
   169  func (i *inode) touchCMtime() {
   170  	now := i.fs.clock.Now().Nanoseconds()
   171  	i.mtime.Store(now)
   172  	i.ctime.Store(now)
   173  }
   174  
   175  // touchAtime updates the atime attribute to be the current time.
   176  //
   177  // +checklocks:i.attrMu
   178  func (i *inode) touchAtime() {
   179  	i.atime.Store(i.fs.clock.Now().Nanoseconds())
   180  }
   181  
   182  // +checklocks:i.attrMu
   183  func (i *inode) init(creds *auth.Credentials, devMajor, devMinor uint32, nodeid uint64, mode linux.FileMode, nlink uint32) {
   184  	if mode.FileType() == 0 {
   185  		panic(fmt.Sprintf("No file type specified in 'mode' for InodeAttrs.Init(): mode=0%o", mode))
   186  	}
   187  
   188  	i.nodeID = nodeid
   189  	i.ino.Store(nodeid)
   190  	i.mode.Store(uint32(mode))
   191  	i.uid.Store(uint32(creds.EffectiveKUID))
   192  	i.gid.Store(uint32(creds.EffectiveKGID))
   193  	i.nlink.Store(nlink)
   194  	i.blockSize.Store(hostarch.PageSize)
   195  
   196  	now := i.fs.clock.Now().Nanoseconds()
   197  	i.atime.Store(now)
   198  	i.mtime.Store(now)
   199  	i.ctime.Store(now)
   200  }
   201  
   202  // +checklocks:i.attrMu
   203  func (i *inode) updateEntryTime(entrySec, entryNSec int64) {
   204  	entryTime := time.FromTimespec(linux.Timespec{Sec: entrySec, Nsec: entryNSec})
   205  	SeqAtomicStoreTime(&i.entryTimeSeq, &i.entryTime, i.fs.clock.Now().AddTime(entryTime))
   206  }
   207  
   208  // CheckPermissions implements kernfs.Inode.CheckPermissions.
   209  func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
   210  	// Since FUSE operations are ultimately backed by a userspace process (the
   211  	// fuse daemon), allowing a process to call into fusefs grants the daemon
   212  	// ptrace-like capabilities over the calling process. Because of this, by
   213  	// default FUSE only allows the mount owner to interact with the
   214  	// filesystem. This explicitly excludes setuid/setgid processes.
   215  	//
   216  	// This behaviour can be overridden with the 'allow_other' mount option.
   217  	//
   218  	// See fs/fuse/dir.c:fuse_allow_current_process() in Linux.
   219  	if !i.fs.opts.allowOther {
   220  		if creds.RealKUID != i.fs.opts.uid ||
   221  			creds.EffectiveKUID != i.fs.opts.uid ||
   222  			creds.SavedKUID != i.fs.opts.uid ||
   223  			creds.RealKGID != i.fs.opts.gid ||
   224  			creds.EffectiveKGID != i.fs.opts.gid ||
   225  			creds.SavedKGID != i.fs.opts.gid {
   226  			return linuxerr.EACCES
   227  		}
   228  	}
   229  
   230  	// By default, fusefs delegates all permission checks to the server.
   231  	// However, standard unix permission checks can be enabled with the
   232  	// default_permissions mount option.
   233  	i.attrMu.Lock()
   234  	defer i.attrMu.Unlock()
   235  	refreshed := false
   236  	opts := vfs.StatOptions{Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID}
   237  	if i.fs.opts.defaultPermissions || (ats.MayExec() && i.filemode().FileType() == linux.S_IFREG) {
   238  		if i.fs.clock.Now().After(i.attrTime) {
   239  			refreshed = true
   240  			if _, err := i.getAttr(ctx, i.fs.VFSFilesystem(), opts, 0, 0); err != nil {
   241  				return err
   242  			}
   243  		}
   244  	}
   245  
   246  	if i.fs.opts.defaultPermissions || (ats.MayExec() && i.filemode().FileType() == linux.S_IFREG) {
   247  		err := vfs.GenericCheckPermissions(creds, ats, linux.FileMode(i.mode.Load()), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load()))
   248  		if linuxerr.Equals(linuxerr.EACCES, err) && !refreshed {
   249  			if _, err := i.getAttr(ctx, i.fs.VFSFilesystem(), opts, 0, 0); err != nil {
   250  				return err
   251  			}
   252  			return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(i.mode.Load()), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load()))
   253  		}
   254  		return err
   255  	} else if ats.MayRead() || ats.MayWrite() || ats.MayExec() {
   256  		in := linux.FUSEAccessIn{Mask: uint32(ats)}
   257  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_ACCESS, &in)
   258  		res, err := i.fs.conn.Call(ctx, req)
   259  		if err != nil {
   260  			return err
   261  		}
   262  		return res.Error()
   263  	}
   264  	return nil
   265  }
   266  
   267  // Open implements kernfs.Inode.Open.
   268  func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   269  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   270  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY |
   271  		linux.O_APPEND | linux.O_DIRECT
   272  	i.attrMu.Lock()
   273  	defer i.attrMu.Unlock()
   274  	if opts.Flags&linux.O_LARGEFILE == 0 && i.size.Load() > linux.MAX_NON_LFS {
   275  		return nil, linuxerr.EOVERFLOW
   276  	}
   277  
   278  	var (
   279  		fd     *fileDescription
   280  		fdImpl vfs.FileDescriptionImpl
   281  		opcode linux.FUSEOpcode
   282  	)
   283  	switch i.filemode().FileType() {
   284  	case linux.S_IFREG:
   285  		regularFD := &regularFileFD{}
   286  		fd = &(regularFD.fileDescription)
   287  		fdImpl = regularFD
   288  		opcode = linux.FUSE_OPEN
   289  	case linux.S_IFDIR:
   290  		if opts.Flags&linux.O_CREAT != 0 {
   291  			return nil, linuxerr.EISDIR
   292  		}
   293  		if ats := vfs.AccessTypesForOpenFlags(&opts); ats.MayWrite() {
   294  			return nil, linuxerr.EISDIR
   295  		}
   296  		if opts.Flags&linux.O_DIRECT != 0 {
   297  			return nil, linuxerr.EINVAL
   298  		}
   299  		directoryFD := &directoryFD{}
   300  		fd = &(directoryFD.fileDescription)
   301  		fdImpl = directoryFD
   302  		opcode = linux.FUSE_OPENDIR
   303  	case linux.S_IFLNK:
   304  		return nil, linuxerr.ELOOP
   305  	}
   306  
   307  	fd.LockFD.Init(&i.locks)
   308  	// FOPEN_KEEP_CACHE is the default flag for noOpen.
   309  	fd.OpenFlag = linux.FOPEN_KEEP_CACHE
   310  
   311  	truncateRegFile := opts.Flags&linux.O_TRUNC != 0 && i.filemode().FileType() == linux.S_IFREG
   312  	if truncateRegFile && (i.fh.new || !i.fs.conn.atomicOTrunc) {
   313  		// If the regular file needs to be truncated, but the connection doesn't
   314  		// support O_TRUNC or if we are optimizing away the Open RPC, then manually
   315  		// truncate the file *before* Open. As per libfuse, "If [atomic O_TRUNC is]
   316  		// disabled, and an application specifies O_TRUNC, fuse first calls
   317  		// truncate() and then open() with O_TRUNC filtered out.".
   318  		opts := vfs.SetStatOptions{Stat: linux.Statx{Size: 0, Mask: linux.STATX_SIZE}}
   319  		if err := i.setAttr(ctx, i.fs.VFSFilesystem(), auth.CredentialsFromContext(ctx), opts, fhOptions{useFh: false}); err != nil {
   320  			return nil, err
   321  		}
   322  	}
   323  
   324  	if i.fh.new {
   325  		fd.OpenFlag = i.fh.flags
   326  		fd.Fh = i.fh.handle
   327  		i.fh.new = false
   328  		// Only send an open request when the FUSE server supports open or is
   329  		// opening a directory.
   330  	} else if !i.fs.conn.noOpen || i.filemode().IsDir() {
   331  		in := linux.FUSEOpenIn{Flags: opts.Flags & ^uint32(linux.O_CREAT|linux.O_EXCL|linux.O_NOCTTY)}
   332  		// Clear O_TRUNC if the server doesn't support it.
   333  		if !i.fs.conn.atomicOTrunc {
   334  			in.Flags &= ^uint32(linux.O_TRUNC)
   335  		}
   336  
   337  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, opcode, &in)
   338  		res, err := i.fs.conn.Call(ctx, req)
   339  		if err != nil {
   340  			return nil, err
   341  		}
   342  		if err := res.Error(); err != nil {
   343  			if linuxerr.Equals(linuxerr.ENOSYS, err) && !i.filemode().IsDir() {
   344  				i.fs.conn.noOpen = true
   345  			} else {
   346  				return nil, err
   347  			}
   348  		} else {
   349  			out := linux.FUSEOpenOut{}
   350  			if err := res.UnmarshalPayload(&out); err != nil {
   351  				return nil, err
   352  			}
   353  			fd.OpenFlag = out.OpenFlag
   354  			fd.Fh = out.Fh
   355  			// Open was successful. Update inode's size if atomicOTrunc && O_TRUNC.
   356  			if truncateRegFile && i.fs.conn.atomicOTrunc {
   357  				i.fs.conn.mu.Lock()
   358  				i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   359  				i.fs.conn.mu.Unlock()
   360  				i.size.Store(0)
   361  				i.touchCMtime()
   362  			}
   363  		}
   364  	}
   365  	if i.filemode().IsDir() {
   366  		fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
   367  	}
   368  
   369  	// TODO(gvisor.dev/issue/3234): invalidate mmap after implemented it for FUSE Inode
   370  	fd.DirectIO = fd.OpenFlag&linux.FOPEN_DIRECT_IO != 0
   371  	fdOptions := &vfs.FileDescriptionOptions{}
   372  	if fd.OpenFlag&linux.FOPEN_NONSEEKABLE != 0 {
   373  		fdOptions.DenyPRead = true
   374  		fdOptions.DenyPWrite = true
   375  		fd.Nonseekable = true
   376  	}
   377  
   378  	if err := fd.vfsfd.Init(fdImpl, opts.Flags, rp.Mount(), d.VFSDentry(), fdOptions); err != nil {
   379  		return nil, err
   380  	}
   381  	return &fd.vfsfd, nil
   382  }
   383  
   384  func (i *inode) Valid(ctx context.Context, parent *kernfs.Dentry, name string) bool {
   385  	now := i.fs.clock.Now()
   386  	if entryTime := SeqAtomicLoadTime(&i.entryTimeSeq, &i.entryTime); entryTime.After(now) {
   387  		return true
   388  	}
   389  
   390  	i.attrMu.Lock()
   391  	defer i.attrMu.Unlock()
   392  	if i.entryTime.After(now) {
   393  		return true
   394  	}
   395  
   396  	in := linux.FUSELookupIn{Name: linux.CString(name)}
   397  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), parent.Inode().(*inode).nodeID, linux.FUSE_LOOKUP, &in)
   398  	res, err := i.fs.conn.Call(ctx, req)
   399  	if err != nil {
   400  		return false
   401  	}
   402  	if res.Error() != nil {
   403  		return false
   404  	}
   405  	var out linux.FUSEEntryOut
   406  	if res.UnmarshalPayload(&out) != nil {
   407  		return false
   408  	}
   409  	if i.nodeID != out.NodeID {
   410  		return false
   411  	}
   412  	// Don't enforce fuse_invalid_attr() => fuse_valid_type(),
   413  	// fuse_valid_size() since inode.updateAttrs() and its callers
   414  	// don't. But do enforce fuse_stale_inode():
   415  	if i.generation != out.Generation {
   416  		return false
   417  	}
   418  	if (i.mode.RacyLoad()^out.Attr.Mode)&linux.S_IFMT != 0 {
   419  		return false
   420  	}
   421  	i.updateEntryTime(int64(out.EntryValid), int64(out.EntryValidNSec))
   422  	return true
   423  }
   424  
   425  // Lookup implements kernfs.Inode.Lookup.
   426  func (i *inode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   427  	in := linux.FUSELookupIn{Name: linux.CString(name)}
   428  	return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
   429  }
   430  
// Keep implements kernfs.Inode.Keep. It always returns true.
func (i *inode) Keep() bool {
	// Return true so that kernfs keeps the new dentry pointing to this
	// inode in the dentry tree. This is needed because inodes created via
	// Lookup are not temporary. They might refer to existing files on the
	// server that can be Unlink'd/Rmdir'd.
	return true
}
   439  
// IterDirents implements kernfs.Inode.IterDirents. It emits no entries: the
// callback is never invoked and offset is returned unchanged with a nil
// error.
func (*inode) IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
	return offset, nil
}
   444  
   445  // NewFile implements kernfs.Inode.NewFile.
   446  func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) {
   447  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   448  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
   449  	in := linux.FUSECreateIn{
   450  		CreateMeta: linux.FUSECreateMeta{
   451  			Flags: opts.Flags,
   452  			Mode:  uint32(opts.Mode) | linux.S_IFREG,
   453  			Umask: umaskFromContext(ctx),
   454  		},
   455  		Name: linux.CString(name),
   456  	}
   457  	return i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
   458  }
   459  
   460  // NewNode implements kernfs.Inode.NewNode.
   461  func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (kernfs.Inode, error) {
   462  	in := linux.FUSEMknodIn{
   463  		MknodMeta: linux.FUSEMknodMeta{
   464  			Mode:  uint32(opts.Mode),
   465  			Rdev:  linux.MakeDeviceID(uint16(opts.DevMajor), opts.DevMinor),
   466  			Umask: umaskFromContext(ctx),
   467  		},
   468  		Name: linux.CString(name),
   469  	}
   470  	return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
   471  }
   472  
   473  // NewSymlink implements kernfs.Inode.NewSymlink.
   474  func (i *inode) NewSymlink(ctx context.Context, name, target string) (kernfs.Inode, error) {
   475  	in := linux.FUSESymlinkIn{
   476  		Name:   linux.CString(name),
   477  		Target: linux.CString(target),
   478  	}
   479  	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
   480  }
   481  
   482  // NewLink implements kernfs.Inode.NewLink.
   483  func (i *inode) NewLink(ctx context.Context, name string, target kernfs.Inode) (kernfs.Inode, error) {
   484  	targetInode := target.(*inode)
   485  	in := linux.FUSELinkIn{
   486  		OldNodeID: primitive.Uint64(targetInode.nodeID),
   487  		Name:      linux.CString(name),
   488  	}
   489  	return i.newEntry(ctx, name, targetInode.Mode().FileType(), linux.FUSE_LINK, &in)
   490  }
   491  
   492  // Unlink implements kernfs.Inode.Unlink.
   493  func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
   494  	in := linux.FUSEUnlinkIn{Name: linux.CString(name)}
   495  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_UNLINK, &in)
   496  	res, err := i.fs.conn.Call(ctx, req)
   497  	if err != nil {
   498  		return err
   499  	}
   500  	// only return error, discard res.
   501  	return res.Error()
   502  }
   503  
   504  // NewDir implements kernfs.Inode.NewDir.
   505  func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
   506  	in := linux.FUSEMkdirIn{
   507  		MkdirMeta: linux.FUSEMkdirMeta{
   508  			Mode:  uint32(opts.Mode),
   509  			Umask: umaskFromContext(ctx),
   510  		},
   511  		Name: linux.CString(name),
   512  	}
   513  	return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
   514  }
   515  
   516  // RmDir implements kernfs.Inode.RmDir.
   517  func (i *inode) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
   518  	in := linux.FUSERmDirIn{Name: linux.CString(name)}
   519  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_RMDIR, &in)
   520  	res, err := i.fs.conn.Call(ctx, req)
   521  	if err != nil {
   522  		return err
   523  	}
   524  	return res.Error()
   525  }
   526  
   527  // Rename implements kernfs.Inode.Rename.
   528  func (i *inode) Rename(ctx context.Context, oldname, newname string, child, dstDir kernfs.Inode) error {
   529  	dstDirInode := dstDir.(*inode)
   530  	in := linux.FUSERenameIn{
   531  		Newdir:  primitive.Uint64(dstDirInode.nodeID),
   532  		Oldname: linux.CString(oldname),
   533  		Newname: linux.CString(newname),
   534  	}
   535  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_RENAME, &in)
   536  	res, err := i.fs.conn.Call(ctx, req)
   537  	if err != nil {
   538  		return err
   539  	}
   540  	return res.Error()
   541  }
   542  
   543  // newEntry calls FUSE server for entry creation and allocates corresponding
   544  // entry according to response. Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK,
   545  // FUSE_LINK and FUSE_LOOKUP.
   546  func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (kernfs.Inode, error) {
   547  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, opcode, payload)
   548  	res, err := i.fs.conn.Call(ctx, req)
   549  	if err != nil {
   550  		return nil, err
   551  	}
   552  	if err := res.Error(); err != nil {
   553  		return nil, err
   554  	}
   555  	out := linux.FUSECreateOut{}
   556  	if opcode == linux.FUSE_CREATE {
   557  		if err := res.UnmarshalPayload(&out); err != nil {
   558  			return nil, err
   559  		}
   560  	} else {
   561  		if err := res.UnmarshalPayload(&out.FUSEEntryOut); err != nil {
   562  			return nil, err
   563  		}
   564  	}
   565  	if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
   566  		return nil, linuxerr.EIO
   567  	}
   568  	child := i.fs.newInode(ctx, out.FUSEEntryOut)
   569  	if opcode == linux.FUSE_CREATE {
   570  		// File handler is returned by fuse server at a time of file create.
   571  		// Save it temporary in a created child, so Open could return it when invoked
   572  		// to be sure after fh is consumed reset 'isNewFh' flag of inode
   573  		childI, ok := child.(*inode)
   574  		if ok {
   575  			childI.fh.new = true
   576  			childI.fh.handle = out.FUSEOpenOut.Fh
   577  			childI.fh.flags = out.FUSEOpenOut.OpenFlag
   578  		}
   579  	}
   580  	return child, nil
   581  }
   582  
   583  // Getlink implements kernfs.Inode.Getlink.
   584  func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
   585  	path, err := i.Readlink(ctx, mnt)
   586  	return vfs.VirtualDentry{}, path, err
   587  }
   588  
   589  // Readlink implements kernfs.Inode.Readlink.
   590  func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
   591  	i.attrMu.Lock()
   592  	defer i.attrMu.Unlock()
   593  	if i.filemode().FileType()&linux.S_IFLNK == 0 {
   594  		return "", linuxerr.EINVAL
   595  	}
   596  	if len(i.link) == 0 {
   597  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
   598  		res, err := i.fs.conn.Call(ctx, req)
   599  		if err != nil {
   600  			return "", err
   601  		}
   602  		i.link = string(res.data[res.hdr.SizeBytes():])
   603  		if !mnt.Options().ReadOnly {
   604  			i.attrTime = time.ZeroTime
   605  		}
   606  	}
   607  	return i.link, nil
   608  }
   609  
   610  // getFUSEAttr returns a linux.FUSEAttr of this inode stored in local cache.
   611  //
   612  // +checklocks:i.attrMu
   613  func (i *inode) getFUSEAttr() linux.FUSEAttr {
   614  	ns := gotime.Second.Nanoseconds()
   615  	return linux.FUSEAttr{
   616  		Ino:       i.nodeID,
   617  		UID:       i.uid.Load(),
   618  		GID:       i.gid.Load(),
   619  		Size:      i.size.Load(),
   620  		Mode:      uint32(i.filemode()),
   621  		BlkSize:   i.blockSize.Load(),
   622  		Atime:     uint64(i.atime.Load() / ns),
   623  		Mtime:     uint64(i.mtime.Load() / ns),
   624  		Ctime:     uint64(i.ctime.Load() / ns),
   625  		AtimeNsec: uint32(i.atime.Load() % ns),
   626  		MtimeNsec: uint32(i.mtime.Load() % ns),
   627  		CtimeNsec: uint32(i.ctime.Load() % ns),
   628  		Nlink:     i.nlink.Load(),
   629  	}
   630  }
   631  
   632  // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
   633  // opts.Sync attribute is ignored since the synchronization is handled by the
   634  // FUSE server.
   635  func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
   636  	var stat linux.Statx
   637  	stat.Blksize = attr.BlkSize
   638  	stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor
   639  
   640  	rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev)
   641  	stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor
   642  
   643  	if mask&linux.STATX_MODE != 0 {
   644  		stat.Mode = uint16(attr.Mode)
   645  	}
   646  	if mask&linux.STATX_NLINK != 0 {
   647  		stat.Nlink = attr.Nlink
   648  	}
   649  	if mask&linux.STATX_UID != 0 {
   650  		stat.UID = attr.UID
   651  	}
   652  	if mask&linux.STATX_GID != 0 {
   653  		stat.GID = attr.GID
   654  	}
   655  	if mask&linux.STATX_ATIME != 0 {
   656  		stat.Atime = linux.StatxTimestamp{
   657  			Sec:  int64(attr.Atime),
   658  			Nsec: attr.AtimeNsec,
   659  		}
   660  	}
   661  	if mask&linux.STATX_MTIME != 0 {
   662  		stat.Mtime = linux.StatxTimestamp{
   663  			Sec:  int64(attr.Mtime),
   664  			Nsec: attr.MtimeNsec,
   665  		}
   666  	}
   667  	if mask&linux.STATX_CTIME != 0 {
   668  		stat.Ctime = linux.StatxTimestamp{
   669  			Sec:  int64(attr.Ctime),
   670  			Nsec: attr.CtimeNsec,
   671  		}
   672  	}
   673  	if mask&linux.STATX_INO != 0 {
   674  		stat.Ino = attr.Ino
   675  	}
   676  	if mask&linux.STATX_SIZE != 0 {
   677  		stat.Size = attr.Size
   678  	}
   679  	if mask&linux.STATX_BLOCKS != 0 {
   680  		stat.Blocks = attr.Blocks
   681  	}
   682  	return stat
   683  }
   684  
   685  // getAttr gets the attribute of this inode by issuing a FUSE_GETATTR request
   686  // or read from local cache. It updates the corresponding attributes if
   687  // necessary.
   688  //
   689  // +checklocks:i.attrMu
   690  func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions, flags uint32, fh uint64) (linux.FUSEAttr, error) {
   691  	// TODO(gvisor.dev/issue/3679): send the request only if
   692  	//	- invalid local cache for fields specified in the opts.Mask
   693  	//	- forced update
   694  	//	- i.attributeTime expired
   695  	// If local cache is still valid, return local cache.
   696  	// Currently we always send a request,
   697  	// and we always set the metadata with the new result,
   698  	// unless attributeVersion has changed.
   699  	creds := auth.CredentialsFromContext(ctx)
   700  
   701  	in := linux.FUSEGetAttrIn{
   702  		GetAttrFlags: flags,
   703  		Fh:           fh,
   704  	}
   705  	req := i.fs.conn.NewRequest(creds, pidFromContext(ctx), i.nodeID, linux.FUSE_GETATTR, &in)
   706  	res, err := i.fs.conn.Call(ctx, req)
   707  	if err != nil {
   708  		return linux.FUSEAttr{}, err
   709  	}
   710  	if err := res.Error(); err != nil {
   711  		return linux.FUSEAttr{}, err
   712  	}
   713  	var out linux.FUSEAttrOut
   714  	if err := res.UnmarshalPayload(&out); err != nil {
   715  		return linux.FUSEAttr{}, err
   716  	}
   717  
   718  	// Local version is newer, return the local one.
   719  	i.fs.conn.mu.Lock()
   720  	attributeVersion := i.fs.conn.attributeVersion.Load()
   721  	if attributeVersion != 0 && i.attrVersion.Load() > attributeVersion {
   722  		i.fs.conn.mu.Unlock()
   723  		return i.getFUSEAttr(), nil
   724  	}
   725  	i.fs.conn.mu.Unlock()
   726  	i.updateAttrs(out.Attr, int64(out.AttrValid), int64(out.AttrValidNsec))
   727  	return out.Attr, nil
   728  }
   729  
   730  // reviseAttr attempts to update the attributes for internal purposes
   731  // by calling getAttr with a pre-specified mask.
   732  // Used by read, write, lseek.
   733  //
   734  // +checklocks:i.attrMu
   735  func (i *inode) reviseAttr(ctx context.Context, flags uint32, fh uint64) error {
   736  	// Never need atime for internal purposes.
   737  	_, err := i.getAttr(ctx, i.fs.VFSFilesystem(), vfs.StatOptions{
   738  		Mask: linux.STATX_BASIC_STATS &^ linux.STATX_ATIME,
   739  	}, flags, fh)
   740  	return err
   741  }
   742  
   743  // Stat implements kernfs.Inode.Stat.
   744  func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   745  	i.attrMu.Lock()
   746  	defer i.attrMu.Unlock()
   747  	attr, err := i.getAttr(ctx, fs, opts, 0, 0)
   748  	if err != nil {
   749  		return linux.Statx{}, err
   750  	}
   751  
   752  	return statFromFUSEAttr(attr, opts.Mask, i.fs.devMinor), nil
   753  }
   754  
   755  // DecRef implements kernfs.Inode.DecRef.
   756  func (i *inode) DecRef(ctx context.Context) {
   757  	i.inodeRefs.DecRef(func() { i.Destroy(ctx) })
   758  }
   759  
   760  // StatFS implements kernfs.Inode.StatFS.
   761  func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
   762  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID,
   763  		linux.FUSE_STATFS, &linux.FUSEEmptyIn{},
   764  	)
   765  	res, err := i.fs.conn.Call(ctx, req)
   766  	if err != nil {
   767  		return linux.Statfs{}, err
   768  	}
   769  	if err := res.Error(); err != nil {
   770  		return linux.Statfs{}, err
   771  	}
   772  
   773  	var out linux.FUSEStatfsOut
   774  	if err := res.UnmarshalPayload(&out); err != nil {
   775  		return linux.Statfs{}, err
   776  	}
   777  
   778  	return linux.Statfs{
   779  		Type:            linux.FUSE_SUPER_MAGIC,
   780  		Blocks:          uint64(out.Blocks),
   781  		BlocksFree:      out.BlocksFree,
   782  		BlocksAvailable: out.BlocksAvailable,
   783  		Files:           out.Files,
   784  		FilesFree:       out.FilesFree,
   785  		BlockSize:       int64(out.BlockSize),
   786  		NameLength:      uint64(out.NameLength),
   787  		FragmentSize:    int64(out.FragmentSize),
   788  	}, nil
   789  }
   790  
   791  // fattrMaskFromStats converts vfs.SetStatOptions.Stat.Mask to linux stats mask
   792  // aligned with the attribute mask defined in include/linux/fs.h.
   793  func fattrMaskFromStats(mask uint32) uint32 {
   794  	var fuseAttrMask uint32
   795  	maskMap := map[uint32]uint32{
   796  		linux.STATX_MODE:  linux.FATTR_MODE,
   797  		linux.STATX_UID:   linux.FATTR_UID,
   798  		linux.STATX_GID:   linux.FATTR_GID,
   799  		linux.STATX_SIZE:  linux.FATTR_SIZE,
   800  		linux.STATX_ATIME: linux.FATTR_ATIME,
   801  		linux.STATX_MTIME: linux.FATTR_MTIME,
   802  		linux.STATX_CTIME: linux.FATTR_CTIME,
   803  	}
   804  	for statxMask, fattrMask := range maskMap {
   805  		if mask&statxMask != 0 {
   806  			fuseAttrMask |= fattrMask
   807  		}
   808  	}
   809  	return fuseAttrMask
   810  }
   811  
   812  // SetStat implements kernfs.Inode.SetStat.
   813  func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
   814  	i.attrMu.Lock()
   815  	defer i.attrMu.Unlock()
   816  	if err := vfs.CheckSetStat(ctx, creds, &opts, i.filemode(), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load())); err != nil {
   817  		return err
   818  	}
   819  	if opts.Stat.Mask == 0 {
   820  		return nil
   821  	}
   822  	return i.setAttr(ctx, fs, creds, opts, fhOptions{useFh: false})
   823  }
   824  
// fhOptions tells setAttr whether a FUSE_SETATTR request should refer to an
// open file handle.
type fhOptions struct {
	// useFh, when true, makes setAttr add FATTR_FH to the request's valid
	// mask.
	useFh bool
	// fh is the file handle value placed in the request's Fh field.
	fh uint64
}
   829  
   830  // +checklocks:i.attrMu
   831  func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions, fhOpts fhOptions) error {
   832  	// We should retain the original file type when assigning a new mode.
   833  	fattrMask := fattrMaskFromStats(opts.Stat.Mask)
   834  	if fhOpts.useFh {
   835  		fattrMask |= linux.FATTR_FH
   836  	}
   837  	if opts.Stat.Mask&linux.STATX_ATIME != 0 && opts.Stat.Atime.Nsec == linux.UTIME_NOW {
   838  		fattrMask |= linux.FATTR_ATIME_NOW
   839  	}
   840  	if opts.Stat.Mask&linux.STATX_MTIME != 0 && opts.Stat.Mtime.Nsec == linux.UTIME_NOW {
   841  		fattrMask |= linux.FATTR_ATIME_NOW
   842  	}
   843  	in := linux.FUSESetAttrIn{
   844  		Valid:     fattrMask,
   845  		Fh:        fhOpts.fh,
   846  		Size:      opts.Stat.Size,
   847  		Atime:     uint64(opts.Stat.Atime.Sec),
   848  		Mtime:     uint64(opts.Stat.Mtime.Sec),
   849  		Ctime:     uint64(opts.Stat.Ctime.Sec),
   850  		AtimeNsec: opts.Stat.Atime.Nsec,
   851  		MtimeNsec: opts.Stat.Mtime.Nsec,
   852  		CtimeNsec: opts.Stat.Ctime.Nsec,
   853  		Mode:      uint32(uint16(i.filemode().FileType()) | opts.Stat.Mode),
   854  		UID:       opts.Stat.UID,
   855  		GID:       opts.Stat.GID,
   856  	}
   857  	req := i.fs.conn.NewRequest(creds, pidFromContext(ctx), i.nodeID, linux.FUSE_SETATTR, &in)
   858  	res, err := i.fs.conn.Call(ctx, req)
   859  	if err != nil {
   860  		return err
   861  	}
   862  	if err := res.Error(); err != nil {
   863  		return err
   864  	}
   865  	out := linux.FUSEAttrOut{}
   866  	if err := res.UnmarshalPayload(&out); err != nil {
   867  		return err
   868  	}
   869  	i.updateAttrs(out.Attr, int64(out.AttrValid), int64(out.AttrValidNsec))
   870  	return nil
   871  }
   872  
   873  // +checklocks:i.attrMu
   874  func (i *inode) updateAttrs(attr linux.FUSEAttr, validSec, validNSec int64) {
   875  	i.fs.conn.mu.Lock()
   876  	i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   877  	i.fs.conn.mu.Unlock()
   878  	i.attrTime = i.fs.clock.Now().AddTime(time.FromTimespec(linux.Timespec{Sec: validSec, Nsec: validNSec}))
   879  
   880  	i.ino.Store(attr.Ino)
   881  
   882  	i.mode.Store((attr.Mode & 07777) | (i.mode.Load() & linux.S_IFMT))
   883  	i.uid.Store(attr.UID)
   884  	i.gid.Store(attr.GID)
   885  
   886  	i.atime.Store(attr.ATimeNsec())
   887  	i.mtime.Store(attr.MTimeNsec())
   888  	i.ctime.Store(attr.CTimeNsec())
   889  
   890  	i.size.Store(attr.Size)
   891  	i.nlink.Store(attr.Nlink)
   892  
   893  	if !i.fs.opts.defaultPermissions {
   894  		i.mode.Store(i.mode.Load() & ^uint32(linux.S_ISVTX))
   895  	}
   896  }