github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/fuse/inode.go (about)

     1  // Copyright 2022 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    23  	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
    24  	"github.com/MerlinKodo/gvisor/pkg/context"
    25  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    26  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    27  	"github.com/MerlinKodo/gvisor/pkg/marshal"
    28  	"github.com/MerlinKodo/gvisor/pkg/marshal/primitive"
    29  	"github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel"
    31  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    32  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    33  )
    34  
// fileHandle is a server-provided file handle cached on an inode. new marks
// a handle that has been stored but not yet consumed by Open; handle and
// flags hold the Fh and OpenFlag values taken from the server's FUSEOpenOut.
//
// +stateify savable
type fileHandle struct {
	new    bool
	handle uint64
	flags  uint32
}
    41  
// inode implements kernfs.Inode.
//
// +stateify savable
type inode struct {
	inodeRefs
	kernfs.InodeAlwaysValid
	kernfs.InodeNotAnonymous
	kernfs.InodeNotSymlink
	kernfs.InodeWatches
	kernfs.OrderedChildren
	kernfs.CachedMappable

	// fs is the owning filesystem. fs is immutable.
	fs *filesystem

	// nodeID is a unique id which identifies the inode between userspace
	// and the sentry. Immutable.
	nodeID uint64

	// attrVersion is the version of the last attribute change. It is
	// assigned from the connection-wide attributeVersion counter.
	attrVersion atomicbitops.Uint64

	// attrTime is the time until which the cached attributes are valid.
	attrTime uint64

	// link is the cached result of following a symbolic link; it is empty
	// until the first successful Readlink.
	link string

	// fh caches the file handle returned by the server from a FUSE_CREATE request
	// so we don't have to send a separate FUSE_OPEN request.
	fh fileHandle

	// locks backs the LockFD of every file description opened on this inode.
	locks   vfs.FileLocks
	watches vfs.Watches

	// attrMu protects the attributes of this inode.
	attrMu sync.Mutex

	// +checklocks:attrMu
	ino atomicbitops.Uint64 // Stat data, not accessed for path walking.
	// +checklocks:attrMu
	uid atomicbitops.Uint32 // auth.KUID, but stored as raw uint32 for sync/atomic.
	// +checklocks:attrMu
	gid atomicbitops.Uint32 // auth.KGID, but...
	// +checklocks:attrMu
	mode atomicbitops.Uint32 // File type and mode.

	// Timestamps in nanoseconds from the unix epoch.
	// +checklocks:attrMu
	atime atomicbitops.Int64
	// +checklocks:attrMu
	mtime atomicbitops.Int64
	// +checklocks:attrMu
	ctime atomicbitops.Int64

	// size is the cached file size in bytes.
	// +checklocks:attrMu
	size atomicbitops.Uint64

	// nlink counts the number of hard links to this inode. It is updated and
	// accessed using atomic operations but is not protected by attrMu.
	nlink atomicbitops.Uint32

	// +checklocks:attrMu
	blockSize atomicbitops.Uint32 // 0 if unknown.
}
   107  
   108  func blockerFromContext(ctx context.Context) context.Blocker {
   109  	kernelTask := kernel.TaskFromContext(ctx)
   110  	if kernelTask == nil {
   111  		return ctx
   112  	}
   113  	return kernelTask
   114  }
   115  
   116  func pidFromContext(ctx context.Context) uint32 {
   117  	kernelTask := kernel.TaskFromContext(ctx)
   118  	if kernelTask == nil {
   119  		return 0
   120  	}
   121  	return uint32(kernelTask.ThreadID())
   122  }
   123  
   124  func umaskFromContext(ctx context.Context) uint32 {
   125  	kernelTask := kernel.TaskFromContext(ctx)
   126  	umask := uint32(0)
   127  	if kernelTask != nil {
   128  		umask = uint32(kernelTask.FSContext().Umask())
   129  	}
   130  	return umask
   131  }
   132  
   133  func (i *inode) Mode() linux.FileMode {
   134  	i.attrMu.Lock()
   135  	defer i.attrMu.Unlock()
   136  	return i.filemode()
   137  }
   138  
   139  func (i *inode) UID() auth.KUID {
   140  	i.attrMu.Lock()
   141  	defer i.attrMu.Unlock()
   142  	return auth.KUID(i.uid.Load())
   143  }
   144  
   145  func (i *inode) GID() auth.KGID {
   146  	i.attrMu.Lock()
   147  	defer i.attrMu.Unlock()
   148  	return auth.KGID(i.gid.Load())
   149  }
   150  
   151  // +checklocks:i.attrMu
   152  func (i *inode) filemode() linux.FileMode {
   153  	return linux.FileMode(i.mode.Load())
   154  }
   155  
   156  // touchCMTime updates the ctime and mtime attributes to be the current time.
   157  //
   158  // +checklocks:i.attrMu
   159  func (i *inode) touchCMtime() {
   160  	now := i.fs.clock.Now().Nanoseconds()
   161  	i.mtime.Store(now)
   162  	i.ctime.Store(now)
   163  }
   164  
   165  // touchAtime updates the atime attribut to be the current time.
   166  //
   167  // +checklocks:i.attrMu
   168  func (i *inode) touchAtime() {
   169  	i.atime.Store(i.fs.clock.Now().Nanoseconds())
   170  }
   171  
   172  // +checklocks:i.attrMu
   173  func (i *inode) init(creds *auth.Credentials, devMajor, devMinor uint32, nodeid uint64, mode linux.FileMode, nlink uint32) {
   174  	if mode.FileType() == 0 {
   175  		panic(fmt.Sprintf("No file type specified in 'mode' for InodeAttrs.Init(): mode=0%o", mode))
   176  	}
   177  
   178  	i.nodeID = nodeid
   179  	i.ino.Store(nodeid)
   180  	i.mode.Store(uint32(mode))
   181  	i.uid.Store(uint32(creds.EffectiveKUID))
   182  	i.gid.Store(uint32(creds.EffectiveKGID))
   183  	i.nlink.Store(nlink)
   184  	i.blockSize.Store(hostarch.PageSize)
   185  
   186  	now := i.fs.clock.Now().Nanoseconds()
   187  	i.atime.Store(now)
   188  	i.mtime.Store(now)
   189  	i.ctime.Store(now)
   190  }
   191  
// CheckPermissions implements kernfs.Inode.CheckPermissions.
//
// The check has three stages: the mount-owner restriction (unless
// allow_other is set), a local unix permission check against cached
// attributes when default_permissions is set or a regular file is being
// executed, and otherwise a FUSE_ACCESS request to the server for any
// read/write/exec access.
func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
	// Since FUSE operations are ultimately backed by a userspace process (the
	// fuse daemon), allowing a process to call into fusefs grants the daemon
	// ptrace-like capabilities over the calling process. Because of this, by
	// default FUSE only allows the mount owner to interact with the
	// filesystem. This explicitly excludes setuid/setgid processes.
	//
	// This behaviour can be overridden with the 'allow_other' mount option.
	//
	// See fs/fuse/dir.c:fuse_allow_current_process() in Linux.
	if !i.fs.opts.allowOther {
		if creds.RealKUID != i.fs.opts.uid ||
			creds.EffectiveKUID != i.fs.opts.uid ||
			creds.SavedKUID != i.fs.opts.uid ||
			creds.RealKGID != i.fs.opts.gid ||
			creds.EffectiveKGID != i.fs.opts.gid ||
			creds.SavedKGID != i.fs.opts.gid {
			return linuxerr.EACCES
		}
	}

	// By default, fusefs delegates all permission checks to the server.
	// However, standard unix permission checks can be enabled with the
	// default_permissions mount option.
	i.attrMu.Lock()
	defer i.attrMu.Unlock()
	// refreshed records whether the attributes were already refetched in this
	// call, so that a denial below is not retried a second time.
	refreshed := false
	opts := vfs.StatOptions{Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID}
	if i.fs.opts.defaultPermissions || (ats.MayExec() && i.filemode().FileType() == linux.S_IFREG) {
		// The local check relies on cached attributes; refetch them first if
		// the current time is past attrTime.
		if uint64(i.fs.clock.Now().Nanoseconds()) > i.attrTime {
			refreshed = true
			if _, err := i.getAttr(ctx, i.fs.VFSFilesystem(), opts, 0, 0); err != nil {
				return err
			}
		}
	}

	if i.fs.opts.defaultPermissions || (ats.MayExec() && i.filemode().FileType() == linux.S_IFREG) {
		err := vfs.GenericCheckPermissions(creds, ats, linux.FileMode(i.mode.Load()), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load()))
		// An EACCES computed from attributes that were not refreshed above is
		// retried once after fetching fresh attributes from the server.
		if linuxerr.Equals(linuxerr.EACCES, err) && !refreshed {
			if _, err := i.getAttr(ctx, i.fs.VFSFilesystem(), opts, 0, 0); err != nil {
				return err
			}
			return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(i.mode.Load()), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load()))
		}
		return err
	} else if ats.MayRead() || ats.MayWrite() || ats.MayExec() {
		// No local check applies: ask the server with FUSE_ACCESS.
		in := linux.FUSEAccessIn{Mask: uint32(ats)}
		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_ACCESS, &in)
		res, err := i.fs.conn.Call(ctx, req)
		if err != nil {
			return err
		}
		return res.Error()
	}
	return nil
}
   250  
   251  // Open implements kernfs.Inode.Open.
   252  func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   253  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   254  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY |
   255  		linux.O_APPEND | linux.O_DIRECT
   256  	i.attrMu.Lock()
   257  	defer i.attrMu.Unlock()
   258  	if opts.Flags&linux.O_LARGEFILE == 0 && i.size.Load() > linux.MAX_NON_LFS {
   259  		return nil, linuxerr.EOVERFLOW
   260  	}
   261  
   262  	var (
   263  		fd     *fileDescription
   264  		fdImpl vfs.FileDescriptionImpl
   265  		opcode linux.FUSEOpcode
   266  	)
   267  	switch i.filemode().FileType() {
   268  	case linux.S_IFREG:
   269  		regularFD := &regularFileFD{}
   270  		fd = &(regularFD.fileDescription)
   271  		fdImpl = regularFD
   272  		opcode = linux.FUSE_OPEN
   273  	case linux.S_IFDIR:
   274  		if opts.Flags&linux.O_CREAT != 0 {
   275  			return nil, linuxerr.EISDIR
   276  		}
   277  		if ats := vfs.AccessTypesForOpenFlags(&opts); ats.MayWrite() {
   278  			return nil, linuxerr.EISDIR
   279  		}
   280  		if opts.Flags&linux.O_DIRECT != 0 {
   281  			return nil, linuxerr.EINVAL
   282  		}
   283  		directoryFD := &directoryFD{}
   284  		fd = &(directoryFD.fileDescription)
   285  		fdImpl = directoryFD
   286  		opcode = linux.FUSE_OPENDIR
   287  	case linux.S_IFLNK:
   288  		return nil, linuxerr.ELOOP
   289  	}
   290  
   291  	fd.LockFD.Init(&i.locks)
   292  	// FOPEN_KEEP_CACHE is the default flag for noOpen.
   293  	fd.OpenFlag = linux.FOPEN_KEEP_CACHE
   294  
   295  	truncateRegFile := opts.Flags&linux.O_TRUNC != 0 && i.filemode().FileType() == linux.S_IFREG
   296  	if truncateRegFile && (i.fh.new || !i.fs.conn.atomicOTrunc) {
   297  		// If the regular file needs to be truncated, but the connection doesn't
   298  		// support O_TRUNC or if we are optimizing away the Open RPC, then manually
   299  		// truncate the file *before* Open. As per libfuse, "If [atomic O_TRUNC is]
   300  		// disabled, and an application specifies O_TRUNC, fuse first calls
   301  		// truncate() and then open() with O_TRUNC filtered out.".
   302  		opts := vfs.SetStatOptions{Stat: linux.Statx{Size: 0, Mask: linux.STATX_SIZE}}
   303  		if err := i.setAttr(ctx, i.fs.VFSFilesystem(), auth.CredentialsFromContext(ctx), opts, fhOptions{useFh: false}); err != nil {
   304  			return nil, err
   305  		}
   306  	}
   307  
   308  	if i.fh.new {
   309  		fd.OpenFlag = i.fh.flags
   310  		fd.Fh = i.fh.handle
   311  		i.fh.new = false
   312  		// Only send an open request when the FUSE server supports open or is
   313  		// opening a directory.
   314  	} else if !i.fs.conn.noOpen || i.filemode().IsDir() {
   315  		in := linux.FUSEOpenIn{Flags: opts.Flags & ^uint32(linux.O_CREAT|linux.O_EXCL|linux.O_NOCTTY)}
   316  		// Clear O_TRUNC if the server doesn't support it.
   317  		if !i.fs.conn.atomicOTrunc {
   318  			in.Flags &= ^uint32(linux.O_TRUNC)
   319  		}
   320  
   321  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, opcode, &in)
   322  		res, err := i.fs.conn.Call(ctx, req)
   323  		if err != nil {
   324  			return nil, err
   325  		}
   326  		if err := res.Error(); err != nil {
   327  			if linuxerr.Equals(linuxerr.ENOSYS, err) && !i.filemode().IsDir() {
   328  				i.fs.conn.noOpen = true
   329  			} else {
   330  				return nil, err
   331  			}
   332  		} else {
   333  			out := linux.FUSEOpenOut{}
   334  			if err := res.UnmarshalPayload(&out); err != nil {
   335  				return nil, err
   336  			}
   337  			fd.OpenFlag = out.OpenFlag
   338  			fd.Fh = out.Fh
   339  			// Open was successful. Update inode's size if atomicOTrunc && O_TRUNC.
   340  			if truncateRegFile && i.fs.conn.atomicOTrunc {
   341  				i.fs.conn.mu.Lock()
   342  				i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   343  				i.fs.conn.mu.Unlock()
   344  				i.size.Store(0)
   345  				i.touchCMtime()
   346  			}
   347  		}
   348  	}
   349  	if i.filemode().IsDir() {
   350  		fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
   351  	}
   352  
   353  	// TODO(gvisor.dev/issue/3234): invalidate mmap after implemented it for FUSE Inode
   354  	fd.DirectIO = fd.OpenFlag&linux.FOPEN_DIRECT_IO != 0
   355  	fdOptions := &vfs.FileDescriptionOptions{}
   356  	if fd.OpenFlag&linux.FOPEN_NONSEEKABLE != 0 {
   357  		fdOptions.DenyPRead = true
   358  		fdOptions.DenyPWrite = true
   359  		fd.Nonseekable = true
   360  	}
   361  
   362  	if err := fd.vfsfd.Init(fdImpl, opts.Flags, rp.Mount(), d.VFSDentry(), fdOptions); err != nil {
   363  		return nil, err
   364  	}
   365  	return &fd.vfsfd, nil
   366  }
   367  
   368  // Lookup implements kernfs.Inode.Lookup.
   369  func (i *inode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   370  	in := linux.FUSELookupIn{Name: linux.CString(name)}
   371  	return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
   372  }
   373  
// Keep implements kernfs.Inode.Keep. It always reports true.
func (i *inode) Keep() bool {
	// Return true so that kernfs keeps the new dentry pointing to this
	// inode in the dentry tree. This is needed because inodes created via
	// Lookup are not temporary. They might refer to existing files on the
	// server that can be Unlink'd/Rmdir'd.
	return true
}
   382  
// IterDirents implements kernfs.Inode.IterDirents. It never invokes callback
// and returns offset unchanged with a nil error.
func (*inode) IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
	return offset, nil
}
   387  
   388  // NewFile implements kernfs.Inode.NewFile.
   389  func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) {
   390  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   391  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
   392  	in := linux.FUSECreateIn{
   393  		CreateMeta: linux.FUSECreateMeta{
   394  			Flags: opts.Flags,
   395  			Mode:  uint32(opts.Mode) | linux.S_IFREG,
   396  			Umask: umaskFromContext(ctx),
   397  		},
   398  		Name: linux.CString(name),
   399  	}
   400  	return i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
   401  }
   402  
   403  // NewNode implements kernfs.Inode.NewNode.
   404  func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (kernfs.Inode, error) {
   405  	in := linux.FUSEMknodIn{
   406  		MknodMeta: linux.FUSEMknodMeta{
   407  			Mode:  uint32(opts.Mode),
   408  			Rdev:  linux.MakeDeviceID(uint16(opts.DevMajor), opts.DevMinor),
   409  			Umask: umaskFromContext(ctx),
   410  		},
   411  		Name: linux.CString(name),
   412  	}
   413  	return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
   414  }
   415  
   416  // NewSymlink implements kernfs.Inode.NewSymlink.
   417  func (i *inode) NewSymlink(ctx context.Context, name, target string) (kernfs.Inode, error) {
   418  	in := linux.FUSESymlinkIn{
   419  		Name:   linux.CString(name),
   420  		Target: linux.CString(target),
   421  	}
   422  	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
   423  }
   424  
   425  // NewLink implements kernfs.Inode.NewLink.
   426  func (i *inode) NewLink(ctx context.Context, name string, target kernfs.Inode) (kernfs.Inode, error) {
   427  	targetInode := target.(*inode)
   428  	in := linux.FUSELinkIn{
   429  		OldNodeID: primitive.Uint64(targetInode.nodeID),
   430  		Name:      linux.CString(name),
   431  	}
   432  	return i.newEntry(ctx, name, targetInode.Mode().FileType(), linux.FUSE_LINK, &in)
   433  }
   434  
   435  // Unlink implements kernfs.Inode.Unlink.
   436  func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
   437  	in := linux.FUSEUnlinkIn{Name: linux.CString(name)}
   438  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_UNLINK, &in)
   439  	res, err := i.fs.conn.Call(ctx, req)
   440  	if err != nil {
   441  		return err
   442  	}
   443  	// only return error, discard res.
   444  	return res.Error()
   445  }
   446  
   447  // NewDir implements kernfs.Inode.NewDir.
   448  func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
   449  	in := linux.FUSEMkdirIn{
   450  		MkdirMeta: linux.FUSEMkdirMeta{
   451  			Mode:  uint32(opts.Mode),
   452  			Umask: umaskFromContext(ctx),
   453  		},
   454  		Name: linux.CString(name),
   455  	}
   456  	return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
   457  }
   458  
   459  // RmDir implements kernfs.Inode.RmDir.
   460  func (i *inode) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
   461  	in := linux.FUSERmDirIn{Name: linux.CString(name)}
   462  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_RMDIR, &in)
   463  	res, err := i.fs.conn.Call(ctx, req)
   464  	if err != nil {
   465  		return err
   466  	}
   467  	return res.Error()
   468  }
   469  
   470  // Rename implements kernfs.Inode.Rename.
   471  func (i *inode) Rename(ctx context.Context, oldname, newname string, child, dstDir kernfs.Inode) error {
   472  	dstDirInode := dstDir.(*inode)
   473  	in := linux.FUSERenameIn{
   474  		Newdir:  primitive.Uint64(dstDirInode.nodeID),
   475  		Oldname: linux.CString(oldname),
   476  		Newname: linux.CString(newname),
   477  	}
   478  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_RENAME, &in)
   479  	res, err := i.fs.conn.Call(ctx, req)
   480  	if err != nil {
   481  		return err
   482  	}
   483  	return res.Error()
   484  }
   485  
   486  // newEntry calls FUSE server for entry creation and allocates corresponding
   487  // entry according to response. Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK,
   488  // FUSE_LINK and FUSE_LOOKUP.
   489  func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (kernfs.Inode, error) {
   490  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, opcode, payload)
   491  	res, err := i.fs.conn.Call(ctx, req)
   492  	if err != nil {
   493  		return nil, err
   494  	}
   495  	if err := res.Error(); err != nil {
   496  		return nil, err
   497  	}
   498  	out := linux.FUSECreateOut{}
   499  	if opcode == linux.FUSE_CREATE {
   500  		if err := res.UnmarshalPayload(&out); err != nil {
   501  			return nil, err
   502  		}
   503  	} else {
   504  		if err := res.UnmarshalPayload(&out.FUSEEntryOut); err != nil {
   505  			return nil, err
   506  		}
   507  	}
   508  	if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
   509  		return nil, linuxerr.EIO
   510  	}
   511  	child := i.fs.newInode(ctx, out.NodeID, out.Attr)
   512  	if opcode == linux.FUSE_CREATE {
   513  		// File handler is returned by fuse server at a time of file create.
   514  		// Save it temporary in a created child, so Open could return it when invoked
   515  		// to be sure after fh is consumed reset 'isNewFh' flag of inode
   516  		childI, ok := child.(*inode)
   517  		if ok {
   518  			childI.fh.new = true
   519  			childI.fh.handle = out.FUSEOpenOut.Fh
   520  			childI.fh.flags = out.FUSEOpenOut.OpenFlag
   521  		}
   522  	}
   523  	return child, nil
   524  }
   525  
   526  // Getlink implements kernfs.Inode.Getlink.
   527  func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
   528  	path, err := i.Readlink(ctx, mnt)
   529  	return vfs.VirtualDentry{}, path, err
   530  }
   531  
   532  // Readlink implements kernfs.Inode.Readlink.
   533  func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
   534  	i.attrMu.Lock()
   535  	defer i.attrMu.Unlock()
   536  	if i.filemode().FileType()&linux.S_IFLNK == 0 {
   537  		return "", linuxerr.EINVAL
   538  	}
   539  	if len(i.link) == 0 {
   540  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
   541  		res, err := i.fs.conn.Call(ctx, req)
   542  		if err != nil {
   543  			return "", err
   544  		}
   545  		i.link = string(res.data[res.hdr.SizeBytes():])
   546  		if !mnt.Options().ReadOnly {
   547  			i.attrTime = 0
   548  		}
   549  	}
   550  	return i.link, nil
   551  }
   552  
   553  // getFUSEAttr returns a linux.FUSEAttr of this inode stored in local cache.
   554  //
   555  // +checklocks:i.attrMu
   556  func (i *inode) getFUSEAttr() linux.FUSEAttr {
   557  	ns := time.Second.Nanoseconds()
   558  	return linux.FUSEAttr{
   559  		Ino:       i.nodeID,
   560  		UID:       i.uid.Load(),
   561  		GID:       i.gid.Load(),
   562  		Size:      i.size.Load(),
   563  		Mode:      uint32(i.filemode()),
   564  		BlkSize:   i.blockSize.Load(),
   565  		Atime:     uint64(i.atime.Load() / ns),
   566  		Mtime:     uint64(i.mtime.Load() / ns),
   567  		Ctime:     uint64(i.ctime.Load() / ns),
   568  		AtimeNsec: uint32(i.atime.Load() % ns),
   569  		MtimeNsec: uint32(i.mtime.Load() % ns),
   570  		CtimeNsec: uint32(i.ctime.Load() % ns),
   571  		Nlink:     i.nlink.Load(),
   572  	}
   573  }
   574  
   575  // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
   576  // opts.Sync attribute is ignored since the synchronization is handled by the
   577  // FUSE server.
   578  func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
   579  	var stat linux.Statx
   580  	stat.Blksize = attr.BlkSize
   581  	stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor
   582  
   583  	rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev)
   584  	stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor
   585  
   586  	if mask&linux.STATX_MODE != 0 {
   587  		stat.Mode = uint16(attr.Mode)
   588  	}
   589  	if mask&linux.STATX_NLINK != 0 {
   590  		stat.Nlink = attr.Nlink
   591  	}
   592  	if mask&linux.STATX_UID != 0 {
   593  		stat.UID = attr.UID
   594  	}
   595  	if mask&linux.STATX_GID != 0 {
   596  		stat.GID = attr.GID
   597  	}
   598  	if mask&linux.STATX_ATIME != 0 {
   599  		stat.Atime = linux.StatxTimestamp{
   600  			Sec:  int64(attr.Atime),
   601  			Nsec: attr.AtimeNsec,
   602  		}
   603  	}
   604  	if mask&linux.STATX_MTIME != 0 {
   605  		stat.Mtime = linux.StatxTimestamp{
   606  			Sec:  int64(attr.Mtime),
   607  			Nsec: attr.MtimeNsec,
   608  		}
   609  	}
   610  	if mask&linux.STATX_CTIME != 0 {
   611  		stat.Ctime = linux.StatxTimestamp{
   612  			Sec:  int64(attr.Ctime),
   613  			Nsec: attr.CtimeNsec,
   614  		}
   615  	}
   616  	if mask&linux.STATX_INO != 0 {
   617  		stat.Ino = attr.Ino
   618  	}
   619  	if mask&linux.STATX_SIZE != 0 {
   620  		stat.Size = attr.Size
   621  	}
   622  	if mask&linux.STATX_BLOCKS != 0 {
   623  		stat.Blocks = attr.Blocks
   624  	}
   625  	return stat
   626  }
   627  
   628  // getAttr gets the attribute of this inode by issuing a FUSE_GETATTR request
   629  // or read from local cache. It updates the corresponding attributes if
   630  // necessary.
   631  //
   632  // +checklocks:i.attrMu
   633  func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions, flags uint32, fh uint64) (linux.FUSEAttr, error) {
   634  	// TODO(gvisor.dev/issue/3679): send the request only if
   635  	//	- invalid local cache for fields specified in the opts.Mask
   636  	//	- forced update
   637  	//	- i.attributeTime expired
   638  	// If local cache is still valid, return local cache.
   639  	// Currently we always send a request,
   640  	// and we always set the metadata with the new result,
   641  	// unless attributeVersion has changed.
   642  	creds := auth.CredentialsFromContext(ctx)
   643  
   644  	in := linux.FUSEGetAttrIn{
   645  		GetAttrFlags: flags,
   646  		Fh:           fh,
   647  	}
   648  	req := i.fs.conn.NewRequest(creds, pidFromContext(ctx), i.nodeID, linux.FUSE_GETATTR, &in)
   649  	res, err := i.fs.conn.Call(ctx, req)
   650  	if err != nil {
   651  		return linux.FUSEAttr{}, err
   652  	}
   653  	if err := res.Error(); err != nil {
   654  		return linux.FUSEAttr{}, err
   655  	}
   656  	var out linux.FUSEAttrOut
   657  	if err := res.UnmarshalPayload(&out); err != nil {
   658  		return linux.FUSEAttr{}, err
   659  	}
   660  
   661  	// Local version is newer, return the local one.
   662  	i.fs.conn.mu.Lock()
   663  	attributeVersion := i.fs.conn.attributeVersion.Load()
   664  	if attributeVersion != 0 && i.attrVersion.Load() > attributeVersion {
   665  		i.fs.conn.mu.Unlock()
   666  		return i.getFUSEAttr(), nil
   667  	}
   668  	i.fs.conn.mu.Unlock()
   669  	i.updateAttrs(out.Attr, out.AttrValid)
   670  	return out.Attr, nil
   671  }
   672  
   673  // reviseAttr attempts to update the attributes for internal purposes
   674  // by calling getAttr with a pre-specified mask.
   675  // Used by read, write, lseek.
   676  //
   677  // +checklocks:i.attrMu
   678  func (i *inode) reviseAttr(ctx context.Context, flags uint32, fh uint64) error {
   679  	// Never need atime for internal purposes.
   680  	_, err := i.getAttr(ctx, i.fs.VFSFilesystem(), vfs.StatOptions{
   681  		Mask: linux.STATX_BASIC_STATS &^ linux.STATX_ATIME,
   682  	}, flags, fh)
   683  	return err
   684  }
   685  
   686  // Stat implements kernfs.Inode.Stat.
   687  func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   688  	i.attrMu.Lock()
   689  	defer i.attrMu.Unlock()
   690  	attr, err := i.getAttr(ctx, fs, opts, 0, 0)
   691  	if err != nil {
   692  		return linux.Statx{}, err
   693  	}
   694  
   695  	return statFromFUSEAttr(attr, opts.Mask, i.fs.devMinor), nil
   696  }
   697  
   698  // DecRef implements kernfs.Inode.DecRef.
   699  func (i *inode) DecRef(ctx context.Context) {
   700  	i.inodeRefs.DecRef(func() { i.Destroy(ctx) })
   701  }
   702  
   703  // StatFS implements kernfs.Inode.StatFS.
   704  func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
   705  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID,
   706  		linux.FUSE_STATFS, &linux.FUSEEmptyIn{},
   707  	)
   708  	res, err := i.fs.conn.Call(ctx, req)
   709  	if err != nil {
   710  		return linux.Statfs{}, err
   711  	}
   712  	if err := res.Error(); err != nil {
   713  		return linux.Statfs{}, err
   714  	}
   715  
   716  	var out linux.FUSEStatfsOut
   717  	if err := res.UnmarshalPayload(&out); err != nil {
   718  		return linux.Statfs{}, err
   719  	}
   720  
   721  	return linux.Statfs{
   722  		Type:            linux.FUSE_SUPER_MAGIC,
   723  		Blocks:          uint64(out.Blocks),
   724  		BlocksFree:      out.BlocksFree,
   725  		BlocksAvailable: out.BlocksAvailable,
   726  		Files:           out.Files,
   727  		FilesFree:       out.FilesFree,
   728  		BlockSize:       int64(out.BlockSize),
   729  		NameLength:      uint64(out.NameLength),
   730  		FragmentSize:    int64(out.FragmentSize),
   731  	}, nil
   732  }
   733  
   734  // fattrMaskFromStats converts vfs.SetStatOptions.Stat.Mask to linux stats mask
   735  // aligned with the attribute mask defined in include/linux/fs.h.
   736  func fattrMaskFromStats(mask uint32) uint32 {
   737  	var fuseAttrMask uint32
   738  	maskMap := map[uint32]uint32{
   739  		linux.STATX_MODE:  linux.FATTR_MODE,
   740  		linux.STATX_UID:   linux.FATTR_UID,
   741  		linux.STATX_GID:   linux.FATTR_GID,
   742  		linux.STATX_SIZE:  linux.FATTR_SIZE,
   743  		linux.STATX_ATIME: linux.FATTR_ATIME,
   744  		linux.STATX_MTIME: linux.FATTR_MTIME,
   745  		linux.STATX_CTIME: linux.FATTR_CTIME,
   746  	}
   747  	for statxMask, fattrMask := range maskMap {
   748  		if mask&statxMask != 0 {
   749  			fuseAttrMask |= fattrMask
   750  		}
   751  	}
   752  	return fuseAttrMask
   753  }
   754  
   755  // SetStat implements kernfs.Inode.SetStat.
   756  func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
   757  	i.attrMu.Lock()
   758  	defer i.attrMu.Unlock()
   759  	if err := vfs.CheckSetStat(ctx, creds, &opts, i.filemode(), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load())); err != nil {
   760  		return err
   761  	}
   762  	if opts.Stat.Mask == 0 {
   763  		return nil
   764  	}
   765  	return i.setAttr(ctx, fs, creds, opts, fhOptions{useFh: false})
   766  }
   767  
// fhOptions tells setAttr whether the request should reference an open file
// handle. When useFh is set, FATTR_FH is added to the request's valid mask;
// fh is always copied into the request's Fh field.
type fhOptions struct {
	useFh bool
	fh    uint64
}
   772  
   773  // +checklocks:i.attrMu
   774  func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions, fhOpts fhOptions) error {
   775  	// We should retain the original file type when assigning a new mode.
   776  	fattrMask := fattrMaskFromStats(opts.Stat.Mask)
   777  	if fhOpts.useFh {
   778  		fattrMask |= linux.FATTR_FH
   779  	}
   780  	if opts.Stat.Mask&linux.STATX_ATIME != 0 && opts.Stat.Atime.Nsec == linux.UTIME_NOW {
   781  		fattrMask |= linux.FATTR_ATIME_NOW
   782  	}
   783  	if opts.Stat.Mask&linux.STATX_MTIME != 0 && opts.Stat.Mtime.Nsec == linux.UTIME_NOW {
   784  		fattrMask |= linux.FATTR_ATIME_NOW
   785  	}
   786  	in := linux.FUSESetAttrIn{
   787  		Valid:     fattrMask,
   788  		Fh:        fhOpts.fh,
   789  		Size:      opts.Stat.Size,
   790  		Atime:     uint64(opts.Stat.Atime.Sec),
   791  		Mtime:     uint64(opts.Stat.Mtime.Sec),
   792  		Ctime:     uint64(opts.Stat.Ctime.Sec),
   793  		AtimeNsec: opts.Stat.Atime.Nsec,
   794  		MtimeNsec: opts.Stat.Mtime.Nsec,
   795  		CtimeNsec: opts.Stat.Ctime.Nsec,
   796  		Mode:      uint32(uint16(i.filemode().FileType()) | opts.Stat.Mode),
   797  		UID:       opts.Stat.UID,
   798  		GID:       opts.Stat.GID,
   799  	}
   800  	req := i.fs.conn.NewRequest(creds, pidFromContext(ctx), i.nodeID, linux.FUSE_SETATTR, &in)
   801  	res, err := i.fs.conn.Call(ctx, req)
   802  	if err != nil {
   803  		return err
   804  	}
   805  	if err := res.Error(); err != nil {
   806  		return err
   807  	}
   808  	out := linux.FUSEAttrOut{}
   809  	if err := res.UnmarshalPayload(&out); err != nil {
   810  		return err
   811  	}
   812  	i.updateAttrs(out.Attr, out.AttrValid)
   813  	return nil
   814  }
   815  
   816  // +checklocks:i.attrMu
   817  func (i *inode) updateAttrs(attr linux.FUSEAttr, attrTimeout uint64) {
   818  	i.fs.conn.mu.Lock()
   819  	i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   820  	i.fs.conn.mu.Unlock()
   821  	i.attrTime = attrTimeout
   822  
   823  	i.ino.Store(attr.Ino)
   824  
   825  	i.mode.Store((attr.Mode & 07777) | (i.mode.Load() & linux.S_IFMT))
   826  	i.uid.Store(attr.UID)
   827  	i.gid.Store(attr.GID)
   828  
   829  	i.atime.Store(attr.ATimeNsec())
   830  	i.mtime.Store(attr.MTimeNsec())
   831  	i.ctime.Store(attr.CTimeNsec())
   832  
   833  	i.size.Store(attr.Size)
   834  	i.nlink.Store(attr.Nlink)
   835  
   836  	if !i.fs.opts.defaultPermissions {
   837  		i.mode.Store(i.mode.Load() & ^uint32(linux.S_ISVTX))
   838  	}
   839  }