github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/fuse/inode.go (about)

     1  // Copyright 2022 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/marshal"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/marshal/primitive"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel"
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    33  )
    34  
// fileHandle holds a file handle handed back by the FUSE server together with
// the open flags that came with it.
//
// +stateify savable
type fileHandle struct {
	// new is true while the handle has been stored but not yet consumed by
	// inode.Open.
	new bool
	// handle is the server-assigned file handle.
	handle uint64
	// flags are the FOPEN_* flags returned alongside handle.
	flags uint32
}
    41  
// inode implements kernfs.Inode for files served by a FUSE daemon.
//
// +stateify savable
type inode struct {
	inodeRefs
	kernfs.InodeAlwaysValid
	kernfs.InodeNotAnonymous
	kernfs.InodeNotSymlink
	kernfs.InodeWatches
	kernfs.OrderedChildren
	kernfs.CachedMappable

	// fs is the owning filesystem. fs is immutable.
	fs *filesystem

	// nodeID is a unique id which identifies the inode between userspace
	// and the sentry. Immutable.
	nodeID uint64

	// attrVersion is the version of the last attribute change.
	attrVersion atomicbitops.Uint64

	// attrTime is the time until which the cached attributes are valid.
	attrTime uint64

	// link is the cached result of following a symbolic link.
	link string

	// fh caches the file handle returned by the server from a FUSE_CREATE request
	// so we don't have to send a separate FUSE_OPEN request.
	fh fileHandle

	locks   vfs.FileLocks
	watches vfs.Watches

	// attrMu protects the attributes of this inode.
	attrMu sync.Mutex

	// +checklocks:attrMu
	ino atomicbitops.Uint64 // Stat data, not accessed for path walking.
	// +checklocks:attrMu
	uid atomicbitops.Uint32 // auth.KUID, but stored as raw uint32 for sync/atomic.
	// +checklocks:attrMu
	gid atomicbitops.Uint32 // auth.KGID, but stored as raw uint32 for sync/atomic.
	// +checklocks:attrMu
	mode atomicbitops.Uint32 // File type and mode.

	// Timestamps in nanoseconds from the unix epoch.
	// +checklocks:attrMu
	atime atomicbitops.Int64
	// +checklocks:attrMu
	mtime atomicbitops.Int64
	// +checklocks:attrMu
	ctime atomicbitops.Int64

	// +checklocks:attrMu
	size atomicbitops.Uint64

	// nlink counts the number of hard links to this inode. It is updated and
	// accessed using atomic operations but is not protected by attrMu.
	nlink atomicbitops.Uint32

	// +checklocks:attrMu
	blockSize atomicbitops.Uint32 // 0 if unknown.
}
   107  
   108  func blockerFromContext(ctx context.Context) context.Blocker {
   109  	kernelTask := kernel.TaskFromContext(ctx)
   110  	if kernelTask == nil {
   111  		return ctx
   112  	}
   113  	return kernelTask
   114  }
   115  
   116  func pidFromContext(ctx context.Context) uint32 {
   117  	kernelTask := kernel.TaskFromContext(ctx)
   118  	if kernelTask == nil {
   119  		return 0
   120  	}
   121  	return uint32(kernelTask.ThreadID())
   122  }
   123  
   124  func umaskFromContext(ctx context.Context) uint32 {
   125  	kernelTask := kernel.TaskFromContext(ctx)
   126  	umask := uint32(0)
   127  	if kernelTask != nil {
   128  		umask = uint32(kernelTask.FSContext().Umask())
   129  	}
   130  	return umask
   131  }
   132  
   133  func (i *inode) Mode() linux.FileMode {
   134  	i.attrMu.Lock()
   135  	defer i.attrMu.Unlock()
   136  	return i.filemode()
   137  }
   138  
   139  func (i *inode) UID() auth.KUID {
   140  	i.attrMu.Lock()
   141  	defer i.attrMu.Unlock()
   142  	return auth.KUID(i.uid.Load())
   143  }
   144  
   145  func (i *inode) GID() auth.KGID {
   146  	i.attrMu.Lock()
   147  	defer i.attrMu.Unlock()
   148  	return auth.KGID(i.gid.Load())
   149  }
   150  
   151  // +checklocks:i.attrMu
   152  func (i *inode) filemode() linux.FileMode {
   153  	return linux.FileMode(i.mode.Load())
   154  }
   155  
   156  // touchCMTime updates the ctime and mtime attributes to be the current time.
   157  //
   158  // +checklocks:i.attrMu
   159  func (i *inode) touchCMtime() {
   160  	now := i.fs.clock.Now().Nanoseconds()
   161  	i.mtime.Store(now)
   162  	i.ctime.Store(now)
   163  }
   164  
   165  // touchAtime updates the atime attribut to be the current time.
   166  //
   167  // +checklocks:i.attrMu
   168  func (i *inode) touchAtime() {
   169  	i.atime.Store(i.fs.clock.Now().Nanoseconds())
   170  }
   171  
   172  // +checklocks:i.attrMu
   173  func (i *inode) init(creds *auth.Credentials, devMajor, devMinor uint32, nodeid uint64, mode linux.FileMode, nlink uint32) {
   174  	if mode.FileType() == 0 {
   175  		panic(fmt.Sprintf("No file type specified in 'mode' for InodeAttrs.Init(): mode=0%o", mode))
   176  	}
   177  
   178  	i.nodeID = nodeid
   179  	i.ino.Store(nodeid)
   180  	i.mode.Store(uint32(mode))
   181  	i.uid.Store(uint32(creds.EffectiveKUID))
   182  	i.gid.Store(uint32(creds.EffectiveKGID))
   183  	i.nlink.Store(nlink)
   184  	i.blockSize.Store(hostarch.PageSize)
   185  
   186  	now := i.fs.clock.Now().Nanoseconds()
   187  	i.atime.Store(now)
   188  	i.mtime.Store(now)
   189  	i.ctime.Store(now)
   190  }
   191  
   192  // CheckPermissions implements kernfs.Inode.CheckPermissions.
   193  func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
   194  	// Since FUSE operations are ultimately backed by a userspace process (the
   195  	// fuse daemon), allowing a process to call into fusefs grants the daemon
   196  	// ptrace-like capabilities over the calling process. Because of this, by
   197  	// default FUSE only allows the mount owner to interact with the
   198  	// filesystem. This explicitly excludes setuid/setgid processes.
   199  	//
   200  	// This behaviour can be overriden with the 'allow_other' mount option.
   201  	//
   202  	// See fs/fuse/dir.c:fuse_allow_current_process() in Linux.
   203  	if !i.fs.opts.allowOther {
   204  		if creds.RealKUID != i.fs.opts.uid ||
   205  			creds.EffectiveKUID != i.fs.opts.uid ||
   206  			creds.SavedKUID != i.fs.opts.uid ||
   207  			creds.RealKGID != i.fs.opts.gid ||
   208  			creds.EffectiveKGID != i.fs.opts.gid ||
   209  			creds.SavedKGID != i.fs.opts.gid {
   210  			return linuxerr.EACCES
   211  		}
   212  	}
   213  
   214  	// By default, fusefs delegates all permission checks to the server.
   215  	// However, standard unix permission checks can be enabled with the
   216  	// default_permissions mount option.
   217  	i.attrMu.Lock()
   218  	defer i.attrMu.Unlock()
   219  	refreshed := false
   220  	opts := vfs.StatOptions{Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID}
   221  	if i.fs.opts.defaultPermissions || (ats.MayExec() && i.filemode().FileType() == linux.S_IFREG) {
   222  		if uint64(i.fs.clock.Now().Nanoseconds()) > i.attrTime {
   223  			refreshed = true
   224  			if _, err := i.getAttr(ctx, i.fs.VFSFilesystem(), opts, 0, 0); err != nil {
   225  				return err
   226  			}
   227  		}
   228  	}
   229  
   230  	if i.fs.opts.defaultPermissions || (ats.MayExec() && i.filemode().FileType() == linux.S_IFREG) {
   231  		err := vfs.GenericCheckPermissions(creds, ats, linux.FileMode(i.mode.Load()), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load()))
   232  		if linuxerr.Equals(linuxerr.EACCES, err) && !refreshed {
   233  			if _, err := i.getAttr(ctx, i.fs.VFSFilesystem(), opts, 0, 0); err != nil {
   234  				return err
   235  			}
   236  			return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(i.mode.Load()), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load()))
   237  		}
   238  		return err
   239  	} else if ats.MayRead() || ats.MayWrite() || ats.MayExec() {
   240  		in := linux.FUSEAccessIn{Mask: uint32(ats)}
   241  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_ACCESS, &in)
   242  		res, err := i.fs.conn.Call(ctx, req)
   243  		if err != nil {
   244  			return err
   245  		}
   246  		return res.Error()
   247  	}
   248  	return nil
   249  }
   250  
   251  // Open implements kernfs.Inode.Open.
   252  func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   253  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   254  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY |
   255  		linux.O_APPEND | linux.O_DIRECT
   256  	i.attrMu.Lock()
   257  	defer i.attrMu.Unlock()
   258  	if opts.Flags&linux.O_LARGEFILE == 0 && i.size.Load() > linux.MAX_NON_LFS {
   259  		return nil, linuxerr.EOVERFLOW
   260  	}
   261  
   262  	var (
   263  		fd     *fileDescription
   264  		fdImpl vfs.FileDescriptionImpl
   265  		opcode linux.FUSEOpcode
   266  	)
   267  	switch i.filemode().FileType() {
   268  	case linux.S_IFREG:
   269  		regularFD := &regularFileFD{}
   270  		fd = &(regularFD.fileDescription)
   271  		fdImpl = regularFD
   272  		opcode = linux.FUSE_OPEN
   273  	case linux.S_IFDIR:
   274  		if opts.Flags&linux.O_CREAT != 0 {
   275  			return nil, linuxerr.EISDIR
   276  		}
   277  		if ats := vfs.AccessTypesForOpenFlags(&opts); ats.MayWrite() {
   278  			return nil, linuxerr.EISDIR
   279  		}
   280  		if opts.Flags&linux.O_DIRECT != 0 {
   281  			return nil, linuxerr.EINVAL
   282  		}
   283  		directoryFD := &directoryFD{}
   284  		fd = &(directoryFD.fileDescription)
   285  		fdImpl = directoryFD
   286  		opcode = linux.FUSE_OPENDIR
   287  	case linux.S_IFLNK:
   288  		return nil, linuxerr.ELOOP
   289  	}
   290  
   291  	fd.LockFD.Init(&i.locks)
   292  	// FOPEN_KEEP_CACHE is the default flag for noOpen.
   293  	fd.OpenFlag = linux.FOPEN_KEEP_CACHE
   294  
   295  	if i.fh.new {
   296  		fd.OpenFlag = i.fh.flags
   297  		fd.Fh = i.fh.handle
   298  		i.fh.new = false
   299  		// Only send an open request when the FUSE server supports open or is
   300  		// opening a directory.
   301  	} else if !i.fs.conn.noOpen || i.filemode().IsDir() {
   302  		in := linux.FUSEOpenIn{Flags: opts.Flags & ^uint32(linux.O_CREAT|linux.O_EXCL|linux.O_NOCTTY)}
   303  		// Truncating with SETATTR instead of O_TRUNC, so clear the flag.
   304  		if !i.fs.conn.atomicOTrunc {
   305  			in.Flags &= ^uint32(linux.O_TRUNC)
   306  		}
   307  
   308  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, opcode, &in)
   309  		res, err := i.fs.conn.Call(ctx, req)
   310  		if err != nil {
   311  			return nil, err
   312  		}
   313  		if err := res.Error(); err != nil {
   314  			if linuxerr.Equals(linuxerr.ENOSYS, err) && !i.filemode().IsDir() {
   315  				i.fs.conn.noOpen = true
   316  			} else {
   317  				return nil, err
   318  			}
   319  		} else {
   320  			out := linux.FUSEOpenOut{}
   321  			if err := res.UnmarshalPayload(&out); err != nil {
   322  				return nil, err
   323  			}
   324  			fd.OpenFlag = out.OpenFlag
   325  			fd.Fh = out.Fh
   326  		}
   327  	}
   328  	if i.filemode().IsDir() {
   329  		fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
   330  	}
   331  
   332  	// TODO(gvisor.dev/issue/3234): invalidate mmap after implemented it for FUSE Inode
   333  	fd.DirectIO = fd.OpenFlag&linux.FOPEN_DIRECT_IO != 0
   334  	fdOptions := &vfs.FileDescriptionOptions{}
   335  	if fd.OpenFlag&linux.FOPEN_NONSEEKABLE != 0 {
   336  		fdOptions.DenyPRead = true
   337  		fdOptions.DenyPWrite = true
   338  		fd.Nonseekable = true
   339  	}
   340  
   341  	// If atomicOTrunc and O_TRUNC are set, just update the inode's version number
   342  	// and set its size to 0 since the truncation is handled by the FUSE daemon.
   343  	// Otherwise send a separate SETATTR to truncate the file size.
   344  	if opts.Flags&linux.O_TRUNC != 0 && i.filemode().FileType() == linux.S_IFREG {
   345  		if i.fs.conn.atomicOTrunc {
   346  			i.fs.conn.mu.Lock()
   347  			i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   348  			i.fs.conn.mu.Unlock()
   349  			i.size.Store(0)
   350  			i.touchCMtime()
   351  		} else {
   352  			opts := vfs.SetStatOptions{Stat: linux.Statx{Size: 0, Mask: linux.STATX_SIZE}}
   353  			i.setAttr(ctx, i.fs.VFSFilesystem(), auth.CredentialsFromContext(ctx), opts, fhOptions{useFh: true, fh: i.fh.handle})
   354  		}
   355  	}
   356  
   357  	if err := fd.vfsfd.Init(fdImpl, opts.Flags, rp.Mount(), d.VFSDentry(), fdOptions); err != nil {
   358  		return nil, err
   359  	}
   360  	return &fd.vfsfd, nil
   361  }
   362  
   363  // Lookup implements kernfs.Inode.Lookup.
   364  func (i *inode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   365  	in := linux.FUSELookupIn{Name: linux.CString(name)}
   366  	return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
   367  }
   368  
   369  // Keep implements kernfs.Inode.Keep.
   370  func (i *inode) Keep() bool {
   371  	// Return true so that kernfs keeps the new dentry pointing to this
   372  	// inode in the dentry tree. This is needed because inodes created via
   373  	// Lookup are not temporary. They might refer to existing files on server
   374  	// that can be Unlink'd/Rmdir'd.
   375  	return true
   376  }
   377  
   378  // IterDirents implements kernfs.Inode.IterDirents.
   379  func (*inode) IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
   380  	return offset, nil
   381  }
   382  
   383  // NewFile implements kernfs.Inode.NewFile.
   384  func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) {
   385  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   386  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
   387  	in := linux.FUSECreateIn{
   388  		CreateMeta: linux.FUSECreateMeta{
   389  			Flags: opts.Flags,
   390  			Mode:  uint32(opts.Mode) | linux.S_IFREG,
   391  			Umask: umaskFromContext(ctx),
   392  		},
   393  		Name: linux.CString(name),
   394  	}
   395  	return i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
   396  }
   397  
   398  // NewNode implements kernfs.Inode.NewNode.
   399  func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (kernfs.Inode, error) {
   400  	in := linux.FUSEMknodIn{
   401  		MknodMeta: linux.FUSEMknodMeta{
   402  			Mode:  uint32(opts.Mode),
   403  			Rdev:  linux.MakeDeviceID(uint16(opts.DevMajor), opts.DevMinor),
   404  			Umask: umaskFromContext(ctx),
   405  		},
   406  		Name: linux.CString(name),
   407  	}
   408  	return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
   409  }
   410  
   411  // NewSymlink implements kernfs.Inode.NewSymlink.
   412  func (i *inode) NewSymlink(ctx context.Context, name, target string) (kernfs.Inode, error) {
   413  	in := linux.FUSESymlinkIn{
   414  		Name:   linux.CString(name),
   415  		Target: linux.CString(target),
   416  	}
   417  	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
   418  }
   419  
   420  // NewLink implements kernfs.Inode.NewLink.
   421  func (i *inode) NewLink(ctx context.Context, name string, target kernfs.Inode) (kernfs.Inode, error) {
   422  	targetInode := target.(*inode)
   423  	in := linux.FUSELinkIn{
   424  		OldNodeID: primitive.Uint64(targetInode.nodeID),
   425  		Name:      linux.CString(name),
   426  	}
   427  	return i.newEntry(ctx, name, targetInode.Mode().FileType(), linux.FUSE_LINK, &in)
   428  }
   429  
   430  // Unlink implements kernfs.Inode.Unlink.
   431  func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
   432  	in := linux.FUSEUnlinkIn{Name: linux.CString(name)}
   433  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_UNLINK, &in)
   434  	res, err := i.fs.conn.Call(ctx, req)
   435  	if err != nil {
   436  		return err
   437  	}
   438  	// only return error, discard res.
   439  	return res.Error()
   440  }
   441  
   442  // NewDir implements kernfs.Inode.NewDir.
   443  func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
   444  	in := linux.FUSEMkdirIn{
   445  		MkdirMeta: linux.FUSEMkdirMeta{
   446  			Mode:  uint32(opts.Mode),
   447  			Umask: umaskFromContext(ctx),
   448  		},
   449  		Name: linux.CString(name),
   450  	}
   451  	return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
   452  }
   453  
   454  // RmDir implements kernfs.Inode.RmDir.
   455  func (i *inode) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
   456  	in := linux.FUSERmDirIn{Name: linux.CString(name)}
   457  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_RMDIR, &in)
   458  	res, err := i.fs.conn.Call(ctx, req)
   459  	if err != nil {
   460  		return err
   461  	}
   462  	return res.Error()
   463  }
   464  
   465  // Rename implements kernfs.Inode.Rename.
   466  func (i *inode) Rename(ctx context.Context, oldname, newname string, child, dstDir kernfs.Inode) error {
   467  	dstDirInode := dstDir.(*inode)
   468  	in := linux.FUSERenameIn{
   469  		Newdir:  primitive.Uint64(dstDirInode.nodeID),
   470  		Oldname: linux.CString(oldname),
   471  		Newname: linux.CString(newname),
   472  	}
   473  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_RENAME, &in)
   474  	res, err := i.fs.conn.Call(ctx, req)
   475  	if err != nil {
   476  		return err
   477  	}
   478  	return res.Error()
   479  }
   480  
   481  // newEntry calls FUSE server for entry creation and allocates corresponding
   482  // entry according to response. Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK,
   483  // FUSE_LINK and FUSE_LOOKUP.
   484  func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (kernfs.Inode, error) {
   485  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, opcode, payload)
   486  	res, err := i.fs.conn.Call(ctx, req)
   487  	if err != nil {
   488  		return nil, err
   489  	}
   490  	if err := res.Error(); err != nil {
   491  		return nil, err
   492  	}
   493  	out := linux.FUSECreateOut{}
   494  	if opcode == linux.FUSE_CREATE {
   495  		if err := res.UnmarshalPayload(&out); err != nil {
   496  			return nil, err
   497  		}
   498  	} else {
   499  		if err := res.UnmarshalPayload(&out.FUSEEntryOut); err != nil {
   500  			return nil, err
   501  		}
   502  	}
   503  	if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
   504  		return nil, linuxerr.EIO
   505  	}
   506  	child := i.fs.newInode(ctx, out.NodeID, out.Attr)
   507  	if opcode == linux.FUSE_CREATE {
   508  		// File handler is returned by fuse server at a time of file create.
   509  		// Save it temporary in a created child, so Open could return it when invoked
   510  		// to be sure after fh is consumed reset 'isNewFh' flag of inode
   511  		childI, ok := child.(*inode)
   512  		if ok {
   513  			childI.fh.new = true
   514  			childI.fh.handle = out.FUSEOpenOut.Fh
   515  			childI.fh.flags = out.FUSEOpenOut.OpenFlag
   516  		}
   517  	}
   518  	return child, nil
   519  }
   520  
   521  // Getlink implements kernfs.Inode.Getlink.
   522  func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
   523  	path, err := i.Readlink(ctx, mnt)
   524  	return vfs.VirtualDentry{}, path, err
   525  }
   526  
   527  // Readlink implements kernfs.Inode.Readlink.
   528  func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
   529  	i.attrMu.Lock()
   530  	defer i.attrMu.Unlock()
   531  	if i.filemode().FileType()&linux.S_IFLNK == 0 {
   532  		return "", linuxerr.EINVAL
   533  	}
   534  	if len(i.link) == 0 {
   535  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
   536  		res, err := i.fs.conn.Call(ctx, req)
   537  		if err != nil {
   538  			return "", err
   539  		}
   540  		i.link = string(res.data[res.hdr.SizeBytes():])
   541  		if !mnt.Options().ReadOnly {
   542  			i.attrTime = 0
   543  		}
   544  	}
   545  	return i.link, nil
   546  }
   547  
   548  // getFUSEAttr returns a linux.FUSEAttr of this inode stored in local cache.
   549  //
   550  // +checklocks:i.attrMu
   551  func (i *inode) getFUSEAttr() linux.FUSEAttr {
   552  	ns := time.Second.Nanoseconds()
   553  	return linux.FUSEAttr{
   554  		Ino:       i.nodeID,
   555  		UID:       i.uid.Load(),
   556  		GID:       i.gid.Load(),
   557  		Size:      i.size.Load(),
   558  		Mode:      uint32(i.filemode()),
   559  		BlkSize:   i.blockSize.Load(),
   560  		Atime:     uint64(i.atime.Load() / ns),
   561  		Mtime:     uint64(i.mtime.Load() / ns),
   562  		Ctime:     uint64(i.ctime.Load() / ns),
   563  		AtimeNsec: uint32(i.atime.Load() % ns),
   564  		MtimeNsec: uint32(i.mtime.Load() % ns),
   565  		CtimeNsec: uint32(i.ctime.Load() % ns),
   566  		Nlink:     i.nlink.Load(),
   567  	}
   568  }
   569  
   570  // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
   571  // opts.Sync attribute is ignored since the synchronization is handled by the
   572  // FUSE server.
   573  func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
   574  	var stat linux.Statx
   575  	stat.Blksize = attr.BlkSize
   576  	stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor
   577  
   578  	rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev)
   579  	stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor
   580  
   581  	if mask&linux.STATX_MODE != 0 {
   582  		stat.Mode = uint16(attr.Mode)
   583  	}
   584  	if mask&linux.STATX_NLINK != 0 {
   585  		stat.Nlink = attr.Nlink
   586  	}
   587  	if mask&linux.STATX_UID != 0 {
   588  		stat.UID = attr.UID
   589  	}
   590  	if mask&linux.STATX_GID != 0 {
   591  		stat.GID = attr.GID
   592  	}
   593  	if mask&linux.STATX_ATIME != 0 {
   594  		stat.Atime = linux.StatxTimestamp{
   595  			Sec:  int64(attr.Atime),
   596  			Nsec: attr.AtimeNsec,
   597  		}
   598  	}
   599  	if mask&linux.STATX_MTIME != 0 {
   600  		stat.Mtime = linux.StatxTimestamp{
   601  			Sec:  int64(attr.Mtime),
   602  			Nsec: attr.MtimeNsec,
   603  		}
   604  	}
   605  	if mask&linux.STATX_CTIME != 0 {
   606  		stat.Ctime = linux.StatxTimestamp{
   607  			Sec:  int64(attr.Ctime),
   608  			Nsec: attr.CtimeNsec,
   609  		}
   610  	}
   611  	if mask&linux.STATX_INO != 0 {
   612  		stat.Ino = attr.Ino
   613  	}
   614  	if mask&linux.STATX_SIZE != 0 {
   615  		stat.Size = attr.Size
   616  	}
   617  	if mask&linux.STATX_BLOCKS != 0 {
   618  		stat.Blocks = attr.Blocks
   619  	}
   620  	return stat
   621  }
   622  
   623  // getAttr gets the attribute of this inode by issuing a FUSE_GETATTR request
   624  // or read from local cache. It updates the corresponding attributes if
   625  // necessary.
   626  //
   627  // +checklocks:i.attrMu
   628  func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions, flags uint32, fh uint64) (linux.FUSEAttr, error) {
   629  	// TODO(gvisor.dev/issue/3679): send the request only if
   630  	//	- invalid local cache for fields specified in the opts.Mask
   631  	//	- forced update
   632  	//	- i.attributeTime expired
   633  	// If local cache is still valid, return local cache.
   634  	// Currently we always send a request,
   635  	// and we always set the metadata with the new result,
   636  	// unless attributeVersion has changed.
   637  	creds := auth.CredentialsFromContext(ctx)
   638  
   639  	in := linux.FUSEGetAttrIn{
   640  		GetAttrFlags: flags,
   641  		Fh:           fh,
   642  	}
   643  	req := i.fs.conn.NewRequest(creds, pidFromContext(ctx), i.nodeID, linux.FUSE_GETATTR, &in)
   644  	res, err := i.fs.conn.Call(ctx, req)
   645  	if err != nil {
   646  		return linux.FUSEAttr{}, err
   647  	}
   648  	if err := res.Error(); err != nil {
   649  		return linux.FUSEAttr{}, err
   650  	}
   651  	var out linux.FUSEAttrOut
   652  	if err := res.UnmarshalPayload(&out); err != nil {
   653  		return linux.FUSEAttr{}, err
   654  	}
   655  
   656  	// Local version is newer, return the local one.
   657  	i.fs.conn.mu.Lock()
   658  	attributeVersion := i.fs.conn.attributeVersion.Load()
   659  	if attributeVersion != 0 && i.attrVersion.Load() > attributeVersion {
   660  		i.fs.conn.mu.Unlock()
   661  		return i.getFUSEAttr(), nil
   662  	}
   663  	i.fs.conn.mu.Unlock()
   664  	i.updateAttrs(out.Attr, out.AttrValid)
   665  	return out.Attr, nil
   666  }
   667  
   668  // reviseAttr attempts to update the attributes for internal purposes
   669  // by calling getAttr with a pre-specified mask.
   670  // Used by read, write, lseek.
   671  //
   672  // +checklocks:i.attrMu
   673  func (i *inode) reviseAttr(ctx context.Context, flags uint32, fh uint64) error {
   674  	// Never need atime for internal purposes.
   675  	_, err := i.getAttr(ctx, i.fs.VFSFilesystem(), vfs.StatOptions{
   676  		Mask: linux.STATX_BASIC_STATS &^ linux.STATX_ATIME,
   677  	}, flags, fh)
   678  	return err
   679  }
   680  
   681  // Stat implements kernfs.Inode.Stat.
   682  func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   683  	i.attrMu.Lock()
   684  	defer i.attrMu.Unlock()
   685  	attr, err := i.getAttr(ctx, fs, opts, 0, 0)
   686  	if err != nil {
   687  		return linux.Statx{}, err
   688  	}
   689  
   690  	return statFromFUSEAttr(attr, opts.Mask, i.fs.devMinor), nil
   691  }
   692  
   693  // DecRef implements kernfs.Inode.DecRef.
   694  func (i *inode) DecRef(ctx context.Context) {
   695  	i.inodeRefs.DecRef(func() { i.Destroy(ctx) })
   696  }
   697  
   698  // StatFS implements kernfs.Inode.StatFS.
   699  func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
   700  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID,
   701  		linux.FUSE_STATFS, &linux.FUSEEmptyIn{},
   702  	)
   703  	res, err := i.fs.conn.Call(ctx, req)
   704  	if err != nil {
   705  		return linux.Statfs{}, err
   706  	}
   707  	if err := res.Error(); err != nil {
   708  		return linux.Statfs{}, err
   709  	}
   710  
   711  	var out linux.FUSEStatfsOut
   712  	if err := res.UnmarshalPayload(&out); err != nil {
   713  		return linux.Statfs{}, err
   714  	}
   715  
   716  	return linux.Statfs{
   717  		Type:            linux.FUSE_SUPER_MAGIC,
   718  		Blocks:          uint64(out.Blocks),
   719  		BlocksFree:      out.BlocksFree,
   720  		BlocksAvailable: out.BlocksAvailable,
   721  		Files:           out.Files,
   722  		FilesFree:       out.FilesFree,
   723  		BlockSize:       int64(out.BlockSize),
   724  		NameLength:      uint64(out.NameLength),
   725  		FragmentSize:    int64(out.FragmentSize),
   726  	}, nil
   727  }
   728  
   729  // fattrMaskFromStats converts vfs.SetStatOptions.Stat.Mask to linux stats mask
   730  // aligned with the attribute mask defined in include/linux/fs.h.
   731  func fattrMaskFromStats(mask uint32) uint32 {
   732  	var fuseAttrMask uint32
   733  	maskMap := map[uint32]uint32{
   734  		linux.STATX_MODE:  linux.FATTR_MODE,
   735  		linux.STATX_UID:   linux.FATTR_UID,
   736  		linux.STATX_GID:   linux.FATTR_GID,
   737  		linux.STATX_SIZE:  linux.FATTR_SIZE,
   738  		linux.STATX_ATIME: linux.FATTR_ATIME,
   739  		linux.STATX_MTIME: linux.FATTR_MTIME,
   740  		linux.STATX_CTIME: linux.FATTR_CTIME,
   741  	}
   742  	for statxMask, fattrMask := range maskMap {
   743  		if mask&statxMask != 0 {
   744  			fuseAttrMask |= fattrMask
   745  		}
   746  	}
   747  	return fuseAttrMask
   748  }
   749  
   750  // SetStat implements kernfs.Inode.SetStat.
   751  func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
   752  	i.attrMu.Lock()
   753  	defer i.attrMu.Unlock()
   754  	if err := vfs.CheckSetStat(ctx, creds, &opts, i.filemode(), auth.KUID(i.uid.Load()), auth.KGID(i.gid.Load())); err != nil {
   755  		return err
   756  	}
   757  	if opts.Stat.Mask == 0 {
   758  		return nil
   759  	}
   760  	return i.setAttr(ctx, fs, creds, opts, fhOptions{useFh: false})
   761  }
   762  
// fhOptions tells setAttr whether to attach a file handle to the FUSE_SETATTR
// request.
type fhOptions struct {
	// useFh makes setAttr set FATTR_FH in the request's valid mask.
	useFh bool
	// fh is the file handle placed in the request.
	fh uint64
}
   767  
   768  // +checklocks:i.attrMu
   769  func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions, fhOpts fhOptions) error {
   770  	// We should retain the original file type when assigning a new mode.
   771  	fattrMask := fattrMaskFromStats(opts.Stat.Mask)
   772  	if fhOpts.useFh {
   773  		fattrMask |= linux.FATTR_FH
   774  	}
   775  	if opts.Stat.Mask&linux.STATX_ATIME != 0 && opts.Stat.Atime.Nsec == linux.UTIME_NOW {
   776  		fattrMask |= linux.FATTR_ATIME_NOW
   777  	}
   778  	if opts.Stat.Mask&linux.STATX_MTIME != 0 && opts.Stat.Mtime.Nsec == linux.UTIME_NOW {
   779  		fattrMask |= linux.FATTR_ATIME_NOW
   780  	}
   781  	in := linux.FUSESetAttrIn{
   782  		Valid:     fattrMask,
   783  		Fh:        fhOpts.fh,
   784  		Size:      opts.Stat.Size,
   785  		Atime:     uint64(opts.Stat.Atime.Sec),
   786  		Mtime:     uint64(opts.Stat.Mtime.Sec),
   787  		Ctime:     uint64(opts.Stat.Ctime.Sec),
   788  		AtimeNsec: opts.Stat.Atime.Nsec,
   789  		MtimeNsec: opts.Stat.Mtime.Nsec,
   790  		CtimeNsec: opts.Stat.Ctime.Nsec,
   791  		Mode:      uint32(uint16(i.filemode().FileType()) | opts.Stat.Mode),
   792  		UID:       opts.Stat.UID,
   793  		GID:       opts.Stat.GID,
   794  	}
   795  	req := i.fs.conn.NewRequest(creds, pidFromContext(ctx), i.nodeID, linux.FUSE_SETATTR, &in)
   796  	res, err := i.fs.conn.Call(ctx, req)
   797  	if err != nil {
   798  		return err
   799  	}
   800  	if err := res.Error(); err != nil {
   801  		return err
   802  	}
   803  	out := linux.FUSEAttrOut{}
   804  	if err := res.UnmarshalPayload(&out); err != nil {
   805  		return err
   806  	}
   807  	i.updateAttrs(out.Attr, out.AttrValid)
   808  	return nil
   809  }
   810  
   811  // +checklocks:i.attrMu
   812  func (i *inode) updateAttrs(attr linux.FUSEAttr, attrTimeout uint64) {
   813  	i.fs.conn.mu.Lock()
   814  	i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   815  	i.fs.conn.mu.Unlock()
   816  	i.attrTime = attrTimeout
   817  
   818  	i.ino.Store(attr.Ino)
   819  
   820  	i.mode.Store((attr.Mode & 07777) | (i.mode.Load() & linux.S_IFMT))
   821  	i.uid.Store(attr.UID)
   822  	i.gid.Store(attr.GID)
   823  
   824  	i.atime.Store(attr.ATimeNsec())
   825  	i.mtime.Store(attr.MTimeNsec())
   826  	i.ctime.Store(attr.CTimeNsec())
   827  
   828  	i.size.Store(attr.Size)
   829  	i.nlink.Store(attr.Nlink)
   830  
   831  	if !i.fs.opts.defaultPermissions {
   832  		i.mode.Store(i.mode.Load() & ^uint32(linux.S_ISVTX))
   833  	}
   834  }