github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/fusefs.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package fuse implements fusefs.
    16  package fuse
    17  
    18  import (
    19  	"math"
    20  	"strconv"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    25  	"github.com/SagerNet/gvisor/pkg/context"
    26  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    27  	"github.com/SagerNet/gvisor/pkg/log"
    28  	"github.com/SagerNet/gvisor/pkg/marshal"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    32  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    33  	"github.com/SagerNet/gvisor/pkg/syserror"
    34  	"github.com/SagerNet/gvisor/pkg/waiter"
    35  )
    36  
// Name is the default filesystem name. It is the value returned by
// FilesystemType.Name.
const Name = "fuse"

// maxActiveRequestsDefault is the default setting controlling the upper bound
// on the number of active requests at any given time. GetFilesystem stores it
// in filesystemOptions.maxActiveRequests for every mount.
const maxActiveRequestsDefault = 10000
    43  
// FilesystemType implements vfs.FilesystemType for fusefs. It has no fields;
// all per-mount state lives in the filesystem created by GetFilesystem.
//
// +stateify savable
type FilesystemType struct{}
    48  
// filesystemOptions holds the mount options of a fusefs mount as parsed by
// FilesystemType.GetFilesystem.
//
// +stateify savable
type filesystemOptions struct {
	// mopts contains the raw, unparsed mount options passed to this filesystem.
	mopts string

	// uid of the mount owner, taken from the mandatory "user_id" option.
	uid auth.KUID

	// gid of the mount owner, taken from the mandatory "group_id" option.
	gid auth.KGID

	// rootMode specifies the file mode of the filesystem's root, taken from
	// the mandatory "rootmode" option (parsed as octal).
	rootMode linux.FileMode

	// maxActiveRequests specifies the maximum number of active requests that can
	// exist at any time. Any further requests will block when trying to
	// Call the server.
	maxActiveRequests uint64

	// maxRead is the max number of bytes to read,
	// specified as "max_read" in fs parameters.
	// If not specified by user, use math.MaxUint32 as default value.
	maxRead uint32

	// defaultPermissions is the default_permissions mount option. It instructs
	// the kernel to perform a standard unix permission checks based on
	// ownership and mode bits, instead of deferring the check to the server.
	//
	// Immutable after mount.
	defaultPermissions bool

	// allowOther is the allow_other mount option. It allows processes that
	// don't own the FUSE mount to call into it.
	//
	// Immutable after mount.
	allowOther bool
}
    86  
// filesystem implements vfs.FilesystemImpl.
//
// +stateify savable
type filesystem struct {
	kernfs.Filesystem

	// devMinor is the anonymous block device minor number obtained from
	// GetAnonBlockDevMinor at mount time. Release hands it back with
	// PutAnonBlockDevMinor.
	devMinor uint32

	// conn is used for communication between the FUSE server
	// daemon and the sentry fusefs.
	conn *connection

	// opts is the options the fusefs is initialized with.
	opts *filesystemOptions

	// umounted is true if filesystem.Release() has been called.
	umounted bool
}
   104  
// Name implements vfs.FilesystemType.Name. It returns the package-level Name
// constant.
func (FilesystemType) Name() string {
	return Name
}
   109  
// Release implements vfs.FilesystemType.Release. It is a no-op because
// FilesystemType holds no resources.
func (FilesystemType) Release(ctx context.Context) {}
   112  
   113  // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
   114  func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
   115  	devMinor, err := vfsObj.GetAnonBlockDevMinor()
   116  	if err != nil {
   117  		return nil, nil, err
   118  	}
   119  
   120  	fsopts := filesystemOptions{mopts: opts.Data}
   121  	mopts := vfs.GenericParseMountOptions(opts.Data)
   122  	deviceDescriptorStr, ok := mopts["fd"]
   123  	if !ok {
   124  		ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option fd missing")
   125  		return nil, nil, linuxerr.EINVAL
   126  	}
   127  	delete(mopts, "fd")
   128  
   129  	deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */)
   130  	if err != nil {
   131  		ctx.Debugf("fusefs.FilesystemType.GetFilesystem: invalid fd: %q (%v)", deviceDescriptorStr, err)
   132  		return nil, nil, linuxerr.EINVAL
   133  	}
   134  
   135  	kernelTask := kernel.TaskFromContext(ctx)
   136  	if kernelTask == nil {
   137  		log.Warningf("%s.GetFilesystem: couldn't get kernel task from context", fsType.Name())
   138  		return nil, nil, linuxerr.EINVAL
   139  	}
   140  	fuseFDGeneric := kernelTask.GetFileVFS2(int32(deviceDescriptor))
   141  	if fuseFDGeneric == nil {
   142  		return nil, nil, linuxerr.EINVAL
   143  	}
   144  	defer fuseFDGeneric.DecRef(ctx)
   145  	fuseFD, ok := fuseFDGeneric.Impl().(*DeviceFD)
   146  	if !ok {
   147  		log.Warningf("%s.GetFilesystem: device FD is %T, not a FUSE device", fsType.Name, fuseFDGeneric)
   148  		return nil, nil, linuxerr.EINVAL
   149  	}
   150  
   151  	// Parse and set all the other supported FUSE mount options.
   152  	// TODO(github.com/SagerNet/issue/3229): Expand the supported mount options.
   153  	if uidStr, ok := mopts["user_id"]; ok {
   154  		delete(mopts, "user_id")
   155  		uid, err := strconv.ParseUint(uidStr, 10, 32)
   156  		if err != nil {
   157  			log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), uidStr)
   158  			return nil, nil, linuxerr.EINVAL
   159  		}
   160  		kuid := creds.UserNamespace.MapToKUID(auth.UID(uid))
   161  		if !kuid.Ok() {
   162  			ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped uid: %d", uid)
   163  			return nil, nil, linuxerr.EINVAL
   164  		}
   165  		fsopts.uid = kuid
   166  	} else {
   167  		ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option user_id missing")
   168  		return nil, nil, linuxerr.EINVAL
   169  	}
   170  
   171  	if gidStr, ok := mopts["group_id"]; ok {
   172  		delete(mopts, "group_id")
   173  		gid, err := strconv.ParseUint(gidStr, 10, 32)
   174  		if err != nil {
   175  			log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), gidStr)
   176  			return nil, nil, linuxerr.EINVAL
   177  		}
   178  		kgid := creds.UserNamespace.MapToKGID(auth.GID(gid))
   179  		if !kgid.Ok() {
   180  			ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped gid: %d", gid)
   181  			return nil, nil, linuxerr.EINVAL
   182  		}
   183  		fsopts.gid = kgid
   184  	} else {
   185  		ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option group_id missing")
   186  		return nil, nil, linuxerr.EINVAL
   187  	}
   188  
   189  	if modeStr, ok := mopts["rootmode"]; ok {
   190  		delete(mopts, "rootmode")
   191  		mode, err := strconv.ParseUint(modeStr, 8, 32)
   192  		if err != nil {
   193  			log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr)
   194  			return nil, nil, linuxerr.EINVAL
   195  		}
   196  		fsopts.rootMode = linux.FileMode(mode)
   197  	} else {
   198  		ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option rootmode missing")
   199  		return nil, nil, linuxerr.EINVAL
   200  	}
   201  
   202  	// Set the maxInFlightRequests option.
   203  	fsopts.maxActiveRequests = maxActiveRequestsDefault
   204  
   205  	if maxReadStr, ok := mopts["max_read"]; ok {
   206  		delete(mopts, "max_read")
   207  		maxRead, err := strconv.ParseUint(maxReadStr, 10, 32)
   208  		if err != nil {
   209  			log.Warningf("%s.GetFilesystem: invalid max_read: max_read=%s", fsType.Name(), maxReadStr)
   210  			return nil, nil, linuxerr.EINVAL
   211  		}
   212  		if maxRead < fuseMinMaxRead {
   213  			maxRead = fuseMinMaxRead
   214  		}
   215  		fsopts.maxRead = uint32(maxRead)
   216  	} else {
   217  		fsopts.maxRead = math.MaxUint32
   218  	}
   219  
   220  	if _, ok := mopts["default_permissions"]; ok {
   221  		delete(mopts, "default_permissions")
   222  		fsopts.defaultPermissions = true
   223  	}
   224  
   225  	if _, ok := mopts["allow_other"]; ok {
   226  		delete(mopts, "allow_other")
   227  		fsopts.allowOther = true
   228  	}
   229  
   230  	// Check for unparsed options.
   231  	if len(mopts) != 0 {
   232  		log.Warningf("%s.GetFilesystem: unsupported or unknown options: %v", fsType.Name(), mopts)
   233  		return nil, nil, linuxerr.EINVAL
   234  	}
   235  
   236  	// Create a new FUSE filesystem.
   237  	fs, err := newFUSEFilesystem(ctx, vfsObj, &fsType, fuseFD, devMinor, &fsopts)
   238  	if err != nil {
   239  		log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err)
   240  		return nil, nil, err
   241  	}
   242  
   243  	// Send a FUSE_INIT request to the FUSE daemon server before returning.
   244  	// This call is not blocking.
   245  	if err := fs.conn.InitSend(creds, uint32(kernelTask.ThreadID())); err != nil {
   246  		log.Warningf("%s.InitSend: failed with error: %v", fsType.Name(), err)
   247  		fs.VFSFilesystem().DecRef(ctx) // returned by newFUSEFilesystem
   248  		return nil, nil, err
   249  	}
   250  
   251  	// root is the fusefs root directory.
   252  	root := fs.newRoot(ctx, creds, fsopts.rootMode)
   253  
   254  	return fs.VFSFilesystem(), root.VFSDentry(), nil
   255  }
   256  
   257  // newFUSEFilesystem creates a new FUSE filesystem.
   258  func newFUSEFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, fsType *FilesystemType, fuseFD *DeviceFD, devMinor uint32, opts *filesystemOptions) (*filesystem, error) {
   259  	conn, err := newFUSEConnection(ctx, fuseFD, opts)
   260  	if err != nil {
   261  		log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
   262  		return nil, linuxerr.EINVAL
   263  	}
   264  
   265  	fs := &filesystem{
   266  		devMinor: devMinor,
   267  		opts:     opts,
   268  		conn:     conn,
   269  	}
   270  	fs.VFSFilesystem().Init(vfsObj, fsType, fs)
   271  
   272  	// FIXME(github.com/SagerNet/issue/4813): Doesn't conn or fs need to hold a
   273  	// reference on fuseFD, since conn uses fuseFD for communication with the
   274  	// server? Wouldn't doing so create a circular reference?
   275  	fs.VFSFilesystem().IncRef() // for fuseFD.fs
   276  	// FIXME(github.com/SagerNet/issue/4813): fuseFD.fs is accessed without
   277  	// synchronization.
   278  	fuseFD.fs = fs
   279  
   280  	return fs, nil
   281  }
   282  
   283  // Release implements vfs.FilesystemImpl.Release.
   284  func (fs *filesystem) Release(ctx context.Context) {
   285  	fs.conn.fd.mu.Lock()
   286  
   287  	fs.umounted = true
   288  	fs.conn.Abort(ctx)
   289  	// Notify all the waiters on this fd.
   290  	fs.conn.fd.waitQueue.Notify(waiter.ReadableEvents)
   291  
   292  	fs.conn.fd.mu.Unlock()
   293  
   294  	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
   295  	fs.Filesystem.Release(ctx)
   296  }
   297  
// MountOptions implements vfs.FilesystemImpl.MountOptions. It returns the raw
// option string the filesystem was mounted with.
func (fs *filesystem) MountOptions() string {
	return fs.opts.mopts
}
   302  
// inode implements kernfs.Inode.
//
// +stateify savable
type inode struct {
	inodeRefs
	kernfs.InodeAlwaysValid
	kernfs.InodeAttrs
	kernfs.InodeDirectoryNoNewChildren
	kernfs.InodeNotSymlink
	kernfs.OrderedChildren

	// the owning filesystem. fs is immutable.
	fs *filesystem

	// metadataMu protects the metadata of this inode.
	metadataMu sync.Mutex

	// nodeID is the node ID placed in requests sent to the FUSE server for
	// this inode. The root inode uses 1.
	nodeID uint64

	locks vfs.FileLocks

	// size of the file. It is read and written with atomic operations.
	size uint64

	// attributeVersion is the version of inode's attributes.
	attributeVersion uint64

	// attributeTime is the remaining valid time of attributes.
	attributeTime uint64

	// version of the inode.
	version uint64

	// link is result of following a symbolic link. Readlink fills it in on
	// first use and returns the stored value afterwards.
	link string
}
   339  
   340  func (fs *filesystem) newRoot(ctx context.Context, creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
   341  	i := &inode{fs: fs, nodeID: 1}
   342  	i.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, 1, linux.ModeDirectory|0755)
   343  	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
   344  	i.InitRefs()
   345  
   346  	var d kernfs.Dentry
   347  	d.InitRoot(&fs.Filesystem, i)
   348  	return &d
   349  }
   350  
   351  func (fs *filesystem) newInode(ctx context.Context, nodeID uint64, attr linux.FUSEAttr) kernfs.Inode {
   352  	i := &inode{fs: fs, nodeID: nodeID}
   353  	creds := auth.Credentials{EffectiveKGID: auth.KGID(attr.UID), EffectiveKUID: auth.KUID(attr.UID)}
   354  	i.InodeAttrs.Init(ctx, &creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.FileMode(attr.Mode))
   355  	atomic.StoreUint64(&i.size, attr.Size)
   356  	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
   357  	i.InitRefs()
   358  	return i
   359  }
   360  
   361  // CheckPermissions implements kernfs.Inode.CheckPermissions.
   362  func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
   363  	// Since FUSE operations are ultimately backed by a userspace process (the
   364  	// fuse daemon), allowing a process to call into fusefs grants the daemon
   365  	// ptrace-like capabilities over the calling process. Because of this, by
   366  	// default FUSE only allows the mount owner to interact with the
   367  	// filesystem. This explicitly excludes setuid/setgid processes.
   368  	//
   369  	// This behaviour can be overriden with the 'allow_other' mount option.
   370  	//
   371  	// See fs/fuse/dir.c:fuse_allow_current_process() in Linux.
   372  	if !i.fs.opts.allowOther {
   373  		if creds.RealKUID != i.fs.opts.uid ||
   374  			creds.EffectiveKUID != i.fs.opts.uid ||
   375  			creds.SavedKUID != i.fs.opts.uid ||
   376  			creds.RealKGID != i.fs.opts.gid ||
   377  			creds.EffectiveKGID != i.fs.opts.gid ||
   378  			creds.SavedKGID != i.fs.opts.gid {
   379  			return linuxerr.EACCES
   380  		}
   381  	}
   382  
   383  	// By default, fusefs delegates all permission checks to the server.
   384  	// However, standard unix permission checks can be enabled with the
   385  	// default_permissions mount option.
   386  	if i.fs.opts.defaultPermissions {
   387  		return i.InodeAttrs.CheckPermissions(ctx, creds, ats)
   388  	}
   389  	return nil
   390  }
   391  
   392  // Open implements kernfs.Inode.Open.
   393  func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   394  	isDir := i.InodeAttrs.Mode().IsDir()
   395  	// return error if specified to open directory but inode is not a directory.
   396  	if !isDir && opts.Mode.IsDir() {
   397  		return nil, syserror.ENOTDIR
   398  	}
   399  	if opts.Flags&linux.O_LARGEFILE == 0 && atomic.LoadUint64(&i.size) > linux.MAX_NON_LFS {
   400  		return nil, linuxerr.EOVERFLOW
   401  	}
   402  
   403  	var fd *fileDescription
   404  	var fdImpl vfs.FileDescriptionImpl
   405  	if isDir {
   406  		directoryFD := &directoryFD{}
   407  		fd = &(directoryFD.fileDescription)
   408  		fdImpl = directoryFD
   409  	} else {
   410  		regularFD := &regularFileFD{}
   411  		fd = &(regularFD.fileDescription)
   412  		fdImpl = regularFD
   413  	}
   414  	// FOPEN_KEEP_CACHE is the defualt flag for noOpen.
   415  	fd.OpenFlag = linux.FOPEN_KEEP_CACHE
   416  
   417  	// Only send open request when FUSE server support open or is opening a directory.
   418  	if !i.fs.conn.noOpen || isDir {
   419  		kernelTask := kernel.TaskFromContext(ctx)
   420  		if kernelTask == nil {
   421  			log.Warningf("fusefs.Inode.Open: couldn't get kernel task from context")
   422  			return nil, linuxerr.EINVAL
   423  		}
   424  
   425  		// Build the request.
   426  		var opcode linux.FUSEOpcode
   427  		if isDir {
   428  			opcode = linux.FUSE_OPENDIR
   429  		} else {
   430  			opcode = linux.FUSE_OPEN
   431  		}
   432  
   433  		in := linux.FUSEOpenIn{Flags: opts.Flags & ^uint32(linux.O_CREAT|linux.O_EXCL|linux.O_NOCTTY)}
   434  		if !i.fs.conn.atomicOTrunc {
   435  			in.Flags &= ^uint32(linux.O_TRUNC)
   436  		}
   437  
   438  		// Send the request and receive the reply.
   439  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, opcode, &in)
   440  		res, err := i.fs.conn.Call(kernelTask, req)
   441  		if err != nil {
   442  			return nil, err
   443  		}
   444  		if err := res.Error(); linuxerr.Equals(linuxerr.ENOSYS, err) && !isDir {
   445  			i.fs.conn.noOpen = true
   446  		} else if err != nil {
   447  			return nil, err
   448  		} else {
   449  			out := linux.FUSEOpenOut{}
   450  			if err := res.UnmarshalPayload(&out); err != nil {
   451  				return nil, err
   452  			}
   453  
   454  			// Process the reply.
   455  			fd.OpenFlag = out.OpenFlag
   456  			if isDir {
   457  				fd.OpenFlag &= ^uint32(linux.FOPEN_DIRECT_IO)
   458  			}
   459  
   460  			fd.Fh = out.Fh
   461  		}
   462  	}
   463  
   464  	// TODO(github.com/SagerNet/issue/3234): invalidate mmap after implemented it for FUSE Inode
   465  	fd.DirectIO = fd.OpenFlag&linux.FOPEN_DIRECT_IO != 0
   466  	fdOptions := &vfs.FileDescriptionOptions{}
   467  	if fd.OpenFlag&linux.FOPEN_NONSEEKABLE != 0 {
   468  		fdOptions.DenyPRead = true
   469  		fdOptions.DenyPWrite = true
   470  		fd.Nonseekable = true
   471  	}
   472  
   473  	// If we don't send SETATTR before open (which is indicated by atomicOTrunc)
   474  	// and O_TRUNC is set, update the inode's version number and clean existing data
   475  	// by setting the file size to 0.
   476  	if i.fs.conn.atomicOTrunc && opts.Flags&linux.O_TRUNC != 0 {
   477  		i.fs.conn.mu.Lock()
   478  		i.fs.conn.attributeVersion++
   479  		i.attributeVersion = i.fs.conn.attributeVersion
   480  		atomic.StoreUint64(&i.size, 0)
   481  		i.fs.conn.mu.Unlock()
   482  		i.attributeTime = 0
   483  	}
   484  
   485  	if err := fd.vfsfd.Init(fdImpl, opts.Flags, rp.Mount(), d.VFSDentry(), fdOptions); err != nil {
   486  		return nil, err
   487  	}
   488  	return &fd.vfsfd, nil
   489  }
   490  
   491  // Lookup implements kernfs.Inode.Lookup.
   492  func (i *inode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   493  	in := linux.FUSELookupIn{Name: name}
   494  	return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
   495  }
   496  
// Keep implements kernfs.Inode.Keep. It always returns true.
func (i *inode) Keep() bool {
	// Return true so that kernfs keeps the new dentry pointing to this
	// inode in the dentry tree. This is needed because inodes created via
	// Lookup are not temporary. They might refer to existing files on server
	// that can be Unlink'd/Rmdir'd.
	return true
}
   505  
// IterDirents implements kernfs.Inode.IterDirents. It never invokes callback
// and returns offset unchanged with a nil error.
func (*inode) IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
	return offset, nil
}
   510  
   511  // NewFile implements kernfs.Inode.NewFile.
   512  func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) {
   513  	kernelTask := kernel.TaskFromContext(ctx)
   514  	if kernelTask == nil {
   515  		log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID)
   516  		return nil, linuxerr.EINVAL
   517  	}
   518  	in := linux.FUSECreateIn{
   519  		CreateMeta: linux.FUSECreateMeta{
   520  			Flags: opts.Flags,
   521  			Mode:  uint32(opts.Mode) | linux.S_IFREG,
   522  			Umask: uint32(kernelTask.FSContext().Umask()),
   523  		},
   524  		Name: name,
   525  	}
   526  	return i.newEntry(ctx, name, linux.S_IFREG, linux.FUSE_CREATE, &in)
   527  }
   528  
   529  // NewNode implements kernfs.Inode.NewNode.
   530  func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (kernfs.Inode, error) {
   531  	in := linux.FUSEMknodIn{
   532  		MknodMeta: linux.FUSEMknodMeta{
   533  			Mode:  uint32(opts.Mode),
   534  			Rdev:  linux.MakeDeviceID(uint16(opts.DevMajor), opts.DevMinor),
   535  			Umask: uint32(kernel.TaskFromContext(ctx).FSContext().Umask()),
   536  		},
   537  		Name: name,
   538  	}
   539  	return i.newEntry(ctx, name, opts.Mode.FileType(), linux.FUSE_MKNOD, &in)
   540  }
   541  
   542  // NewSymlink implements kernfs.Inode.NewSymlink.
   543  func (i *inode) NewSymlink(ctx context.Context, name, target string) (kernfs.Inode, error) {
   544  	in := linux.FUSESymLinkIn{
   545  		Name:   name,
   546  		Target: target,
   547  	}
   548  	return i.newEntry(ctx, name, linux.S_IFLNK, linux.FUSE_SYMLINK, &in)
   549  }
   550  
   551  // Unlink implements kernfs.Inode.Unlink.
   552  func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
   553  	kernelTask := kernel.TaskFromContext(ctx)
   554  	if kernelTask == nil {
   555  		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
   556  		return linuxerr.EINVAL
   557  	}
   558  	in := linux.FUSEUnlinkIn{Name: name}
   559  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_UNLINK, &in)
   560  	res, err := i.fs.conn.Call(kernelTask, req)
   561  	if err != nil {
   562  		return err
   563  	}
   564  	// only return error, discard res.
   565  	return res.Error()
   566  }
   567  
   568  // NewDir implements kernfs.Inode.NewDir.
   569  func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
   570  	in := linux.FUSEMkdirIn{
   571  		MkdirMeta: linux.FUSEMkdirMeta{
   572  			Mode:  uint32(opts.Mode),
   573  			Umask: uint32(kernel.TaskFromContext(ctx).FSContext().Umask()),
   574  		},
   575  		Name: name,
   576  	}
   577  	return i.newEntry(ctx, name, linux.S_IFDIR, linux.FUSE_MKDIR, &in)
   578  }
   579  
   580  // RmDir implements kernfs.Inode.RmDir.
   581  func (i *inode) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
   582  	fusefs := i.fs
   583  	task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
   584  
   585  	in := linux.FUSERmDirIn{Name: name}
   586  	req := fusefs.conn.NewRequest(creds, uint32(task.ThreadID()), i.nodeID, linux.FUSE_RMDIR, &in)
   587  	res, err := i.fs.conn.Call(task, req)
   588  	if err != nil {
   589  		return err
   590  	}
   591  	return res.Error()
   592  }
   593  
   594  // newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
   595  // Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
   596  func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (kernfs.Inode, error) {
   597  	kernelTask := kernel.TaskFromContext(ctx)
   598  	if kernelTask == nil {
   599  		log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
   600  		return nil, linuxerr.EINVAL
   601  	}
   602  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, opcode, payload)
   603  	res, err := i.fs.conn.Call(kernelTask, req)
   604  	if err != nil {
   605  		return nil, err
   606  	}
   607  	if err := res.Error(); err != nil {
   608  		return nil, err
   609  	}
   610  	out := linux.FUSEEntryOut{}
   611  	if err := res.UnmarshalPayload(&out); err != nil {
   612  		return nil, err
   613  	}
   614  	if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
   615  		return nil, syserror.EIO
   616  	}
   617  	child := i.fs.newInode(ctx, out.NodeID, out.Attr)
   618  	return child, nil
   619  }
   620  
   621  // Getlink implements kernfs.Inode.Getlink.
   622  func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
   623  	path, err := i.Readlink(ctx, mnt)
   624  	return vfs.VirtualDentry{}, path, err
   625  }
   626  
   627  // Readlink implements kernfs.Inode.Readlink.
   628  func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
   629  	if i.Mode().FileType()&linux.S_IFLNK == 0 {
   630  		return "", linuxerr.EINVAL
   631  	}
   632  	if len(i.link) == 0 {
   633  		kernelTask := kernel.TaskFromContext(ctx)
   634  		if kernelTask == nil {
   635  			log.Warningf("fusefs.Inode.Readlink: couldn't get kernel task from context")
   636  			return "", linuxerr.EINVAL
   637  		}
   638  		req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
   639  		res, err := i.fs.conn.Call(kernelTask, req)
   640  		if err != nil {
   641  			return "", err
   642  		}
   643  		i.link = string(res.data[res.hdr.SizeBytes():])
   644  		if !mnt.Options().ReadOnly {
   645  			i.attributeTime = 0
   646  		}
   647  	}
   648  	return i.link, nil
   649  }
   650  
   651  // getFUSEAttr returns a linux.FUSEAttr of this inode stored in local cache.
   652  // TODO(github.com/SagerNet/issue/3679): Add support for other fields.
   653  func (i *inode) getFUSEAttr() linux.FUSEAttr {
   654  	return linux.FUSEAttr{
   655  		Ino:  i.Ino(),
   656  		Size: atomic.LoadUint64(&i.size),
   657  		Mode: uint32(i.Mode()),
   658  	}
   659  }
   660  
   661  // statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
   662  // opts.Sync attribute is ignored since the synchronization is handled by the
   663  // FUSE server.
   664  func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
   665  	var stat linux.Statx
   666  	stat.Blksize = attr.BlkSize
   667  	stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor
   668  
   669  	rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev)
   670  	stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor
   671  
   672  	if mask&linux.STATX_MODE != 0 {
   673  		stat.Mode = uint16(attr.Mode)
   674  	}
   675  	if mask&linux.STATX_NLINK != 0 {
   676  		stat.Nlink = attr.Nlink
   677  	}
   678  	if mask&linux.STATX_UID != 0 {
   679  		stat.UID = attr.UID
   680  	}
   681  	if mask&linux.STATX_GID != 0 {
   682  		stat.GID = attr.GID
   683  	}
   684  	if mask&linux.STATX_ATIME != 0 {
   685  		stat.Atime = linux.StatxTimestamp{
   686  			Sec:  int64(attr.Atime),
   687  			Nsec: attr.AtimeNsec,
   688  		}
   689  	}
   690  	if mask&linux.STATX_MTIME != 0 {
   691  		stat.Mtime = linux.StatxTimestamp{
   692  			Sec:  int64(attr.Mtime),
   693  			Nsec: attr.MtimeNsec,
   694  		}
   695  	}
   696  	if mask&linux.STATX_CTIME != 0 {
   697  		stat.Ctime = linux.StatxTimestamp{
   698  			Sec:  int64(attr.Ctime),
   699  			Nsec: attr.CtimeNsec,
   700  		}
   701  	}
   702  	if mask&linux.STATX_INO != 0 {
   703  		stat.Ino = attr.Ino
   704  	}
   705  	if mask&linux.STATX_SIZE != 0 {
   706  		stat.Size = attr.Size
   707  	}
   708  	if mask&linux.STATX_BLOCKS != 0 {
   709  		stat.Blocks = attr.Blocks
   710  	}
   711  	return stat
   712  }
   713  
   714  // getAttr gets the attribute of this inode by issuing a FUSE_GETATTR request
   715  // or read from local cache. It updates the corresponding attributes if
   716  // necessary.
   717  func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions, flags uint32, fh uint64) (linux.FUSEAttr, error) {
   718  	attributeVersion := atomic.LoadUint64(&i.fs.conn.attributeVersion)
   719  
   720  	// TODO(github.com/SagerNet/issue/3679): send the request only if
   721  	// - invalid local cache for fields specified in the opts.Mask
   722  	// - forced update
   723  	// - i.attributeTime expired
   724  	// If local cache is still valid, return local cache.
   725  	// Currently we always send a request,
   726  	// and we always set the metadata with the new result,
   727  	// unless attributeVersion has changed.
   728  
   729  	task := kernel.TaskFromContext(ctx)
   730  	if task == nil {
   731  		log.Warningf("couldn't get kernel task from context")
   732  		return linux.FUSEAttr{}, linuxerr.EINVAL
   733  	}
   734  
   735  	creds := auth.CredentialsFromContext(ctx)
   736  
   737  	in := linux.FUSEGetAttrIn{
   738  		GetAttrFlags: flags,
   739  		Fh:           fh,
   740  	}
   741  	req := i.fs.conn.NewRequest(creds, uint32(task.ThreadID()), i.nodeID, linux.FUSE_GETATTR, &in)
   742  	res, err := i.fs.conn.Call(task, req)
   743  	if err != nil {
   744  		return linux.FUSEAttr{}, err
   745  	}
   746  	if err := res.Error(); err != nil {
   747  		return linux.FUSEAttr{}, err
   748  	}
   749  
   750  	var out linux.FUSEGetAttrOut
   751  	if err := res.UnmarshalPayload(&out); err != nil {
   752  		return linux.FUSEAttr{}, err
   753  	}
   754  
   755  	// Local version is newer, return the local one.
   756  	// Skip the update.
   757  	if attributeVersion != 0 && atomic.LoadUint64(&i.attributeVersion) > attributeVersion {
   758  		return i.getFUSEAttr(), nil
   759  	}
   760  
   761  	// Set the metadata of kernfs.InodeAttrs.
   762  	if err := i.InodeAttrs.SetStat(ctx, fs, creds, vfs.SetStatOptions{
   763  		Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, i.fs.devMinor),
   764  	}); err != nil {
   765  		return linux.FUSEAttr{}, err
   766  	}
   767  
   768  	// Set the size if no error (after SetStat() check).
   769  	atomic.StoreUint64(&i.size, out.Attr.Size)
   770  
   771  	return out.Attr, nil
   772  }
   773  
   774  // reviseAttr attempts to update the attributes for internal purposes
   775  // by calling getAttr with a pre-specified mask.
   776  // Used by read, write, lseek.
   777  func (i *inode) reviseAttr(ctx context.Context, flags uint32, fh uint64) error {
   778  	// Never need atime for internal purposes.
   779  	_, err := i.getAttr(ctx, i.fs.VFSFilesystem(), vfs.StatOptions{
   780  		Mask: linux.STATX_BASIC_STATS &^ linux.STATX_ATIME,
   781  	}, flags, fh)
   782  	return err
   783  }
   784  
   785  // Stat implements kernfs.Inode.Stat.
   786  func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   787  	attr, err := i.getAttr(ctx, fs, opts, 0, 0)
   788  	if err != nil {
   789  		return linux.Statx{}, err
   790  	}
   791  
   792  	return statFromFUSEAttr(attr, opts.Mask, i.fs.devMinor), nil
   793  }
   794  
// DecRef implements kernfs.Inode.DecRef. It drops a reference through the
// embedded inodeRefs, passing i.Destroy as the destructor callback.
func (i *inode) DecRef(ctx context.Context) {
	i.inodeRefs.DecRef(func() { i.Destroy(ctx) })
}
   799  
// StatFS implements kernfs.Inode.StatFS. It does not contact the server; the
// result comes from vfs.GenericStatFS with FUSE_SUPER_MAGIC.
func (i *inode) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
	// TODO(github.com/SagerNet/issues/3413): Complete the implementation of statfs.
	return vfs.GenericStatFS(linux.FUSE_SUPER_MAGIC), nil
}
   805  
   806  // fattrMaskFromStats converts vfs.SetStatOptions.Stat.Mask to linux stats mask
   807  // aligned with the attribute mask defined in include/linux/fs.h.
   808  func fattrMaskFromStats(mask uint32) uint32 {
   809  	var fuseAttrMask uint32
   810  	maskMap := map[uint32]uint32{
   811  		linux.STATX_MODE:  linux.FATTR_MODE,
   812  		linux.STATX_UID:   linux.FATTR_UID,
   813  		linux.STATX_GID:   linux.FATTR_GID,
   814  		linux.STATX_SIZE:  linux.FATTR_SIZE,
   815  		linux.STATX_ATIME: linux.FATTR_ATIME,
   816  		linux.STATX_MTIME: linux.FATTR_MTIME,
   817  		linux.STATX_CTIME: linux.FATTR_CTIME,
   818  	}
   819  	for statxMask, fattrMask := range maskMap {
   820  		if mask&statxMask != 0 {
   821  			fuseAttrMask |= fattrMask
   822  		}
   823  	}
   824  	return fuseAttrMask
   825  }
   826  
// SetStat implements kernfs.Inode.SetStat. It forwards to setAttr without a
// file handle (useFh false, fh 0).
func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
	return i.setAttr(ctx, fs, creds, opts, false, 0)
}
   831  
   832  func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions, useFh bool, fh uint64) error {
   833  	conn := i.fs.conn
   834  	task := kernel.TaskFromContext(ctx)
   835  	if task == nil {
   836  		log.Warningf("couldn't get kernel task from context")
   837  		return linuxerr.EINVAL
   838  	}
   839  
   840  	// We should retain the original file type when assigning new mode.
   841  	fileType := uint16(i.Mode()) & linux.S_IFMT
   842  	fattrMask := fattrMaskFromStats(opts.Stat.Mask)
   843  	if useFh {
   844  		fattrMask |= linux.FATTR_FH
   845  	}
   846  	in := linux.FUSESetAttrIn{
   847  		Valid:     fattrMask,
   848  		Fh:        fh,
   849  		Size:      opts.Stat.Size,
   850  		Atime:     uint64(opts.Stat.Atime.Sec),
   851  		Mtime:     uint64(opts.Stat.Mtime.Sec),
   852  		Ctime:     uint64(opts.Stat.Ctime.Sec),
   853  		AtimeNsec: opts.Stat.Atime.Nsec,
   854  		MtimeNsec: opts.Stat.Mtime.Nsec,
   855  		CtimeNsec: opts.Stat.Ctime.Nsec,
   856  		Mode:      uint32(fileType | opts.Stat.Mode),
   857  		UID:       opts.Stat.UID,
   858  		GID:       opts.Stat.GID,
   859  	}
   860  	req := conn.NewRequest(creds, uint32(task.ThreadID()), i.nodeID, linux.FUSE_SETATTR, &in)
   861  	res, err := conn.Call(task, req)
   862  	if err != nil {
   863  		return err
   864  	}
   865  	if err := res.Error(); err != nil {
   866  		return err
   867  	}
   868  	out := linux.FUSEGetAttrOut{}
   869  	if err := res.UnmarshalPayload(&out); err != nil {
   870  		return err
   871  	}
   872  
   873  	// Set the metadata of kernfs.InodeAttrs.
   874  	if err := i.InodeAttrs.SetStat(ctx, fs, creds, vfs.SetStatOptions{
   875  		Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, i.fs.devMinor),
   876  	}); err != nil {
   877  		return err
   878  	}
   879  
   880  	return nil
   881  }