github.com/scaleoutsean/fusego@v0.0.0-20220224074057-4a6429e46bb8/fuseops/ops.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuseops
    16  
    17  import (
    18  	"os"
    19  	"time"
    20  )
    21  
    22  ////////////////////////////////////////////////////////////////////////
    23  // File system
    24  ////////////////////////////////////////////////////////////////////////
    25  
    26  // OpMetadata contains metadata about the file system operation.
    27  type OpMetadata struct {
    28  	// Pid is the PID of the process that is invoking the operation.
    29  	Pid uint32
    30  }
    31  
    32  // StatFSOp reports the file system's capacity and available resources.
    33  //
    34  // Called by statfs(2) and friends:
    35  //
    36  //     * (https://goo.gl/Xi1lDr) sys_statfs calls user_statfs, which calls
    37  //       vfs_statfs, which calls statfs_by_dentry.
    38  //
    39  //     * (https://goo.gl/VAIOwU) statfs_by_dentry calls the superblock
    40  //       operation statfs, which in our case points at
    41  //       fuse_statfs (cf. https://goo.gl/L7BTM3)
    42  //
    43  //     * (https://goo.gl/Zn7Sgl) fuse_statfs sends a statfs op, then uses
    44  //       convert_fuse_statfs to convert the response in a straightforward
    45  //       manner.
    46  //
    47  // This op is particularly important on OS X: if you don't implement it, the
    48  // file system will not successfully mount. If you don't model a sane amount of
    49  // free space, the Finder will refuse to copy files into the file system.
    50  type StatFSOp struct {
    51  	// The size of the file system's blocks. This may be used, in combination
    52  	// with the block counts below, by callers of statfs(2) to infer the file
    53  	// system's capacity and space availability.
    54  	//
    55  	// On Linux this is surfaced as statfs::f_frsize, matching the POSIX standard
    56  	// (http://goo.gl/LktgrF), which says that f_blocks and friends are in units
    57  	// of f_frsize. On OS X this is surfaced as statfs::f_bsize, which plays the
    58  	// same role.
    59  	//
    60  	// It appears as though the original intent of statvfs::f_frsize in the POSIX
    61  	// standard was to support a smaller addressable unit than statvfs::f_bsize
    62  	// (cf. The Linux Programming Interface by Michael Kerrisk,
    63  	// https://goo.gl/5LZMxQ). Therefore users should probably arrange for this
    64  	// to be no larger than IoSize.
    65  	//
    66  	// On Linux this can be any value, and will be faithfully returned to the
    67  	// caller of statfs(2) (see the code walk above). On OS X it appears that
    68  	// only powers of 2 in the range [2^7, 2^20] are preserved, and a value of
    69  	// zero is treated as 4096.
    70  	//
    71  	// This interface does not distinguish between blocks and block fragments.
    72  	BlockSize uint32
    73  
    74  	// The total number of blocks in the file system, the number of unused
    75  	// blocks, and the count of the latter that are available for use by non-root
    76  	// users.
    77  	//
    78  	// For each category, the corresponding number of bytes is derived by
    79  	// multiplying by BlockSize.
    80  	Blocks          uint64
    81  	BlocksFree      uint64
    82  	BlocksAvailable uint64
    83  
    84  	// The preferred size of writes to and reads from the file system, in bytes.
    85  	// This may affect clients that use statfs(2) to size buffers correctly. It
    86  	// does not appear to influence the size of writes sent from the kernel to
    87  	// the file system daemon.
    88  	//
    89  	// On Linux this is surfaced as statfs::f_bsize, and on OS X as
    90  	// statfs::f_iosize. Both are documented in `man 2 statfs` as "optimal
    91  	// transfer block size".
    92  	//
    93  	// On Linux this can be any value. On OS X it appears that only powers of 2
    94  	// in the range [2^12, 2^25] are faithfully preserved, and a value of zero is
    95  	// treated as 65536.
    96  	IoSize uint32
    97  
    98  	// The total number of inodes in the file system, and how many remain free.
    99  	Inodes     uint64
   100  	InodesFree uint64
   101  }
   102  
   103  ////////////////////////////////////////////////////////////////////////
   104  // Inodes
   105  ////////////////////////////////////////////////////////////////////////
   106  
   107  // LookUpInodeOp looks up a child by name within a parent directory. The kernel
   108  // sends this when resolving user paths to dentry structs, which are then cached.
   109  type LookUpInodeOp struct {
   110  	// The ID of the directory inode to which the child belongs.
   111  	Parent InodeID
   112  
   113  	// The name of the child of interest, relative to the parent. For example, in
   114  	// this directory structure:
   115  	//
   116  	//     foo/
   117  	//         bar/
   118  	//             baz
   119  	//
   120  	// the file system may receive a request to look up the child named "bar" for
   121  	// the parent foo/.
   122  	Name string
   123  
   124  	// The resulting entry. Must be filled out by the file system.
   125  	//
   126  	// The lookup count for the inode is implicitly incremented. See notes on
   127  	// ForgetInodeOp for more information.
   128  	Entry ChildInodeEntry
   129  }
   130  
   131  // GetInodeAttributesOp refreshes the attributes for an inode whose ID was
   132  // previously returned in a LookUpInodeOp. The kernel sends this when the FUSE
   133  // VFS layer's cache of inode attributes is stale. This is controlled by the
   134  // AttributesExpiration field of ChildInodeEntry, etc.
   135  type GetInodeAttributesOp struct {
   136  	// The inode of interest.
   137  	Inode InodeID
   138  
   139  	// Set by the file system: attributes for the inode, and the time at which
   140  	// they should expire. See notes on ChildInodeEntry.AttributesExpiration for
   141  	// more.
   142  	Attributes           InodeAttributes
   143  	AttributesExpiration time.Time
   144  }
   145  
   146  // SetInodeAttributesOp changes attributes for an inode.
   147  //
   148  // The kernel sends this for obvious cases like chmod(2), and for less obvious
   149  // cases like ftruncate(2).
   150  type SetInodeAttributesOp struct {
   151  	// The inode of interest.
   152  	Inode InodeID
   153  
   154  	// If set, this is ftruncate(2), otherwise it's truncate(2).
   155  	Handle *HandleID
   156  
   157  	// The attributes to modify, or nil for attributes that don't need a change.
   158  	Size  *uint64
   159  	Mode  *os.FileMode
   160  	Atime *time.Time
   161  	Mtime *time.Time
   162  
   163  	// Set by the file system: the new attributes for the inode, and the time at
   164  	// which they should expire. See notes on
   165  	// ChildInodeEntry.AttributesExpiration for more.
   166  	Attributes           InodeAttributes
   167  	AttributesExpiration time.Time
   168  }
   169  
   170  // ForgetInodeOp decrements the reference count for an inode ID previously
   171  // issued by the file system.
   172  //
   173  // The comments for the ops that implicitly increment the reference count
   174  // contain a note of this (but see also the note about the root inode below).
   175  // For example, LookUpInodeOp and MkDirOp. The authoritative source is the
   176  // libfuse documentation, which states that any op that returns
   177  // fuse_reply_entry or fuse_reply_create implicitly increments (cf.
   178  // http://goo.gl/o5C7Dx).
   179  //
   180  // If the reference count hits zero, the file system can forget about that ID
   181  // entirely, and even re-use it in future responses. The kernel guarantees that
   182  // it will not otherwise use it again.
   183  //
   184  // The reference count corresponds to fuse_inode::nlookup
   185  // (http://goo.gl/ut48S4). Some examples of where the kernel manipulates it:
   186  //
   187  //  *  (http://goo.gl/vPD9Oh) Any caller to fuse_iget increases the count.
   188  //  *  (http://goo.gl/B6tTTC) fuse_lookup_name calls fuse_iget.
   189  //  *  (http://goo.gl/IlcxWv) fuse_create_open calls fuse_iget.
   190  //  *  (http://goo.gl/VQMQul) fuse_dentry_revalidate increments after
   191  //     revalidating.
   192  //
   193  // In contrast to all other inodes, RootInodeID begins with an implicit
   194  // lookup count of one, without a corresponding op to increase it. (There
   195  // could be no such op, because the root cannot be referred to by name.) Code
   196  // walk:
   197  //
   198  //  *  (http://goo.gl/gWAheU) fuse_fill_super calls fuse_get_root_inode.
   199  //
   200  //  *  (http://goo.gl/AoLsbb) fuse_get_root_inode calls fuse_iget without
   201  //     sending any particular request.
   202  //
   203  //  *  (http://goo.gl/vPD9Oh) fuse_iget increments nlookup.
   204  //
   205  // File systems should tolerate but not rely on receiving forget ops for
   206  // remaining inodes when the file system unmounts, including the root inode.
   207  // Rather they should take fuse.Connection.ReadOp returning io.EOF as
   208  // implicitly decrementing all lookup counts to zero.
   209  type ForgetInodeOp struct {
   210  	// The inode whose reference count should be decremented.
   211  	Inode InodeID
   212  
   213  	// The amount by which to decrement the reference count.
   214  	N uint64
   215  }
   216  
   217  ////////////////////////////////////////////////////////////////////////
   218  // Inode creation
   219  ////////////////////////////////////////////////////////////////////////
   220  
   221  // MkDirOp creates a directory inode as a child of an existing directory inode.
   222  // The kernel sends this in response to a mkdir(2) call.
   223  //
   224  // The Linux kernel appears to verify the name doesn't already exist (mkdir
   225  // calls mkdirat calls user_path_create calls filename_create, which verifies:
   226  // http://goo.gl/FZpLu5). Indeed, the tests in samples/memfs that call in
   227  // parallel appear to bear this out. But osxfuse does not appear to guarantee
   228  // this (cf. https://goo.gl/PqzZDv). And if names may be created outside of the
   229  // kernel's control, it doesn't matter what the kernel does anyway.
   230  //
   231  // Therefore the file system should return EEXIST if the name already exists.
   232  type MkDirOp struct {
   233  	// The ID of the parent directory inode within which to create the child.
   234  	Parent InodeID
   235  
   236  	// The name of the child to create, and the mode with which to create it.
   237  	Name string
   238  	Mode os.FileMode
   239  
   240  	// Set by the file system: information about the inode that was created.
   241  	//
   242  	// The lookup count for the inode is implicitly incremented. See notes on
   243  	// ForgetInodeOp for more information.
   244  	Entry ChildInodeEntry
   245  }
   246  
   247  // MkNodeOp creates a file inode as a child of an existing directory inode. The
   248  // kernel sends this in response to a mknod(2) call, and possibly in special
   249  // cases such as an NFS export (cf. https://goo.gl/HiLfnK). It is more typical
   250  // to see CreateFileOp, which is received for an open(2) that creates a file.
   251  //
   252  // The Linux kernel appears to verify the name doesn't already exist (mknod
   253  // calls sys_mknodat calls user_path_create calls filename_create, which
   254  // verifies: http://goo.gl/FZpLu5). But osxfuse may not guarantee this, as with
   255  // mkdir(2). And if names may be created outside of the kernel's control, it
   256  // doesn't matter what the kernel does anyway.
   257  //
   258  // Therefore the file system should return EEXIST if the name already exists.
   259  type MkNodeOp struct {
   260  	// The ID of the parent directory inode within which to create the child.
   261  	Parent InodeID
   262  
   263  	// The name of the child to create, and the mode with which to create it.
   264  	Name string
   265  	Mode os.FileMode
   266  
   267  	// Set by the file system: information about the inode that was created.
   268  	//
   269  	// The lookup count for the inode is implicitly incremented. See notes on
   270  	// ForgetInodeOp for more information.
   271  	Entry ChildInodeEntry
   272  }
   273  
   274  // CreateFileOp creates a file inode and opens it.
   275  //
   276  // The kernel sends this when the user asks to open a file with the O_CREAT
   277  // flag and the kernel has observed that the file doesn't exist. (See for
   278  // example lookup_open, http://goo.gl/PlqE9d). However, osxfuse doesn't appear
   279  // to make this check atomically (cf. https://goo.gl/PqzZDv). And if names may
   280  // be created outside of the kernel's control, it doesn't matter what the
   281  // kernel does anyway.
   282  //
   283  // Therefore the file system should return EEXIST if the name already exists.
   284  type CreateFileOp struct {
   285  	// Metadata about the operation, such as the PID of the invoking process.
   286  	Metadata OpMetadata
   287  
   288  	// The ID of the parent directory inode within which to create the child file.
   289  	Parent InodeID
   290  
   291  	// The name of the child to create, and the mode with which to create it.
   292  	Name string
   293  	Mode os.FileMode
   294  
   295  	// Set by the file system: information about the inode that was created.
   296  	//
   297  	// The lookup count for the inode is implicitly incremented. See notes on
   298  	// ForgetInodeOp for more information.
   299  	Entry ChildInodeEntry
   300  
   301  	// Set by the file system: an opaque ID that will be echoed in follow-up
   302  	// calls for this file using the same struct file in the kernel. In practice
   303  	// this usually means follow-up calls using the file descriptor returned by
   304  	// open(2).
   305  	//
   306  	// The handle may be supplied in future ops like ReadFileOp that contain a
   307  	// file handle. The file system must ensure this ID remains valid until a
   308  	// later call to ReleaseFileHandle.
   309  	Handle HandleID
   310  }
   311  
   312  // CreateSymlinkOp creates a symlink inode. If the name already exists, the file
   313  // system should return EEXIST (cf. the notes on CreateFileOp and MkDirOp).
   314  type CreateSymlinkOp struct {
   315  	// The ID of the parent directory inode in which to create the child symlink.
   316  	Parent InodeID
   317  
   318  	// The name of the symlink to create.
   319  	Name string
   320  
   321  	// The target of the symlink.
   322  	Target string
   323  
   324  	// Set by the file system: information about the symlink inode that was
   325  	// created.
   326  	//
   327  	// The lookup count for the inode is implicitly incremented. See notes on
   328  	// ForgetInodeOp for more information.
   329  	Entry ChildInodeEntry
   330  }
   331  
   332  // CreateLinkOp creates a hard link to an inode. If the name already exists, the
   333  // file system should return EEXIST (cf. the notes on CreateFileOp and MkDirOp).
   334  type CreateLinkOp struct {
   335  	// The ID of the parent directory inode within which to create the child
   336  	// hard link.
   337  	Parent InodeID
   338  
   339  	// The name of the new hard link.
   340  	Name string
   341  
   342  	// The ID of the target inode.
   343  	Target InodeID
   344  
   345  	// Set by the file system: information about the inode that was created.
   346  	//
   347  	// The lookup count for the inode is implicitly incremented. See notes on
   348  	// ForgetInodeOp for more information.
   349  	Entry ChildInodeEntry
   350  }
   351  
   352  ////////////////////////////////////////////////////////////////////////
   353  // Unlinking
   354  ////////////////////////////////////////////////////////////////////////
   355  
   356  // RenameOp renames a file or directory, given the IDs of the original parent
   357  // directory and the new one (which may be the same).
   358  //
   359  // In Linux, this is called by vfs_rename (https://goo.gl/eERItT), which is
   360  // called by sys_renameat2 (https://goo.gl/fCC9qC).
   361  //
   362  // The kernel takes care of ensuring that the source and destination are not
   363  // identical (in which case it does nothing), that the rename is not across
   364  // file system boundaries, and that the destination doesn't already exist with
   365  // the wrong type. Some subtleties that the file system must care about:
   366  //
   367  //  *  If the new name is an existing directory, the file system must ensure it
   368  //     is empty before replacing it, returning ENOTEMPTY otherwise. (This is
   369  //     per the POSIX spec: http://goo.gl/4XtT79)
   370  //
   371  //  *  The rename must be atomic from the point of view of an observer of the
   372  //     new name. That is, if the new name already exists, there must be no
   373  //     point at which it doesn't exist.
   374  //
   375  //  *  It is okay for the new name to be modified before the old name is
   376  //     removed; these need not be atomic. In fact, the Linux man page
   377  //     explicitly says this is likely (cf. https://goo.gl/Y1wVZc).
   378  //
   379  //  *  Linux bends over backwards (https://goo.gl/pLDn3r) to ensure that
   380  //     neither the old nor the new parent can be concurrently modified. But
   381  //     it's not clear whether OS X does this, and in any case it doesn't matter
   382  //     for file systems that may be modified remotely. Therefore a careful file
   383  //     system implementor should probably ensure if possible that the unlink
   384  //     step in the "link new name, unlink old name" process doesn't unlink a
   385  //     different inode than the one that was linked to the new name. Still,
   386  //     POSIX and the man pages are imprecise about the actual semantics of a
   387  //     rename if it's not atomic, so it is probably not disastrous to be loose
   388  //     about this.
   389  //
   390  type RenameOp struct {
   391  	// The old parent directory, and the name of the entry within it to be
   392  	// relocated.
   393  	OldParent InodeID
   394  	OldName   string
   395  
   396  	// The new parent directory, and the name of the entry to be created or
   397  	// overwritten within it.
   398  	NewParent InodeID
   399  	NewName   string
   400  }
   401  
   402  // RmDirOp unlinks a directory from its parent. Because directories cannot have
   403  // a link count above one, this means the directory inode should be deleted as
   404  // well once the kernel sends ForgetInodeOp.
   405  //
   406  // The file system is responsible for checking that the directory is empty.
   407  //
   408  // Sample implementation in ext2: ext2_rmdir (http://goo.gl/B9QmFf)
   409  type RmDirOp struct {
   410  	// The ID of the parent directory inode, and the name of the directory
   411  	// being removed within it.
   412  	Parent InodeID
   413  	Name   string
   414  }
   415  
   416  // UnlinkOp unlinks a file or symlink from its parent. If this brings the
   417  // inode's link count to zero, the inode should be deleted once the kernel
   418  // sends ForgetInodeOp. It may still be referenced before then if a user still
   419  // has the file open.
   420  //
   421  // Sample implementation in ext2: ext2_unlink (http://goo.gl/hY6r6C)
   422  type UnlinkOp struct {
   423  	// The ID of the parent directory inode, and the name of the entry being
   424  	// removed within it.
   425  	Parent InodeID
   426  	Name   string
   427  }
   428  
   429  ////////////////////////////////////////////////////////////////////////
   430  // Directory handles
   431  ////////////////////////////////////////////////////////////////////////
   432  
   433  // OpenDirOp opens a directory inode.
   434  //
   435  // On Linux the kernel sends this when setting up a struct file for a particular
   436  // inode with type directory, usually in response to an open(2) call from a
   437  // user-space process. On OS X it may not be sent for every open(2) (cf.
   438  // https://github.com/osxfuse/osxfuse/issues/199).
   439  type OpenDirOp struct {
   440  	// The ID of the inode to be opened.
   441  	Inode InodeID
   442  
   443  	// Set by the file system: an opaque ID that will be echoed in follow-up
   444  	// calls for this directory using the same struct file in the kernel. In
   445  	// practice this usually means follow-up calls using the file descriptor
   446  	// returned by open(2).
   447  	//
   448  	// The handle may be supplied in future ops like ReadDirOp that contain a
   449  	// directory handle. The file system must ensure this ID remains valid until
   450  	// a later call to ReleaseDirHandle.
   451  	Handle HandleID
   452  }
   453  
   454  // ReadDirOp reads entries from a directory previously opened with OpenDir.
   455  type ReadDirOp struct {
   456  	// The directory inode that we are reading, and the handle previously
   457  	// returned by OpenDir when opening that inode.
   458  	Inode  InodeID
   459  	Handle HandleID
   460  
   461  	// The offset within the directory at which to read.
   462  	//
   463  	// Warning: this field is not necessarily a count of bytes. Its legal values
   464  	// are defined by the entries returned in Dst. See the notes below and the
   465  	// notes on that field.
   466  	//
   467  	// In the Linux kernel this ultimately comes from file::f_pos, which starts
   468  	// at zero and is set by llseek and by the final consumed result returned by
   469  	// each call to ReadDir:
   470  	//
   471  	//  *  (http://goo.gl/2nWJPL) iterate_dir, which is called by getdents(2) and
   472  	//     readdir(2), sets dir_context::pos to file::f_pos before calling
   473  	//     f_op->iterate, and then does the opposite assignment afterward.
   474  	//
   475  	//  *  (http://goo.gl/rTQVSL) fuse_readdir, which implements iterate for fuse
   476  	//     directories, passes dir_context::pos as the offset to fuse_read_fill,
   477  	//     which passes it on to user-space. fuse_readdir later calls
   478  	//     parse_dirfile with the same context.
   479  	//
   480  	//  *  (http://goo.gl/vU5ukv) For each returned result (except perhaps the
   481  	//     last, which may be truncated by the page boundary), parse_dirfile
   482  	//     updates dir_context::pos with fuse_dirent::off.
   483  	//
   484  	// It is affected by the POSIX directory stream interfaces in the following
   485  	// manner:
   486  	//
   487  	//  *  (http://goo.gl/fQhbyn, http://goo.gl/ns1kDF) opendir initially causes
   488  	//     filepos to be set to zero.
   489  	//
   490  	//  *  (http://goo.gl/ezNKyR, http://goo.gl/xOmDv0) readdir allows the user
   491  	//     to iterate through the directory one entry at a time. As each entry is
   492  	//     consumed, its d_off field is stored in __dirstream::filepos.
   493  	//
   494  	//  *  (http://goo.gl/WEOXG8, http://goo.gl/rjSXl3) telldir allows the user
   495  	//     to obtain the d_off field from the most recently returned entry.
   496  	//
   497  	//  *  (http://goo.gl/WG3nDZ, http://goo.gl/Lp0U6W) seekdir allows the user
   498  	//     to seek backward to an offset previously returned by telldir. It
   499  	//     stores the new offset in filepos, and calls llseek to update the
   500  	//     kernel's struct file.
   501  	//
   502  	//  *  (http://goo.gl/gONQhz, http://goo.gl/VlrQkc) rewinddir allows the user
   503  	//     to go back to the beginning of the directory, obtaining a fresh view.
   504  	//     It updates filepos and calls llseek to update the kernel's struct
   505  	//     file.
   506  	//
   507  	// Unfortunately, FUSE offers no way to intercept seeks
   508  	// (http://goo.gl/H6gEXa), so there is no way to cause seekdir or rewinddir
   509  	// to fail. Additionally, there is no way to distinguish an explicit
   510  	// rewinddir followed by readdir from the initial readdir, or a rewinddir
   511  	// from a seekdir to the value returned by telldir just after opendir.
   512  	//
   513  	// Luckily, POSIX is vague about what the user will see if they seek
   514  	// backwards, and requires the user not to seek to an old offset after a
   515  	// rewind. The only requirement on freshness is that rewinddir results in
   516  	// something that looks like a newly-opened directory. So FUSE file systems
   517  	// may e.g. cache an entire fresh listing for each ReadDir with a zero
   518  	// offset, and return array offsets into that cached listing.
   519  	Offset DirOffset
   520  
   521  	// The destination buffer, whose length gives the size of the read.
   522  	//
   523  	// The output data should consist of a sequence of FUSE directory entries in
   524  	// the format generated by fuse_add_direntry (http://goo.gl/qCcHCV), which is
   525  	// consumed by parse_dirfile (http://goo.gl/2WUmD2). Use fuseutil.WriteDirent
   526  	// to generate this data.
   527  	//
   528  	// Each entry returned exposes a directory offset to the user that may later
   529  	// show up in ReadDirOp.Offset. See notes on that field for more
   530  	// information.
   531  	Dst []byte
   532  
   533  	// Set by the file system: the number of bytes read into Dst.
   534  	//
   535  	// It is okay for this to be less than len(Dst) if there are not enough
   536  	// entries available or the final entry would not fit.
   537  	//
   538  	// Zero means that the end of the directory has been reached. This is
   539  	// unambiguous because NAME_MAX (https://goo.gl/ZxzKaE) plus the size of
   540  	// fuse_dirent (https://goo.gl/WO8s3F) plus the 8-byte alignment of
   541  	// FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH) is less than the read size of
   542  	// PAGE_SIZE used by fuse_readdir (cf. https://goo.gl/VajtS2).
   543  	BytesRead int
   544  }
   545  
   546  // ReleaseDirHandleOp releases a previously-minted directory handle. The kernel
   547  // sends this when there are no more references to an open directory: all file
   548  // descriptors are closed and all memory mappings are unmapped.
   549  //
   550  // The kernel guarantees that the handle ID will not be used in further ops
   551  // sent to the file system (unless it is reissued by the file system).
   552  //
   553  // Errors from this op are ignored by the kernel (cf. http://goo.gl/RL38Do).
   554  type ReleaseDirHandleOp struct {
   555  	// The handle ID to be released. The kernel guarantees that this ID will not
   556  	// be used in further calls to the file system (unless it is reissued by the
   557  	// file system).
   558  	Handle HandleID
   559  }
   560  
   561  ////////////////////////////////////////////////////////////////////////
   562  // File handles
   563  ////////////////////////////////////////////////////////////////////////
   564  
   565  // OpenFileOp opens a file inode.
   566  //
   567  // On Linux the kernel sends this when setting up a struct file for a particular
   568  // inode with type file, usually in response to an open(2) call from a
   569  // user-space process. On OS X it may not be sent for every open(2)
   570  // (cf. https://github.com/osxfuse/osxfuse/issues/199).
   571  type OpenFileOp struct {
   572  	// Metadata about the operation, such as the PID of the invoking process.
   573  	Metadata OpMetadata
   574  
   575  	// The ID of the inode to be opened.
   576  	Inode InodeID
   577  
   578  	// Set by the file system: an opaque ID echoed in follow-up calls for this
   579  	// file using the same struct file in the kernel. In practice this usually
   580  	// means follow-up calls using the file descriptor returned by open(2).
   581  	//
   582  	// The handle may be supplied in future ops like ReadFileOp that contain a
   583  	// file handle. The file system must ensure this ID remains valid until a
   584  	// later call to ReleaseFileHandle.
   585  	Handle HandleID
   586  
   587  	// By default, fuse invalidates the kernel's page cache for an inode when a
   588  	// new file handle is opened for that inode (cf. https://goo.gl/2rZ9uk). The
   589  	// intent appears to be to allow users to "see" content that has changed
   590  	// remotely on a networked file system by re-opening the file.
   591  	//
   592  	// For file systems where this is not a concern because all modifications for
   593  	// a particular inode go through the kernel, set this field to true to
   594  	// disable this behavior.
   595  	//
   596  	// (More discussion: http://goo.gl/cafzWF)
   597  	//
   598  	// Note that on OS X it appears that the behavior is always as if this field
   599  	// is set to true, regardless of its value, at least for files opened in the
   600  	// same mode. (Cf. https://github.com/osxfuse/osxfuse/issues/223)
   601  	KeepPageCache bool
   602  
   603  	// Whether to use direct IO for this file handle. By default, the kernel
   604  	// suppresses what it sees as redundant operations (including reads beyond
   605  	// the precomputed EOF).
   606  	//
   607  	// Enabling direct IO ensures that all client operations reach the fuse
   608  	// layer. This allows for filesystems whose file sizes are not known in
   609  	// advance, for example, because contents are generated on the fly.
   610  	UseDirectIO bool
   611  }
   612  
   613  // ReadFileOp reads from a file previously opened with CreateFile or OpenFile.
   614  //
   615  // Note that this op is not sent for every call to read(2) by the end user;
   616  // some reads may be served by the page cache. See notes on WriteFileOp for
   617  // more.
   618  type ReadFileOp struct {
   619  	// The file inode that we are reading, and the handle previously returned by
   620  	// CreateFile or OpenFile when opening that inode.
   621  	Inode  InodeID
   622  	Handle HandleID
   623  
   624  	// The offset within the file at which to read.
   625  	Offset int64
   626  
   627  	// The destination buffer, whose length gives the size of the read.
   628  	Dst []byte
   629  
   630  	// Set by the file system: the number of bytes read.
   631  	//
   632  	// The FUSE documentation requires that exactly the requested number of bytes
   633  	// be returned, except in the case of EOF or error (http://goo.gl/ZgfBkF).
   634  	// This appears to be because it uses file mmapping machinery
   635  	// (http://goo.gl/SGxnaN) to read a page at a time. It appears to understand
   636  	// where EOF is by checking the inode size (http://goo.gl/0BkqKD), returned
   637  	// by a previous call to LookUpInode, GetInodeAttributes, etc.
   638  	//
   639  	// If direct IO is enabled, semantics should match those of read(2).
   640  	BytesRead int
   641  }
   642  
// Write data to a file previously opened with CreateFile or OpenFile.
//
// When the user writes data using write(2), the write goes into the page
// cache and the page is marked dirty. Later the kernel may write back the
// page via the FUSE VFS layer, causing this op to be sent:
//
//  *  The kernel calls address_space_operations::writepage when a dirty page
//     needs to be written to backing store (cf. http://goo.gl/Ezbewg). Fuse
//     sets this to fuse_writepage (cf. http://goo.gl/IeNvLT).
//
//  *  (http://goo.gl/Eestuy) fuse_writepage calls fuse_writepage_locked.
//
//  *  (http://goo.gl/RqYIxY) fuse_writepage_locked makes a write request to
//     the userspace server.
//
// Note that the kernel *will* ensure that writes are received and acknowledged
// by the file system before sending a FlushFileOp when closing the file
// descriptor to which they were written. Cf. the notes on
// fuse.MountConfig.DisableWritebackCaching.
//
// (See also http://goo.gl/ocdTdM, fuse-devel thread "Fuse guarantees on
// concurrent requests".)
type WriteFileOp struct {
	// The inode of the file being modified, and the handle previously returned
	// by CreateFile or OpenFile when that inode was opened.
	Inode  InodeID
	Handle HandleID

	// The offset at which to write the data below.
	//
	// The man page for pwrite(2) implies that aside from changing the file
	// handle's offset, using pwrite is equivalent to using lseek(2) and then
	// write(2). The man page for lseek(2) says the following:
	//
	// "The lseek() function allows the file offset to be set beyond the end of
	// the file (but this does not change the size of the file). If data is later
	// written at this point, subsequent reads of the data in the gap (a "hole")
	// return null bytes ('\0') until data is actually written into the gap."
	//
	// It is therefore reasonable to assume that the kernel is looking for
	// the following semantics:
	//
	// *   If the offset is less than or equal to the current size, extend the
	//     file as necessary to fit any data that goes past the end of the file.
	//
	// *   If the offset is greater than the current size, extend the file
	//     with null bytes until it is not, then do the above.
	Offset int64

	// The data to write.
	//
	// The FUSE documentation requires that exactly the number of bytes supplied
	// be written, except on error (http://goo.gl/KUpwwn). This appears to be
	// because it uses file mmapping machinery (http://goo.gl/SGxnaN) to write a
	// page at a time.
	Data []byte
}
   701  
// Synchronize the current contents of an open file to storage.
//
// vfs.txt documents this as being called for by the fsync(2) system call
// (cf. http://goo.gl/j9X8nB). Code walk for that case:
//
//  *  (http://goo.gl/IQkWZa) sys_fsync calls do_fsync, calls vfs_fsync, calls
//     vfs_fsync_range.
//
//  *  (http://goo.gl/5L2SMy) vfs_fsync_range calls f_op->fsync.
//
// Note that this is also sent by fdatasync(2) (cf. http://goo.gl/01R7rF), and
// may be sent for msync(2) with the MS_SYNC flag (see the notes on
// FlushFileOp).
//
// See also: FlushFileOp, which may perform a similar function when closing a
// file (but which is not used in "real" file systems).
type SyncFileOp struct {
	// The inode of the file being synchronized, and the open handle through
	// which the synchronization was requested.
	Inode  InodeID
	Handle HandleID
}
   723  
// Flush the current state of an open file to storage upon closing a file
// descriptor.
//
// vfs.txt documents this as being sent for each close(2) system call (cf.
// http://goo.gl/FSkbrq). Code walk for that case:
//
//  *  (http://goo.gl/e3lv0e) sys_close calls __close_fd, calls filp_close.
//  *  (http://goo.gl/nI8fxD) filp_close calls f_op->flush (fuse_flush).
//
// But note that this is also sent in other contexts where a file descriptor is
// closed, such as dup2(2) (cf. http://goo.gl/NQDvFS). In the case of close(2),
// a flush error is returned to the user. For dup2(2), it is not.
//
// One potentially significant case where this may not be sent is mmap'd files,
// where the behavior is complicated:
//
//  *  munmap(2) does not cause flushes (cf. http://goo.gl/j8B9g0).
//
//  *  On OS X, if a user modifies a mapped file via the mapping before
//     closing the file with close(2), the WriteFileOps for the modifications
//     may not be received before the FlushFileOp for the close(2) (cf.
//     https://github.com/osxfuse/osxfuse/issues/202). It appears that this may
//     be fixed in osxfuse 3 (cf. https://goo.gl/rtvbko).
//
//  *  However, you can safely arrange for writes via a mapping to be
//     flushed by calling msync(2) followed by close(2). On OS X msync(2)
//     will cause WriteFileOps to go through and close(2) will cause a
//     FlushFileOp as usual (cf. http://goo.gl/kVmNcx). On Linux, msync(2) does
//     nothing unless you set the MS_SYNC flag, in which case it causes a
//     SyncFileOp to be sent (cf. http://goo.gl/P3mErk).
//
// In summary: if you make data durable when handling both FlushFileOp and
// SyncFileOp, then your users can get safe behavior from mapped files on both
// operating systems by calling msync(2) with MS_SYNC, followed by munmap(2),
// followed by close(2). On Linux, the msync(2) is optional (cf.
// http://goo.gl/EIhAxv and the notes on WriteFileOp).
//
// Because of cases like dup2(2), FlushFileOps are not necessarily one to one
// with OpenFileOps. They should not be used for reference counting, and the
// handle must remain valid even after the flush op is received (use
// ReleaseFileHandleOp for disposing of it).
//
// Typical "real" file systems do not implement this, presumably relying on
// the kernel to write out the page cache to the block device eventually.
// They can get away with this because a later open(2) will see the same
// data. A file system that writes to remote storage however probably wants
// to at least schedule a real flush, and maybe do it immediately in order to
// return any errors that occur.
type FlushFileOp struct {
	// Metadata about this operation, such as the PID of the process that is
	// invoking it (see OpMetadata).
	Metadata OpMetadata

	// The inode of the file being flushed, and the open handle through which
	// the flush was triggered.
	Inode  InodeID
	Handle HandleID
}
   780  
// Release a previously-minted file handle. The kernel calls this when there
// are no more references to an open file: all file descriptors are closed
// and all memory mappings are unmapped.
//
// The kernel guarantees that the handle ID will not be used in further calls
// to the file system (unless it is reissued by the file system).
//
// Errors from this op are ignored by the kernel (cf. http://goo.gl/RL38Do).
type ReleaseFileHandleOp struct {
	// The ID of the handle being released. As noted above, the kernel will not
	// use this ID in further calls to the file system unless the file system
	// itself reissues it.
	Handle HandleID
}
   795  
   796  ////////////////////////////////////////////////////////////////////////
   797  // Reading symlinks
   798  ////////////////////////////////////////////////////////////////////////
   799  
// Read the target of a symlink inode.
type ReadSymlinkOp struct {
	// The symlink inode whose target is being read.
	Inode InodeID

	// Set by the file system: the target that the symlink points to.
	Target string
}
   808  
   809  ////////////////////////////////////////////////////////////////////////
   810  // eXtended attributes
   811  ////////////////////////////////////////////////////////////////////////
   812  
// Remove an extended attribute.
//
// This is sent in response to removexattr(2). Return ENOATTR if the
// extended attribute does not exist.
type RemoveXattrOp struct {
	// The inode from which the extended attribute is being removed.
	Inode InodeID

	// The name of the extended attribute to remove.
	Name string
}
   824  
// Get an extended attribute.
//
// This is sent in response to getxattr(2). Return ENOATTR if the
// extended attribute does not exist.
type GetXattrOp struct {
	// The inode whose extended attribute we are reading.
	Inode InodeID

	// The name of the extended attribute.
	Name string

	// The destination buffer for the attribute's value. If it is too small to
	// hold the value, ERANGE should be returned.
	Dst []byte

	// Set by the file system: the number of bytes read into Dst, or
	// the number of bytes that would have been read into Dst if Dst was
	// big enough (return ERANGE in this case).
	BytesRead int
}
   845  
// List all the extended attributes for a file.
//
// This is sent in response to listxattr(2).
type ListXattrOp struct {
	// The inode whose extended attributes we are listing.
	Inode InodeID

	// The destination buffer for the list of attribute names. If it is too
	// small to hold the full list, ERANGE should be returned.
	//
	// The output data should consist of a sequence of NUL-terminated strings,
	// one for each xattr.
	Dst []byte

	// Set by the file system: the number of bytes read into Dst, or
	// the number of bytes that would have been read into Dst if Dst was
	// big enough (return ERANGE in this case).
	BytesRead int
}
   865  
// Set an extended attribute.
//
// This is sent in response to setxattr(2). Return ENOSPC if there is
// insufficient space remaining to store the extended attribute.
type SetXattrOp struct {
	// The inode whose extended attribute we are setting.
	Inode InodeID

	// The name of the extended attribute.
	Name string

	// The value for the extended attribute.
	Value []byte

	// Controls how an existing or missing attribute is handled:
	//
	//  *  If Flags is 0x1 and the attribute already exists, EEXIST should be
	//     returned.
	//
	//  *  If Flags is 0x2 and the attribute does not exist, ENOATTR should be
	//     returned.
	//
	//  *  If Flags is 0x0, the extended attribute will be created if need be,
	//     or its value will simply be replaced if the attribute exists.
	//
	// NOTE(review): 0x1 and 0x2 appear to correspond to XATTR_CREATE and
	// XATTR_REPLACE from setxattr(2) -- confirm.
	Flags uint32
}
   886  
// Allocate or deallocate space for a byte range of an open file, as selected
// by Mode.
//
// NOTE(review): presumably sent in response to fallocate(2) -- confirm.
type FallocateOp struct {
	// The inode and handle we are fallocating.
	Inode  InodeID
	Handle HandleID

	// Start of the byte range.
	Offset uint64

	// Length of the byte range.
	Length uint64

	// Selects the operation to perform on the byte range:
	//
	//  *  If Mode is 0x0, allocate disk space within the range specified.
	//
	//  *  If Mode has 0x1, allocate the space but don't increase the file
	//     size.
	//
	//  *  If Mode has 0x2, deallocate space within the range specified. In
	//     this case it should also have 0x1 (deallocating should not increase
	//     the file size).
	//
	// NOTE(review): 0x1 and 0x2 appear to correspond to Linux's
	// FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE from fallocate(2) --
	// confirm.
	Mode uint32
}