github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/fs/api.go (about)

     1  // Copyright (c) 2015-2021, NVIDIA CORPORATION.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  // Package fs, sitting on top of the inode manager, defines the filesystem exposed by ProxyFS.
     5  package fs
     6  
     7  // #include <limits.h>
     8  import "C"
     9  
    10  import (
    11  	"time"
    12  
    13  	"github.com/swiftstack/ProxyFS/inode"
    14  	"github.com/swiftstack/ProxyFS/utils"
    15  )
    16  
// ReadRangeIn is the ReadPlan range requested.
//
// Either Offset or Len can be omitted, but not both. Those correspond
// to HTTP byteranges "bytes=N-" (no Len; asks for byte N to the end
// of the file) and "bytes=-N" (no Offset; asks for the last N bytes
// of the file).
//
// A slice of ReadRangeIn is the readRangeIn argument of
// VolumeHandle.MiddlewareGetObject.
type ReadRangeIn struct {
	Offset *uint64 // presumably nil when omitted - confirm against callers
	Len    *uint64 // presumably nil when omitted - confirm against callers
}
    27  
// AccountEntry is the element type of the accountEnts slice returned by
// VolumeHandle.MiddlewareGetAccount.
type AccountEntry struct {
	Basename         string
	ModificationTime uint64 // nanoseconds since epoch
	AttrChangeTime   uint64 // nanoseconds since epoch
}
    34  
// ContainerEntry is the element type of the containerEnts slice returned by
// VolumeHandle.MiddlewareGetContainer.
type ContainerEntry struct {
	Basename         string
	FileSize         uint64
	ModificationTime uint64 // nanoseconds since epoch
	AttrChangeTime   uint64 // nanoseconds since epoch
	IsDir            bool
	NumWrites        uint64
	InodeNumber      uint64 // NOTE(review): plain uint64 here, whereas HeadResponse uses inode.InodeNumber
	Metadata         []byte
}
    47  
// HeadResponse is the response value returned by
// VolumeHandle.MiddlewareGetObject and VolumeHandle.MiddlewareHeadResponse.
type HeadResponse struct {
	Metadata         []byte
	FileSize         uint64
	ModificationTime uint64 // nanoseconds since epoch
	AttrChangeTime   uint64 // nanoseconds since epoch
	IsDir            bool
	InodeNumber      inode.InodeNumber
	NumWrites        uint64
}
    57  
// FilePathMax and FileNameMax take their values from PATH_MAX and NAME_MAX in
// the C header <limits.h> (pulled in through cgo). They are used to ensure that
// the lengths of file full paths and basenames are POSIX-compliant.
const (
	FilePathMax = C.PATH_MAX
	FileNameMax = C.NAME_MAX
)
    63  
// MaxSymlinks is the maximum number of symlinks we will follow.
const MaxSymlinks = 8 // same as Linux; see include/linux/namei.h in Linux's Git repository
    66  
// MiddlewareStream is the name of the alternate data stream used by Swift Middleware.
const MiddlewareStream = "middleware"
    69  
// Base-2 size multipliers; each is 1024 times the previous one.
const (
	Kibi = 1024        // 2^10
	Mebi = Kibi * 1024 // 2^20
	Gibi = Mebi * 1024 // 2^30
	Tebi = Gibi * 1024 // 2^40
)
    77  
// StatVfs defaults.
//
// NOTE(review): presumably these are the values StatVfs reports when nothing
// else is configured - confirm where they are consumed.
const (
	DefaultReportedBlockSize    uint64 = 64 * Kibi
	DefaultReportedFragmentSize uint64 = 64 * Kibi
	DefaultReportedNumBlocks    uint64 = 100 * Tebi / DefaultReportedBlockSize // 100 TiB expressed in DefaultReportedBlockSize units
	DefaultReportedNumInodes    uint64 = 100 * Gibi
)
    85  
// SetXAttr constants (Go should wrap these from /usr/include/attr/xattr.h).
//
// They are the values accepted by the flags argument of VolumeHandle.SetXAttr.
// NOTE(review): 1 and 2 presumably mirror XATTR_CREATE and XATTR_REPLACE from
// that header - confirm.
const (
	SetXAttrCreateOrReplace = 0
	SetXAttrCreate          = 1
	SetXAttrReplace         = 2
)
    92  
// FlockStruct is passed to and returned from VolumeHandle.Flock
// (inFlockStruct / outFlockStruct).
//
// NOTE(review): the field names resemble those of the POSIX struct flock
// (l_type, l_whence, l_start, l_len, l_pid) - confirm the intended mapping
// against the Flock implementation.
type FlockStruct struct {
	Type   int32
	Whence int32
	Start  uint64
	Len    uint64
	Pid    uint64
}
   100  
// StatKey identifies one attribute in a Stat map.
type StatKey uint64

// StatKey values. Numbering starts at 1 (iota + 1), so the zero value of
// StatKey is not one of the defined keys.
const (
	StatCTime     StatKey = iota + 1 // time of last inode attribute change (ctime in posix stat)
	StatCRTime                       // time of inode creation              (crtime in posix stat)
	StatMTime                        // time of last data modification      (mtime in posix stat)
	StatATime                        // time of last data access            (atime in posix stat)
	StatSize                         // inode data size in bytes
	StatNLink                        // Number of hard links to the inode
	StatFType                        // file type of inode
	StatINum                         // inode number
	StatMode                         // file mode
	StatUserID                       // file userid
	StatGroupID                      // file groupid
	StatNumWrites                    // number of writes to inode
)

// Stat maps StatKey constants to attribute values. It is the type returned by
// VolumeHandle.Getstat and accepted by VolumeHandle.Setstat.
//
// TODO: StatMode, StatUserID, and StatGroupID are really
//       uint32, not uint64. How to expose a stat map with
//       values of different types?
type Stat map[StatKey]uint64 // key is one of StatKey consts
   122  
// StatVFSKey identifies one whole-filesystem statistic in a StatVFS map, as
// used for StatVfs calls.
type StatVFSKey uint64

// StatVFSKey values. Numbering starts at 1 (iota + 1), so the zero value of
// StatVFSKey is not one of the defined keys.
const (
	StatVFSBlockSize      StatVFSKey = iota + 1 // statvfs.f_bsize - Filesystem block size
	StatVFSFragmentSize                         // statvfs.f_frsize - Filesystem fragment size, smallest addressable data size in the filesystem
	StatVFSTotalBlocks                          // statvfs.f_blocks - Filesystem size in StatVFSFragmentSize units
	StatVFSFreeBlocks                           // statvfs.f_bfree - number of free blocks
	StatVFSAvailBlocks                          // statvfs.f_bavail - number of free blocks for unprivileged users
	StatVFSTotalInodes                          // statvfs.f_files - number of inodes in the filesystem
	StatVFSFreeInodes                           // statvfs.f_ffree - number of free inodes in the filesystem
	StatVFSAvailInodes                          // statvfs.f_favail - number of free inodes for unprivileged users
	StatVFSFilesystemID                         // statvfs.f_fsid  - Our filesystem ID
	StatVFSMountFlags                           // statvfs.f_flag  - mount flags
	StatVFSMaxFilenameLen                       // statvfs.f_namemax - maximum filename length
)

// StatVFS maps StatVFSKey constants to their values. It is the type returned
// by VolumeHandle.StatVfs.
type StatVFS map[StatVFSKey]uint64 // key is one of StatVFSKey consts
   142  
// JobHandle is the handle returned by ValidateVolume and ScrubVolume for the
// job each of them starts.
//
// NOTE(review): the per-method notes below are inferred from the method names
// and signatures only; the implementations are not in this file - confirm.
type JobHandle interface {
	Active() (active bool) // presumably reports whether the job is still running
	Wait()                 // presumably blocks until the job has finished
	Cancel()               // presumably asks the job to stop
	Error() (err []string) // presumably the error messages recorded by the job
	Info() (info []string) // presumably the informational messages recorded by the job
}
   150  
   151  // Volume handle interface
   152  
   153  func FetchVolumeHandleByAccountName(accountName string) (volumeHandle VolumeHandle, err error) {
   154  	volumeHandle, err = fetchVolumeHandleByAccountName(accountName)
   155  	return
   156  }
   157  
   158  func FetchVolumeHandleByVolumeName(volumeName string) (volumeHandle VolumeHandle, err error) {
   159  	volumeHandle, err = fetchVolumeHandleByVolumeName(volumeName)
   160  	return
   161  }
   162  
// VolumeHandle is the per-volume filesystem API exposed by this package.
// Handles are obtained from FetchVolumeHandleByAccountName or
// FetchVolumeHandleByVolumeName.
//
// Most methods take the caller's userID, groupID and otherGroupIDs
// (presumably used for permission checking - confirm in the implementation).
// The Middleware* methods, CallInodeToProvisionObject, StatVfs and VolumeName
// take no caller credentials.
type VolumeHandle interface {
	Access(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, accessMode inode.InodeMode) (accessReturn bool)
	CallInodeToProvisionObject() (pPath string, err error)
	Create(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (fileInodeNumber inode.InodeNumber, err error)
	DefragmentFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber) (err error)
	Destroy(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error)
	FetchExtentMapChunk(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber, fileOffset uint64, maxEntriesFromFileOffset int64, maxEntriesBeforeFileOffset int64) (extentMapChunk *inode.ExtentMapChunkStruct, err error)
	Flush(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error)
	Flock(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, lockCmd int32, inFlockStruct *FlockStruct) (outFlockStruct *FlockStruct, err error)
	Getstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (stat Stat, err error)
	GetType(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeType inode.InodeType, err error)
	GetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (value []byte, err error)
	IsDir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsDir bool, err error)
	IsFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsFile bool, err error)
	IsSymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsSymlink bool, err error)
	Link(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, targetInodeNumber inode.InodeNumber) (err error)
	ListXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (streamNames []string, err error)
	Lookup(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string) (inodeNumber inode.InodeNumber, err error)
	LookupPath(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fullpath string) (inodeNumber inode.InodeNumber, err error)
	MiddlewareCoalesce(destPath string, metaData []byte, elementPaths []string) (ino uint64, numWrites uint64, attrChangeTime uint64, modificationTime uint64, err error)
	MiddlewareDelete(parentDir string, baseName string) (err error)
	MiddlewareGetAccount(maxEntries uint64, marker string, endmarker string) (accountEnts []AccountEntry, mtime uint64, ctime uint64, err error)
	MiddlewareGetContainer(vContainerName string, maxEntries uint64, marker string, endmarker string, prefix string, delimiter string) (containerEnts []ContainerEntry, err error)
	MiddlewareGetObject(containerObjectPath string, readRangeIn []ReadRangeIn, readRangeOut *[]inode.ReadPlanStep) (response HeadResponse, err error)
	MiddlewareHeadResponse(entityPath string) (response HeadResponse, err error)
	MiddlewareMkdir(vContainerName string, vObjectPath string, metadata []byte) (mtime uint64, ctime uint64, inodeNumber inode.InodeNumber, numWrites uint64, err error)
	MiddlewarePost(parentDir string, baseName string, newMetaData []byte, oldMetaData []byte) (err error)
	MiddlewarePutComplete(vContainerName string, vObjectPath string, pObjectPaths []string, pObjectLengths []uint64, pObjectMetadata []byte) (mtime uint64, ctime uint64, fileInodeNumber inode.InodeNumber, numWrites uint64, err error)
	MiddlewarePutContainer(containerName string, oldMetadata []byte, newMetadata []byte) (err error)
	Mkdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (newDirInodeNumber inode.InodeNumber, err error)
	Move(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (toDestroyInodeNumber inode.InodeNumber, err error)
	RemoveXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (err error)
	Rename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (err error)
	Read(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, length uint64, profiler *utils.Profiler) (buf []byte, err error)
	Readdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (entries []inode.DirEntry, numEntries uint64, areMoreEntries bool, err error)
	ReaddirPlus(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error)
	Readsymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (target string, err error)
	Resize(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, newSize uint64) (err error)
	Rmdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error)
	Setstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, stat Stat) (err error)
	SetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string, value []byte, flags int) (err error)
	StatVfs() (statVFS StatVFS, err error)
	Symlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, target string) (symlinkInodeNumber inode.InodeNumber, err error)
	Unlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error)
	VolumeName() (volumeName string)
	Write(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, buf []byte, profiler *utils.Profiler) (size uint64, err error)
	Wrote(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, containerName string, objectName string, fileOffset []uint64, objectOffset []uint64, length []uint64, wroteTime uint64) (err error)
}
   211  
   212  // ValidateVolume performs an "FSCK" on the specified volumeName.
   213  func ValidateVolume(volumeName string) (validateVolumeHandle JobHandle) {
   214  	var (
   215  		vVS *validateVolumeStruct
   216  	)
   217  	startTime := time.Now()
   218  	defer func() {
   219  		globals.ValidateVolumeUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   220  	}()
   221  
   222  	vVS = &validateVolumeStruct{}
   223  
   224  	vVS.jobType = "FSCK"
   225  	vVS.volumeName = volumeName
   226  	vVS.active = true
   227  	vVS.stopFlag = false
   228  	vVS.err = make([]string, 0)
   229  	vVS.info = make([]string, 0)
   230  
   231  	vVS.globalWaitGroup.Add(1)
   232  	go vVS.validateVolume()
   233  
   234  	validateVolumeHandle = vVS
   235  
   236  	return
   237  }
   238  
   239  // ScrubVolume performs a "SCRUB" on the specified volumeName.
   240  func ScrubVolume(volumeName string) (scrubVolumeHandle JobHandle) {
   241  	var (
   242  		sVS *scrubVolumeStruct
   243  	)
   244  	startTime := time.Now()
   245  	defer func() {
   246  		globals.ScrubVolumeUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   247  	}()
   248  
   249  	sVS = &scrubVolumeStruct{}
   250  
   251  	sVS.jobType = "SCRUB"
   252  	sVS.volumeName = volumeName
   253  	sVS.active = true
   254  	sVS.stopFlag = false
   255  	sVS.err = make([]string, 0)
   256  	sVS.info = make([]string, 0)
   257  
   258  	sVS.globalWaitGroup.Add(1)
   259  	go sVS.scrubVolume()
   260  
   261  	scrubVolumeHandle = sVS
   262  
   263  	return
   264  }
   265  
   266  // Utility functions
   267  
   268  func ValidateBaseName(baseName string) (err error) {
   269  	startTime := time.Now()
   270  	defer func() {
   271  		globals.ValidateBaseNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   272  		if err != nil {
   273  			globals.ValidateBaseNameErrors.Add(1)
   274  		}
   275  	}()
   276  
   277  	err = validateBaseName(baseName)
   278  	return
   279  }
   280  
   281  func ValidateFullPath(fullPath string) (err error) {
   282  	startTime := time.Now()
   283  	defer func() {
   284  		globals.ValidateFullPathUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   285  		if err != nil {
   286  			globals.ValidateFullPathErrors.Add(1)
   287  		}
   288  	}()
   289  
   290  	err = validateFullPath(fullPath)
   291  	return
   292  }
   293  
   294  func AccountNameToVolumeName(accountName string) (volumeName string, ok bool) {
   295  	startTime := time.Now()
   296  	defer func() {
   297  		globals.AccountNameToVolumeNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   298  	}()
   299  
   300  	volumeName, ok = inode.AccountNameToVolumeName(accountName)
   301  	return
   302  }
   303  
   304  func VolumeNameToActivePeerPrivateIPAddr(volumeName string) (activePeerPrivateIPAddr string, ok bool) {
   305  	startTime := time.Now()
   306  	defer func() {
   307  		globals.VolumeNameToActivePeerPrivateIPAddrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   308  	}()
   309  
   310  	activePeerPrivateIPAddr, ok = inode.VolumeNameToActivePeerPrivateIPAddr(volumeName)
   311  	return
   312  }