github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/fs/api.go (about) 1 // Copyright (c) 2015-2021, NVIDIA CORPORATION. 2 // SPDX-License-Identifier: Apache-2.0 3 4 // Package fs, sitting on top of the inode manager, defines the filesystem exposed by ProxyFS. 5 package fs 6 7 // #include <limits.h> 8 import "C" 9 10 import ( 11 "time" 12 13 "github.com/swiftstack/ProxyFS/inode" 14 "github.com/swiftstack/ProxyFS/utils" 15 ) 16 17 // ReadRangeIn is the ReadPlan range requested 18 // 19 // Either Offset or Len can be omitted, but not both. Those correspond 20 // to HTTP byteranges "bytes=N-" (no Len; asks for byte N to the end 21 // of the file) and "bytes=-N" (no Offset; asks for the last N bytes 22 // of the file). 23 type ReadRangeIn struct { 24 Offset *uint64 25 Len *uint64 26 } 27 28 // Returned by MiddlewareGetAccount 29 type AccountEntry struct { 30 Basename string 31 ModificationTime uint64 // nanoseconds since epoch 32 AttrChangeTime uint64 // nanoseconds since epoch 33 } 34 35 // Returned by MiddlewareGetContainer 36 // 37 type ContainerEntry struct { 38 Basename string 39 FileSize uint64 40 ModificationTime uint64 // nanoseconds since epoch 41 AttrChangeTime uint64 // nanoseconds since epoch 42 IsDir bool 43 NumWrites uint64 44 InodeNumber uint64 45 Metadata []byte 46 } 47 48 type HeadResponse struct { 49 Metadata []byte 50 FileSize uint64 51 ModificationTime uint64 // nanoseconds since epoch 52 AttrChangeTime uint64 // nanoseconds since epoch 53 IsDir bool 54 InodeNumber inode.InodeNumber 55 NumWrites uint64 56 } 57 58 // The following constants are used to ensure that the length of file fullpath and basenames are POSIX-compliant 59 const ( 60 FilePathMax = C.PATH_MAX 61 FileNameMax = C.NAME_MAX 62 ) 63 64 // The maximum number of symlinks we will follow 65 const MaxSymlinks = 8 // same as Linux; see include/linux/namei.h in Linux's Git repository 66 67 // Constant defining the name of the alternate data stream used by Swift Middleware 68 
const MiddlewareStream = "middleware"

// Base-2 size multipliers (Kibi = 2^10, Mebi = 2^20, Gibi = 2^30, Tebi = 2^40).
const (
	Kibi = 1024
	Mebi = Kibi * 1024
	Gibi = Mebi * 1024
	Tebi = Gibi * 1024
)

// StatVfs defaults.
const (
	DefaultReportedBlockSize    uint64 = 64 * Kibi
	DefaultReportedFragmentSize uint64 = 64 * Kibi
	DefaultReportedNumBlocks    uint64 = 100 * Tebi / DefaultReportedBlockSize
	DefaultReportedNumInodes    uint64 = 100 * Gibi
)

// SetXAttr constants (Go should wrap these from /usr/include/attr/xattr.h).
// They are the values accepted by the flags argument of VolumeHandle.SetXAttr.
const (
	SetXAttrCreateOrReplace = 0
	SetXAttrCreate          = 1
	SetXAttrReplace         = 2
)

// FlockStruct describes a lock request; it is passed to and returned from
// VolumeHandle.Flock.
//
// NOTE(review): the field names mirror POSIX struct flock (l_type, l_whence,
// l_start, l_len, l_pid); presumably they carry the same meaning - confirm
// against the Flock implementation.
type FlockStruct struct {
	Type   int32
	Whence int32
	Start  uint64
	Len    uint64
	Pid    uint64
}

// StatKey identifies one attribute stored in a Stat map.
type StatKey uint64

// Keys usable in a Stat map. Numbering starts at 1, so the zero StatKey
// is not one of the defined keys.
const (
	StatCTime     StatKey = iota + 1 // time of last inode attribute change (ctime in posix stat)
	StatCRTime                       // time of inode creation (crtime in posix stat)
	StatMTime                        // time of last data modification (mtime in posix stat)
	StatATime                        // time of last data access (atime in posix stat)
	StatSize                         // inode data size in bytes
	StatNLink                        // Number of hard links to the inode
	StatFType                        // file type of inode
	StatINum                         // inode number
	StatMode                         // file mode
	StatUserID                       // file userid
	StatGroupID                      // file groupid
	StatNumWrites                    // number of writes to inode
)

// TODO: StatMode, StatUserID, and StatGroupID are really
// uint32, not uint64. How to expose a stat map with
// values of different types?
121 type Stat map[StatKey]uint64 // key is one of StatKey consts 122 123 // Whole-filesystem stats for StatVfs calls 124 // 125 type StatVFSKey uint64 126 127 const ( 128 StatVFSBlockSize StatVFSKey = iota + 1 // statvfs.f_bsize - Filesystem block size 129 StatVFSFragmentSize // statvfs.f_frsize - Filesystem fragment size, smallest addressable data size in the filesystem 130 StatVFSTotalBlocks // statvfs.f_blocks - Filesystem size in StatVFSFragmentSize units 131 StatVFSFreeBlocks // statvfs.f_bfree - number of free blocks 132 StatVFSAvailBlocks // statvfs.f_bavail - number of free blocks for unprivileged users 133 StatVFSTotalInodes // statvfs.f_files - number of inodes in the filesystem 134 StatVFSFreeInodes // statvfs.f_ffree - number of free inodes in the filesystem 135 StatVFSAvailInodes // statvfs.f_favail - number of free inodes for unprivileged users 136 StatVFSFilesystemID // statvfs.f_fsid - Our filesystem ID 137 StatVFSMountFlags // statvfs.f_flag - mount flags 138 StatVFSMaxFilenameLen // statvfs.f_namemax - maximum filename length 139 ) 140 141 type StatVFS map[StatVFSKey]uint64 // key is one of StatVFSKey consts 142 143 type JobHandle interface { 144 Active() (active bool) 145 Wait() 146 Cancel() 147 Error() (err []string) 148 Info() (info []string) 149 } 150 151 // Volume handle interface 152 153 func FetchVolumeHandleByAccountName(accountName string) (volumeHandle VolumeHandle, err error) { 154 volumeHandle, err = fetchVolumeHandleByAccountName(accountName) 155 return 156 } 157 158 func FetchVolumeHandleByVolumeName(volumeName string) (volumeHandle VolumeHandle, err error) { 159 volumeHandle, err = fetchVolumeHandleByVolumeName(volumeName) 160 return 161 } 162 163 type VolumeHandle interface { 164 Access(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, accessMode inode.InodeMode) (accessReturn bool) 165 CallInodeToProvisionObject() (pPath string, err error) 166 Create(userID 
inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (fileInodeNumber inode.InodeNumber, err error) 167 DefragmentFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber) (err error) 168 Destroy(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) 169 FetchExtentMapChunk(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber, fileOffset uint64, maxEntriesFromFileOffset int64, maxEntriesBeforeFileOffset int64) (extentMapChunk *inode.ExtentMapChunkStruct, err error) 170 Flush(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) 171 Flock(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, lockCmd int32, inFlockStruct *FlockStruct) (outFlockStruct *FlockStruct, err error) 172 Getstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (stat Stat, err error) 173 GetType(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeType inode.InodeType, err error) 174 GetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (value []byte, err error) 175 IsDir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsDir bool, err error) 176 IsFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsFile bool, err error) 177 
IsSymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsSymlink bool, err error) 178 Link(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, targetInodeNumber inode.InodeNumber) (err error) 179 ListXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (streamNames []string, err error) 180 Lookup(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string) (inodeNumber inode.InodeNumber, err error) 181 LookupPath(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fullpath string) (inodeNumber inode.InodeNumber, err error) 182 MiddlewareCoalesce(destPath string, metaData []byte, elementPaths []string) (ino uint64, numWrites uint64, attrChangeTime uint64, modificationTime uint64, err error) 183 MiddlewareDelete(parentDir string, baseName string) (err error) 184 MiddlewareGetAccount(maxEntries uint64, marker string, endmarker string) (accountEnts []AccountEntry, mtime uint64, ctime uint64, err error) 185 MiddlewareGetContainer(vContainerName string, maxEntries uint64, marker string, endmarker string, prefix string, delimiter string) (containerEnts []ContainerEntry, err error) 186 MiddlewareGetObject(containerObjectPath string, readRangeIn []ReadRangeIn, readRangeOut *[]inode.ReadPlanStep) (response HeadResponse, err error) 187 MiddlewareHeadResponse(entityPath string) (response HeadResponse, err error) 188 MiddlewareMkdir(vContainerName string, vObjectPath string, metadata []byte) (mtime uint64, ctime uint64, inodeNumber inode.InodeNumber, numWrites uint64, err error) 189 MiddlewarePost(parentDir string, baseName string, newMetaData []byte, oldMetaData []byte) (err error) 190 
MiddlewarePutComplete(vContainerName string, vObjectPath string, pObjectPaths []string, pObjectLengths []uint64, pObjectMetadata []byte) (mtime uint64, ctime uint64, fileInodeNumber inode.InodeNumber, numWrites uint64, err error) 191 MiddlewarePutContainer(containerName string, oldMetadata []byte, newMetadata []byte) (err error) 192 Mkdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (newDirInodeNumber inode.InodeNumber, err error) 193 Move(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (toDestroyInodeNumber inode.InodeNumber, err error) 194 RemoveXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (err error) 195 Rename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (err error) 196 Read(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, length uint64, profiler *utils.Profiler) (buf []byte, err error) 197 Readdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (entries []inode.DirEntry, numEntries uint64, areMoreEntries bool, err error) 198 ReaddirPlus(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) 199 Readsymlink(userID 
inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (target string, err error) 200 Resize(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, newSize uint64) (err error) 201 Rmdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) 202 Setstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, stat Stat) (err error) 203 SetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string, value []byte, flags int) (err error) 204 StatVfs() (statVFS StatVFS, err error) 205 Symlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, target string) (symlinkInodeNumber inode.InodeNumber, err error) 206 Unlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) 207 VolumeName() (volumeName string) 208 Write(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, buf []byte, profiler *utils.Profiler) (size uint64, err error) 209 Wrote(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, containerName string, objectName string, fileOffset []uint64, objectOffset []uint64, length []uint64, wroteTime uint64) (err error) 210 } 211 212 // ValidateVolume performs an "FSCK" on the specified volumeName. 
213 func ValidateVolume(volumeName string) (validateVolumeHandle JobHandle) { 214 var ( 215 vVS *validateVolumeStruct 216 ) 217 startTime := time.Now() 218 defer func() { 219 globals.ValidateVolumeUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 220 }() 221 222 vVS = &validateVolumeStruct{} 223 224 vVS.jobType = "FSCK" 225 vVS.volumeName = volumeName 226 vVS.active = true 227 vVS.stopFlag = false 228 vVS.err = make([]string, 0) 229 vVS.info = make([]string, 0) 230 231 vVS.globalWaitGroup.Add(1) 232 go vVS.validateVolume() 233 234 validateVolumeHandle = vVS 235 236 return 237 } 238 239 // ScrubVolume performs a "SCRUB" on the specified volumeName. 240 func ScrubVolume(volumeName string) (scrubVolumeHandle JobHandle) { 241 var ( 242 sVS *scrubVolumeStruct 243 ) 244 startTime := time.Now() 245 defer func() { 246 globals.ScrubVolumeUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 247 }() 248 249 sVS = &scrubVolumeStruct{} 250 251 sVS.jobType = "SCRUB" 252 sVS.volumeName = volumeName 253 sVS.active = true 254 sVS.stopFlag = false 255 sVS.err = make([]string, 0) 256 sVS.info = make([]string, 0) 257 258 sVS.globalWaitGroup.Add(1) 259 go sVS.scrubVolume() 260 261 scrubVolumeHandle = sVS 262 263 return 264 } 265 266 // Utility functions 267 268 func ValidateBaseName(baseName string) (err error) { 269 startTime := time.Now() 270 defer func() { 271 globals.ValidateBaseNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 272 if err != nil { 273 globals.ValidateBaseNameErrors.Add(1) 274 } 275 }() 276 277 err = validateBaseName(baseName) 278 return 279 } 280 281 func ValidateFullPath(fullPath string) (err error) { 282 startTime := time.Now() 283 defer func() { 284 globals.ValidateFullPathUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 285 if err != nil { 286 globals.ValidateFullPathErrors.Add(1) 287 } 288 }() 289 290 err = validateFullPath(fullPath) 291 return 292 } 293 294 func AccountNameToVolumeName(accountName string) (volumeName 
string, ok bool) { 295 startTime := time.Now() 296 defer func() { 297 globals.AccountNameToVolumeNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 298 }() 299 300 volumeName, ok = inode.AccountNameToVolumeName(accountName) 301 return 302 } 303 304 func VolumeNameToActivePeerPrivateIPAddr(volumeName string) (activePeerPrivateIPAddr string, ok bool) { 305 startTime := time.Now() 306 defer func() { 307 globals.VolumeNameToActivePeerPrivateIPAddrUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 308 }() 309 310 activePeerPrivateIPAddr, ok = inode.VolumeNameToActivePeerPrivateIPAddr(volumeName) 311 return 312 }