github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/inode.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package ext 16 17 import ( 18 "fmt" 19 "sync/atomic" 20 21 "github.com/SagerNet/gvisor/pkg/abi/linux" 22 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 23 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/ext/disklayout" 24 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 25 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 26 "github.com/SagerNet/gvisor/pkg/syserror" 27 ) 28 29 // inode represents an ext inode. 30 // 31 // inode uses the same inheritance pattern that pkg/sentry/vfs structures use. 32 // This has been done to increase memory locality. 33 // 34 // Implementations: 35 // inode -- 36 // |-- dir 37 // |-- symlink 38 // |-- regular-- 39 // |-- extent file 40 // |-- block map file 41 // 42 // +stateify savable 43 type inode struct { 44 // refs is a reference count. refs is accessed using atomic memory operations. 45 refs int64 46 47 // fs is the containing filesystem. 48 fs *filesystem 49 50 // inodeNum is the inode number of this inode on disk. This is used to 51 // identify inodes within the ext filesystem. 52 inodeNum uint32 53 54 // blkSize is the fs data block size. Same as filesystem.sb.BlockSize(). 55 blkSize uint64 56 57 // diskInode gives us access to the inode struct on disk. Immutable. 58 diskInode disklayout.Inode 59 60 locks vfs.FileLocks 61 62 // This is immutable. The first field of the implementations must have inode 63 // as the first field to ensure temporality. 64 impl interface{} 65 } 66 67 // incRef increments the inode ref count. 68 func (in *inode) incRef() { 69 atomic.AddInt64(&in.refs, 1) 70 } 71 72 // tryIncRef tries to increment the ref count. Returns true if successful. 73 func (in *inode) tryIncRef() bool { 74 for { 75 refs := atomic.LoadInt64(&in.refs) 76 if refs == 0 { 77 return false 78 } 79 if atomic.CompareAndSwapInt64(&in.refs, refs, refs+1) { 80 return true 81 } 82 } 83 } 84 85 // decRef decrements the inode ref count and releases the inode resources if 86 // the ref count hits 0. 87 // 88 // Precondition: Must have locked filesystem.mu. 89 func (in *inode) decRef() { 90 if refs := atomic.AddInt64(&in.refs, -1); refs == 0 { 91 delete(in.fs.inodeCache, in.inodeNum) 92 } else if refs < 0 { 93 panic("ext.inode.decRef() called without holding a reference") 94 } 95 } 96 97 // newInode is the inode constructor. Reads the inode off disk. Identifies 98 // inodes based on the absolute inode number on disk. 99 func newInode(fs *filesystem, inodeNum uint32) (*inode, error) { 100 if inodeNum == 0 { 101 panic("inode number 0 on ext filesystems is not possible") 102 } 103 104 inodeRecordSize := fs.sb.InodeSize() 105 var diskInode disklayout.Inode 106 if inodeRecordSize == disklayout.OldInodeSize { 107 diskInode = &disklayout.InodeOld{} 108 } else { 109 diskInode = &disklayout.InodeNew{} 110 } 111 112 // Calculate where the inode is actually placed. 113 inodesPerGrp := fs.sb.InodesPerGroup() 114 blkSize := fs.sb.BlockSize() 115 inodeTableOff := fs.bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize 116 inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp)) 117 118 if err := readFromDisk(fs.dev, int64(inodeOff), diskInode); err != nil { 119 return nil, err 120 } 121 122 // Build the inode based on its type. 123 args := inodeArgs{ 124 fs: fs, 125 inodeNum: inodeNum, 126 blkSize: blkSize, 127 diskInode: diskInode, 128 } 129 130 switch diskInode.Mode().FileType() { 131 case linux.ModeSymlink: 132 f, err := newSymlink(args) 133 if err != nil { 134 return nil, err 135 } 136 return &f.inode, nil 137 case linux.ModeRegular: 138 f, err := newRegularFile(args) 139 if err != nil { 140 return nil, err 141 } 142 return &f.inode, nil 143 case linux.ModeDirectory: 144 f, err := newDirectory(args, fs.sb.IncompatibleFeatures().DirentFileType) 145 if err != nil { 146 return nil, err 147 } 148 return &f.inode, nil 149 default: 150 // TODO(b/134676337): Return appropriate errors for sockets, pipes and devices. 151 return nil, linuxerr.EINVAL 152 } 153 } 154 155 type inodeArgs struct { 156 fs *filesystem 157 inodeNum uint32 158 blkSize uint64 159 diskInode disklayout.Inode 160 } 161 162 func (in *inode) init(args inodeArgs, impl interface{}) { 163 in.fs = args.fs 164 in.inodeNum = args.inodeNum 165 in.blkSize = args.blkSize 166 in.diskInode = args.diskInode 167 in.impl = impl 168 } 169 170 // open creates and returns a file description for the dentry passed in. 171 func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { 172 ats := vfs.AccessTypesForOpenFlags(opts) 173 if err := in.checkPermissions(rp.Credentials(), ats); err != nil { 174 return nil, err 175 } 176 mnt := rp.Mount() 177 switch in.impl.(type) { 178 case *regularFile: 179 var fd regularFileFD 180 fd.LockFD.Init(&in.locks) 181 if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { 182 return nil, err 183 } 184 return &fd.vfsfd, nil 185 case *directory: 186 // Can't open directories writably. This check is not necessary for a read 187 // only filesystem but will be required when write is implemented. 188 if ats&vfs.MayWrite != 0 { 189 return nil, syserror.EISDIR 190 } 191 var fd directoryFD 192 fd.LockFD.Init(&in.locks) 193 if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { 194 return nil, err 195 } 196 return &fd.vfsfd, nil 197 case *symlink: 198 if opts.Flags&linux.O_PATH == 0 { 199 // Can't open symlinks without O_PATH. 200 return nil, linuxerr.ELOOP 201 } 202 var fd symlinkFD 203 fd.LockFD.Init(&in.locks) 204 if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { 205 return nil, err 206 } 207 return &fd.vfsfd, nil 208 default: 209 panic(fmt.Sprintf("unknown inode type: %T", in.impl)) 210 } 211 } 212 213 func (in *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { 214 return vfs.GenericCheckPermissions(creds, ats, in.diskInode.Mode(), in.diskInode.UID(), in.diskInode.GID()) 215 } 216 217 // statTo writes the statx fields to the output parameter. 218 func (in *inode) statTo(stat *linux.Statx) { 219 stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | 220 linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE | 221 linux.STATX_ATIME | linux.STATX_CTIME | linux.STATX_MTIME 222 stat.Blksize = uint32(in.blkSize) 223 stat.Mode = uint16(in.diskInode.Mode()) 224 stat.Nlink = uint32(in.diskInode.LinksCount()) 225 stat.UID = uint32(in.diskInode.UID()) 226 stat.GID = uint32(in.diskInode.GID()) 227 stat.Ino = uint64(in.inodeNum) 228 stat.Size = in.diskInode.Size() 229 stat.Atime = in.diskInode.AccessTime().StatxTimestamp() 230 stat.Ctime = in.diskInode.ChangeTime().StatxTimestamp() 231 stat.Mtime = in.diskInode.ModificationTime().StatxTimestamp() 232 stat.DevMajor = linux.UNNAMED_MAJOR 233 stat.DevMinor = in.fs.devMinor 234 // TODO(b/134676337): Set stat.Blocks which is the number of 512 byte blocks 235 // (including metadata blocks) required to represent this file. 236 } 237 238 // getBGNum returns the block group number that a given inode belongs to. 239 func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 { 240 return (inodeNum - 1) / inodesPerGrp 241 } 242 243 // getBGOff returns the offset at which the given inode lives in the block 244 // group's inode table, i.e. the index of the inode in the inode table. 245 func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 { 246 return (inodeNum - 1) % inodesPerGrp 247 }