github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/gofer/session.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 20 "github.com/SagerNet/gvisor/pkg/context" 21 "github.com/SagerNet/gvisor/pkg/p9" 22 "github.com/SagerNet/gvisor/pkg/refs" 23 "github.com/SagerNet/gvisor/pkg/sentry/device" 24 "github.com/SagerNet/gvisor/pkg/sentry/fs" 25 "github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil" 26 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 27 "github.com/SagerNet/gvisor/pkg/sync" 28 "github.com/SagerNet/gvisor/pkg/unet" 29 ) 30 31 // DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can 32 // be adjusted independently from the other dirent caches. 33 var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize 34 35 // +stateify savable 36 type overrideInfo struct { 37 dirent *fs.Dirent 38 39 // endpoint is set when dirent points to a socket. inode must not be set. 40 endpoint transport.BoundEndpoint 41 42 // inode is set when dirent points to a pipe. endpoint must not be set. 43 inode *fs.Inode 44 } 45 46 func (l *overrideInfo) inodeType() fs.InodeType { 47 switch { 48 case l.endpoint != nil: 49 return fs.Socket 50 case l.inode != nil: 51 return fs.Pipe 52 } 53 panic("endpoint or node must be set") 54 } 55 56 // +stateify savable 57 type overrideMaps struct { 58 // mu protexts the keyMap, and the pathMap below. 59 mu sync.RWMutex `state:"nosave"` 60 61 // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets/pipes. 62 // It is not stored during save because the inode ID may change upon restore. 63 keyMap map[device.MultiDeviceKey]*overrideInfo `state:"nosave"` 64 65 // pathMap links the sockets/pipes to their paths. 66 // It is filled before saving from the direntMap and is stored upon save. 67 // Upon restore, this map is used to re-populate the keyMap. 68 pathMap map[*overrideInfo]string 69 } 70 71 // addBoundEndpoint adds the bound endpoint to the map. 72 // A reference is taken on the dirent argument. 73 // 74 // Precondition: maps must have been locked with 'lock'. 75 func (e *overrideMaps) addBoundEndpoint(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { 76 d.IncRef() 77 e.keyMap[key] = &overrideInfo{dirent: d, endpoint: ep} 78 } 79 80 // addPipe adds the pipe inode to the map. 81 // A reference is taken on the dirent argument. 82 // 83 // Precondition: maps must have been locked with 'lock'. 84 func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *fs.Inode) { 85 d.IncRef() 86 e.keyMap[key] = &overrideInfo{dirent: d, inode: inode} 87 } 88 89 // remove deletes the key from the maps. 90 // 91 // Precondition: maps must have been locked with 'lock'. 92 func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) { 93 endpoint := e.keyMap[key] 94 delete(e.keyMap, key) 95 endpoint.dirent.DecRef(ctx) 96 } 97 98 // lock blocks other addition and removal operations from happening while 99 // the backing file is being created or deleted. Returns a function that unlocks 100 // the endpoint map. 101 // +checklocksacquire:e.mu 102 func (e *overrideMaps) lock() { 103 e.mu.Lock() 104 } 105 106 // +checklocksrelease:e.mu 107 func (e *overrideMaps) unlock() { 108 e.mu.Unlock() 109 } 110 111 // getBoundEndpoint returns the bound endpoint mapped to the given key. 112 // 113 // Precondition: maps must have been locked. 114 func (e *overrideMaps) getBoundEndpoint(key device.MultiDeviceKey) transport.BoundEndpoint { 115 if v := e.keyMap[key]; v != nil { 116 return v.endpoint 117 } 118 return nil 119 } 120 121 // getPipe returns the pipe inode mapped to the given key. 122 // 123 // Precondition: maps must have been locked. 124 func (e *overrideMaps) getPipe(key device.MultiDeviceKey) *fs.Inode { 125 if v := e.keyMap[key]; v != nil { 126 return v.inode 127 } 128 return nil 129 } 130 131 // getType returns the inode type if there is a corresponding endpoint for the 132 // given key. Returns false otherwise. 133 func (e *overrideMaps) getType(key device.MultiDeviceKey) (fs.InodeType, bool) { 134 e.mu.Lock() 135 v := e.keyMap[key] 136 e.mu.Unlock() 137 138 if v != nil { 139 return v.inodeType(), true 140 } 141 return 0, false 142 } 143 144 // session holds state for each 9p session established during sys_mount. 145 // 146 // +stateify savable 147 type session struct { 148 refs.AtomicRefCount 149 150 // msize is the value of the msize mount option, see fs/gofer/fs.go. 151 msize uint32 `state:"wait"` 152 153 // version is the value of the version mount option, see fs/gofer/fs.go. 154 version string `state:"wait"` 155 156 // cachePolicy is the cache policy. 157 cachePolicy cachePolicy `state:"wait"` 158 159 // aname is the value of the aname mount option, see fs/gofer/fs.go. 160 aname string `state:"wait"` 161 162 // The client associated with this session. This will be initialized lazily. 163 client *p9.Client `state:"nosave"` 164 165 // The p9.File pointing to attachName via the client. This will be initialized 166 // lazily. 167 attach contextFile `state:"nosave"` 168 169 // Flags provided to the mount. 170 superBlockFlags fs.MountSourceFlags `state:"wait"` 171 172 // limitHostFDTranslation is the value used for 173 // CachingInodeOperationsOptions.LimitHostFDTranslation for all 174 // CachingInodeOperations created by the session. 175 limitHostFDTranslation bool 176 177 // overlayfsStaleRead when set causes the readonly handle to be invalidated 178 // after file is open for write. 179 overlayfsStaleRead bool 180 181 // connID is a unique identifier for the session connection. 182 connID string `state:"wait"` 183 184 // inodeMappings contains mappings of fs.Inodes associated with this session 185 // to paths relative to the attach point, where inodeMappings is keyed by 186 // Inode.StableAttr.InodeID. 187 inodeMappings map[uint64]string `state:"wait"` 188 189 // mounter is the EUID/EGID that mounted this file system. 190 mounter fs.FileOwner `state:"wait"` 191 192 // overrides is used to map inodes that represent socket/pipes files to their 193 // corresponding endpoint/iops. These files are created as regular files in 194 // the gofer and their presence in this map indicate that they should indeed 195 // be socket/pipe files. This allows unix domain sockets and named pipes to 196 // be used with paths that belong to a gofer. 197 // 198 // There are a few possible races with someone stat'ing the file and another 199 // deleting it concurrently, where the file will not be reported as socket 200 // file. 201 overrides *overrideMaps `state:"wait"` 202 } 203 204 // Destroy tears down the session. 205 func (s *session) Destroy(ctx context.Context) { 206 s.client.Close() 207 } 208 209 // Revalidate implements MountSourceOperations.Revalidate. 210 func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool { 211 return s.cachePolicy.revalidate(ctx, name, parent, child) 212 } 213 214 // Keep implements MountSourceOperations.Keep. 215 func (s *session) Keep(d *fs.Dirent) bool { 216 return s.cachePolicy.keep(d) 217 } 218 219 // CacheReaddir implements MountSourceOperations.CacheReaddir. 220 func (s *session) CacheReaddir() bool { 221 return s.cachePolicy.cacheReaddir() 222 } 223 224 // ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings. 225 func (s *session) ResetInodeMappings() { 226 s.inodeMappings = make(map[uint64]string) 227 } 228 229 // SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping. 230 func (s *session) SaveInodeMapping(inode *fs.Inode, path string) { 231 // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, 232 // because overlay copyUp may have changed them out from under us. 233 // So much for "immutable". 234 switch iops := inode.InodeOperations.(type) { 235 case *inodeOperations: 236 s.inodeMappings[iops.fileState.sattr.InodeID] = path 237 case *fifo: 238 s.inodeMappings[iops.fileIops.fileState.sattr.InodeID] = path 239 default: 240 panic(fmt.Sprintf("Invalid type: %T", iops)) 241 } 242 } 243 244 // newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File 245 // and attributes (p9.QID, p9.AttrMask, p9.Attr). 246 // 247 // Endpoints lock must not be held if socket == false. 248 func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) { 249 deviceKey := device.MultiDeviceKey{ 250 Device: attr.RDev, 251 SecondaryDevice: s.connID, 252 Inode: qid.Path, 253 } 254 255 sattr := fs.StableAttr{ 256 Type: ntype(attr), 257 DeviceID: goferDevice.DeviceID(), 258 InodeID: goferDevice.Map(deviceKey), 259 BlockSize: bsize(attr), 260 } 261 262 if s.overrides != nil && sattr.Type == fs.RegularFile { 263 // If overrides are allowed on this filesystem, check if this file is 264 // supposed to be of a different type, e.g. socket. 265 if t, ok := s.overrides.getType(deviceKey); ok { 266 sattr.Type = t 267 } 268 } 269 270 fileState := &inodeFileState{ 271 s: s, 272 file: file, 273 sattr: sattr, 274 key: deviceKey, 275 } 276 if s.cachePolicy == cacheRemoteRevalidating && fs.IsFile(sattr) { 277 fileState.hostMappable = fsutil.NewHostMappable(fileState) 278 } 279 280 uattr := unstable(ctx, valid, attr, s.mounter, s.client) 281 return sattr, &inodeOperations{ 282 fileState: fileState, 283 cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{ 284 ForcePageCache: s.superBlockFlags.ForcePageCache, 285 LimitHostFDTranslation: s.limitHostFDTranslation, 286 }), 287 } 288 } 289 290 // Root returns the root of a 9p mount. This mount is bound to a 9p server 291 // based on conn. Otherwise configuration parameters are: 292 // 293 // * dev: connection id 294 // * filesystem: the filesystem backing the mount 295 // * superBlockFlags: the mount flags describing general mount options 296 // * opts: parsed 9p mount options 297 func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockFlags fs.MountSourceFlags, o opts) (*fs.Inode, error) { 298 // The mounting EUID/EGID will be cached by this file system. This will 299 // be used to assign ownership to files that the Gofer owns. 300 mounter := fs.FileOwnerFromContext(ctx) 301 302 conn, err := unet.NewSocket(o.fd) 303 if err != nil { 304 return nil, err 305 } 306 307 // Construct the session. 308 s := session{ 309 connID: dev, 310 msize: o.msize, 311 version: o.version, 312 cachePolicy: o.policy, 313 aname: o.aname, 314 superBlockFlags: superBlockFlags, 315 limitHostFDTranslation: o.limitHostFDTranslation, 316 overlayfsStaleRead: o.overlayfsStaleRead, 317 mounter: mounter, 318 } 319 s.EnableLeakCheck("gofer.session") 320 321 if o.privateunixsocket { 322 s.overrides = newOverrideMaps() 323 } 324 325 // Construct the MountSource with the session and superBlockFlags. 326 m := fs.NewMountSource(ctx, &s, filesystem, superBlockFlags) 327 328 // Given that gofer files can consume host FDs, restrict the number 329 // of files that can be held by the cache. 330 m.SetDirentCacheMaxSize(DefaultDirentCacheSize) 331 m.SetDirentCacheLimiter(fs.DirentCacheLimiterFromContext(ctx)) 332 333 // Send the Tversion request. 334 s.client, err = p9.NewClient(conn, s.msize, s.version) 335 if err != nil { 336 // Drop our reference on the session, it needs to be torn down. 337 s.DecRef(ctx) 338 return nil, err 339 } 340 341 // Notify that we're about to call the Gofer and block. 342 ctx.UninterruptibleSleepStart(false) 343 // Send the Tattach request. 344 s.attach.file, err = s.client.Attach(s.aname) 345 ctx.UninterruptibleSleepFinish(false) 346 if err != nil { 347 // Same as above. 348 s.DecRef(ctx) 349 return nil, err 350 } 351 352 qid, valid, attr, err := s.attach.getAttr(ctx, p9.AttrMaskAll()) 353 if err != nil { 354 s.attach.close(ctx) 355 // Same as above, but after we execute the Close request. 356 s.DecRef(ctx) 357 return nil, err 358 } 359 360 sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr) 361 return fs.NewInode(ctx, iops, m, sattr), nil 362 } 363 364 // newOverrideMaps creates a new overrideMaps. 365 func newOverrideMaps() *overrideMaps { 366 return &overrideMaps{ 367 keyMap: make(map[device.MultiDeviceKey]*overrideInfo), 368 pathMap: make(map[*overrideInfo]string), 369 } 370 } 371 372 // fillKeyMap populates key and dirent maps upon restore from saved pathmap. 373 func (s *session) fillKeyMap(ctx context.Context) error { 374 s.overrides.lock() 375 defer s.overrides.unlock() 376 377 for ep, dirPath := range s.overrides.pathMap { 378 _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) 379 if err != nil { 380 return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err) 381 } 382 383 qid, _, attr, err := file.getAttr(ctx, p9.AttrMaskAll()) 384 if err != nil { 385 return fmt.Errorf("failed to get file attributes of %s: %v", dirPath, err) 386 } 387 388 key := device.MultiDeviceKey{ 389 Device: attr.RDev, 390 SecondaryDevice: s.connID, 391 Inode: qid.Path, 392 } 393 394 s.overrides.keyMap[key] = ep 395 } 396 return nil 397 } 398 399 // fillPathMap populates paths for overrides from dirents in direntMap 400 // before save. 401 func (s *session) fillPathMap(ctx context.Context) error { 402 s.overrides.lock() 403 defer s.overrides.unlock() 404 405 for _, endpoint := range s.overrides.keyMap { 406 mountRoot := endpoint.dirent.MountRoot() 407 defer mountRoot.DecRef(ctx) 408 dirPath, _ := endpoint.dirent.FullName(mountRoot) 409 if dirPath == "" { 410 return fmt.Errorf("error getting path from dirent") 411 } 412 s.overrides.pathMap[endpoint] = dirPath 413 } 414 return nil 415 } 416 417 // restoreEndpointMaps recreates and fills the key and dirent maps. 418 func (s *session) restoreEndpointMaps(ctx context.Context) error { 419 // When restoring, only need to create the keyMap because the dirent and path 420 // maps got stored through the save. 421 s.overrides.keyMap = make(map[device.MultiDeviceKey]*overrideInfo) 422 if err := s.fillKeyMap(ctx); err != nil { 423 return fmt.Errorf("failed to insert sockets into endpoint map: %v", err) 424 } 425 426 // Re-create pathMap because it can no longer be trusted as socket paths can 427 // change while process continues to run. Empty pathMap will be re-filled upon 428 // next save. 429 s.overrides.pathMap = make(map[*overrideInfo]string) 430 return nil 431 }