github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/gofer/session.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/context"
    21  	"github.com/SagerNet/gvisor/pkg/p9"
    22  	"github.com/SagerNet/gvisor/pkg/refs"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/device"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    27  	"github.com/SagerNet/gvisor/pkg/sync"
    28  	"github.com/SagerNet/gvisor/pkg/unet"
    29  )
    30  
    31  // DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can
    32  // be adjusted independently from the other dirent caches.
    33  var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize
    34  
    35  // +stateify savable
    36  type overrideInfo struct {
    37  	dirent *fs.Dirent
    38  
    39  	// endpoint is set when dirent points to a socket. inode must not be set.
    40  	endpoint transport.BoundEndpoint
    41  
    42  	// inode is set when dirent points to a pipe. endpoint must not be set.
    43  	inode *fs.Inode
    44  }
    45  
    46  func (l *overrideInfo) inodeType() fs.InodeType {
    47  	switch {
    48  	case l.endpoint != nil:
    49  		return fs.Socket
    50  	case l.inode != nil:
    51  		return fs.Pipe
    52  	}
    53  	panic("endpoint or node must be set")
    54  }
    55  
    56  // +stateify savable
    57  type overrideMaps struct {
    58  	// mu protexts the keyMap, and the pathMap below.
    59  	mu sync.RWMutex `state:"nosave"`
    60  
    61  	// keyMap links MultiDeviceKeys (containing inode IDs) to their sockets/pipes.
    62  	// It is not stored during save because the inode ID may change upon restore.
    63  	keyMap map[device.MultiDeviceKey]*overrideInfo `state:"nosave"`
    64  
    65  	// pathMap links the sockets/pipes to their paths.
    66  	// It is filled before saving from the direntMap and is stored upon save.
    67  	// Upon restore, this map is used to re-populate the keyMap.
    68  	pathMap map[*overrideInfo]string
    69  }
    70  
    71  // addBoundEndpoint adds the bound endpoint to the map.
    72  // A reference is taken on the dirent argument.
    73  //
    74  // Precondition: maps must have been locked with 'lock'.
    75  func (e *overrideMaps) addBoundEndpoint(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) {
    76  	d.IncRef()
    77  	e.keyMap[key] = &overrideInfo{dirent: d, endpoint: ep}
    78  }
    79  
    80  // addPipe adds the pipe inode to the map.
    81  // A reference is taken on the dirent argument.
    82  //
    83  // Precondition: maps must have been locked with 'lock'.
    84  func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *fs.Inode) {
    85  	d.IncRef()
    86  	e.keyMap[key] = &overrideInfo{dirent: d, inode: inode}
    87  }
    88  
    89  // remove deletes the key from the maps.
    90  //
    91  // Precondition: maps must have been locked with 'lock'.
    92  func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) {
    93  	endpoint := e.keyMap[key]
    94  	delete(e.keyMap, key)
    95  	endpoint.dirent.DecRef(ctx)
    96  }
    97  
    98  // lock blocks other addition and removal operations from happening while
    99  // the backing file is being created or deleted. Returns a function that unlocks
   100  // the endpoint map.
   101  // +checklocksacquire:e.mu
   102  func (e *overrideMaps) lock() {
   103  	e.mu.Lock()
   104  }
   105  
   106  // +checklocksrelease:e.mu
   107  func (e *overrideMaps) unlock() {
   108  	e.mu.Unlock()
   109  }
   110  
   111  // getBoundEndpoint returns the bound endpoint mapped to the given key.
   112  //
   113  // Precondition: maps must have been locked.
   114  func (e *overrideMaps) getBoundEndpoint(key device.MultiDeviceKey) transport.BoundEndpoint {
   115  	if v := e.keyMap[key]; v != nil {
   116  		return v.endpoint
   117  	}
   118  	return nil
   119  }
   120  
   121  // getPipe returns the pipe inode mapped to the given key.
   122  //
   123  // Precondition: maps must have been locked.
   124  func (e *overrideMaps) getPipe(key device.MultiDeviceKey) *fs.Inode {
   125  	if v := e.keyMap[key]; v != nil {
   126  		return v.inode
   127  	}
   128  	return nil
   129  }
   130  
   131  // getType returns the inode type if there is a corresponding endpoint for the
   132  // given key. Returns false otherwise.
   133  func (e *overrideMaps) getType(key device.MultiDeviceKey) (fs.InodeType, bool) {
   134  	e.mu.Lock()
   135  	v := e.keyMap[key]
   136  	e.mu.Unlock()
   137  
   138  	if v != nil {
   139  		return v.inodeType(), true
   140  	}
   141  	return 0, false
   142  }
   143  
   144  // session holds state for each 9p session established during sys_mount.
   145  //
   146  // +stateify savable
   147  type session struct {
   148  	refs.AtomicRefCount
   149  
   150  	// msize is the value of the msize mount option, see fs/gofer/fs.go.
   151  	msize uint32 `state:"wait"`
   152  
   153  	// version is the value of the version mount option, see fs/gofer/fs.go.
   154  	version string `state:"wait"`
   155  
   156  	// cachePolicy is the cache policy.
   157  	cachePolicy cachePolicy `state:"wait"`
   158  
   159  	// aname is the value of the aname mount option, see fs/gofer/fs.go.
   160  	aname string `state:"wait"`
   161  
   162  	// The client associated with this session. This will be initialized lazily.
   163  	client *p9.Client `state:"nosave"`
   164  
   165  	// The p9.File pointing to attachName via the client. This will be initialized
   166  	// lazily.
   167  	attach contextFile `state:"nosave"`
   168  
   169  	// Flags provided to the mount.
   170  	superBlockFlags fs.MountSourceFlags `state:"wait"`
   171  
   172  	// limitHostFDTranslation is the value used for
   173  	// CachingInodeOperationsOptions.LimitHostFDTranslation for all
   174  	// CachingInodeOperations created by the session.
   175  	limitHostFDTranslation bool
   176  
   177  	// overlayfsStaleRead when set causes the readonly handle to be invalidated
   178  	// after file is open for write.
   179  	overlayfsStaleRead bool
   180  
   181  	// connID is a unique identifier for the session connection.
   182  	connID string `state:"wait"`
   183  
   184  	// inodeMappings contains mappings of fs.Inodes associated with this session
   185  	// to paths relative to the attach point, where inodeMappings is keyed by
   186  	// Inode.StableAttr.InodeID.
   187  	inodeMappings map[uint64]string `state:"wait"`
   188  
   189  	// mounter is the EUID/EGID that mounted this file system.
   190  	mounter fs.FileOwner `state:"wait"`
   191  
   192  	// overrides is used to map inodes that represent socket/pipes files to their
   193  	// corresponding endpoint/iops. These files are created as regular files in
   194  	// the gofer and their presence in this map indicate that they should indeed
   195  	// be socket/pipe files. This allows unix domain sockets and named pipes to
   196  	// be used with paths that belong to a gofer.
   197  	//
   198  	// There are a few possible races with someone stat'ing the file and another
   199  	// deleting it concurrently, where the file will not be reported as socket
   200  	// file.
   201  	overrides *overrideMaps `state:"wait"`
   202  }
   203  
   204  // Destroy tears down the session.
   205  func (s *session) Destroy(ctx context.Context) {
   206  	s.client.Close()
   207  }
   208  
   209  // Revalidate implements MountSourceOperations.Revalidate.
   210  func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool {
   211  	return s.cachePolicy.revalidate(ctx, name, parent, child)
   212  }
   213  
   214  // Keep implements MountSourceOperations.Keep.
   215  func (s *session) Keep(d *fs.Dirent) bool {
   216  	return s.cachePolicy.keep(d)
   217  }
   218  
   219  // CacheReaddir implements MountSourceOperations.CacheReaddir.
   220  func (s *session) CacheReaddir() bool {
   221  	return s.cachePolicy.cacheReaddir()
   222  }
   223  
   224  // ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings.
   225  func (s *session) ResetInodeMappings() {
   226  	s.inodeMappings = make(map[uint64]string)
   227  }
   228  
   229  // SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping.
   230  func (s *session) SaveInodeMapping(inode *fs.Inode, path string) {
   231  	// This is very unintuitive. We *CANNOT* trust the inode's StableAttrs,
   232  	// because overlay copyUp may have changed them out from under us.
   233  	// So much for "immutable".
   234  	switch iops := inode.InodeOperations.(type) {
   235  	case *inodeOperations:
   236  		s.inodeMappings[iops.fileState.sattr.InodeID] = path
   237  	case *fifo:
   238  		s.inodeMappings[iops.fileIops.fileState.sattr.InodeID] = path
   239  	default:
   240  		panic(fmt.Sprintf("Invalid type: %T", iops))
   241  	}
   242  }
   243  
   244  // newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File
   245  // and attributes (p9.QID, p9.AttrMask, p9.Attr).
   246  //
   247  // Endpoints lock must not be held if socket == false.
   248  func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) {
   249  	deviceKey := device.MultiDeviceKey{
   250  		Device:          attr.RDev,
   251  		SecondaryDevice: s.connID,
   252  		Inode:           qid.Path,
   253  	}
   254  
   255  	sattr := fs.StableAttr{
   256  		Type:      ntype(attr),
   257  		DeviceID:  goferDevice.DeviceID(),
   258  		InodeID:   goferDevice.Map(deviceKey),
   259  		BlockSize: bsize(attr),
   260  	}
   261  
   262  	if s.overrides != nil && sattr.Type == fs.RegularFile {
   263  		// If overrides are allowed on this filesystem, check if this file is
   264  		// supposed to be of a different type, e.g. socket.
   265  		if t, ok := s.overrides.getType(deviceKey); ok {
   266  			sattr.Type = t
   267  		}
   268  	}
   269  
   270  	fileState := &inodeFileState{
   271  		s:     s,
   272  		file:  file,
   273  		sattr: sattr,
   274  		key:   deviceKey,
   275  	}
   276  	if s.cachePolicy == cacheRemoteRevalidating && fs.IsFile(sattr) {
   277  		fileState.hostMappable = fsutil.NewHostMappable(fileState)
   278  	}
   279  
   280  	uattr := unstable(ctx, valid, attr, s.mounter, s.client)
   281  	return sattr, &inodeOperations{
   282  		fileState: fileState,
   283  		cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{
   284  			ForcePageCache:         s.superBlockFlags.ForcePageCache,
   285  			LimitHostFDTranslation: s.limitHostFDTranslation,
   286  		}),
   287  	}
   288  }
   289  
   290  // Root returns the root of a 9p mount. This mount is bound to a 9p server
   291  // based on conn. Otherwise configuration parameters are:
   292  //
   293  // * dev:         connection id
   294  // * filesystem:  the filesystem backing the mount
   295  // * superBlockFlags:  the mount flags describing general mount options
   296  // * opts:        parsed 9p mount options
   297  func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockFlags fs.MountSourceFlags, o opts) (*fs.Inode, error) {
   298  	// The mounting EUID/EGID will be cached by this file system. This will
   299  	// be used to assign ownership to files that the Gofer owns.
   300  	mounter := fs.FileOwnerFromContext(ctx)
   301  
   302  	conn, err := unet.NewSocket(o.fd)
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  
   307  	// Construct the session.
   308  	s := session{
   309  		connID:                 dev,
   310  		msize:                  o.msize,
   311  		version:                o.version,
   312  		cachePolicy:            o.policy,
   313  		aname:                  o.aname,
   314  		superBlockFlags:        superBlockFlags,
   315  		limitHostFDTranslation: o.limitHostFDTranslation,
   316  		overlayfsStaleRead:     o.overlayfsStaleRead,
   317  		mounter:                mounter,
   318  	}
   319  	s.EnableLeakCheck("gofer.session")
   320  
   321  	if o.privateunixsocket {
   322  		s.overrides = newOverrideMaps()
   323  	}
   324  
   325  	// Construct the MountSource with the session and superBlockFlags.
   326  	m := fs.NewMountSource(ctx, &s, filesystem, superBlockFlags)
   327  
   328  	// Given that gofer files can consume host FDs, restrict the number
   329  	// of files that can be held by the cache.
   330  	m.SetDirentCacheMaxSize(DefaultDirentCacheSize)
   331  	m.SetDirentCacheLimiter(fs.DirentCacheLimiterFromContext(ctx))
   332  
   333  	// Send the Tversion request.
   334  	s.client, err = p9.NewClient(conn, s.msize, s.version)
   335  	if err != nil {
   336  		// Drop our reference on the session, it needs to be torn down.
   337  		s.DecRef(ctx)
   338  		return nil, err
   339  	}
   340  
   341  	// Notify that we're about to call the Gofer and block.
   342  	ctx.UninterruptibleSleepStart(false)
   343  	// Send the Tattach request.
   344  	s.attach.file, err = s.client.Attach(s.aname)
   345  	ctx.UninterruptibleSleepFinish(false)
   346  	if err != nil {
   347  		// Same as above.
   348  		s.DecRef(ctx)
   349  		return nil, err
   350  	}
   351  
   352  	qid, valid, attr, err := s.attach.getAttr(ctx, p9.AttrMaskAll())
   353  	if err != nil {
   354  		s.attach.close(ctx)
   355  		// Same as above, but after we execute the Close request.
   356  		s.DecRef(ctx)
   357  		return nil, err
   358  	}
   359  
   360  	sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr)
   361  	return fs.NewInode(ctx, iops, m, sattr), nil
   362  }
   363  
   364  // newOverrideMaps creates a new overrideMaps.
   365  func newOverrideMaps() *overrideMaps {
   366  	return &overrideMaps{
   367  		keyMap:  make(map[device.MultiDeviceKey]*overrideInfo),
   368  		pathMap: make(map[*overrideInfo]string),
   369  	}
   370  }
   371  
   372  // fillKeyMap populates key and dirent maps upon restore from saved pathmap.
   373  func (s *session) fillKeyMap(ctx context.Context) error {
   374  	s.overrides.lock()
   375  	defer s.overrides.unlock()
   376  
   377  	for ep, dirPath := range s.overrides.pathMap {
   378  		_, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath))
   379  		if err != nil {
   380  			return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err)
   381  		}
   382  
   383  		qid, _, attr, err := file.getAttr(ctx, p9.AttrMaskAll())
   384  		if err != nil {
   385  			return fmt.Errorf("failed to get file attributes of %s: %v", dirPath, err)
   386  		}
   387  
   388  		key := device.MultiDeviceKey{
   389  			Device:          attr.RDev,
   390  			SecondaryDevice: s.connID,
   391  			Inode:           qid.Path,
   392  		}
   393  
   394  		s.overrides.keyMap[key] = ep
   395  	}
   396  	return nil
   397  }
   398  
   399  // fillPathMap populates paths for overrides from dirents in direntMap
   400  // before save.
   401  func (s *session) fillPathMap(ctx context.Context) error {
   402  	s.overrides.lock()
   403  	defer s.overrides.unlock()
   404  
   405  	for _, endpoint := range s.overrides.keyMap {
   406  		mountRoot := endpoint.dirent.MountRoot()
   407  		defer mountRoot.DecRef(ctx)
   408  		dirPath, _ := endpoint.dirent.FullName(mountRoot)
   409  		if dirPath == "" {
   410  			return fmt.Errorf("error getting path from dirent")
   411  		}
   412  		s.overrides.pathMap[endpoint] = dirPath
   413  	}
   414  	return nil
   415  }
   416  
   417  // restoreEndpointMaps recreates and fills the key and dirent maps.
   418  func (s *session) restoreEndpointMaps(ctx context.Context) error {
   419  	// When restoring, only need to create the keyMap because the dirent and path
   420  	// maps got stored through the save.
   421  	s.overrides.keyMap = make(map[device.MultiDeviceKey]*overrideInfo)
   422  	if err := s.fillKeyMap(ctx); err != nil {
   423  		return fmt.Errorf("failed to insert sockets into endpoint map: %v", err)
   424  	}
   425  
   426  	// Re-create pathMap because it can no longer be trusted as socket paths can
   427  	// change while process continues to run. Empty pathMap will be re-filled upon
   428  	// next save.
   429  	s.overrides.pathMap = make(map[*overrideInfo]string)
   430  	return nil
   431  }