github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/connection.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"sync"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    21  	"github.com/SagerNet/gvisor/pkg/context"
    22  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    23  	"github.com/SagerNet/gvisor/pkg/log"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    25  	"github.com/SagerNet/gvisor/pkg/waiter"
    26  )
    27  
    28  const (
    29  	// fuseDefaultMaxBackground is the default value for MaxBackground.
    30  	fuseDefaultMaxBackground = 12
    31  
    32  	// fuseDefaultCongestionThreshold is the default value for CongestionThreshold,
    33  	// and is 75% of the default maximum of MaxGround.
    34  	fuseDefaultCongestionThreshold = (fuseDefaultMaxBackground * 3 / 4)
    35  
    36  	// fuseDefaultMaxPagesPerReq is the default value for MaxPagesPerReq.
    37  	fuseDefaultMaxPagesPerReq = 32
    38  )
    39  
// connection is the struct by which the sentry communicates with the FUSE server daemon.
//
// Lock order:
// - conn.fd.mu
// - conn.mu
// - conn.asyncMu
//
// +stateify savable
type connection struct {
	// fd is the FUSE device FD this connection was created for; requests are
	// queued on it.
	fd *DeviceFD

	// mu protects access to struct members.
	mu sync.Mutex `state:"nosave"`

	// attributeVersion is the version of connection's attributes.
	attributeVersion uint64

	// We target FUSE 7.23.
	// The following FUSE_INIT flags are currently unsupported by this implementation:
	// - FUSE_EXPORT_SUPPORT
	// - FUSE_POSIX_LOCKS: requires POSIX locks
	// - FUSE_FLOCK_LOCKS: requires POSIX locks
	// - FUSE_AUTO_INVAL_DATA: requires page caching eviction
	// - FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation
	// - FUSE_ASYNC_DIO
	// - FUSE_PARALLEL_DIROPS (7.25)
	// - FUSE_HANDLE_KILLPRIV (7.26)
	// - FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler (7.26)
	// - FUSE_ABORT_ERROR (7.27)
	// - FUSE_CACHE_SYMLINKS (7.28)
	// - FUSE_NO_OPENDIR_SUPPORT (7.29)
	// - FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction (7.30)
	// - FUSE_MAP_ALIGNMENT (7.31)

	// initialized is set after receiving the FUSE_INIT reply.
	// Until it is set, sending FUSE requests is suspended.
	// Use SetInitialized() and Initialized() for atomic access.
	initialized int32

	// initializedChan is used to block requests before initialization.
	// It is saved as a bool recording whether the channel is closed; see
	// saveInitializedChan and loadInitializedChan.
	initializedChan chan struct{} `state:".(bool)"`

	// connected is true (connection established) from the moment a new FUSE
	// file system is created.
	// Set to false when:
	//   umount,
	//   connection abort,
	//   device release.
	connected bool

	// connInitError is true if FUSE_INIT encountered an error (major version
	// mismatch).
	// Only set in INIT.
	connInitError bool

	// connInitSuccess is true if FUSE_INIT is successful.
	// Only set in INIT.
	// Used for destroy (not yet implemented).
	connInitSuccess bool

	// aborted is true when the connection was aborted via sysfs; reads after
	// disconnection then get ECONNABORTED (instead of ENODEV).
	// Set only if abortErr is true and via fuse control fs (not yet implemented).
	// TODO(github.com/SagerNet/issue/3525): set this to true when user aborts.
	aborted bool

	// numWaiting is the number of requests waiting to be
	// sent to FUSE device or being processed by FUSE daemon.
	numWaiting uint32

	// Terminology note:
	//
	// - `asyncNumMax` is the `MaxBackground` in the FUSE_INIT_IN struct.
	//
	// - `asyncCongestionThreshold` is the `CongestionThreshold` in the FUSE_INIT_IN struct.
	//
	// We call the "background" requests in unix term as async requests.
	// The "async requests" in unix term is our async requests that expect a reply,
	// i.e. `!request.noReply`

	// asyncMu protects the async request fields.
	asyncMu sync.Mutex `state:"nosave"`

	// asyncNum is the number of async requests.
	// Protected by asyncMu.
	asyncNum uint16

	// asyncCongestionThreshold is the threshold on the number of async requests.
	// Negotiated in FUSE_INIT as "CongestionThreshold".
	// TODO(github.com/SagerNet/issue/3529): add congestion control.
	// Protected by asyncMu.
	asyncCongestionThreshold uint16

	// asyncNumMax is the maximum number of asyncNum.
	// Connection blocks the async requests when it is reached.
	// Negotiated in FUSE_INIT as "MaxBackground".
	// Protected by asyncMu.
	asyncNumMax uint16

	// maxRead is the maximum size of a read buffer in bytes.
	// Initialized from a fuse fs parameter.
	maxRead uint32

	// maxWrite is the maximum size of a write buffer in bytes.
	// Negotiated in FUSE_INIT.
	maxWrite uint32

	// maxPages is the maximum number of pages for a single request to use.
	// Negotiated in FUSE_INIT.
	maxPages uint16

	// minor is the minor version of the FUSE protocol.
	// Negotiated and only set in INIT.
	minor uint32

	// atomicOTrunc is true when FUSE does not send a separate SETATTR request
	// before open with O_TRUNC flag.
	// Negotiated and only set in INIT.
	atomicOTrunc bool

	// asyncRead is true if pages are read asynchronously.
	// Negotiated and only set in INIT.
	asyncRead bool

	// writebackCache is true for write-back cache policy,
	// false for write-through policy.
	// Negotiated and only set in INIT.
	writebackCache bool

	// bigWrites is true if doing multi-page cached writes.
	// Negotiated and only set in INIT.
	bigWrites bool

	// dontMask is true if the filesystem does not apply umask to creation modes.
	// Negotiated in INIT.
	dontMask bool

	// noOpen is true if the FUSE server doesn't support the open operation.
	// This flag only influences performance, not correctness of the program.
	noOpen bool
}
   178  
   179  func (conn *connection) saveInitializedChan() bool {
   180  	select {
   181  	case <-conn.initializedChan:
   182  		return true // Closed.
   183  	default:
   184  		return false // Not closed.
   185  	}
   186  }
   187  
   188  func (conn *connection) loadInitializedChan(closed bool) {
   189  	conn.initializedChan = make(chan struct{}, 1)
   190  	if closed {
   191  		close(conn.initializedChan)
   192  	}
   193  }
   194  
   195  // newFUSEConnection creates a FUSE connection to fuseFD.
   196  func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOptions) (*connection, error) {
   197  	// Mark the device as ready so it can be used.
   198  	// FIXME(github.com/SagerNet/issue/4813): fuseFD's fields are accessed without
   199  	// synchronization and without checking if fuseFD has already been used to
   200  	// mount another filesystem.
   201  
   202  	// Create the writeBuf for the header to be stored in.
   203  	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
   204  	fuseFD.writeBuf = make([]byte, hdrLen)
   205  	fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
   206  	fuseFD.fullQueueCh = make(chan struct{}, opts.maxActiveRequests)
   207  	fuseFD.writeCursor = 0
   208  
   209  	return &connection{
   210  		fd:                       fuseFD,
   211  		asyncNumMax:              fuseDefaultMaxBackground,
   212  		asyncCongestionThreshold: fuseDefaultCongestionThreshold,
   213  		maxRead:                  opts.maxRead,
   214  		maxPages:                 fuseDefaultMaxPagesPerReq,
   215  		initializedChan:          make(chan struct{}),
   216  		connected:                true,
   217  	}, nil
   218  }
   219  
   220  // CallAsync makes an async (aka background) request.
   221  // It's a simple wrapper around Call().
   222  func (conn *connection) CallAsync(t *kernel.Task, r *Request) error {
   223  	r.async = true
   224  	_, err := conn.Call(t, r)
   225  	return err
   226  }
   227  
   228  // Call makes a request to the server.
   229  // Block before the connection is initialized.
   230  // When the Request is FUSE_INIT, it will not be blocked before initialization.
   231  // Task should never be nil.
   232  //
   233  // For a sync request, it blocks the invoking task until
   234  // a server responds with a response.
   235  //
   236  // For an async request (that do not expect a response immediately),
   237  // it returns directly unless being blocked either before initialization
   238  // or when there are too many async requests ongoing.
   239  //
   240  // Example for async request:
   241  // init, readahead, write, async read/write, fuse_notify_reply,
   242  // non-sync release, interrupt, forget.
   243  //
   244  // The forget request does not have a reply,
   245  // as documented in include/uapi/linux/fuse.h:FUSE_FORGET.
   246  func (conn *connection) Call(t *kernel.Task, r *Request) (*Response, error) {
   247  	// Block requests sent before connection is initalized.
   248  	if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT {
   249  		if err := t.Block(conn.initializedChan); err != nil {
   250  			return nil, err
   251  		}
   252  	}
   253  
   254  	if !conn.connected {
   255  		return nil, linuxerr.ENOTCONN
   256  	}
   257  
   258  	if conn.connInitError {
   259  		return nil, linuxerr.ECONNREFUSED
   260  	}
   261  
   262  	fut, err := conn.callFuture(t, r)
   263  	if err != nil {
   264  		return nil, err
   265  	}
   266  
   267  	return fut.resolve(t)
   268  }
   269  
   270  // callFuture makes a request to the server and returns a future response.
   271  // Call resolve() when the response needs to be fulfilled.
   272  func (conn *connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
   273  	conn.fd.mu.Lock()
   274  	defer conn.fd.mu.Unlock()
   275  
   276  	// Is the queue full?
   277  	//
   278  	// We must busy wait here until the request can be queued. We don't
   279  	// block on the fd.fullQueueCh with a lock - so after being signalled,
   280  	// before we acquire the lock, it is possible that a barging task enters
   281  	// and queues a request. As a result, upon acquiring the lock we must
   282  	// again check if the room is available.
   283  	//
   284  	// This can potentially starve a request forever but this can only happen
   285  	// if there are always too many ongoing requests all the time. The
   286  	// supported maxActiveRequests setting should be really high to avoid this.
   287  	for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests {
   288  		log.Infof("Blocking request %v from being queued. Too many active requests: %v",
   289  			r.id, conn.fd.numActiveRequests)
   290  		conn.fd.mu.Unlock()
   291  		err := t.Block(conn.fd.fullQueueCh)
   292  		conn.fd.mu.Lock()
   293  		if err != nil {
   294  			return nil, err
   295  		}
   296  	}
   297  
   298  	return conn.callFutureLocked(t, r)
   299  }
   300  
   301  // callFutureLocked makes a request to the server and returns a future response.
   302  func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
   303  	// Check connected again holding conn.mu.
   304  	conn.mu.Lock()
   305  	if !conn.connected {
   306  		conn.mu.Unlock()
   307  		// we checked connected before,
   308  		// this must be due to aborted connection.
   309  		return nil, linuxerr.ECONNABORTED
   310  	}
   311  	conn.mu.Unlock()
   312  
   313  	conn.fd.queue.PushBack(r)
   314  	conn.fd.numActiveRequests++
   315  	fut := newFutureResponse(r)
   316  	conn.fd.completions[r.id] = fut
   317  
   318  	// Signal the readers that there is something to read.
   319  	conn.fd.waitQueue.Notify(waiter.ReadableEvents)
   320  
   321  	return fut, nil
   322  }