github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/fuse/connection.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	goContext "context"
    19  	"sync"
    20  
    21  	"github.com/metacubex/gvisor/pkg/abi/linux"
    22  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    23  	"github.com/metacubex/gvisor/pkg/context"
    24  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    25  	"github.com/metacubex/gvisor/pkg/log"
    26  	"github.com/metacubex/gvisor/pkg/waiter"
    27  )
    28  
    29  const (
    30  	// fuseDefaultMaxBackground is the default value for MaxBackground.
    31  	fuseDefaultMaxBackground = 12
    32  
    33  	// fuseDefaultCongestionThreshold is the default value for CongestionThreshold,
    34  	// and is 75% of the default maximum of MaxGround.
    35  	fuseDefaultCongestionThreshold = (fuseDefaultMaxBackground * 3 / 4)
    36  
    37  	// fuseDefaultMaxPagesPerReq is the default value for MaxPagesPerReq.
    38  	fuseDefaultMaxPagesPerReq = 32
    39  )
    40  
// connection is the struct by which the sentry communicates with the FUSE server daemon.
//
// Lock order:
//   - conn.fd.mu
//   - conn.mu
//   - conn.asyncMu
//
// +stateify savable
type connection struct {
	// fd is the FUSE device FD this connection was created for. Requests are
	// queued on it and its mutex (fd.mu) comes first in the lock order above.
	fd *DeviceFD

	// mu protects access to struct members; see the fields annotated with
	// +checklocks:mu below.
	mu sync.Mutex `state:"nosave"`

	// attributeVersion is the version of connection's attributes.
	attributeVersion atomicbitops.Uint64

	// We target FUSE 7.23.
	// The following FUSE_INIT flags are currently unsupported by this implementation:
	//	- FUSE_EXPORT_SUPPORT
	//	- FUSE_POSIX_LOCKS: requires POSIX locks
	//	- FUSE_FLOCK_LOCKS: requires POSIX locks
	//	- FUSE_AUTO_INVAL_DATA: requires page caching eviction
	//	- FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation
	//	- FUSE_ASYNC_DIO
	//	- FUSE_PARALLEL_DIROPS (7.25)
	//	- FUSE_HANDLE_KILLPRIV (7.26)
	//	- FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler (7.26)
	//	- FUSE_ABORT_ERROR (7.27)
	//	- FUSE_CACHE_SYMLINKS (7.28)
	//	- FUSE_NO_OPENDIR_SUPPORT (7.29)
	//	- FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction (7.30)
	//	- FUSE_MAP_ALIGNMENT (7.31)

	// initialized is set after receiving the FUSE_INIT reply.
	// Until it is set, sending of FUSE requests other than FUSE_INIT is
	// suspended (see Call).
	// Use SetInitialized() and Initialized() for atomic access.
	initialized atomicbitops.Int32

	// initializedChan is used to block requests before initialization.
	// It is saved and restored as a bool ("is the channel closed?") by
	// saveInitializedChan and loadInitializedChan.
	initializedChan chan struct{} `state:".(bool)"`

	// connected is true (connection established) once a new FUSE file system
	// is created.
	// Set to false on:
	//   umount,
	//   connection abort,
	//   device release.
	// +checklocks:mu
	connected bool

	// connInitError is true if FUSE_INIT encountered an error (major version
	// mismatch).
	// Only set in INIT.
	// +checklocks:mu
	connInitError bool

	// connInitSuccess is true if FUSE_INIT was successful.
	// Only set in INIT.
	// Used for destroy (not yet implemented).
	// +checklocks:mu
	connInitSuccess bool

	// aborted is true if the connection was aborted via sysfs; reads after
	// disconnection then get ECONNABORTED (instead of ENODEV).
	// Set only if abortErr is true and via fuse control fs (not yet implemented).
	// TODO(gvisor.dev/issue/3525): set this to true when user aborts.
	aborted bool

	// numWaiting is the number of requests waiting to be
	// sent to FUSE device or being processed by FUSE daemon.
	numWaiting uint32

	// Terminology note:
	//
	//	- `asyncNumMax` is the `MaxBackground` in the FUSE_INIT_IN struct.
	//
	//	- `asyncCongestionThreshold` is the `CongestionThreshold` in the FUSE_INIT_IN struct.
	//
	// What Unix calls "background" requests are called async requests here.
	// What Unix calls "async requests" are our async requests that expect a
	// reply, i.e. `!request.noReply`.

	// asyncMu protects the async request fields.
	asyncMu sync.Mutex `state:"nosave"`

	// asyncNum is the number of async requests.
	// +checklocks:asyncMu
	asyncNum uint16

	// asyncCongestionThreshold is the congestion threshold for the number of
	// async requests.
	// Negotiated in FUSE_INIT as "CongestionThreshold".
	// TODO(gvisor.dev/issue/3529): add congestion control.
	// +checklocks:asyncMu
	asyncCongestionThreshold uint16

	// asyncNumMax is the maximum value of asyncNum.
	// The connection blocks async requests when it is reached.
	// Negotiated in FUSE_INIT as "MaxBackground".
	// +checklocks:asyncMu
	asyncNumMax uint16

	// maxRead is the maximum size of a read buffer in bytes.
	// Initialized from a fuse fs parameter.
	maxRead uint32

	// maxWrite is the maximum size of a write buffer in bytes.
	// Negotiated in FUSE_INIT.
	maxWrite uint32

	// maxPages is the maximum number of pages for a single request to use.
	// Negotiated in FUSE_INIT.
	maxPages uint16

	// maxActiveRequests specifies the maximum number of active requests that can
	// exist at any time. Any further requests will block when trying to Call
	// the server.
	maxActiveRequests uint64

	// minor is the minor version of the FUSE protocol.
	// Negotiated and only set in INIT.
	minor uint32

	// atomicOTrunc is true when FUSE does not send a separate SETATTR request
	// before open with O_TRUNC flag.
	// Negotiated and only set in INIT.
	atomicOTrunc bool

	// asyncRead is true if pages are read asynchronously.
	// Negotiated and only set in INIT.
	asyncRead bool

	// writebackCache is true for write-back cache policy,
	// false for write-through policy.
	// Negotiated and only set in INIT.
	writebackCache bool

	// bigWrites is true if doing multi-page cached writes.
	// Negotiated and only set in INIT.
	bigWrites bool

	// dontMask is true if the filesystem does not apply umask to creation modes.
	// Negotiated in INIT.
	dontMask bool

	// noOpen is true if the FUSE server doesn't support the open operation.
	// This flag only influences performance, not correctness of the program.
	noOpen bool
}
   187  
   188  func (conn *connection) saveInitializedChan() bool {
   189  	select {
   190  	case <-conn.initializedChan:
   191  		return true // Closed.
   192  	default:
   193  		return false // Not closed.
   194  	}
   195  }
   196  
   197  func (conn *connection) loadInitializedChan(_ goContext.Context, closed bool) {
   198  	conn.initializedChan = make(chan struct{}, 1)
   199  	if closed {
   200  		close(conn.initializedChan)
   201  	}
   202  }
   203  
   204  // newFUSEConnection creates a FUSE connection to fuseFD.
   205  // +checklocks:fuseFD.mu
   206  func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOptions) (*connection, error) {
   207  	// Mark the device as ready so it can be used.
   208  	// FIXME(gvisor.dev/issue/4813): fuseFD's fields are accessed without
   209  	// synchronization and without checking if fuseFD has already been used to
   210  	// mount another filesystem.
   211  
   212  	// Create the writeBuf for the header to be stored in.
   213  	fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
   214  	fuseFD.fullQueueCh = make(chan struct{}, opts.maxActiveRequests)
   215  
   216  	return &connection{
   217  		fd:                       fuseFD,
   218  		asyncNumMax:              fuseDefaultMaxBackground,
   219  		asyncCongestionThreshold: fuseDefaultCongestionThreshold,
   220  		maxRead:                  opts.maxRead,
   221  		maxPages:                 fuseDefaultMaxPagesPerReq,
   222  		maxActiveRequests:        opts.maxActiveRequests,
   223  		initializedChan:          make(chan struct{}),
   224  		connected:                true,
   225  	}, nil
   226  }
   227  
   228  // CallAsync makes an async (aka background) request.
   229  // It's a simple wrapper around Call().
   230  func (conn *connection) CallAsync(ctx context.Context, r *Request) error {
   231  	r.async = true
   232  	_, err := conn.Call(ctx, r)
   233  	return err
   234  }
   235  
   236  // Call makes a request to the server.
   237  // Block before the connection is initialized.
   238  // When the Request is FUSE_INIT, it will not be blocked before initialization.
   239  // Task should never be nil.
   240  //
   241  // For a sync request, it blocks the invoking task until
   242  // a server responds with a response.
   243  //
   244  // For an async request (that do not expect a response immediately),
   245  // it returns directly unless being blocked either before initialization
   246  // or when there are too many async requests ongoing.
   247  //
   248  // Example for async request:
   249  // init, readahead, write, async read/write, fuse_notify_reply,
   250  // non-sync release, interrupt, forget.
   251  //
   252  // The forget request does not have a reply,
   253  // as documented in include/uapi/linux/fuse.h:FUSE_FORGET.
   254  func (conn *connection) Call(ctx context.Context, r *Request) (*Response, error) {
   255  	b := blockerFromContext(ctx)
   256  	// Block requests sent before connection is initialized.
   257  	if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT {
   258  		if err := b.Block(conn.initializedChan); err != nil {
   259  			return nil, err
   260  		}
   261  	}
   262  
   263  	conn.fd.mu.Lock()
   264  	conn.mu.Lock()
   265  	connected := conn.connected
   266  	connInitError := conn.connInitError
   267  	conn.mu.Unlock()
   268  
   269  	if !connected {
   270  		conn.fd.mu.Unlock()
   271  		return nil, linuxerr.ENOTCONN
   272  	}
   273  
   274  	if connInitError {
   275  		conn.fd.mu.Unlock()
   276  		return nil, linuxerr.ECONNREFUSED
   277  	}
   278  
   279  	fut, err := conn.callFuture(b, r)
   280  	conn.fd.mu.Unlock()
   281  	if err != nil {
   282  		return nil, err
   283  	}
   284  
   285  	return fut.resolve(b)
   286  }
   287  
   288  // callFuture makes a request to the server and returns a future response.
   289  // Call resolve() when the response needs to be fulfilled.
   290  // +checklocks:conn.fd.mu
   291  func (conn *connection) callFuture(b context.Blocker, r *Request) (*futureResponse, error) {
   292  	// Is the queue full?
   293  	//
   294  	// We must busy wait here until the request can be queued. We don't
   295  	// block on the fd.fullQueueCh with a lock - so after being signalled,
   296  	// before we acquire the lock, it is possible that a barging task enters
   297  	// and queues a request. As a result, upon acquiring the lock we must
   298  	// again check if the room is available.
   299  	//
   300  	// This can potentially starve a request forever but this can only happen
   301  	// if there are always too many ongoing requests all the time. The
   302  	// supported maxActiveRequests setting should be really high to avoid this.
   303  	for conn.fd.numActiveRequests == conn.maxActiveRequests {
   304  		log.Infof("Blocking request %v from being queued. Too many active requests: %v",
   305  			r.id, conn.fd.numActiveRequests)
   306  		conn.fd.mu.Unlock()
   307  		err := b.Block(conn.fd.fullQueueCh)
   308  		conn.fd.mu.Lock()
   309  		if err != nil {
   310  			return nil, err
   311  		}
   312  	}
   313  
   314  	return conn.callFutureLocked(r)
   315  }
   316  
   317  // callFutureLocked makes a request to the server and returns a future response.
   318  // +checklocks:conn.fd.mu
   319  func (conn *connection) callFutureLocked(r *Request) (*futureResponse, error) {
   320  	// Check connected again holding conn.mu.
   321  	conn.mu.Lock()
   322  	if !conn.connected {
   323  		conn.mu.Unlock()
   324  		// we checked connected before,
   325  		// this must be due to aborted connection.
   326  		return nil, linuxerr.ECONNABORTED
   327  	}
   328  	conn.mu.Unlock()
   329  
   330  	conn.fd.queue.PushBack(r)
   331  	conn.fd.numActiveRequests++
   332  	fut := newFutureResponse(r)
   333  	conn.fd.completions[r.id] = fut
   334  
   335  	// Signal the readers that there is something to read.
   336  	conn.fd.waitQueue.Notify(waiter.ReadableEvents)
   337  
   338  	return fut, nil
   339  }