github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/fuse/connection.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"sync"
    19  
    20  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    21  	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
    22  	"github.com/MerlinKodo/gvisor/pkg/context"
    23  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    24  	"github.com/MerlinKodo/gvisor/pkg/log"
    25  	"github.com/MerlinKodo/gvisor/pkg/waiter"
    26  )
    27  
    28  const (
    29  	// fuseDefaultMaxBackground is the default value for MaxBackground.
    30  	fuseDefaultMaxBackground = 12
    31  
    32  	// fuseDefaultCongestionThreshold is the default value for CongestionThreshold,
    33  	// and is 75% of the default maximum of MaxGround.
    34  	fuseDefaultCongestionThreshold = (fuseDefaultMaxBackground * 3 / 4)
    35  
    36  	// fuseDefaultMaxPagesPerReq is the default value for MaxPagesPerReq.
    37  	fuseDefaultMaxPagesPerReq = 32
    38  )
    39  
// connection is the struct by which the sentry communicates with the FUSE server daemon.
//
// Lock order:
//   - conn.fd.mu
//   - conn.mu
//   - conn.asyncMu
//
// +stateify savable
type connection struct {
	// fd is the FUSE device FD this connection was created for. Requests are
	// queued on it and its mutex (fd.mu) is first in the lock order above.
	fd *DeviceFD

	// mu protects access to struct members.
	mu sync.Mutex `state:"nosave"`

	// attributeVersion is the version of connection's attributes.
	attributeVersion atomicbitops.Uint64

	// We target FUSE 7.23.
	// The following FUSE_INIT flags are currently unsupported by this implementation:
	//	- FUSE_EXPORT_SUPPORT
	//	- FUSE_POSIX_LOCKS: requires POSIX locks
	//	- FUSE_FLOCK_LOCKS: requires POSIX locks
	//	- FUSE_AUTO_INVAL_DATA: requires page caching eviction
	//	- FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation
	//	- FUSE_ASYNC_DIO
	//	- FUSE_PARALLEL_DIROPS (7.25)
	//	- FUSE_HANDLE_KILLPRIV (7.26)
	//	- FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler (7.26)
	//	- FUSE_ABORT_ERROR (7.27)
	//	- FUSE_CACHE_SYMLINKS (7.28)
	//	- FUSE_NO_OPENDIR_SUPPORT (7.29)
	//	- FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction (7.30)
	//	- FUSE_MAP_ALIGNMENT (7.31)

	// initialized is set after receiving the FUSE_INIT reply.
	// Until it's set, sending FUSE requests is suspended.
	// Use SetInitialized() and Initialized() for atomic access.
	initialized atomicbitops.Int32

	// initializedChan is used to block requests before initialization.
	// It is saved and restored as a bool recording whether the channel is
	// closed (see saveInitializedChan and loadInitializedChan).
	initializedChan chan struct{} `state:".(bool)"`

	// connected (connection established) when a new FUSE file system is created.
	// Set to false when:
	//   umount,
	//   connection abort,
	//   device release.
	// +checklocks:mu
	connected bool

	// connInitError is true if FUSE_INIT encountered an error (major version
	// mismatch).
	// Only set in INIT.
	// +checklocks:mu
	connInitError bool

	// connInitSuccess is true if FUSE_INIT is successful.
	// Only set in INIT.
	// Used for destroy (not yet implemented).
	// +checklocks:mu
	connInitSuccess bool

	// aborted via sysfs, and will send ECONNABORTED to read after disconnection (instead of ENODEV).
	// Set only if abortErr is true and via fuse control fs (not yet implemented).
	// TODO(gvisor.dev/issue/3525): set this to true when user aborts.
	aborted bool

	// numWaiting is the number of requests waiting to be
	// sent to FUSE device or being processed by FUSE daemon.
	numWaiting uint32

	// Terminology note:
	//
	//	- `asyncNumMax` is the `MaxBackground` in the FUSE_INIT_IN struct.
	//
	//	- `asyncCongestionThreshold` is the `CongestionThreshold` in the FUSE_INIT_IN struct.
	//
	// What unix terminology calls "background" requests are called async
	// requests here. What unix terminology calls "async requests" are our
	// async requests that expect a reply, i.e. `!request.noReply`.

	// asyncMu protects the async request fields.
	asyncMu sync.Mutex `state:"nosave"`

	// asyncNum is the number of async requests.
	// +checklocks:asyncMu
	asyncNum uint16

	// asyncCongestionThreshold is the congestion threshold on the number of
	// async requests.
	// Negotiated in FUSE_INIT as "CongestionThreshold".
	// TODO(gvisor.dev/issue/3529): add congestion control.
	// +checklocks:asyncMu
	asyncCongestionThreshold uint16

	// asyncNumMax is the maximum number of asyncNum.
	// Connection blocks the async requests when it is reached.
	// Negotiated in FUSE_INIT as "MaxBackground".
	// +checklocks:asyncMu
	asyncNumMax uint16

	// maxRead is the maximum size of a read buffer in bytes.
	// Initialized from a fuse fs parameter.
	maxRead uint32

	// maxWrite is the maximum size of a write buffer in bytes.
	// Negotiated in FUSE_INIT.
	maxWrite uint32

	// maxPages is the maximum number of pages for a single request to use.
	// Negotiated in FUSE_INIT.
	maxPages uint16

	// maxActiveRequests specifies the maximum number of active requests that can
	// exist at any time. Any further requests will block when trying to Call
	// the server.
	maxActiveRequests uint64

	// minor version of the FUSE protocol.
	// Negotiated and only set in INIT.
	minor uint32

	// atomicOTrunc is true when FUSE does not send a separate SETATTR request
	// before open with O_TRUNC flag.
	// Negotiated and only set in INIT.
	atomicOTrunc bool

	// asyncRead is true if pages are read asynchronously.
	// Negotiated and only set in INIT.
	asyncRead bool

	// writebackCache is true for write-back cache policy,
	// false for write-through policy.
	// Negotiated and only set in INIT.
	writebackCache bool

	// bigWrites is true if doing multi-page cached writes.
	// Negotiated and only set in INIT.
	bigWrites bool

	// dontMask is true if the filesystem does not apply umask to creation modes.
	// Negotiated in INIT.
	dontMask bool

	// noOpen is true if the FUSE server doesn't support the open operation.
	// This flag only influences performance, not correctness of the program.
	noOpen bool
}
   186  
   187  func (conn *connection) saveInitializedChan() bool {
   188  	select {
   189  	case <-conn.initializedChan:
   190  		return true // Closed.
   191  	default:
   192  		return false // Not closed.
   193  	}
   194  }
   195  
   196  func (conn *connection) loadInitializedChan(closed bool) {
   197  	conn.initializedChan = make(chan struct{}, 1)
   198  	if closed {
   199  		close(conn.initializedChan)
   200  	}
   201  }
   202  
   203  // newFUSEConnection creates a FUSE connection to fuseFD.
   204  // +checklocks:fuseFD.mu
   205  func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOptions) (*connection, error) {
   206  	// Mark the device as ready so it can be used.
   207  	// FIXME(gvisor.dev/issue/4813): fuseFD's fields are accessed without
   208  	// synchronization and without checking if fuseFD has already been used to
   209  	// mount another filesystem.
   210  
   211  	// Create the writeBuf for the header to be stored in.
   212  	fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
   213  	fuseFD.fullQueueCh = make(chan struct{}, opts.maxActiveRequests)
   214  
   215  	return &connection{
   216  		fd:                       fuseFD,
   217  		asyncNumMax:              fuseDefaultMaxBackground,
   218  		asyncCongestionThreshold: fuseDefaultCongestionThreshold,
   219  		maxRead:                  opts.maxRead,
   220  		maxPages:                 fuseDefaultMaxPagesPerReq,
   221  		maxActiveRequests:        opts.maxActiveRequests,
   222  		initializedChan:          make(chan struct{}),
   223  		connected:                true,
   224  	}, nil
   225  }
   226  
   227  // CallAsync makes an async (aka background) request.
   228  // It's a simple wrapper around Call().
   229  func (conn *connection) CallAsync(ctx context.Context, r *Request) error {
   230  	r.async = true
   231  	_, err := conn.Call(ctx, r)
   232  	return err
   233  }
   234  
   235  // Call makes a request to the server.
   236  // Block before the connection is initialized.
   237  // When the Request is FUSE_INIT, it will not be blocked before initialization.
   238  // Task should never be nil.
   239  //
   240  // For a sync request, it blocks the invoking task until
   241  // a server responds with a response.
   242  //
   243  // For an async request (that do not expect a response immediately),
   244  // it returns directly unless being blocked either before initialization
   245  // or when there are too many async requests ongoing.
   246  //
   247  // Example for async request:
   248  // init, readahead, write, async read/write, fuse_notify_reply,
   249  // non-sync release, interrupt, forget.
   250  //
   251  // The forget request does not have a reply,
   252  // as documented in include/uapi/linux/fuse.h:FUSE_FORGET.
   253  func (conn *connection) Call(ctx context.Context, r *Request) (*Response, error) {
   254  	b := blockerFromContext(ctx)
   255  	// Block requests sent before connection is initialized.
   256  	if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT {
   257  		if err := b.Block(conn.initializedChan); err != nil {
   258  			return nil, err
   259  		}
   260  	}
   261  
   262  	conn.fd.mu.Lock()
   263  	conn.mu.Lock()
   264  	connected := conn.connected
   265  	connInitError := conn.connInitError
   266  	conn.mu.Unlock()
   267  
   268  	if !connected {
   269  		conn.fd.mu.Unlock()
   270  		return nil, linuxerr.ENOTCONN
   271  	}
   272  
   273  	if connInitError {
   274  		conn.fd.mu.Unlock()
   275  		return nil, linuxerr.ECONNREFUSED
   276  	}
   277  
   278  	fut, err := conn.callFuture(b, r)
   279  	conn.fd.mu.Unlock()
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  
   284  	return fut.resolve(b)
   285  }
   286  
   287  // callFuture makes a request to the server and returns a future response.
   288  // Call resolve() when the response needs to be fulfilled.
   289  // +checklocks:conn.fd.mu
   290  func (conn *connection) callFuture(b context.Blocker, r *Request) (*futureResponse, error) {
   291  	// Is the queue full?
   292  	//
   293  	// We must busy wait here until the request can be queued. We don't
   294  	// block on the fd.fullQueueCh with a lock - so after being signalled,
   295  	// before we acquire the lock, it is possible that a barging task enters
   296  	// and queues a request. As a result, upon acquiring the lock we must
   297  	// again check if the room is available.
   298  	//
   299  	// This can potentially starve a request forever but this can only happen
   300  	// if there are always too many ongoing requests all the time. The
   301  	// supported maxActiveRequests setting should be really high to avoid this.
   302  	for conn.fd.numActiveRequests == conn.maxActiveRequests {
   303  		log.Infof("Blocking request %v from being queued. Too many active requests: %v",
   304  			r.id, conn.fd.numActiveRequests)
   305  		conn.fd.mu.Unlock()
   306  		err := b.Block(conn.fd.fullQueueCh)
   307  		conn.fd.mu.Lock()
   308  		if err != nil {
   309  			return nil, err
   310  		}
   311  	}
   312  
   313  	return conn.callFutureLocked(r)
   314  }
   315  
   316  // callFutureLocked makes a request to the server and returns a future response.
   317  // +checklocks:conn.fd.mu
   318  func (conn *connection) callFutureLocked(r *Request) (*futureResponse, error) {
   319  	// Check connected again holding conn.mu.
   320  	conn.mu.Lock()
   321  	if !conn.connected {
   322  		conn.mu.Unlock()
   323  		// we checked connected before,
   324  		// this must be due to aborted connection.
   325  		return nil, linuxerr.ECONNABORTED
   326  	}
   327  	conn.mu.Unlock()
   328  
   329  	conn.fd.queue.PushBack(r)
   330  	conn.fd.numActiveRequests++
   331  	fut := newFutureResponse(r)
   332  	conn.fd.completions[r.id] = fut
   333  
   334  	// Signal the readers that there is something to read.
   335  	conn.fd.waitQueue.Notify(waiter.ReadableEvents)
   336  
   337  	return fut, nil
   338  }