github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/fuse/connection.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fuse 16 17 import ( 18 goContext "context" 19 "sync" 20 21 "github.com/metacubex/gvisor/pkg/abi/linux" 22 "github.com/metacubex/gvisor/pkg/atomicbitops" 23 "github.com/metacubex/gvisor/pkg/context" 24 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 25 "github.com/metacubex/gvisor/pkg/log" 26 "github.com/metacubex/gvisor/pkg/waiter" 27 ) 28 29 const ( 30 // fuseDefaultMaxBackground is the default value for MaxBackground. 31 fuseDefaultMaxBackground = 12 32 33 // fuseDefaultCongestionThreshold is the default value for CongestionThreshold, 34 // and is 75% of the default maximum of MaxGround. 35 fuseDefaultCongestionThreshold = (fuseDefaultMaxBackground * 3 / 4) 36 37 // fuseDefaultMaxPagesPerReq is the default value for MaxPagesPerReq. 38 fuseDefaultMaxPagesPerReq = 32 39 ) 40 41 // connection is the struct by which the sentry communicates with the FUSE server daemon. 42 // 43 // Lock order: 44 // - conn.fd.mu 45 // - conn.mu 46 // - conn.asyncMu 47 // 48 // +stateify savable 49 type connection struct { 50 fd *DeviceFD 51 52 // mu protects access to struct members. 53 mu sync.Mutex `state:"nosave"` 54 55 // attributeVersion is the version of connection's attributes. 56 attributeVersion atomicbitops.Uint64 57 58 // We target FUSE 7.23. 59 // The following FUSE_INIT flags are currently unsupported by this implementation: 60 // - FUSE_EXPORT_SUPPORT 61 // - FUSE_POSIX_LOCKS: requires POSIX locks 62 // - FUSE_FLOCK_LOCKS: requires POSIX locks 63 // - FUSE_AUTO_INVAL_DATA: requires page caching eviction 64 // - FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation 65 // - FUSE_ASYNC_DIO 66 // - FUSE_PARALLEL_DIROPS (7.25) 67 // - FUSE_HANDLE_KILLPRIV (7.26) 68 // - FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler (7.26) 69 // - FUSE_ABORT_ERROR (7.27) 70 // - FUSE_CACHE_SYMLINKS (7.28) 71 // - FUSE_NO_OPENDIR_SUPPORT (7.29) 72 // - FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction (7.30) 73 // - FUSE_MAP_ALIGNMENT (7.31) 74 75 // initialized after receiving FUSE_INIT reply. 76 // Until it's set, suspend sending FUSE requests. 77 // Use SetInitialized() and IsInitialized() for atomic access. 78 initialized atomicbitops.Int32 79 80 // initializedChan is used to block requests before initialization. 81 initializedChan chan struct{} `state:".(bool)"` 82 83 // connected (connection established) when a new FUSE file system is created. 84 // Set to false when: 85 // umount, 86 // connection abort, 87 // device release. 88 // +checklocks:mu 89 connected bool 90 91 // connInitError if FUSE_INIT encountered error (major version mismatch). 92 // Only set in INIT. 93 // +checklocks:mu 94 connInitError bool 95 96 // connInitSuccess if FUSE_INIT is successful. 97 // Only set in INIT. 98 // Used for destroy (not yet implemented). 99 // +checklocks:mu 100 connInitSuccess bool 101 102 // aborted via sysfs, and will send ECONNABORTED to read after disconnection (instead of ENODEV). 103 // Set only if abortErr is true and via fuse control fs (not yet implemented). 104 // TODO(gvisor.dev/issue/3525): set this to true when user aborts. 105 aborted bool 106 107 // numWaiting is the number of requests waiting to be 108 // sent to FUSE device or being processed by FUSE daemon. 109 numWaiting uint32 110 111 // Terminology note: 112 // 113 // - `asyncNumMax` is the `MaxBackground` in the FUSE_INIT_IN struct. 114 // 115 // - `asyncCongestionThreshold` is the `CongestionThreshold` in the FUSE_INIT_IN struct. 116 // 117 // We call the "background" requests in unix term as async requests. 118 // The "async requests" in unix term is our async requests that expect a reply, 119 // i.e. `!request.noReply` 120 121 // asyncMu protects the async request fields. 122 asyncMu sync.Mutex `state:"nosave"` 123 124 // asyncNum is the number of async requests. 125 // +checklocks:asyncMu 126 asyncNum uint16 127 128 // asyncCongestionThreshold the number of async requests. 129 // Negotiated in FUSE_INIT as "CongestionThreshold". 130 // TODO(gvisor.dev/issue/3529): add congestion control. 131 // +checklocks:asyncMu 132 asyncCongestionThreshold uint16 133 134 // asyncNumMax is the maximum number of asyncNum. 135 // Connection blocks the async requests when it is reached. 136 // Negotiated in FUSE_INIT as "MaxBackground". 137 // +checklocks:asyncMu 138 asyncNumMax uint16 139 140 // maxRead is the maximum size of a read buffer in in bytes. 141 // Initialized from a fuse fs parameter. 142 maxRead uint32 143 144 // maxWrite is the maximum size of a write buffer in bytes. 145 // Negotiated in FUSE_INIT. 146 maxWrite uint32 147 148 // maxPages is the maximum number of pages for a single request to use. 149 // Negotiated in FUSE_INIT. 150 maxPages uint16 151 152 // maxActiveRequests specifies the maximum number of active requests that can 153 // exist at any time. Any further requests will block when trying to CAll 154 // the server. 155 maxActiveRequests uint64 156 157 // minor version of the FUSE protocol. 158 // Negotiated and only set in INIT. 159 minor uint32 160 161 // atomicOTrunc is true when FUSE does not send a separate SETATTR request 162 // before open with O_TRUNC flag. 163 // Negotiated and only set in INIT. 164 atomicOTrunc bool 165 166 // asyncRead if read pages asynchronously. 167 // Negotiated and only set in INIT. 168 asyncRead bool 169 170 // writebackCache is true for write-back cache policy, 171 // false for write-through policy. 172 // Negotiated and only set in INIT. 173 writebackCache bool 174 175 // bigWrites if doing multi-page cached writes. 176 // Negotiated and only set in INIT. 177 bigWrites bool 178 179 // dontMask if filesystem does not apply umask to creation modes. 180 // Negotiated in INIT. 181 dontMask bool 182 183 // noOpen if FUSE server doesn't support open operation. 184 // This flag only influences performance, not correctness of the program. 185 noOpen bool 186 } 187 188 func (conn *connection) saveInitializedChan() bool { 189 select { 190 case <-conn.initializedChan: 191 return true // Closed. 192 default: 193 return false // Not closed. 194 } 195 } 196 197 func (conn *connection) loadInitializedChan(_ goContext.Context, closed bool) { 198 conn.initializedChan = make(chan struct{}, 1) 199 if closed { 200 close(conn.initializedChan) 201 } 202 } 203 204 // newFUSEConnection creates a FUSE connection to fuseFD. 205 // +checklocks:fuseFD.mu 206 func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOptions) (*connection, error) { 207 // Mark the device as ready so it can be used. 208 // FIXME(gvisor.dev/issue/4813): fuseFD's fields are accessed without 209 // synchronization and without checking if fuseFD has already been used to 210 // mount another filesystem. 211 212 // Create the writeBuf for the header to be stored in. 213 fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse) 214 fuseFD.fullQueueCh = make(chan struct{}, opts.maxActiveRequests) 215 216 return &connection{ 217 fd: fuseFD, 218 asyncNumMax: fuseDefaultMaxBackground, 219 asyncCongestionThreshold: fuseDefaultCongestionThreshold, 220 maxRead: opts.maxRead, 221 maxPages: fuseDefaultMaxPagesPerReq, 222 maxActiveRequests: opts.maxActiveRequests, 223 initializedChan: make(chan struct{}), 224 connected: true, 225 }, nil 226 } 227 228 // CallAsync makes an async (aka background) request. 229 // It's a simple wrapper around Call(). 230 func (conn *connection) CallAsync(ctx context.Context, r *Request) error { 231 r.async = true 232 _, err := conn.Call(ctx, r) 233 return err 234 } 235 236 // Call makes a request to the server. 237 // Block before the connection is initialized. 238 // When the Request is FUSE_INIT, it will not be blocked before initialization. 239 // Task should never be nil. 240 // 241 // For a sync request, it blocks the invoking task until 242 // a server responds with a response. 243 // 244 // For an async request (that do not expect a response immediately), 245 // it returns directly unless being blocked either before initialization 246 // or when there are too many async requests ongoing. 247 // 248 // Example for async request: 249 // init, readahead, write, async read/write, fuse_notify_reply, 250 // non-sync release, interrupt, forget. 251 // 252 // The forget request does not have a reply, 253 // as documented in include/uapi/linux/fuse.h:FUSE_FORGET. 254 func (conn *connection) Call(ctx context.Context, r *Request) (*Response, error) { 255 b := blockerFromContext(ctx) 256 // Block requests sent before connection is initialized. 257 if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT { 258 if err := b.Block(conn.initializedChan); err != nil { 259 return nil, err 260 } 261 } 262 263 conn.fd.mu.Lock() 264 conn.mu.Lock() 265 connected := conn.connected 266 connInitError := conn.connInitError 267 conn.mu.Unlock() 268 269 if !connected { 270 conn.fd.mu.Unlock() 271 return nil, linuxerr.ENOTCONN 272 } 273 274 if connInitError { 275 conn.fd.mu.Unlock() 276 return nil, linuxerr.ECONNREFUSED 277 } 278 279 fut, err := conn.callFuture(b, r) 280 conn.fd.mu.Unlock() 281 if err != nil { 282 return nil, err 283 } 284 285 return fut.resolve(b) 286 } 287 288 // callFuture makes a request to the server and returns a future response. 289 // Call resolve() when the response needs to be fulfilled. 290 // +checklocks:conn.fd.mu 291 func (conn *connection) callFuture(b context.Blocker, r *Request) (*futureResponse, error) { 292 // Is the queue full? 293 // 294 // We must busy wait here until the request can be queued. We don't 295 // block on the fd.fullQueueCh with a lock - so after being signalled, 296 // before we acquire the lock, it is possible that a barging task enters 297 // and queues a request. As a result, upon acquiring the lock we must 298 // again check if the room is available. 299 // 300 // This can potentially starve a request forever but this can only happen 301 // if there are always too many ongoing requests all the time. The 302 // supported maxActiveRequests setting should be really high to avoid this. 303 for conn.fd.numActiveRequests == conn.maxActiveRequests { 304 log.Infof("Blocking request %v from being queued. Too many active requests: %v", 305 r.id, conn.fd.numActiveRequests) 306 conn.fd.mu.Unlock() 307 err := b.Block(conn.fd.fullQueueCh) 308 conn.fd.mu.Lock() 309 if err != nil { 310 return nil, err 311 } 312 } 313 314 return conn.callFutureLocked(r) 315 } 316 317 // callFutureLocked makes a request to the server and returns a future response. 318 // +checklocks:conn.fd.mu 319 func (conn *connection) callFutureLocked(r *Request) (*futureResponse, error) { 320 // Check connected again holding conn.mu. 321 conn.mu.Lock() 322 if !conn.connected { 323 conn.mu.Unlock() 324 // we checked connected before, 325 // this must be due to aborted connection. 326 return nil, linuxerr.ECONNABORTED 327 } 328 conn.mu.Unlock() 329 330 conn.fd.queue.PushBack(r) 331 conn.fd.numActiveRequests++ 332 fut := newFutureResponse(r) 333 conn.fd.completions[r.id] = fut 334 335 // Signal the readers that there is something to read. 336 conn.fd.waitQueue.Notify(waiter.ReadableEvents) 337 338 return fut, nil 339 }