github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/fuse/connection.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fuse 16 17 import ( 18 "sync" 19 20 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 21 "github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops" 22 "github.com/nicocha30/gvisor-ligolo/pkg/context" 23 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 24 "github.com/nicocha30/gvisor-ligolo/pkg/log" 25 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 26 ) 27 28 const ( 29 // fuseDefaultMaxBackground is the default value for MaxBackground. 30 fuseDefaultMaxBackground = 12 31 32 // fuseDefaultCongestionThreshold is the default value for CongestionThreshold, 33 // and is 75% of the default maximum of MaxGround. 34 fuseDefaultCongestionThreshold = (fuseDefaultMaxBackground * 3 / 4) 35 36 // fuseDefaultMaxPagesPerReq is the default value for MaxPagesPerReq. 37 fuseDefaultMaxPagesPerReq = 32 38 ) 39 40 // connection is the struct by which the sentry communicates with the FUSE server daemon. 41 // 42 // Lock order: 43 // - conn.fd.mu 44 // - conn.mu 45 // - conn.asyncMu 46 // 47 // +stateify savable 48 type connection struct { 49 fd *DeviceFD 50 51 // mu protects access to struct members. 52 mu sync.Mutex `state:"nosave"` 53 54 // attributeVersion is the version of connection's attributes. 55 attributeVersion atomicbitops.Uint64 56 57 // We target FUSE 7.23. 58 // The following FUSE_INIT flags are currently unsupported by this implementation: 59 // - FUSE_EXPORT_SUPPORT 60 // - FUSE_POSIX_LOCKS: requires POSIX locks 61 // - FUSE_FLOCK_LOCKS: requires POSIX locks 62 // - FUSE_AUTO_INVAL_DATA: requires page caching eviction 63 // - FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation 64 // - FUSE_ASYNC_DIO 65 // - FUSE_PARALLEL_DIROPS (7.25) 66 // - FUSE_HANDLE_KILLPRIV (7.26) 67 // - FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler (7.26) 68 // - FUSE_ABORT_ERROR (7.27) 69 // - FUSE_CACHE_SYMLINKS (7.28) 70 // - FUSE_NO_OPENDIR_SUPPORT (7.29) 71 // - FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction (7.30) 72 // - FUSE_MAP_ALIGNMENT (7.31) 73 74 // initialized after receiving FUSE_INIT reply. 75 // Until it's set, suspend sending FUSE requests. 76 // Use SetInitialized() and IsInitialized() for atomic access. 77 initialized atomicbitops.Int32 78 79 // initializedChan is used to block requests before initialization. 80 initializedChan chan struct{} `state:".(bool)"` 81 82 // connected (connection established) when a new FUSE file system is created. 83 // Set to false when: 84 // umount, 85 // connection abort, 86 // device release. 87 // +checklocks:mu 88 connected bool 89 90 // connInitError if FUSE_INIT encountered error (major version mismatch). 91 // Only set in INIT. 92 // +checklocks:mu 93 connInitError bool 94 95 // connInitSuccess if FUSE_INIT is successful. 96 // Only set in INIT. 97 // Used for destroy (not yet implemented). 98 // +checklocks:mu 99 connInitSuccess bool 100 101 // aborted via sysfs, and will send ECONNABORTED to read after disconnection (instead of ENODEV). 102 // Set only if abortErr is true and via fuse control fs (not yet implemented). 103 // TODO(gvisor.dev/issue/3525): set this to true when user aborts. 104 aborted bool 105 106 // numWaiting is the number of requests waiting to be 107 // sent to FUSE device or being processed by FUSE daemon. 108 numWaiting uint32 109 110 // Terminology note: 111 // 112 // - `asyncNumMax` is the `MaxBackground` in the FUSE_INIT_IN struct. 113 // 114 // - `asyncCongestionThreshold` is the `CongestionThreshold` in the FUSE_INIT_IN struct. 115 // 116 // We call the "background" requests in unix term as async requests. 117 // The "async requests" in unix term is our async requests that expect a reply, 118 // i.e. `!request.noReply` 119 120 // asyncMu protects the async request fields. 121 asyncMu sync.Mutex `state:"nosave"` 122 123 // asyncNum is the number of async requests. 124 // +checklocks:asyncMu 125 asyncNum uint16 126 127 // asyncCongestionThreshold the number of async requests. 128 // Negotiated in FUSE_INIT as "CongestionThreshold". 129 // TODO(gvisor.dev/issue/3529): add congestion control. 130 // +checklocks:asyncMu 131 asyncCongestionThreshold uint16 132 133 // asyncNumMax is the maximum number of asyncNum. 134 // Connection blocks the async requests when it is reached. 135 // Negotiated in FUSE_INIT as "MaxBackground". 136 // +checklocks:asyncMu 137 asyncNumMax uint16 138 139 // maxRead is the maximum size of a read buffer in in bytes. 140 // Initialized from a fuse fs parameter. 141 maxRead uint32 142 143 // maxWrite is the maximum size of a write buffer in bytes. 144 // Negotiated in FUSE_INIT. 145 maxWrite uint32 146 147 // maxPages is the maximum number of pages for a single request to use. 148 // Negotiated in FUSE_INIT. 149 maxPages uint16 150 151 // maxActiveRequests specifies the maximum number of active requests that can 152 // exist at any time. Any further requests will block when trying to CAll 153 // the server. 154 maxActiveRequests uint64 155 156 // minor version of the FUSE protocol. 157 // Negotiated and only set in INIT. 158 minor uint32 159 160 // atomicOTrunc is true when FUSE does not send a separate SETATTR request 161 // before open with O_TRUNC flag. 162 // Negotiated and only set in INIT. 163 atomicOTrunc bool 164 165 // asyncRead if read pages asynchronously. 166 // Negotiated and only set in INIT. 167 asyncRead bool 168 169 // writebackCache is true for write-back cache policy, 170 // false for write-through policy. 171 // Negotiated and only set in INIT. 172 writebackCache bool 173 174 // bigWrites if doing multi-page cached writes. 175 // Negotiated and only set in INIT. 176 bigWrites bool 177 178 // dontMask if filesystem does not apply umask to creation modes. 179 // Negotiated in INIT. 180 dontMask bool 181 182 // noOpen if FUSE server doesn't support open operation. 183 // This flag only influences performance, not correctness of the program. 184 noOpen bool 185 } 186 187 func (conn *connection) saveInitializedChan() bool { 188 select { 189 case <-conn.initializedChan: 190 return true // Closed. 191 default: 192 return false // Not closed. 193 } 194 } 195 196 func (conn *connection) loadInitializedChan(closed bool) { 197 conn.initializedChan = make(chan struct{}, 1) 198 if closed { 199 close(conn.initializedChan) 200 } 201 } 202 203 // newFUSEConnection creates a FUSE connection to fuseFD. 204 // +checklocks:fuseFD.mu 205 func newFUSEConnection(_ context.Context, fuseFD *DeviceFD, opts *filesystemOptions) (*connection, error) { 206 // Mark the device as ready so it can be used. 207 // FIXME(gvisor.dev/issue/4813): fuseFD's fields are accessed without 208 // synchronization and without checking if fuseFD has already been used to 209 // mount another filesystem. 210 211 // Create the writeBuf for the header to be stored in. 212 fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse) 213 fuseFD.fullQueueCh = make(chan struct{}, opts.maxActiveRequests) 214 215 return &connection{ 216 fd: fuseFD, 217 asyncNumMax: fuseDefaultMaxBackground, 218 asyncCongestionThreshold: fuseDefaultCongestionThreshold, 219 maxRead: opts.maxRead, 220 maxPages: fuseDefaultMaxPagesPerReq, 221 maxActiveRequests: opts.maxActiveRequests, 222 initializedChan: make(chan struct{}), 223 connected: true, 224 }, nil 225 } 226 227 // CallAsync makes an async (aka background) request. 228 // It's a simple wrapper around Call(). 229 func (conn *connection) CallAsync(ctx context.Context, r *Request) error { 230 r.async = true 231 _, err := conn.Call(ctx, r) 232 return err 233 } 234 235 // Call makes a request to the server. 236 // Block before the connection is initialized. 237 // When the Request is FUSE_INIT, it will not be blocked before initialization. 238 // Task should never be nil. 239 // 240 // For a sync request, it blocks the invoking task until 241 // a server responds with a response. 242 // 243 // For an async request (that do not expect a response immediately), 244 // it returns directly unless being blocked either before initialization 245 // or when there are too many async requests ongoing. 246 // 247 // Example for async request: 248 // init, readahead, write, async read/write, fuse_notify_reply, 249 // non-sync release, interrupt, forget. 250 // 251 // The forget request does not have a reply, 252 // as documented in include/uapi/linux/fuse.h:FUSE_FORGET. 253 func (conn *connection) Call(ctx context.Context, r *Request) (*Response, error) { 254 b := blockerFromContext(ctx) 255 // Block requests sent before connection is initialized. 256 if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT { 257 if err := b.Block(conn.initializedChan); err != nil { 258 return nil, err 259 } 260 } 261 262 conn.fd.mu.Lock() 263 conn.mu.Lock() 264 connected := conn.connected 265 connInitError := conn.connInitError 266 conn.mu.Unlock() 267 268 if !connected { 269 conn.fd.mu.Unlock() 270 return nil, linuxerr.ENOTCONN 271 } 272 273 if connInitError { 274 conn.fd.mu.Unlock() 275 return nil, linuxerr.ECONNREFUSED 276 } 277 278 fut, err := conn.callFuture(b, r) 279 conn.fd.mu.Unlock() 280 if err != nil { 281 return nil, err 282 } 283 284 return fut.resolve(b) 285 } 286 287 // callFuture makes a request to the server and returns a future response. 288 // Call resolve() when the response needs to be fulfilled. 289 // +checklocks:conn.fd.mu 290 func (conn *connection) callFuture(b context.Blocker, r *Request) (*futureResponse, error) { 291 // Is the queue full? 292 // 293 // We must busy wait here until the request can be queued. We don't 294 // block on the fd.fullQueueCh with a lock - so after being signalled, 295 // before we acquire the lock, it is possible that a barging task enters 296 // and queues a request. As a result, upon acquiring the lock we must 297 // again check if the room is available. 298 // 299 // This can potentially starve a request forever but this can only happen 300 // if there are always too many ongoing requests all the time. The 301 // supported maxActiveRequests setting should be really high to avoid this. 302 for conn.fd.numActiveRequests == conn.maxActiveRequests { 303 log.Infof("Blocking request %v from being queued. Too many active requests: %v", 304 r.id, conn.fd.numActiveRequests) 305 conn.fd.mu.Unlock() 306 err := b.Block(conn.fd.fullQueueCh) 307 conn.fd.mu.Lock() 308 if err != nil { 309 return nil, err 310 } 311 } 312 313 return conn.callFutureLocked(r) 314 } 315 316 // callFutureLocked makes a request to the server and returns a future response. 317 // +checklocks:conn.fd.mu 318 func (conn *connection) callFutureLocked(r *Request) (*futureResponse, error) { 319 // Check connected again holding conn.mu. 320 conn.mu.Lock() 321 if !conn.connected { 322 conn.mu.Unlock() 323 // we checked connected before, 324 // this must be due to aborted connection. 325 return nil, linuxerr.ECONNABORTED 326 } 327 conn.mu.Unlock() 328 329 conn.fd.queue.PushBack(r) 330 conn.fd.numActiveRequests++ 331 fut := newFutureResponse(r) 332 conn.fd.completions[r.id] = fut 333 334 // Signal the readers that there is something to read. 335 conn.fd.waitQueue.Notify(waiter.ReadableEvents) 336 337 return fut, nil 338 }