github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/dev.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fuse 16 17 import ( 18 "golang.org/x/sys/unix" 19 "github.com/SagerNet/gvisor/pkg/abi/linux" 20 "github.com/SagerNet/gvisor/pkg/context" 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 23 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 24 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 25 "github.com/SagerNet/gvisor/pkg/sync" 26 "github.com/SagerNet/gvisor/pkg/syserror" 27 "github.com/SagerNet/gvisor/pkg/usermem" 28 "github.com/SagerNet/gvisor/pkg/waiter" 29 ) 30 31 const fuseDevMinor = 229 32 33 // fuseDevice implements vfs.Device for /dev/fuse. 34 // 35 // +stateify savable 36 type fuseDevice struct{} 37 38 // Open implements vfs.Device.Open. 39 func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 40 if !kernel.FUSEEnabled { 41 return nil, syserror.ENOENT 42 } 43 44 var fd DeviceFD 45 if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ 46 UseDentryMetadata: true, 47 }); err != nil { 48 return nil, err 49 } 50 return &fd.vfsfd, nil 51 } 52 53 // DeviceFD implements vfs.FileDescriptionImpl for /dev/fuse. 54 // 55 // +stateify savable 56 type DeviceFD struct { 57 vfsfd vfs.FileDescription 58 vfs.FileDescriptionDefaultImpl 59 vfs.DentryMetadataFileDescriptionImpl 60 vfs.NoLockFD 61 62 // nextOpID is used to create new requests. 63 nextOpID linux.FUSEOpID 64 65 // queue is the list of requests that need to be processed by the FUSE server. 66 queue requestList 67 68 // numActiveRequests is the number of requests made by the Sentry that has 69 // yet to be responded to. 70 numActiveRequests uint64 71 72 // completions is used to map a request to its response. A Writer will use this 73 // to notify the caller of a completed response. 74 completions map[linux.FUSEOpID]*futureResponse 75 76 writeCursor uint32 77 78 // writeBuf is the memory buffer used to copy in the FUSE out header from 79 // userspace. 80 writeBuf []byte 81 82 // writeCursorFR current FR being copied from server. 83 writeCursorFR *futureResponse 84 85 // mu protects all the queues, maps, buffers and cursors and nextOpID. 86 mu sync.Mutex `state:"nosave"` 87 88 // waitQueue is used to notify interested parties when the device becomes 89 // readable or writable. 90 waitQueue waiter.Queue 91 92 // fullQueueCh is a channel used to synchronize the readers with the writers. 93 // Writers (inbound requests to the filesystem) block if there are too many 94 // unprocessed in-flight requests. 95 fullQueueCh chan struct{} `state:".(int)"` 96 97 // fs is the FUSE filesystem that this FD is being used for. A reference is 98 // held on fs. 99 fs *filesystem 100 } 101 102 func (fd *DeviceFD) saveFullQueueCh() int { 103 return cap(fd.fullQueueCh) 104 } 105 106 func (fd *DeviceFD) loadFullQueueCh(capacity int) { 107 fd.fullQueueCh = make(chan struct{}, capacity) 108 } 109 110 // Release implements vfs.FileDescriptionImpl.Release. 111 func (fd *DeviceFD) Release(ctx context.Context) { 112 if fd.fs != nil { 113 fd.fs.conn.mu.Lock() 114 fd.fs.conn.connected = false 115 fd.fs.conn.mu.Unlock() 116 117 fd.fs.VFSFilesystem().DecRef(ctx) 118 fd.fs = nil 119 } 120 } 121 122 // PRead implements vfs.FileDescriptionImpl.PRead. 123 func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 124 // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. 125 if fd.fs == nil { 126 return 0, linuxerr.EPERM 127 } 128 129 return 0, syserror.ENOSYS 130 } 131 132 // Read implements vfs.FileDescriptionImpl.Read. 133 func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 134 // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. 135 if fd.fs == nil { 136 return 0, linuxerr.EPERM 137 } 138 139 // We require that any Read done on this filesystem have a sane minimum 140 // read buffer. It must have the capacity for the fixed parts of any request 141 // header (Linux uses the request header and the FUSEWriteIn header for this 142 // calculation) + the negotiated MaxWrite room for the data. 143 minBuffSize := linux.FUSE_MIN_READ_BUFFER 144 inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes()) 145 writeHdrLen := uint32((*linux.FUSEWriteIn)(nil).SizeBytes()) 146 negotiatedMinBuffSize := inHdrLen + writeHdrLen + fd.fs.conn.maxWrite 147 if minBuffSize < negotiatedMinBuffSize { 148 minBuffSize = negotiatedMinBuffSize 149 } 150 151 // If the read buffer is too small, error out. 152 if dst.NumBytes() < int64(minBuffSize) { 153 return 0, linuxerr.EINVAL 154 } 155 156 fd.mu.Lock() 157 defer fd.mu.Unlock() 158 return fd.readLocked(ctx, dst, opts) 159 } 160 161 // readLocked implements the reading of the fuse device while locked with DeviceFD.mu. 162 // 163 // Preconditions: dst is large enough for any reasonable request. 164 func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 165 var req *Request 166 167 // Find the first valid request. 168 // For the normal case this loop only execute once. 169 for !fd.queue.Empty() { 170 req = fd.queue.Front() 171 172 if int64(req.hdr.Len)+int64(len(req.payload)) <= dst.NumBytes() { 173 break 174 } 175 176 // The request is too large. Cannot process it. All requests must be smaller than the 177 // negotiated size as specified by Connection.MaxWrite set as part of the FUSE_INIT 178 // handshake. 179 errno := -int32(unix.EIO) 180 if req.hdr.Opcode == linux.FUSE_SETXATTR { 181 errno = -int32(unix.E2BIG) 182 } 183 184 // Return the error to the calling task. 185 if err := fd.sendError(ctx, errno, req.hdr.Unique); err != nil { 186 return 0, err 187 } 188 189 // We're done with this request. 190 fd.queue.Remove(req) 191 req = nil 192 } 193 194 if req == nil { 195 return 0, syserror.ErrWouldBlock 196 } 197 198 // We already checked the size: dst must be able to fit the whole request. 199 // Now we write the marshalled header, the payload, 200 // and the potential additional payload 201 // to the user memory IOSequence. 202 203 n, err := dst.CopyOut(ctx, req.data) 204 if err != nil { 205 return 0, err 206 } 207 if n != len(req.data) { 208 return 0, syserror.EIO 209 } 210 211 if req.hdr.Opcode == linux.FUSE_WRITE { 212 written, err := dst.DropFirst(n).CopyOut(ctx, req.payload) 213 if err != nil { 214 return 0, err 215 } 216 if written != len(req.payload) { 217 return 0, syserror.EIO 218 } 219 n += int(written) 220 } 221 222 // Fully done with this req, remove it from the queue. 223 fd.queue.Remove(req) 224 225 // Remove noReply ones from map of requests expecting a reply. 226 if req.noReply { 227 fd.numActiveRequests -= 1 228 delete(fd.completions, req.hdr.Unique) 229 } 230 231 return int64(n), nil 232 } 233 234 // PWrite implements vfs.FileDescriptionImpl.PWrite. 235 func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 236 // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. 237 if fd.fs == nil { 238 return 0, linuxerr.EPERM 239 } 240 241 return 0, syserror.ENOSYS 242 } 243 244 // Write implements vfs.FileDescriptionImpl.Write. 245 func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 246 fd.mu.Lock() 247 defer fd.mu.Unlock() 248 return fd.writeLocked(ctx, src, opts) 249 } 250 251 // writeLocked implements writing to the fuse device while locked with DeviceFD.mu. 252 func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 253 // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. 254 if fd.fs == nil { 255 return 0, linuxerr.EPERM 256 } 257 258 // Return ENODEV if the filesystem is umounted. 259 if fd.fs.umounted { 260 return 0, linuxerr.ENODEV 261 } 262 263 var cn, n int64 264 hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes()) 265 266 for src.NumBytes() > 0 { 267 if fd.writeCursorFR != nil { 268 // Already have common header, and we're now copying the payload. 269 wantBytes := fd.writeCursorFR.hdr.Len 270 271 // Note that the FR data doesn't have the header. Copy it over if its necessary. 272 if fd.writeCursorFR.data == nil { 273 fd.writeCursorFR.data = make([]byte, wantBytes) 274 } 275 276 bytesCopied, err := src.CopyIn(ctx, fd.writeCursorFR.data[fd.writeCursor:wantBytes]) 277 if err != nil { 278 return 0, err 279 } 280 src = src.DropFirst(bytesCopied) 281 282 cn = int64(bytesCopied) 283 n += cn 284 fd.writeCursor += uint32(cn) 285 if fd.writeCursor == wantBytes { 286 // Done reading this full response. Clean up and unblock the 287 // initiator. 288 break 289 } 290 291 // Check if we have more data in src. 292 continue 293 } 294 295 // Assert that the header isn't read into the writeBuf yet. 296 if fd.writeCursor >= hdrLen { 297 return 0, linuxerr.EINVAL 298 } 299 300 // We don't have the full common response header yet. 301 wantBytes := hdrLen - fd.writeCursor 302 bytesCopied, err := src.CopyIn(ctx, fd.writeBuf[fd.writeCursor:wantBytes]) 303 if err != nil { 304 return 0, err 305 } 306 src = src.DropFirst(bytesCopied) 307 308 cn = int64(bytesCopied) 309 n += cn 310 fd.writeCursor += uint32(cn) 311 if fd.writeCursor == hdrLen { 312 // Have full header in the writeBuf. Use it to fetch the actual futureResponse 313 // from the device's completions map. 314 var hdr linux.FUSEHeaderOut 315 hdr.UnmarshalBytes(fd.writeBuf) 316 317 // We have the header now and so the writeBuf has served its purpose. 318 // We could reset it manually here but instead of doing that, at the 319 // end of the write, the writeCursor will be set to 0 thereby allowing 320 // the next request to overwrite whats in the buffer, 321 322 fut, ok := fd.completions[hdr.Unique] 323 if !ok { 324 // Server sent us a response for a request we never sent, 325 // or for which we already received a reply (e.g. aborted), an unlikely event. 326 return 0, linuxerr.EINVAL 327 } 328 329 delete(fd.completions, hdr.Unique) 330 331 // Copy over the header into the future response. The rest of the payload 332 // will be copied over to the FR's data in the next iteration. 333 fut.hdr = &hdr 334 fd.writeCursorFR = fut 335 336 // Next iteration will now try read the complete request, if src has 337 // any data remaining. Otherwise we're done. 338 } 339 } 340 341 if fd.writeCursorFR != nil { 342 if err := fd.sendResponse(ctx, fd.writeCursorFR); err != nil { 343 return 0, err 344 } 345 346 // Ready the device for the next request. 347 fd.writeCursorFR = nil 348 fd.writeCursor = 0 349 } 350 351 return n, nil 352 } 353 354 // Readiness implements vfs.FileDescriptionImpl.Readiness. 355 func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask { 356 fd.mu.Lock() 357 defer fd.mu.Unlock() 358 return fd.readinessLocked(mask) 359 } 360 361 // readinessLocked implements checking the readiness of the fuse device while 362 // locked with DeviceFD.mu. 363 func (fd *DeviceFD) readinessLocked(mask waiter.EventMask) waiter.EventMask { 364 var ready waiter.EventMask 365 366 if fd.fs == nil || fd.fs.umounted { 367 ready |= waiter.EventErr 368 return ready & mask 369 } 370 371 // FD is always writable. 372 ready |= waiter.WritableEvents 373 if !fd.queue.Empty() { 374 // Have reqs available, FD is readable. 375 ready |= waiter.ReadableEvents 376 } 377 378 return ready & mask 379 } 380 381 // EventRegister implements waiter.Waitable.EventRegister. 382 func (fd *DeviceFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { 383 fd.waitQueue.EventRegister(e, mask) 384 } 385 386 // EventUnregister implements waiter.Waitable.EventUnregister. 387 func (fd *DeviceFD) EventUnregister(e *waiter.Entry) { 388 fd.waitQueue.EventUnregister(e) 389 } 390 391 // Seek implements vfs.FileDescriptionImpl.Seek. 392 func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 393 // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. 394 if fd.fs == nil { 395 return 0, linuxerr.EPERM 396 } 397 398 return 0, syserror.ENOSYS 399 } 400 401 // sendResponse sends a response to the waiting task (if any). 402 // 403 // Preconditions: fd.mu must be held. 404 func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error { 405 // Signal the task waiting on a response if any. 406 defer close(fut.ch) 407 408 // Signal that the queue is no longer full. 409 select { 410 case fd.fullQueueCh <- struct{}{}: 411 default: 412 } 413 fd.numActiveRequests-- 414 415 if fut.async { 416 return fd.asyncCallBack(ctx, fut.getResponse()) 417 } 418 419 return nil 420 } 421 422 // sendError sends an error response to the waiting task (if any) by calling sendResponse(). 423 // 424 // Preconditions: fd.mu must be held. 425 func (fd *DeviceFD) sendError(ctx context.Context, errno int32, unique linux.FUSEOpID) error { 426 // Return the error to the calling task. 427 outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes()) 428 respHdr := linux.FUSEHeaderOut{ 429 Len: outHdrLen, 430 Error: errno, 431 Unique: unique, 432 } 433 434 fut, ok := fd.completions[respHdr.Unique] 435 if !ok { 436 // A response for a request we never sent, 437 // or for which we already received a reply (e.g. aborted). 438 return linuxerr.EINVAL 439 } 440 delete(fd.completions, respHdr.Unique) 441 442 fut.hdr = &respHdr 443 return fd.sendResponse(ctx, fut) 444 } 445 446 // asyncCallBack executes pre-defined callback function for async requests. 447 // Currently used by: FUSE_INIT. 448 func (fd *DeviceFD) asyncCallBack(ctx context.Context, r *Response) error { 449 switch r.opcode { 450 case linux.FUSE_INIT: 451 creds := auth.CredentialsFromContext(ctx) 452 rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace() 453 return fd.fs.conn.InitRecv(r, creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs)) 454 // TODO(github.com/SagerNet/issue/3247): support async read: correctly process the response. 455 } 456 457 return nil 458 }