github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/fuse/dev.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fuse 16 17 import ( 18 "golang.org/x/sys/unix" 19 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 20 "github.com/nicocha30/gvisor-ligolo/pkg/context" 21 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 22 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel" 23 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 24 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 25 "github.com/nicocha30/gvisor-ligolo/pkg/sync" 26 "github.com/nicocha30/gvisor-ligolo/pkg/usermem" 27 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 28 ) 29 30 const fuseDevMinor = 229 31 32 // This is equivalent to linux.SizeOfFUSEHeaderIn 33 const fuseHeaderOutSize = 16 34 35 // fuseDevice implements vfs.Device for /dev/fuse. 36 // 37 // +stateify savable 38 type fuseDevice struct{} 39 40 // Open implements vfs.Device.Open. 41 func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 42 var fd DeviceFD 43 if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ 44 UseDentryMetadata: true, 45 }); err != nil { 46 return nil, err 47 } 48 return &fd.vfsfd, nil 49 } 50 51 // DeviceFD implements vfs.FileDescriptionImpl for /dev/fuse. 52 // 53 // +stateify savable 54 type DeviceFD struct { 55 vfsfd vfs.FileDescription 56 vfs.FileDescriptionDefaultImpl 57 vfs.DentryMetadataFileDescriptionImpl 58 vfs.NoLockFD 59 60 // waitQueue is used to notify interested parties when the device becomes 61 // readable or writable. 62 waitQueue waiter.Queue 63 64 // fullQueueCh is a channel used to synchronize the readers with the writers. 65 // Writers (inbound requests to the filesystem) block if there are too many 66 // unprocessed in-flight requests. 67 fullQueueCh chan struct{} `state:".(int)"` 68 69 // mu protects all the queues, maps, buffers and cursors and nextOpID. 70 mu sync.Mutex `state:"nosave"` 71 72 // nextOpID is used to create new requests. 73 // +checklocks:mu 74 nextOpID linux.FUSEOpID 75 76 // queue is the list of requests that need to be processed by the FUSE server. 77 // +checklocks:mu 78 queue requestList 79 80 // numActiveRequests is the number of requests made by the Sentry that has 81 // yet to be responded to. 82 // +checklocks:mu 83 numActiveRequests uint64 84 85 // completions is used to map a request to its response. A Writer will use this 86 // to notify the caller of a completed response. 87 // +checklocks:mu 88 completions map[linux.FUSEOpID]*futureResponse 89 90 // writeBuf is the memory buffer used to copy in the FUSE out header from 91 // userspace. 92 // +checklocks:mu 93 writeBuf [fuseHeaderOutSize]byte 94 95 // conn is the FUSE connection that this FD is being used for. 96 // +checklocks:mu 97 conn *connection 98 } 99 100 // Release implements vfs.FileDescriptionImpl.Release. 101 func (fd *DeviceFD) Release(ctx context.Context) { 102 fd.mu.Lock() 103 defer fd.mu.Unlock() 104 if fd.conn != nil { 105 fd.conn.mu.Lock() 106 fd.conn.connected = false 107 fd.conn.mu.Unlock() 108 109 fd.conn.Abort(ctx) // +checklocksforce: fd.conn.fd.mu=fd.mu 110 fd.waitQueue.Notify(waiter.ReadableEvents) 111 fd.conn = nil 112 } 113 } 114 115 // connected returns true if fd.conn is set and the connection has not been 116 // aborted. 117 // +checklocks:fd.mu 118 func (fd *DeviceFD) connected() bool { 119 if fd.conn != nil { 120 fd.conn.mu.Lock() 121 defer fd.conn.mu.Unlock() 122 return fd.conn.connected 123 } 124 return false 125 } 126 127 // PRead implements vfs.FileDescriptionImpl.PRead. 128 func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 129 // Operations on /dev/fuse don't make sense until a FUSE filesystem is 130 // mounted. If there is an active connection we know there is at least one 131 // filesystem mounted. 132 fd.mu.Lock() 133 defer fd.mu.Unlock() 134 if !fd.connected() { 135 return 0, linuxerr.EPERM 136 } 137 138 return 0, linuxerr.ENOSYS 139 } 140 141 // Read implements vfs.FileDescriptionImpl.Read. 142 func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 143 fd.mu.Lock() 144 defer fd.mu.Unlock() 145 if !fd.connected() { 146 return 0, linuxerr.EPERM 147 } 148 // We require that any Read done on this filesystem have a sane minimum 149 // read buffer. It must have the capacity for the fixed parts of any request 150 // header (Linux uses the request header and the FUSEWriteIn header for this 151 // calculation) + the negotiated MaxWrite room for the data. 152 minBuffSize := linux.FUSE_MIN_READ_BUFFER 153 fd.conn.mu.Lock() 154 negotiatedMinBuffSize := linux.SizeOfFUSEHeaderIn + linux.SizeOfFUSEHeaderOut + fd.conn.maxWrite 155 fd.conn.mu.Unlock() 156 if minBuffSize < negotiatedMinBuffSize { 157 minBuffSize = negotiatedMinBuffSize 158 } 159 160 // If the read buffer is too small, error out. 161 if dst.NumBytes() < int64(minBuffSize) { 162 return 0, linuxerr.EINVAL 163 } 164 // Find the first valid request. For the normal case this loop only executes 165 // once. 166 var req *Request 167 for req = fd.queue.Front(); !fd.queue.Empty(); req = fd.queue.Front() { 168 if int64(req.hdr.Len) <= dst.NumBytes() { 169 break 170 } 171 // The request is too large so we cannot process it. All requests must be 172 // smaller than the negotiated size as specified by Connection.MaxWrite set 173 // as part of the FUSE_INIT handshake. 174 errno := -int32(unix.EIO) 175 if req.hdr.Opcode == linux.FUSE_SETXATTR { 176 errno = -int32(unix.E2BIG) 177 } 178 179 if err := fd.sendError(ctx, errno, req.hdr.Unique); err != nil { 180 return 0, err 181 } 182 fd.queue.Remove(req) 183 req = nil 184 } 185 if req == nil { 186 return 0, linuxerr.ErrWouldBlock 187 } 188 189 // We already checked the size: dst must be able to fit the whole request. 190 n, err := dst.CopyOut(ctx, req.data) 191 if err != nil { 192 return 0, err 193 } 194 if n != len(req.data) { 195 return 0, linuxerr.EIO 196 } 197 fd.queue.Remove(req) 198 // Remove noReply ones from the map of requests expecting a reply. 199 if req.noReply { 200 fd.numActiveRequests-- 201 delete(fd.completions, req.hdr.Unique) 202 } 203 return int64(n), nil 204 } 205 206 // PWrite implements vfs.FileDescriptionImpl.PWrite. 207 func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 208 // Operations on /dev/fuse don't make sense until a FUSE filesystem is 209 // mounted. If there is an active connection we know there is at least one 210 // filesystem mounted. 211 fd.mu.Lock() 212 defer fd.mu.Unlock() 213 if !fd.connected() { 214 return 0, linuxerr.EPERM 215 } 216 217 return 0, linuxerr.ENOSYS 218 } 219 220 // Write implements vfs.FileDescriptionImpl.Write. 221 func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 222 fd.mu.Lock() 223 defer fd.mu.Unlock() 224 if !fd.connected() { 225 return 0, linuxerr.EPERM 226 } 227 228 if _, err := src.CopyIn(ctx, fd.writeBuf[:]); err != nil { 229 return 0, err 230 } 231 var hdr linux.FUSEHeaderOut 232 hdr.UnmarshalBytes(fd.writeBuf[:]) 233 234 fut, ok := fd.completions[hdr.Unique] 235 if !ok { 236 // Server sent us a response for a request we never sent, or for which we 237 // already received a reply (e.g. aborted), an unlikely event. 238 return 0, linuxerr.EINVAL 239 } 240 delete(fd.completions, hdr.Unique) 241 242 // Copy over the header into the future response. The rest of the payload 243 // will be copied over to the FR's data in the next iteration. 244 fut.hdr = &hdr 245 fut.data = make([]byte, fut.hdr.Len) 246 n, err := src.CopyIn(ctx, fut.data) 247 if err != nil { 248 return 0, err 249 } 250 if err := fd.sendResponse(ctx, fut); err != nil { 251 return 0, err 252 } 253 return int64(n), nil 254 } 255 256 // Readiness implements vfs.FileDescriptionImpl.Readiness. 257 func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask { 258 fd.mu.Lock() 259 defer fd.mu.Unlock() 260 var ready waiter.EventMask 261 262 if !fd.connected() { 263 ready |= waiter.EventErr 264 return ready & mask 265 } 266 267 // FD is always writable. 268 ready |= waiter.WritableEvents 269 if !fd.queue.Empty() { 270 // Have reqs available, FD is readable. 271 ready |= waiter.ReadableEvents 272 } 273 274 return ready & mask 275 } 276 277 // EventRegister implements waiter.Waitable.EventRegister. 278 func (fd *DeviceFD) EventRegister(e *waiter.Entry) error { 279 fd.mu.Lock() 280 defer fd.mu.Unlock() 281 fd.waitQueue.EventRegister(e) 282 return nil 283 } 284 285 // EventUnregister implements waiter.Waitable.EventUnregister. 286 func (fd *DeviceFD) EventUnregister(e *waiter.Entry) { 287 fd.mu.Lock() 288 defer fd.mu.Unlock() 289 fd.waitQueue.EventUnregister(e) 290 } 291 292 // Epollable implements FileDescriptionImpl.Epollable. 293 func (fd *DeviceFD) Epollable() bool { 294 return true 295 } 296 297 // Seek implements vfs.FileDescriptionImpl.Seek. 298 func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 299 // Operations on /dev/fuse don't make sense until a FUSE filesystem is 300 // mounted. If there is an active connection we know there is at least one 301 // filesystem mounted. 302 fd.mu.Lock() 303 defer fd.mu.Unlock() 304 if !fd.connected() { 305 return 0, linuxerr.EPERM 306 } 307 308 return 0, linuxerr.ENOSYS 309 } 310 311 // sendResponse sends a response to the waiting task (if any). 312 // 313 // +checklocks:fd.mu 314 func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error { 315 // Signal the task waiting on a response if any. 316 defer close(fut.ch) 317 318 // Signal that the queue is no longer full. 319 select { 320 case fd.fullQueueCh <- struct{}{}: 321 default: 322 } 323 fd.numActiveRequests-- 324 325 if fut.async { 326 return fd.asyncCallBack(ctx, fut.getResponse()) 327 } 328 329 return nil 330 } 331 332 // sendError sends an error response to the waiting task (if any) by calling sendResponse(). 333 // 334 // +checklocks:fd.mu 335 func (fd *DeviceFD) sendError(ctx context.Context, errno int32, unique linux.FUSEOpID) error { 336 // Return the error to the calling task. 337 respHdr := linux.FUSEHeaderOut{ 338 Len: linux.SizeOfFUSEHeaderOut, 339 Error: errno, 340 Unique: unique, 341 } 342 343 fut, ok := fd.completions[respHdr.Unique] 344 if !ok { 345 // A response for a request we never sent, 346 // or for which we already received a reply (e.g. aborted). 347 return linuxerr.EINVAL 348 } 349 delete(fd.completions, respHdr.Unique) 350 351 fut.hdr = &respHdr 352 return fd.sendResponse(ctx, fut) 353 } 354 355 // asyncCallBack executes pre-defined callback function for async requests. 356 // Currently used by: FUSE_INIT. 357 // +checklocks:fd.mu 358 func (fd *DeviceFD) asyncCallBack(ctx context.Context, r *Response) error { 359 switch r.opcode { 360 case linux.FUSE_INIT: 361 creds := auth.CredentialsFromContext(ctx) 362 rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace() 363 return fd.conn.InitRecv(r, creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs)) 364 // TODO(gvisor.dev/issue/3247): support async read: correctly process the response. 365 } 366 367 return nil 368 }