github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/fd_table.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "fmt" 19 "math" 20 "strings" 21 22 "golang.org/x/sys/unix" 23 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 24 "github.com/nicocha30/gvisor-ligolo/pkg/bitmap" 25 "github.com/nicocha30/gvisor-ligolo/pkg/context" 26 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/lock" 28 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/limits" 29 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 30 ) 31 32 // FDFlags define flags for an individual descriptor. 33 // 34 // +stateify savable 35 type FDFlags struct { 36 // CloseOnExec indicates the descriptor should be closed on exec. 37 CloseOnExec bool 38 } 39 40 // ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags 41 // representation. 42 func (f FDFlags) ToLinuxFileFlags() (mask uint) { 43 if f.CloseOnExec { 44 mask |= linux.O_CLOEXEC 45 } 46 return 47 } 48 49 // ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags 50 // representation. 51 func (f FDFlags) ToLinuxFDFlags() (mask uint) { 52 if f.CloseOnExec { 53 mask |= linux.FD_CLOEXEC 54 } 55 return 56 } 57 58 // descriptor holds the details about a file descriptor, namely a pointer to 59 // the file itself and the descriptor flags. 60 // 61 // Note that this is immutable and can only be changed via operations on the 62 // descriptorTable. 63 // 64 // +stateify savable 65 type descriptor struct { 66 file *vfs.FileDescription 67 flags FDFlags 68 } 69 70 // MaxFdLimit defines the upper limit on the integer value of file descriptors. 71 const MaxFdLimit int32 = int32(bitmap.MaxBitEntryLimit) 72 73 // FDTable is used to manage File references and flags. 74 // 75 // +stateify savable 76 type FDTable struct { 77 FDTableRefs 78 79 k *Kernel 80 81 // mu protects below. 82 mu fdTableMutex `state:"nosave"` 83 84 // fdBitmap shows which fds are already in use. 85 fdBitmap bitmap.Bitmap `state:"nosave"` 86 87 // descriptorTable holds descriptors. 88 descriptorTable `state:".(map[int32]descriptor)"` 89 } 90 91 func (f *FDTable) saveDescriptorTable() map[int32]descriptor { 92 m := make(map[int32]descriptor) 93 f.mu.Lock() 94 defer f.mu.Unlock() 95 f.forEach(context.Background(), func(fd int32, file *vfs.FileDescription, flags FDFlags) { 96 m[fd] = descriptor{ 97 file: file, 98 flags: flags, 99 } 100 }) 101 return m 102 } 103 104 func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { 105 ctx := context.Background() 106 f.initNoLeakCheck() // Initialize table. 107 f.fdBitmap = bitmap.New(uint32(math.MaxUint16)) 108 for fd, d := range m { 109 if fd < 0 { 110 panic(fmt.Sprintf("FD is not supposed to be negative. FD: %d", fd)) 111 } 112 113 if file := f.set(fd, d.file, d.flags); file != nil { 114 panic("file set") 115 } 116 f.fdBitmap.Add(uint32(fd)) 117 // Note that we do _not_ need to acquire a extra table reference here. The 118 // table reference will already be accounted for in the file, so we drop the 119 // reference taken by set above. 120 if d.file != nil { 121 d.file.DecRef(ctx) 122 } 123 } 124 } 125 126 // drop drops the table reference. 127 func (f *FDTable) drop(ctx context.Context, file *vfs.FileDescription) { 128 // Release any POSIX lock possibly held by the FDTable. 129 if file.SupportsLocks() { 130 err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF}) 131 if err != nil && !linuxerr.Equals(linuxerr.ENOLCK, err) { 132 panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) 133 } 134 } 135 136 // Drop the table's reference. 137 file.DecRef(ctx) 138 } 139 140 // NewFDTable allocates a new FDTable that may be used by tasks in k. 141 func (k *Kernel) NewFDTable() *FDTable { 142 f := &FDTable{k: k} 143 f.init() 144 return f 145 } 146 147 // DecRef implements RefCounter.DecRef. 148 // 149 // If f reaches zero references, all of its file descriptors are removed. 150 func (f *FDTable) DecRef(ctx context.Context) { 151 f.FDTableRefs.DecRef(func() { 152 f.RemoveIf(ctx, func(*vfs.FileDescription, FDFlags) bool { 153 return true 154 }) 155 }) 156 } 157 158 // forEachUpTo iterates over all non-nil files upto maxFds (non-inclusive) in sorted order. 159 // 160 // It is the caller's responsibility to acquire an appropriate lock. 161 func (f *FDTable) forEachUpTo(ctx context.Context, maxFd int32, fn func(fd int32, file *vfs.FileDescription, flags FDFlags)) { 162 // Iterate through the fdBitmap. 163 f.fdBitmap.ForEach(0, uint32(maxFd), func(ufd uint32) bool { 164 fd := int32(ufd) 165 file, flags, ok := f.get(fd) 166 if !ok { 167 return true 168 } 169 if file != nil { 170 if !file.TryIncRef() { 171 return true 172 } 173 fn(fd, file, flags) 174 file.DecRef(ctx) 175 } 176 return true 177 }) 178 } 179 180 // forEach iterates over all non-nil files upto maxFd in sorted order. 181 // 182 // It is the caller's responsibility to acquire an appropriate lock. 183 func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *vfs.FileDescription, flags FDFlags)) { 184 f.forEachUpTo(ctx, MaxFdLimit, fn) 185 } 186 187 // String is a stringer for FDTable. 188 func (f *FDTable) String() string { 189 var buf strings.Builder 190 ctx := context.Background() 191 files := make(map[int32]*vfs.FileDescription) 192 f.mu.Lock() 193 // Can't release f.mu from defer, because vfsObj.PathnameWithDeleted 194 // should not be called under the fdtable mutex. 195 f.forEach(ctx, func(fd int32, file *vfs.FileDescription, flags FDFlags) { 196 if file != nil { 197 file.IncRef() 198 files[fd] = file 199 } 200 }) 201 f.mu.Unlock() 202 defer func() { 203 for _, f := range files { 204 f.DecRef(ctx) 205 } 206 }() 207 208 for fd, file := range files { 209 vfsObj := file.Mount().Filesystem().VirtualFilesystem() 210 vd := file.VirtualDentry() 211 if vd.Dentry() == nil { 212 panic(fmt.Sprintf("fd %d (type %T) has nil dentry: %#v", fd, file.Impl(), file)) 213 } 214 name, err := vfsObj.PathnameWithDeleted(ctx, vfs.VirtualDentry{}, file.VirtualDentry()) 215 if err != nil { 216 fmt.Fprintf(&buf, "<err: %v>\n", err) 217 continue 218 } 219 fmt.Fprintf(&buf, "\tfd:%d => name %s\n", fd, name) 220 } 221 return buf.String() 222 } 223 224 // NewFDs allocates new FDs guaranteed to be the lowest number available 225 // greater than or equal to the minFD parameter. All files will share the set 226 // flags. Success is guaranteed to be all or none. 227 func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) { 228 if minFD < 0 { 229 // Don't accept negative FDs. 230 return nil, unix.EINVAL 231 } 232 233 // Default limit. 234 end := MaxFdLimit 235 236 // Ensure we don't get past the provided limit. 237 if limitSet := limits.FromContext(ctx); limitSet != nil { 238 lim := limitSet.Get(limits.NumberOfFiles) 239 // Only set if the limit is smaller than the max to avoid overflow. 240 if lim.Cur != limits.Infinity && lim.Cur < uint64(MaxFdLimit) { 241 end = int32(lim.Cur) 242 } 243 if minFD+int32(len(files)) > end { 244 return nil, unix.EMFILE 245 } 246 } 247 248 f.mu.Lock() 249 250 // max is used as the largest number in fdBitmap + 1. 251 max := int32(0) 252 if !f.fdBitmap.IsEmpty() { 253 max = int32(f.fdBitmap.Maximum()) 254 max++ 255 } 256 257 // Adjust max in case it is less than minFD. 258 if max < minFD { 259 max = minFD 260 } 261 // Install all entries. 262 for len(fds) < len(files) { 263 // Try to use free bit in fdBitmap. 264 // If all bits in fdBitmap are used, expand fd to the max. 265 fd, err := f.fdBitmap.FirstZero(uint32(minFD)) 266 if err != nil { 267 fd = uint32(max) 268 max++ 269 } 270 if fd >= uint32(end) { 271 break 272 } 273 f.fdBitmap.Add(fd) 274 f.set(int32(fd), files[len(fds)], flags) 275 fds = append(fds, int32(fd)) 276 minFD = int32(fd) 277 } 278 279 // Failure? Unwind existing FDs. 280 if len(fds) < len(files) { 281 for _, i := range fds { 282 f.set(i, nil, FDFlags{}) 283 f.fdBitmap.Remove(uint32(i)) 284 } 285 f.mu.Unlock() 286 287 // Drop the reference taken by the call to f.set() that 288 // originally installed the file. Don't call f.drop() 289 // (generating inotify events, etc.) since the file should 290 // appear to have never been inserted into f. 291 for _, file := range files[:len(fds)] { 292 file.DecRef(ctx) 293 } 294 return nil, unix.EMFILE 295 } 296 297 f.mu.Unlock() 298 return fds, nil 299 } 300 301 // NewFD allocates a file descriptor greater than or equal to minFD for 302 // the given file description. If it succeeds, it takes a reference on file. 303 func (f *FDTable) NewFD(ctx context.Context, minFD int32, file *vfs.FileDescription, flags FDFlags) (int32, error) { 304 files := []*vfs.FileDescription{file} 305 fileSlice, error := f.NewFDs(ctx, minFD, files, flags) 306 if error != nil { 307 return -1, error 308 } 309 return fileSlice[0], nil 310 } 311 312 // NewFDAt sets the file reference for the given FD. If there is an active 313 // reference for that FD, the ref count for that existing reference is 314 // decremented. 315 func (f *FDTable) NewFDAt(ctx context.Context, fd int32, file *vfs.FileDescription, flags FDFlags) error { 316 df, err := f.newFDAt(ctx, fd, file, flags) 317 if err != nil { 318 return err 319 } 320 if df != nil { 321 f.drop(ctx, df) 322 } 323 return nil 324 } 325 326 func (f *FDTable) newFDAt(ctx context.Context, fd int32, file *vfs.FileDescription, flags FDFlags) (*vfs.FileDescription, error) { 327 if fd < 0 { 328 // Don't accept negative FDs. 329 return nil, unix.EBADF 330 } 331 332 // Check the limit for the provided file. 333 if limitSet := limits.FromContext(ctx); limitSet != nil { 334 if lim := limitSet.Get(limits.NumberOfFiles); lim.Cur != limits.Infinity && uint64(fd) >= lim.Cur { 335 return nil, unix.EMFILE 336 } 337 } 338 339 // Install the entry. 340 f.mu.Lock() 341 defer f.mu.Unlock() 342 343 df := f.set(fd, file, flags) 344 // Add fd to fdBitmap. 345 if file != nil { 346 f.fdBitmap.Add(uint32(fd)) 347 } 348 349 return df, nil 350 } 351 352 // SetFlags sets the flags for the given file descriptor. 353 // 354 // True is returned iff flags were changed. 355 func (f *FDTable) SetFlags(ctx context.Context, fd int32, flags FDFlags) error { 356 if fd < 0 { 357 // Don't accept negative FDs. 358 return unix.EBADF 359 } 360 361 f.mu.Lock() 362 defer f.mu.Unlock() 363 364 file, _, _ := f.get(fd) 365 if file == nil { 366 // No file found. 367 return unix.EBADF 368 } 369 370 // Update the flags. 371 f.set(fd, file, flags) 372 return nil 373 } 374 375 // SetFlagsForRange sets the flags for the given range of file descriptors 376 // (inclusive: [startFd, endFd]). 377 func (f *FDTable) SetFlagsForRange(ctx context.Context, startFd int32, endFd int32, flags FDFlags) error { 378 if startFd < 0 || startFd > endFd { 379 return unix.EBADF 380 } 381 382 f.mu.Lock() 383 defer f.mu.Unlock() 384 385 for fd, err := f.fdBitmap.FirstOne(uint32(startFd)); err == nil && fd <= uint32(endFd); fd, err = f.fdBitmap.FirstOne(fd + 1) { 386 fdI32 := int32(fd) 387 file, _, _ := f.get(fdI32) 388 f.set(fdI32, file, flags) 389 } 390 391 return nil 392 } 393 394 // Get returns a reference to the file and the flags for the FD or nil if no 395 // file is defined for the given fd. 396 // 397 // N.B. Callers are required to use DecRef when they are done. 398 // 399 //go:nosplit 400 func (f *FDTable) Get(fd int32) (*vfs.FileDescription, FDFlags) { 401 if fd < 0 { 402 return nil, FDFlags{} 403 } 404 405 for { 406 file, flags, _ := f.get(fd) 407 if file != nil { 408 if !file.TryIncRef() { 409 continue // Race caught. 410 } 411 // Reference acquired. 412 return file, flags 413 } 414 // No file available. 415 return nil, FDFlags{} 416 } 417 } 418 419 // GetFDs returns a sorted list of valid fds. 420 // 421 // Precondition: The caller must be running on the task goroutine, or Task.mu 422 // must be locked. 423 func (f *FDTable) GetFDs(ctx context.Context) []int32 { 424 f.mu.Lock() 425 defer f.mu.Unlock() 426 fds := make([]int32, 0, int(f.fdBitmap.GetNumOnes())) 427 f.forEach(ctx, func(fd int32, _ *vfs.FileDescription, _ FDFlags) { 428 fds = append(fds, fd) 429 }) 430 return fds 431 } 432 433 // Exists returns whether fd is defined in the table. It is inherently racy. 434 // 435 //go:nosplit 436 func (f *FDTable) Exists(fd int32) bool { 437 if fd < 0 { 438 return false 439 } 440 file, _, _ := f.get(fd) 441 return file != nil 442 } 443 444 // Fork returns an independent FDTable, cloning all FDs up to maxFds (non-inclusive). 445 func (f *FDTable) Fork(ctx context.Context, maxFd int32) *FDTable { 446 clone := f.k.NewFDTable() 447 f.mu.Lock() 448 defer f.mu.Unlock() 449 f.forEachUpTo(ctx, maxFd, func(fd int32, file *vfs.FileDescription, flags FDFlags) { 450 // The set function here will acquire an appropriate table 451 // reference for the clone. We don't need anything else. 452 if df := clone.set(fd, file, flags); df != nil { 453 panic("file set") 454 } 455 clone.fdBitmap.Add(uint32(fd)) 456 }) 457 return clone 458 } 459 460 // Remove removes an FD from and returns a tuple where one of the files is non-nil 461 // iff successful. 462 // 463 // N.B. Callers are required to use DecRef on the returned file when they are done. 464 func (f *FDTable) Remove(ctx context.Context, fd int32) *vfs.FileDescription { 465 if fd < 0 { 466 return nil 467 } 468 469 f.mu.Lock() 470 file, _, _ := f.get(fd) 471 if file != nil { 472 // Add reference for caller. 473 file.IncRef() 474 file = f.set(fd, nil, FDFlags{}) // Zap entry. 475 f.fdBitmap.Remove(uint32(fd)) 476 } 477 f.mu.Unlock() 478 479 if file != nil { 480 f.drop(ctx, file) 481 } 482 return file 483 } 484 485 // RemoveIf removes all FDs where cond is true. 486 func (f *FDTable) RemoveIf(ctx context.Context, cond func(*vfs.FileDescription, FDFlags) bool) { 487 var files []*vfs.FileDescription 488 489 f.mu.Lock() 490 f.forEach(ctx, func(fd int32, file *vfs.FileDescription, flags FDFlags) { 491 if cond(file, flags) { 492 df := f.set(fd, nil, FDFlags{}) // Clear from table. 493 f.fdBitmap.Remove(uint32(fd)) 494 if df != nil { 495 files = append(files, df) 496 } 497 } 498 }) 499 f.mu.Unlock() 500 501 for _, file := range files { 502 f.drop(ctx, file) 503 } 504 } 505 506 // RemoveNextInRange removes the next FD that falls within the given range, 507 // and returns a tuple where one of the files is non-nil iff successful. 508 // 509 // N.B. Callers are required to use DecRef on the returned file when they are done. 510 func (f *FDTable) RemoveNextInRange(ctx context.Context, startFd int32, endFd int32) (int32, *vfs.FileDescription) { 511 if startFd < 0 || startFd > endFd { 512 return MaxFdLimit, nil 513 } 514 515 f.mu.Lock() 516 517 fdUint, err := f.fdBitmap.FirstOne(uint32(startFd)) 518 fd := int32(fdUint) 519 if err != nil || fd > endFd { 520 f.mu.Unlock() 521 return MaxFdLimit, nil 522 } 523 file, _, _ := f.get(fd) 524 525 if file != nil { 526 // Add reference for caller. 527 file.IncRef() 528 file = f.set(fd, nil, FDFlags{}) // Zap entry. 529 f.fdBitmap.Remove(uint32(fd)) 530 } 531 f.mu.Unlock() 532 533 if file != nil { 534 f.drop(ctx, file) 535 } 536 537 return fd, file 538 } 539 540 // GetLastFd returns the last set FD in the FDTable bitmap. 541 func (f *FDTable) GetLastFd() int32 { 542 f.mu.Lock() 543 defer f.mu.Unlock() 544 545 last := f.fdBitmap.Maximum() 546 if last > bitmap.MaxBitEntryLimit { 547 return MaxFdLimit 548 } 549 return int32(last) 550 }