github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/fd_table.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"strings"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/bitmap"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/lock"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/limits"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    30  )
    31  
// FDFlags holds the flags attached to an individual file descriptor (as
// opposed to flags of the underlying file description). The zero value means
// no flags are set.
//
// +stateify savable
type FDFlags struct {
	// CloseOnExec indicates the descriptor should be closed on exec. It is
	// the only per-descriptor flag tracked here and maps to
	// linux.O_CLOEXEC / linux.FD_CLOEXEC in the conversion helpers.
	CloseOnExec bool
}
    39  
    40  // ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags
    41  // representation.
    42  func (f FDFlags) ToLinuxFileFlags() (mask uint) {
    43  	if f.CloseOnExec {
    44  		mask |= linux.O_CLOEXEC
    45  	}
    46  	return
    47  }
    48  
    49  // ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags
    50  // representation.
    51  func (f FDFlags) ToLinuxFDFlags() (mask uint) {
    52  	if f.CloseOnExec {
    53  		mask |= linux.FD_CLOEXEC
    54  	}
    55  	return
    56  }
    57  
// descriptor holds the details about a file descriptor, namely a pointer to
// the file itself and the descriptor flags.
//
// Note that this is immutable and can only be changed via operations on the
// descriptorTable.
//
// +stateify savable
type descriptor struct {
	// file is the file description this descriptor refers to.
	file *vfs.FileDescription
	// flags are the per-descriptor flags (e.g. close-on-exec).
	flags FDFlags
}
    69  
// MaxFdLimit defines the upper limit on the integer value of file descriptors.
// It is derived from bitmap.MaxBitEntryLimit because in-use FDs are tracked in
// a bitmap (see FDTable.fdBitmap).
const MaxFdLimit int32 = int32(bitmap.MaxBitEntryLimit)
    72  
// FDTable is used to manage File references and flags.
//
// The table is reference counted through the embedded FDTableRefs; when the
// last reference is dropped via DecRef, every descriptor is removed.
//
// +stateify savable
type FDTable struct {
	FDTableRefs

	// k is the Kernel that created this table (set by Kernel.NewFDTable) and
	// is used by Fork to allocate the clone.
	k *Kernel

	// mu protects below. Note that Get and Exists read the descriptor table
	// through f.get without holding mu.
	mu fdTableMutex `state:"nosave"`

	// fdBitmap shows which fds are already in use. It is not saved; it is
	// rebuilt from the descriptor map in loadDescriptorTable.
	fdBitmap bitmap.Bitmap `state:"nosave"`

	// descriptorTable holds descriptors. It is saved and restored as a
	// map[int32]descriptor via saveDescriptorTable / loadDescriptorTable.
	descriptorTable `state:".(map[int32]descriptor)"`
}
    90  
    91  func (f *FDTable) saveDescriptorTable() map[int32]descriptor {
    92  	m := make(map[int32]descriptor)
    93  	f.mu.Lock()
    94  	defer f.mu.Unlock()
    95  	f.forEach(context.Background(), func(fd int32, file *vfs.FileDescription, flags FDFlags) {
    96  		m[fd] = descriptor{
    97  			file:  file,
    98  			flags: flags,
    99  		}
   100  	})
   101  	return m
   102  }
   103  
   104  func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
   105  	ctx := context.Background()
   106  	f.initNoLeakCheck() // Initialize table.
   107  	f.fdBitmap = bitmap.New(uint32(math.MaxUint16))
   108  	for fd, d := range m {
   109  		if fd < 0 {
   110  			panic(fmt.Sprintf("FD is not supposed to be negative. FD: %d", fd))
   111  		}
   112  
   113  		if file := f.set(fd, d.file, d.flags); file != nil {
   114  			panic("file set")
   115  		}
   116  		f.fdBitmap.Add(uint32(fd))
   117  		// Note that we do _not_ need to acquire a extra table reference here. The
   118  		// table reference will already be accounted for in the file, so we drop the
   119  		// reference taken by set above.
   120  		if d.file != nil {
   121  			d.file.DecRef(ctx)
   122  		}
   123  	}
   124  }
   125  
   126  // drop drops the table reference.
   127  func (f *FDTable) drop(ctx context.Context, file *vfs.FileDescription) {
   128  	// Release any POSIX lock possibly held by the FDTable.
   129  	if file.SupportsLocks() {
   130  		err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF})
   131  		if err != nil && !linuxerr.Equals(linuxerr.ENOLCK, err) {
   132  			panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
   133  		}
   134  	}
   135  
   136  	// Drop the table's reference.
   137  	file.DecRef(ctx)
   138  }
   139  
   140  // NewFDTable allocates a new FDTable that may be used by tasks in k.
   141  func (k *Kernel) NewFDTable() *FDTable {
   142  	f := &FDTable{k: k}
   143  	f.init()
   144  	return f
   145  }
   146  
   147  // DecRef implements RefCounter.DecRef.
   148  //
   149  // If f reaches zero references, all of its file descriptors are removed.
   150  func (f *FDTable) DecRef(ctx context.Context) {
   151  	f.FDTableRefs.DecRef(func() {
   152  		f.RemoveIf(ctx, func(*vfs.FileDescription, FDFlags) bool {
   153  			return true
   154  		})
   155  	})
   156  }
   157  
   158  // forEachUpTo iterates over all non-nil files upto maxFds (non-inclusive) in sorted order.
   159  //
   160  // It is the caller's responsibility to acquire an appropriate lock.
   161  func (f *FDTable) forEachUpTo(ctx context.Context, maxFd int32, fn func(fd int32, file *vfs.FileDescription, flags FDFlags)) {
   162  	// Iterate through the fdBitmap.
   163  	f.fdBitmap.ForEach(0, uint32(maxFd), func(ufd uint32) bool {
   164  		fd := int32(ufd)
   165  		file, flags, ok := f.get(fd)
   166  		if !ok {
   167  			return true
   168  		}
   169  		if file != nil {
   170  			if !file.TryIncRef() {
   171  				return true
   172  			}
   173  			fn(fd, file, flags)
   174  			file.DecRef(ctx)
   175  		}
   176  		return true
   177  	})
   178  }
   179  
   180  // forEach iterates over all non-nil files upto maxFd in sorted order.
   181  //
   182  // It is the caller's responsibility to acquire an appropriate lock.
   183  func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *vfs.FileDescription, flags FDFlags)) {
   184  	f.forEachUpTo(ctx, MaxFdLimit, fn)
   185  }
   186  
   187  // String is a stringer for FDTable.
   188  func (f *FDTable) String() string {
   189  	var buf strings.Builder
   190  	ctx := context.Background()
   191  	files := make(map[int32]*vfs.FileDescription)
   192  	f.mu.Lock()
   193  	// Can't release f.mu from defer, because vfsObj.PathnameWithDeleted
   194  	// should not be called under the fdtable mutex.
   195  	f.forEach(ctx, func(fd int32, file *vfs.FileDescription, flags FDFlags) {
   196  		if file != nil {
   197  			file.IncRef()
   198  			files[fd] = file
   199  		}
   200  	})
   201  	f.mu.Unlock()
   202  	defer func() {
   203  		for _, f := range files {
   204  			f.DecRef(ctx)
   205  		}
   206  	}()
   207  
   208  	for fd, file := range files {
   209  		vfsObj := file.Mount().Filesystem().VirtualFilesystem()
   210  		vd := file.VirtualDentry()
   211  		if vd.Dentry() == nil {
   212  			panic(fmt.Sprintf("fd %d (type %T) has nil dentry: %#v", fd, file.Impl(), file))
   213  		}
   214  		name, err := vfsObj.PathnameWithDeleted(ctx, vfs.VirtualDentry{}, file.VirtualDentry())
   215  		if err != nil {
   216  			fmt.Fprintf(&buf, "<err: %v>\n", err)
   217  			continue
   218  		}
   219  		fmt.Fprintf(&buf, "\tfd:%d => name %s\n", fd, name)
   220  	}
   221  	return buf.String()
   222  }
   223  
   224  // NewFDs allocates new FDs guaranteed to be the lowest number available
   225  // greater than or equal to the minFD parameter. All files will share the set
   226  // flags. Success is guaranteed to be all or none.
   227  func (f *FDTable) NewFDs(ctx context.Context, minFD int32, files []*vfs.FileDescription, flags FDFlags) (fds []int32, err error) {
   228  	if minFD < 0 {
   229  		// Don't accept negative FDs.
   230  		return nil, unix.EINVAL
   231  	}
   232  
   233  	// Default limit.
   234  	end := MaxFdLimit
   235  
   236  	// Ensure we don't get past the provided limit.
   237  	if limitSet := limits.FromContext(ctx); limitSet != nil {
   238  		lim := limitSet.Get(limits.NumberOfFiles)
   239  		// Only set if the limit is smaller than the max to avoid overflow.
   240  		if lim.Cur != limits.Infinity && lim.Cur < uint64(MaxFdLimit) {
   241  			end = int32(lim.Cur)
   242  		}
   243  		if minFD+int32(len(files)) > end {
   244  			return nil, unix.EMFILE
   245  		}
   246  	}
   247  
   248  	f.mu.Lock()
   249  
   250  	// max is used as the largest number in fdBitmap + 1.
   251  	max := int32(0)
   252  	if !f.fdBitmap.IsEmpty() {
   253  		max = int32(f.fdBitmap.Maximum())
   254  		max++
   255  	}
   256  
   257  	// Adjust max in case it is less than minFD.
   258  	if max < minFD {
   259  		max = minFD
   260  	}
   261  	// Install all entries.
   262  	for len(fds) < len(files) {
   263  		// Try to use free bit in fdBitmap.
   264  		// If all bits in fdBitmap are used, expand fd to the max.
   265  		fd, err := f.fdBitmap.FirstZero(uint32(minFD))
   266  		if err != nil {
   267  			fd = uint32(max)
   268  			max++
   269  		}
   270  		if fd >= uint32(end) {
   271  			break
   272  		}
   273  		f.fdBitmap.Add(fd)
   274  		f.set(int32(fd), files[len(fds)], flags)
   275  		fds = append(fds, int32(fd))
   276  		minFD = int32(fd)
   277  	}
   278  
   279  	// Failure? Unwind existing FDs.
   280  	if len(fds) < len(files) {
   281  		for _, i := range fds {
   282  			f.set(i, nil, FDFlags{})
   283  			f.fdBitmap.Remove(uint32(i))
   284  		}
   285  		f.mu.Unlock()
   286  
   287  		// Drop the reference taken by the call to f.set() that
   288  		// originally installed the file. Don't call f.drop()
   289  		// (generating inotify events, etc.) since the file should
   290  		// appear to have never been inserted into f.
   291  		for _, file := range files[:len(fds)] {
   292  			file.DecRef(ctx)
   293  		}
   294  		return nil, unix.EMFILE
   295  	}
   296  
   297  	f.mu.Unlock()
   298  	return fds, nil
   299  }
   300  
   301  // NewFD allocates a file descriptor greater than or equal to minFD for
   302  // the given file description. If it succeeds, it takes a reference on file.
   303  func (f *FDTable) NewFD(ctx context.Context, minFD int32, file *vfs.FileDescription, flags FDFlags) (int32, error) {
   304  	files := []*vfs.FileDescription{file}
   305  	fileSlice, error := f.NewFDs(ctx, minFD, files, flags)
   306  	if error != nil {
   307  		return -1, error
   308  	}
   309  	return fileSlice[0], nil
   310  }
   311  
   312  // NewFDAt sets the file reference for the given FD. If there is an active
   313  // reference for that FD, the ref count for that existing reference is
   314  // decremented.
   315  func (f *FDTable) NewFDAt(ctx context.Context, fd int32, file *vfs.FileDescription, flags FDFlags) error {
   316  	df, err := f.newFDAt(ctx, fd, file, flags)
   317  	if err != nil {
   318  		return err
   319  	}
   320  	if df != nil {
   321  		f.drop(ctx, df)
   322  	}
   323  	return nil
   324  }
   325  
   326  func (f *FDTable) newFDAt(ctx context.Context, fd int32, file *vfs.FileDescription, flags FDFlags) (*vfs.FileDescription, error) {
   327  	if fd < 0 {
   328  		// Don't accept negative FDs.
   329  		return nil, unix.EBADF
   330  	}
   331  
   332  	// Check the limit for the provided file.
   333  	if limitSet := limits.FromContext(ctx); limitSet != nil {
   334  		if lim := limitSet.Get(limits.NumberOfFiles); lim.Cur != limits.Infinity && uint64(fd) >= lim.Cur {
   335  			return nil, unix.EMFILE
   336  		}
   337  	}
   338  
   339  	// Install the entry.
   340  	f.mu.Lock()
   341  	defer f.mu.Unlock()
   342  
   343  	df := f.set(fd, file, flags)
   344  	// Add fd to fdBitmap.
   345  	if file != nil {
   346  		f.fdBitmap.Add(uint32(fd))
   347  	}
   348  
   349  	return df, nil
   350  }
   351  
   352  // SetFlags sets the flags for the given file descriptor.
   353  //
   354  // True is returned iff flags were changed.
   355  func (f *FDTable) SetFlags(ctx context.Context, fd int32, flags FDFlags) error {
   356  	if fd < 0 {
   357  		// Don't accept negative FDs.
   358  		return unix.EBADF
   359  	}
   360  
   361  	f.mu.Lock()
   362  	defer f.mu.Unlock()
   363  
   364  	file, _, _ := f.get(fd)
   365  	if file == nil {
   366  		// No file found.
   367  		return unix.EBADF
   368  	}
   369  
   370  	// Update the flags.
   371  	f.set(fd, file, flags)
   372  	return nil
   373  }
   374  
   375  // SetFlagsForRange sets the flags for the given range of file descriptors
   376  // (inclusive: [startFd, endFd]).
   377  func (f *FDTable) SetFlagsForRange(ctx context.Context, startFd int32, endFd int32, flags FDFlags) error {
   378  	if startFd < 0 || startFd > endFd {
   379  		return unix.EBADF
   380  	}
   381  
   382  	f.mu.Lock()
   383  	defer f.mu.Unlock()
   384  
   385  	for fd, err := f.fdBitmap.FirstOne(uint32(startFd)); err == nil && fd <= uint32(endFd); fd, err = f.fdBitmap.FirstOne(fd + 1) {
   386  		fdI32 := int32(fd)
   387  		file, _, _ := f.get(fdI32)
   388  		f.set(fdI32, file, flags)
   389  	}
   390  
   391  	return nil
   392  }
   393  
   394  // Get returns a reference to the file and the flags for the FD or nil if no
   395  // file is defined for the given fd.
   396  //
   397  // N.B. Callers are required to use DecRef when they are done.
   398  //
   399  //go:nosplit
   400  func (f *FDTable) Get(fd int32) (*vfs.FileDescription, FDFlags) {
   401  	if fd < 0 {
   402  		return nil, FDFlags{}
   403  	}
   404  
   405  	for {
   406  		file, flags, _ := f.get(fd)
   407  		if file != nil {
   408  			if !file.TryIncRef() {
   409  				continue // Race caught.
   410  			}
   411  			// Reference acquired.
   412  			return file, flags
   413  		}
   414  		// No file available.
   415  		return nil, FDFlags{}
   416  	}
   417  }
   418  
   419  // GetFDs returns a sorted list of valid fds.
   420  //
   421  // Precondition: The caller must be running on the task goroutine, or Task.mu
   422  // must be locked.
   423  func (f *FDTable) GetFDs(ctx context.Context) []int32 {
   424  	f.mu.Lock()
   425  	defer f.mu.Unlock()
   426  	fds := make([]int32, 0, int(f.fdBitmap.GetNumOnes()))
   427  	f.forEach(ctx, func(fd int32, _ *vfs.FileDescription, _ FDFlags) {
   428  		fds = append(fds, fd)
   429  	})
   430  	return fds
   431  }
   432  
   433  // Exists returns whether fd is defined in the table. It is inherently racy.
   434  //
   435  //go:nosplit
   436  func (f *FDTable) Exists(fd int32) bool {
   437  	if fd < 0 {
   438  		return false
   439  	}
   440  	file, _, _ := f.get(fd)
   441  	return file != nil
   442  }
   443  
   444  // Fork returns an independent FDTable, cloning all FDs up to maxFds (non-inclusive).
   445  func (f *FDTable) Fork(ctx context.Context, maxFd int32) *FDTable {
   446  	clone := f.k.NewFDTable()
   447  	f.mu.Lock()
   448  	defer f.mu.Unlock()
   449  	f.forEachUpTo(ctx, maxFd, func(fd int32, file *vfs.FileDescription, flags FDFlags) {
   450  		// The set function here will acquire an appropriate table
   451  		// reference for the clone. We don't need anything else.
   452  		if df := clone.set(fd, file, flags); df != nil {
   453  			panic("file set")
   454  		}
   455  		clone.fdBitmap.Add(uint32(fd))
   456  	})
   457  	return clone
   458  }
   459  
   460  // Remove removes an FD from and returns a tuple where one of the files is non-nil
   461  // iff successful.
   462  //
   463  // N.B. Callers are required to use DecRef on the returned file when they are done.
   464  func (f *FDTable) Remove(ctx context.Context, fd int32) *vfs.FileDescription {
   465  	if fd < 0 {
   466  		return nil
   467  	}
   468  
   469  	f.mu.Lock()
   470  	file, _, _ := f.get(fd)
   471  	if file != nil {
   472  		// Add reference for caller.
   473  		file.IncRef()
   474  		file = f.set(fd, nil, FDFlags{}) // Zap entry.
   475  		f.fdBitmap.Remove(uint32(fd))
   476  	}
   477  	f.mu.Unlock()
   478  
   479  	if file != nil {
   480  		f.drop(ctx, file)
   481  	}
   482  	return file
   483  }
   484  
   485  // RemoveIf removes all FDs where cond is true.
   486  func (f *FDTable) RemoveIf(ctx context.Context, cond func(*vfs.FileDescription, FDFlags) bool) {
   487  	var files []*vfs.FileDescription
   488  
   489  	f.mu.Lock()
   490  	f.forEach(ctx, func(fd int32, file *vfs.FileDescription, flags FDFlags) {
   491  		if cond(file, flags) {
   492  			df := f.set(fd, nil, FDFlags{}) // Clear from table.
   493  			f.fdBitmap.Remove(uint32(fd))
   494  			if df != nil {
   495  				files = append(files, df)
   496  			}
   497  		}
   498  	})
   499  	f.mu.Unlock()
   500  
   501  	for _, file := range files {
   502  		f.drop(ctx, file)
   503  	}
   504  }
   505  
   506  // RemoveNextInRange removes the next FD that falls within the given range,
   507  // and returns a tuple where one of the files is non-nil iff successful.
   508  //
   509  // N.B. Callers are required to use DecRef on the returned file when they are done.
   510  func (f *FDTable) RemoveNextInRange(ctx context.Context, startFd int32, endFd int32) (int32, *vfs.FileDescription) {
   511  	if startFd < 0 || startFd > endFd {
   512  		return MaxFdLimit, nil
   513  	}
   514  
   515  	f.mu.Lock()
   516  
   517  	fdUint, err := f.fdBitmap.FirstOne(uint32(startFd))
   518  	fd := int32(fdUint)
   519  	if err != nil || fd > endFd {
   520  		f.mu.Unlock()
   521  		return MaxFdLimit, nil
   522  	}
   523  	file, _, _ := f.get(fd)
   524  
   525  	if file != nil {
   526  		// Add reference for caller.
   527  		file.IncRef()
   528  		file = f.set(fd, nil, FDFlags{}) // Zap entry.
   529  		f.fdBitmap.Remove(uint32(fd))
   530  	}
   531  	f.mu.Unlock()
   532  
   533  	if file != nil {
   534  		f.drop(ctx, file)
   535  	}
   536  
   537  	return fd, file
   538  }
   539  
   540  // GetLastFd returns the last set FD in the FDTable bitmap.
   541  func (f *FDTable) GetLastFd() int32 {
   542  	f.mu.Lock()
   543  	defer f.mu.Unlock()
   544  
   545  	last := f.fdBitmap.Maximum()
   546  	if last > bitmap.MaxBitEntryLimit {
   547  		return MaxFdLimit
   548  	}
   549  	return int32(last)
   550  }