github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/inotify.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fs
    16  
    17  import (
    18  	"io"
    19  	"sync/atomic"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    24  	"github.com/SagerNet/gvisor/pkg/hostarch"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/uniqueid"
    28  	"github.com/SagerNet/gvisor/pkg/sync"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  	"github.com/SagerNet/gvisor/pkg/usermem"
    31  	"github.com/SagerNet/gvisor/pkg/waiter"
    32  )
    33  
// Inotify represents an inotify instance created by inotify_init(2) or
// inotify_init1(2). Inotify implements the FileOperations interface.
//
// Lock ordering:
//   Inotify.mu -> Inode.Watches.mu -> Watch.mu -> Inotify.evMu
//
// +stateify savable
type Inotify struct {
	// id is a unique identifier for this inotify instance. We don't just
	// reuse the inotify fd because fds can be duped. These should not be
	// exposed to the user, since we may aggressively reuse an id on S/R.
	id uint64

	waiter.Queue `state:"nosave"`

	// evMu *only* protects the events list. We need a separate lock because
	// while queuing events, a watch needs to lock the event queue, and using mu
	// for that would violate lock ordering since at that point the calling
	// goroutine already holds Watch.target.Watches.mu.
	evMu sync.Mutex `state:"nosave"`

	// events is the list of pending events for this inotify instance.
	// Protected by evMu.
	events eventList

	// scratch is a scratch buffer used to serialize inotify events. We
	// allocate it ahead of time and reuse it for performance. Protected by
	// evMu.
	scratch []byte

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// nextWatch is the next watch descriptor number to use for this inotify
	// instance. Note that Linux starts numbering watch descriptors from 1.
	nextWatch int32

	// watches maps watch descriptors to watch objects.
	watches map[int32]*Watch
}
    72  
    73  // NewInotify constructs a new Inotify instance.
    74  func NewInotify(ctx context.Context) *Inotify {
    75  	return &Inotify{
    76  		id:        uniqueid.GlobalFromContext(ctx),
    77  		scratch:   make([]byte, inotifyEventBaseSize),
    78  		nextWatch: 1, // Linux starts numbering watch descriptors from 1.
    79  		watches:   make(map[int32]*Watch),
    80  	}
    81  }
    82  
    83  // Release implements FileOperations.Release. Release removes all watches and
    84  // frees all resources for an inotify instance.
    85  func (i *Inotify) Release(ctx context.Context) {
    86  	// We need to hold i.mu to avoid a race with concurrent calls to
    87  	// Inotify.targetDestroyed from Watches. There's no risk of Watches
    88  	// accessing this Inotify after the destructor ends, because we remove all
    89  	// references to it below.
    90  	i.mu.Lock()
    91  	defer i.mu.Unlock()
    92  	for _, w := range i.watches {
    93  		// Remove references to the watch from the watch target. We don't need
    94  		// to worry about the references from the owner instance, since we're in
    95  		// the owner's destructor.
    96  		w.target.Watches.Remove(w.ID())
    97  		// Don't leak any references to the target, held by pins in the watch.
    98  		w.destroy(ctx)
    99  	}
   100  }
   101  
   102  // Readiness implements waiter.Waitable.Readiness.
   103  //
   104  // Readiness indicates whether there are pending events for an inotify instance.
   105  func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask {
   106  	ready := waiter.EventMask(0)
   107  
   108  	i.evMu.Lock()
   109  	defer i.evMu.Unlock()
   110  
   111  	if !i.events.Empty() {
   112  		ready |= waiter.ReadableEvents
   113  	}
   114  
   115  	return mask & ready
   116  }
   117  
   118  // Seek implements FileOperations.Seek.
   119  func (*Inotify) Seek(context.Context, *File, SeekWhence, int64) (int64, error) {
   120  	return 0, linuxerr.ESPIPE
   121  }
   122  
   123  // Readdir implements FileOperatons.Readdir.
   124  func (*Inotify) Readdir(context.Context, *File, DentrySerializer) (int64, error) {
   125  	return 0, syserror.ENOTDIR
   126  }
   127  
   128  // Write implements FileOperations.Write.
   129  func (*Inotify) Write(context.Context, *File, usermem.IOSequence, int64) (int64, error) {
   130  	return 0, linuxerr.EBADF
   131  }
   132  
   133  // Read implements FileOperations.Read.
   134  func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ int64) (int64, error) {
   135  	if dst.NumBytes() < inotifyEventBaseSize {
   136  		return 0, linuxerr.EINVAL
   137  	}
   138  
   139  	i.evMu.Lock()
   140  	defer i.evMu.Unlock()
   141  
   142  	if i.events.Empty() {
   143  		// Nothing to read yet, tell caller to block.
   144  		return 0, syserror.ErrWouldBlock
   145  	}
   146  
   147  	var writeLen int64
   148  	for it := i.events.Front(); it != nil; {
   149  		event := it
   150  		it = it.Next()
   151  
   152  		// Does the buffer have enough remaining space to hold the event we're
   153  		// about to write out?
   154  		if dst.NumBytes() < int64(event.sizeOf()) {
   155  			if writeLen > 0 {
   156  				// Buffer wasn't big enough for all pending events, but we did
   157  				// write some events out.
   158  				return writeLen, nil
   159  			}
   160  			return 0, linuxerr.EINVAL
   161  		}
   162  
   163  		// Linux always dequeues an available event as long as there's enough
   164  		// buffer space to copy it out, even if the copy below fails. Emulate
   165  		// this behaviour.
   166  		i.events.Remove(event)
   167  
   168  		// Buffer has enough space, copy event to the read buffer.
   169  		n, err := event.CopyTo(ctx, i.scratch, dst)
   170  		if err != nil {
   171  			return 0, err
   172  		}
   173  
   174  		writeLen += n
   175  		dst = dst.DropFirst64(n)
   176  	}
   177  	return writeLen, nil
   178  }
   179  
   180  // WriteTo implements FileOperations.WriteTo.
   181  func (*Inotify) WriteTo(context.Context, *File, io.Writer, int64, bool) (int64, error) {
   182  	return 0, syserror.ENOSYS
   183  }
   184  
   185  // Fsync implements FileOperations.Fsync.
   186  func (*Inotify) Fsync(context.Context, *File, int64, int64, SyncType) error {
   187  	return linuxerr.EINVAL
   188  }
   189  
   190  // ReadFrom implements FileOperations.ReadFrom.
   191  func (*Inotify) ReadFrom(context.Context, *File, io.Reader, int64) (int64, error) {
   192  	return 0, syserror.ENOSYS
   193  }
   194  
   195  // Flush implements FileOperations.Flush.
   196  func (*Inotify) Flush(context.Context, *File) error {
   197  	return nil
   198  }
   199  
   200  // ConfigureMMap implements FileOperations.ConfigureMMap.
   201  func (*Inotify) ConfigureMMap(context.Context, *File, *memmap.MMapOpts) error {
   202  	return linuxerr.ENODEV
   203  }
   204  
   205  // UnstableAttr implements FileOperations.UnstableAttr.
   206  func (i *Inotify) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, error) {
   207  	return file.Dirent.Inode.UnstableAttr(ctx)
   208  }
   209  
   210  // Ioctl implements fs.FileOperations.Ioctl.
   211  func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
   212  	switch args[1].Int() {
   213  	case linux.FIONREAD:
   214  		i.evMu.Lock()
   215  		defer i.evMu.Unlock()
   216  		var n uint32
   217  		for e := i.events.Front(); e != nil; e = e.Next() {
   218  			n += uint32(e.sizeOf())
   219  		}
   220  		var buf [4]byte
   221  		hostarch.ByteOrder.PutUint32(buf[:], n)
   222  		_, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
   223  		return 0, err
   224  
   225  	default:
   226  		return 0, syserror.ENOTTY
   227  	}
   228  }
   229  
   230  func (i *Inotify) queueEvent(ev *Event) {
   231  	i.evMu.Lock()
   232  
   233  	// Check if we should coalesce the event we're about to queue with the last
   234  	// one currently in the queue. Events are coalesced if they are identical.
   235  	if last := i.events.Back(); last != nil {
   236  		if ev.equals(last) {
   237  			// "Coalesce" the two events by simply not queuing the new one. We
   238  			// don't need to raise a waiter.EventIn notification because no new
   239  			// data is available for reading.
   240  			i.evMu.Unlock()
   241  			return
   242  		}
   243  	}
   244  
   245  	i.events.PushBack(ev)
   246  
   247  	// Release mutex before notifying waiters because we don't control what they
   248  	// can do.
   249  	i.evMu.Unlock()
   250  
   251  	i.Queue.Notify(waiter.ReadableEvents)
   252  }
   253  
   254  // newWatchLocked creates and adds a new watch to target.
   255  func (i *Inotify) newWatchLocked(target *Dirent, mask uint32) *Watch {
   256  	wd := i.nextWatch
   257  	i.nextWatch++
   258  
   259  	watch := &Watch{
   260  		owner:  i,
   261  		wd:     wd,
   262  		mask:   mask,
   263  		target: target.Inode,
   264  		pins:   make(map[*Dirent]bool),
   265  	}
   266  
   267  	i.watches[wd] = watch
   268  
   269  	// Grab an extra reference to target to prevent it from being evicted from
   270  	// memory. This ref is dropped during either watch removal, target
   271  	// destruction, or inotify instance destruction. See callers of Watch.Unpin.
   272  	watch.Pin(target)
   273  	target.Inode.Watches.Add(watch)
   274  
   275  	return watch
   276  }
   277  
   278  // targetDestroyed is called by w to notify i that w's target is gone. This
   279  // automatically generates a watch removal event.
   280  func (i *Inotify) targetDestroyed(w *Watch) {
   281  	i.mu.Lock()
   282  	_, found := i.watches[w.wd]
   283  	delete(i.watches, w.wd)
   284  	i.mu.Unlock()
   285  
   286  	if found {
   287  		i.queueEvent(newEvent(w.wd, "", linux.IN_IGNORED, 0))
   288  	}
   289  }
   290  
   291  // AddWatch constructs a new inotify watch and adds it to the target dirent. It
   292  // returns the watch descriptor returned by inotify_add_watch(2).
   293  func (i *Inotify) AddWatch(target *Dirent, mask uint32) int32 {
   294  	// Note: Locking this inotify instance protects the result returned by
   295  	// Lookup() below. With the lock held, we know for sure the lookup result
   296  	// won't become stale because it's impossible for *this* instance to
   297  	// add/remove watches on target.
   298  	i.mu.Lock()
   299  	defer i.mu.Unlock()
   300  
   301  	// Does the target already have a watch from this inotify instance?
   302  	if existing := target.Inode.Watches.Lookup(i.id); existing != nil {
   303  		// This may be a watch on a different dirent pointing to the
   304  		// same inode. Obtain an extra reference if necessary.
   305  		existing.Pin(target)
   306  
   307  		newmask := mask
   308  		if mergeMask := mask&linux.IN_MASK_ADD != 0; mergeMask {
   309  			// "Add (OR) events to watch mask for this pathname if it already
   310  			// exists (instead of replacing mask)." -- inotify(7)
   311  			newmask |= atomic.LoadUint32(&existing.mask)
   312  		}
   313  		atomic.StoreUint32(&existing.mask, newmask)
   314  		return existing.wd
   315  	}
   316  
   317  	// No existing watch, create a new watch.
   318  	watch := i.newWatchLocked(target, mask)
   319  	return watch.wd
   320  }
   321  
   322  // RmWatch implements watcher.Watchable.RmWatch.
   323  //
   324  // RmWatch looks up an inotify watch for the given 'wd' and configures the
   325  // target dirent to stop sending events to this inotify instance.
   326  func (i *Inotify) RmWatch(ctx context.Context, wd int32) error {
   327  	i.mu.Lock()
   328  
   329  	// Find the watch we were asked to removed.
   330  	watch, ok := i.watches[wd]
   331  	if !ok {
   332  		i.mu.Unlock()
   333  		return linuxerr.EINVAL
   334  	}
   335  
   336  	// Remove the watch from this instance.
   337  	delete(i.watches, wd)
   338  
   339  	// Remove the watch from the watch target.
   340  	watch.target.Watches.Remove(watch.ID())
   341  
   342  	// The watch is now isolated and we can safely drop the instance lock. We
   343  	// need to do so because watch.destroy() acquires Watch.mu, which cannot be
   344  	// acquired with Inotify.mu held.
   345  	i.mu.Unlock()
   346  
   347  	// Generate the event for the removal.
   348  	i.queueEvent(newEvent(watch.wd, "", linux.IN_IGNORED, 0))
   349  
   350  	// Remove all pins.
   351  	watch.destroy(ctx)
   352  
   353  	return nil
   354  }