github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/vfs/inotify.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"sync/atomic"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/hostarch"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/uniqueid"
    28  	"github.com/SagerNet/gvisor/pkg/sync"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  	"github.com/SagerNet/gvisor/pkg/usermem"
    31  	"github.com/SagerNet/gvisor/pkg/waiter"
    32  )
    33  
    34  // inotifyEventBaseSize is the base size of linux's struct inotify_event. This
    35  // must be a power 2 for rounding below.
    36  const inotifyEventBaseSize = 16
    37  
    38  // EventType defines different kinds of inotfiy events.
    39  //
    40  // The way events are labelled appears somewhat arbitrary, but they must match
    41  // Linux so that IN_EXCL_UNLINK behaves as it does in Linux.
    42  //
    43  // +stateify savable
    44  type EventType uint8
    45  
    46  // PathEvent and InodeEvent correspond to FSNOTIFY_EVENT_PATH and
    47  // FSNOTIFY_EVENT_INODE in Linux.
    48  const (
    49  	PathEvent  EventType = iota
    50  	InodeEvent EventType = iota
    51  )
    52  
    53  // Inotify represents an inotify instance created by inotify_init(2) or
    54  // inotify_init1(2). Inotify implements FileDescriptionImpl.
    55  //
    56  // +stateify savable
    57  type Inotify struct {
    58  	vfsfd FileDescription
    59  	FileDescriptionDefaultImpl
    60  	DentryMetadataFileDescriptionImpl
    61  	NoLockFD
    62  
    63  	// Unique identifier for this inotify instance. We don't just reuse the
    64  	// inotify fd because fds can be duped. These should not be exposed to the
    65  	// user, since we may aggressively reuse an id on S/R.
    66  	id uint64
    67  
    68  	// queue is used to notify interested parties when the inotify instance
    69  	// becomes readable or writable.
    70  	queue waiter.Queue
    71  
    72  	// evMu *only* protects the events list. We need a separate lock while
    73  	// queuing events: using mu may violate lock ordering, since at that point
    74  	// the calling goroutine may already hold Watches.mu.
    75  	evMu sync.Mutex `state:"nosave"`
    76  
    77  	// A list of pending events for this inotify instance. Protected by evMu.
    78  	events eventList
    79  
    80  	// A scratch buffer, used to serialize inotify events. Allocate this
    81  	// ahead of time for the sake of performance. Protected by evMu.
    82  	scratch []byte
    83  
    84  	// mu protects the fields below.
    85  	mu sync.Mutex `state:"nosave"`
    86  
    87  	// nextWatchMinusOne is used to allocate watch descriptors on this Inotify
    88  	// instance. Note that Linux starts numbering watch descriptors from 1.
    89  	nextWatchMinusOne int32
    90  
    91  	// Map from watch descriptors to watch objects.
    92  	watches map[int32]*Watch
    93  }
    94  
    95  var _ FileDescriptionImpl = (*Inotify)(nil)
    96  
    97  // NewInotifyFD constructs a new Inotify instance.
    98  func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) (*FileDescription, error) {
    99  	// O_CLOEXEC affects file descriptors, so it must be handled outside of vfs.
   100  	flags &^= linux.O_CLOEXEC
   101  	if flags&^linux.O_NONBLOCK != 0 {
   102  		return nil, linuxerr.EINVAL
   103  	}
   104  
   105  	id := uniqueid.GlobalFromContext(ctx)
   106  	vd := vfsObj.NewAnonVirtualDentry(fmt.Sprintf("[inotifyfd:%d]", id))
   107  	defer vd.DecRef(ctx)
   108  	fd := &Inotify{
   109  		id:      id,
   110  		scratch: make([]byte, inotifyEventBaseSize),
   111  		watches: make(map[int32]*Watch),
   112  	}
   113  	if err := fd.vfsfd.Init(fd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{
   114  		UseDentryMetadata: true,
   115  		DenyPRead:         true,
   116  		DenyPWrite:        true,
   117  	}); err != nil {
   118  		return nil, err
   119  	}
   120  	return &fd.vfsfd, nil
   121  }
   122  
   123  // Release implements FileDescriptionImpl.Release. Release removes all
   124  // watches and frees all resources for an inotify instance.
   125  func (i *Inotify) Release(ctx context.Context) {
   126  	var ds []*Dentry
   127  
   128  	// We need to hold i.mu to avoid a race with concurrent calls to
   129  	// Inotify.handleDeletion from Watches. There's no risk of Watches
   130  	// accessing this Inotify after the destructor ends, because we remove all
   131  	// references to it below.
   132  	i.mu.Lock()
   133  	for _, w := range i.watches {
   134  		// Remove references to the watch from the watches set on the target. We
   135  		// don't need to worry about the references from i.watches, since this
   136  		// file description is about to be destroyed.
   137  		d := w.target
   138  		ws := d.Watches()
   139  		// Watchable dentries should never return a nil watch set.
   140  		if ws == nil {
   141  			panic("Cannot remove watch from an unwatchable dentry")
   142  		}
   143  		ws.Remove(i.id)
   144  		if ws.Size() == 0 {
   145  			ds = append(ds, d)
   146  		}
   147  	}
   148  	i.mu.Unlock()
   149  
   150  	for _, d := range ds {
   151  		d.OnZeroWatches(ctx)
   152  	}
   153  }
   154  
   155  // Allocate implements FileDescription.Allocate.
   156  func (i *Inotify) Allocate(ctx context.Context, mode, offset, length uint64) error {
   157  	panic("Allocate should not be called on read-only inotify fds")
   158  }
   159  
   160  // EventRegister implements waiter.Waitable.
   161  func (i *Inotify) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
   162  	i.queue.EventRegister(e, mask)
   163  }
   164  
   165  // EventUnregister implements waiter.Waitable.
   166  func (i *Inotify) EventUnregister(e *waiter.Entry) {
   167  	i.queue.EventUnregister(e)
   168  }
   169  
   170  // Readiness implements waiter.Waitable.Readiness.
   171  //
   172  // Readiness indicates whether there are pending events for an inotify instance.
   173  func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask {
   174  	ready := waiter.EventMask(0)
   175  
   176  	i.evMu.Lock()
   177  	defer i.evMu.Unlock()
   178  
   179  	if !i.events.Empty() {
   180  		ready |= waiter.ReadableEvents
   181  	}
   182  
   183  	return mask & ready
   184  }
   185  
   186  // PRead implements FileDescriptionImpl.PRead.
   187  func (*Inotify) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
   188  	return 0, linuxerr.ESPIPE
   189  }
   190  
   191  // PWrite implements FileDescriptionImpl.PWrite.
   192  func (*Inotify) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
   193  	return 0, linuxerr.ESPIPE
   194  }
   195  
   196  // Write implements FileDescriptionImpl.Write.
   197  func (*Inotify) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
   198  	return 0, linuxerr.EBADF
   199  }
   200  
   201  // Read implements FileDescriptionImpl.Read.
   202  func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
   203  	if dst.NumBytes() < inotifyEventBaseSize {
   204  		return 0, linuxerr.EINVAL
   205  	}
   206  
   207  	i.evMu.Lock()
   208  	defer i.evMu.Unlock()
   209  
   210  	if i.events.Empty() {
   211  		// Nothing to read yet, tell caller to block.
   212  		return 0, syserror.ErrWouldBlock
   213  	}
   214  
   215  	var writeLen int64
   216  	for it := i.events.Front(); it != nil; {
   217  		// Advance `it` before the element is removed from the list, or else
   218  		// it.Next() will always be nil.
   219  		event := it
   220  		it = it.Next()
   221  
   222  		// Does the buffer have enough remaining space to hold the event we're
   223  		// about to write out?
   224  		if dst.NumBytes() < int64(event.sizeOf()) {
   225  			if writeLen > 0 {
   226  				// Buffer wasn't big enough for all pending events, but we did
   227  				// write some events out.
   228  				return writeLen, nil
   229  			}
   230  			return 0, linuxerr.EINVAL
   231  		}
   232  
   233  		// Linux always dequeues an available event as long as there's enough
   234  		// buffer space to copy it out, even if the copy below fails. Emulate
   235  		// this behaviour.
   236  		i.events.Remove(event)
   237  
   238  		// Buffer has enough space, copy event to the read buffer.
   239  		n, err := event.CopyTo(ctx, i.scratch, dst)
   240  		if err != nil {
   241  			return 0, err
   242  		}
   243  
   244  		writeLen += n
   245  		dst = dst.DropFirst64(n)
   246  	}
   247  	return writeLen, nil
   248  }
   249  
   250  // Ioctl implements FileDescriptionImpl.Ioctl.
   251  func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
   252  	switch args[1].Int() {
   253  	case linux.FIONREAD:
   254  		i.evMu.Lock()
   255  		defer i.evMu.Unlock()
   256  		var n uint32
   257  		for e := i.events.Front(); e != nil; e = e.Next() {
   258  			n += uint32(e.sizeOf())
   259  		}
   260  		var buf [4]byte
   261  		hostarch.ByteOrder.PutUint32(buf[:], n)
   262  		_, err := uio.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
   263  		return 0, err
   264  
   265  	default:
   266  		return 0, syserror.ENOTTY
   267  	}
   268  }
   269  
   270  func (i *Inotify) queueEvent(ev *Event) {
   271  	i.evMu.Lock()
   272  
   273  	// Check if we should coalesce the event we're about to queue with the last
   274  	// one currently in the queue. Events are coalesced if they are identical.
   275  	if last := i.events.Back(); last != nil {
   276  		if ev.equals(last) {
   277  			// "Coalesce" the two events by simply not queuing the new one. We
   278  			// don't need to raise a waiter.EventIn notification because no new
   279  			// data is available for reading.
   280  			i.evMu.Unlock()
   281  			return
   282  		}
   283  	}
   284  
   285  	i.events.PushBack(ev)
   286  
   287  	// Release mutex before notifying waiters because we don't control what they
   288  	// can do.
   289  	i.evMu.Unlock()
   290  
   291  	i.queue.Notify(waiter.ReadableEvents)
   292  }
   293  
   294  // newWatchLocked creates and adds a new watch to target.
   295  //
   296  // Precondition: i.mu must be locked. ws must be the watch set for target d.
   297  func (i *Inotify) newWatchLocked(d *Dentry, ws *Watches, mask uint32) *Watch {
   298  	w := &Watch{
   299  		owner:  i,
   300  		wd:     i.nextWatchIDLocked(),
   301  		target: d,
   302  		mask:   mask,
   303  	}
   304  
   305  	// Hold the watch in this inotify instance as well as the watch set on the
   306  	// target.
   307  	i.watches[w.wd] = w
   308  	ws.Add(w)
   309  	return w
   310  }
   311  
   312  // newWatchIDLocked allocates and returns a new watch descriptor.
   313  //
   314  // Precondition: i.mu must be locked.
   315  func (i *Inotify) nextWatchIDLocked() int32 {
   316  	i.nextWatchMinusOne++
   317  	return i.nextWatchMinusOne
   318  }
   319  
   320  // AddWatch constructs a new inotify watch and adds it to the target. It
   321  // returns the watch descriptor returned by inotify_add_watch(2).
   322  //
   323  // The caller must hold a reference on target.
   324  func (i *Inotify) AddWatch(target *Dentry, mask uint32) (int32, error) {
   325  	// Note: Locking this inotify instance protects the result returned by
   326  	// Lookup() below. With the lock held, we know for sure the lookup result
   327  	// won't become stale because it's impossible for *this* instance to
   328  	// add/remove watches on target.
   329  	i.mu.Lock()
   330  	defer i.mu.Unlock()
   331  
   332  	ws := target.Watches()
   333  	if ws == nil {
   334  		// While Linux supports inotify watches on all filesystem types, watches on
   335  		// filesystems like kernfs are not generally useful, so we do not.
   336  		return 0, linuxerr.EPERM
   337  	}
   338  	// Does the target already have a watch from this inotify instance?
   339  	if existing := ws.Lookup(i.id); existing != nil {
   340  		newmask := mask
   341  		if mask&linux.IN_MASK_ADD != 0 {
   342  			// "Add (OR) events to watch mask for this pathname if it already
   343  			// exists (instead of replacing mask)." -- inotify(7)
   344  			newmask |= atomic.LoadUint32(&existing.mask)
   345  		}
   346  		atomic.StoreUint32(&existing.mask, newmask)
   347  		return existing.wd, nil
   348  	}
   349  
   350  	// No existing watch, create a new watch.
   351  	w := i.newWatchLocked(target, ws, mask)
   352  	return w.wd, nil
   353  }
   354  
   355  // RmWatch looks up an inotify watch for the given 'wd' and configures the
   356  // target to stop sending events to this inotify instance.
   357  func (i *Inotify) RmWatch(ctx context.Context, wd int32) error {
   358  	i.mu.Lock()
   359  
   360  	// Find the watch we were asked to removed.
   361  	w, ok := i.watches[wd]
   362  	if !ok {
   363  		i.mu.Unlock()
   364  		return linuxerr.EINVAL
   365  	}
   366  
   367  	// Remove the watch from this instance.
   368  	delete(i.watches, wd)
   369  
   370  	// Remove the watch from the watch target.
   371  	ws := w.target.Watches()
   372  	// AddWatch ensures that w.target has a non-nil watch set.
   373  	if ws == nil {
   374  		panic("Watched dentry cannot have nil watch set")
   375  	}
   376  	ws.Remove(w.OwnerID())
   377  	remaining := ws.Size()
   378  	i.mu.Unlock()
   379  
   380  	if remaining == 0 {
   381  		w.target.OnZeroWatches(ctx)
   382  	}
   383  
   384  	// Generate the event for the removal.
   385  	i.queueEvent(newEvent(wd, "", linux.IN_IGNORED, 0))
   386  
   387  	return nil
   388  }
   389  
   390  // Watches is the collection of all inotify watches on a single file.
   391  //
   392  // +stateify savable
   393  type Watches struct {
   394  	// mu protects the fields below.
   395  	mu sync.RWMutex `state:"nosave"`
   396  
   397  	// ws is the map of active watches in this collection, keyed by the inotify
   398  	// instance id of the owner.
   399  	ws map[uint64]*Watch
   400  }
   401  
   402  // Size returns the number of watches held by w.
   403  func (w *Watches) Size() int {
   404  	w.mu.Lock()
   405  	defer w.mu.Unlock()
   406  	return len(w.ws)
   407  }
   408  
   409  // Lookup returns the watch owned by an inotify instance with the given id.
   410  // Returns nil if no such watch exists.
   411  //
   412  // Precondition: the inotify instance with the given id must be locked to
   413  // prevent the returned watch from being concurrently modified or replaced in
   414  // Inotify.watches.
   415  func (w *Watches) Lookup(id uint64) *Watch {
   416  	w.mu.Lock()
   417  	defer w.mu.Unlock()
   418  	return w.ws[id]
   419  }
   420  
   421  // Add adds watch into this set of watches.
   422  //
   423  // Precondition: the inotify instance with the given id must be locked.
   424  func (w *Watches) Add(watch *Watch) {
   425  	w.mu.Lock()
   426  	defer w.mu.Unlock()
   427  
   428  	owner := watch.OwnerID()
   429  	// Sanity check, we should never have two watches for one owner on the
   430  	// same target.
   431  	if _, exists := w.ws[owner]; exists {
   432  		panic(fmt.Sprintf("Watch collision with ID %+v", owner))
   433  	}
   434  	if w.ws == nil {
   435  		w.ws = make(map[uint64]*Watch)
   436  	}
   437  	w.ws[owner] = watch
   438  }
   439  
   440  // Remove removes a watch with the given id from this set of watches and
   441  // releases it. The caller is responsible for generating any watch removal
   442  // event, as appropriate. The provided id must match an existing watch in this
   443  // collection.
   444  //
   445  // Precondition: the inotify instance with the given id must be locked.
   446  func (w *Watches) Remove(id uint64) {
   447  	w.mu.Lock()
   448  	defer w.mu.Unlock()
   449  
   450  	if w.ws == nil {
   451  		// This watch set is being destroyed. The thread executing the
   452  		// destructor is already in the process of deleting all our watches. We
   453  		// got here with no references on the target because we raced with the
   454  		// destructor notifying all the watch owners of destruction. See the
   455  		// comment in Watches.HandleDeletion for why this race exists.
   456  		return
   457  	}
   458  
   459  	// It is possible for w.Remove() to be called for the same watch multiple
   460  	// times. See the treatment of one-shot watches in Watches.Notify().
   461  	if _, ok := w.ws[id]; ok {
   462  		delete(w.ws, id)
   463  	}
   464  }
   465  
   466  // Notify queues a new event with watches in this set. Watches with
   467  // IN_EXCL_UNLINK are skipped if the event is coming from a child that has been
   468  // unlinked.
   469  func (w *Watches) Notify(ctx context.Context, name string, events, cookie uint32, et EventType, unlinked bool) {
   470  	var hasExpired bool
   471  	w.mu.RLock()
   472  	for _, watch := range w.ws {
   473  		if unlinked && watch.ExcludeUnlinked() && et == PathEvent {
   474  			continue
   475  		}
   476  		if watch.Notify(name, events, cookie) {
   477  			hasExpired = true
   478  		}
   479  	}
   480  	w.mu.RUnlock()
   481  
   482  	if hasExpired {
   483  		w.cleanupExpiredWatches(ctx)
   484  	}
   485  }
   486  
   487  // This function is relatively expensive and should only be called where there
   488  // are expired watches.
   489  func (w *Watches) cleanupExpiredWatches(ctx context.Context) {
   490  	// Because of lock ordering, we cannot acquire Inotify.mu for each watch
   491  	// owner while holding w.mu. As a result, store expired watches locally
   492  	// before removing.
   493  	var toRemove []*Watch
   494  	w.mu.RLock()
   495  	for _, watch := range w.ws {
   496  		if atomic.LoadInt32(&watch.expired) == 1 {
   497  			toRemove = append(toRemove, watch)
   498  		}
   499  	}
   500  	w.mu.RUnlock()
   501  	for _, watch := range toRemove {
   502  		watch.owner.RmWatch(ctx, watch.wd)
   503  	}
   504  }
   505  
   506  // HandleDeletion is called when the watch target is destroyed. Clear the
   507  // watch set, detach watches from the inotify instances they belong to, and
   508  // generate the appropriate events.
   509  func (w *Watches) HandleDeletion(ctx context.Context) {
   510  	w.Notify(ctx, "", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */)
   511  
   512  	// As in Watches.Notify, we can't hold w.mu while acquiring Inotify.mu for
   513  	// the owner of each watch being deleted. Instead, atomically store the
   514  	// watches map in a local variable and set it to nil so we can iterate over
   515  	// it with the assurance that there will be no concurrent accesses.
   516  	var ws map[uint64]*Watch
   517  	w.mu.Lock()
   518  	ws = w.ws
   519  	w.ws = nil
   520  	w.mu.Unlock()
   521  
   522  	// Remove each watch from its owner's watch set, and generate a corresponding
   523  	// watch removal event.
   524  	for _, watch := range ws {
   525  		i := watch.owner
   526  		i.mu.Lock()
   527  		_, found := i.watches[watch.wd]
   528  		delete(i.watches, watch.wd)
   529  
   530  		// Release mutex before notifying waiters because we don't control what
   531  		// they can do.
   532  		i.mu.Unlock()
   533  
   534  		// If watch was not found, it was removed from the inotify instance before
   535  		// we could get to it, in which case we should not generate an event.
   536  		if found {
   537  			i.queueEvent(newEvent(watch.wd, "", linux.IN_IGNORED, 0))
   538  		}
   539  	}
   540  }
   541  
   542  // Watch represent a particular inotify watch created by inotify_add_watch.
   543  //
   544  // +stateify savable
   545  type Watch struct {
   546  	// Inotify instance which owns this watch.
   547  	//
   548  	// This field is immutable after creation.
   549  	owner *Inotify
   550  
   551  	// Descriptor for this watch. This is unique across an inotify instance.
   552  	//
   553  	// This field is immutable after creation.
   554  	wd int32
   555  
   556  	// target is a dentry representing the watch target. Its watch set contains this watch.
   557  	//
   558  	// This field is immutable after creation.
   559  	target *Dentry
   560  
   561  	// Events being monitored via this watch. Must be accessed with atomic
   562  	// memory operations.
   563  	mask uint32
   564  
   565  	// expired is set to 1 to indicate that this watch is a one-shot that has
   566  	// already sent a notification and therefore can be removed. Must be accessed
   567  	// with atomic memory operations.
   568  	expired int32
   569  }
   570  
   571  // OwnerID returns the id of the inotify instance that owns this watch.
   572  func (w *Watch) OwnerID() uint64 {
   573  	return w.owner.id
   574  }
   575  
   576  // ExcludeUnlinked indicates whether the watched object should continue to be
   577  // notified of events originating from a path that has been unlinked.
   578  //
   579  // For example, if "foo/bar" is opened and then unlinked, operations on the
   580  // open fd may be ignored by watches on "foo" and "foo/bar" with IN_EXCL_UNLINK.
   581  func (w *Watch) ExcludeUnlinked() bool {
   582  	return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0
   583  }
   584  
   585  // Notify queues a new event on this watch. Returns true if this is a one-shot
   586  // watch that should be deleted, after this event was successfully queued.
   587  func (w *Watch) Notify(name string, events uint32, cookie uint32) bool {
   588  	if atomic.LoadInt32(&w.expired) == 1 {
   589  		// This is a one-shot watch that is already in the process of being
   590  		// removed. This may happen if a second event reaches the watch target
   591  		// before this watch has been removed.
   592  		return false
   593  	}
   594  
   595  	mask := atomic.LoadUint32(&w.mask)
   596  	if mask&events == 0 {
   597  		// We weren't watching for this event.
   598  		return false
   599  	}
   600  
   601  	// Event mask should include bits matched from the watch plus all control
   602  	// event bits.
   603  	unmaskableBits := ^uint32(0) &^ linux.IN_ALL_EVENTS
   604  	effectiveMask := unmaskableBits | mask
   605  	matchedEvents := effectiveMask & events
   606  	w.owner.queueEvent(newEvent(w.wd, name, matchedEvents, cookie))
   607  	if mask&linux.IN_ONESHOT != 0 {
   608  		atomic.StoreInt32(&w.expired, 1)
   609  		return true
   610  	}
   611  	return false
   612  }
   613  
   614  // Event represents a struct inotify_event from linux.
   615  //
   616  // +stateify savable
   617  type Event struct {
   618  	eventEntry
   619  
   620  	wd     int32
   621  	mask   uint32
   622  	cookie uint32
   623  
   624  	// len is computed based on the name field is set automatically by
   625  	// Event.setName. It should be 0 when no name is set; otherwise it is the
   626  	// length of the name slice.
   627  	len uint32
   628  
   629  	// The name field has special padding requirements and should only be set by
   630  	// calling Event.setName.
   631  	name []byte
   632  }
   633  
   634  func newEvent(wd int32, name string, events, cookie uint32) *Event {
   635  	e := &Event{
   636  		wd:     wd,
   637  		mask:   events,
   638  		cookie: cookie,
   639  	}
   640  	if name != "" {
   641  		e.setName(name)
   642  	}
   643  	return e
   644  }
   645  
   646  // paddedBytes converts a go string to a null-terminated c-string, padded with
   647  // null bytes to a total size of 'l'. 'l' must be large enough for all the bytes
   648  // in the 's' plus at least one null byte.
   649  func paddedBytes(s string, l uint32) []byte {
   650  	if l < uint32(len(s)+1) {
   651  		panic("Converting string to byte array results in truncation, this can lead to buffer-overflow due to the missing null-byte!")
   652  	}
   653  	b := make([]byte, l)
   654  	copy(b, s)
   655  
   656  	// b was zero-value initialized during make(), so the rest of the slice is
   657  	// already filled with null bytes.
   658  
   659  	return b
   660  }
   661  
   662  // setName sets the optional name for this event.
   663  func (e *Event) setName(name string) {
   664  	// We need to pad the name such that the entire event length ends up a
   665  	// multiple of inotifyEventBaseSize.
   666  	unpaddedLen := len(name) + 1
   667  	// Round up to nearest multiple of inotifyEventBaseSize.
   668  	e.len = uint32((unpaddedLen + inotifyEventBaseSize - 1) & ^(inotifyEventBaseSize - 1))
   669  	// Make sure we haven't overflowed and wrapped around when rounding.
   670  	if unpaddedLen > int(e.len) {
   671  		panic("Overflow when rounding inotify event size, the 'name' field was too big.")
   672  	}
   673  	e.name = paddedBytes(name, e.len)
   674  }
   675  
   676  func (e *Event) sizeOf() int {
   677  	s := inotifyEventBaseSize + int(e.len)
   678  	if s < inotifyEventBaseSize {
   679  		panic("Overflowed event size")
   680  	}
   681  	return s
   682  }
   683  
   684  // CopyTo serializes this event to dst. buf is used as a scratch buffer to
   685  // construct the output. We use a buffer allocated ahead of time for
   686  // performance. buf must be at least inotifyEventBaseSize bytes.
   687  func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) {
   688  	hostarch.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
   689  	hostarch.ByteOrder.PutUint32(buf[4:], e.mask)
   690  	hostarch.ByteOrder.PutUint32(buf[8:], e.cookie)
   691  	hostarch.ByteOrder.PutUint32(buf[12:], e.len)
   692  
   693  	writeLen := 0
   694  
   695  	n, err := dst.CopyOut(ctx, buf)
   696  	if err != nil {
   697  		return 0, err
   698  	}
   699  	writeLen += n
   700  	dst = dst.DropFirst(n)
   701  
   702  	if e.len > 0 {
   703  		n, err = dst.CopyOut(ctx, e.name)
   704  		if err != nil {
   705  			return 0, err
   706  		}
   707  		writeLen += n
   708  	}
   709  
   710  	// Santiy check.
   711  	if writeLen != e.sizeOf() {
   712  		panic(fmt.Sprintf("Serialized unexpected amount of data for an event, expected %d, wrote %d.", e.sizeOf(), writeLen))
   713  	}
   714  
   715  	return int64(writeLen), nil
   716  }
   717  
   718  func (e *Event) equals(other *Event) bool {
   719  	return e.wd == other.wd &&
   720  		e.mask == other.mask &&
   721  		e.cookie == other.cookie &&
   722  		e.len == other.len &&
   723  		bytes.Equal(e.name, other.name)
   724  }
   725  
   726  // InotifyEventFromStatMask generates the appropriate events for an operation
   727  // that set the stats specified in mask.
   728  func InotifyEventFromStatMask(mask uint32) uint32 {
   729  	var ev uint32
   730  	if mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_MODE) != 0 {
   731  		ev |= linux.IN_ATTRIB
   732  	}
   733  	if mask&linux.STATX_SIZE != 0 {
   734  		ev |= linux.IN_MODIFY
   735  	}
   736  
   737  	if (mask & (linux.STATX_ATIME | linux.STATX_MTIME)) == (linux.STATX_ATIME | linux.STATX_MTIME) {
   738  		// Both times indicates a utime(s) call.
   739  		ev |= linux.IN_ATTRIB
   740  	} else if mask&linux.STATX_ATIME != 0 {
   741  		ev |= linux.IN_ACCESS
   742  	} else if mask&linux.STATX_MTIME != 0 {
   743  		ev |= linux.IN_MODIFY
   744  	}
   745  	return ev
   746  }
   747  
   748  // InotifyRemoveChild sends the appriopriate notifications to the watch sets of
   749  // the child being removed and its parent. Note that unlike most pairs of
   750  // parent/child notifications, the child is notified first in this case.
   751  func InotifyRemoveChild(ctx context.Context, self, parent *Watches, name string) {
   752  	if self != nil {
   753  		self.Notify(ctx, "", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */)
   754  	}
   755  	if parent != nil {
   756  		parent.Notify(ctx, name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */)
   757  	}
   758  }
   759  
   760  // InotifyRename sends the appriopriate notifications to the watch sets of the
   761  // file being renamed and its old/new parents.
   762  func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, oldName, newName string, isDir bool) {
   763  	var dirEv uint32
   764  	if isDir {
   765  		dirEv = linux.IN_ISDIR
   766  	}
   767  	cookie := uniqueid.InotifyCookie(ctx)
   768  	if oldParent != nil {
   769  		oldParent.Notify(ctx, oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */)
   770  	}
   771  	if newParent != nil {
   772  		newParent.Notify(ctx, newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */)
   773  	}
   774  	// Somewhat surprisingly, self move events do not have a cookie.
   775  	if renamed != nil {
   776  		renamed.Notify(ctx, "", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */)
   777  	}
   778  }