github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/vfs/inotify.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/uniqueid"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/usermem"
    30  	"github.com/nicocha30/gvisor-ligolo/pkg/waiter"
    31  )
    32  
// inotifyEventBaseSize is the size in bytes of the fixed-length header that
// precedes the optional name in Linux's struct inotify_event. This must be a
// power of 2 for the rounding performed in Event.setName.
const inotifyEventBaseSize = 16
    36  
// EventType defines different kinds of inotify events.
//
// The way events are labelled appears somewhat arbitrary, but they must match
// Linux so that IN_EXCL_UNLINK behaves as it does in Linux.
//
// +stateify savable
type EventType uint8
    44  
    45  // PathEvent and InodeEvent correspond to FSNOTIFY_EVENT_PATH and
    46  // FSNOTIFY_EVENT_INODE in Linux.
    47  const (
    48  	PathEvent  EventType = iota
    49  	InodeEvent EventType = iota
    50  )
    51  
// Inotify represents an inotify instance created by inotify_init(2) or
// inotify_init1(2). Inotify implements FileDescriptionImpl.
//
// Two locks are used: evMu guards the pending event queue and the scratch
// buffer used to serialize events, while mu guards watch bookkeeping.
//
// +stateify savable
type Inotify struct {
	vfsfd FileDescription
	FileDescriptionDefaultImpl
	DentryMetadataFileDescriptionImpl
	NoLockFD

	// Unique identifier for this inotify instance. We don't just reuse the
	// inotify fd because fds can be duped. These should not be exposed to the
	// user, since we may aggressively reuse an id on S/R.
	id uint64

	// queue is used to notify interested parties when the inotify instance
	// becomes readable or writable.
	queue waiter.Queue

	// evMu protects the events list and the scratch buffer, and nothing else.
	// We need a separate lock while queuing events: using mu may violate lock
	// ordering, since at that point the calling goroutine may already hold
	// Watches.mu.
	evMu inotifyEventMutex `state:"nosave"`

	// A list of pending events for this inotify instance. Protected by evMu.
	events eventList

	// A scratch buffer, used to serialize inotify events. Allocate this
	// ahead of time for the sake of performance. Protected by evMu.
	scratch []byte

	// mu protects the fields below.
	mu inotifyMutex `state:"nosave"`

	// nextWatchMinusOne is used to allocate watch descriptors on this Inotify
	// instance. Note that Linux starts numbering watch descriptors from 1.
	nextWatchMinusOne int32

	// Map from watch descriptors to watch objects.
	watches map[int32]*Watch
}
    93  
// Compile-time check that *Inotify satisfies FileDescriptionImpl.
var _ FileDescriptionImpl = (*Inotify)(nil)
    95  
    96  // NewInotifyFD constructs a new Inotify instance.
    97  func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) (*FileDescription, error) {
    98  	// O_CLOEXEC affects file descriptors, so it must be handled outside of vfs.
    99  	flags &^= linux.O_CLOEXEC
   100  	if flags&^linux.O_NONBLOCK != 0 {
   101  		return nil, linuxerr.EINVAL
   102  	}
   103  
   104  	id := uniqueid.GlobalFromContext(ctx)
   105  	vd := vfsObj.NewAnonVirtualDentry(fmt.Sprintf("[inotifyfd:%d]", id))
   106  	defer vd.DecRef(ctx)
   107  	fd := &Inotify{
   108  		id:      id,
   109  		scratch: make([]byte, inotifyEventBaseSize),
   110  		watches: make(map[int32]*Watch),
   111  	}
   112  	if err := fd.vfsfd.Init(fd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{
   113  		UseDentryMetadata: true,
   114  		DenyPRead:         true,
   115  		DenyPWrite:        true,
   116  	}); err != nil {
   117  		return nil, err
   118  	}
   119  	return &fd.vfsfd, nil
   120  }
   121  
   122  // Release implements FileDescriptionImpl.Release. Release removes all
   123  // watches and frees all resources for an inotify instance.
   124  func (i *Inotify) Release(ctx context.Context) {
   125  	var ds []*Dentry
   126  
   127  	// We need to hold i.mu to avoid a race with concurrent calls to
   128  	// Inotify.handleDeletion from Watches. There's no risk of Watches
   129  	// accessing this Inotify after the destructor ends, because we remove all
   130  	// references to it below.
   131  	i.mu.Lock()
   132  	for _, w := range i.watches {
   133  		// Remove references to the watch from the watches set on the target. We
   134  		// don't need to worry about the references from i.watches, since this
   135  		// file description is about to be destroyed.
   136  		d := w.target
   137  		ws := d.Watches()
   138  		// Watchable dentries should never return a nil watch set.
   139  		if ws == nil {
   140  			panic("Cannot remove watch from an unwatchable dentry")
   141  		}
   142  		ws.Remove(i.id)
   143  		if ws.Size() == 0 {
   144  			ds = append(ds, d)
   145  		}
   146  	}
   147  	i.mu.Unlock()
   148  
   149  	for _, d := range ds {
   150  		d.OnZeroWatches(ctx)
   151  	}
   152  }
   153  
// Allocate implements FileDescription.Allocate.
//
// It unconditionally panics: inotify fds are read-only, so this method is
// never expected to be reached.
func (i *Inotify) Allocate(ctx context.Context, mode, offset, length uint64) error {
	panic("Allocate should not be called on read-only inotify fds")
}
   158  
// EventRegister implements waiter.Waitable.
//
// It registers e on this instance's wait queue and always returns nil.
func (i *Inotify) EventRegister(e *waiter.Entry) error {
	i.queue.EventRegister(e)
	return nil
}
   164  
// EventUnregister implements waiter.Waitable.
//
// It removes e from this instance's wait queue.
func (i *Inotify) EventUnregister(e *waiter.Entry) {
	i.queue.EventUnregister(e)
}
   169  
   170  // Readiness implements waiter.Waitable.Readiness.
   171  //
   172  // Readiness indicates whether there are pending events for an inotify instance.
   173  func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask {
   174  	ready := waiter.EventMask(0)
   175  
   176  	i.evMu.Lock()
   177  	defer i.evMu.Unlock()
   178  
   179  	if !i.events.Empty() {
   180  		ready |= waiter.ReadableEvents
   181  	}
   182  
   183  	return mask & ready
   184  }
   185  
// Epollable implements FileDescriptionImpl.Epollable. It always reports true.
func (i *Inotify) Epollable() bool {
	return true
}
   190  
// PRead implements FileDescriptionImpl.PRead.
//
// Positional reads are not supported; ESPIPE is always returned.
func (*Inotify) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
	return 0, linuxerr.ESPIPE
}
   195  
// PWrite implements FileDescriptionImpl.PWrite.
//
// Positional writes are not supported; ESPIPE is always returned.
func (*Inotify) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
	return 0, linuxerr.ESPIPE
}
   200  
// Write implements FileDescriptionImpl.Write.
//
// Writes are not supported; EBADF is always returned.
func (*Inotify) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
	return 0, linuxerr.EBADF
}
   205  
   206  // Read implements FileDescriptionImpl.Read.
   207  func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
   208  	if dst.NumBytes() < inotifyEventBaseSize {
   209  		return 0, linuxerr.EINVAL
   210  	}
   211  
   212  	i.evMu.Lock()
   213  	defer i.evMu.Unlock()
   214  
   215  	if i.events.Empty() {
   216  		// Nothing to read yet, tell caller to block.
   217  		return 0, linuxerr.ErrWouldBlock
   218  	}
   219  
   220  	var writeLen int64
   221  	for it := i.events.Front(); it != nil; {
   222  		// Advance `it` before the element is removed from the list, or else
   223  		// it.Next() will always be nil.
   224  		event := it
   225  		it = it.Next()
   226  
   227  		// Does the buffer have enough remaining space to hold the event we're
   228  		// about to write out?
   229  		if dst.NumBytes() < int64(event.sizeOf()) {
   230  			if writeLen > 0 {
   231  				// Buffer wasn't big enough for all pending events, but we did
   232  				// write some events out.
   233  				return writeLen, nil
   234  			}
   235  			return 0, linuxerr.EINVAL
   236  		}
   237  
   238  		// Linux always dequeues an available event as long as there's enough
   239  		// buffer space to copy it out, even if the copy below fails. Emulate
   240  		// this behaviour.
   241  		i.events.Remove(event)
   242  
   243  		// Buffer has enough space, copy event to the read buffer.
   244  		n, err := event.CopyTo(ctx, i.scratch, dst)
   245  		if err != nil {
   246  			return 0, err
   247  		}
   248  
   249  		writeLen += n
   250  		dst = dst.DropFirst64(n)
   251  	}
   252  	return writeLen, nil
   253  }
   254  
   255  // Ioctl implements FileDescriptionImpl.Ioctl.
   256  func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
   257  	switch args[1].Int() {
   258  	case linux.FIONREAD:
   259  		i.evMu.Lock()
   260  		var n uint32
   261  		for e := i.events.Front(); e != nil; e = e.Next() {
   262  			n += uint32(e.sizeOf())
   263  		}
   264  		i.evMu.Unlock()
   265  		var buf [4]byte
   266  		hostarch.ByteOrder.PutUint32(buf[:], n)
   267  		_, err := uio.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
   268  		return 0, err
   269  
   270  	default:
   271  		return 0, linuxerr.ENOTTY
   272  	}
   273  }
   274  
   275  func (i *Inotify) queueEvent(ev *Event) {
   276  	i.evMu.Lock()
   277  
   278  	// Check if we should coalesce the event we're about to queue with the last
   279  	// one currently in the queue. Events are coalesced if they are identical.
   280  	if last := i.events.Back(); last != nil {
   281  		if ev.equals(last) {
   282  			// "Coalesce" the two events by simply not queuing the new one. We
   283  			// don't need to raise a waiter.EventIn notification because no new
   284  			// data is available for reading.
   285  			i.evMu.Unlock()
   286  			return
   287  		}
   288  	}
   289  
   290  	i.events.PushBack(ev)
   291  
   292  	// Release mutex before notifying waiters because we don't control what they
   293  	// can do.
   294  	i.evMu.Unlock()
   295  
   296  	i.queue.Notify(waiter.ReadableEvents)
   297  }
   298  
   299  // newWatchLocked creates and adds a new watch to target.
   300  //
   301  // Precondition: i.mu must be locked. ws must be the watch set for target d.
   302  func (i *Inotify) newWatchLocked(d *Dentry, ws *Watches, mask uint32) *Watch {
   303  	w := &Watch{
   304  		owner:  i,
   305  		wd:     i.nextWatchIDLocked(),
   306  		target: d,
   307  		mask:   atomicbitops.FromUint32(mask),
   308  	}
   309  
   310  	// Hold the watch in this inotify instance as well as the watch set on the
   311  	// target.
   312  	i.watches[w.wd] = w
   313  	ws.Add(w)
   314  	return w
   315  }
   316  
   317  // newWatchIDLocked allocates and returns a new watch descriptor.
   318  //
   319  // Precondition: i.mu must be locked.
   320  func (i *Inotify) nextWatchIDLocked() int32 {
   321  	i.nextWatchMinusOne++
   322  	return i.nextWatchMinusOne
   323  }
   324  
   325  // AddWatch constructs a new inotify watch and adds it to the target. It
   326  // returns the watch descriptor returned by inotify_add_watch(2).
   327  //
   328  // The caller must hold a reference on target.
   329  func (i *Inotify) AddWatch(target *Dentry, mask uint32) int32 {
   330  	// Note: Locking this inotify instance protects the result returned by
   331  	// Lookup() below. With the lock held, we know for sure the lookup result
   332  	// won't become stale because it's impossible for *this* instance to
   333  	// add/remove watches on target.
   334  	i.mu.Lock()
   335  	defer i.mu.Unlock()
   336  
   337  	ws := target.Watches()
   338  	// Does the target already have a watch from this inotify instance?
   339  	if existing := ws.Lookup(i.id); existing != nil {
   340  		newmask := mask
   341  		if mask&linux.IN_MASK_ADD != 0 {
   342  			// "Add (OR) events to watch mask for this pathname if it already
   343  			// exists (instead of replacing mask)." -- inotify(7)
   344  			newmask |= existing.mask.Load()
   345  		}
   346  		existing.mask.Store(newmask)
   347  		return existing.wd
   348  	}
   349  
   350  	// No existing watch, create a new watch.
   351  	w := i.newWatchLocked(target, ws, mask)
   352  	return w.wd
   353  }
   354  
   355  // RmWatch looks up an inotify watch for the given 'wd' and configures the
   356  // target to stop sending events to this inotify instance.
   357  func (i *Inotify) RmWatch(ctx context.Context, wd int32) error {
   358  	i.mu.Lock()
   359  
   360  	// Find the watch we were asked to removed.
   361  	w, ok := i.watches[wd]
   362  	if !ok {
   363  		i.mu.Unlock()
   364  		return linuxerr.EINVAL
   365  	}
   366  
   367  	// Remove the watch from this instance.
   368  	delete(i.watches, wd)
   369  
   370  	// Remove the watch from the watch target.
   371  	ws := w.target.Watches()
   372  	// AddWatch ensures that w.target has a non-nil watch set.
   373  	if ws == nil {
   374  		panic("Watched dentry cannot have nil watch set")
   375  	}
   376  	ws.Remove(w.OwnerID())
   377  	remaining := ws.Size()
   378  	i.mu.Unlock()
   379  
   380  	if remaining == 0 {
   381  		w.target.OnZeroWatches(ctx)
   382  	}
   383  
   384  	// Generate the event for the removal.
   385  	i.queueEvent(newEvent(wd, "", linux.IN_IGNORED, 0))
   386  
   387  	return nil
   388  }
   389  
// Watches is the collection of all inotify watches on a single file.
//
// +stateify savable
type Watches struct {
	// mu protects the fields below.
	mu sync.RWMutex `state:"nosave"`

	// ws is the map of active watches in this collection, keyed by the inotify
	// instance id of the owner. It is allocated lazily by Add and reset to nil
	// by HandleDeletion.
	ws map[uint64]*Watch
}
   401  
   402  // Size returns the number of watches held by w.
   403  func (w *Watches) Size() int {
   404  	w.mu.Lock()
   405  	defer w.mu.Unlock()
   406  	return len(w.ws)
   407  }
   408  
   409  // Lookup returns the watch owned by an inotify instance with the given id.
   410  // Returns nil if no such watch exists.
   411  //
   412  // Precondition: the inotify instance with the given id must be locked to
   413  // prevent the returned watch from being concurrently modified or replaced in
   414  // Inotify.watches.
   415  func (w *Watches) Lookup(id uint64) *Watch {
   416  	w.mu.Lock()
   417  	defer w.mu.Unlock()
   418  	return w.ws[id]
   419  }
   420  
   421  // Add adds watch into this set of watches.
   422  //
   423  // Precondition: the inotify instance with the given id must be locked.
   424  func (w *Watches) Add(watch *Watch) {
   425  	w.mu.Lock()
   426  	defer w.mu.Unlock()
   427  
   428  	owner := watch.OwnerID()
   429  	// Sanity check, we should never have two watches for one owner on the
   430  	// same target.
   431  	if _, exists := w.ws[owner]; exists {
   432  		panic(fmt.Sprintf("Watch collision with ID %+v", owner))
   433  	}
   434  	if w.ws == nil {
   435  		w.ws = make(map[uint64]*Watch)
   436  	}
   437  	w.ws[owner] = watch
   438  }
   439  
   440  // Remove removes a watch with the given id from this set of watches and
   441  // releases it. The caller is responsible for generating any watch removal
   442  // event, as appropriate. The provided id must match an existing watch in this
   443  // collection.
   444  //
   445  // Precondition: the inotify instance with the given id must be locked.
   446  func (w *Watches) Remove(id uint64) {
   447  	w.mu.Lock()
   448  	defer w.mu.Unlock()
   449  
   450  	if w.ws == nil {
   451  		// This watch set is being destroyed. The thread executing the
   452  		// destructor is already in the process of deleting all our watches. We
   453  		// got here with no references on the target because we raced with the
   454  		// destructor notifying all the watch owners of destruction. See the
   455  		// comment in Watches.HandleDeletion for why this race exists.
   456  		return
   457  	}
   458  
   459  	// It is possible for w.Remove() to be called for the same watch multiple
   460  	// times. See the treatment of one-shot watches in Watches.Notify().
   461  	if _, ok := w.ws[id]; ok {
   462  		delete(w.ws, id)
   463  	}
   464  }
   465  
   466  // Notify queues a new event with watches in this set. Watches with
   467  // IN_EXCL_UNLINK are skipped if the event is coming from a child that has been
   468  // unlinked.
   469  func (w *Watches) Notify(ctx context.Context, name string, events, cookie uint32, et EventType, unlinked bool) {
   470  	var hasExpired bool
   471  	w.mu.RLock()
   472  	for _, watch := range w.ws {
   473  		if unlinked && watch.ExcludeUnlinked() && et == PathEvent {
   474  			continue
   475  		}
   476  		if watch.Notify(name, events, cookie) {
   477  			hasExpired = true
   478  		}
   479  	}
   480  	w.mu.RUnlock()
   481  
   482  	if hasExpired {
   483  		w.cleanupExpiredWatches(ctx)
   484  	}
   485  }
   486  
   487  // This function is relatively expensive and should only be called where there
   488  // are expired watches.
   489  func (w *Watches) cleanupExpiredWatches(ctx context.Context) {
   490  	// Because of lock ordering, we cannot acquire Inotify.mu for each watch
   491  	// owner while holding w.mu. As a result, store expired watches locally
   492  	// before removing.
   493  	var toRemove []*Watch
   494  	w.mu.RLock()
   495  	for _, watch := range w.ws {
   496  		if watch.expired.Load() == 1 {
   497  			toRemove = append(toRemove, watch)
   498  		}
   499  	}
   500  	w.mu.RUnlock()
   501  	for _, watch := range toRemove {
   502  		watch.owner.RmWatch(ctx, watch.wd)
   503  	}
   504  }
   505  
   506  // HandleDeletion is called when the watch target is destroyed. Clear the
   507  // watch set, detach watches from the inotify instances they belong to, and
   508  // generate the appropriate events.
   509  func (w *Watches) HandleDeletion(ctx context.Context) {
   510  	w.Notify(ctx, "", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */)
   511  
   512  	// As in Watches.Notify, we can't hold w.mu while acquiring Inotify.mu for
   513  	// the owner of each watch being deleted. Instead, atomically store the
   514  	// watches map in a local variable and set it to nil so we can iterate over
   515  	// it with the assurance that there will be no concurrent accesses.
   516  	var ws map[uint64]*Watch
   517  	w.mu.Lock()
   518  	ws = w.ws
   519  	w.ws = nil
   520  	w.mu.Unlock()
   521  
   522  	// Remove each watch from its owner's watch set, and generate a corresponding
   523  	// watch removal event.
   524  	for _, watch := range ws {
   525  		i := watch.owner
   526  		i.mu.Lock()
   527  		_, found := i.watches[watch.wd]
   528  		delete(i.watches, watch.wd)
   529  
   530  		// Release mutex before notifying waiters because we don't control what
   531  		// they can do.
   532  		i.mu.Unlock()
   533  
   534  		// If watch was not found, it was removed from the inotify instance before
   535  		// we could get to it, in which case we should not generate an event.
   536  		if found {
   537  			i.queueEvent(newEvent(watch.wd, "", linux.IN_IGNORED, 0))
   538  		}
   539  	}
   540  }
   541  
// Watch represents a particular inotify watch created by inotify_add_watch.
//
// +stateify savable
type Watch struct {
	// Inotify instance which owns this watch.
	//
	// This field is immutable after creation.
	owner *Inotify

	// Descriptor for this watch. This is unique across an inotify instance.
	//
	// This field is immutable after creation.
	wd int32

	// target is a dentry representing the watch target. Its watch set contains this watch.
	//
	// This field is immutable after creation.
	target *Dentry

	// Events being monitored via this watch. Accessed atomically.
	mask atomicbitops.Uint32

	// expired is set to 1 to indicate that this watch is a one-shot that has
	// already sent a notification and therefore can be removed.
	expired atomicbitops.Int32
}
   568  
// OwnerID returns the id of the inotify instance that owns this watch. This
// is the key under which the watch is stored in a Watches set.
func (w *Watch) OwnerID() uint64 {
	return w.owner.id
}
   573  
   574  // ExcludeUnlinked indicates whether the watched object should continue to be
   575  // notified of events originating from a path that has been unlinked.
   576  //
   577  // For example, if "foo/bar" is opened and then unlinked, operations on the
   578  // open fd may be ignored by watches on "foo" and "foo/bar" with IN_EXCL_UNLINK.
   579  func (w *Watch) ExcludeUnlinked() bool {
   580  	return w.mask.Load()&linux.IN_EXCL_UNLINK != 0
   581  }
   582  
   583  // Notify queues a new event on this watch. Returns true if this is a one-shot
   584  // watch that should be deleted, after this event was successfully queued.
   585  func (w *Watch) Notify(name string, events uint32, cookie uint32) bool {
   586  	if w.expired.Load() == 1 {
   587  		// This is a one-shot watch that is already in the process of being
   588  		// removed. This may happen if a second event reaches the watch target
   589  		// before this watch has been removed.
   590  		return false
   591  	}
   592  
   593  	mask := w.mask.Load()
   594  	if mask&events == 0 {
   595  		// We weren't watching for this event.
   596  		return false
   597  	}
   598  
   599  	// Event mask should include bits matched from the watch plus all control
   600  	// event bits.
   601  	unmaskableBits := ^uint32(0) &^ linux.IN_ALL_EVENTS
   602  	effectiveMask := unmaskableBits | mask
   603  	matchedEvents := effectiveMask & events
   604  	w.owner.queueEvent(newEvent(w.wd, name, matchedEvents, cookie))
   605  	if mask&linux.IN_ONESHOT != 0 {
   606  		w.expired.Store(1)
   607  		return true
   608  	}
   609  	return false
   610  }
   611  
// Event represents a struct inotify_event from linux.
//
// +stateify savable
type Event struct {
	eventEntry

	wd     int32
	mask   uint32
	cookie uint32

	// len is derived from the name field and is set automatically by
	// Event.setName. It should be 0 when no name is set; otherwise it is the
	// length of the name slice.
	len uint32

	// The name field has special padding requirements and should only be set by
	// calling Event.setName.
	name []byte
}
   631  
   632  func newEvent(wd int32, name string, events, cookie uint32) *Event {
   633  	e := &Event{
   634  		wd:     wd,
   635  		mask:   events,
   636  		cookie: cookie,
   637  	}
   638  	if name != "" {
   639  		e.setName(name)
   640  	}
   641  	return e
   642  }
   643  
   644  // paddedBytes converts a go string to a null-terminated c-string, padded with
   645  // null bytes to a total size of 'l'. 'l' must be large enough for all the bytes
   646  // in the 's' plus at least one null byte.
   647  func paddedBytes(s string, l uint32) []byte {
   648  	if l < uint32(len(s)+1) {
   649  		panic("Converting string to byte array results in truncation, this can lead to buffer-overflow due to the missing null-byte!")
   650  	}
   651  	b := make([]byte, l)
   652  	copy(b, s)
   653  
   654  	// b was zero-value initialized during make(), so the rest of the slice is
   655  	// already filled with null bytes.
   656  
   657  	return b
   658  }
   659  
   660  // setName sets the optional name for this event.
   661  func (e *Event) setName(name string) {
   662  	// We need to pad the name such that the entire event length ends up a
   663  	// multiple of inotifyEventBaseSize.
   664  	unpaddedLen := len(name) + 1
   665  	// Round up to nearest multiple of inotifyEventBaseSize.
   666  	e.len = uint32((unpaddedLen + inotifyEventBaseSize - 1) & ^(inotifyEventBaseSize - 1))
   667  	// Make sure we haven't overflowed and wrapped around when rounding.
   668  	if unpaddedLen > int(e.len) {
   669  		panic("Overflow when rounding inotify event size, the 'name' field was too big.")
   670  	}
   671  	e.name = paddedBytes(name, e.len)
   672  }
   673  
   674  func (e *Event) sizeOf() int {
   675  	s := inotifyEventBaseSize + int(e.len)
   676  	if s < inotifyEventBaseSize {
   677  		panic("Overflowed event size")
   678  	}
   679  	return s
   680  }
   681  
   682  // CopyTo serializes this event to dst. buf is used as a scratch buffer to
   683  // construct the output. We use a buffer allocated ahead of time for
   684  // performance. buf must be at least inotifyEventBaseSize bytes.
   685  func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) {
   686  	hostarch.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
   687  	hostarch.ByteOrder.PutUint32(buf[4:], e.mask)
   688  	hostarch.ByteOrder.PutUint32(buf[8:], e.cookie)
   689  	hostarch.ByteOrder.PutUint32(buf[12:], e.len)
   690  
   691  	writeLen := 0
   692  
   693  	n, err := dst.CopyOut(ctx, buf)
   694  	if err != nil {
   695  		return 0, err
   696  	}
   697  	writeLen += n
   698  	dst = dst.DropFirst(n)
   699  
   700  	if e.len > 0 {
   701  		n, err = dst.CopyOut(ctx, e.name)
   702  		if err != nil {
   703  			return 0, err
   704  		}
   705  		writeLen += n
   706  	}
   707  
   708  	// Santiy check.
   709  	if writeLen != e.sizeOf() {
   710  		panic(fmt.Sprintf("Serialized unexpected amount of data for an event, expected %d, wrote %d.", e.sizeOf(), writeLen))
   711  	}
   712  
   713  	return int64(writeLen), nil
   714  }
   715  
   716  func (e *Event) equals(other *Event) bool {
   717  	return e.wd == other.wd &&
   718  		e.mask == other.mask &&
   719  		e.cookie == other.cookie &&
   720  		e.len == other.len &&
   721  		bytes.Equal(e.name, other.name)
   722  }
   723  
   724  // InotifyEventFromStatMask generates the appropriate events for an operation
   725  // that set the stats specified in mask.
   726  func InotifyEventFromStatMask(mask uint32) uint32 {
   727  	var ev uint32
   728  	if mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_MODE) != 0 {
   729  		ev |= linux.IN_ATTRIB
   730  	}
   731  	if mask&linux.STATX_SIZE != 0 {
   732  		ev |= linux.IN_MODIFY
   733  	}
   734  
   735  	if (mask & (linux.STATX_ATIME | linux.STATX_MTIME)) == (linux.STATX_ATIME | linux.STATX_MTIME) {
   736  		// Both times indicates a utime(s) call.
   737  		ev |= linux.IN_ATTRIB
   738  	} else if mask&linux.STATX_ATIME != 0 {
   739  		ev |= linux.IN_ACCESS
   740  	} else if mask&linux.STATX_MTIME != 0 {
   741  		ev |= linux.IN_MODIFY
   742  	}
   743  	return ev
   744  }
   745  
   746  // InotifyRemoveChild sends the appriopriate notifications to the watch sets of
   747  // the child being removed and its parent. Note that unlike most pairs of
   748  // parent/child notifications, the child is notified first in this case.
   749  func InotifyRemoveChild(ctx context.Context, self, parent *Watches, name string) {
   750  	if self != nil {
   751  		self.Notify(ctx, "", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */)
   752  	}
   753  	if parent != nil {
   754  		parent.Notify(ctx, name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */)
   755  	}
   756  }
   757  
   758  // InotifyRename sends the appriopriate notifications to the watch sets of the
   759  // file being renamed and its old/new parents.
   760  func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, oldName, newName string, isDir bool) {
   761  	var dirEv uint32
   762  	if isDir {
   763  		dirEv = linux.IN_ISDIR
   764  	}
   765  	cookie := uniqueid.InotifyCookie(ctx)
   766  	if oldParent != nil {
   767  		oldParent.Notify(ctx, oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */)
   768  	}
   769  	if newParent != nil {
   770  		newParent.Notify(ctx, newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */)
   771  	}
   772  	// Somewhat surprisingly, self move events do not have a cookie.
   773  	if renamed != nil {
   774  		renamed.Notify(ctx, "", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */)
   775  	}
   776  }