github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/kernfs/inode_impl_util.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernfs
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/metacubex/gvisor/pkg/abi/linux"
    21  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    22  	"github.com/metacubex/gvisor/pkg/context"
    23  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    24  	"github.com/metacubex/gvisor/pkg/hostarch"
    25  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    26  	ktime "github.com/metacubex/gvisor/pkg/sentry/kernel/time"
    27  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    28  	"github.com/metacubex/gvisor/pkg/sync"
    29  )
    30  
    31  // InodeNoopRefCount partially implements the Inode interface, specifically the
    32  // inodeRefs sub interface. InodeNoopRefCount implements a simple reference
    33  // count for inodes, performing no extra actions when references are obtained or
    34  // released. This is suitable for simple file inodes that don't reference any
    35  // resources.
    36  //
    37  // +stateify savable
    38  type InodeNoopRefCount struct {
    39  	InodeTemporary
    40  }
    41  
    42  // IncRef implements Inode.IncRef.
    43  func (InodeNoopRefCount) IncRef() {
    44  }
    45  
    46  // DecRef implements Inode.DecRef.
    47  func (InodeNoopRefCount) DecRef(context.Context) {
    48  }
    49  
    50  // TryIncRef implements Inode.TryIncRef.
    51  func (InodeNoopRefCount) TryIncRef() bool {
    52  	return true
    53  }
    54  
    55  // InodeDirectoryNoNewChildren partially implements the Inode interface.
    56  // InodeDirectoryNoNewChildren represents a directory inode which does not
    57  // support creation of new children.
    58  //
    59  // +stateify savable
    60  type InodeDirectoryNoNewChildren struct{}
    61  
    62  // NewFile implements Inode.NewFile.
    63  func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
    64  	return nil, linuxerr.EPERM
    65  }
    66  
    67  // NewDir implements Inode.NewDir.
    68  func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
    69  	return nil, linuxerr.EPERM
    70  }
    71  
    72  // NewLink implements Inode.NewLink.
    73  func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) {
    74  	return nil, linuxerr.EPERM
    75  }
    76  
    77  // NewSymlink implements Inode.NewSymlink.
    78  func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) {
    79  	return nil, linuxerr.EPERM
    80  }
    81  
    82  // NewNode implements Inode.NewNode.
    83  func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
    84  	return nil, linuxerr.EPERM
    85  }
    86  
    87  // InodeNotDirectory partially implements the Inode interface, specifically the
    88  // inodeDirectory and inodeDynamicDirectory sub interfaces. Inodes that do not
    89  // represent directories can embed this to provide no-op implementations for
    90  // directory-related functions.
    91  //
    92  // +stateify savable
    93  type InodeNotDirectory struct {
    94  	InodeAlwaysValid
    95  }
    96  
    97  // HasChildren implements Inode.HasChildren.
    98  func (InodeNotDirectory) HasChildren() bool {
    99  	return false
   100  }
   101  
   102  // NewFile implements Inode.NewFile.
   103  func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
   104  	panic("NewFile called on non-directory inode")
   105  }
   106  
   107  // NewDir implements Inode.NewDir.
   108  func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
   109  	panic("NewDir called on non-directory inode")
   110  }
   111  
   112  // NewLink implements Inode.NewLinkink.
   113  func (InodeNotDirectory) NewLink(context.Context, string, Inode) (Inode, error) {
   114  	panic("NewLink called on non-directory inode")
   115  }
   116  
   117  // NewSymlink implements Inode.NewSymlink.
   118  func (InodeNotDirectory) NewSymlink(context.Context, string, string) (Inode, error) {
   119  	panic("NewSymlink called on non-directory inode")
   120  }
   121  
   122  // NewNode implements Inode.NewNode.
   123  func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
   124  	panic("NewNode called on non-directory inode")
   125  }
   126  
   127  // Unlink implements Inode.Unlink.
   128  func (InodeNotDirectory) Unlink(context.Context, string, Inode) error {
   129  	panic("Unlink called on non-directory inode")
   130  }
   131  
   132  // RmDir implements Inode.RmDir.
   133  func (InodeNotDirectory) RmDir(context.Context, string, Inode) error {
   134  	panic("RmDir called on non-directory inode")
   135  }
   136  
   137  // Rename implements Inode.Rename.
   138  func (InodeNotDirectory) Rename(context.Context, string, string, Inode, Inode) error {
   139  	panic("Rename called on non-directory inode")
   140  }
   141  
   142  // Lookup implements Inode.Lookup.
   143  func (InodeNotDirectory) Lookup(ctx context.Context, name string) (Inode, error) {
   144  	panic("Lookup called on non-directory inode")
   145  }
   146  
   147  // IterDirents implements Inode.IterDirents.
   148  func (InodeNotDirectory) IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) {
   149  	panic("IterDirents called on non-directory inode")
   150  }
   151  
   152  // InodeNotSymlink partially implements the Inode interface, specifically the
   153  // inodeSymlink sub interface. All inodes that are not symlinks may embed this
   154  // to return the appropriate errors from symlink-related functions.
   155  //
   156  // +stateify savable
   157  type InodeNotSymlink struct{}
   158  
   159  // Readlink implements Inode.Readlink.
   160  func (InodeNotSymlink) Readlink(context.Context, *vfs.Mount) (string, error) {
   161  	return "", linuxerr.EINVAL
   162  }
   163  
   164  // Getlink implements Inode.Getlink.
   165  func (InodeNotSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry, string, error) {
   166  	return vfs.VirtualDentry{}, "", linuxerr.EINVAL
   167  }
   168  
// InodeAttrs partially implements the Inode interface, specifically the
// inodeMetadata sub interface. InodeAttrs provides functionality related to
// inode attributes.
//
// Must be initialized by Init prior to first use.
//
// +stateify savable
type InodeAttrs struct {
	devMajor  uint32              // Device major number; written only by InitWithIDs.
	devMinor  uint32              // Device minor number; written only by InitWithIDs.
	ino       atomicbitops.Uint64 // Inode id.
	mode      atomicbitops.Uint32 // File type and permission bits (a linux.FileMode).
	uid       atomicbitops.Uint32 // Owner, stored as the raw auth.KUID value.
	gid       atomicbitops.Uint32 // Group, stored as the raw auth.KGID value.
	nlink     atomicbitops.Uint32 // Link count; see IncLinks and DecLinks.
	blockSize atomicbitops.Uint32 // Reported as Blksize by Stat; set to the page size by InitWithIDs.

	// Timestamps, all nsecs from the Unix epoch.
	atime atomicbitops.Int64
	mtime atomicbitops.Int64
	ctime atomicbitops.Int64
}
   191  
   192  // Init initializes this InodeAttrs.
   193  func (a *InodeAttrs) Init(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, mode linux.FileMode) {
   194  	a.InitWithIDs(ctx, creds.EffectiveKUID, creds.EffectiveKGID, devMajor, devMinor, ino, mode)
   195  }
   196  
   197  // InitWithIDs initializes this InodeAttrs.
   198  func (a *InodeAttrs) InitWithIDs(ctx context.Context, uid auth.KUID, gid auth.KGID, devMajor, devMinor uint32, ino uint64, mode linux.FileMode) {
   199  	if mode.FileType() == 0 {
   200  		panic(fmt.Sprintf("No file type specified in 'mode' for InodeAttrs.Init(): mode=0%o", mode))
   201  	}
   202  
   203  	nlink := uint32(1)
   204  	if mode.FileType() == linux.ModeDirectory {
   205  		nlink = 2
   206  	}
   207  	a.devMajor = devMajor
   208  	a.devMinor = devMinor
   209  	a.ino.Store(ino)
   210  	a.mode.Store(uint32(mode))
   211  	a.uid.Store(uint32(uid))
   212  	a.gid.Store(uint32(gid))
   213  	a.nlink.Store(nlink)
   214  	a.blockSize.Store(hostarch.PageSize)
   215  	now := ktime.NowFromContext(ctx).Nanoseconds()
   216  	a.atime.Store(now)
   217  	a.mtime.Store(now)
   218  	a.ctime.Store(now)
   219  }
   220  
   221  // DevMajor returns the device major number.
   222  func (a *InodeAttrs) DevMajor() uint32 {
   223  	return a.devMajor
   224  }
   225  
   226  // DevMinor returns the device minor number.
   227  func (a *InodeAttrs) DevMinor() uint32 {
   228  	return a.devMinor
   229  }
   230  
   231  // Ino returns the inode id.
   232  func (a *InodeAttrs) Ino() uint64 {
   233  	return a.ino.Load()
   234  }
   235  
   236  // UID implements Inode.UID.
   237  func (a *InodeAttrs) UID() auth.KUID {
   238  	return auth.KUID(a.uid.Load())
   239  }
   240  
   241  // GID implements Inode.GID.
   242  func (a *InodeAttrs) GID() auth.KGID {
   243  	return auth.KGID(a.gid.Load())
   244  }
   245  
   246  // Mode implements Inode.Mode.
   247  func (a *InodeAttrs) Mode() linux.FileMode {
   248  	return linux.FileMode(a.mode.Load())
   249  }
   250  
   251  // Links returns the link count.
   252  func (a *InodeAttrs) Links() uint32 {
   253  	return a.nlink.Load()
   254  }
   255  
   256  // TouchAtime updates a.atime to the current time.
   257  func (a *InodeAttrs) TouchAtime(ctx context.Context, mnt *vfs.Mount) {
   258  	if opts := mnt.Options(); opts.Flags.NoATime || opts.ReadOnly {
   259  		return
   260  	}
   261  	if err := mnt.CheckBeginWrite(); err != nil {
   262  		return
   263  	}
   264  	a.atime.Store(ktime.NowFromContext(ctx).Nanoseconds())
   265  	mnt.EndWrite()
   266  }
   267  
   268  // TouchCMtime updates a.{c/m}time to the current time. The caller should
   269  // synchronize calls to this so that ctime and mtime are updated to the same
   270  // value.
   271  func (a *InodeAttrs) TouchCMtime(ctx context.Context) {
   272  	now := ktime.NowFromContext(ctx).Nanoseconds()
   273  	a.mtime.Store(now)
   274  	a.ctime.Store(now)
   275  }
   276  
   277  // Stat partially implements Inode.Stat. Note that this function doesn't provide
   278  // all the stat fields, and the embedder should consider extending the result
   279  // with filesystem-specific fields.
   280  func (a *InodeAttrs) Stat(context.Context, *vfs.Filesystem, vfs.StatOptions) (linux.Statx, error) {
   281  	var stat linux.Statx
   282  	stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_NLINK | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME
   283  	stat.DevMajor = a.devMajor
   284  	stat.DevMinor = a.devMinor
   285  	stat.Ino = a.ino.Load()
   286  	stat.Mode = uint16(a.Mode())
   287  	stat.UID = a.uid.Load()
   288  	stat.GID = a.gid.Load()
   289  	stat.Nlink = a.nlink.Load()
   290  	stat.Blksize = a.blockSize.Load()
   291  	stat.Atime = linux.NsecToStatxTimestamp(a.atime.Load())
   292  	stat.Mtime = linux.NsecToStatxTimestamp(a.mtime.Load())
   293  	stat.Ctime = linux.NsecToStatxTimestamp(a.ctime.Load())
   294  	return stat, nil
   295  }
   296  
// SetStat implements Inode.SetStat.
//
// Only mode, uid, gid, atime, mtime and size may appear in opts.Stat.Mask;
// any other bit fails with EPERM. A size change on a directory fails with
// EISDIR and is otherwise accepted but ignored. Permission checks are
// delegated to vfs.CheckSetStat. An empty mask is a successful no-op.
func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
	if opts.Stat.Mask == 0 {
		return nil
	}

	// Note that not all fields are modifiable. For example, the file type and
	// inode numbers are immutable after node creation. Setting the size is often
	// allowed by kernfs files but does not do anything. If some other behavior is
	// needed, the embedder should consider extending SetStat.
	if opts.Stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 {
		return linuxerr.EPERM
	}
	if opts.Stat.Mask&linux.STATX_SIZE != 0 && a.Mode().IsDir() {
		return linuxerr.EISDIR
	}
	if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(a.uid.Load()), auth.KGID(a.gid.Load())); err != nil {
		return err
	}

	// clearSID records that ownership changed, which requires the SUID/SGID
	// bits to be dropped from the mode.
	clearSID := false
	// opts.Stat is read only after CheckSetStat, which received a pointer to
	// opts.
	stat := opts.Stat
	if stat.Mask&linux.STATX_UID != 0 {
		a.uid.Store(stat.UID)
		clearSID = true
	}
	if stat.Mask&linux.STATX_GID != 0 {
		a.gid.Store(stat.GID)
		clearSID = true
	}
	if stat.Mask&linux.STATX_MODE != 0 {
		// Retry the compare-and-swap until no concurrent mode update
		// interferes.
		for {
			old := a.mode.Load()
			// Keep the existing file type; take only the non-type bits from
			// the request.
			ft := old & linux.S_IFMT
			newMode := ft | uint32(stat.Mode & ^uint16(linux.S_IFMT))
			if clearSID {
				newMode = vfs.ClearSUIDAndSGID(newMode)
			}
			if swapped := a.mode.CompareAndSwap(old, newMode); swapped {
				// The SUID/SGID clearing was folded into this update.
				clearSID = false
				break
			}
		}
	}

	// We may have to clear the SUID/SGID bits, but didn't do so as part of
	// STATX_MODE.
	if clearSID {
		for {
			old := a.mode.Load()
			newMode := vfs.ClearSUIDAndSGID(old)
			if swapped := a.mode.CompareAndSwap(old, newMode); swapped {
				break
			}
		}
	}

	// A single "now" is shared by atime and mtime when both request
	// UTIME_NOW.
	now := ktime.NowFromContext(ctx).Nanoseconds()
	if stat.Mask&linux.STATX_ATIME != 0 {
		if stat.Atime.Nsec == linux.UTIME_NOW {
			stat.Atime = linux.NsecToStatxTimestamp(now)
		}
		a.atime.Store(stat.Atime.ToNsec())
	}
	if stat.Mask&linux.STATX_MTIME != 0 {
		if stat.Mtime.Nsec == linux.UTIME_NOW {
			stat.Mtime = linux.NsecToStatxTimestamp(now)
		}
		a.mtime.Store(stat.Mtime.ToNsec())
	}

	return nil
}
   370  
   371  // CheckPermissions implements Inode.CheckPermissions.
   372  func (a *InodeAttrs) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
   373  	return vfs.GenericCheckPermissions(
   374  		creds,
   375  		ats,
   376  		a.Mode(),
   377  		auth.KUID(a.uid.Load()),
   378  		auth.KGID(a.gid.Load()),
   379  	)
   380  }
   381  
   382  // IncLinks implements Inode.IncLinks.
   383  func (a *InodeAttrs) IncLinks(n uint32) {
   384  	if a.nlink.Add(n) <= n {
   385  		panic("InodeLink.IncLinks called with no existing links")
   386  	}
   387  }
   388  
   389  // DecLinks implements Inode.DecLinks.
   390  func (a *InodeAttrs) DecLinks() {
   391  	if nlink := a.nlink.Add(^uint32(0)); nlink == ^uint32(0) {
   392  		// Negative overflow
   393  		panic("Inode.DecLinks called at 0 links")
   394  	}
   395  }
   396  
// slot is one child entry tracked by an OrderedChildren. It is linked into
// OrderedChildren.order through the embedded slotEntry and indexed by name in
// OrderedChildren.set.
//
// +stateify savable
type slot struct {
	name   string // Name of the child within its directory.
	inode  Inode  // The child inode.
	static bool   // Whether the OrderedChildren holds a reference on inode (see Destroy).
	slotEntry
}
   404  
// OrderedChildrenOptions contains initialization options for OrderedChildren.
// It is consumed by OrderedChildren.Init.
//
// +stateify savable
type OrderedChildrenOptions struct {
	// Writable indicates whether vfs.FilesystemImpl methods implemented by
	// OrderedChildren may modify the tracked children. This applies to
	// operations related to rename, unlink and rmdir. If an OrderedChildren is
	// not writable, these operations all fail with EPERM.
	//
	// Note that writable users must implement the sticky bit (I_SVTX).
	Writable bool
}
   417  
// inodeWithOrderedChildren allows extraction of an OrderedChildren from an
// Inode implementation. A concrete type that both implements the Inode
// interface and embeds OrderedChildren will be castable to this interface, and
// we can get to the embedded OrderedChildren through the orderedChildren
// method. OrderedChildren.Rename uses it to reach the destination directory's
// children.
type inodeWithOrderedChildren interface {
	Inode
	// orderedChildren returns the embedded OrderedChildren.
	orderedChildren() *OrderedChildren
}
   427  
// OrderedChildren partially implements the Inode interface. OrderedChildren can
// be embedded in directory inodes to keep track of children in the
// directory, and can then be used to implement a generic directory FD -- see
// GenericDirectoryFD.
//
// OrderedChildren can represent a node in an Inode tree. The children inodes
// might be directories themselves using OrderedChildren; hence extending the
// tree. The parent inode (OrderedChildren user) holds a ref on all its static
// children. This lets the static inodes outlive their associated dentry.
// While the dentry might have to be regenerated via a Lookup() call, we can
// keep reusing the same static inode. These static children inodes are finally
// DecRef'd when this directory inode is being destroyed. This makes
// OrderedChildren suitable for static directory entries as well.
//
// Must be initialized with Init before first use.
//
// +stateify savable
type OrderedChildren struct {
	// Can children be modified by user syscalls? If set to false, interface
	// methods that would modify the children return EPERM. Immutable.
	writable bool

	// mu protects order and set.
	mu sync.RWMutex `state:"nosave"`
	// order lists the children in insertion order; new slots are appended at
	// the back.
	order slotList
	// set maps each child's name to its slot. It is created by Init and set
	// to nil by Destroy.
	set map[string]*slot
}
   454  
   455  // orderedChildren implements inodeWithOrderedChildren.orderedChildren.
   456  func (o *OrderedChildren) orderedChildren() *OrderedChildren {
   457  	return o
   458  }
   459  
   460  // Init initializes an OrderedChildren.
   461  func (o *OrderedChildren) Init(opts OrderedChildrenOptions) {
   462  	o.writable = opts.Writable
   463  	o.set = make(map[string]*slot)
   464  }
   465  
   466  // Destroy clears the children stored in o. It should be called by structs
   467  // embedding OrderedChildren upon destruction, i.e. when their reference count
   468  // reaches zero.
   469  func (o *OrderedChildren) Destroy(ctx context.Context) {
   470  	o.mu.Lock()
   471  	defer o.mu.Unlock()
   472  	// Drop the ref that o owns on the static inodes it holds.
   473  	for _, s := range o.set {
   474  		if s.static {
   475  			s.inode.DecRef(ctx)
   476  		}
   477  	}
   478  	o.order.Reset()
   479  	o.set = nil
   480  }
   481  
   482  // Populate inserts static children into this OrderedChildren.
   483  // Populate returns the number of directories inserted, which the caller
   484  // may use to update the link count for the parent directory.
   485  //
   486  // Precondition:
   487  //   - d must represent a directory inode.
   488  //   - children must not contain any conflicting entries already in o.
   489  //   - Caller must hold a reference on all inodes passed.
   490  //
   491  // Postcondition: Caller's references on inodes are transferred to o.
   492  func (o *OrderedChildren) Populate(children map[string]Inode) uint32 {
   493  	var links uint32
   494  	for name, child := range children {
   495  		if child.Mode().IsDir() {
   496  			links++
   497  		}
   498  		if err := o.insert(name, child, true); err != nil {
   499  			panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v)", name, child))
   500  		}
   501  	}
   502  	return links
   503  }
   504  
   505  // Lookup implements Inode.Lookup.
   506  func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error) {
   507  	o.mu.RLock()
   508  	defer o.mu.RUnlock()
   509  
   510  	s, ok := o.set[name]
   511  	if !ok {
   512  		return nil, linuxerr.ENOENT
   513  	}
   514  
   515  	s.inode.IncRef() // This ref is passed to the dentry upon creation via Init.
   516  	return s.inode, nil
   517  }
   518  
   519  // ForEachChild calls fn on all children tracked by this ordered children.
   520  func (o *OrderedChildren) ForEachChild(fn func(string, Inode)) {
   521  	o.mu.RLock()
   522  	defer o.mu.RUnlock()
   523  
   524  	for name, slot := range o.set {
   525  		fn(name, slot.inode)
   526  	}
   527  }
   528  
   529  // IterDirents implements Inode.IterDirents.
   530  func (o *OrderedChildren) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) {
   531  	// All entries from OrderedChildren have already been handled in
   532  	// GenericDirectoryFD.IterDirents.
   533  	return offset, nil
   534  }
   535  
   536  // HasChildren implements Inode.HasChildren.
   537  func (o *OrderedChildren) HasChildren() bool {
   538  	o.mu.RLock()
   539  	defer o.mu.RUnlock()
   540  	return len(o.set) > 0
   541  }
   542  
   543  // Insert inserts a dynamic child into o. This ignores the writability of o, as
   544  // this is not part of the vfs.FilesystemImpl interface, and is a lower-level operation.
   545  func (o *OrderedChildren) Insert(name string, child Inode) error {
   546  	return o.insert(name, child, false)
   547  }
   548  
   549  // Inserter is like Insert, but obtains the child to insert by calling
   550  // makeChild. makeChild is only called if the insert will succeed. This allows
   551  // the caller to atomically check and insert a child without having to
   552  // clean up the child on failure.
   553  func (o *OrderedChildren) Inserter(name string, makeChild func() Inode) (Inode, error) {
   554  	o.mu.Lock()
   555  	defer o.mu.Unlock()
   556  	if _, ok := o.set[name]; ok {
   557  		return nil, linuxerr.EEXIST
   558  	}
   559  
   560  	// Note: We must not fail after we call makeChild().
   561  
   562  	child := makeChild()
   563  	s := &slot{
   564  		name:   name,
   565  		inode:  child,
   566  		static: false,
   567  	}
   568  	o.order.PushBack(s)
   569  	o.set[name] = s
   570  	return child, nil
   571  }
   572  
   573  // insert inserts child into o.
   574  //
   575  // Precondition: Caller must be holding a ref on child if static is true.
   576  //
   577  // Postcondition: Caller's ref on child is transferred to o if static is true.
   578  func (o *OrderedChildren) insert(name string, child Inode, static bool) error {
   579  	o.mu.Lock()
   580  	defer o.mu.Unlock()
   581  	if _, ok := o.set[name]; ok {
   582  		return linuxerr.EEXIST
   583  	}
   584  	s := &slot{
   585  		name:   name,
   586  		inode:  child,
   587  		static: static,
   588  	}
   589  	o.order.PushBack(s)
   590  	o.set[name] = s
   591  	return nil
   592  }
   593  
   594  // Precondition: caller must hold o.mu for writing.
   595  func (o *OrderedChildren) removeLocked(name string) {
   596  	if s, ok := o.set[name]; ok {
   597  		if s.static {
   598  			panic(fmt.Sprintf("removeLocked called on a static inode: %v", s.inode))
   599  		}
   600  		delete(o.set, name)
   601  		o.order.Remove(s)
   602  	}
   603  }
   604  
   605  // Precondition: caller must hold o.mu for reading or writing.
   606  func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error {
   607  	s, ok := o.set[name]
   608  	if !ok {
   609  		return linuxerr.ENOENT
   610  	}
   611  	if s.inode != child {
   612  		panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! Name: %q, OrderedChild: %p, kernfs: %p", name, s.inode, child))
   613  	}
   614  	return nil
   615  }
   616  
   617  // Unlink implements Inode.Unlink.
   618  func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error {
   619  	if !o.writable {
   620  		return linuxerr.EPERM
   621  	}
   622  	o.mu.Lock()
   623  	defer o.mu.Unlock()
   624  	if err := o.checkExistingLocked(name, child); err != nil {
   625  		return err
   626  	}
   627  
   628  	o.removeLocked(name)
   629  	return nil
   630  }
   631  
   632  // RmDir implements Inode.RmDir.
   633  func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) error {
   634  	// We're not responsible for checking that child is a directory, that it's
   635  	// empty, or updating any link counts; so this is the same as unlink.
   636  	return o.Unlink(ctx, name, child)
   637  }
   638  
   639  // Rename implements Inode.Rename.
   640  //
   641  // Precondition: Rename may only be called across two directory inodes with
   642  // identical implementations of Rename. Practically, this means filesystems that
   643  // implement Rename by embedding OrderedChildren for any directory
   644  // implementation must use OrderedChildren for all directory implementations
   645  // that will support Rename.
   646  //
   647  // Postcondition: reference on any replaced dentry transferred to caller.
   648  func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error {
   649  	if !o.writable {
   650  		return linuxerr.EPERM
   651  	}
   652  	dstIOC, ok := dstDir.(inodeWithOrderedChildren)
   653  	if !ok {
   654  		return linuxerr.EXDEV
   655  	}
   656  	dst := dstIOC.orderedChildren()
   657  	if !dst.writable {
   658  		return linuxerr.EPERM
   659  	}
   660  
   661  	// Note: There's a potential deadlock below if concurrent calls to Rename
   662  	// refer to the same src and dst directories in reverse. We avoid any
   663  	// ordering issues because the caller is required to serialize concurrent
   664  	// calls to Rename in accordance with the interface declaration.
   665  	o.mu.Lock()
   666  	defer o.mu.Unlock()
   667  	if dst != o {
   668  		dst.mu.Lock()
   669  		defer dst.mu.Unlock()
   670  	}
   671  
   672  	// Ensure target inode exists in src.
   673  	if err := o.checkExistingLocked(oldname, child); err != nil {
   674  		return err
   675  	}
   676  
   677  	// Ensure no name collision in dst.
   678  	if _, ok := dst.set[newname]; ok {
   679  		return linuxerr.EEXIST
   680  	}
   681  
   682  	// Remove from src.
   683  	o.removeLocked(oldname)
   684  
   685  	// Add to dst.
   686  	s := &slot{
   687  		name:  newname,
   688  		inode: child,
   689  	}
   690  	dst.order.PushBack(s)
   691  	dst.set[newname] = s
   692  
   693  	return nil
   694  }
   695  
   696  // nthLocked returns an iterator to the nth child tracked by this object. The
   697  // iterator is valid until the caller releases o.mu. Returns nil if the
   698  // requested index falls out of bounds.
   699  //
   700  // Preconditon: Caller must hold o.mu for reading.
   701  func (o *OrderedChildren) nthLocked(i int64) *slot {
   702  	for it := o.order.Front(); it != nil && i >= 0; it = it.Next() {
   703  		if i == 0 {
   704  			return it
   705  		}
   706  		i--
   707  	}
   708  	return nil
   709  }
   710  
   711  // InodeSymlink partially implements Inode interface for symlinks.
   712  //
   713  // +stateify savable
   714  type InodeSymlink struct {
   715  	InodeNotDirectory
   716  }
   717  
   718  // Open implements Inode.Open.
   719  func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   720  	return nil, linuxerr.ELOOP
   721  }
   722  
// StaticDirectory is a standard implementation of a directory with static
// contents. Its children are tracked by the embedded OrderedChildren, new
// children cannot be created (InodeDirectoryNoNewChildren), and SetStat is
// rejected.
//
// +stateify savable
type StaticDirectory struct {
	InodeAlwaysValid
	InodeAttrs
	InodeDirectoryNoNewChildren
	InodeNoStatFS
	InodeNotAnonymous
	InodeNotSymlink
	InodeTemporary
	InodeWatches
	OrderedChildren
	StaticDirectoryRefs

	// locks is handed to NewGenericDirectoryFD by Open.
	locks vfs.FileLocks
	// fdOpts holds the options used for every directory FD created by Open.
	fdOpts GenericDirectoryFDOptions
}

// Compile-time check that *StaticDirectory implements Inode.
var _ Inode = (*StaticDirectory)(nil)
   744  
   745  // NewStaticDir creates a new static directory and returns its dentry.
   746  func NewStaticDir(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]Inode, fdOpts GenericDirectoryFDOptions) Inode {
   747  	inode := &StaticDirectory{}
   748  	inode.Init(ctx, creds, devMajor, devMinor, ino, perm, fdOpts)
   749  	inode.InitRefs()
   750  
   751  	inode.OrderedChildren.Init(OrderedChildrenOptions{})
   752  	links := inode.OrderedChildren.Populate(children)
   753  	inode.IncLinks(links)
   754  
   755  	return inode
   756  }
   757  
   758  // Init initializes StaticDirectory.
   759  func (s *StaticDirectory) Init(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, fdOpts GenericDirectoryFDOptions) {
   760  	if perm&^linux.PermissionsMask != 0 {
   761  		panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
   762  	}
   763  	s.fdOpts = fdOpts
   764  	s.InodeAttrs.Init(ctx, creds, devMajor, devMinor, ino, linux.ModeDirectory|perm)
   765  }
   766  
   767  // Open implements Inode.Open.
   768  func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   769  	fd, err := NewGenericDirectoryFD(rp.Mount(), d, &s.OrderedChildren, &s.locks, &opts, s.fdOpts)
   770  	if err != nil {
   771  		return nil, err
   772  	}
   773  	return fd.VFSFileDescription(), nil
   774  }
   775  
   776  // SetStat implements Inode.SetStat not allowing inode attributes to be changed.
   777  func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
   778  	return linuxerr.EPERM
   779  }
   780  
   781  // DecRef implements Inode.DecRef.
   782  func (s *StaticDirectory) DecRef(ctx context.Context) {
   783  	s.StaticDirectoryRefs.DecRef(func() { s.Destroy(ctx) })
   784  }
   785  
   786  // InodeAlwaysValid partially implements Inode.
   787  //
   788  // +stateify savable
   789  type InodeAlwaysValid struct{}
   790  
   791  // Valid implements Inode.Valid.
   792  func (*InodeAlwaysValid) Valid(context.Context) bool {
   793  	return true
   794  }
   795  
   796  // InodeTemporary partially implements Inode.
   797  //
   798  // +stateify savable
   799  type InodeTemporary struct{}
   800  
   801  // Keep implements Inode.Keep.
   802  func (*InodeTemporary) Keep() bool {
   803  	return false
   804  }
   805  
   806  // InodeNoStatFS partially implements the Inode interface, where the client
   807  // filesystem doesn't support statfs(2).
   808  //
   809  // +stateify savable
   810  type InodeNoStatFS struct{}
   811  
   812  // StatFS implements Inode.StatFS.
   813  func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
   814  	return linux.Statfs{}, linuxerr.ENOSYS
   815  }
   816  
   817  // InodeWatches partially implements Inode.
   818  //
   819  // +stateify savable
   820  type InodeWatches struct {
   821  	watches vfs.Watches
   822  }
   823  
   824  // Watches implements Inode.Watches.
   825  func (i *InodeWatches) Watches() *vfs.Watches {
   826  	return &i.watches
   827  }
   828  
   829  // InodeAnonymous partially implements Inode.
   830  //
   831  // +stateify savable
   832  type InodeAnonymous struct{}
   833  
   834  // Anonymous implements Inode.Anonymous
   835  func (*InodeAnonymous) Anonymous() bool {
   836  	return true
   837  }
   838  
   839  // InodeNotAnonymous partially implements Inode.
   840  //
   841  // +stateify savable
   842  type InodeNotAnonymous struct{}
   843  
   844  // Anonymous implements Inode.Anonymous
   845  func (*InodeNotAnonymous) Anonymous() bool {
   846  	return false
   847  }