github.com/sagernet/gvisor@v0.0.0-20240428053021-e691de28565f/pkg/lisafs/handlers.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package lisafs
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"strings"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/sagernet/gvisor/pkg/abi/linux"
    24  	"github.com/sagernet/gvisor/pkg/cleanup"
    25  	"github.com/sagernet/gvisor/pkg/flipcall"
    26  	"github.com/sagernet/gvisor/pkg/fspath"
    27  	"github.com/sagernet/gvisor/pkg/log"
    28  	"github.com/sagernet/gvisor/pkg/marshal/primitive"
    29  	"github.com/sagernet/gvisor/pkg/p9"
    30  )
    31  
    32  const (
    33  	allowedOpenFlags     = unix.O_ACCMODE | unix.O_TRUNC
    34  	setStatSupportedMask = unix.STATX_MODE | unix.STATX_UID | unix.STATX_GID | unix.STATX_SIZE | unix.STATX_ATIME | unix.STATX_MTIME
    35  	// unixDirentMaxSize is the maximum size of unix.Dirent for amd64.
    36  	unixDirentMaxSize = 280
    37  )
    38  
    39  // RPCHandler defines a handler that is invoked when the associated message is
    40  // received. The handler is responsible for:
    41  //
    42  //   - Unmarshalling the request from the passed payload and interpreting it.
    43  //   - Marshalling the response into the communicator's payload buffer.
    44  //   - Returning the number of payload bytes written.
    45  //   - Donating any FDs (if needed) to comm, which will in turn donate them to
        //     the client.
        //
        // payloadLen is the length in bytes of the request payload, which the
        // handlers in this file read via comm.PayloadBuf(payloadLen).
    46  type RPCHandler func(c *Connection, comm Communicator, payloadLen uint32) (uint32, error)
    47  
    48  var handlers = [...]RPCHandler{
        	// Each key is a message constant used as the array index; the value is
        	// the RPCHandler registered for that message.
    49  	Error:        ErrorHandler,
    50  	Mount:        MountHandler,
    51  	Channel:      ChannelHandler,
    52  	FStat:        FStatHandler,
    53  	SetStat:      SetStatHandler,
    54  	Walk:         WalkHandler,
    55  	WalkStat:     WalkStatHandler,
    56  	OpenAt:       OpenAtHandler,
    57  	OpenCreateAt: OpenCreateAtHandler,
    58  	Close:        CloseHandler,
    59  	FSync:        FSyncHandler,
    60  	PWrite:       PWriteHandler,
    61  	PRead:        PReadHandler,
    62  	MkdirAt:      MkdirAtHandler,
    63  	MknodAt:      MknodAtHandler,
    64  	SymlinkAt:    SymlinkAtHandler,
    65  	LinkAt:       LinkAtHandler,
    66  	FStatFS:      FStatFSHandler,
    67  	FAllocate:    FAllocateHandler,
    68  	ReadLinkAt:   ReadLinkAtHandler,
    69  	Flush:        FlushHandler,
    70  	UnlinkAt:     UnlinkAtHandler,
    71  	RenameAt:     RenameAtHandler,
    72  	Getdents64:   Getdents64Handler,
    73  	FGetXattr:    FGetXattrHandler,
    74  	FSetXattr:    FSetXattrHandler,
    75  	FListXattr:   FListXattrHandler,
    76  	FRemoveXattr: FRemoveXattrHandler,
    77  	Connect:      ConnectHandler,
    78  	BindAt:       BindAtHandler,
    79  	Listen:       ListenHandler,
    80  	Accept:       AcceptHandler,
    81  }
    82  
    83  // ErrorHandler handles Error message.
    84  func ErrorHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
    85  	// Client should never send Error.
    86  	return 0, unix.EINVAL
    87  }
    88  
    89  // MountHandler handles the Mount RPC. Note that there can not be concurrent
    90  // executions of MountHandler on a connection because the connection enforces
    91  // that Mount is the first message on the connection. Only after the connection
    92  // has been successfully mounted can other channels be created.
    93  func MountHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
    94  	var (
    95  		mountPointFD     *ControlFD
    96  		mountPointHostFD = -1
    97  		mountPointStat   linux.Statx
    98  		mountNode        = c.server.root
    99  	)
   100  	if err := c.server.withRenameReadLock(func() (err error) {
   101  		// Maintain extra ref on mountNode to ensure existence during walk.
   102  		mountNode.IncRef()
   103  		defer func() {
   104  			// Drop extra ref on mountNode. Wrap the defer call with a func so that
   105  			// mountNode is evaluated on execution, not on defer itself.
   106  			mountNode.DecRef(nil)
   107  		}()
   108  
   109  		// Walk to the mountpoint.
   110  		pit := fspath.Parse(c.mountPath).Begin
   111  		for pit.Ok() {
   112  			curName := pit.String()
   113  			if err := checkSafeName(curName); err != nil {
   114  				return err
   115  			}
   116  			mountNode.opMu.RLock()
   117  			if mountNode.isDeleted() {
   118  				mountNode.opMu.RUnlock()
   119  				return unix.ENOENT
   120  			}
   121  			mountNode.childrenMu.Lock()
   122  			next := mountNode.LookupChildLocked(curName)
   123  			if next == nil {
   124  				next = &Node{}
   125  				next.InitLocked(curName, mountNode)
   126  			} else {
   127  				next.IncRef()
   128  			}
   129  			mountNode.childrenMu.Unlock()
   130  			mountNode.opMu.RUnlock()
   131  			// next has an extra ref as needed. Drop extra ref on mountNode.
   132  			mountNode.DecRef(nil)
   133  			pit = pit.Next()
   134  			mountNode = next
   135  		}
   136  
   137  		// Provide Mount with read concurrency guarantee.
   138  		mountNode.opMu.RLock()
   139  		defer mountNode.opMu.RUnlock()
   140  		if mountNode.isDeleted() {
   141  			return unix.ENOENT
   142  		}
   143  		mountPointFD, mountPointStat, mountPointHostFD, err = c.ServerImpl().Mount(c, mountNode)
   144  		return err
   145  	}); err != nil {
   146  		return 0, err
   147  	}
   148  
   149  	if mountPointHostFD >= 0 {
   150  		comm.DonateFD(mountPointHostFD)
   151  	}
   152  	resp := MountResp{
   153  		Root: Inode{
   154  			ControlFD: mountPointFD.id,
   155  			Stat:      mountPointStat,
   156  		},
   157  		SupportedMs:    c.ServerImpl().SupportedMessages(),
   158  		MaxMessageSize: primitive.Uint32(c.ServerImpl().MaxMessageSize()),
   159  	}
   160  	respPayloadLen := uint32(resp.SizeBytes())
   161  	resp.MarshalBytes(comm.PayloadBuf(respPayloadLen))
   162  	return respPayloadLen, nil
   163  }
   164  
   165  // ChannelHandler handles the Channel RPC.
   166  func ChannelHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   167  	ch, desc, fdSock, err := c.createChannel(c.ServerImpl().MaxMessageSize())
   168  	if err != nil {
   169  		return 0, err
   170  	}
   171  
   172  	// Start servicing the channel in a separate goroutine.
   173  	c.activeWg.Add(1)
   174  	go func() {
   175  		if err := c.service(ch); err != nil {
   176  			// Don't log shutdown error which is expected during server shutdown.
   177  			if _, ok := err.(flipcall.ShutdownError); !ok {
   178  				log.Warningf("lisafs.Connection.service(channel = @%p): %v", ch, err)
   179  			}
   180  		}
   181  		c.activeWg.Done()
   182  	}()
   183  
   184  	clientDataFD, err := unix.Dup(desc.FD)
   185  	if err != nil {
   186  		unix.Close(fdSock)
   187  		ch.shutdown()
   188  		return 0, err
   189  	}
   190  
   191  	// Respond to client with successful channel creation message.
   192  	comm.DonateFD(clientDataFD)
   193  	comm.DonateFD(fdSock)
   194  	resp := ChannelResp{
   195  		dataOffset: desc.Offset,
   196  		dataLength: uint64(desc.Length),
   197  	}
   198  	respLen := uint32(resp.SizeBytes())
   199  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   200  	return respLen, nil
   201  }
   202  
   203  // FStatHandler handles the FStat RPC.
   204  func FStatHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   205  	var req StatReq
   206  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   207  		return 0, unix.EIO
   208  	}
   209  
   210  	fd, err := c.lookupFD(req.FD)
   211  	if err != nil {
   212  		return 0, err
   213  	}
   214  	defer fd.DecRef(nil)
   215  
   216  	var resp linux.Statx
   217  	switch t := fd.(type) {
   218  	case *ControlFD:
   219  		t.safelyRead(func() error {
   220  			resp, err = t.impl.Stat()
   221  			return err
   222  		})
   223  	case *OpenFD:
   224  		t.controlFD.safelyRead(func() error {
   225  			resp, err = t.impl.Stat()
   226  			return err
   227  		})
   228  	default:
   229  		panic(fmt.Sprintf("unknown fd type %T", t))
   230  	}
   231  	if err != nil {
   232  		return 0, err
   233  	}
   234  	respLen := uint32(resp.SizeBytes())
   235  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   236  	return respLen, nil
   237  }
   238  
   239  // SetStatHandler handles the SetStat RPC.
   240  func SetStatHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   241  	if c.readonly {
   242  		return 0, unix.EROFS
   243  	}
   244  
   245  	var req SetStatReq
   246  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   247  		return 0, unix.EIO
   248  	}
   249  
   250  	fd, err := c.lookupControlFD(req.FD)
   251  	if err != nil {
   252  		return 0, err
   253  	}
   254  	defer fd.DecRef(nil)
   255  
   256  	if req.Mask&^setStatSupportedMask != 0 {
   257  		return 0, unix.EPERM
   258  	}
   259  
   260  	var resp SetStatResp
   261  	if err := fd.safelyWrite(func() error {
   262  		if fd.node.isDeleted() && !c.server.opts.SetAttrOnDeleted {
   263  			return unix.EINVAL
   264  		}
   265  		failureMask, failureErr := fd.impl.SetStat(req)
   266  		resp.FailureMask = failureMask
   267  		if failureErr != nil {
   268  			resp.FailureErrNo = uint32(p9.ExtractErrno(failureErr))
   269  		}
   270  		return nil
   271  	}); err != nil {
   272  		return 0, err
   273  	}
   274  	respLen := uint32(resp.SizeBytes())
   275  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   276  	return respLen, nil
   277  }
   278  
   279  // WalkHandler handles the Walk RPC.
   280  func WalkHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   281  	var req WalkReq
   282  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   283  		return 0, unix.EIO
   284  	}
   285  
   286  	startDir, err := c.lookupControlFD(req.DirFD)
   287  	if err != nil {
   288  		return 0, err
   289  	}
   290  	defer startDir.DecRef(nil)
   291  	if !startDir.IsDir() {
   292  		return 0, unix.ENOTDIR
   293  	}
   294  
   295  	// Manually marshal the inodes into the payload buffer during walk to avoid
   296  	// the slice allocation. The memory format should be WalkResp's.
   297  	var (
   298  		numInodes primitive.Uint16
   299  		status    = WalkSuccess
   300  	)
   301  	respMetaSize := status.SizeBytes() + numInodes.SizeBytes()
   302  	maxPayloadSize := respMetaSize + (len(req.Path) * (*Inode)(nil).SizeBytes())
   303  	if maxPayloadSize > math.MaxUint32 {
   304  		// Too much to walk, can't do.
   305  		return 0, unix.EIO
   306  	}
   307  	payloadBuf := comm.PayloadBuf(uint32(maxPayloadSize))
   308  	payloadPos := respMetaSize
   309  	if err := c.server.withRenameReadLock(func() error {
   310  		curDir := startDir
   311  		cu := cleanup.Make(func() {
   312  			// Destroy all newly created FDs until now. Read the new FDIDs from the
   313  			// payload buffer.
   314  			buf := comm.PayloadBuf(uint32(maxPayloadSize))[respMetaSize:]
   315  			var curIno Inode
   316  			for i := 0; i < int(numInodes); i++ {
   317  				buf = curIno.UnmarshalBytes(buf)
   318  				c.removeControlFDLocked(curIno.ControlFD)
   319  			}
   320  		})
   321  		defer cu.Clean()
   322  
   323  		for _, name := range req.Path {
   324  			if err := checkSafeName(name); err != nil {
   325  				return err
   326  			}
   327  			// Symlinks terminate walk. This client gets the symlink inode, but will
   328  			// have to invoke Walk again with the resolved path.
   329  			if curDir.IsSymlink() {
   330  				status = WalkComponentSymlink
   331  				break
   332  			}
   333  			curDir.node.opMu.RLock()
   334  			if curDir.node.isDeleted() {
   335  				// It is not safe to walk on a deleted directory. It could have been
   336  				// replaced with a malicious symlink.
   337  				curDir.node.opMu.RUnlock()
   338  				status = WalkComponentDoesNotExist
   339  				break
   340  			}
   341  			child, childStat, err := curDir.impl.Walk(name)
   342  			curDir.node.opMu.RUnlock()
   343  			if err == unix.ENOENT {
   344  				status = WalkComponentDoesNotExist
   345  				break
   346  			}
   347  			if err != nil {
   348  				return err
   349  			}
   350  			// Write inode into payload buffer.
   351  			i := Inode{ControlFD: child.id, Stat: childStat}
   352  			i.MarshalUnsafe(payloadBuf[payloadPos:])
   353  			payloadPos += i.SizeBytes()
   354  			numInodes++
   355  			curDir = child
   356  		}
   357  		cu.Release()
   358  		return nil
   359  	}); err != nil {
   360  		return 0, err
   361  	}
   362  
   363  	// WalkResp writes the walk status followed by the number of inodes in the
   364  	// beginning.
   365  	payloadBuf = status.MarshalUnsafe(payloadBuf)
   366  	numInodes.MarshalUnsafe(payloadBuf)
   367  	return uint32(payloadPos), nil
   368  }
   369  
   370  // WalkStatHandler handles the WalkStat RPC.
   371  func WalkStatHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   372  	var req WalkReq
   373  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   374  		return 0, unix.EIO
   375  	}
   376  
   377  	startDir, err := c.lookupControlFD(req.DirFD)
   378  	if err != nil {
   379  		return 0, err
   380  	}
   381  	defer startDir.DecRef(nil)
   382  
   383  	// Note that this fd is allowed to not actually be a directory when the
   384  	// only path component to walk is "" (self).
   385  	if !startDir.IsDir() {
   386  		if len(req.Path) > 1 || (len(req.Path) == 1 && len(req.Path[0]) > 0) {
   387  			return 0, unix.ENOTDIR
   388  		}
   389  	}
   390  	for i, name := range req.Path {
   391  		// First component is allowed to be "".
   392  		if i == 0 && len(name) == 0 {
   393  			continue
   394  		}
   395  		if err := checkSafeName(name); err != nil {
   396  			return 0, err
   397  		}
   398  	}
   399  
   400  	// We will manually marshal the statx results into the payload buffer as they
   401  	// are generated to avoid the slice allocation. The memory format should be
   402  	// the same as WalkStatResp's.
   403  	var numStats primitive.Uint16
   404  	maxPayloadSize := numStats.SizeBytes() + (len(req.Path) * linux.SizeOfStatx)
   405  	if maxPayloadSize > math.MaxUint32 {
   406  		// Too much to walk, can't do.
   407  		return 0, unix.EIO
   408  	}
   409  	payloadBuf := comm.PayloadBuf(uint32(maxPayloadSize))
   410  	payloadPos := numStats.SizeBytes()
   411  
   412  	if c.server.opts.WalkStatSupported {
   413  		if err = startDir.safelyRead(func() error {
   414  			return startDir.impl.WalkStat(req.Path, func(s linux.Statx) {
   415  				s.MarshalUnsafe(payloadBuf[payloadPos:])
   416  				payloadPos += s.SizeBytes()
   417  				numStats++
   418  			})
   419  		}); err != nil {
   420  			return 0, err
   421  		}
   422  		// WalkStatResp writes the number of stats in the beginning.
   423  		numStats.MarshalUnsafe(payloadBuf)
   424  		return uint32(payloadPos), nil
   425  	}
   426  
   427  	if err = c.server.withRenameReadLock(func() error {
   428  		if len(req.Path) > 0 && len(req.Path[0]) == 0 {
   429  			startDir.node.opMu.RLock()
   430  			stat, err := startDir.impl.Stat()
   431  			startDir.node.opMu.RUnlock()
   432  			if err != nil {
   433  				return err
   434  			}
   435  			stat.MarshalUnsafe(payloadBuf[payloadPos:])
   436  			payloadPos += stat.SizeBytes()
   437  			numStats++
   438  			req.Path = req.Path[1:]
   439  		}
   440  
   441  		parent := startDir
   442  		closeParent := func() {
   443  			if parent != startDir {
   444  				c.removeControlFDLocked(parent.id)
   445  			}
   446  		}
   447  		defer closeParent()
   448  
   449  		for _, name := range req.Path {
   450  			parent.node.opMu.RLock()
   451  			if parent.node.isDeleted() {
   452  				// It is not safe to walk on a deleted directory. It could have been
   453  				// replaced with a malicious symlink.
   454  				parent.node.opMu.RUnlock()
   455  				break
   456  			}
   457  			child, childStat, err := parent.impl.Walk(name)
   458  			parent.node.opMu.RUnlock()
   459  			if err != nil {
   460  				if err == unix.ENOENT {
   461  					break
   462  				}
   463  				return err
   464  			}
   465  
   466  			// Update with next generation.
   467  			closeParent()
   468  			parent = child
   469  
   470  			// Write results.
   471  			childStat.MarshalUnsafe(payloadBuf[payloadPos:])
   472  			payloadPos += childStat.SizeBytes()
   473  			numStats++
   474  
   475  			// Symlinks terminate walk. This client gets the symlink stat result, but
   476  			// will have to invoke Walk again with the resolved path.
   477  			if childStat.Mode&unix.S_IFMT == unix.S_IFLNK {
   478  				break
   479  			}
   480  		}
   481  		return nil
   482  	}); err != nil {
   483  		return 0, err
   484  	}
   485  
   486  	// WalkStatResp writes the number of stats in the beginning.
   487  	numStats.MarshalUnsafe(payloadBuf)
   488  	return uint32(payloadPos), nil
   489  }
   490  
   491  // OpenAtHandler handles the OpenAt RPC.
   492  func OpenAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   493  	var req OpenAtReq
   494  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   495  		return 0, unix.EIO
   496  	}
   497  
   498  	// Only keep allowed open flags.
   499  	if allowedFlags := req.Flags & allowedOpenFlags; allowedFlags != req.Flags {
   500  		log.Debugf("discarding open flags that are not allowed: old open flags = %d, new open flags = %d", req.Flags, allowedFlags)
   501  		req.Flags = allowedFlags
   502  	}
   503  
   504  	accessMode := req.Flags & unix.O_ACCMODE
   505  	trunc := req.Flags&unix.O_TRUNC != 0
   506  	if c.readonly && (accessMode != unix.O_RDONLY || trunc) {
   507  		return 0, unix.EROFS
   508  	}
   509  
   510  	fd, err := c.lookupControlFD(req.FD)
   511  	if err != nil {
   512  		return 0, err
   513  	}
   514  	defer fd.DecRef(nil)
   515  	if fd.IsDir() {
   516  		// Directory is not truncatable and must be opened with O_RDONLY.
   517  		if accessMode != unix.O_RDONLY || trunc {
   518  			return 0, unix.EISDIR
   519  		}
   520  	}
   521  
   522  	var (
   523  		openFD     *OpenFD
   524  		hostOpenFD int
   525  	)
   526  	if err := fd.safelyRead(func() error {
   527  		if fd.node.isDeleted() || fd.IsSymlink() {
   528  			return unix.EINVAL
   529  		}
   530  		openFD, hostOpenFD, err = fd.impl.Open(req.Flags)
   531  		return err
   532  	}); err != nil {
   533  		return 0, err
   534  	}
   535  
   536  	if hostOpenFD >= 0 {
   537  		comm.DonateFD(hostOpenFD)
   538  	}
   539  	resp := OpenAtResp{OpenFD: openFD.id}
   540  	respLen := uint32(resp.SizeBytes())
   541  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   542  	return respLen, nil
   543  }
   544  
   545  // OpenCreateAtHandler handles the OpenCreateAt RPC.
   546  func OpenCreateAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   547  	if c.readonly {
   548  		return 0, unix.EROFS
   549  	}
   550  	var req OpenCreateAtReq
   551  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   552  		return 0, unix.EIO
   553  	}
   554  
   555  	// Only keep allowed open flags.
   556  	if allowedFlags := req.Flags & allowedOpenFlags; allowedFlags != req.Flags {
   557  		log.Debugf("discarding open flags that are not allowed: old open flags = %d, new open flags = %d", req.Flags, allowedFlags)
   558  		req.Flags = allowedFlags
   559  	}
   560  
   561  	name := string(req.Name)
   562  	if err := checkSafeName(name); err != nil {
   563  		return 0, err
   564  	}
   565  
   566  	fd, err := c.lookupControlFD(req.DirFD)
   567  	if err != nil {
   568  		return 0, err
   569  	}
   570  	defer fd.DecRef(nil)
   571  	if !fd.IsDir() {
   572  		return 0, unix.ENOTDIR
   573  	}
   574  
   575  	var (
   576  		childFD    *ControlFD
   577  		childStat  linux.Statx
   578  		openFD     *OpenFD
   579  		hostOpenFD int
   580  	)
   581  	if err := fd.safelyWrite(func() error {
   582  		if fd.node.isDeleted() {
   583  			return unix.EINVAL
   584  		}
   585  		childFD, childStat, openFD, hostOpenFD, err = fd.impl.OpenCreate(req.Mode, req.UID, req.GID, name, uint32(req.Flags))
   586  		return err
   587  	}); err != nil {
   588  		return 0, err
   589  	}
   590  
   591  	if hostOpenFD >= 0 {
   592  		comm.DonateFD(hostOpenFD)
   593  	}
   594  	resp := OpenCreateAtResp{
   595  		NewFD: openFD.id,
   596  		Child: Inode{
   597  			ControlFD: childFD.id,
   598  			Stat:      childStat,
   599  		},
   600  	}
   601  	respLen := uint32(resp.SizeBytes())
   602  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   603  	return respLen, nil
   604  }
   605  
   606  // CloseHandler handles the Close RPC.
   607  func CloseHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   608  	var req CloseReq
   609  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   610  		return 0, unix.EIO
   611  	}
   612  	for _, fd := range req.FDs {
   613  		c.removeFD(fd)
   614  	}
   615  
   616  	// There is no response message for this.
   617  	return 0, nil
   618  }
   619  
   620  // FSyncHandler handles the FSync RPC.
   621  func FSyncHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   622  	var req FsyncReq
   623  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   624  		return 0, unix.EIO
   625  	}
   626  
   627  	// Return the first error we encounter, but sync everything we can
   628  	// regardless.
   629  	var retErr error
   630  	for _, fdid := range req.FDs {
   631  		if err := c.fsyncFD(fdid); err != nil && retErr == nil {
   632  			retErr = err
   633  		}
   634  	}
   635  
   636  	// There is no response message for this.
   637  	return 0, retErr
   638  }
   639  
   640  func (c *Connection) fsyncFD(id FDID) error {
   641  	fd, err := c.lookupOpenFD(id)
   642  	if err != nil {
   643  		return err
   644  	}
   645  	defer fd.DecRef(nil)
   646  	return fd.controlFD.safelyRead(func() error {
   647  		return fd.impl.Sync()
   648  	})
   649  }
   650  
   651  // PWriteHandler handles the PWrite RPC.
   652  func PWriteHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   653  	if c.readonly {
   654  		return 0, unix.EROFS
   655  	}
   656  	var req PWriteReq
   657  	// Note that it is an optimized Unmarshal operation which avoids any buffer
   658  	// allocation and copying. req.Buf just points to payload. This is safe to do
   659  	// as the handler owns payload and req's lifetime is limited to the handler.
   660  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   661  		return 0, unix.EIO
   662  	}
   663  
   664  	fd, err := c.lookupOpenFD(req.FD)
   665  	if err != nil {
   666  		return 0, err
   667  	}
   668  	defer fd.DecRef(nil)
   669  	if !fd.writable {
   670  		return 0, unix.EBADF
   671  	}
   672  	var count uint64
   673  	if err := fd.controlFD.safelyWrite(func() error {
   674  		count, err = fd.impl.Write(req.Buf, uint64(req.Offset))
   675  		return err
   676  	}); err != nil {
   677  		return 0, err
   678  	}
   679  	resp := PWriteResp{Count: count}
   680  	respLen := uint32(resp.SizeBytes())
   681  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   682  	return respLen, nil
   683  }
   684  
   685  // PReadHandler handles the PRead RPC.
   686  func PReadHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   687  	var req PReadReq
   688  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   689  		return 0, unix.EIO
   690  	}
   691  
   692  	fd, err := c.lookupOpenFD(req.FD)
   693  	if err != nil {
   694  		return 0, err
   695  	}
   696  	defer fd.DecRef(nil)
   697  	if !fd.readable {
   698  		return 0, unix.EBADF
   699  	}
   700  
   701  	// To save an allocation and a copy, we directly read into the payload
   702  	// buffer. The rest of the response message is manually marshalled.
   703  	var resp PReadResp
   704  	respMetaSize := uint32(resp.NumBytes.SizeBytes())
   705  	respPayloadLen := respMetaSize + req.Count
   706  	if respPayloadLen > c.maxMessageSize {
   707  		return 0, unix.ENOBUFS
   708  	}
   709  	payloadBuf := comm.PayloadBuf(respPayloadLen)
   710  	var n uint64
   711  	if err := fd.controlFD.safelyRead(func() error {
   712  		n, err = fd.impl.Read(payloadBuf[respMetaSize:], req.Offset)
   713  		return err
   714  	}); err != nil {
   715  		return 0, err
   716  	}
   717  
   718  	// Write the response metadata onto the payload buffer. The response contents
   719  	// already have been written immediately after it.
   720  	resp.NumBytes = primitive.Uint64(n)
   721  	resp.NumBytes.MarshalUnsafe(payloadBuf)
   722  	return respMetaSize + uint32(n), nil
   723  }
   724  
   725  // MkdirAtHandler handles the MkdirAt RPC.
   726  func MkdirAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   727  	if c.readonly {
   728  		return 0, unix.EROFS
   729  	}
   730  	var req MkdirAtReq
   731  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   732  		return 0, unix.EIO
   733  	}
   734  
   735  	name := string(req.Name)
   736  	if err := checkSafeName(name); err != nil {
   737  		return 0, err
   738  	}
   739  
   740  	fd, err := c.lookupControlFD(req.DirFD)
   741  	if err != nil {
   742  		return 0, err
   743  	}
   744  	defer fd.DecRef(nil)
   745  	if !fd.IsDir() {
   746  		return 0, unix.ENOTDIR
   747  	}
   748  	var (
   749  		childDir     *ControlFD
   750  		childDirStat linux.Statx
   751  	)
   752  	if err := fd.safelyWrite(func() error {
   753  		if fd.node.isDeleted() {
   754  			return unix.EINVAL
   755  		}
   756  		childDir, childDirStat, err = fd.impl.Mkdir(req.Mode, req.UID, req.GID, name)
   757  		return err
   758  	}); err != nil {
   759  		return 0, err
   760  	}
   761  
   762  	resp := MkdirAtResp{
   763  		ChildDir: Inode{
   764  			ControlFD: childDir.id,
   765  			Stat:      childDirStat,
   766  		},
   767  	}
   768  	respLen := uint32(resp.SizeBytes())
   769  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   770  	return respLen, nil
   771  }
   772  
   773  // MknodAtHandler handles the MknodAt RPC.
   774  func MknodAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   775  	if c.readonly {
   776  		return 0, unix.EROFS
   777  	}
   778  	var req MknodAtReq
   779  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   780  		return 0, unix.EIO
   781  	}
   782  
   783  	name := string(req.Name)
   784  	if err := checkSafeName(name); err != nil {
   785  		return 0, err
   786  	}
   787  
   788  	fd, err := c.lookupControlFD(req.DirFD)
   789  	if err != nil {
   790  		return 0, err
   791  	}
   792  	defer fd.DecRef(nil)
   793  	if !fd.IsDir() {
   794  		return 0, unix.ENOTDIR
   795  	}
   796  	var (
   797  		child     *ControlFD
   798  		childStat linux.Statx
   799  	)
   800  	if err := fd.safelyWrite(func() error {
   801  		if fd.node.isDeleted() {
   802  			return unix.EINVAL
   803  		}
   804  		child, childStat, err = fd.impl.Mknod(req.Mode, req.UID, req.GID, name, uint32(req.Minor), uint32(req.Major))
   805  		return err
   806  	}); err != nil {
   807  		return 0, err
   808  	}
   809  	resp := MknodAtResp{
   810  		Child: Inode{
   811  			ControlFD: child.id,
   812  			Stat:      childStat,
   813  		},
   814  	}
   815  	respLen := uint32(resp.SizeBytes())
   816  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   817  	return respLen, nil
   818  }
   819  
   820  // SymlinkAtHandler handles the SymlinkAt RPC.
   821  func SymlinkAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   822  	if c.readonly {
   823  		return 0, unix.EROFS
   824  	}
   825  	var req SymlinkAtReq
   826  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   827  		return 0, unix.EIO
   828  	}
   829  
   830  	name := string(req.Name)
   831  	if err := checkSafeName(name); err != nil {
   832  		return 0, err
   833  	}
   834  
   835  	fd, err := c.lookupControlFD(req.DirFD)
   836  	if err != nil {
   837  		return 0, err
   838  	}
   839  	defer fd.DecRef(nil)
   840  	if !fd.IsDir() {
   841  		return 0, unix.ENOTDIR
   842  	}
   843  	var (
   844  		symlink     *ControlFD
   845  		symlinkStat linux.Statx
   846  	)
   847  	if err := fd.safelyWrite(func() error {
   848  		if fd.node.isDeleted() {
   849  			return unix.EINVAL
   850  		}
   851  		symlink, symlinkStat, err = fd.impl.Symlink(name, string(req.Target), req.UID, req.GID)
   852  		return err
   853  	}); err != nil {
   854  		return 0, err
   855  	}
   856  	resp := SymlinkAtResp{
   857  		Symlink: Inode{
   858  			ControlFD: symlink.id,
   859  			Stat:      symlinkStat,
   860  		},
   861  	}
   862  	respLen := uint32(resp.SizeBytes())
   863  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   864  	return respLen, nil
   865  }
   866  
   867  // LinkAtHandler handles the LinkAt RPC.
   868  func LinkAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   869  	if c.readonly {
   870  		return 0, unix.EROFS
   871  	}
   872  	var req LinkAtReq
   873  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   874  		return 0, unix.EIO
   875  	}
   876  
   877  	name := string(req.Name)
   878  	if err := checkSafeName(name); err != nil {
   879  		return 0, err
   880  	}
   881  
   882  	fd, err := c.lookupControlFD(req.DirFD)
   883  	if err != nil {
   884  		return 0, err
   885  	}
   886  	defer fd.DecRef(nil)
   887  	if !fd.IsDir() {
   888  		return 0, unix.ENOTDIR
   889  	}
   890  
   891  	targetFD, err := c.lookupControlFD(req.Target)
   892  	if err != nil {
   893  		return 0, err
   894  	}
   895  	defer targetFD.DecRef(nil)
   896  	if targetFD.IsDir() {
   897  		// Can not create hard link to directory.
   898  		return 0, unix.EPERM
   899  	}
   900  	var (
   901  		link     *ControlFD
   902  		linkStat linux.Statx
   903  	)
   904  	if err := fd.safelyWrite(func() error {
   905  		if fd.node.isDeleted() {
   906  			return unix.EINVAL
   907  		}
   908  		// This is a lock ordering issue. Need to provide safe read guarantee for
   909  		// targetFD. We know targetFD is not a directory while fd is a directory.
   910  		// So targetFD would either be a descendant of fd or exist elsewhere in the
   911  		// tree. So locking fd first and targetFD later should not lead to cycles.
   912  		targetFD.node.opMu.RLock()
   913  		defer targetFD.node.opMu.RUnlock()
   914  		if targetFD.node.isDeleted() {
   915  			return unix.EINVAL
   916  		}
   917  		link, linkStat, err = targetFD.impl.Link(fd.impl, name)
   918  		return err
   919  	}); err != nil {
   920  		return 0, err
   921  	}
   922  	resp := LinkAtResp{
   923  		Link: Inode{
   924  			ControlFD: link.id,
   925  			Stat:      linkStat,
   926  		},
   927  	}
   928  	respLen := uint32(resp.SizeBytes())
   929  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   930  	return respLen, nil
   931  }
   932  
   933  // FStatFSHandler handles the FStatFS RPC.
   934  func FStatFSHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   935  	var req FStatFSReq
   936  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   937  		return 0, unix.EIO
   938  	}
   939  
   940  	fd, err := c.lookupControlFD(req.FD)
   941  	if err != nil {
   942  		return 0, err
   943  	}
   944  	defer fd.DecRef(nil)
   945  	var resp StatFS
   946  	if err := fd.safelyRead(func() error {
   947  		resp, err = fd.impl.StatFS()
   948  		return err
   949  	}); err != nil {
   950  		return 0, err
   951  	}
   952  	respLen := uint32(resp.SizeBytes())
   953  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
   954  	return respLen, nil
   955  }
   956  
   957  // FAllocateHandler handles the FAllocate RPC.
   958  func FAllocateHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   959  	if c.readonly {
   960  		return 0, unix.EROFS
   961  	}
   962  	var req FAllocateReq
   963  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   964  		return 0, unix.EIO
   965  	}
   966  
   967  	fd, err := c.lookupOpenFD(req.FD)
   968  	if err != nil {
   969  		return 0, err
   970  	}
   971  	defer fd.DecRef(nil)
   972  	if !fd.writable {
   973  		return 0, unix.EBADF
   974  	}
   975  
   976  	return 0, fd.controlFD.safelyWrite(func() error {
   977  		if fd.controlFD.node.isDeleted() && !c.server.opts.AllocateOnDeleted {
   978  			return unix.EINVAL
   979  		}
   980  		return fd.impl.Allocate(req.Mode, req.Offset, req.Length)
   981  	})
   982  }
   983  
   984  // ReadLinkAtHandler handles the ReadLinkAt RPC.
   985  func ReadLinkAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
   986  	var req ReadLinkAtReq
   987  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
   988  		return 0, unix.EIO
   989  	}
   990  
   991  	fd, err := c.lookupControlFD(req.FD)
   992  	if err != nil {
   993  		return 0, err
   994  	}
   995  	defer fd.DecRef(nil)
   996  	if !fd.IsSymlink() {
   997  		return 0, unix.EINVAL
   998  	}
   999  
  1000  	// We will manually marshal ReadLinkAtResp, which just contains a
  1001  	// SizedString. Let Readlinkat directly write into the payload buffer and
  1002  	// manually write the string size before it.
  1003  	var (
  1004  		linkLen primitive.Uint16
  1005  		n       uint16
  1006  	)
  1007  	respMetaSize := uint32(linkLen.SizeBytes())
  1008  	if fd.safelyRead(func() error {
  1009  		if fd.node.isDeleted() {
  1010  			return unix.EINVAL
  1011  		}
  1012  		n, err = fd.impl.Readlink(func(dataLen uint32) []byte {
  1013  			return comm.PayloadBuf(dataLen + respMetaSize)[respMetaSize:]
  1014  		})
  1015  		return err
  1016  	}); err != nil {
  1017  		return 0, err
  1018  	}
  1019  	linkLen = primitive.Uint16(n)
  1020  	linkLen.MarshalUnsafe(comm.PayloadBuf(respMetaSize))
  1021  	return respMetaSize + uint32(n), nil
  1022  }
  1023  
  1024  // FlushHandler handles the Flush RPC.
  1025  func FlushHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1026  	var req FlushReq
  1027  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1028  		return 0, unix.EIO
  1029  	}
  1030  
  1031  	fd, err := c.lookupOpenFD(req.FD)
  1032  	if err != nil {
  1033  		return 0, err
  1034  	}
  1035  	defer fd.DecRef(nil)
  1036  
  1037  	return 0, fd.controlFD.safelyRead(func() error {
  1038  		return fd.impl.Flush()
  1039  	})
  1040  }
  1041  
  1042  // ConnectHandler handles the Connect RPC.
  1043  func ConnectHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1044  	var req ConnectReq
  1045  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1046  		return 0, unix.EIO
  1047  	}
  1048  
  1049  	fd, err := c.lookupControlFD(req.FD)
  1050  	if err != nil {
  1051  		return 0, err
  1052  	}
  1053  	defer fd.DecRef(nil)
  1054  	if !fd.IsSocket() {
  1055  		return 0, unix.ENOTSOCK
  1056  	}
  1057  	var sock int
  1058  	if err := fd.safelyRead(func() error {
  1059  		if fd.node.isDeleted() {
  1060  			return unix.EINVAL
  1061  		}
  1062  		sock, err = fd.impl.Connect(req.SockType)
  1063  		return err
  1064  	}); err != nil {
  1065  		return 0, err
  1066  	}
  1067  
  1068  	comm.DonateFD(sock)
  1069  	return 0, nil
  1070  }
  1071  
  1072  // BindAtHandler handles the BindAt RPC.
  1073  func BindAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1074  	var req BindAtReq
  1075  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1076  		return 0, unix.EIO
  1077  	}
  1078  
  1079  	name := string(req.Name)
  1080  	if err := checkSafeName(name); err != nil {
  1081  		return 0, err
  1082  	}
  1083  
  1084  	dir, err := c.lookupControlFD(req.DirFD)
  1085  	if err != nil {
  1086  		return 0, err
  1087  	}
  1088  	defer dir.DecRef(nil)
  1089  
  1090  	if !dir.IsDir() {
  1091  		return 0, unix.ENOTDIR
  1092  	}
  1093  
  1094  	var (
  1095  		childFD       *ControlFD
  1096  		childStat     linux.Statx
  1097  		boundSocketFD *BoundSocketFD
  1098  		hostSocketFD  int
  1099  	)
  1100  	if err := dir.safelyWrite(func() error {
  1101  		if dir.node.isDeleted() {
  1102  			return unix.EINVAL
  1103  		}
  1104  		childFD, childStat, boundSocketFD, hostSocketFD, err = dir.impl.BindAt(name, uint32(req.SockType), req.Mode, req.UID, req.GID)
  1105  		return err
  1106  	}); err != nil {
  1107  		return 0, err
  1108  	}
  1109  
  1110  	comm.DonateFD(hostSocketFD)
  1111  	resp := BindAtResp{
  1112  		Child: Inode{
  1113  			ControlFD: childFD.id,
  1114  			Stat:      childStat,
  1115  		},
  1116  		BoundSocketFD: boundSocketFD.id,
  1117  	}
  1118  	respLen := uint32(resp.SizeBytes())
  1119  	resp.MarshalUnsafe(comm.PayloadBuf(respLen))
  1120  	return respLen, nil
  1121  }
  1122  
  1123  // ListenHandler handles the Listen RPC.
  1124  func ListenHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1125  	var req ListenReq
  1126  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1127  		return 0, unix.EIO
  1128  	}
  1129  	sock, err := c.lookupBoundSocketFD(req.FD)
  1130  	if err != nil {
  1131  		return 0, err
  1132  	}
  1133  	if err := sock.controlFD.safelyRead(func() error {
  1134  		if sock.controlFD.node.isDeleted() {
  1135  			return unix.EINVAL
  1136  		}
  1137  		return sock.impl.Listen(req.Backlog)
  1138  	}); err != nil {
  1139  		return 0, err
  1140  	}
  1141  	return 0, nil
  1142  }
  1143  
  1144  // AcceptHandler handles the Accept RPC.
  1145  func AcceptHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1146  	var req AcceptReq
  1147  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1148  		return 0, unix.EIO
  1149  	}
  1150  	sock, err := c.lookupBoundSocketFD(req.FD)
  1151  	if err != nil {
  1152  		return 0, err
  1153  	}
  1154  	var (
  1155  		newSock  int
  1156  		peerAddr string
  1157  	)
  1158  	if err := sock.controlFD.safelyRead(func() error {
  1159  		if sock.controlFD.node.isDeleted() {
  1160  			return unix.EINVAL
  1161  		}
  1162  		var err error
  1163  		newSock, peerAddr, err = sock.impl.Accept()
  1164  		return err
  1165  	}); err != nil {
  1166  		return 0, err
  1167  	}
  1168  	comm.DonateFD(newSock)
  1169  	resp := AcceptResp{
  1170  		PeerAddr: SizedString(peerAddr),
  1171  	}
  1172  	respLen := uint32(resp.SizeBytes())
  1173  	resp.MarshalBytes(comm.PayloadBuf(respLen))
  1174  	return respLen, nil
  1175  }
  1176  
  1177  // UnlinkAtHandler handles the UnlinkAt RPC.
  1178  func UnlinkAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1179  	if c.readonly {
  1180  		return 0, unix.EROFS
  1181  	}
  1182  	var req UnlinkAtReq
  1183  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1184  		return 0, unix.EIO
  1185  	}
  1186  
  1187  	name := string(req.Name)
  1188  	if err := checkSafeName(name); err != nil {
  1189  		return 0, err
  1190  	}
  1191  
  1192  	fd, err := c.lookupControlFD(req.DirFD)
  1193  	if err != nil {
  1194  		return 0, err
  1195  	}
  1196  	defer fd.DecRef(nil)
  1197  	if !fd.IsDir() {
  1198  		return 0, unix.ENOTDIR
  1199  	}
  1200  	return 0, fd.safelyWrite(func() error {
  1201  		if fd.node.isDeleted() {
  1202  			return unix.EINVAL
  1203  		}
  1204  
  1205  		fd.node.childrenMu.Lock()
  1206  		childNode := fd.node.LookupChildLocked(name)
  1207  		fd.node.childrenMu.Unlock()
  1208  		if childNode != nil {
  1209  			// Before we do the unlink itself, we need to ensure that there
  1210  			// are no operations in flight on associated path node.
  1211  			//
  1212  			// This is another case of a lock ordering issue, but since we always
  1213  			// acquire deeper in the hierarchy, we know that we are free of cycles.
  1214  			childNode.opMu.Lock()
  1215  			defer childNode.opMu.Unlock()
  1216  		}
  1217  		if err := fd.impl.Unlink(name, uint32(req.Flags)); err != nil {
  1218  			return err
  1219  		}
  1220  		// Since fd.node.opMu is locked for writing, there will not be a concurrent
  1221  		// creation of a node at that position if childNode == nil. So only remove
  1222  		// node if one existed.
  1223  		if childNode != nil {
  1224  			fd.node.childrenMu.Lock()
  1225  			fd.node.removeChildLocked(name)
  1226  			fd.node.childrenMu.Unlock()
  1227  			childNode.markDeletedRecursive()
  1228  		}
  1229  		return nil
  1230  	})
  1231  }
  1232  
// RenameAtHandler handles the RenameAt RPC. It renames req.OldName in the
// directory req.OldDir to req.NewName in the directory req.NewDir through the
// old directory's implementation and then mirrors the change in the node
// tree. The response carries no payload.
//
// It fails with EROFS on read-only connections, EIO if the request cannot be
// unmarshalled, ENOTDIR if either FD is not a directory, EINVAL if a name is
// unsafe or either directory node was deleted, or with any lookup/RenameAt
// error.
func RenameAtHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
	if c.readonly {
		return 0, unix.EROFS
	}
	var req RenameAtReq
	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
		return 0, unix.EIO
	}

	// Both names must be plain path components.
	oldName := string(req.OldName)
	if err := checkSafeName(oldName); err != nil {
		return 0, err
	}
	newName := string(req.NewName)
	if err := checkSafeName(newName); err != nil {
		return 0, err
	}

	oldDir, err := c.lookupControlFD(req.OldDir)
	if err != nil {
		return 0, err
	}
	defer oldDir.DecRef(nil)
	newDir, err := c.lookupControlFD(req.NewDir)
	if err != nil {
		return 0, err
	}
	defer newDir.DecRef(nil)

	if !oldDir.IsDir() || !newDir.IsDir() {
		return 0, unix.ENOTDIR
	}

	// Hold RenameMu for writing during rename, this is important.
	return 0, oldDir.safelyGlobal(func() error {
		if oldDir.node.isDeleted() || newDir.node.isDeleted() {
			return unix.EINVAL
		}

		// Renaming an entry onto itself is a no-op.
		if oldDir.node == newDir.node && oldName == newName {
			// Nothing to do.
			return nil
		}

		// Attempt the actual rename.
		if err := oldDir.impl.RenameAt(oldName, newDir.impl, newName); err != nil {
			return err
		}

		// Successful, so update the node tree. Note that since we have global
		// concurrency guarantee here, the node tree can not be modified
		// concurrently in any way.

		// First see if a file was deleted by being replaced by the rename. If so,
		// detach it from node tree and mark it as deleted.
		newDir.node.childrenMu.Lock()
		replaced := newDir.node.removeChildLocked(newName)
		newDir.node.childrenMu.Unlock()
		if replaced != nil {
			replaced.opMu.Lock()
			replaced.markDeletedRecursive()
			replaced.opMu.Unlock()
		}

		// Now move the renamed node to the right position.
		oldDir.node.childrenMu.Lock()
		renamed := oldDir.node.removeChildLocked(oldName)
		oldDir.node.childrenMu.Unlock()
		// renamed is nil when no node was tracked under oldName; then there
		// is nothing to move.
		if renamed != nil {
			// Swap the parent reference: release the one held on the old
			// parent and take one on the new parent.
			renamed.parent.DecRef(nil)
			renamed.parent = newDir.node
			renamed.parent.IncRef()
			renamed.name = newName
			newDir.node.childrenMu.Lock()
			newDir.node.insertChildLocked(newName, renamed)
			newDir.node.childrenMu.Unlock()

			// Now update all FDs under the subtree rooted at renamed.
			notifyRenameRecursive(renamed)
		}
		return nil
	})
}
  1317  
  1318  func notifyRenameRecursive(n *Node) {
  1319  	n.forEachFD(func(cfd *ControlFD) {
  1320  		cfd.impl.Renamed()
  1321  		cfd.forEachOpenFD(func(ofd *OpenFD) {
  1322  			ofd.impl.Renamed()
  1323  		})
  1324  	})
  1325  
  1326  	n.forEachChild(func(child *Node) {
  1327  		notifyRenameRecursive(child)
  1328  	})
  1329  }
  1330  
  1331  // Getdents64Handler handles the Getdents64 RPC.
  1332  func Getdents64Handler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1333  	var req Getdents64Req
  1334  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1335  		return 0, unix.EIO
  1336  	}
  1337  
  1338  	fd, err := c.lookupOpenFD(req.DirFD)
  1339  	if err != nil {
  1340  		return 0, err
  1341  	}
  1342  	defer fd.DecRef(nil)
  1343  	if !fd.controlFD.IsDir() {
  1344  		return 0, unix.ENOTDIR
  1345  	}
  1346  
  1347  	seek0 := false
  1348  	if req.Count < 0 {
  1349  		seek0 = true
  1350  		req.Count = -req.Count
  1351  	}
  1352  
  1353  	// We will manually marshal the response Getdents64Resp.
  1354  
  1355  	// numDirents is the number of dirents marshalled into the payload.
  1356  	var numDirents primitive.Uint16
  1357  	// The payload starts with numDirents, dirents go right after that.
  1358  	// payloadBufPos represents the position at which to write the next dirent.
  1359  	payloadBufPos := uint32(numDirents.SizeBytes())
  1360  	// Request enough payloadBuf for 10 dirents, we will extend when needed.
  1361  	// unix.Dirent is 280 bytes for amd64.
  1362  	payloadBuf := comm.PayloadBuf(payloadBufPos + 10*unixDirentMaxSize)
  1363  	if err := fd.controlFD.safelyRead(func() error {
  1364  		if fd.controlFD.node.isDeleted() {
  1365  			return unix.EINVAL
  1366  		}
  1367  		return fd.impl.Getdent64(uint32(req.Count), seek0, func(dirent Dirent64) {
  1368  			// Paste the dirent into the payload buffer without having the dirent
  1369  			// escape. Request a larger buffer if needed.
  1370  			if int(payloadBufPos)+dirent.SizeBytes() > len(payloadBuf) {
  1371  				// Ask for 10 large dirents worth of more space.
  1372  				payloadBuf = comm.PayloadBuf(payloadBufPos + 10*unixDirentMaxSize)
  1373  			}
  1374  			dirent.MarshalBytes(payloadBuf[payloadBufPos:])
  1375  			payloadBufPos += uint32(dirent.SizeBytes())
  1376  			numDirents++
  1377  		})
  1378  	}); err != nil {
  1379  		return 0, err
  1380  	}
  1381  
  1382  	// The number of dirents goes at the beginning of the payload.
  1383  	numDirents.MarshalUnsafe(payloadBuf)
  1384  	return payloadBufPos, nil
  1385  }
  1386  
  1387  // FGetXattrHandler handles the FGetXattr RPC.
  1388  func FGetXattrHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1389  	var req FGetXattrReq
  1390  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1391  		return 0, unix.EIO
  1392  	}
  1393  
  1394  	fd, err := c.lookupControlFD(req.FD)
  1395  	if err != nil {
  1396  		return 0, err
  1397  	}
  1398  	defer fd.DecRef(nil)
  1399  
  1400  	// Manually marshal FGetXattrResp to avoid allocations and copying.
  1401  	// FGetXattrResp simply is a wrapper around SizedString.
  1402  	var valueLen primitive.Uint16
  1403  	respMetaSize := uint32(valueLen.SizeBytes())
  1404  	var n uint16
  1405  	if err := fd.safelyRead(func() error {
  1406  		if fd.node.isDeleted() {
  1407  			return unix.EINVAL
  1408  		}
  1409  		n, err = fd.impl.GetXattr(string(req.Name), uint32(req.BufSize), func(dataLen uint32) []byte {
  1410  			return comm.PayloadBuf(dataLen + respMetaSize)[respMetaSize:]
  1411  		})
  1412  		return err
  1413  	}); err != nil {
  1414  		return 0, err
  1415  	}
  1416  	payloadBuf := comm.PayloadBuf(respMetaSize)
  1417  	valueLen = primitive.Uint16(n)
  1418  	valueLen.MarshalBytes(payloadBuf)
  1419  	return respMetaSize + uint32(n), nil
  1420  }
  1421  
  1422  // FSetXattrHandler handles the FSetXattr RPC.
  1423  func FSetXattrHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1424  	if c.readonly {
  1425  		return 0, unix.EROFS
  1426  	}
  1427  	var req FSetXattrReq
  1428  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1429  		return 0, unix.EIO
  1430  	}
  1431  
  1432  	fd, err := c.lookupControlFD(req.FD)
  1433  	if err != nil {
  1434  		return 0, err
  1435  	}
  1436  	defer fd.DecRef(nil)
  1437  	return 0, fd.safelyWrite(func() error {
  1438  		if fd.node.isDeleted() {
  1439  			return unix.EINVAL
  1440  		}
  1441  		return fd.impl.SetXattr(string(req.Name), string(req.Value), uint32(req.Flags))
  1442  	})
  1443  }
  1444  
  1445  // FListXattrHandler handles the FListXattr RPC.
  1446  func FListXattrHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1447  	var req FListXattrReq
  1448  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1449  		return 0, unix.EIO
  1450  	}
  1451  
  1452  	fd, err := c.lookupControlFD(req.FD)
  1453  	if err != nil {
  1454  		return 0, err
  1455  	}
  1456  	defer fd.DecRef(nil)
  1457  
  1458  	var resp FListXattrResp
  1459  	if fd.safelyRead(func() error {
  1460  		if fd.node.isDeleted() {
  1461  			return unix.EINVAL
  1462  		}
  1463  		resp.Xattrs, err = fd.impl.ListXattr(req.Size)
  1464  		return err
  1465  	}); err != nil {
  1466  		return 0, err
  1467  	}
  1468  	respLen := uint32(resp.SizeBytes())
  1469  	resp.MarshalBytes(comm.PayloadBuf(respLen))
  1470  	return respLen, nil
  1471  }
  1472  
  1473  // FRemoveXattrHandler handles the FRemoveXattr RPC.
  1474  func FRemoveXattrHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
  1475  	if c.readonly {
  1476  		return 0, unix.EROFS
  1477  	}
  1478  	var req FRemoveXattrReq
  1479  	if _, ok := req.CheckedUnmarshal(comm.PayloadBuf(payloadLen)); !ok {
  1480  		return 0, unix.EIO
  1481  	}
  1482  
  1483  	fd, err := c.lookupControlFD(req.FD)
  1484  	if err != nil {
  1485  		return 0, err
  1486  	}
  1487  	defer fd.DecRef(nil)
  1488  
  1489  	return 0, fd.safelyWrite(func() error {
  1490  		return fd.impl.RemoveXattr(string(req.Name))
  1491  	})
  1492  }
  1493  
  1494  // checkSafeName validates the name and returns nil or returns an error.
  1495  func checkSafeName(name string) error {
  1496  	if name != "" && !strings.Contains(name, "/") && name != "." && name != ".." {
  1497  		return nil
  1498  	}
  1499  	return unix.EINVAL
  1500  }