gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/lisafs/client.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package lisafs
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  
    21  	"golang.org/x/sys/unix"
    22  	"gvisor.dev/gvisor/pkg/context"
    23  	"gvisor.dev/gvisor/pkg/flipcall"
    24  	"gvisor.dev/gvisor/pkg/log"
    25  	"gvisor.dev/gvisor/pkg/sync"
    26  	"gvisor.dev/gvisor/pkg/unet"
    27  )
    28  
    29  const (
    30  	// fdsToCloseBatchSize is the number of closed FDs batched before an Close
    31  	// RPC is made to close them all. fdsToCloseBatchSize is immutable.
    32  	fdsToCloseBatchSize = 100
    33  )
    34  
    35  // Client helps manage a connection to the lisafs server and pass messages
    36  // efficiently. There is a 1:1 mapping between a Connection and a Client.
    37  type Client struct {
    38  	// sockComm is the main socket by which this connections is established.
    39  	// Communication over the socket is synchronized by sockMu.
    40  	sockMu   sync.Mutex
    41  	sockComm *sockCommunicator
    42  
    43  	// channelsMu protects channels and availableChannels.
    44  	channelsMu sync.Mutex
    45  	// channels tracks all the channels.
    46  	channels []*channel
    47  	// availableChannels is a LIFO (stack) of channels available to be used.
    48  	availableChannels []*channel
    49  	// activeWg represents active channels.
    50  	activeWg sync.WaitGroup
    51  
    52  	// watchdogWg only holds the watchdog goroutine.
    53  	watchdogWg sync.WaitGroup
    54  
    55  	// supported caches information about which messages are supported. It is
    56  	// indexed by MID. An MID is supported if supported[MID] is true.
    57  	supported []bool
    58  
    59  	// maxMessageSize is the maximum payload length (in bytes) that can be sent.
    60  	// It is initialized on Mount and is immutable.
    61  	maxMessageSize uint32
    62  
    63  	// fdsToClose tracks the FDs to close. It caches the FDs no longer being used
    64  	// by the client and closes them in one shot. It is not preserved across
    65  	// checkpoint/restore as FDIDs are not preserved.
    66  	fdsMu      sync.Mutex
    67  	fdsToClose []FDID
    68  }
    69  
    70  // NewClient creates a new client for communication with the server. It mounts
    71  // the server and creates channels for fast IPC. NewClient takes ownership over
    72  // the passed socket. On success, it returns the initialized client along with
    73  // the root Inode.
    74  func NewClient(sock *unet.Socket) (*Client, Inode, int, error) {
    75  	c := &Client{
    76  		sockComm:       newSockComm(sock),
    77  		maxMessageSize: 1 << 20, // 1 MB for now.
    78  		fdsToClose:     make([]FDID, 0, fdsToCloseBatchSize),
    79  	}
    80  
    81  	// Start a goroutine to check socket health. This goroutine is also
    82  	// responsible for client cleanup.
    83  	c.watchdogWg.Add(1)
    84  	go c.watchdog()
    85  
    86  	// Mount the server first. Assume Mount is supported so that we can make the
    87  	// Mount RPC below.
    88  	c.supported = make([]bool, Mount+1)
    89  	c.supported[Mount] = true
    90  	var (
    91  		mountReq    MountReq
    92  		mountResp   MountResp
    93  		mountHostFD = [1]int{-1}
    94  	)
    95  	if err := c.SndRcvMessage(Mount, uint32(mountReq.SizeBytes()), mountReq.MarshalBytes, mountResp.CheckedUnmarshal, mountHostFD[:], mountReq.String, mountResp.String); err != nil {
    96  		c.Close()
    97  		return nil, Inode{}, -1, err
    98  	}
    99  
   100  	// Initialize client.
   101  	c.maxMessageSize = uint32(mountResp.MaxMessageSize)
   102  	var maxSuppMID MID
   103  	for _, suppMID := range mountResp.SupportedMs {
   104  		if suppMID > maxSuppMID {
   105  			maxSuppMID = suppMID
   106  		}
   107  	}
   108  	c.supported = make([]bool, maxSuppMID+1)
   109  	for _, suppMID := range mountResp.SupportedMs {
   110  		c.supported[suppMID] = true
   111  	}
   112  	return c, mountResp.Root, mountHostFD[0], nil
   113  }
   114  
   115  // StartChannels starts maxChannels() channel communicators.
   116  func (c *Client) StartChannels() error {
   117  	maxChans := maxChannels()
   118  	c.channelsMu.Lock()
   119  	c.channels = make([]*channel, 0, maxChans)
   120  	c.availableChannels = make([]*channel, 0, maxChans)
   121  	c.channelsMu.Unlock()
   122  
   123  	// Create channels parallelly so that channels can be used to create more
   124  	// channels and costly initialization like flipcall.Endpoint.Connect can
   125  	// proceed parallelly.
   126  	var channelsWg sync.WaitGroup
   127  	for i := 0; i < maxChans; i++ {
   128  		channelsWg.Add(1)
   129  		go func() {
   130  			defer channelsWg.Done()
   131  			ch, err := c.createChannel()
   132  			if err != nil {
   133  				if err == unix.ENOMEM {
   134  					log.Debugf("channel creation failed because server hit max channels limit")
   135  				} else {
   136  					log.Warningf("channel creation failed: %v", err)
   137  				}
   138  				return
   139  			}
   140  			c.channelsMu.Lock()
   141  			c.channels = append(c.channels, ch)
   142  			c.availableChannels = append(c.availableChannels, ch)
   143  			c.channelsMu.Unlock()
   144  		}()
   145  	}
   146  	channelsWg.Wait()
   147  
   148  	// Check that atleast 1 channel is created. This is not required by lisafs
   149  	// protocol. It exists to flag server side issues in channel creation.
   150  	c.channelsMu.Lock()
   151  	numChannels := len(c.channels)
   152  	c.channelsMu.Unlock()
   153  	if maxChans > 0 && numChannels == 0 {
   154  		log.Warningf("all channel RPCs failed")
   155  		return unix.ENOMEM
   156  	}
   157  	return nil
   158  }
   159  
   160  func (c *Client) watchdog() {
   161  	defer c.watchdogWg.Done()
   162  
   163  	events := []unix.PollFd{
   164  		{
   165  			Fd:     int32(c.sockComm.FD()),
   166  			Events: unix.POLLHUP | unix.POLLRDHUP,
   167  		},
   168  	}
   169  
   170  	// Wait for a shutdown event.
   171  	for {
   172  		n, err := unix.Ppoll(events, nil, nil)
   173  		if err == unix.EINTR || err == unix.EAGAIN {
   174  			continue
   175  		}
   176  		if err != nil {
   177  			log.Warningf("lisafs.Client.watch(): %v", err)
   178  		} else if n != 1 {
   179  			log.Warningf("lisafs.Client.watch(): got %d events, wanted 1", n)
   180  		}
   181  		break
   182  	}
   183  
   184  	// Shutdown all active channels and wait for them to complete.
   185  	c.shutdownActiveChans()
   186  	c.activeWg.Wait()
   187  
   188  	// Close all channels.
   189  	c.channelsMu.Lock()
   190  	for _, ch := range c.channels {
   191  		ch.destroy()
   192  	}
   193  	c.channelsMu.Unlock()
   194  
   195  	// Close main socket.
   196  	c.sockComm.destroy()
   197  }
   198  
   199  func (c *Client) shutdownActiveChans() {
   200  	c.channelsMu.Lock()
   201  	defer c.channelsMu.Unlock()
   202  
   203  	availableChans := make(map[*channel]bool)
   204  	for _, ch := range c.availableChannels {
   205  		availableChans[ch] = true
   206  	}
   207  	for _, ch := range c.channels {
   208  		// A channel that is not available is active.
   209  		if _, ok := availableChans[ch]; !ok {
   210  			log.Debugf("shutting down active channel@%p...", ch)
   211  			ch.shutdown()
   212  		}
   213  	}
   214  
   215  	// Prevent channels from becoming available and serving new requests.
   216  	c.availableChannels = nil
   217  }
   218  
   219  // Close shuts down the main socket and waits for the watchdog to clean up.
   220  func (c *Client) Close() {
   221  	// This shutdown has no effect if the watchdog has already fired and closed
   222  	// the main socket.
   223  	c.sockComm.shutdown()
   224  	c.watchdogWg.Wait()
   225  }
   226  
   227  func (c *Client) createChannel() (*channel, error) {
   228  	var (
   229  		chanReq  ChannelReq
   230  		chanResp ChannelResp
   231  	)
   232  	var fds [2]int
   233  	if err := c.SndRcvMessage(Channel, uint32(chanReq.SizeBytes()), chanReq.MarshalBytes, chanResp.CheckedUnmarshal, fds[:], chanReq.String, chanResp.String); err != nil {
   234  		return nil, err
   235  	}
   236  	if fds[0] < 0 || fds[1] < 0 {
   237  		closeFDs(fds[:])
   238  		return nil, fmt.Errorf("insufficient FDs provided in Channel response: %v", fds)
   239  	}
   240  
   241  	// Lets create the channel.
   242  	defer closeFDs(fds[:1]) // The data FD is not needed after this.
   243  	desc := flipcall.PacketWindowDescriptor{
   244  		FD:     fds[0],
   245  		Offset: chanResp.dataOffset,
   246  		Length: int(chanResp.dataLength),
   247  	}
   248  
   249  	ch := &channel{}
   250  	if err := ch.data.Init(flipcall.ClientSide, desc); err != nil {
   251  		closeFDs(fds[1:])
   252  		return nil, err
   253  	}
   254  	ch.fdChan.Init(fds[1]) // fdChan now owns this FD.
   255  
   256  	// Only a connected channel is usable.
   257  	if err := ch.data.Connect(); err != nil {
   258  		ch.destroy()
   259  		return nil, err
   260  	}
   261  	return ch, nil
   262  }
   263  
   264  // IsSupported returns true if this connection supports the passed message.
   265  func (c *Client) IsSupported(m MID) bool {
   266  	return int(m) < len(c.supported) && c.supported[m]
   267  }
   268  
   269  // CloseFD either queues the passed FD to be closed or makes a batch
   270  // RPC to close all the accumulated FDs-to-close. If flush is true, the RPC
   271  // is made immediately.
   272  func (c *Client) CloseFD(ctx context.Context, fd FDID, flush bool) {
   273  	c.fdsMu.Lock()
   274  	c.fdsToClose = append(c.fdsToClose, fd)
   275  	if !flush && len(c.fdsToClose) < fdsToCloseBatchSize {
   276  		// We can continue batching.
   277  		c.fdsMu.Unlock()
   278  		return
   279  	}
   280  
   281  	// Flush the cache. We should not hold fdsMu while making an RPC, so be sure
   282  	// to copy the fdsToClose to another buffer before unlocking fdsMu.
   283  	var toCloseArr [fdsToCloseBatchSize]FDID
   284  	toClose := toCloseArr[:len(c.fdsToClose)]
   285  	copy(toClose, c.fdsToClose)
   286  
   287  	// Clear fdsToClose so other FDIDs can be appended.
   288  	c.fdsToClose = c.fdsToClose[:0]
   289  	c.fdsMu.Unlock()
   290  
   291  	req := CloseReq{FDs: toClose}
   292  	var resp CloseResp
   293  	ctx.UninterruptibleSleepStart(false)
   294  	err := c.SndRcvMessage(Close, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
   295  	ctx.UninterruptibleSleepFinish(false)
   296  	if err != nil {
   297  		log.Warningf("lisafs: batch closing FDs returned error: %v", err)
   298  	}
   299  }
   300  
   301  // SyncFDs makes a Fsync RPC to sync multiple FDs.
   302  func (c *Client) SyncFDs(ctx context.Context, fds []FDID) error {
   303  	if len(fds) == 0 {
   304  		return nil
   305  	}
   306  	req := FsyncReq{FDs: fds}
   307  	var resp FsyncResp
   308  	ctx.UninterruptibleSleepStart(false)
   309  	err := c.SndRcvMessage(FSync, uint32(req.SizeBytes()), req.MarshalBytes, resp.CheckedUnmarshal, nil, req.String, resp.String)
   310  	ctx.UninterruptibleSleepFinish(false)
   311  	return err
   312  }
   313  
   314  // SndRcvMessage invokes reqMarshal to marshal the request onto the payload
   315  // buffer, wakes up the server to process the request, waits for the response
   316  // and invokes respUnmarshal with the response payload. respFDs is populated
   317  // with the received FDs, extra fields are set to -1.
   318  //
   319  // See messages.go to understand why function arguments are used instead of
   320  // combining these functions into an interface type.
   321  //
   322  // Precondition: function arguments must be non-nil.
   323  func (c *Client) SndRcvMessage(m MID, payloadLen uint32, reqMarshal marshalFunc, respUnmarshal unmarshalFunc, respFDs []int, reqString debugStringer, respString debugStringer) error {
   324  	if !c.IsSupported(m) {
   325  		return unix.EOPNOTSUPP
   326  	}
   327  	if payloadLen > c.maxMessageSize {
   328  		log.Warningf("message %d has payload which is too large: %d bytes", m, payloadLen)
   329  		return unix.EIO
   330  	}
   331  	wantFDs := len(respFDs)
   332  	if wantFDs > math.MaxUint8 {
   333  		log.Warningf("want too many FDs: %d", wantFDs)
   334  		return unix.EINVAL
   335  	}
   336  
   337  	// Acquire a communicator.
   338  	comm := c.acquireCommunicator()
   339  	defer c.releaseCommunicator(comm)
   340  
   341  	debugf("send", comm, reqString)
   342  
   343  	// Marshal the request into comm's payload buffer and make the RPC.
   344  	reqMarshal(comm.PayloadBuf(payloadLen))
   345  	respM, respPayloadLen, err := comm.SndRcvMessage(m, payloadLen, uint8(wantFDs))
   346  
   347  	// Handle FD donation.
   348  	rcvFDs := comm.ReleaseFDs()
   349  	if numRcvFDs := len(rcvFDs); numRcvFDs+wantFDs > 0 {
   350  		// releasedFDs is memory owned by comm which can not be returned to caller.
   351  		// Copy it into the caller's buffer.
   352  		numFDCopied := copy(respFDs, rcvFDs)
   353  		if numFDCopied < numRcvFDs {
   354  			log.Warningf("%d unexpected FDs were donated by the server, wanted", numRcvFDs-numFDCopied, wantFDs)
   355  			closeFDs(rcvFDs[numFDCopied:])
   356  		}
   357  		if numFDCopied < wantFDs {
   358  			for i := numFDCopied; i < wantFDs; i++ {
   359  				respFDs[i] = -1
   360  			}
   361  		}
   362  	}
   363  
   364  	// Error cases.
   365  	if err != nil {
   366  		closeFDs(respFDs)
   367  		return err
   368  	}
   369  	if respPayloadLen > c.maxMessageSize {
   370  		log.Warningf("server response for message %d is too large: %d bytes", respM, respPayloadLen)
   371  		closeFDs(respFDs)
   372  		return unix.EIO
   373  	}
   374  	if respM == Error {
   375  		closeFDs(respFDs)
   376  		var resp ErrorResp
   377  		resp.UnmarshalUnsafe(comm.PayloadBuf(respPayloadLen))
   378  		debugf("recv", comm, resp.String)
   379  		return unix.Errno(resp.errno)
   380  	}
   381  	if respM != m {
   382  		closeFDs(respFDs)
   383  		log.Warningf("sent %d message but got %d in response", m, respM)
   384  		return unix.EINVAL
   385  	}
   386  
   387  	// Success. The payload must be unmarshalled *before* comm is released.
   388  	if _, ok := respUnmarshal(comm.PayloadBuf(respPayloadLen)); !ok {
   389  		log.Warningf("server response unmarshalling for %d message failed", respM)
   390  		return unix.EIO
   391  	}
   392  	debugf("recv", comm, respString)
   393  	return nil
   394  }
   395  
   396  func debugf(action string, comm Communicator, debugMsg debugStringer) {
   397  	// Replicate the log.IsLogging(log.Debug) check to avoid having to call
   398  	// debugMsg() on the hot path.
   399  	if log.IsLogging(log.Debug) {
   400  		log.Debugf("%s [%s] %s", action, comm, debugMsg())
   401  	}
   402  }
   403  
   404  // Postcondition: releaseCommunicator() must be called on the returned value.
   405  func (c *Client) acquireCommunicator() Communicator {
   406  	// Prefer using channel over socket because:
   407  	//	- Channel uses a shared memory region for passing messages. IO from shared
   408  	//		memory is faster and does not involve making a syscall.
   409  	//	- No intermediate buffer allocation needed. With a channel, the message
   410  	//		can be directly pasted into the shared memory region.
   411  	if ch := c.getChannel(); ch != nil {
   412  		return ch
   413  	}
   414  
   415  	c.sockMu.Lock()
   416  	return c.sockComm
   417  }
   418  
   419  // Precondition: comm must have been acquired via acquireCommunicator().
   420  func (c *Client) releaseCommunicator(comm Communicator) {
   421  	switch t := comm.(type) {
   422  	case *sockCommunicator:
   423  		c.sockMu.Unlock() // +checklocksforce: locked in acquireCommunicator().
   424  	case *channel:
   425  		c.releaseChannel(t)
   426  	default:
   427  		panic(fmt.Sprintf("unknown communicator type %T", t))
   428  	}
   429  }
   430  
   431  // getChannel pops a channel from the available channels stack. The caller must
   432  // release the channel after use.
   433  func (c *Client) getChannel() *channel {
   434  	c.channelsMu.Lock()
   435  	defer c.channelsMu.Unlock()
   436  	if len(c.availableChannels) == 0 {
   437  		return nil
   438  	}
   439  
   440  	idx := len(c.availableChannels) - 1
   441  	ch := c.availableChannels[idx]
   442  	c.availableChannels = c.availableChannels[:idx]
   443  	c.activeWg.Add(1)
   444  	return ch
   445  }
   446  
   447  // releaseChannel pushes the passed channel onto the available channel stack if
   448  // reinsert is true.
   449  func (c *Client) releaseChannel(ch *channel) {
   450  	c.channelsMu.Lock()
   451  	defer c.channelsMu.Unlock()
   452  
   453  	// If availableChannels is nil, then watchdog has fired and the client is
   454  	// shutting down. So don't make this channel available again.
   455  	if !ch.dead && c.availableChannels != nil {
   456  		c.availableChannels = append(c.availableChannels, ch)
   457  	}
   458  	c.activeWg.Done()
   459  }