github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/retryrpc/server.go

// Copyright (c) 2015-2021, NVIDIA CORPORATION.
// SPDX-License-Identifier: Apache-2.0

package retryrpc

import (
	"container/list"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"net"
	"os"
	"reflect"
	"sync"
	"time"

	"github.com/swiftstack/ProxyFS/bucketstats"
	"github.com/swiftstack/ProxyFS/logger"
	"golang.org/x/sys/unix"
)
    21  
// Variables to control debug output
var printDebugLogs bool = false
var debugPutGet bool = false

// TODO - verify that Register() has been called before serving requests
func (server *Server) closeClient(myConn net.Conn, myElm *list.Element) {

	server.connLock.Lock()
	server.connections.Remove(myElm)

	// There is a race condition where the connection could have been
	// closed in Down().  However, closing it twice is okay.
	myConn.Close()
	server.connLock.Unlock()
	server.connWG.Done()
}
    39  
func (server *Server) run() {
	defer server.goroutineWG.Done()
	for {
		conn, err := server.tlsListener.Accept()
		if err != nil {
			if !server.halting {
				logger.ErrorfWithError(err, "Accept() failed for Retry RPC listener")
			}
			server.listenersWG.Done()
			return
		}

		server.connWG.Add(1)

		server.connLock.Lock()
		elm := server.connections.PushBack(conn)
		server.connLock.Unlock()

		// The first message sent on this socket by the client is the uniqueID.
		//
		// Read that first and create the relevant structures before calling
		// serviceClient().  We do the cleanup in this routine because there are
		// race conditions with noisy clients repeatedly reconnecting.
		//
		// Those race conditions are resolved if we serialize the recovery.
		cCtx := &connCtx{conn: conn}
		cCtx.cond = sync.NewCond(&cCtx.Mutex)
		ci, err := server.getClientIDAndWait(cCtx)
		if err != nil {
			// Socket already had an error - just loop back
			logger.Warnf("getClientIDAndWait() from client addr: %v returned err: %v\n", conn.RemoteAddr(), err)

			// Sleep briefly to keep overly active clients from pounding on us
			time.Sleep(1 * time.Second)

			server.closeClient(conn, elm)
			continue
		}

		server.goroutineWG.Add(1)
		go func(myConn net.Conn, myElm *list.Element) {
			defer server.goroutineWG.Done()

			logger.Infof("Servicing client: %v address: %v", ci.myUniqueID, myConn.RemoteAddr())
			server.serviceClient(ci, cCtx)

			logger.Infof("Closing client: %v address: %v", ci.myUniqueID, myConn.RemoteAddr())
			server.closeClient(myConn, myElm)

			// The clientInfo for this client will first be trimmed and then later
			// deleted from the list of server.perClientInfo by the TTL trimmer.

		}(conn, elm)
	}
}
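
// Per-connection lifecycle driven by run(), as a sketch (steps reference the
// functions in this file):
//
//	Accept()               -> conn tracked in server.connections
//	getClientIDAndWait()   -> handshake; blocks further Accepts for reconnects
//	go serviceClient()     -> reads RPCs until socket error or server halt
//	closeClient()          -> conn removed; clientInfo survives for retransmits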
    95  
// processRequest is given a request from the client.
func (server *Server) processRequest(ci *clientInfo, myConnCtx *connCtx, buf []byte) {
	defer server.goroutineWG.Done()

	// We first unmarshal the raw buf to find the method
	//
	// Next we unmarshal again with the request structure specific
	// to the RPC.
	jReq := jsonRequest{}
	unmarErr := json.Unmarshal(buf, &jReq)
	if unmarErr != nil {
		logger.Errorf("Unmarshal of buf failed with err: %v\n", unmarErr)
		return
	}

	ci.Lock()

	// Keep track of the highest consecutive requestID seen
	// by the client.  We use this to trim the completedRequest list.
	//
	// Messages could arrive out of order so only update if
	// the new request is giving us a higher value.
	if jReq.HighestReplySeen > ci.highestReplySeen {
		ci.highestReplySeen = jReq.HighestReplySeen
	}
	ci.stats.RPCattempted.Add(1)

	// First check if we already completed this request by looking at
	// the completed queue.
	var localIOR ioReply
	rID := jReq.RequestID
	ce, ok := ci.completedRequest[rID]
	if ok {
		// We already have an answer for this in the completedRequest
		// queue.  Just return the results.
		setupHdrReply(ce.reply, RPC)
		localIOR = *ce.reply
		ci.stats.RPCretried.Add(1)
		ci.Unlock()

	} else {
		ci.Unlock()

		// Call the RPC and return the results.
		//
		// We pass buf to the call because the request will have to
		// be unmarshaled again to retrieve the parameters specific to
		// the RPC.
		startRPC := time.Now()
		ior := server.callRPCAndFormatReply(buf, &jReq)
		ci.stats.RPCLenUsec.Add(uint64(time.Since(startRPC) / time.Microsecond))
		ci.stats.RPCcompleted.Add(1)

		// We had to drop the lock before calling the RPC since it
		// could block.
		ci.Lock()
		dur := time.Since(startRPC)
		if dur > ci.stats.longestRPC {
			ci.stats.longestRPCMethod = jReq.Method
			ci.stats.longestRPC = dur
		}

		// Update the completed queue
		ce := &completedEntry{reply: ior}
		ci.completedRequest[rID] = ce
		ci.stats.AddCompleted.Add(1)
		setupHdrReply(ce.reply, RPC)
		localIOR = *ce.reply
		sz := uint64(len(ior.JResult))
		if sz > ci.stats.largestReplySize {
			ci.stats.largestReplySizeMethod = jReq.Method
			ci.stats.largestReplySize = sz
		}
		ci.stats.ReplySize.Add(sz)
		lruEntry := completedLRUEntry{requestID: rID, timeCompleted: time.Now()}
		le := ci.completedRequestLRU.PushBack(lruEntry)
		ce.lruElem = le
		ci.Unlock()
	}

	// Write results on the socket back to the client...
	server.returnResults(&localIOR, myConnCtx)

	myConnCtx.activeRPCsWG.Done()
}
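
// The completedRequest map gives at-most-once execution across retransmits.
// A minimal sketch of the timeline (the RequestID values are illustrative):
//
//	client sends RequestID 7               -> RPC runs; reply cached in completedRequest[7]
//	reply lost; client retransmits 7       -> cache hit; cached reply resent; RPC NOT re-run
//	client sends 8 with HighestReplySeen=7 -> entry 7 becomes eligible for ACK-based trimming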
   181  
// getClientIDAndWait reads the first message off the new connection.
// The client sends its uniqueID before sending its first RPC.
//
// Then getClientIDAndWait waits until any outstanding RPCs on a prior
// connection have completed before proceeding.
//
// This avoids race conditions when there are cascading retransmits.
// The algorithm is:
//
// 1. Client sends UniqueID to server when the socket is first opened
// 2. After accepting the new socket, the server waits for the UniqueID from
//    the client.
// 3. If this is the first connection from the client, the server creates a
//    clientInfo structure for the client and proceeds to wait for RPCs.
// 4. If this is a client returning on a new socket, the server blocks
//    until all outstanding RPCs and related goroutines have completed for the
//    client on the previous connection.
// 5. Additionally, the server blocks on accepting new connections
//    (which could be yet another reconnect for the same client) until the
//    previous connection has closed down.
func (server *Server) getClientIDAndWait(cCtx *connCtx) (ci *clientInfo, err error) {
	buf, msgType, getErr := getIO(uint64(0), server.deadlineIO, cCtx.conn)
	if getErr != nil {
		err = getErr
		return
	}

	if msgType != PassID {
		err = fmt.Errorf("Server expecting msgType PassID and received: %v", msgType)
		return
	}

	var connUniqueID string
	err = json.Unmarshal(buf, &connUniqueID)
	if err != nil {
		return
	}

	// Check if this is the first time we have seen this client
	server.Lock()
	lci, ok := server.perClientInfo[connUniqueID]
	if !ok {
		// First time we have seen this client
		c := &clientInfo{cCtx: cCtx, myUniqueID: connUniqueID}
		c.completedRequest = make(map[requestID]*completedEntry)
		c.completedRequestLRU = list.New()
		server.perClientInfo[connUniqueID] = c
		server.Unlock()
		ci = c
		cCtx.Lock()
		cCtx.ci = ci
		cCtx.Unlock()

		bucketstats.Register("proxyfs.retryrpc", c.myUniqueID, &c.stats)
	} else {
		server.Unlock()
		ci = lci

		// Wait for the serviceClient() goroutine from a prior connection to exit
		// before proceeding.
		ci.cCtx.Lock()
		for !ci.cCtx.serviceClientExited {
			ci.cCtx.cond.Wait()
		}
		ci.cCtx.Unlock()

		// Now wait for any outstanding RPCs to complete
		ci.cCtx.activeRPCsWG.Wait()

		// Set the cCtx back pointer to ci
		ci.Lock()
		cCtx.Lock()
		cCtx.ci = ci
		cCtx.Unlock()

		ci.cCtx = cCtx
		ci.Unlock()
	}

	return ci, err
}
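
// A minimal sketch of what the client side of the handshake must send first
// (sendIO and clientID are illustrative names, not this package's API; the
// payload shape follows from the json.Unmarshal into a string above):
//
//	payload, _ := json.Marshal(clientID) // uniqueID as a JSON string
//	sendIO(conn, PassID, payload)        // framed message with msgType PassID
//	// ...only after this may the client send msgType RPC requests...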
   263  
// serviceClient gets called when we accept a new connection.
func (server *Server) serviceClient(ci *clientInfo, cCtx *connCtx) {
	for {
		// Get RPC request
		buf, msgType, getErr := getIO(uint64(0), server.deadlineIO, cCtx.conn)
		if getErr != nil && !os.IsTimeout(getErr) {

			// Drop the response on the floor.  The client will either
			// reconnect or this response will age out of the queues.
			cCtx.Lock()
			cCtx.serviceClientExited = true
			cCtx.cond.Broadcast()
			cCtx.Unlock()
			return
		}

		server.Lock()
		if server.halting {
			server.Unlock()
			return
		}

		if os.IsTimeout(getErr) {
			server.Unlock()
			continue
		}

		if msgType != RPC {
			logger.Warnf("serviceClient() received invalid msgType: %v - dropping", msgType)
			server.Unlock()
			continue
		}

		// Keep track of how many processRequest() goroutines we have
		// so that we can wait until they complete when handling retransmits.
		cCtx.activeRPCsWG.Add(1)
		server.Unlock()

		// No sense blocking the read of the next request,
		// push the work off on processRequest().
		//
		// Writes back on the socket will have to be serialized so
		// pass the per connection context.
		server.goroutineWG.Add(1)
		go server.processRequest(ci, cCtx, buf)
	}
}
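
// Reconnect serialization between serviceClient() and getClientIDAndWait()
// hinges on two pieces of per-connection state, ordered as in this sketch:
//
//	reader fails        -> serviceClientExited = true; cond.Broadcast()
//	client reconnects   -> getClientIDAndWait() waits on cond, then activeRPCsWG.Wait()
//	only after both     -> the new cCtx replaces ci.cCtx and RPCs flow again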
   310  
// callRPCAndFormatReply calls the RPC and returns results to the requestor
func (server *Server) callRPCAndFormatReply(buf []byte, jReq *jsonRequest) (ior *ioReply) {
	var (
		err error
	)

	// Setup the reply structure with common fields
	reply := &ioReply{}
	rid := jReq.RequestID
	jReply := &jsonReply{MyUniqueID: jReq.MyUniqueID, RequestID: rid}

	ma := server.svrMap[jReq.Method]
	if ma != nil {

		// Another unmarshal of buf to find the parameters specific to
		// this RPC
		typOfReq := ma.request.Elem()
		dummyReq := reflect.New(typOfReq).Interface()

		sReq := svrRequest{}
		sReq.Params[0] = dummyReq
		err = json.Unmarshal(buf, &sReq)
		if err != nil {
			logger.PanicfWithError(err, "Unmarshal sReq: %+v", sReq)
			return
		}
		req := reflect.ValueOf(dummyReq)

		// Create the reply structure
		typOfReply := ma.reply.Elem()
		myReply := reflect.New(typOfReply)

		// Call the method
		function := ma.methodPtr.Func
		returnValues := function.Call([]reflect.Value{server.receiver, req, myReply})

		// The single return value of the method is an error.
		errInter := returnValues[0].Interface()
		if errInter == nil {
			jReply.Result = myReply.Elem().Interface()
		} else {
			e, ok := errInter.(error)
			if !ok {
				logger.PanicfWithError(err, "Call returnValues invalid cast errInter: %+v", errInter)
			}
			jReply.ErrStr = e.Error()
		}
	} else {
		// TODO - figure out if this is the correct error

		// Method does not exist
		jReply.ErrStr = fmt.Sprintf("errno: %d", unix.ENOENT)
	}

	// Convert the response into JSON for the return trip
	reply.JResult, err = json.Marshal(jReply)
	if err != nil {
		logger.PanicfWithError(err, "Unable to marshal jReply: %+v", jReply)
	}

	return reply
}
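
// The reflection dispatch above expects registered methods of the shape
// sketched below (MyServer, PingReq, and PingReply are illustrative names,
// not types defined in this package): pointer request, pointer reply, and a
// single error return that becomes jReply.ErrStr when non-nil.
//
//	func (s *MyServer) RpcPing(req *PingReq, reply *PingReply) error {
//		reply.Payload = req.Payload // echo back the request
//		return nil
//	}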
   373  
func (server *Server) returnResults(ior *ioReply, cCtx *connCtx) {

	// Now write the response back to the client
	//
	// Serialize multiple goroutines writing on the socket back to the client
	// by grabbing a mutex on the context

	// Write Len back
	cCtx.Lock()
	cCtx.conn.SetDeadline(time.Now().Add(server.deadlineIO))
	binErr := binary.Write(cCtx.conn, binary.BigEndian, ior.Hdr)
	if binErr != nil {
		cCtx.Unlock()
		// Conn will be closed when serviceClient() returns
		return
	}

	// Write the JSON reply
	//
	// In the error case - Conn will be closed when serviceClient() returns
	cCtx.conn.SetDeadline(time.Now().Add(server.deadlineIO))
	cnt, e := cCtx.conn.Write(ior.JResult)
	if e != nil {
		logger.Infof("returnResults() returned err: %v cnt: %v length of JResult: %v", e, cnt, len(ior.JResult))
	}

	cCtx.Unlock()
}
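
// On-the-wire reply framing as implemented above, shown as a sketch (the
// exact header fields live in ior.Hdr, set by setupHdrReply()):
//
//	+----------------------+---------------------------+
//	| ior.Hdr (big endian) | ior.JResult (JSON bytes)  |
//	+----------------------+---------------------------+
//
// The fixed header is written with binary.Write() and presumably carries the
// length/type information that the peer's read loop uses to frame messages.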
   402  
// Close sockets to the client so that goroutines wake up from blocked
// reads and let the server exit.
func (server *Server) closeClientConn() {
	server.connLock.Lock()
	for e := server.connections.Front(); e != nil; e = e.Next() {
		conn := e.Value.(net.Conn)
		conn.Close()
	}
	server.connLock.Unlock()
}
   413  
// Loop through all clients and trim completed requests, either TTL-based
// (long pass) or up to the highest RequestID already ACKed (short pass)
func (server *Server) trimCompleted(t time.Time, long bool) {
	var (
		totalItems int
	)

	server.Lock()
	if long {
		l := list.New()
		for k, ci := range server.perClientInfo {
			n := server.trimTTLBased(ci, t)
			totalItems += n

			ci.Lock()
			if ci.isEmpty() {
				l.PushBack(k)
			}
			ci.Unlock()

		}

		// If the client is no longer active - delete its entry
		//
		// We can only do the check if we are currently holding the
		// lock.
		for e := l.Front(); e != nil; e = e.Next() {
			key := e.Value.(string)
			ci := server.perClientInfo[key]

			ci.Lock()
			ci.cCtx.Lock()
			if ci.isEmpty() && ci.cCtx.serviceClientExited {
				bucketstats.UnRegister("proxyfs.retryrpc", ci.myUniqueID)
				delete(server.perClientInfo, key)
				logger.Infof("Trim - DELETE inactive clientInfo with ID: %v", ci.myUniqueID)
			}
			ci.cCtx.Unlock()
			ci.Unlock()
		}
		logger.Infof("Trimmed completed RetryRpcs - Total: %v", totalItems)
	} else {
		for k, ci := range server.perClientInfo {
			n := server.trimAClientBasedACK(k, ci)
			totalItems += n
		}
	}
	server.Unlock()
}
   462  
// Walk through a client's completedRequest queue and trim entries the
// client has acknowledged (RequestIDs up to highestReplySeen); see the
// worked example after this function.
//
// NOTE: We assume the Server Lock is held
func (server *Server) trimAClientBasedACK(uniqueID string, ci *clientInfo) (numItems int) {

	ci.Lock()

	// Remove from completedRequest and completedRequestLRU
	for h := ci.previousHighestReplySeen + 1; h <= ci.highestReplySeen; h++ {
		v, ok := ci.completedRequest[h]
		if ok {
			ci.completedRequestLRU.Remove(v.lruElem)
			delete(ci.completedRequest, h)
			ci.stats.RmCompleted.Add(1)
			numItems++
		}
	}

	// Keep track of how far we have trimmed for the next run
	ci.previousHighestReplySeen = ci.highestReplySeen
	ci.Unlock()
	return
}
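
// A worked example of the ACK window above (the values are illustrative):
//
//	previousHighestReplySeen = 5, highestReplySeen = 9
//	-> entries 6, 7, 8, 9 are removed from completedRequest and the LRU
//	-> previousHighestReplySeen becomes 9 for the next trim pass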
   487  
// Remove completedRequest/completedRequestLRU entries older than server.completedLongTTL
//
// This gets called every ~10 minutes to clean out older entries.
//
// NOTE: We assume the Server Lock is held
func (server *Server) trimTTLBased(ci *clientInfo, t time.Time) (numItems int) {
	ci.Lock()
	for e := ci.completedRequestLRU.Front(); e != nil; {
		eTime := e.Value.(completedLRUEntry).timeCompleted.Add(server.completedLongTTL)
		if eTime.Before(t) {
			delete(ci.completedRequest, e.Value.(completedLRUEntry).requestID)
			ci.stats.RmCompleted.Add(1)

			eTmp := e
			e = e.Next()
			_ = ci.completedRequestLRU.Remove(eTmp)
			numItems++
		} else {
			// The oldest entry is at the front so just break
			break
		}
	}
	s := ci.stats
	logger.Infof("ID: %v largestReplySize: %v largestReplySizeMethod: %v longest RPC: %v longest RPC Method: %v",
		ci.myUniqueID, s.largestReplySize, s.largestReplySizeMethod, s.longestRPC, s.longestRPCMethod)

	ci.Unlock()
	return
}
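
// Because processRequest() appends to completedRequestLRU in completion
// order, the list stays sorted oldest-first.  The loop above can therefore
// stop at the first entry younger than completedLongTTL, making each pass
// O(expired entries) rather than O(all completed entries).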