github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/retryrpc/server.go

// Copyright (c) 2015-2021, NVIDIA CORPORATION.
// SPDX-License-Identifier: Apache-2.0

package retryrpc

import (
    "container/list"
    "encoding/binary"
    "encoding/json"
    "fmt"
    "net"
    "os"
    "reflect"
    "sync"
    "time"

    "github.com/swiftstack/ProxyFS/bucketstats"
    "github.com/swiftstack/ProxyFS/logger"
    "golang.org/x/sys/unix"
)

// Variables to control debug output
var printDebugLogs bool = false
var debugPutGet bool = false

// TODO - test if Register has been called???

func (server *Server) closeClient(myConn net.Conn, myElm *list.Element) {

    server.connLock.Lock()
    server.connections.Remove(myElm)

    // There is a race condition where the connection could have been
    // closed in Down(). However, closing it twice is okay.
    myConn.Close()
    server.connLock.Unlock()
    server.connWG.Done()
}

func (server *Server) run() {
    defer server.goroutineWG.Done()
    for {
        conn, err := server.tlsListener.Accept()
        if err != nil {
            if !server.halting {
                logger.ErrorfWithError(err, "net.Accept failed for Retry RPC listener")
            }
            server.listenersWG.Done()
            return
        }

        server.connWG.Add(1)

        server.connLock.Lock()
        elm := server.connections.PushBack(conn)
        server.connLock.Unlock()

        // The first message sent on this socket by the client is the uniqueID.
        //
        // Read that first and create the relevant structures before calling
        // serviceClient(). We do the cleanup in this routine because there are
        // race conditions with noisy clients repeatedly reconnecting.
        //
        // Those race conditions are resolved if we serialize the recovery.
        cCtx := &connCtx{conn: conn}
        cCtx.cond = sync.NewCond(&cCtx.Mutex)
        ci, err := server.getClientIDAndWait(cCtx)
        if err != nil {
            // Socket already had an error - just loop back
            logger.Warnf("getClientIDAndWait() from client addr: %v returned err: %v\n", conn.RemoteAddr(), err)

            // Sleep to keep overly active clients from pounding on us
            time.Sleep(1 * time.Second)

            server.closeClient(conn, elm)
            continue
        }

        server.goroutineWG.Add(1)
        go func(myConn net.Conn, myElm *list.Element) {
            defer server.goroutineWG.Done()

            logger.Infof("Servicing client: %v address: %v", ci.myUniqueID, myConn.RemoteAddr())
            server.serviceClient(ci, cCtx)

            logger.Infof("Closing client: %v address: %v", ci.myUniqueID, myConn.RemoteAddr())
            server.closeClient(conn, elm)

            // The clientInfo for this client will first be trimmed and then later
            // deleted from the list of server.perClientInfo by the TTL trimmer.

        }(conn, elm)
    }
}

// processRequest is given a request from the client.
func (server *Server) processRequest(ci *clientInfo, myConnCtx *connCtx, buf []byte) {
    defer server.goroutineWG.Done()

    // We first unmarshal the raw buf to find the method.
    //
    // Next we unmarshal again with the request structure specific
    // to the RPC.
    jReq := jsonRequest{}
    unmarErr := json.Unmarshal(buf, &jReq)
    if unmarErr != nil {
        logger.Errorf("Unmarshal of buf failed with err: %v\n", unmarErr)
        return
    }

    ci.Lock()

    // Keep track of the highest consecutive requestID seen
    // by the client. We use this to trim the completedRequest list.
    //
    // Messages could arrive out of order, so only update if
    // the new request is giving us a higher value.
    if jReq.HighestReplySeen > ci.highestReplySeen {
        ci.highestReplySeen = jReq.HighestReplySeen
    }
    ci.stats.RPCattempted.Add(1)

    // First check if we already completed this request by looking at
    // the completed queue.
    var localIOR ioReply
    rID := jReq.RequestID
    ce, ok := ci.completedRequest[rID]
    if ok {
        // Already have the answer for this in the completedRequest queue.
        // Just return the results.
        setupHdrReply(ce.reply, RPC)
        localIOR = *ce.reply
        ci.stats.RPCretried.Add(1)
        ci.Unlock()

    } else {
        ci.Unlock()

        // Call the RPC and return the results.
        //
        // We pass buf to the call because the request will have to
        // be unmarshaled again to retrieve the parameters specific to
        // the RPC.
        startRPC := time.Now()
        ior := server.callRPCAndFormatReply(buf, &jReq)
        ci.stats.RPCLenUsec.Add(uint64(time.Since(startRPC) / time.Microsecond))
        ci.stats.RPCcompleted.Add(1)

        // We had to drop the lock before calling the RPC since it
        // could block.
        ci.Lock()
        dur := time.Since(startRPC)
        if dur > ci.stats.longestRPC {
            ci.stats.longestRPCMethod = jReq.Method
            ci.stats.longestRPC = dur
        }

        // Update the completed queue
        ce := &completedEntry{reply: ior}
        ci.completedRequest[rID] = ce
        ci.stats.AddCompleted.Add(1)
        setupHdrReply(ce.reply, RPC)
        localIOR = *ce.reply
        sz := uint64(len(ior.JResult))
        if sz > ci.stats.largestReplySize {
            ci.stats.largestReplySizeMethod = jReq.Method
            ci.stats.largestReplySize = sz
        }
        ci.stats.ReplySize.Add(sz)
        lruEntry := completedLRUEntry{requestID: rID, timeCompleted: time.Now()}
        le := ci.completedRequestLRU.PushBack(lruEntry)
        ce.lruElem = le
        ci.Unlock()
    }

    // Write the results on the socket back to the client...
    server.returnResults(&localIOR, myConnCtx)

    myConnCtx.activeRPCsWG.Done()
}

// getClientIDAndWait reads the first message off the new connection.
// The client sends its uniqueID before sending its first RPC.
//
// Then getClientIDAndWait waits until any outstanding RPCs on a prior
// connection have completed before proceeding.
//
// This avoids race conditions when there are cascading retransmits.
// The algorithm is:
//
// 1. The client sends its uniqueID to the server when the socket is first opened.
// 2. After accepting the new socket, the server waits for the uniqueID from
//    the client.
// 3. If this is the first connection from the client, the server creates a
//    clientInfo structure for the client and proceeds to wait for RPCs.
// 4. If this is a client returning on a new socket, the server blocks
//    until all outstanding RPCs and related goroutines have completed for the
//    client on the previous connection.
// 5. Additionally, the server blocks on accepting new connections
//    (which could be yet another reconnect for the same client) until the
//    previous connection has closed down.
func (server *Server) getClientIDAndWait(cCtx *connCtx) (ci *clientInfo, err error) {
    buf, msgType, getErr := getIO(uint64(0), server.deadlineIO, cCtx.conn)
    if getErr != nil {
        err = getErr
        return
    }

    if msgType != PassID {
        err = fmt.Errorf("Server expecting msgType PassID and received: %v", msgType)
        return
    }

    var connUniqueID string
    err = json.Unmarshal(buf, &connUniqueID)
    if err != nil {
        return
    }

    // Check if this is the first time we have seen this client
    server.Lock()
    lci, ok := server.perClientInfo[connUniqueID]
    if !ok {
        // First time we have seen this client
        c := &clientInfo{cCtx: cCtx, myUniqueID: connUniqueID}
        c.completedRequest = make(map[requestID]*completedEntry)
        c.completedRequestLRU = list.New()
        server.perClientInfo[connUniqueID] = c
        server.Unlock()
        ci = c
        cCtx.Lock()
        cCtx.ci = ci
        cCtx.Unlock()

        bucketstats.Register("proxyfs.retryrpc", c.myUniqueID, &c.stats)
    } else {
        server.Unlock()
        ci = lci

        // Wait for the serviceClient() goroutine from a prior connection to exit
        // before proceeding.
        ci.cCtx.Lock()
        for !ci.cCtx.serviceClientExited {
            ci.cCtx.cond.Wait()
        }
        ci.cCtx.Unlock()

        // Now wait for any outstanding RPCs to complete
        ci.cCtx.activeRPCsWG.Wait()

        // Set the cCtx back pointer to ci
        ci.Lock()
        cCtx.Lock()
        cCtx.ci = ci
        cCtx.Unlock()

        ci.cCtx = cCtx
        ci.Unlock()
    }

    return ci, err
}

// serviceClient gets called when we accept a new connection.
func (server *Server) serviceClient(ci *clientInfo, cCtx *connCtx) {
    for {
        // Get the next RPC request
        buf, msgType, getErr := getIO(uint64(0), server.deadlineIO, cCtx.conn)
        if getErr != nil && !os.IsTimeout(getErr) {

            // Drop the response on the floor. The client will either reconnect or
            // this response will age out of the queues.
            cCtx.Lock()
            cCtx.serviceClientExited = true
            cCtx.cond.Broadcast()
            cCtx.Unlock()
            return
        }

        server.Lock()
        if server.halting {
            server.Unlock()
            return
        }

        if os.IsTimeout(getErr) {
            server.Unlock()
            continue
        }

        if msgType != RPC {
            fmt.Printf("serviceClient() received invalid msgType: %v - dropping\n", msgType)
            server.Unlock()
            continue
        }

        // Keep track of how many processRequest() goroutines we have
        // so that we can wait until they complete when handling retransmits.
        cCtx.activeRPCsWG.Add(1)
        server.Unlock()

        // No sense blocking the read of the next request -
        // push the work off on processRequest().
        //
        // Writes back on the socket will have to be serialized, so
        // pass the per connection context.
        server.goroutineWG.Add(1)
        go server.processRequest(ci, cCtx, buf)
    }
}

// callRPCAndFormatReply calls the RPC and returns the results to the requestor.
func (server *Server) callRPCAndFormatReply(buf []byte, jReq *jsonRequest) (ior *ioReply) {
    var (
        err error
    )

    // Setup the reply structure with common fields
    reply := &ioReply{}
    rid := jReq.RequestID
    jReply := &jsonReply{MyUniqueID: jReq.MyUniqueID, RequestID: rid}

    ma := server.svrMap[jReq.Method]
    if ma != nil {

        // Another unmarshal of buf to find the parameters specific to
        // this RPC
        typOfReq := ma.request.Elem()
        dummyReq := reflect.New(typOfReq).Interface()

        sReq := svrRequest{}
        sReq.Params[0] = dummyReq
        err = json.Unmarshal(buf, &sReq)
        if err != nil {
            logger.PanicfWithError(err, "Unmarshal sReq: %+v", sReq)
            return
        }
        req := reflect.ValueOf(dummyReq)

        // Create the reply structure
        typOfReply := ma.reply.Elem()
        myReply := reflect.New(typOfReply)

        // Call the method
        function := ma.methodPtr.Func
        returnValues := function.Call([]reflect.Value{server.receiver, req, myReply})

        // The return value for the method is an error.
        errInter := returnValues[0].Interface()
        if errInter == nil {
            jReply.Result = myReply.Elem().Interface()
        } else {
            e, ok := errInter.(error)
            if !ok {
                logger.PanicfWithError(err, "Call returnValues invalid cast errInter: %+v", errInter)
            }
            jReply.ErrStr = e.Error()
        }
    } else {
        // TODO - figure out if this is the correct error

        // Method does not exist
        jReply.ErrStr = fmt.Sprintf("errno: %d", unix.ENOENT)
    }

    // Convert the response into JSON for the return trip
    reply.JResult, err = json.Marshal(jReply)
    if err != nil {
        logger.PanicfWithError(err, "Unable to marshal jReply: %+v", jReply)
    }

    return reply
}

func (server *Server) returnResults(ior *ioReply, cCtx *connCtx) {

    // Now write the response back to the client.
    //
    // Serialize multiple goroutines writing on the socket back to the client
    // by grabbing a mutex on the context.

    // Write the header (length) back
    cCtx.Lock()
    cCtx.conn.SetDeadline(time.Now().Add(server.deadlineIO))
    binErr := binary.Write(cCtx.conn, binary.BigEndian, ior.Hdr)
    if binErr != nil {
        cCtx.Unlock()
        // Conn will be closed when serviceClient() returns
        return
    }

    // Write the JSON reply
    //
    // In the error case - Conn will be closed when serviceClient() returns
    cCtx.conn.SetDeadline(time.Now().Add(server.deadlineIO))
    cnt, e := cCtx.conn.Write(ior.JResult)
    if e != nil {
        logger.Infof("returnResults() returned err: %v cnt: %v length of JResult: %v", e, cnt, len(ior.JResult))
    }

    cCtx.Unlock()
}

// closeClientConn closes the sockets to the clients so that goroutines blocked
// in reads wake up and let the server exit.
func (server *Server) closeClientConn() {
    server.connLock.Lock()
    for e := server.connections.Front(); e != nil; e = e.Next() {
        conn := e.Value.(net.Conn)
        conn.Close()
    }
    server.connLock.Unlock()
}

// trimCompleted loops through all clients and trims completed requests,
// either up to the requestID already ACKed by the client or based on TTL.
func (server *Server) trimCompleted(t time.Time, long bool) {
    var (
        totalItems int
    )

    server.Lock()
    if long {
        l := list.New()
        for k, ci := range server.perClientInfo {
            n := server.trimTLLBased(ci, t)
            totalItems += n

            ci.Lock()
            if ci.isEmpty() {
                l.PushBack(k)
            }
            ci.Unlock()

        }

        // If the client is no longer active - delete its entry
        //
        // We can only do the check if we are currently holding the
        // lock.
        for e := l.Front(); e != nil; e = e.Next() {
            key := e.Value.(string)
            ci := server.perClientInfo[key]

            ci.Lock()
            ci.cCtx.Lock()
            if ci.isEmpty() && ci.cCtx.serviceClientExited {
                bucketstats.UnRegister("proxyfs.retryrpc", ci.myUniqueID)
                delete(server.perClientInfo, key)
                logger.Infof("Trim - DELETE inactive clientInfo with ID: %v", ci.myUniqueID)
            }
            ci.cCtx.Unlock()
            ci.Unlock()
        }
        logger.Infof("Trimmed completed RetryRpcs - Total: %v", totalItems)
    } else {
        for k, ci := range server.perClientInfo {
            n := server.trimAClientBasedACK(k, ci)
            totalItems += n
        }
    }
    server.Unlock()
}

// trimAClientBasedACK walks through a client and trims completedRequest
// entries that the client has already acknowledged, i.e. everything up to
// the highest consecutive requestID the client reports having seen.
//
// NOTE: We assume the Server lock is held.
func (server *Server) trimAClientBasedACK(uniqueID string, ci *clientInfo) (numItems int) {

    ci.Lock()

    // Remove from completedRequest and completedRequestLRU
    for h := ci.previousHighestReplySeen + 1; h <= ci.highestReplySeen; h++ {
        v, ok := ci.completedRequest[h]
        if ok {
            ci.completedRequestLRU.Remove(v.lruElem)
            delete(ci.completedRequest, h)
            ci.stats.RmCompleted.Add(1)
            numItems++
        }
    }

    // Keep track of how far we have trimmed for the next run
    ci.previousHighestReplySeen = ci.highestReplySeen
    ci.Unlock()
    return
}

// trimTLLBased removes completedRequest/completedRequestLRU entries older
// than server.completedLongTTL.
//
// This gets called every ~10 minutes to clean out older entries.
//
// NOTE: We assume the Server lock is held.
func (server *Server) trimTLLBased(ci *clientInfo, t time.Time) (numItems int) {
    ci.Lock()
    for e := ci.completedRequestLRU.Front(); e != nil; {
        eTime := e.Value.(completedLRUEntry).timeCompleted.Add(server.completedLongTTL)
        if eTime.Before(t) {
            delete(ci.completedRequest, e.Value.(completedLRUEntry).requestID)
            ci.stats.RmCompleted.Add(1)

            eTmp := e
            e = e.Next()
            _ = ci.completedRequestLRU.Remove(eTmp)
            numItems++
        } else {
            // Oldest is in front so just break
            break
        }
    }
    s := ci.stats
    logger.Infof("ID: %v largestReplySize: %v largestReplySizeMethod: %v longest RPC: %v longest RPC Method: %v",
        ci.myUniqueID, s.largestReplySize, s.largestReplySizeMethod, s.longestRPC, s.longestRPCMethod)

    ci.Unlock()
    return
}
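The reflect-based dispatch in callRPCAndFormatReply() above follows the usual Go RPC convention of methods shaped like func (receiver *T) Method(request *Req, reply *Rep) error: fresh request and reply values are built with reflect.New() from the registered parameter types, the method is invoked through its reflect.Value, and the single return value is treated as the error. Below is a minimal, self-contained sketch of that call pattern; the PingServer/PingReq/PingReply names are hypothetical and not part of this package.

package main

import (
    "fmt"
    "reflect"
)

type PingReq struct{ Msg string }
type PingReply struct{ Msg string }

type PingServer struct{}

// Ping has the request/reply/error shape the dispatcher expects.
func (s *PingServer) Ping(req *PingReq, reply *PingReply) error {
    reply.Msg = "pong: " + req.Msg
    return nil
}

func main() {
    receiver := reflect.ValueOf(&PingServer{})
    method, _ := receiver.Type().MethodByName("Ping")

    // Build fresh *PingReq and *PingReply values from the method's
    // parameter types, analogous to the server's use of reflect.New()
    // on the registered request/reply types.
    req := reflect.New(method.Type.In(1).Elem())
    reply := reflect.New(method.Type.In(2).Elem())
    req.Elem().FieldByName("Msg").SetString("hello")

    // Invoke the method; the single return value is the error.
    returnValues := method.Func.Call([]reflect.Value{receiver, req, reply})
    if errInter := returnValues[0].Interface(); errInter != nil {
        fmt.Println("RPC returned error:", errInter.(error))
        return
    }
    fmt.Println(reply.Elem().FieldByName("Msg").String()) // prints "pong: hello"
}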