github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/retryrpc/stress_test.go

// Copyright (c) 2015-2021, NVIDIA CORPORATION.
// SPDX-License-Identifier: Apache-2.0

package retryrpc

import (
	"fmt"
	"math/rand"
	"sync"
	"testing"
	"time"

	/* DEBUG for pprof
	_ "net/http/pprof"
	*/

	"github.com/stretchr/testify/assert"
	"github.com/swiftstack/ProxyFS/retryrpc/rpctest"
)

func TestStress(t *testing.T) {

	/*
	 * DEBUG - used to debug memory leaks
	 * Run "go tool pprof http://localhost:12123/debug/pprof/heap"
	 * to look at memory in use
	// Start the web server that listens for pprof requests
	go http.ListenAndServe("localhost:12123", nil)
	*/

	testLoop(t)
	testLoopClientAckTrim(t)
	testLoopTTLTrim(t)
	testSendLargeRPC(t)
}

func testLoop(t *testing.T) {
	var (
		agentCount = 15
		sendCount  = 250
	)
	assert := assert.New(t)
	zero := 0
	assert.Equal(0, zero)

	// Create new rpctest server - needed for calling
	// RPCs
	myJrpcfs := rpctest.NewServer()

	rrSvr, ipAddr, port := getNewServer(65*time.Second, false)
	assert.NotNil(rrSvr)

	// Register the Server - sets up the methods supported by the
	// server
	err := rrSvr.Register(myJrpcfs)
	assert.Nil(err)

	// Start listening for requests on the ipaddr/port
	startErr := rrSvr.Start()
	assert.Nil(startErr, "startErr is not nil")

	// Tell server to start accepting and processing requests
	rrSvr.Run()

	// Start up the agents
	parallelAgentSenders(t, rrSvr, ipAddr, port, agentCount, "RpcPing", sendCount, rrSvr.Creds.RootCAx509CertificatePEM)

	rrSvr.Close()
}

// testLoopClientAckTrim tests that we are correctly trimming messages
// based on the shorter term trimmer. The shorter term trimmer relies
// on the client code saying "this is the highest consecutive sqn we have
// seen". Then the server can throw away messages up to and including the
// highest consecutive sqn.
func testLoopClientAckTrim(t *testing.T) {
	var (
		agentCount = 15
		sendCount  = 250
	)
	assert := assert.New(t)
	zero := 0
	assert.Equal(0, zero)

	// Create new rpctest server - needed for calling
	// RPCs
	myJrpcfs := rpctest.NewServer()

	whenTTL := 10 * time.Millisecond
	rrSvr, ipAddr, port := getNewServer(whenTTL, true)
	assert.NotNil(rrSvr)

	// Register the Server - sets up the methods supported by the
	// server
	err := rrSvr.Register(myJrpcfs)
	assert.Nil(err)

	// Start listening for requests on the ipaddr/port
	startErr := rrSvr.Start()
	assert.Nil(startErr, "startErr is not nil")

	// Tell server to start accepting and processing requests
	rrSvr.Run()

	// Start up the agents
	parallelAgentSenders(t, rrSvr, ipAddr, port, agentCount, "RpcPing", sendCount, rrSvr.Creds.RootCAx509CertificatePEM)

	// Now run both trimmers
	tm := time.Now()

	// First the 100ms trimmer - this will leave 1 entry on the completed request queue
	// for each agent since there is no remaining client request to say it is completed.
	//
	// We need the TTL timer to clean up the last entry
	rrSvr.trimCompleted(tm, false)
	assert.Equal(agentCount, cntNotTrimmed(rrSvr), "Should have agentCount messages remaining")

	// Make sure the queue messages will be old enough to be trimmed
	time.Sleep(whenTTL)

	// Now run the TTL timer to clean up the last entry
	tmTTL := time.Now()
	rrSvr.trimCompleted(tmTTL, true)

	// All messages should be trimmed at this point
	assert.Equal(0, cntNotTrimmed(rrSvr), "Still have incomplete messages")

	/*
	 * DEBUG - allows user to use pprof to check for memory leaks
	// The caller of this test will block and we can check for memory leaks with pprof
	fmt.Printf("\n=========== SLEEP 5 minutes ===================\n")
	time.Sleep(5 * time.Minute)
	*/

	rrSvr.Close()
}

func testLoopTTLTrim(t *testing.T) {
	var (
		agentCount = 15
		sendCount  = 250
	)
	assert := assert.New(t)
	zero := 0
	assert.Equal(0, zero)

	// Create new rpctest server - needed for calling
	// RPCs
	myJrpcfs := rpctest.NewServer()

	whenTTL := 10 * time.Millisecond
	rrSvr, ipAddr, port := getNewServer(whenTTL, true)
	assert.NotNil(rrSvr)

	// Register the Server - sets up the methods supported by the
	// server
	err := rrSvr.Register(myJrpcfs)
	assert.Nil(err)

	// Start listening for requests on the ipaddr/port
	startErr := rrSvr.Start()
	assert.Nil(startErr, "startErr is not nil")

	// Tell server to start accepting and processing requests
	rrSvr.Run()

	// Start up the agents
	parallelAgentSenders(t, rrSvr, ipAddr, port, agentCount, "RpcPing", sendCount, rrSvr.Creds.RootCAx509CertificatePEM)

	// Use the TTL trimmer to remove all messages after guaranteeing we are
	// past the time when they should be removed
	time.Sleep(whenTTL)
	tmTTL := time.Now()
	rrSvr.trimCompleted(tmTTL, true)

	assert.Equal(0, cntNotTrimmed(rrSvr), "Still have incomplete messages")

	/*
	 * DEBUG - allow time for the pprof tool to be used for tracking down memory leaks
	// The caller of this test will block and we can check for memory leaks with pprof
	fmt.Printf("\n=========== SLEEP 5 minutes ===================\n")
	time.Sleep(5 * time.Minute)
	*/

	rrSvr.Close()
}

func testSendLargeRPC(t *testing.T) {
	var (
		agentCount = 15
		sendCount  = 250
	)
	assert := assert.New(t)
	zero := 0
	assert.Equal(0, zero)

	// Create new rpctest server - needed for calling
	// RPCs
	myJrpcfs := rpctest.NewServer()

	whenTTL := 10 * time.Millisecond
	rrSvr, ipAddr, port := getNewServer(whenTTL, true)
	assert.NotNil(rrSvr)

	// Register the Server - sets up the methods supported by the
	// server
	err := rrSvr.Register(myJrpcfs)
	assert.Nil(err)

	// Start listening for requests on the ipaddr/port
	startErr := rrSvr.Start()
	assert.Nil(startErr, "startErr is not nil")

	// Tell server to start accepting and processing requests
	rrSvr.Run()

	// Start up the agents
	parallelAgentSenders(t, rrSvr, ipAddr, port, agentCount, "RpcPingLarge", sendCount, rrSvr.Creds.RootCAx509CertificatePEM)

	// Now run both trimmers
	tm := time.Now()

	// First the 100ms trimmer - this will leave 1 entry on the completed request queue
	// for each agent since there is no remaining client request to say it is completed.
	//
	// We need the TTL timer to clean up the last entry
	rrSvr.trimCompleted(tm, false)
	assert.Equal(agentCount, cntNotTrimmed(rrSvr), "Should have agentCount messages remaining")

	// Make sure the queue messages will be old enough to be trimmed
	time.Sleep(whenTTL)

	// Now run the TTL timer to clean up the last entry
	tmTTL := time.Now()
	rrSvr.trimCompleted(tmTTL, true)

	/*
	 * DEBUG - sleep for a while so the pprof tool can be used for tracking down memory leaks
	// The caller of this test will block and we can check for memory leaks with pprof
	fmt.Printf("\n=========== SLEEP 5 minutes ===================\n")
	time.Sleep(5 * time.Minute)
	*/

	// All messages should be trimmed at this point
	assert.Equal(0, cntNotTrimmed(rrSvr), "Still have incomplete messages")

	rrSvr.Close()
}

// cntNotTrimmed returns the number of completed requests, across all clients,
// that have not yet been trimmed from the server
func cntNotTrimmed(server *Server) (numItems int) {
	server.Lock()
	for _, ci := range server.perClientInfo {
		ci.Lock()
		if len(ci.completedRequest) != 0 {
			numItems += len(ci.completedRequest)
		} else {
			if ci.completedRequestLRU.Len() != 0 {
				numItems += ci.completedRequestLRU.Len()
			}
		}
		ci.Unlock()
	}
	server.Unlock()

	return
}

func ping(t *testing.T, client *Client, i int, agentID uint64, assert *assert.Assertions) {
	// Send a ping RPC and print the results
	msg := fmt.Sprintf("Ping Me - %v", i)
	pingRequest := &rpctest.PingReq{Message: msg}
	pingReply := &rpctest.PingReply{}
	expectedReply := fmt.Sprintf("pong %d bytes", len(msg))
	err := client.Send("RpcPing", pingRequest, pingReply)
	assert.Nil(err, "client.Send() returned an error")
	if expectedReply != pingReply.Message {
		fmt.Printf("    client - AGENTID: %v\n", agentID)
		fmt.Printf("    client.Send(RpcPing) reply '%+v'\n", pingReply)
		fmt.Printf("    client.Send(RpcPing) expected '%s' but received '%s'\n", expectedReply, pingReply.Message)
		fmt.Printf("    client.Send(RpcPing) SENT: msg '%v' but received '%s'\n", msg, pingReply.Message)
		fmt.Printf("    client.Send(RpcPing) len(pingRequest.Message): '%d' i: %v\n", len(pingRequest.Message), i)
	}
	assert.Equal(expectedReply, pingReply.Message, "Received different output than expected")
}

// pingLarge sends a ping RPC to which the server responds with a large packet
func pingLarge(t *testing.T, client *Client, i int, agentID uint64, assert *assert.Assertions) {
	// Send a ping RPC and print the results
	msg := fmt.Sprintf("Ping Me - %v", i)
	pingRequest := &rpctest.PingReq{Message: msg}
	pingReply := &rpctest.PingReply{}
	err := client.Send("RpcPingLarge", pingRequest, pingReply)
	assert.Nil(err, "client.Send() returned an error")
}

func sendIt(t *testing.T, client *Client, z int, sendCnt int, sendWg *sync.WaitGroup, prevWg *sync.WaitGroup, agentID uint64, method string, i int) {

	assert := assert.New(t)
	defer sendWg.Done()

	switch method {
	case "RpcPing":
		ping(t, client, z, agentID, assert)
	case "RpcPingLarge":
		pingLarge(t, client, z, agentID, assert)
	}

	// The last send is blocked until all previous sends have completed. This
	// is how we test the short trimmer.
	if i <= (sendCnt - 2) {
		prevWg.Done()
	}
}

type stressMyClient struct {
	sync.Mutex
	cond         *sync.Cond // Signal that received Interrupt() callback
	sawCallback  bool       // True if Interrupt() was called
	interruptCnt int        // Count of Interrupt() calls received (best effort)
}

func (cb *stressMyClient) Interrupt(payload []byte) {
	cb.Lock()
	cb.sawCallback = true
	cb.interruptCnt++
	cb.cond.Broadcast()
	cb.Unlock()
	return
}

// pfsagent represents a pfsagent - a separate client
func pfsagent(t *testing.T, rrSvr *Server, ipAddr string, port int, agentID uint64, method string,
	agentWg *sync.WaitGroup, sendCnt int, rootCAx509CertificatePEM []byte) {
	defer agentWg.Done()

	cb := &stressMyClient{}
	cb.cond = sync.NewCond(&cb.Mutex)
	clientID := fmt.Sprintf("client - %v", agentID)
	clientConfig := &ClientConfig{MyUniqueID: clientID, IPAddr: ipAddr, Port: port,
		RootCAx509CertificatePEM: rootCAx509CertificatePEM, Callbacks: cb, DeadlineIO: 5 * time.Second}
	client, err := NewClient(clientConfig)
	if err != nil {
		fmt.Printf("Dial() failed with err: %v\n", err)
		return
	}
	defer client.Close()

	// WG to verify all messages sent
	var sendWg sync.WaitGroup

	// WG to verify all but the last send() has been sent and
	// received. This is needed to test that the consecutive sequence
	// trimmer is working.
	var prevWg sync.WaitGroup

	var z, r int
	var msg1 []byte = []byte("server msg back to client")
	for i := 0; i < sendCnt; i++ {

		z = (z + i) * 10

		if i == (sendCnt - 1) {
			// Give server time to process messages. This last
			// send gets us closer to highestConsecutive being set to sendCnt - 1.
			prevWg.Wait()

			// The highest consecutive number is updated in the background with
			// a goroutine when send() returns.
			//
			// Therefore, we loop waiting for it to hit (sendCnt - 1)
			for {
				var currentHighest requestID
				client.Lock()
				currentHighest = client.highestConsecutive
				client.Unlock()

				if int(currentHighest) == (sendCnt - 1) {
					break
				}
				time.Sleep(10 * time.Millisecond)
			}
		} else {
			prevWg.Add(1)
		}

		sendWg.Add(1)
		go func(z int, i int) {
			sendIt(t, client, z, sendCnt, &sendWg, &prevWg, agentID, method, i)
			rrSvr.SendCallback(clientID, msg1)
		}(z, i)

		// Occasionally drop the connection to the server to
		// simulate retransmits
		r = i % 10
		if r == 0 && (i != 0) {
			rrSvr.CloseClientConn()
		}
	}
	sendWg.Wait()
}

// parallelAgentSenders starts a bunch of "pfsagents" in parallel
func parallelAgentSenders(t *testing.T, rrSrv *Server, ipAddr string, port int, agentCnt int,
	method string, sendCnt int, rootCAx509CertificatePEM []byte) {

	var agentWg sync.WaitGroup

	// Figure out random seed for runs
	r := rand.New(rand.NewSource(99))
	clientSeed := r.Uint64()

	// Start parallel pfsagents - each agent doing sendCnt parallel sends
	var agentID uint64
	for i := 0; i < agentCnt; i++ {
		agentID = clientSeed + uint64(i)

		agentWg.Add(1)
		go pfsagent(t, rrSrv, ipAddr, port, agentID, method, &agentWg, sendCnt, rootCAx509CertificatePEM)
	}
	agentWg.Wait()
}
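
// singleRoundTripSketch is an illustrative sketch (not exercised by TestStress)
// of the minimal single-client round trip that the stress helpers above scale
// out across many agents and sends. It reuses the getNewServer() test helper
// and the stressMyClient callback type defined in this package; the function
// name, the 10s TTL, and the client ID string are arbitrary choices made for
// illustration only.
func singleRoundTripSketch(t *testing.T) {
	assert := assert.New(t)

	// Bring up a retryrpc server wrapping the rpctest RPC methods
	myJrpcfs := rpctest.NewServer()
	rrSvr, ipAddr, port := getNewServer(10*time.Second, false)
	assert.NotNil(rrSvr)
	assert.Nil(rrSvr.Register(myJrpcfs))
	assert.Nil(rrSvr.Start())
	rrSvr.Run()
	defer rrSvr.Close()

	// Connect one client using the server's self-signed root CA and an
	// Interrupt() callback, mirroring what pfsagent() does above
	cb := &stressMyClient{}
	cb.cond = sync.NewCond(&cb.Mutex)
	clientConfig := &ClientConfig{MyUniqueID: "sketch client", IPAddr: ipAddr, Port: port,
		RootCAx509CertificatePEM: rrSvr.Creds.RootCAx509CertificatePEM, Callbacks: cb,
		DeadlineIO: 5 * time.Second}
	client, err := NewClient(clientConfig)
	assert.Nil(err)
	defer client.Close()

	// Issue a single RpcPing and check the reply
	pingRequest := &rpctest.PingReq{Message: "Ping Me - sketch"}
	pingReply := &rpctest.PingReply{}
	assert.Nil(client.Send("RpcPing", pingRequest, pingReply))
	assert.Equal(fmt.Sprintf("pong %d bytes", len(pingRequest.Message)), pingReply.Message)
}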