get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/test/client_cluster_test.go

// Copyright 2013-2019 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package test

import (
	"fmt"
	"math/rand"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/nats-io/nats.go"
)

func TestServerRestartReSliceIssue(t *testing.T) {
	srvA, srvB, optsA, optsB := runServers(t)
	defer srvA.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, optsB.Port)

	// msg to send..
	msg := []byte("Hello World")

	servers := []string{urlA, urlB}

	opts := nats.GetDefaultOptions()
	opts.Timeout = 5 * time.Second
	opts.ReconnectWait = 50 * time.Millisecond
	opts.MaxReconnect = 1000

	numClients := 20

	reconnects := int32(0)
	reconnectsDone := make(chan bool, numClients)
	opts.ReconnectedCB = func(nc *nats.Conn) {
		atomic.AddInt32(&reconnects, 1)
		reconnectsDone <- true
	}

	clients := make([]*nats.Conn, numClients)

	// Create 20 random clients.
	// Half connected to A and half to B..
	for i := 0; i < numClients; i++ {
		opts.Url = servers[i%2]
		nc, err := opts.Connect()
		if err != nil {
			t.Fatalf("Failed to create connection: %v\n", err)
		}
		clients[i] = nc
		defer nc.Close()

		// Create 10 subscriptions each..
		for x := 0; x < 10; x++ {
			subject := fmt.Sprintf("foo.%d", (rand.Int()%50)+1)
			nc.Subscribe(subject, func(m *nats.Msg) {
				// Just eat it..
			})
		}
		// Pick one subject to send to..
		subject := fmt.Sprintf("foo.%d", (rand.Int()%50)+1)
		go func() {
			time.Sleep(10 * time.Millisecond)
			for i := 1; i <= 100; i++ {
				if err := nc.Publish(subject, msg); err != nil {
					return
				}
				if i%10 == 0 {
					time.Sleep(time.Millisecond)
				}
			}
		}()
	}

	// Wait for a short bit..
	time.Sleep(20 * time.Millisecond)

	// Restart srvB.
	srvB.Shutdown()
	srvB = RunServer(optsB)
	defer srvB.Shutdown()

	// Check that all expected clients have reconnected.
	for i := 0; i < numClients/2; i++ {
		select {
		case <-reconnectsDone:
		case <-time.After(3 * time.Second):
			t.Fatalf("Expected %d reconnects, got %d\n", numClients/2, atomic.LoadInt32(&reconnects))
		}
	}

	// Since srvB was restarted, its deferred Shutdown() was registered last,
	// so it will execute first, which would cause clients that have
	// reconnected to it to try to reconnect again (causing delays on
	// Windows). So let's explicitly close them here.
	// NOTE: With the fix in the NATS Go client (the reconnect loop yields to
	// Close()), this change would not be required; however, it still speeds
	// up the test, from more than 7s to less than one.
	for i := 0; i < numClients; i++ {
		nc := clients[i]
		nc.Close()
	}
}
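// The pattern above (a buffered channel signaled from ReconnectedCB, drained
// with a per-receive timeout) is a reusable way to block until a set of
// clients has reconnected. A minimal sketch of that pattern, using only this
// file's imports; the helper name waitForReconnects is illustrative and not
// part of the original suite.
func waitForReconnects(t *testing.T, reconnectsDone <-chan bool, want int, timeout time.Duration) {
	t.Helper()
	for i := 0; i < want; i++ {
		select {
		case <-reconnectsDone:
			// One client has completed its reconnect.
		case <-time.After(timeout):
			t.Fatalf("Timed out waiting for reconnect %d of %d", i+1, want)
		}
	}
}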
// This will test queue subscriber semantics across a cluster in the presence
// of server restarts.
func TestServerRestartAndQueueSubs(t *testing.T) {
	srvA, srvB, optsA, optsB := runServers(t)
	defer srvA.Shutdown()
	defer srvB.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, optsB.Port)

	// Client options
	opts := nats.GetDefaultOptions()
	opts.Timeout = 5 * time.Second
	opts.ReconnectWait = 20 * time.Millisecond
	opts.MaxReconnect = 1000
	opts.NoRandomize = true

	// Allow us to block on a reconnect completion.
	reconnectsDone := make(chan bool)
	opts.ReconnectedCB = func(nc *nats.Conn) {
		reconnectsDone <- true
	}

	// Helper to wait on a reconnect.
	waitOnReconnect := func() {
		t.Helper()
		select {
		case <-reconnectsDone:
		case <-time.After(2 * time.Second):
			t.Fatalf("Expected a reconnect, timed out!\n")
		}
	}

	// Create two clients..
	opts.Servers = []string{urlA, urlB}
	nc1, err := opts.Connect()
	if err != nil {
		t.Fatalf("Failed to create connection for nc1: %v\n", err)
	}

	opts.Servers = []string{urlB, urlA}
	nc2, err := opts.Connect()
	if err != nil {
		t.Fatalf("Failed to create connection for nc2: %v\n", err)
	}

	c1, _ := nats.NewEncodedConn(nc1, "json")
	defer c1.Close()
	c2, _ := nats.NewEncodedConn(nc2, "json")
	defer c2.Close()

	// Flusher helper function.
	flush := func() {
		// Wait for processing.
		c1.Flush()
		c2.Flush()
		// Wait for a short bit for cluster propagation.
		time.Sleep(50 * time.Millisecond)
	}

	// To hold queue results.
	results := make(map[int]int)
	var mu sync.Mutex

	// This corresponds to the subscriptions below.
	const ExpectedMsgCount = 3

	// Make sure we got what we needed: every seqno accounted for, each
	// delivered exactly ExpectedMsgCount times.
	checkResults := func(numSent int) {
		mu.Lock()
		defer mu.Unlock()

		for i := 0; i < numSent; i++ {
			if results[i] != ExpectedMsgCount {
				t.Fatalf("Received incorrect number of messages, [%d] vs [%d] for seq: %d\n", results[i], ExpectedMsgCount, i)
			}
		}

		// Auto reset results map.
		results = make(map[int]int)
	}

	subj := "foo.bar"
	qgroup := "workers"

	cb := func(seqno int) {
		mu.Lock()
		defer mu.Unlock()
		results[seqno] = results[seqno] + 1
	}

	// Create queue subscribers.
	c1.QueueSubscribe(subj, qgroup, cb)
	c2.QueueSubscribe(subj, qgroup, cb)

	// Do a wildcard subscription.
	c1.Subscribe("foo.*", cb)
	c2.Subscribe("foo.*", cb)

	// Wait for processing.
	flush()

	sendAndCheckMsgs := func(numToSend int) {
		for i := 0; i < numToSend; i++ {
			if i%2 == 0 {
				c1.Publish(subj, i)
			} else {
				c2.Publish(subj, i)
			}
		}
		// Wait for processing.
		flush()
		// Check results.
		checkResults(numToSend)
	}

	////////////////////////////////////////////////////////////////////////////
	// Base Test
	////////////////////////////////////////////////////////////////////////////

	// Make sure subscriptions are propagated in the cluster.
	if err := checkExpectedSubs(4, srvA, srvB); err != nil {
		t.Fatalf("%v", err)
	}
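	// Each published message should be received ExpectedMsgCount (3) times:
	// once by exactly one member of the "workers" queue group, and once by
	// each of the two "foo.*" wildcard subscriptions.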
	// Now send 10 messages, from each client..
	sendAndCheckMsgs(10)

	////////////////////////////////////////////////////////////////////////////
	// Now restart srvA and srvB, re-run test
	////////////////////////////////////////////////////////////////////////////

	srvA.Shutdown()
	// Wait for the client connected to A to reconnect to B.
	waitOnReconnect()

	srvA = RunServer(optsA)
	defer srvA.Shutdown()

	srvB.Shutdown()
	// Now both clients should reconnect to A.
	waitOnReconnect()
	waitOnReconnect()

	srvB = RunServer(optsB)
	defer srvB.Shutdown()

	// Make sure the cluster is reformed.
	checkClusterFormed(t, srvA, srvB)

	// Make sure subscriptions are propagated in the cluster.
	// Clients will be connected to srvA, so that will be 4,
	// but srvB will only have 2 now since we coalesce.
	if err := checkExpectedSubs(4, srvA); err != nil {
		t.Fatalf("%v", err)
	}
	if err := checkExpectedSubs(2, srvB); err != nil {
		t.Fatalf("%v", err)
	}

	// Now send another 10 messages, from each client..
	sendAndCheckMsgs(10)

	// Since the servers are restarted after the clients' deferred Close()
	// calls are registered, the servers' deferred Shutdown() calls run first
	// on return, which would cause the clients to try to reconnect on exit,
	// causing delays on Windows. So let's explicitly close them here.
	c1.Close()
	c2.Close()
}

// This will test request semantics across a route.
func TestRequestsAcrossRoutes(t *testing.T) {
	srvA, srvB, optsA, optsB := runServers(t)
	defer srvA.Shutdown()
	defer srvB.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, optsB.Port)

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Failed to create connection for nc1: %v\n", err)
	}
	defer nc1.Close()

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Failed to create connection for nc2: %v\n", err)
	}
	defer nc2.Close()

	ec2, _ := nats.NewEncodedConn(nc2, nats.JSON_ENCODER)

	response := []byte("I will help you")

	// Connect responder to srvA.
	nc1.Subscribe("foo-req", func(m *nats.Msg) {
		nc1.Publish(m.Reply, response)
	})
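	// Each request from ec2 (connected to srvB) must cross the route to reach
	// this responder on srvA, and the reply crosses back the same way.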
	// Make sure the route and the subscription are propagated.
	nc1.Flush()

	if err := checkExpectedSubs(1, srvA, srvB); err != nil {
		t.Fatalf("%v", err)
	}

	var resp string

	for i := 0; i < 100; i++ {
		if err := ec2.Request("foo-req", i, &resp, 250*time.Millisecond); err != nil {
			t.Fatalf("Received an error on Request test [%d]: %s", i, err)
		}
	}
}

// This will test request semantics across a route to queues.
func TestRequestsAcrossRoutesToQueues(t *testing.T) {
	srvA, srvB, optsA, optsB := runServers(t)
	defer srvA.Shutdown()
	defer srvB.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, optsB.Port)

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Failed to create connection for nc1: %v\n", err)
	}
	defer nc1.Close()

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Failed to create connection for nc2: %v\n", err)
	}
	defer nc2.Close()

	ec1, _ := nats.NewEncodedConn(nc1, nats.JSON_ENCODER)
	ec2, _ := nats.NewEncodedConn(nc2, nats.JSON_ENCODER)

	response := []byte("I will help you")

	// Connect one responder to srvA.
	nc1.QueueSubscribe("foo-req", "booboo", func(m *nats.Msg) {
		nc1.Publish(m.Reply, response)
	})
	// Make sure the route and the subscription are propagated.
	nc1.Flush()

	// Connect the other responder to srvB.
	nc2.QueueSubscribe("foo-req", "booboo", func(m *nats.Msg) {
		nc2.Publish(m.Reply, response)
	})

	if err := checkExpectedSubs(2, srvA, srvB); err != nil {
		t.Fatalf("%v", err)
	}

	var resp string

	for i := 0; i < 100; i++ {
		if err := ec2.Request("foo-req", i, &resp, 500*time.Millisecond); err != nil {
			t.Fatalf("Received an error on Request test [%d]: %s", i, err)
		}
	}

	for i := 0; i < 100; i++ {
		if err := ec1.Request("foo-req", i, &resp, 500*time.Millisecond); err != nil {
			t.Fatalf("Received an error on Request test [%d]: %s", i, err)
		}
	}
}

// This is in response to Issue #1144:
// https://github.com/nats-io/nats-server/issues/1144
func TestQueueDistributionAcrossRoutes(t *testing.T) {
	srvA, srvB, _, _ := runServers(t)
	defer srvA.Shutdown()
	defer srvB.Shutdown()

	checkClusterFormed(t, srvA, srvB)

	urlA := srvA.ClientURL()
	urlB := srvB.ClientURL()

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Failed to create connection for nc1: %v\n", err)
	}
	defer nc1.Close()

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Failed to create connection for nc2: %v\n", err)
	}
	defer nc2.Close()

	var qsubs []*nats.Subscription
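	// qsubs collects all 12 queue subscribers (2 + 6 + 4 below) so that each
	// one's pending count can be inspected when checking the distribution.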
	// Connect queue subscriptions as mentioned in the issue: 2(A) - 6(B) - 4(A).
	for i := 0; i < 2; i++ {
		sub, _ := nc1.QueueSubscribeSync("foo", "bar")
		qsubs = append(qsubs, sub)
	}
	nc1.Flush()
	for i := 0; i < 6; i++ {
		sub, _ := nc2.QueueSubscribeSync("foo", "bar")
		qsubs = append(qsubs, sub)
	}
	nc2.Flush()
	for i := 0; i < 4; i++ {
		sub, _ := nc1.QueueSubscribeSync("foo", "bar")
		qsubs = append(qsubs, sub)
	}
	nc1.Flush()

	if err := checkExpectedSubs(7, srvA, srvB); err != nil {
		t.Fatalf("%v", err)
	}

	send := 10000
	for i := 0; i < send; i++ {
		nc2.Publish("foo", nil)
	}
	nc2.Flush()

	// Total pending messages across all queue subscribers.
	tp := func() int {
		var total int
		for i := 0; i < len(qsubs); i++ {
			pending, _, _ := qsubs[i].Pending()
			total += pending
		}
		return total
	}

	checkFor(t, time.Second, 10*time.Millisecond, func() error {
		if total := tp(); total != send {
			return fmt.Errorf("Expected %d total messages pending, got %d", send, total)
		}
		return nil
	})

	// The bug is essentially that when we deliver across a route, we prefer
	// locals, but if we randomize to a block of bounce backs, then we walk to
	// the end and find the same local for all the remote options. So what you
	// will see in this case is a large value at #9 (2+6, next one local).

	avg := send / len(qsubs)
	for i := 0; i < len(qsubs); i++ {
		total, _, _ := qsubs[i].Pending()
		if total > avg+(avg*3/10) {
			if i == 8 {
				t.Fatalf("Qsub at index 8 gets the majority of the messages (the prior 6 remote spots) in this test")
			}
			t.Fatalf("Received too many messages at position %d: %d vs average %d", i, total, avg)
		}
	}
}
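// The fairness check above tolerates up to 30% over the per-subscriber
// average. A minimal reusable sketch of that same check, assuming sync
// subscriptions whose pending counts have not been drained; the helper name
// checkEvenDistribution is illustrative and not part of the original suite.
func checkEvenDistribution(t *testing.T, subs []*nats.Subscription, sent int) {
	t.Helper()
	avg := sent / len(subs)
	for i, sub := range subs {
		pending, _, err := sub.Pending()
		if err != nil {
			t.Fatalf("Error getting pending count for subscriber %d: %v", i, err)
		}
		if pending > avg+(avg*3/10) {
			t.Fatalf("Subscriber %d received too many messages: %d vs average %d", i, pending, avg)
		}
	}
}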