get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/test/norace_test.go

// Copyright 2019-2020 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !race && !skip_no_race_tests
// +build !race,!skip_no_race_tests

package test

import (
	"context"
	crand "crypto/rand"
	"encoding/json"
	"fmt"
	"net"
	"net/url"
	"os"
	"regexp"
	"runtime"
	"strconv"
	"sync"
	"testing"
	"time"

	"get.pme.sh/pnats/server"
	"github.com/nats-io/nats.go"
	"github.com/nats-io/nuid"
)

// IMPORTANT: Tests in this file are not executed when running with the -race flag.
// The test name should be prefixed with TestNoRace so we can run only
// those tests: go test -run=TestNoRace ...

func TestNoRaceRouteSendSubs(t *testing.T) {
	template := `
		port: -1
		write_deadline: "2s"
		cluster {
			port: -1
			pool_size: -1
			compression: disabled
			%s
		}
		no_sys_acc: true
	`
	cfa := createConfFile(t, []byte(fmt.Sprintf(template, "")))
	srvA, optsA := RunServerWithConfig(cfa)
	srvA.Shutdown()
	optsA.DisableShortFirstPing = true
	srvA = RunServer(optsA)
	defer srvA.Shutdown()

	cfb := createConfFile(t, []byte(fmt.Sprintf(template, "")))
	srvB, optsB := RunServerWithConfig(cfb)
	srvB.Shutdown()
	optsB.DisableShortFirstPing = true
	srvB = RunServer(optsB)
	defer srvB.Shutdown()

	clientA := createClientConn(t, optsA.Host, optsA.Port)
	defer clientA.Close()

	clientASend, clientAExpect := setupConn(t, clientA)
	clientASend("PING\r\n")
	clientAExpect(pongRe)

	clientB := createClientConn(t, optsB.Host, optsB.Port)
	defer clientB.Close()

	clientBSend, clientBExpect := setupConn(t, clientB)
	clientBSend("PING\r\n")
	clientBExpect(pongRe)

	// total number of subscriptions per server
	totalPerServer := 100000
	for i := 0; i < totalPerServer/2; i++ {
		proto := fmt.Sprintf("SUB foo.%d %d\r\n", i, i*2+1)
		clientASend(proto)
		clientBSend(proto)
		proto = fmt.Sprintf("SUB bar.%d queue.%d %d\r\n", i, i, i*2+2)
		clientASend(proto)
		clientBSend(proto)
	}
	clientASend("PING\r\n")
	clientAExpect(pongRe)
	clientBSend("PING\r\n")
	clientBExpect(pongRe)

	if err := checkExpectedSubs(totalPerServer, srvA, srvB); err != nil {
		t.Fatalf(err.Error())
	}

	routes := fmt.Sprintf(`
		routes: [
			"nats://%s:%d"
		]
	`, optsA.Cluster.Host, optsA.Cluster.Port)
	if err := os.WriteFile(cfb, []byte(fmt.Sprintf(template, routes)), 0600); err != nil {
		t.Fatalf("Error rewriting B's config file: %v", err)
	}
	if err := srvB.Reload(); err != nil {
		t.Fatalf("Error on reload: %v", err)
	}

	checkClusterFormed(t, srvA, srvB)
	if err := checkExpectedSubs(2*totalPerServer, srvA, srvB); err != nil {
		t.Fatalf(err.Error())
	}
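	// The template above sets write_deadline to 2s; a connection that cannot be
	// flushed within that window is marked as a slow consumer by the server.
	// Neither server should report any, either right after the route exchanges
	// its subscription interest or after the burst of replies further below.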
	checkSlowConsumers := func(t *testing.T) {
		t.Helper()
		if srvB.NumSlowConsumers() != 0 || srvA.NumSlowConsumers() != 0 {
			t.Fatalf("Expected no slow consumers, got %d for srvA and %d for srvB",
				srvA.NumSlowConsumers(), srvB.NumSlowConsumers())
		}
	}
	checkSlowConsumers(t)

	type sender struct {
		c  net.Conn
		sf sendFun
		ef expectFun
	}
	var senders []*sender
	createSenders := func(t *testing.T, host string, port int) {
		t.Helper()
		for i := 0; i < 25; i++ {
			s := &sender{}
			s.c = createClientConn(t, host, port)
			s.sf, s.ef = setupConn(t, s.c)
			s.sf("PING\r\n")
			s.ef(pongRe)
			senders = append(senders, s)
		}
	}
	createSenders(t, optsA.Host, optsA.Port)
	createSenders(t, optsB.Host, optsB.Port)
	for _, s := range senders {
		defer s.c.Close()
	}

	// Now create SUBs on A and B for "ping.replies" and simulate
	// that there are thousands of replies being sent on
	// both sides.
	createSubOnReplies := func(t *testing.T, host string, port int) net.Conn {
		t.Helper()
		c := createClientConn(t, host, port)
		send, expect := setupConn(t, c)
		send("SUB ping.replies 123456789\r\nPING\r\n")
		expect(pongRe)
		return c
	}
	requestorOnA := createSubOnReplies(t, optsA.Host, optsA.Port)
	defer requestorOnA.Close()

	requestorOnB := createSubOnReplies(t, optsB.Host, optsB.Port)
	defer requestorOnB.Close()

	if err := checkExpectedSubs(2*totalPerServer+2, srvA, srvB); err != nil {
		t.Fatalf(err.Error())
	}

	totalReplies := 120000
	payload := sizedBytes(400)
	expectedBytes := (len(fmt.Sprintf("MSG ping.replies 123456789 %d\r\n\r\n", len(payload))) + len(payload)) * totalReplies
	ch := make(chan error, 2)
	recvReplies := func(c net.Conn) {
		var buf [32 * 1024]byte

		for total := 0; total < expectedBytes; {
			n, err := c.Read(buf[:])
			if err != nil {
				ch <- fmt.Errorf("read error: %v", err)
				return
			}
			total += n
		}
		ch <- nil
	}
	go recvReplies(requestorOnA)
	go recvReplies(requestorOnB)

	wg := sync.WaitGroup{}
	wg.Add(len(senders))
	replyMsg := fmt.Sprintf("PUB ping.replies %d\r\n%s\r\n", len(payload), payload)
	for _, s := range senders {
		go func(s *sender, count int) {
			defer wg.Done()
			for i := 0; i < count; i++ {
				s.sf(replyMsg)
			}
			s.sf("PING\r\n")
			s.ef(pongRe)
		}(s, totalReplies/len(senders))
	}

	for i := 0; i < 2; i++ {
		select {
		case e := <-ch:
			if e != nil {
				t.Fatalf("Error: %v", e)
			}
		case <-time.After(10 * time.Second):
			t.Fatalf("Did not receive all %v replies", totalReplies)
		}
	}
	checkSlowConsumers(t)
	wg.Wait()
	checkSlowConsumers(t)

	// Let's remove the route and do a config reload.
	// Otherwise, on test shutdown the client close
	// will cause the server to try to send unsubs and
	// this can delay the test.
	if err := os.WriteFile(cfb, []byte(fmt.Sprintf(template, "")), 0600); err != nil {
		t.Fatalf("Error rewriting B's config file: %v", err)
	}
	if err := srvB.Reload(); err != nil {
		t.Fatalf("Error on reload: %v", err)
	}
}

func TestNoRaceDynamicResponsePermsMemory(t *testing.T) {
	srv, opts := RunServerWithConfig("./configs/authorization.conf")
	defer srv.Shutdown()

	// We will test the timeout to make sure that we are not showing excessive growth
	// when a reply subject is not utilized by the responder.
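	// With dynamic response permissions, every request the service receives causes
	// the server to track a temporary allow rule for that reply subject. Those
	// entries expire on their own (10ms for MY_STREAM_SERVICE, per the config
	// used below), so total allocations should stay bounded even though the
	// responder never answers.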
	// Alice can do anything, so she will be our requestor
	rc := createClientConn(t, opts.Host, opts.Port)
	defer rc.Close()
	expectAuthRequired(t, rc)
	doAuthConnect(t, rc, "", "alice", DefaultPass)
	expectResult(t, rc, okRe)

	// MY_STREAM_SERVICE has an expiration of 10ms for the response permissions.
	c := createClientConn(t, opts.Host, opts.Port)
	defer c.Close()
	expectAuthRequired(t, c)
	doAuthConnect(t, c, "", "svcb", DefaultPass)
	expectResult(t, c, okRe)

	sendProto(t, c, "SUB my.service.req 1\r\n")
	expectResult(t, c, okRe)

	var m runtime.MemStats

	runtime.GC()
	runtime.ReadMemStats(&m)
	pta := m.TotalAlloc

	// Need this so we do not blow the allocs on expectResult which makes 32k each time.
	expBuf := make([]byte, 32768)
	expect := func(c net.Conn, re *regexp.Regexp) {
		t.Helper()
		c.SetReadDeadline(time.Now().Add(2 * time.Second))
		n, _ := c.Read(expBuf)
		c.SetReadDeadline(time.Time{})
		buf := expBuf[:n]
		if !re.Match(buf) {
			t.Fatalf("Response did not match expected: \n\tReceived:'%q'\n\tExpected:'%s'", buf, re)
		}
	}

	// Now send off some requests. We will not answer them and this will build up reply
	// permissions in the server.
	for i := 0; i < 10000; i++ {
		pub := fmt.Sprintf("PUB my.service.req resp.%d 2\r\nok\r\n", i)
		sendProto(t, rc, pub)
		expect(rc, okRe)
		expect(c, msgRe)
	}

	const max = 20 * 1024 * 1024 // 20MB
	checkFor(t, time.Second, 25*time.Millisecond, func() error {
		runtime.GC()
		runtime.ReadMemStats(&m)
		used := m.TotalAlloc - pta
		if used > max {
			return fmt.Errorf("Using too much memory, expect < 20MB, got %dMB", used/(1024*1024))
		}
		return nil
	})
}

func TestNoRaceLargeClusterMem(t *testing.T) {
	// Try to clean up.
	runtime.GC()
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	pta := m.TotalAlloc

	opts := func() *server.Options {
		o := DefaultTestOptions
		o.Host = "127.0.0.1"
		o.Port = -1
		o.Cluster.Host = o.Host
		o.Cluster.Port = -1
		return &o
	}

	var servers []*server.Server

	// Create seed first.
	o := opts()
	s := RunServer(o)
	servers = append(servers, s)

	// For connecting to seed server above.
	routeAddr := fmt.Sprintf("nats-route://%s:%d", o.Cluster.Host, o.Cluster.Port)
	rurl, _ := url.Parse(routeAddr)
	routes := []*url.URL{rurl}

	numServers := 15

	for i := 1; i < numServers; i++ {
		o := opts()
		o.Routes = routes
		s := RunServer(o)
		servers = append(servers, s)
	}
	checkClusterFormed(t, servers...)

	// Calculate in MB what we are using now.
	const max = 80 * 1024 * 1024 // 80MB
	runtime.ReadMemStats(&m)
	used := m.TotalAlloc - pta
	if used > max {
		t.Fatalf("Cluster using too much memory, expect < 80MB, got %dMB", used/(1024*1024))
	}

	for _, s := range servers {
		s.Shutdown()
	}
}

// Make sure we have the correct remote state when dealing with queue subscribers
// across many client connections.
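// On the route protocol, queue interest is advertised together with a weight
// (the number of local members of the group); the server may coalesce these
// updates, but the advertised weight should only ever grow while subscribers
// are being added.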
func TestNoRaceQueueSubWeightOrderMultipleConnections(t *testing.T) {
	opts, err := server.ProcessConfigFile("./configs/new_cluster.conf")
	if err != nil {
		t.Fatalf("Error processing config file: %v", err)
	}
	opts.DisableShortFirstPing = true
	s := RunServer(opts)
	defer s.Shutdown()

	// Create 100 connections to s
	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
	clients := make([]*nats.Conn, 0, 100)
	for i := 0; i < 100; i++ {
		nc, err := nats.Connect(url, nats.NoReconnect())
		if err != nil {
			t.Fatalf("Error connecting: %v", err)
		}
		defer nc.Close()
		clients = append(clients, nc)
	}

	rc := createRouteConn(t, opts.Cluster.Host, opts.Cluster.Port)
	defer rc.Close()

	routeID := "RTEST_NEW:22"
	routeSend, routeExpect := setupRouteEx(t, rc, opts, routeID)

	info := checkInfoMsg(t, rc)

	info.ID = routeID
	info.Name = routeID

	b, err := json.Marshal(info)
	if err != nil {
		t.Fatalf("Could not marshal test route info: %v", err)
	}
	// Send our INFO and wait for a PONG. This will prevent a race
	// where the server will have started processing queue subscriptions
	// and then processes the route's INFO (sending its current subs)
	// followed by updates.
	routeSend(fmt.Sprintf("INFO %s\r\nPING\r\n", b))
	routeExpect(pongRe)

	start := make(chan bool)
	for _, nc := range clients {
		go func(nc *nats.Conn) {
			<-start
			// Now create 100 identical queue subscribers on each connection.
			for i := 0; i < 100; i++ {
				if _, err := nc.QueueSubscribe("foo", "bar", func(_ *nats.Msg) {}); err != nil {
					return
				}
			}
			nc.Flush()
		}(nc)
	}
	close(start)

	// We used to expect every single update, but with optimizations we now just
	// want to make sure the weight is always increasing and that an update with
	// a lesser queue weight is never delivered for this test.
	maxExpected := 10000
	updates := 0
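	// rsubRe matches the route's RS+ protocol; for queue subscriptions the last
	// capture group carries the queue weight, i.e. how many members of the group
	// the remote server currently has (hence the len/Atoi checks below).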
	for qw := 0; qw < maxExpected; {
		buf := routeExpect(rsubRe)
		matches := rsubRe.FindAllSubmatch(buf, -1)
		for _, m := range matches {
			if len(m) != 5 {
				t.Fatalf("Expected a weight for the queue group")
			}
			nqw, err := strconv.Atoi(string(m[4]))
			if err != nil {
				t.Fatalf("Got an error converting queue weight: %v", err)
			}
			// Make sure the new value only increases. It is ok for updates to be
			// skipped because of the optimization, but the weight must always be
			// increasing.
			if nqw <= qw {
				t.Fatalf("Was expecting increasing queue weight after %d, got %d", qw, nqw)
			}
			qw = nqw
			updates++
		}
	}
	if updates >= maxExpected {
		t.Fatalf("Was not expecting all %v updates to be received", maxExpected)
	}
}

func TestNoRaceClusterLeaksSubscriptions(t *testing.T) {
	srvA, srvB, optsA, optsB := runServers(t)
	defer srvA.Shutdown()
	defer srvB.Shutdown()

	checkClusterFormed(t, srvA, srvB)

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, optsB.Port)

	numResponses := 100
	repliers := make([]*nats.Conn, 0, numResponses)

	var noOpErrHandler = func(_ *nats.Conn, _ *nats.Subscription, _ error) {}

	// Create 100 repliers
	for i := 0; i < 50; i++ {
		nc1, _ := nats.Connect(urlA)
		defer nc1.Close()
		nc1.SetErrorHandler(noOpErrHandler)
		nc2, _ := nats.Connect(urlB)
		defer nc2.Close()
		nc2.SetErrorHandler(noOpErrHandler)
		repliers = append(repliers, nc1, nc2)
		nc1.Subscribe("test.reply", func(m *nats.Msg) {
			m.Respond([]byte("{\"sender\": 22 }"))
		})
		nc2.Subscribe("test.reply", func(m *nats.Msg) {
			m.Respond([]byte("{\"sender\": 33 }"))
		})
		nc1.Flush()
		nc2.Flush()
	}

	servers := fmt.Sprintf("%s, %s", urlA, urlB)
	req := sizedBytes(8 * 1024)

	// Now run a requestor in a loop, creating and tearing down each time to
	// simulate running a modified nats-req.
	doReq := func() {
		msgs := make(chan *nats.Msg, 1)
		inbox := nats.NewInbox()
		grp := nuid.Next()
		// Create 8 queue Subscribers for responses.
		for i := 0; i < 8; i++ {
			nc, _ := nats.Connect(servers)
			nc.SetErrorHandler(noOpErrHandler)
			nc.ChanQueueSubscribe(inbox, grp, msgs)
			nc.Flush()
			defer nc.Close()
		}
		nc, _ := nats.Connect(servers)
		nc.SetErrorHandler(noOpErrHandler)
		nc.PublishRequest("test.reply", inbox, req)
		defer nc.Close()

		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
		defer cancel()

		var received int
		for {
			select {
			case <-msgs:
				received++
				if received >= numResponses {
					return
				}
			case <-ctx.Done():
				return
			}
		}
	}

	var wg sync.WaitGroup

	doRequests := func(n int) {
		for i := 0; i < n; i++ {
			doReq()
		}
		wg.Done()
	}

	concurrent := 10
	wg.Add(concurrent)
	for i := 0; i < concurrent; i++ {
		go doRequests(10)
	}
	wg.Wait()

	// Close responders too, should have zero(0) subs attached to routes.
	for _, nc := range repliers {
		nc.Close()
	}

	// Make sure no clients remain. This is to make sure the test is correct and that
	// we have closed all the client connections.
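	// Varz and Routez below expose the same data as the server's monitoring
	// endpoints; with Subscriptions: true, Routez reports the subscription count
	// and list per route, which is what lets us assert that no interest leaked
	// across the cluster once every client is gone.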
	checkFor(t, time.Second, 10*time.Millisecond, func() error {
		v1, _ := srvA.Varz(nil)
		v2, _ := srvB.Varz(nil)
		if v1.Connections != 0 || v2.Connections != 0 {
			return fmt.Errorf("We have lingering client connections %d:%d", v1.Connections, v2.Connections)
		}
		return nil
	})

	loadRoutez := func() (*server.Routez, *server.Routez) {
		v1, err := srvA.Routez(&server.RoutezOptions{Subscriptions: true})
		if err != nil {
			t.Fatalf("Error getting Routez: %v", err)
		}
		v2, err := srvB.Routez(&server.RoutezOptions{Subscriptions: true})
		if err != nil {
			t.Fatalf("Error getting Routez: %v", err)
		}
		return v1, v2
	}

	checkFor(t, time.Second, 10*time.Millisecond, func() error {
		r1, r2 := loadRoutez()
		if r1.Routes[0].NumSubs != 0 {
			return fmt.Errorf("Leaked %d subs: %+v", r1.Routes[0].NumSubs, r1.Routes[0].Subs)
		}
		if r2.Routes[0].NumSubs != 0 {
			return fmt.Errorf("Leaked %d subs: %+v", r2.Routes[0].NumSubs, r2.Routes[0].Subs)
		}
		return nil
	})
}

func TestNoRaceLeafNodeSmapUpdate(t *testing.T) {
	s, opts := runLeafServer()
	defer s.Shutdown()

	// Create a client on leaf server
	c := createClientConn(t, opts.Host, opts.Port)
	defer c.Close()
	csend, cexpect := setupConn(t, c)

	numSubs := make(chan int, 1)
	doneCh := make(chan struct{}, 1)
	wg := sync.WaitGroup{}
	wg.Add(1)

	go func() {
		defer wg.Done()
		for i := 1; ; i++ {
			csend(fmt.Sprintf("SUB foo.%d %d\r\n", i, i))
			select {
			case <-doneCh:
				numSubs <- i
				return
			default:
			}
		}
	}()

	time.Sleep(5 * time.Millisecond)
	// Create leaf node
	lc := createLeafConn(t, opts.LeafNode.Host, opts.LeafNode.Port)
	defer lc.Close()

	setupConn(t, lc)
	checkLeafNodeConnected(t, s)

	close(doneCh)
	ns := <-numSubs
	csend("PING\r\n")
	cexpect(pongRe)
	wg.Wait()

	// Make sure we receive as many LS+ protocols (since all subs are unique).
	// But we also have to account for the LDS subject.
	// There may be so many protocols and partials that expectNumberOfProtos may
	// not work. Do a manual search here.
	checkLS := func(proto string, expected int) {
		t.Helper()
		p := []byte(proto)
		cur := 0
		buf := make([]byte, 32768)
		for ls := 0; ls < expected; {
			lc.SetReadDeadline(time.Now().Add(2 * time.Second))
			n, err := lc.Read(buf)
			lc.SetReadDeadline(time.Time{})
			if err == nil && n > 0 {
				for i := 0; i < n; i++ {
					if buf[i] == p[cur] {
						cur++
						if cur == len(p) {
							ls++
							cur = 0
						}
					} else {
						cur = 0
					}
				}
			}
			if err != nil || ls > expected {
				t.Fatalf("Expected %v %sgot %v, err: %v", expected, proto, ls, err)
			}
		}
	}
	checkLS("LS+ ", ns+1)
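	// The +1 accounts for the LDS subscription (the server's internal
	// loop-detection subject) that is registered on the leafnode connection in
	// addition to the client's unique subs; the LS- count below does not
	// include it since that sub is never removed.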
	// Now unsub all those subs...
	for i := 1; i <= ns; i++ {
		csend(fmt.Sprintf("UNSUB %d\r\n", i))
	}
	csend("PING\r\n")
	cexpect(pongRe)

	// Expect that many LS-
	checkLS("LS- ", ns)
}

func TestNoRaceSlowProxy(t *testing.T) {
	t.Skip()

	opts := DefaultTestOptions
	opts.Port = -1
	s := RunServer(&opts)
	defer s.Shutdown()

	rttTarget := 22 * time.Millisecond
	bwTarget := 10 * 1024 * 1024 / 8 // 10mbit

	sp := newSlowProxy(rttTarget, bwTarget, bwTarget, &opts)
	defer sp.stop()

	nc, err := nats.Connect(sp.clientURL())
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	defer nc.Close()

	doRTT := func() time.Duration {
		t.Helper()
		const samples = 5
		var total time.Duration
		for i := 0; i < samples; i++ {
			rtt, _ := nc.RTT()
			total += rtt
		}
		return total / samples
	}

	rtt := doRTT()
	if rtt < rttTarget || rtt > (rttTarget*3/2) {
		t.Fatalf("rtt is out of range, target of %v, actual %v", rttTarget, rtt)
	}

	// Now test send BW.
	const payloadSize = 64 * 1024
	var payload [payloadSize]byte
	crand.Read(payload[:])

	// 5MB total.
	bytesSent := (5 * 1024 * 1024)
	toSend := bytesSent / payloadSize

	start := time.Now()
	for i := 0; i < toSend; i++ {
		nc.Publish("z", payload[:])
	}
	nc.Flush()
	tt := time.Since(start)
	bps := float64(bytesSent) / tt.Seconds()
	min, max := float64(bwTarget)*0.8, float64(bwTarget)*1.25
	if bps < min || bps > max {
		t.Fatalf("bps is off, target is %v, actual is %v", bwTarget, bps)
	}
}