github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/routes_test.go

// Copyright 2013-2023 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"bufio"
	"bytes"
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"math/rand"
	"net"
	"net/http"
	"net/http/httptest"
	"net/url"
	"os"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/nats-io/jwt/v2"
	"github.com/nats-io/nats.go"
	"github.com/nats-io/nkeys"
)

func init() {
	routeConnectDelay = 15 * time.Millisecond
}

func checkNumRoutes(t *testing.T, s *Server, expected int) {
	t.Helper()
	checkFor(t, 5*time.Second, 15*time.Millisecond, func() error {
		if nr := s.NumRoutes(); nr != expected {
			return fmt.Errorf("Expected %v routes, got %v", expected, nr)
		}
		return nil
	})
}

func checkSubInterest(t *testing.T, s *Server, accName, subject string, timeout time.Duration) {
	t.Helper()
	checkFor(t, timeout, 15*time.Millisecond, func() error {
		acc, err := s.LookupAccount(accName)
		if err != nil {
			return fmt.Errorf("error looking up account %q: %v", accName, err)
		}
		if acc.SubscriptionInterest(subject) {
			return nil
		}
		return fmt.Errorf("no subscription interest for account %q on %q", accName, subject)
	})
}

func checkSubNoInterest(t *testing.T, s *Server, accName, subject string, timeout time.Duration) {
	t.Helper()
	checkFor(t, timeout, 15*time.Millisecond, func() error {
		acc, err := s.LookupAccount(accName)
		if err != nil {
			return fmt.Errorf("error looking up account %q: %v", accName, err)
		}
		if acc.SubscriptionInterest(subject) {
			return fmt.Errorf("unexpected subscription interest for account %q on %q", accName, subject)
		}
		return nil
	})
}

func TestRouteConfig(t *testing.T) {
	opts, err := ProcessConfigFile("./configs/cluster.conf")
	if err != nil {
		t.Fatalf("Received an error reading route config file: %v\n", err)
	}

	golden := &Options{
		ConfigFile:  "./configs/cluster.conf",
		Host:        "127.0.0.1",
		Port:        4242,
		Username:    "derek",
		Password:    "porkchop",
		AuthTimeout: 1.0,
		Cluster: ClusterOpts{
			Name:           "abc",
			Host:           "127.0.0.1",
			Port:           4244,
			Username:       "route_user",
			Password:       "top_secret",
			AuthTimeout:    1.0,
			NoAdvertise:    true,
			ConnectRetries: 2,
		},
		PidFile:          "/tmp/nats-server/nats_cluster_test.pid",
		authBlockDefined: true,
	}

	// Setup URLs
	r1, _ := url.Parse("nats-route://foo:bar@127.0.0.1:4245")
	r2, _ := url.Parse("nats-route://foo:bar@127.0.0.1:4246")

	golden.Routes = []*url.URL{r1, r2}

	checkOptionsEqual(t, golden, opts)
}

func TestClusterAdvertise(t *testing.T) {
	lst, err := natsListen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("Error starting listener: %v", err)
	}
	ch := make(chan error)
	go func() {
		c, err := lst.Accept()
		if err != nil {
			ch <- err
			return
		}
		c.Close()
		ch <- nil
	}()

	optsA, err := ProcessConfigFile("./configs/seed.conf")
	require_NoError(t, err)
	optsA.NoSigs, optsA.NoLog = true, true
	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	srvARouteURL := fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host, srvA.ClusterAddr().Port)
	optsB := nextServerOpts(optsA)
	optsB.Routes = RoutesFromStr(srvARouteURL)

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	// Wait for these 2 to connect to each other
	checkClusterFormed(t, srvA, srvB)

	// Now start server C that connects to A. A should ask B to connect to C,
	// based on C's URL. But since C configures a Cluster.Advertise, it will connect
	// to our listener.
	optsC := nextServerOpts(optsB)
	optsC.Cluster.Advertise = lst.Addr().String()
	optsC.ClientAdvertise = "me:1"
	optsC.Routes = RoutesFromStr(srvARouteURL)

	srvC := RunServer(optsC)
	defer srvC.Shutdown()

	select {
	case e := <-ch:
		if e != nil {
			t.Fatalf("Error: %v", e)
		}
	case <-time.After(2 * time.Second):
		t.Fatalf("Test timed out")
	}
}

func TestClusterAdvertiseErrorOnStartup(t *testing.T) {
	opts := DefaultOptions()
	// Set invalid address
	opts.Cluster.Advertise = "addr:::123"
	testFatalErrorOnStart(t, opts, "Cluster.Advertise")
}

func TestClientAdvertise(t *testing.T) {
	optsA, err := ProcessConfigFile("./configs/seed.conf")
	require_NoError(t, err)
	optsA.NoSigs, optsA.NoLog = true, true

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	optsB := nextServerOpts(optsA)
	optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host, optsA.Cluster.Port))
	optsB.ClientAdvertise = "me:1"
	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	checkClusterFormed(t, srvA, srvB)

	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", optsA.Host, optsA.Port))
	if err != nil {
		t.Fatalf("Error on connect: %v", err)
	}
	defer nc.Close()
	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		ds := nc.DiscoveredServers()
		if len(ds) == 1 {
			if ds[0] == "nats://me:1" {
				return nil
			}
		}
		return fmt.Errorf("Did not get expected discovered servers: %v", nc.DiscoveredServers())
	})
}

func TestServerRoutesWithClients(t *testing.T) {
	optsA, err := ProcessConfigFile("./configs/srv_a.conf")
	require_NoError(t, err)
	optsB, err := ProcessConfigFile("./configs/srv_b.conf")
	require_NoError(t, err)

	optsA.NoSigs, optsA.NoLog = true, true
	optsB.NoSigs, optsB.NoLog = true, true

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, optsB.Port)

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc1.Close()

	ch := make(chan bool)
	sub, _ := nc1.Subscribe("foo", func(m *nats.Msg) { ch <- true })
	nc1.QueueSubscribe("foo", "bar", func(m *nats.Msg) {})
	nc1.Publish("foo", []byte("Hello"))
	// Wait for message
	<-ch
	sub.Unsubscribe()

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	// Wait for route to form.
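	// (checkClusterFormed, defined later in this file, polls NumRoutes on
	// each server, accounting for route pooling and per-account routes,
	// until every server reports the expected number of route connections.)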
	checkClusterFormed(t, srvA, srvB)

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()
	nc2.Publish("foo", []byte("Hello"))
	nc2.Flush()
}

func TestServerRoutesWithAuthAndBCrypt(t *testing.T) {
	optsA, err := ProcessConfigFile("./configs/srv_a_bcrypt.conf")
	require_NoError(t, err)
	optsB, err := ProcessConfigFile("./configs/srv_b_bcrypt.conf")
	require_NoError(t, err)

	optsA.NoSigs, optsA.NoLog = true, true
	optsB.NoSigs, optsB.NoLog = true, true

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	// Wait for route to form.
	checkClusterFormed(t, srvA, srvB)

	urlA := fmt.Sprintf("nats://%s:%s@%s:%d/", optsA.Username, optsA.Password, optsA.Host, optsA.Port)
	urlB := fmt.Sprintf("nats://%s:%s@%s:%d/", optsB.Username, optsB.Password, optsB.Host, optsB.Port)

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc1.Close()

	// Test that we are connected.
	ch := make(chan bool)
	sub, err := nc1.Subscribe("foo", func(m *nats.Msg) { ch <- true })
	if err != nil {
		t.Fatalf("Error creating subscription: %v\n", err)
	}
	nc1.Flush()
	defer sub.Unsubscribe()

	checkSubInterest(t, srvB, globalAccountName, "foo", time.Second)

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()
	nc2.Publish("foo", []byte("Hello"))
	nc2.Flush()

	// Wait for message
	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for message across route")
	}
}

// Helper function to check that a cluster is formed
func checkClusterFormed(t testing.TB, servers ...*Server) {
	t.Helper()
	var _enr [8]int
	enr := _enr[:0]
	for _, a := range servers {
		if a.getOpts().Cluster.PoolSize < 0 {
			enr = append(enr, len(servers)-1)
		} else {
			a.mu.RLock()
			nr := a.routesPoolSize + len(a.accRoutes)
			a.mu.RUnlock()
			total := 0
			for _, b := range servers {
				if a == b {
					continue
				}
				if b.getOpts().Cluster.PoolSize < 0 {
					total++
				} else {
					total += nr
				}
			}
			enr = append(enr, total)
		}
	}
	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
		for i, s := range servers {
			if numRoutes := s.NumRoutes(); numRoutes != enr[i] {
				return fmt.Errorf("Expected %d routes for server %q, got %d", enr[i], s, numRoutes)
			}
		}
		return nil
	})
}

// Helper function to generate next opts to make sure no port conflicts etc.
func nextServerOpts(opts *Options) *Options {
	nopts := *opts
	nopts.Port = -1
	nopts.Cluster.Port = -1
	nopts.HTTPPort = -1
	if nopts.Gateway.Name != "" {
		nopts.Gateway.Port = -1
	}
	nopts.ServerName = ""
	return &nopts
}

func TestSeedSolicitWorks(t *testing.T) {
	optsSeed, err := ProcessConfigFile("./configs/seed.conf")
	require_NoError(t, err)

	optsSeed.NoSigs, optsSeed.NoLog = true, true
	optsSeed.NoSystemAccount = true

	srvSeed := RunServer(optsSeed)
	defer srvSeed.Shutdown()

	optsA := nextServerOpts(optsSeed)
	optsA.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsSeed.Cluster.Host,
		srvSeed.ClusterAddr().Port))

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, srvA.ClusterAddr().Port)

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc1.Close()

	// Test that we are connected.
	ch := make(chan bool)
	nc1.Subscribe("foo", func(m *nats.Msg) { ch <- true })
	nc1.Flush()

	optsB := nextServerOpts(optsA)
	optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsSeed.Cluster.Host,
		srvSeed.ClusterAddr().Port))

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, srvB.ClusterAddr().Port)

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()

	checkClusterFormed(t, srvSeed, srvA, srvB)
	checkExpectedSubs(t, 1, srvB)

	nc2.Publish("foo", []byte("Hello"))

	// Wait for message
	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for message across route")
	}
}

func TestTLSSeedSolicitWorks(t *testing.T) {
	optsSeed, err := ProcessConfigFile("./configs/seed_tls.conf")
	require_NoError(t, err)

	optsSeed.NoSigs, optsSeed.NoLog = true, true
	optsSeed.NoSystemAccount = true

	srvSeed := RunServer(optsSeed)
	defer srvSeed.Shutdown()

	seedRouteURL := fmt.Sprintf("nats://%s:%d", optsSeed.Cluster.Host,
		srvSeed.ClusterAddr().Port)
	optsA := nextServerOpts(optsSeed)
	optsA.Routes = RoutesFromStr(seedRouteURL)

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	urlA := fmt.Sprintf("nats://%s:%d/", optsA.Host, srvA.Addr().(*net.TCPAddr).Port)

	nc1, err := nats.Connect(urlA)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc1.Close()

	// Test that we are connected.
	ch := make(chan bool)
	nc1.Subscribe("foo", func(m *nats.Msg) { ch <- true })
	nc1.Flush()

	optsB := nextServerOpts(optsA)
	optsB.Routes = RoutesFromStr(seedRouteURL)

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, srvB.Addr().(*net.TCPAddr).Port)

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()

	checkClusterFormed(t, srvSeed, srvA, srvB)
	checkExpectedSubs(t, 1, srvB)

	nc2.Publish("foo", []byte("Hello"))

	// Wait for message
	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for message across route")
	}
}

func TestChainedSolicitWorks(t *testing.T) {
	optsSeed, err := ProcessConfigFile("./configs/seed.conf")
	require_NoError(t, err)

	optsSeed.NoSigs, optsSeed.NoLog = true, true
	optsSeed.NoSystemAccount = true

	srvSeed := RunServer(optsSeed)
	defer srvSeed.Shutdown()

	seedRouteURL := fmt.Sprintf("nats://%s:%d", optsSeed.Cluster.Host,
		srvSeed.ClusterAddr().Port)
	optsA := nextServerOpts(optsSeed)
	optsA.Routes = RoutesFromStr(seedRouteURL)

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	urlSeed := fmt.Sprintf("nats://%s:%d/", optsSeed.Host, srvA.Addr().(*net.TCPAddr).Port)

	nc1, err := nats.Connect(urlSeed)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc1.Close()

	// Test that we are connected.
	ch := make(chan bool)
	nc1.Subscribe("foo", func(m *nats.Msg) { ch <- true })
	nc1.Flush()

	optsB := nextServerOpts(optsA)
	// Server B connects to A
	optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host,
		srvA.ClusterAddr().Port))

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, srvB.Addr().(*net.TCPAddr).Port)

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()

	checkClusterFormed(t, srvSeed, srvA, srvB)
	checkExpectedSubs(t, 1, srvB)

	nc2.Publish("foo", []byte("Hello"))

	// Wait for message
	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for message across route")
	}
}

// Helper function to check that a server (or list of servers) has the
// expected number of subscriptions.
func checkExpectedSubs(t *testing.T, expected int, servers ...*Server) {
	t.Helper()
	checkFor(t, 4*time.Second, 10*time.Millisecond, func() error {
		for _, s := range servers {
			if numSubs := int(s.NumSubscriptions()); numSubs != expected {
				return fmt.Errorf("Expected %d subscriptions for server %q, got %d", expected, s.ID(), numSubs)
			}
		}
		return nil
	})
}

func TestTLSChainedSolicitWorks(t *testing.T) {
	optsSeed, err := ProcessConfigFile("./configs/seed_tls.conf")
	require_NoError(t, err)

	optsSeed.NoSigs, optsSeed.NoLog = true, true
	optsSeed.NoSystemAccount = true

	srvSeed := RunServer(optsSeed)
	defer srvSeed.Shutdown()

	urlSeedRoute := fmt.Sprintf("nats://%s:%d", optsSeed.Cluster.Host,
		srvSeed.ClusterAddr().Port)
	optsA := nextServerOpts(optsSeed)
	optsA.Routes = RoutesFromStr(urlSeedRoute)

	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	urlSeed := fmt.Sprintf("nats://%s:%d/", optsSeed.Host, srvSeed.Addr().(*net.TCPAddr).Port)

	nc1, err := nats.Connect(urlSeed)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc1.Close()

	// Test that we are connected.
	ch := make(chan bool)
	nc1.Subscribe("foo", func(m *nats.Msg) { ch <- true })
	nc1.Flush()

	optsB := nextServerOpts(optsA)
	// Server B connects to A
	optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host,
		srvA.ClusterAddr().Port))

	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	checkClusterFormed(t, srvSeed, srvA, srvB)
	checkExpectedSubs(t, 1, srvA, srvB)

	urlB := fmt.Sprintf("nats://%s:%d/", optsB.Host, srvB.Addr().(*net.TCPAddr).Port)

	nc2, err := nats.Connect(urlB)
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()

	nc2.Publish("foo", []byte("Hello"))

	// Wait for message
	select {
	case <-ch:
	case <-time.After(2 * time.Second):
		t.Fatal("Timeout waiting for message across route")
	}
}

func TestRouteTLSHandshakeError(t *testing.T) {
	optsSeed, err := ProcessConfigFile("./configs/seed_tls.conf")
	require_NoError(t, err)
	optsSeed.NoLog = true
	optsSeed.NoSigs = true
	srvSeed := RunServer(optsSeed)
	defer srvSeed.Shutdown()

	opts := DefaultOptions()
	opts.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsSeed.Cluster.Host, optsSeed.Cluster.Port))

	srv := RunServer(opts)
	defer srv.Shutdown()

	time.Sleep(500 * time.Millisecond)

	checkNumRoutes(t, srv, 0)
}

func TestBlockedShutdownOnRouteAcceptLoopFailure(t *testing.T) {
	opts := DefaultOptions()
	opts.Cluster.Host = "x.x.x.x"
	opts.Cluster.Port = 7222

	s := New(opts)
	s.Start()
	// Wait a second
	time.Sleep(time.Second)
	ch := make(chan bool)
	go func() {
		s.Shutdown()
		ch <- true
	}()

	timeout := time.NewTimer(5 * time.Second)
	select {
	case <-ch:
		return
	case <-timeout.C:
		t.Fatal("Shutdown did not complete")
	}
}

func TestRouteUseIPv6(t *testing.T) {
	opts := DefaultOptions()
	opts.Cluster.Host = "::"
	opts.Cluster.Port = 6222

	// I believe that there is no IPv6 support on Travis...
	// Regardless, cannot have this test fail simply because IPv6 is disabled
	// on the host.
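	// Note: resolving the "[::]:6222" address below fails on hosts without
	// IPv6 support, which is what triggers the skip.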
	hp := net.JoinHostPort(opts.Cluster.Host, strconv.Itoa(opts.Cluster.Port))
	_, err := net.ResolveTCPAddr("tcp", hp)
	if err != nil {
		t.Skipf("Skipping this test since there is no IPv6 support on this host: %v", err)
	}

	s := RunServer(opts)
	defer s.Shutdown()

	routeUp := false
	timeout := time.Now().Add(5 * time.Second)
	for time.Now().Before(timeout) && !routeUp {
		// We know that the server is local and listening to
		// all IPv6 interfaces. Try connect using IPv6 loopback.
		if conn, err := net.Dial("tcp", "[::1]:6222"); err != nil {
			// Travis seems to have the server actually listening to 0.0.0.0,
			// so try with 127.0.0.1
			if conn, err := net.Dial("tcp", "127.0.0.1:6222"); err != nil {
				time.Sleep(time.Second)
				continue
			} else {
				conn.Close()
			}
		} else {
			conn.Close()
		}
		routeUp = true
	}
	if !routeUp {
		t.Fatal("Server failed to start route accept loop")
	}
}

func TestClientConnectToRoutePort(t *testing.T) {
	opts := DefaultOptions()

	// Since client will first connect to the route listen port, set the
	// cluster's Host to 127.0.0.1 so it works on Windows too, since on
	// Windows, a client can't use 0.0.0.0 in a connect.
	opts.Cluster.Host = "127.0.0.1"
	s := RunServer(opts)
	defer s.Shutdown()

	url := fmt.Sprintf("nats://%s:%d", opts.Cluster.Host, s.ClusterAddr().Port)
	clientURL := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
	// When connecting to the ROUTE port, the client library will receive the
	// CLIENT port in the INFO protocol. This URL is added to the client's pool
	// and will be tried after the initial connect failure. So all those
	// nats.Connect() should succeed.
	// The only reason for a failure would be if there are too many FDs in time-wait
	// which would delay the creation of the TCP connection. So keep the total of
	// attempts rather small.
	total := 10
	for i := 0; i < total; i++ {
		nc, err := nats.Connect(url)
		if err != nil {
			t.Fatalf("Unexpected error on connect: %v", err)
		}
		defer nc.Close()
		if nc.ConnectedUrl() != clientURL {
			t.Fatalf("Expected client to be connected to %v, got %v", clientURL, nc.ConnectedUrl())
		}
	}

	s.Shutdown()
	// Try again with NoAdvertise and this time, the client should fail to connect.
	opts.Cluster.NoAdvertise = true
	s = RunServer(opts)
	defer s.Shutdown()

	for i := 0; i < total; i++ {
		nc, err := nats.Connect(url)
		if err == nil {
			nc.Close()
			t.Fatal("Expected error on connect, got none")
		}
	}
}

type checkDuplicateRouteLogger struct {
	sync.Mutex
	gotDuplicate bool
}

func (l *checkDuplicateRouteLogger) Noticef(format string, v ...any) {}
func (l *checkDuplicateRouteLogger) Errorf(format string, v ...any)  {}
func (l *checkDuplicateRouteLogger) Warnf(format string, v ...any)   {}
func (l *checkDuplicateRouteLogger) Fatalf(format string, v ...any)  {}
func (l *checkDuplicateRouteLogger) Tracef(format string, v ...any)  {}
func (l *checkDuplicateRouteLogger) Debugf(format string, v ...any) {
	l.Lock()
	defer l.Unlock()
	msg := fmt.Sprintf(format, v...)
	if strings.Contains(msg, "duplicate remote route") {
		l.gotDuplicate = true
	}
}

func TestRoutesToEachOther(t *testing.T) {
	optsA := DefaultOptions()
	optsA.Cluster.Port = 7246
	optsA.Routes = RoutesFromStr("nats://127.0.0.1:7247")

	optsB := DefaultOptions()
	optsB.Cluster.Port = 7247
	optsB.Routes = RoutesFromStr("nats://127.0.0.1:7246")

	srvALogger := &checkDuplicateRouteLogger{}
	srvA := New(optsA)
	srvA.SetLogger(srvALogger, true, false)
	defer srvA.Shutdown()

	srvBLogger := &checkDuplicateRouteLogger{}
	srvB := New(optsB)
	srvB.SetLogger(srvBLogger, true, false)
	defer srvB.Shutdown()

	go srvA.Start()
	go srvB.Start()

	start := time.Now()
	checkClusterFormed(t, srvA, srvB)
	end := time.Now()

	srvALogger.Lock()
	gotIt := srvALogger.gotDuplicate
	srvALogger.Unlock()
	if !gotIt {
		srvBLogger.Lock()
		gotIt = srvBLogger.gotDuplicate
		srvBLogger.Unlock()
	}
	if gotIt {
		dur := end.Sub(start)
		// It should not take too long to have a successful connection
		// between the 2 servers.
		if dur > 5*time.Second {
			t.Logf("Cluster formed, but took a long time: %v", dur)
		}
	} else {
		t.Log("Was not able to get duplicate route this time!")
	}
}

func wait(ch chan bool) error {
	select {
	case <-ch:
		return nil
	case <-time.After(5 * time.Second):
	}
	return fmt.Errorf("timeout")
}

func TestServerPoolUpdatedWhenRouteGoesAway(t *testing.T) {
	s1Opts := DefaultOptions()
	s1Opts.ServerName = "A"
	s1Opts.Host = "127.0.0.1"
	s1Opts.Port = 4222
	s1Opts.Cluster.Host = "127.0.0.1"
	s1Opts.Cluster.Port = 6222
	s1Opts.Routes = RoutesFromStr("nats://127.0.0.1:6223,nats://127.0.0.1:6224")
	s1 := RunServer(s1Opts)
	defer s1.Shutdown()

	s1Url := "nats://127.0.0.1:4222"
	s2Url := "nats://127.0.0.1:4223"
	s3Url := "nats://127.0.0.1:4224"

	ch := make(chan bool, 1)
	chch := make(chan bool, 1)
	connHandler := func(_ *nats.Conn) {
		chch <- true
	}
	nc, err := nats.Connect(s1Url,
		nats.ReconnectWait(50*time.Millisecond),
		nats.ReconnectHandler(connHandler),
		nats.DiscoveredServersHandler(func(_ *nats.Conn) {
			ch <- true
		}))
	if err != nil {
		t.Fatalf("Error on connect")
	}
	defer nc.Close()

	s2Opts := DefaultOptions()
	s2Opts.ServerName = "B"
	s2Opts.Host = "127.0.0.1"
	s2Opts.Port = s1Opts.Port + 1
	s2Opts.Cluster.Host = "127.0.0.1"
	s2Opts.Cluster.Port = 6223
	s2Opts.Routes = RoutesFromStr("nats://127.0.0.1:6222,nats://127.0.0.1:6224")
	s2 := RunServer(s2Opts)
	defer s2.Shutdown()

	// Wait to be notified
	if err := wait(ch); err != nil {
		t.Fatal("New server callback was not invoked")
	}

	checkPool := func(expected []string) {
		t.Helper()
		// Don't use DiscoveredServers() here, but Servers(), to get the full list.
		// Also, there may be cases where the mesh is not formed yet,
		// so try again on failure.
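		// The check below treats nc.Servers() as a set: same length as
		// expected, and every expected URL present, in any order.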
		checkFor(t, 5*time.Second, 50*time.Millisecond, func() error {
			ds := nc.Servers()
			if len(ds) == len(expected) {
				m := make(map[string]struct{}, len(ds))
				for _, url := range ds {
					m[url] = struct{}{}
				}
				ok := true
				for _, url := range expected {
					if _, present := m[url]; !present {
						ok = false
						break
					}
				}
				if ok {
					return nil
				}
			}
			return fmt.Errorf("Expected %v, got %v", expected, ds)
		})
	}
	// Verify that we now know about s2
	checkPool([]string{s1Url, s2Url})

	s3Opts := DefaultOptions()
	s3Opts.ServerName = "C"
	s3Opts.Host = "127.0.0.1"
	s3Opts.Port = s2Opts.Port + 1
	s3Opts.Cluster.Host = "127.0.0.1"
	s3Opts.Cluster.Port = 6224
	s3Opts.Routes = RoutesFromStr("nats://127.0.0.1:6222,nats://127.0.0.1:6223")
	s3 := RunServer(s3Opts)
	defer s3.Shutdown()

	// Wait to be notified
	if err := wait(ch); err != nil {
		t.Fatal("New server callback was not invoked")
	}
	// Verify that we now know about s3
	checkPool([]string{s1Url, s2Url, s3Url})

	// Stop s1. Since this was passed to the Connect() call, this one should
	// still be present.
	s1.Shutdown()
	// Wait for reconnect
	if err := wait(chch); err != nil {
		t.Fatal("Reconnect handler not invoked")
	}
	checkPool([]string{s1Url, s2Url, s3Url})

	// Check the server we reconnected to.
	reConnectedTo := nc.ConnectedUrl()
	expected := []string{s1Url}
	if reConnectedTo == s2Url {
		s2.Shutdown()
		expected = append(expected, s3Url)
	} else if reConnectedTo == s3Url {
		s3.Shutdown()
		expected = append(expected, s2Url)
	} else {
		t.Fatalf("Unexpected server client has reconnected to: %v", reConnectedTo)
	}
	// Wait for reconnect
	if err := wait(chch); err != nil {
		t.Fatal("Reconnect handler not invoked")
	}
	// The implicit server that we just shutdown should have been removed from the pool
	checkPool(expected)
	nc.Close()
}

func TestRouteFailedConnRemovedFromTmpMap(t *testing.T) {
	for _, test := range []struct {
		name     string
		poolSize int
	}{
		{"no pooling", -1},
		{"pool 1", 1},
		{"pool 3", 3},
	} {
		t.Run(test.name, func(t *testing.T) {
			optsA, err := ProcessConfigFile("./configs/srv_a.conf")
			require_NoError(t, err)
			optsA.NoSigs, optsA.NoLog = true, true
			optsA.Cluster.PoolSize = test.poolSize

			optsB, err := ProcessConfigFile("./configs/srv_b.conf")
			require_NoError(t, err)
			optsB.NoSigs, optsB.NoLog = true, true
			optsB.Cluster.PoolSize = test.poolSize

			srvA := New(optsA)
			defer srvA.Shutdown()
			srvB := New(optsB)
			defer srvB.Shutdown()

			// Start this way to increase chance of having the two connect
			// to each other at the same time. This will cause one of the
			// routes to be dropped.
			wg := &sync.WaitGroup{}
			wg.Add(2)
			go func() {
				srvA.Start()
				wg.Done()
			}()
			go func() {
				srvB.Start()
				wg.Done()
			}()

			checkClusterFormed(t, srvA, srvB)

			// Ensure that maps are empty
			checkMap := func(s *Server) {
				checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
					s.grMu.Lock()
					l := len(s.grTmpClients)
					s.grMu.Unlock()
					if l != 0 {
						return fmt.Errorf("grTmpClients map should be empty, got %v", l)
					}
					return nil
				})
			}
			checkMap(srvA)
			checkMap(srvB)

			srvB.Shutdown()
			srvA.Shutdown()
			wg.Wait()
		})
	}
}

func getFirstRoute(s *Server) *client {
	for _, conns := range s.routes {
		for _, r := range conns {
			if r != nil {
				return r
			}
		}
	}
	return nil
}

func TestRoutePermsAppliedOnInboundAndOutboundRoute(t *testing.T) {
	perms := &RoutePermissions{
		Import: &SubjectPermission{
			Allow: []string{"imp.foo"},
			Deny:  []string{"imp.bar"},
		},
		Export: &SubjectPermission{
			Allow: []string{"exp.foo"},
			Deny:  []string{"exp.bar"},
		},
	}

	optsA, err := ProcessConfigFile("./configs/seed.conf")
	require_NoError(t, err)
	optsA.NoLog = true
	optsA.NoSigs = true
	optsA.Cluster.Permissions = perms
	srva := RunServer(optsA)
	defer srva.Shutdown()

	optsB := DefaultOptions()
	optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host, optsA.Cluster.Port))
	srvb := RunServer(optsB)
	defer srvb.Shutdown()

	checkClusterFormed(t, srva, srvb)

	// Ensure permission is properly set
	check := func(t *testing.T, s *Server) {
		t.Helper()
		var route *client
		s.mu.Lock()
		route = getFirstRoute(s)
		s.mu.Unlock()
		route.mu.Lock()
		perms := route.perms
		route.mu.Unlock()
		if perms == nil {
			t.Fatal("Expected perms to be set")
		}
		if perms.pub.allow == nil || perms.pub.allow.Count() != 1 {
			t.Fatal("unexpected pub allow perms")
		}
		if r := perms.pub.allow.Match("imp.foo"); len(r.psubs) != 1 {
			t.Fatal("unexpected pub allow match")
		}
		if perms.pub.deny == nil || perms.pub.deny.Count() != 1 {
			t.Fatal("unexpected pub deny perms")
		}
		if r := perms.pub.deny.Match("imp.bar"); len(r.psubs) != 1 {
			t.Fatal("unexpected pub deny match")
		}
		if perms.sub.allow == nil || perms.sub.allow.Count() != 1 {
			t.Fatal("unexpected sub allow perms")
		}
		if r := perms.sub.allow.Match("exp.foo"); len(r.psubs) != 1 {
			t.Fatal("unexpected sub allow match")
		}
		if perms.sub.deny == nil || perms.sub.deny.Count() != 1 {
			t.Fatal("unexpected sub deny perms")
		}
		if r := perms.sub.deny.Match("exp.bar"); len(r.psubs) != 1 {
			t.Fatal("unexpected sub deny match")
		}
	}

	// First check when permissions are set on the server accepting the route connection
	check(t, srva)

	srvb.Shutdown()
	srva.Shutdown()

	optsA.Cluster.Permissions = nil
	optsB.Cluster.Permissions = perms

	srva = RunServer(optsA)
	defer srva.Shutdown()

	srvb = RunServer(optsB)
	defer srvb.Shutdown()

	checkClusterFormed(t, srva, srvb)

	// Now check for permissions set on server initiating the route connection
	check(t, srvb)
}

func TestRouteSendLocalSubsWithLowMaxPending(t *testing.T) {
	optsA := DefaultOptions()
	optsA.MaxPayload = 1024
	optsA.MaxPending = 1024
	optsA.NoSystemAccount = true
	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", optsA.Host, optsA.Port))
	if err != nil {
		t.Fatalf("Error on connect: %v", err)
	}
	defer nc.Close()
	numSubs := 1000
	for i := 0; i < numSubs; i++ {
		subj := fmt.Sprintf("fo.bar.%d", i)
		nc.Subscribe(subj, func(_ *nats.Msg) {})
	}
	checkExpectedSubs(t, numSubs, srvA)

	// Now create a route between B and A
	optsB := DefaultOptions()
	optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host, optsA.Cluster.Port))
	optsB.NoSystemAccount = true
	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	checkClusterFormed(t, srvA, srvB)

	// Check that all subs have been sent ok
	checkExpectedSubs(t, numSubs, srvA, srvB)
}

func TestRouteNoCrashOnAddingSubToRoute(t *testing.T) {
	opts := DefaultOptions()
	s := RunServer(opts)
	defer s.Shutdown()

	numRoutes := routeTargetInit + 2
	total := int32(numRoutes)
	count := int32(0)
	ch := make(chan bool, 1)
	cb := func(_ *nats.Msg) {
		if n := atomic.AddInt32(&count, 1); n == total {
			ch <- true
		}
	}

	var servers []*Server
	servers = append(servers, s)

	seedURL := fmt.Sprintf("nats://%s:%d", opts.Cluster.Host, opts.Cluster.Port)
	for i := 0; i < numRoutes; i++ {
		ropts := DefaultOptions()
		ropts.Routes = RoutesFromStr(seedURL)
		rs := RunServer(ropts)
		defer rs.Shutdown()
		servers = append(servers, rs)

		// Confirm routes are active before clients connect.
		for _, srv := range servers {
			rz, err := srv.Routez(nil)
			require_NoError(t, err)
			for i, route := range rz.Routes {
				if route.LastActivity.IsZero() {
					t.Errorf("Expected LastActivity to be valid (%d)", i)
				}
			}
		}

		// Create a sub on each routed server.
		nc := natsConnect(t, fmt.Sprintf("nats://%s:%d", ropts.Host, ropts.Port))
		defer nc.Close()
		natsSub(t, nc, "foo", cb)
	}
	checkClusterFormed(t, servers...)

	// Make sure all subs are registered in s.
	gacc := s.globalAccount()
	gacc.mu.RLock()
	sl := gacc.sl
	gacc.mu.RUnlock()
	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		var _subs [64]*subscription
		subs := _subs[:0]
		sl.All(&subs)
		var ts int
		for _, sub := range subs {
			if string(sub.subject) == "foo" {
				ts++
			}
		}
		if ts != int(numRoutes) {
			return fmt.Errorf("Not all %d routed subs were registered: %d", numRoutes, ts)
		}
		return nil
	})

	pubNC := natsConnect(t, fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
	defer pubNC.Close()
	natsPub(t, pubNC, "foo", []byte("hello world!"))

	waitCh(t, ch, "Did not get all messages")
}

func TestRouteRTT(t *testing.T) {
	ob := DefaultOptions()
	ob.PingInterval = 15 * time.Millisecond
	sb := RunServer(ob)
	defer sb.Shutdown()

	oa := DefaultOptions()
	oa.PingInterval = 15 * time.Millisecond
	oa.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", ob.Cluster.Host, ob.Cluster.Port))
	sa := RunServer(oa)
	defer sa.Shutdown()

	checkClusterFormed(t, sa, sb)

	checkRTT := func(t *testing.T, s *Server) time.Duration {
		t.Helper()
		var route *client
		s.mu.Lock()
		route = getFirstRoute(s)
		s.mu.Unlock()

		var rtt time.Duration
		checkFor(t, 2*firstPingInterval, 15*time.Millisecond, func() error {
			route.mu.Lock()
			rtt = route.rtt
			route.mu.Unlock()
			if rtt == 0 {
				return fmt.Errorf("RTT not tracked")
			}
			return nil
		})
		return rtt
	}

	prevA := checkRTT(t, sa)
	prevB := checkRTT(t, sb)

	checkUpdated := func(t *testing.T, s *Server, prev time.Duration) {
		t.Helper()
		attempts := 0
		timeout := time.Now().Add(2 * firstPingInterval)
		for time.Now().Before(timeout) {
			if rtt := checkRTT(t, s); rtt != prev {
				return
			}
			attempts++
			if attempts == 5 {
				// It could be that we are very unlucky
				// and the RTT is constant. So override
				// the route's RTT to 0 to see if it gets
				// updated.
				s.mu.Lock()
				if r := getFirstRoute(s); r != nil {
					r.mu.Lock()
					r.rtt = 0
					r.mu.Unlock()
				}
				s.mu.Unlock()
			}
			time.Sleep(15 * time.Millisecond)
		}
		t.Fatalf("RTT probably not updated")
	}
	checkUpdated(t, sa, prevA)
	checkUpdated(t, sb, prevB)

	sa.Shutdown()
	sb.Shutdown()

	// Now check that initial RTT is computed prior to first PingInterval
	// Get new options to avoid possible race changing the ping interval.
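	// With the ping interval set to a minute, a non-zero route RTT below can
	// only come from the short first ping sent soon after the route is
	// established (see firstPingInterval above), not from the regular ping timer.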
	ob = DefaultOptions()
	ob.PingInterval = time.Minute
	sb = RunServer(ob)
	defer sb.Shutdown()

	oa = DefaultOptions()
	oa.PingInterval = time.Minute
	oa.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", ob.Cluster.Host, ob.Cluster.Port))
	sa = RunServer(oa)
	defer sa.Shutdown()

	checkClusterFormed(t, sa, sb)
	checkRTT(t, sa)
	checkRTT(t, sb)
}

func TestRouteCloseTLSConnection(t *testing.T) {
	opts := DefaultOptions()
	opts.DisableShortFirstPing = true
	opts.Cluster.Name = "A"
	opts.Cluster.Host = "127.0.0.1"
	opts.Cluster.Port = -1
	opts.Cluster.TLSTimeout = 100
	tc := &TLSConfigOpts{
		CertFile: "./configs/certs/server.pem",
		KeyFile:  "./configs/certs/key.pem",
		Insecure: true,
	}
	tlsConf, err := GenTLSConfig(tc)
	if err != nil {
		t.Fatalf("Error generating tls config: %v", err)
	}
	opts.Cluster.TLSConfig = tlsConf
	opts.NoLog = true
	opts.NoSigs = true
	s := RunServer(opts)
	defer s.Shutdown()

	endpoint := fmt.Sprintf("%s:%d", opts.Cluster.Host, opts.Cluster.Port)
	conn, err := net.DialTimeout("tcp", endpoint, 2*time.Second)
	if err != nil {
		t.Fatalf("Unexpected error on dial: %v", err)
	}
	defer conn.Close()

	tlsConn := tls.Client(conn, &tls.Config{InsecureSkipVerify: true})
	defer tlsConn.Close()
	if err := tlsConn.Handshake(); err != nil {
		t.Fatalf("Unexpected error during handshake: %v", err)
	}
	connectOp := []byte("CONNECT {\"name\":\"route\",\"verbose\":false,\"pedantic\":false,\"tls_required\":true,\"cluster\":\"A\"}\r\n")
	if _, err := tlsConn.Write(connectOp); err != nil {
		t.Fatalf("Unexpected error writing CONNECT: %v", err)
	}
	infoOp := []byte("INFO {\"server_id\":\"route\",\"tls_required\":true}\r\n")
	if _, err := tlsConn.Write(infoOp); err != nil {
		t.Fatalf("Unexpected error writing INFO: %v", err)
	}
	if _, err := tlsConn.Write([]byte("PING\r\n")); err != nil {
		t.Fatalf("Unexpected error writing PING: %v", err)
	}

	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		if s.NumRoutes() != 1 {
			return fmt.Errorf("No route registered yet")
		}
		return nil
	})

	// Get route connection
	var route *client
	s.mu.Lock()
	route = getFirstRoute(s)
	s.mu.Unlock()
	// Fill the buffer. We want to time out on write so that nc.Close()
	// would block due to a write that cannot complete.
	buf := make([]byte, 64*1024)
	done := false
	for !done {
		route.nc.SetWriteDeadline(time.Now().Add(time.Second))
		if _, err := route.nc.Write(buf); err != nil {
			done = true
		}
		route.nc.SetWriteDeadline(time.Time{})
	}
	ch := make(chan bool)
	go func() {
		select {
		case <-ch:
			return
		case <-time.After(3 * time.Second):
			fmt.Println("!!!! closeConnection is blocked, test will hang !!!")
			return
		}
	}()
	// Close the route
	route.closeConnection(SlowConsumerWriteDeadline)
	ch <- true
}

func TestRouteClusterNameConflictBetweenStaticAndDynamic(t *testing.T) {
	o1 := DefaultOptions()
	o1.Cluster.Name = "AAAAAAAAAAAAAAAAAAAA" // make it alphabetically the "smallest"
	s1 := RunServer(o1)
	defer s1.Shutdown()

	o2 := DefaultOptions()
	o2.Cluster.Name = "" // intentional, let it be assigned dynamically
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)
}

type testRouteResolver struct{}

func (r *testRouteResolver) LookupHost(ctx context.Context, host string) ([]string, error) {
	return []string{"127.0.0.1", "other.host.in.cluster"}, nil
}

type routeHostLookupLogger struct {
	DummyLogger
	errCh chan string
	ch    chan bool
	count int
}

func (l *routeHostLookupLogger) Debugf(format string, v ...any) {
	l.Lock()
	defer l.Unlock()
	msg := fmt.Sprintf(format, v...)
	if strings.Contains(msg, "127.0.0.1:1234") {
		l.errCh <- msg
	} else if strings.Contains(msg, "other.host.in.cluster") {
		if l.count++; l.count == 10 {
			l.ch <- true
		}
	}
}

func TestRouteIPResolutionAndRouteToSelf(t *testing.T) {
	o := DefaultOptions()
	o.Cluster.Port = 1234
	r := &testRouteResolver{}
	o.Cluster.resolver = r
	o.Routes = RoutesFromStr("nats://routehost:1234")
	o.Debug = true
	o.NoLog = false
	s, err := NewServer(o)
	if err != nil {
		t.Fatalf("Error creating server: %v", err)
	}
	defer s.Shutdown()
	l := &routeHostLookupLogger{errCh: make(chan string, 1), ch: make(chan bool, 1)}
	s.SetLogger(l, true, true)
	s.Start()
	if err := s.readyForConnections(time.Second); err != nil {
		t.Fatal(err)
	}

	select {
	case e := <-l.errCh:
		t.Fatalf("Unexpected trace: %q", e)
	case <-l.ch:
		// Ok
		return
	}
}

func TestRouteDuplicateServerName(t *testing.T) {
	o := DefaultOptions()
	o.ServerName = "A"
	s := RunServer(o)
	defer s.Shutdown()

	l := &captureErrorLogger{errCh: make(chan string, 1)}
	s.SetLogger(l, false, false)

	o2 := DefaultOptions()
	// Set the same server name on purpose
	o2.ServerName = "A"
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	// This is an error now so can't wait on cluster formed.
	select {
	case w := <-l.errCh:
		if !strings.Contains(w, "Remote server has a duplicate name") {
			t.Fatalf("Expected warning about same name, got %q", w)
		}
	case <-time.After(5 * time.Second):
		t.Fatal("Should have gotten a warning regarding duplicate server name")
	}
}

func TestRouteLockReleasedOnTLSFailure(t *testing.T) {
	o1 := DefaultOptions()
	o1.Cluster.Name = "abc"
	o1.Cluster.Host = "127.0.0.1"
	o1.Cluster.Port = -1
	o1.Cluster.TLSTimeout = 0.25
	tc := &TLSConfigOpts{
		CertFile: "./configs/certs/server.pem",
		KeyFile:  "./configs/certs/key.pem",
		Insecure: true,
	}
	tlsConf, err := GenTLSConfig(tc)
	if err != nil {
		t.Fatalf("Error generating tls config: %v", err)
	}
	o1.Cluster.TLSConfig = tlsConf
	s1 := RunServer(o1)
	defer s1.Shutdown()

	l := &captureErrorLogger{errCh: make(chan string, 10)}
	s1.SetLogger(l, false, false)

	o2 := DefaultOptions()
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	select {
	case err := <-l.errCh:
		if !strings.Contains(err, "TLS") {
			t.Fatalf("Unexpected error: %v", err)
		}
	case <-time.After(time.Second):
	}

	s2.Shutdown()

	// Wait for longer than the TLS timeout and check that tlsTimeout is not stuck
	time.Sleep(500 * time.Millisecond)

	buf := make([]byte, 10000)
	n := runtime.Stack(buf, true)
	if bytes.Contains(buf[:n], []byte("tlsTimeout")) {
		t.Fatal("Seems connection lock was not released")
	}
}

type localhostResolver struct{}

func (r *localhostResolver) LookupHost(ctx context.Context, host string) ([]string, error) {
	return []string{"127.0.0.1"}, nil
}

func TestTLSRoutesCertificateImplicitAllowPass(t *testing.T) {
	testTLSRoutesCertificateImplicitAllow(t, true)
}

func TestTLSRoutesCertificateImplicitAllowFail(t *testing.T) {
	testTLSRoutesCertificateImplicitAllow(t, false)
}

func testTLSRoutesCertificateImplicitAllow(t *testing.T, pass bool) {
	// Base config for the servers
	cfg := createTempFile(t, "cfg")
	cfg.WriteString(fmt.Sprintf(`
		cluster {
			tls {
				cert_file = "../test/configs/certs/tlsauth/server.pem"
				key_file = "../test/configs/certs/tlsauth/server-key.pem"
				ca_file = "../test/configs/certs/tlsauth/ca.pem"
				verify_cert_and_check_known_urls = true
				insecure = %t
				timeout = 1
			}
		}
	`, !pass)) // set insecure to skip verification on the outgoing end
	if err := cfg.Sync(); err != nil {
		t.Fatal(err)
	}
	cfg.Close()

	optsA := LoadConfig(cfg.Name())
	optsB := LoadConfig(cfg.Name())

	routeURLs := "nats://localhost:9935, nats://localhost:9936"
	if !pass {
		routeURLs = "nats://127.0.0.1:9935, nats://127.0.0.1:9936"
	}
	optsA.Host = "127.0.0.1"
	optsA.Port = 9335
	optsA.Cluster.Name = "xyz"
	optsA.Cluster.Host = optsA.Host
	optsA.Cluster.Port = 9935
	optsA.Cluster.resolver = &localhostResolver{}
	optsA.Routes = RoutesFromStr(routeURLs)
	optsA.NoSystemAccount = true
	srvA := RunServer(optsA)
	defer srvA.Shutdown()

	optsB.Host = "127.0.0.1"
	optsB.Port = 9336
	optsB.Cluster.Name = "xyz"
	optsB.Cluster.Host = optsB.Host
	optsB.Cluster.Port = 9936
	optsB.Cluster.resolver = &localhostResolver{}
	optsB.Routes = RoutesFromStr(routeURLs)
	optsB.NoSystemAccount = true
	srvB := RunServer(optsB)
	defer srvB.Shutdown()

	if pass {
		checkNumRoutes(t, srvA, DEFAULT_ROUTE_POOL_SIZE)
		checkNumRoutes(t, srvB, DEFAULT_ROUTE_POOL_SIZE)
	} else {
		time.Sleep(1 * time.Second) // the fail case uses the IP, so a short wait is sufficient
		checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
			if srvA.NumRoutes() != 0 || srvB.NumRoutes() != 0 {
				return fmt.Errorf("No route connection expected")
			}
			return nil
		})
	}
}

func TestSubjectRenameViaJetStreamAck(t *testing.T) {
	s := RunRandClientPortServer(t)
	defer s.Shutdown()
	errChan := make(chan error)
	defer close(errChan)
	ncPub := natsConnect(t, s.ClientURL(), nats.UserInfo("client", "pwd"),
		nats.ErrorHandler(func(conn *nats.Conn, s *nats.Subscription, err error) {
			errChan <- err
		}))
	defer ncPub.Close()
	require_NoError(t, ncPub.PublishRequest("SVC.ALLOWED", "$JS.ACK.whatever@ADMIN", nil))
	select {
	case err := <-errChan:
		require_Contains(t, err.Error(), "Permissions Violation for Publish with Reply of")
	case <-time.After(time.Second):
		t.Fatalf("Expected error")
	}
}

func TestClusterQueueGroupWeightTrackingLeak(t *testing.T) {
	o := DefaultOptions()
	o.ServerName = "A"
	s := RunServer(o)
	defer s.Shutdown()

	o2 := DefaultOptions()
	o2.ServerName = "B"
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	nc := natsConnect(t, s.ClientURL())
	defer nc.Close()

	// Create a queue subscription
	sub := natsQueueSubSync(t, nc, "foo", "bar")

	// Check on s that we have the proper queue weight info
	acc := s.GlobalAccount()

	check := func(present bool, expected int32) {
		t.Helper()
		checkFor(t, time.Second, 15*time.Millisecond, func() error {
			acc.mu.RLock()
			v, ok := acc.lqws["foo bar"]
			acc.mu.RUnlock()
			if present {
				if !ok {
					return fmt.Errorf("the key is not present")
				}
				if v != expected {
					return fmt.Errorf("lqws does not contain expected value of %v: %v", expected, v)
				}
			} else if ok {
				return fmt.Errorf("the key is present with value %v and should not be", v)
			}
			return nil
		})
	}
	check(true, 1)

	// Now unsub, and it should be removed, not just be 0
	sub.Unsubscribe()
	check(false, 0)

	// Still make sure that the subject interest is gone from both servers.
	checkSubGone := func(s *Server) {
		t.Helper()
		checkFor(t, time.Second, 15*time.Millisecond, func() error {
			acc := s.GlobalAccount()
			acc.mu.RLock()
			res := acc.sl.Match("foo")
			acc.mu.RUnlock()
			if res != nil && len(res.qsubs) > 0 {
				return fmt.Errorf("Found queue sub on foo for server %v", s)
			}
			return nil
		})
	}
	checkSubGone(s)
	checkSubGone(s2)
}

type testRouteReconnectLogger struct {
	DummyLogger
	ch chan string
}

func (l *testRouteReconnectLogger) Debugf(format string, v ...any) {
	msg := fmt.Sprintf(format, v...)
	if strings.Contains(msg, "Trying to connect to route") {
		select {
		case l.ch <- msg:
		default:
		}
	}
}

func TestRouteSolicitedReconnectsEvenIfImplicit(t *testing.T) {
	o1 := DefaultOptions()
	o1.ServerName = "A"
	s1 := RunServer(o1)
	defer s1.Shutdown()

	o2 := DefaultOptions()
	o2.ServerName = "B"
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	// Not strictly required to reconnect, but if the reconnect were to fail for any reason
	// then the server would retry only once and then stop. So set it to some higher value
	// and then we will check that the server does not try more than that.
	o2.Cluster.ConnectRetries = 3
	s2 := RunServer(o2)
	defer s2.Shutdown()

	o3 := DefaultOptions()
	o3.ServerName = "C"
	o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	o3.Cluster.ConnectRetries = 3
	s3 := RunServer(o3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	s2.mu.Lock()
	s2.forEachRoute(func(r *client) {
		r.mu.Lock()
		// Close the route between S2 and S3 (which do not have an explicit route to each other)
		if r.route.remoteID == s3.ID() {
			r.nc.Close()
		}
		r.mu.Unlock()
	})
	s2.mu.Unlock()
	// Wait a bit to make sure that we don't check for cluster formed too soon (need to make
	// sure that connection is really removed and reconnect mechanism starts).
	time.Sleep(500 * time.Millisecond)
	checkClusterFormed(t, s1, s2, s3)

	// Now shutdown server 3 and make sure that s2 stops trying to reconnect to s3 at one point
	l := &testRouteReconnectLogger{ch: make(chan string, 10)}
	s2.SetLogger(l, true, false)
	s3.Shutdown()
	// S2 should retry ConnectRetries+1 times and then stop
	// Take into account default route pool size and system account dedicated route
	for i := 0; i < (DEFAULT_ROUTE_POOL_SIZE+1)*(o2.Cluster.ConnectRetries+1); i++ {
		select {
		case <-l.ch:
		case <-time.After(2 * time.Second):
			t.Fatal("Did not attempt to reconnect")
		}
	}
	// Now it should have stopped (in tests, reconnect delay is down to 15ms, so we don't need
	// to wait for too long).
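	// As an illustration: assuming DEFAULT_ROUTE_POOL_SIZE is 3, the loop
	// above drains (3+1)*(3+1) = 16 connect attempts (pool routes plus the
	// dedicated system-account route, times ConnectRetries+1) before the
	// select below verifies that the attempts have stopped.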
1748 select { 1749 case msg := <-l.ch: 1750 t.Fatalf("Unexpected attempt to reconnect: %s", msg) 1751 case <-time.After(50 * time.Millisecond): 1752 // OK 1753 } 1754 } 1755 1756 func TestRouteSaveTLSName(t *testing.T) { 1757 c1Conf := createConfFile(t, []byte(` 1758 port: -1 1759 cluster { 1760 name: "abc" 1761 port: -1 1762 pool_size: -1 1763 tls { 1764 cert_file: '../test/configs/certs/server-noip.pem' 1765 key_file: '../test/configs/certs/server-key-noip.pem' 1766 ca_file: '../test/configs/certs/ca.pem' 1767 } 1768 } 1769 `)) 1770 s1, o1 := RunServerWithConfig(c1Conf) 1771 defer s1.Shutdown() 1772 1773 tmpl := ` 1774 port: -1 1775 cluster { 1776 name: "abc" 1777 port: -1 1778 pool_size: -1 1779 routes: ["nats://%s:%d"] 1780 tls { 1781 cert_file: '../test/configs/certs/server-noip.pem' 1782 key_file: '../test/configs/certs/server-key-noip.pem' 1783 ca_file: '../test/configs/certs/ca.pem' 1784 } 1785 } 1786 ` 1787 c2And3Conf := createConfFile(t, []byte(fmt.Sprintf(tmpl, "localhost", o1.Cluster.Port))) 1788 s2, _ := RunServerWithConfig(c2And3Conf) 1789 defer s2.Shutdown() 1790 1791 checkClusterFormed(t, s1, s2) 1792 1793 s3, _ := RunServerWithConfig(c2And3Conf) 1794 defer s3.Shutdown() 1795 1796 checkClusterFormed(t, s1, s2, s3) 1797 1798 reloadUpdateConfig(t, s2, c2And3Conf, fmt.Sprintf(tmpl, "127.0.0.1", o1.Cluster.Port)) 1799 1800 s2.mu.RLock() 1801 s2.forEachRoute(func(r *client) { 1802 r.mu.Lock() 1803 if r.route.routeType == Implicit { 1804 r.nc.Close() 1805 } 1806 r.mu.Unlock() 1807 }) 1808 s2.mu.RUnlock() 1809 1810 checkClusterFormed(t, s1, s2, s3) 1811 1812 // Set a logger to capture errors trying to connect after clearing 1813 // the routeTLSName and causing a disconnect 1814 l := &captureErrorLogger{errCh: make(chan string, 1)} 1815 s2.SetLogger(l, false, false) 1816 1817 var gotIt bool 1818 for i := 0; !gotIt && i < 5; i++ { 1819 s2.mu.Lock() 1820 s2.routeTLSName = _EMPTY_ 1821 s2.forEachRoute(func(r *client) { 1822 r.mu.Lock() 1823 if r.route.routeType == Implicit { 1824 r.nc.Close() 1825 } 1826 r.mu.Unlock() 1827 }) 1828 s2.mu.Unlock() 1829 select { 1830 case <-l.errCh: 1831 gotIt = true 1832 case <-time.After(time.Second): 1833 // Try again 1834 } 1835 } 1836 if !gotIt { 1837 t.Fatal("Did not get the handshake error") 1838 } 1839 1840 // Now get back to localhost in config and reload config and 1841 // it should start to work again. 
	reloadUpdateConfig(t, s2, c2And3Conf, fmt.Sprintf(tmpl, "localhost", o1.Cluster.Port))
	checkClusterFormed(t, s1, s2, s3)
}

func TestRoutePoolAndPerAccountErrors(t *testing.T) {
	conf := createConfFile(t, []byte(`
		port: -1
		cluster {
			port: -1
			accounts: ["abc", "def", "abc"]
		}
	`))
	o := LoadConfig(conf)
	if _, err := NewServer(o); err == nil || !strings.Contains(err.Error(), "duplicate") {
		t.Fatalf("Expected error about duplicate, got %v", err)
	}

	conf1 := createConfFile(t, []byte(`
		port: -1
		accounts {
			abc { users: [{user:abc, password: pwd}] }
			def { users: [{user:def, password: pwd}] }
		}
		cluster {
			port: -1
			name: "local"
			accounts: ["abc"]
		}
	`))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	l := &captureErrorLogger{errCh: make(chan string, 10)}
	s1.SetLogger(l, false, false)

	conf2 := createConfFile(t, []byte(fmt.Sprintf(`
		port: -1
		accounts {
			abc { users: [{user:abc, password: pwd}] }
			def { users: [{user:def, password: pwd}] }
		}
		cluster {
			port: -1
			name: "local"
			routes: ["nats://127.0.0.1:%d"]
			accounts: ["def"]
		}
	`, o1.Cluster.Port)))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	for i := 0; i < 2; i++ {
		select {
		case e := <-l.errCh:
			if !strings.Contains(e, "No route for account \"def\"") {
				t.Fatalf("Expected error about no route for account, got %v", e)
			}
		case <-time.After(2 * time.Second):
			t.Fatalf("Did not get expected error regarding no route for account")
		}
		time.Sleep(DEFAULT_ROUTE_RECONNECT + 100*time.Millisecond)
	}

	s2.Shutdown()
	s1.Shutdown()

	conf1 = createConfFile(t, []byte(`
		port: -1
		cluster {
			port: -1
			name: "local"
			pool_size: 5
		}
	`))
	s1, o1 = RunServerWithConfig(conf1)
	defer s1.Shutdown()

	l = &captureErrorLogger{errCh: make(chan string, 10)}
	s1.SetLogger(l, false, false)

	conf2 = createConfFile(t, []byte(fmt.Sprintf(`
		port: -1
		cluster {
			port: -1
			name: "local"
			routes: ["nats://127.0.0.1:%d"]
			pool_size: 3
		}
	`, o1.Cluster.Port)))
	s2, _ = RunServerWithConfig(conf2)
	defer s2.Shutdown()

	for i := 0; i < 2; i++ {
		select {
		case e := <-l.errCh:
			if !strings.Contains(e, "Mismatch route pool size") {
				t.Fatalf("Expected error about pool size mismatch, got %v", e)
			}
		case <-time.After(2 * time.Second):
			t.Fatalf("Did not get expected error regarding pool size mismatch")
		}
		time.Sleep(DEFAULT_ROUTE_RECONNECT + 100*time.Millisecond)
	}
}

func TestRoutePool(t *testing.T) {
	tmpl := `
		port: -1
		accounts {
			A { users: [{user: "a", password: "a"}] }
			B { users: [{user: "b", password: "b"}] }
		}
		cluster {
			port: -1
			name: "local"
			%s
			pool_size: 2
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl,
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	checkRoutePoolIdx := func(s *Server, accName string, expected int) {
		t.Helper()
		a, err := s.LookupAccount(accName)
		require_NoError(t, err)
		require_True(t, a != nil)
		a.mu.RLock()
		rpi := a.routePoolIdx
		a.mu.RUnlock()
		require_True(t, rpi == expected)
	}
	checkRoutePoolIdx(s1, "A", 0)
	checkRoutePoolIdx(s2, "A", 0)
	checkRoutePoolIdx(s1, "B", 1)
	checkRoutePoolIdx(s2, "B", 1)

	sendAndRecv := func(acc, user, pwd string) {
		t.Helper()
		s2nc := natsConnect(t, s2.ClientURL(), nats.UserInfo(user, pwd))
		defer s2nc.Close()

		sub := natsSubSync(t, s2nc, "foo")
		natsFlush(t, s2nc)

		s1nc := natsConnect(t, s1.ClientURL(), nats.UserInfo(user, pwd))
		defer s1nc.Close()

		checkSubInterest(t, s1, acc, "foo", time.Second)

		for i := 0; i < 1000; i++ {
			natsPub(t, s1nc, "foo", []byte("hello"))
		}
		for i := 0; i < 1000; i++ {
			natsNexMsg(t, sub, time.Second)
		}
		// Make sure we don't receive more
		if msg, err := sub.NextMsg(150 * time.Millisecond); err == nil {
			t.Fatalf("Unexpected message: %+v", msg)
		}
	}

	sendAndRecv("A", "a", "a")
	sendAndRecv("B", "b", "b")

	checkStats := func(s *Server, isOut bool) {
		t.Helper()
		s.mu.RLock()
		defer s.mu.RUnlock()
		for _, conns := range s.routes {
			for i, r := range conns {
				r.mu.Lock()
				if isOut {
					if v := r.stats.outMsgs; v < 1000 {
						r.mu.Unlock()
						t.Fatalf("Expected at least 1000 out msgs for route %v, got %v", i+1, v)
					}
				} else {
					if v := r.stats.inMsgs; v < 1000 {
						r.mu.Unlock()
						t.Fatalf("Expected at least 1000 in msgs for route %v, got %v", i+1, v)
					}
				}
				r.mu.Unlock()
			}
		}
	}
	checkStats(s1, true)
	checkStats(s2, false)

	disconnectRoute := func(s *Server, idx int) {
		t.Helper()
		attempts := 0
	TRY_AGAIN:
		s.mu.RLock()
		for _, conns := range s.routes {
			for i, r := range conns {
				if i != idx {
					continue
				}
				if r != nil {
					r.mu.Lock()
					nc := r.nc
					r.mu.Unlock()
					if nc == nil {
						s.mu.RUnlock()
						if attempts++; attempts < 10 {
							time.Sleep(250 * time.Millisecond)
							goto TRY_AGAIN
						}
						t.Fatalf("Route %v net.Conn is nil", i)
					}
					nc.Close()
				} else {
					s.mu.RUnlock()
					if attempts++; attempts < 10 {
						time.Sleep(250 * time.Millisecond)
						goto TRY_AGAIN
					}
					t.Fatalf("Route %v connection is nil", i)
				}
			}
		}
		s.mu.RUnlock()
		time.Sleep(250 * time.Millisecond)
		checkClusterFormed(t, s1, s2)
	}
	disconnectRoute(s1, 0)
	disconnectRoute(s2, 1)
}

func TestRoutePoolConnectRace(t *testing.T) {
	for _, test := range []struct {
		name     string
		poolSize int
	}{
		{"no pool", -1},
		{"pool size 1", 1},
		{"pool size 5", 5},
	} {
		t.Run(test.name, func(t *testing.T) {
			// This test has each server point to the others, which causes each
			// one to attempt to create routes to the other two, leading to
			// duplicate connections that need to be dropped. We make sure that
			// the situation still resolves and that there is the expected
			// number of routes.
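			// A fixed, well-known cluster port is used for each server so that
			// all three can list the full set of route URLs (including their
			// own) before any of them is started. A server detects and ignores
			// the route to itself, and duplicate connections between a pair of
			// servers are resolved by dropping one side, which is what the
			// "duplicate" debug traces counted below are about.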
			createSrv := func(name string, port int) *Server {
				o := DefaultOptions()
				o.Port = -1
				o.ServerName = name
				o.Cluster.PoolSize = test.poolSize
				o.Cluster.Name = "local"
				o.Cluster.Port = port
				o.Routes = RoutesFromStr("nats://127.0.0.1:1234,nats://127.0.0.1:1235,nats://127.0.0.1:1236")
				s, err := NewServer(o)
				if err != nil {
					t.Fatalf("Error creating server: %v", err)
				}
				return s
			}
			s1 := createSrv("A", 1234)
			s2 := createSrv("B", 1235)
			s3 := createSrv("C", 1236)

			l := &captureDebugLogger{dbgCh: make(chan string, 100)}
			s1.SetLogger(l, true, false)

			servers := []*Server{s1, s2, s3}

			for _, s := range servers {
				go s.Start()
				defer s.Shutdown()
			}

			checkClusterFormed(t, s1, s2, s3)

			for done, duplicate := false, 0; !done; {
				select {
				case e := <-l.dbgCh:
					if strings.Contains(e, "duplicate") {
						if duplicate++; duplicate > 20 {
							t.Fatalf("Routes are constantly reconnecting: %v", e)
						}
					}
				case <-time.After(DEFAULT_ROUTE_RECONNECT + 250*time.Millisecond):
					// More than the reconnect interval elapsed without another
					// duplicate, so we are good.
					done = true
				}
			}

			// Also, check that they all report as solicited and configured in monitoring.
			for _, s := range servers {
				routes, err := s.Routez(nil)
				require_NoError(t, err)
				for _, r := range routes.Routes {
					if !r.DidSolicit {
						t.Fatalf("All routes should have been marked as solicited, this one was not: %+v", r)
					}
					if !r.IsConfigured {
						t.Fatalf("All routes should have been marked as configured, this one was not: %+v", r)
					}
				}
			}

			for _, s := range servers {
				s.Shutdown()
				s.WaitForShutdown()
			}
		})
	}
}

func TestRoutePoolRouteStoredSameIndexBothSides(t *testing.T) {
	tmpl := `
		port: -1
		accounts {
			A { users: [{user: "a", password: "a"}] }
			B { users: [{user: "b", password: "b"}] }
			C { users: [{user: "c", password: "c"}] }
			D { users: [{user: "d", password: "d"}] }
		}
		cluster {
			port: -1
			name: "local"
			%s
			pool_size: 4
		}
		no_sys_acc: true
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl,
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	for i := 0; i < 20; i++ {
		checkClusterFormed(t, s1, s2)

		collect := func(s *Server, checkRemoteAddr bool) []string {
			addrs := make([]string, 0, 4)
			s.mu.RLock()
			s.forEachRoute(func(r *client) {
				var addr string
				r.mu.Lock()
				if r.nc != nil {
					if checkRemoteAddr {
						addr = r.nc.RemoteAddr().String()
					} else {
						addr = r.nc.LocalAddr().String()
					}
					addrs = append(addrs, addr)
				}
				r.mu.Unlock()
			})
			s.mu.RUnlock()
			return addrs
		}

		addrsS1 := collect(s1, true)
		addrsS2 := collect(s2, false)
		if len(addrsS1) != 4 || len(addrsS2) != 4 {
			// It could be that connections were not ready (r.nc is nil in collect());
			// if that is the case, try again.
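			// Decrement i so that this incomplete iteration does not count
			// against the 20 attempts.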
			i--
			continue
		}

		if !reflect.DeepEqual(addrsS1, addrsS2) {
			t.Fatalf("Connections not stored at same index:\ns1=%v\ns2=%v", addrsS1, addrsS2)
		}

		s1.mu.RLock()
		s1.forEachRoute(func(r *client) {
			r.mu.Lock()
			if r.nc != nil {
				r.nc.Close()
			}
			r.mu.Unlock()
		})
		s1.mu.RUnlock()
	}
}

type captureRMsgTrace struct {
	DummyLogger
	sync.Mutex
	traces *bytes.Buffer
	out    []string
}

func (l *captureRMsgTrace) Tracef(format string, args ...any) {
	l.Lock()
	defer l.Unlock()
	msg := fmt.Sprintf(format, args...)
	if strings.Contains(msg, "[RMSG ") {
		l.traces.WriteString(msg)
		l.out = append(l.out, msg)
	}
}

func TestRoutePerAccount(t *testing.T) {
	akp1, _ := nkeys.CreateAccount()
	acc1, _ := akp1.PublicKey()

	akp2, _ := nkeys.CreateAccount()
	acc2, _ := akp2.PublicKey()

	tmpl := `
		port: -1
		accounts {
			%s { users: [{user: "a", password: "a"}] }
			%s { users: [{user: "b", password: "b"}] }
		}
		cluster {
			port: -1
			name: "local"
			%s
			accounts: ["%s"]
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, acc1, acc2, _EMPTY_, acc2)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl,
		acc1, acc2,
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
		acc2)))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	l := &captureRMsgTrace{traces: &bytes.Buffer{}}
	s2.SetLogger(l, false, true)

	checkClusterFormed(t, s1, s2)

	disconnectRoute := func(s *Server) {
		t.Helper()
		attempts := 0
	TRY_AGAIN:
		s.mu.RLock()
		if conns, ok := s.accRoutes[acc2]; ok {
			for _, r := range conns {
				if r != nil {
					r.mu.Lock()
					nc := r.nc
					r.mu.Unlock()
					if nc == nil {
						s.mu.RUnlock()
						if attempts++; attempts < 10 {
							time.Sleep(250 * time.Millisecond)
							goto TRY_AGAIN
						}
						t.Fatal("Route net.Conn is nil")
					}
					nc.Close()
				} else {
					s.mu.RUnlock()
					if attempts++; attempts < 10 {
						time.Sleep(250 * time.Millisecond)
						goto TRY_AGAIN
					}
					t.Fatal("Route connection is nil")
				}
			}
		}
		s.mu.RUnlock()
		time.Sleep(250 * time.Millisecond)
		checkClusterFormed(t, s1, s2)
	}
	disconnectRoute(s1)
	disconnectRoute(s2)

	sendAndRecv := func(acc, user, pwd string) {
		t.Helper()
		s2nc := natsConnect(t, s2.ClientURL(), nats.UserInfo(user, pwd))
		defer s2nc.Close()

		sub := natsSubSync(t, s2nc, "foo")
		natsFlush(t, s2nc)

		s1nc := natsConnect(t, s1.ClientURL(), nats.UserInfo(user, pwd))
		defer s1nc.Close()

		checkSubInterest(t, s1, acc, "foo", time.Second)

		for i := 0; i < 10; i++ {
			natsPub(t, s1nc, "foo", []byte("hello"))
		}
		for i := 0; i < 10; i++ {
			natsNexMsg(t, sub, time.Second)
		}
		// Make sure we don't receive more
		if msg, err := sub.NextMsg(150 * time.Millisecond); err == nil {
			t.Fatalf("Unexpected message: %+v", msg)
		}
	}

	sendAndRecv(acc1, "a", "a")
	sendAndRecv(acc2, "b", "b")

	l.Lock()
	traces := l.traces.String()
	out := append([]string(nil), l.out...)
	l.Unlock()
	// We should not have any "[RMSG <acc2>"
	if strings.Contains(traces, fmt.Sprintf("[RMSG %s", acc2)) {
		var outStr string
		for _, l := range out {
			outStr += l + "\r\n"
		}
		t.Fatalf("Should not have included account %q in protocol, got:\n%s", acc2, outStr)
	}
}

func TestRoutePerAccountImplicit(t *testing.T) {
	tmpl := `
		port: -1
		accounts {
			A { users: [{user: "a", password: "a"}] }
			B { users: [{user: "b", password: "b"}] }
		}
		cluster {
			port: -1
			name: "local"
			accounts: ["A"]
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2And3 := createConfFile(t, []byte(fmt.Sprintf(tmpl,
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, _ := RunServerWithConfig(conf2And3)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	s3, _ := RunServerWithConfig(conf2And3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	// On s3, close the per-account routes from s2
	s3.mu.RLock()
	for _, conns := range s3.accRoutes {
		for rem, r := range conns {
			if rem != s2.ID() {
				continue
			}
			r.mu.Lock()
			if r.nc != nil {
				r.nc.Close()
			}
			r.mu.Unlock()
		}
	}
	s3.mu.RUnlock()
	// Wait a bit to make sure there is a disconnect, then check the cluster is ok
	time.Sleep(250 * time.Millisecond)
	checkClusterFormed(t, s1, s2, s3)
}

func TestRoutePerAccountDefaultForSysAccount(t *testing.T) {
	tmpl := `
		port: -1
		accounts {
			A { users: [{user: "a", password: "a"}] }
			B { users: [{user: "b", password: "b"}] }
		}
		cluster {
			port: -1
			name: "local"
			%s
			%s
			%s
		}
		%s
	`
	for _, test := range []struct {
		name     string
		accounts string
		sysAcc   string
		noSysAcc bool
	}{
		{"default sys no accounts", _EMPTY_, _EMPTY_, false},
		{"default sys in accounts", "accounts: [\"$SYS\"]", _EMPTY_, false},
		{"default sys with other accounts", "accounts: [\"A\",\"$SYS\"]", _EMPTY_, false},
		{"explicit sys no accounts", _EMPTY_, "system_account: B", false},
		{"explicit sys in accounts", "accounts: [\"B\"]", "system_account: B", false},
		{"explicit sys with other accounts", "accounts: [\"B\",\"A\"]", "system_account: B", false},
		{"no system account no accounts", _EMPTY_, "no_sys_acc: true", true},
		{"no system account with accounts", "accounts: [\"A\"]", "no_sys_acc: true", true},
	} {
		t.Run(test.name, func(t *testing.T) {
			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_, test.accounts,
				_EMPTY_, test.sysAcc)))
			s1, o1 := RunServerWithConfig(conf1)
			defer s1.Shutdown()

			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_, test.accounts,
				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port), test.sysAcc)))
			s2, _ := RunServerWithConfig(conf2)
			defer s2.Shutdown()

			checkClusterFormed(t, s1, s2)

			checkSysAccRoute := func(s *Server) {
				t.Helper()
				var name string
				acc := s.SystemAccount()
				if test.noSysAcc {
					if acc != nil {
						t.Fatalf("There should not be any system account, got %q", acc.GetName())
					}
					// We will check that there is no accRoutes entry for the
					// default system account name
					name = DEFAULT_SYSTEM_ACCOUNT
				} else {
					acc.mu.RLock()
					pi := acc.routePoolIdx
					name = acc.Name
					acc.mu.RUnlock()
					if pi != -1 {
						t.Fatalf("System account %q should have route pool index==-1, got %v", name, pi)
					}
				}
				s.mu.RLock()
				_, ok := s.accRoutes[name]
				s.mu.RUnlock()
				if test.noSysAcc {
					if ok {
						t.Fatalf("System account %q should not have its own route, since NoSystemAccount was specified", name)
					}
				} else if !ok {
					t.Fatalf("System account %q should be present in accRoutes, it was not", name)
				}
			}
			checkSysAccRoute(s1)
			checkSysAccRoute(s2)

			// Check that this is still the case after a config reload
			reloadUpdateConfig(t, s1, conf1, fmt.Sprintf(tmpl, "pool_size: 4", test.accounts,
				_EMPTY_, test.sysAcc))
			reloadUpdateConfig(t, s2, conf2, fmt.Sprintf(tmpl, "pool_size: 4", test.accounts,
				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port), test.sysAcc))

			checkSysAccRoute(s1)
			checkSysAccRoute(s2)
		})
	}
}

func TestRoutePerAccountConnectRace(t *testing.T) {
	// This test has each server point to the others, which causes each one to
	// attempt to create routes to the other two, leading to duplicate
	// connections that need to be dropped. We make sure that the situation
	// still resolves and that there is the expected number of routes.
	createSrv := func(name string, port int) *Server {
		o := DefaultOptions()
		o.Port = -1
		o.ServerName = name
		o.Accounts = []*Account{NewAccount("A")}
		o.NoSystemAccount = true
		o.Cluster.PoolSize = 1
		o.Cluster.PinnedAccounts = []string{"A"}
		o.Cluster.Name = "local"
		o.Cluster.Port = port
		o.Routes = RoutesFromStr("nats://127.0.0.1:1234,nats://127.0.0.1:1235,nats://127.0.0.1:1236")
		s, err := NewServer(o)
		if err != nil {
			t.Fatalf("Error creating server: %v", err)
		}
		return s
	}
	s1 := createSrv("A", 1234)
	s2 := createSrv("B", 1235)
	s3 := createSrv("C", 1236)

	l := &captureDebugLogger{dbgCh: make(chan string, 100)}
	s1.SetLogger(l, true, false)

	servers := []*Server{s1, s2, s3}

	for _, s := range servers {
		go s.Start()
		defer s.Shutdown()
	}

	checkClusterFormed(t, s1, s2, s3)

	for done, duplicate := false, 0; !done; {
		select {
		case e := <-l.dbgCh:
			if strings.Contains(e, "duplicate") {
				if duplicate++; duplicate > 10 {
					t.Fatalf("Routes are constantly reconnecting: %v", e)
				}
			}
		case <-time.After(DEFAULT_ROUTE_RECONNECT + 250*time.Millisecond):
			// More than the reconnect interval elapsed without another
			// duplicate, so we are good.
			done = true
		}
	}

	// Also, check that they all report as solicited and configured in monitoring.
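	// DidSolicit reports that this side initiated the connection and
	// IsConfigured that the remote corresponds to one of the configured route
	// URLs; since every server here solicits every other one, both flags are
	// expected to be true for all surviving routes.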
	for _, s := range servers {
		routes, err := s.Routez(nil)
		require_NoError(t, err)
		for _, r := range routes.Routes {
			if !r.DidSolicit {
				t.Fatalf("All routes should have been marked as solicited, this one was not: %+v", r)
			}
			if !r.IsConfigured {
				t.Fatalf("All routes should have been marked as configured, this one was not: %+v", r)
			}
		}
	}

	for _, s := range servers {
		s.Shutdown()
		s.WaitForShutdown()
	}
}

func TestRoutePerAccountGossipWorks(t *testing.T) {
	tmplA := `
		port: -1
		server_name: "A"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: %d
			name: "local"
			accounts: ["A"]
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmplA, -1, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	tmplBC := `
		port: -1
		server_name: "%s"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: -1
			name: "local"
			%s
			accounts: ["A"]
		}
	`
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmplBC, "B",
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	// Now connect s3 to s1 and make sure that s2 connects properly to s3.
	conf3 := createConfFile(t, []byte(fmt.Sprintf(tmplBC, "C",
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s3, _ := RunServerWithConfig(conf3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	s3.Shutdown()
	s2.Shutdown()
	s1.Shutdown()

	// Slightly different version where s2 is connecting to s1, while s1
	// connects to s3 (and s3 does not solicit connections).

	conf1 = createConfFile(t, []byte(fmt.Sprintf(tmplA, -1, _EMPTY_)))
	s1, o1 = RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2 = createConfFile(t, []byte(fmt.Sprintf(tmplBC, "B",
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, _ = RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	// Start s3 first; it will simply accept connections.
	conf3 = createConfFile(t, []byte(fmt.Sprintf(tmplBC, "C", _EMPTY_)))
	s3, o3 := RunServerWithConfig(conf3)
	defer s3.Shutdown()

	// Now config reload s1 so that it points to s3.
	reloadUpdateConfig(t, s1, conf1,
		fmt.Sprintf(tmplA, o1.Cluster.Port, fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o3.Cluster.Port)))

	checkClusterFormed(t, s1, s2, s3)
}

func TestRoutePerAccountGossipWorksWithOldServerNotSeed(t *testing.T) {
	tmplA := `
		port: -1
		server_name: "A"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: %d
			name: "local"
			accounts: ["A"]
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmplA, -1)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	// Here, server "B" will have no pooling/accounts.
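	// A pool_size of -1 disables pooling altogether, which is how an older
	// (pre-pooling) server behaves: a single route connection per remote that
	// carries all accounts. As a sketch:
	//
	//	cluster {
	//	    pool_size: -1   # one route per remote, no per-account routes
	//	}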
	tmplB := `
		port: -1
		server_name: "B"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: -1
			name: "local"
			routes: ["nats://127.0.0.1:%d"]
			pool_size: -1
		}
	`
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmplB, o1.Cluster.Port)))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	l := &captureErrorLogger{errCh: make(chan string, 100)}
	s2.SetLogger(l, false, false)

	// Now connect s3 to s1. Server s1 should not gossip to s2 information
	// about pinned-account routes or extra routes from the pool.
	tmplC := `
		port: -1
		server_name: "C"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: -1
			name: "local"
			routes: ["nats://127.0.0.1:%d"]
			accounts: ["A"]
		}
	`
	conf3 := createConfFile(t, []byte(fmt.Sprintf(tmplC, o1.Cluster.Port)))
	s3, _ := RunServerWithConfig(conf3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	// We should not have had s2 try to create dedicated routes for "A" or "$SYS"
	tm := time.NewTimer(time.Second)
	defer tm.Stop()
	for {
		select {
		case err := <-l.errCh:
			if strings.Contains(err, "dedicated route") {
				t.Fatalf("Server s2 should not have tried to create a dedicated route: %s", err)
			}
		case <-tm.C:
			return
		}
	}
}

func TestRoutePerAccountGossipWorksWithOldServerSeed(t *testing.T) {
	tmplA := `
		port: -1
		server_name: "A"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: %d
			name: "local"
			pool_size: %d
			%s
		}
	`
	// Start with s1 being an "old" server, which does not support pooling/pinned-accounts.
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmplA, -1, -1, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	tmplBC := `
		port: -1
		server_name: "%s"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: -1
			name: "local"
			pool_size: 3
			routes: ["nats://127.0.0.1:%d"]
			accounts: ["A"]
		}
	`
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmplBC, "B", o1.Cluster.Port)))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	// Now connect s3 to s1 and make sure that s2 connects properly to s3.
	conf3 := createConfFile(t, []byte(fmt.Sprintf(tmplBC, "C", o1.Cluster.Port)))
	s3, _ := RunServerWithConfig(conf3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	checkRoutes := func(s *Server, expected int) {
		t.Helper()
		checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
			if nr := s.NumRoutes(); nr != expected {
				return fmt.Errorf("Server %q should have %v routes, got %v", s.Name(), expected, nr)
			}
			return nil
		})
	}
	// Since s1 has no pooling/pinned-accounts, there should be only 2 routes,
	// one to s2 and one to s3.
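	// The non-pooled s1 collapses everything onto a single connection per
	// remote, so the pooled and per-account routes only exist between s2 and
	// s3 themselves.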
	checkRoutes(s1, 2)
	// s2 and s3 should have 1 route to s1 and 3 (pool) + "A" + "$SYS" == 6
	checkRoutes(s2, 6)
	checkRoutes(s3, 6)

	s1.Shutdown()

	// The server s1 will now support pooling and account pinning.
	// Restart the server s1 with the same cluster port otherwise
	// s2/s3 would not be able to reconnect.
	conf1 = createConfFile(t, []byte(fmt.Sprintf(tmplA, o1.Cluster.Port, 3, "accounts: [\"A\"]")))
	s1, _ = RunServerWithConfig(conf1)
	defer s1.Shutdown()
	// Make sure reconnect occurs. We should now have 5 routes to each remote server.
	checkClusterFormed(t, s1, s2, s3)
	// Now all servers should have 3 (pool) + 2 (pinned accounts) routes to
	// each of the two other servers, so 10 total.
	checkRoutes(s1, 10)
	checkRoutes(s2, 10)
	checkRoutes(s3, 10)
}

func TestRoutePoolPerAccountSubUnsubProtoParsing(t *testing.T) {
	for _, test := range []struct {
		name  string
		extra string
	}{
		{"regular", _EMPTY_},
		{"pooling", "pool_size: 5"},
		{"per-account", "accounts: [\"A\"]"},
	} {
		t.Run(test.name, func(t *testing.T) {
			confATemplate := `
				port: -1
				accounts {
					A { users: [{user: "user1", password: "pwd"}] }
				}
				cluster {
					listen: 127.0.0.1:-1
					%s
				}
			`
			confA := createConfFile(t, []byte(fmt.Sprintf(confATemplate, test.extra)))
			srva, optsA := RunServerWithConfig(confA)
			defer srva.Shutdown()

			confBTemplate := `
				port: -1
				accounts {
					A { users: [{user: "user1", password: "pwd"}] }
				}
				cluster {
					listen: 127.0.0.1:-1
					routes = [
						"nats://127.0.0.1:%d"
					]
					%s
				}
			`
			confB := createConfFile(t, []byte(fmt.Sprintf(confBTemplate, optsA.Cluster.Port, test.extra)))
			srvb, _ := RunServerWithConfig(confB)
			defer srvb.Shutdown()

			checkClusterFormed(t, srva, srvb)

			ncA := natsConnect(t, srva.ClientURL(), nats.UserInfo("user1", "pwd"))
			defer ncA.Close()

			for i := 0; i < 2; i++ {
				var sub *nats.Subscription
				if i == 0 {
					sub = natsSubSync(t, ncA, "foo")
				} else {
					sub = natsQueueSubSync(t, ncA, "foo", "bar")
				}

				checkSubInterest(t, srvb, "A", "foo", 2*time.Second)

				checkSubs := func(s *Server, queue, expected bool) {
					t.Helper()
					acc, err := s.LookupAccount("A")
					if err != nil {
						t.Fatalf("Error looking up account: %v", err)
					}
					checkFor(t, time.Second, 15*time.Millisecond, func() error {
						acc.mu.RLock()
						res := acc.sl.Match("foo")
						acc.mu.RUnlock()
						if expected {
							if queue && (len(res.qsubs) == 0 || len(res.psubs) != 0) {
								return fmt.Errorf("Expected queue sub, did not find it")
							} else if !queue && (len(res.psubs) == 0 || len(res.qsubs) != 0) {
								return fmt.Errorf("Expected psub, did not find it")
							}
						} else if len(res.psubs)+len(res.qsubs) != 0 {
							return fmt.Errorf("Unexpected subscription: %+v", res)
						}
						return nil
					})
				}

				checkSubs(srva, i == 1, true)
				checkSubs(srvb, i == 1, true)

				sub.Unsubscribe()
				natsFlush(t, ncA)

				checkSubs(srva, i == 1, false)
				checkSubs(srvb, i == 1, false)
			}
		})
	}
}

func TestRoutePoolPerAccountStreamImport(t *testing.T) {
	for _, test := range []struct {
		name  string
		route string
	}{
		{"regular", _EMPTY_},
		{"pooled", "pool_size: 5"},
		{"one per account", "accounts: [\"A\"]"},
		{"both per account", "accounts: [\"A\", \"B\"]"},
	} {
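		// Account B imports the stream "foo" from account A, so a single
		// subscription on B must register interest under both accounts on the
		// remote server; the two checkSubInterest calls below verify this for
		// each routing mode.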
		t.Run(test.name, func(t *testing.T) {
			tmplA := `
				server_name: "A"
				port: -1
				accounts {
					A {
						users: [{user: "user1", password: "pwd"}]
						exports: [{stream: "foo"}]
					}
					B {
						users: [{user: "user2", password: "pwd"}]
						imports: [{stream: {subject: "foo", account: "A"}}]
					}
					C { users: [{user: "user3", password: "pwd"}] }
					D { users: [{user: "user4", password: "pwd"}] }
				}
				cluster {
					name: "local"
					listen: 127.0.0.1:-1
					%s
				}
			`
			confA := createConfFile(t, []byte(fmt.Sprintf(tmplA, test.route)))
			srva, optsA := RunServerWithConfig(confA)
			defer srva.Shutdown()

			tmplB := `
				server_name: "B"
				port: -1
				accounts {
					A {
						users: [{user: "user1", password: "pwd"}]
						exports: [{stream: "foo"}]
					}
					B {
						users: [{user: "user2", password: "pwd"}]
						imports: [{stream: {subject: "foo", account: "A"}}]
					}
					C { users: [{user: "user3", password: "pwd"}] }
					D { users: [{user: "user4", password: "pwd"}] }
				}
				cluster {
					name: "local"
					listen: 127.0.0.1:-1
					%s
					%s
				}
			`
			confB := createConfFile(t, []byte(fmt.Sprintf(tmplB,
				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", optsA.Cluster.Port),
				test.route)))
			srvb, _ := RunServerWithConfig(confB)
			defer srvb.Shutdown()

			checkClusterFormed(t, srva, srvb)

			ncB := natsConnect(t, srvb.ClientURL(), nats.UserInfo("user2", "pwd"))
			defer ncB.Close()

			sub := natsSubSync(t, ncB, "foo")

			checkSubInterest(t, srva, "B", "foo", time.Second)
			checkSubInterest(t, srva, "A", "foo", time.Second)

			ncA := natsConnect(t, srva.ClientURL(), nats.UserInfo("user1", "pwd"))
			defer ncA.Close()

			natsPub(t, ncA, "foo", []byte("hello"))
			natsNexMsg(t, sub, time.Second)

			natsUnsub(t, sub)
			natsFlush(t, ncB)

			checkFor(t, time.Second, 15*time.Millisecond, func() error {
				for _, acc := range []string{"A", "B"} {
					a, err := srva.LookupAccount(acc)
					if err != nil {
						return err
					}
					a.mu.RLock()
					r := a.sl.Match("foo")
					a.mu.RUnlock()
					if len(r.psubs) != 0 {
						return fmt.Errorf("Subscription not unsubscribed")
					}
				}
				return nil
			})
		})
	}
}

func TestRoutePoolAndPerAccountWithServiceLatencyNoDataRace(t *testing.T) {
	// For this test, we want the system (SYS) and SERVICE accounts to be bound
	// to different routes. So the names and pool size have been chosen accordingly.
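	// The account-to-route mapping is presumably a deterministic function of
	// the account name and the pool size, so with pool_size: 5 the names below
	// are expected to land on different pooled connections, while the
	// per-account variant pins each account to its own route explicitly.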
	for _, test := range []struct {
		name    string
		poolStr string
	}{
		{"pool", "pool_size: 5"},
		{"per account", "accounts: [\"SYS\", \"SERVICE\", \"REQUESTOR\"]"},
	} {
		t.Run(test.name, func(t *testing.T) {
			tmpl := `
				port: -1
				accounts {
					SYS {
						users [{user: "sys", password: "pwd"}]
					}
					SERVICE {
						users [{user: "svc", password: "pwd"}]
						exports = [
							{service: "req.*", latency: {subject: "results"}}
						]
					}
					REQUESTOR {
						users [{user: "req", password: "pwd"}]
						imports = [
							{service: {account: "SERVICE", subject: "req.echo"}, to: "request"}
						]
					}
				}
				system_account: "SYS"
				cluster {
					name: "local"
					port: -1
					%s
					%s
				}
			`
			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, test.poolStr, _EMPTY_)))
			s1, opts1 := RunServerWithConfig(conf1)
			defer s1.Shutdown()

			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, test.poolStr,
				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", opts1.Cluster.Port))))
			s2, _ := RunServerWithConfig(conf2)
			defer s2.Shutdown()

			checkClusterFormed(t, s1, s2)

			// Create service provider.
			nc := natsConnect(t, s1.ClientURL(), nats.UserInfo("svc", "pwd"))
			defer nc.Close()

			// The service listener.
			natsSub(t, nc, "req.echo", func(msg *nats.Msg) {
				msg.Respond(msg.Data)
			})

			// Listen for metrics
			rsub := natsSubSync(t, nc, "results")
			natsFlush(t, nc)
			checkSubInterest(t, s2, "SERVICE", "results", time.Second)

			// Create second client and send request from this one.
			nc2 := natsConnect(t, s2.ClientURL(), nats.UserInfo("req", "pwd"))
			defer nc2.Close()

			for i := 0; i < 5; i++ {
				// Send the request.
				_, err := nc2.Request("request", []byte("hello"), time.Second)
				require_NoError(t, err)
				// Get the latency result
				natsNexMsg(t, rsub, time.Second)
			}
		})
	}
}

func TestRouteParseOriginClusterMsgArgs(t *testing.T) {
	for _, test := range []struct {
		racc    bool
		args    string
		pacache string
		reply   string
		queues  [][]byte
	}{
		{true, "ORIGIN foo 12 345\r\n", "MY_ACCOUNT foo", _EMPTY_, nil},
		{true, "ORIGIN foo bar 12 345\r\n", "MY_ACCOUNT foo", "bar", nil},
		{true, "ORIGIN foo + bar queue1 queue2 12 345\r\n", "MY_ACCOUNT foo", "bar", [][]byte{[]byte("queue1"), []byte("queue2")}},
		{true, "ORIGIN foo | queue1 queue2 12 345\r\n", "MY_ACCOUNT foo", _EMPTY_, [][]byte{[]byte("queue1"), []byte("queue2")}},

		{false, "ORIGIN MY_ACCOUNT foo 12 345\r\n", "MY_ACCOUNT foo", _EMPTY_, nil},
		{false, "ORIGIN MY_ACCOUNT foo bar 12 345\r\n", "MY_ACCOUNT foo", "bar", nil},
		{false, "ORIGIN MY_ACCOUNT foo + bar queue1 queue2 12 345\r\n", "MY_ACCOUNT foo", "bar", [][]byte{[]byte("queue1"), []byte("queue2")}},
		{false, "ORIGIN MY_ACCOUNT foo | queue1 queue2 12 345\r\n", "MY_ACCOUNT foo", _EMPTY_, [][]byte{[]byte("queue1"), []byte("queue2")}},
	} {
		t.Run(test.args, func(t *testing.T) {
			c := &client{kind: ROUTER, route: &route{}}
			if test.racc {
				c.route.accName = []byte("MY_ACCOUNT")
			}
			if err := c.processRoutedOriginClusterMsgArgs([]byte(test.args)); err != nil {
				t.Fatalf("Error processing: %v", err)
			}
			if string(c.pa.origin) != "ORIGIN" {
				t.Fatalf("Invalid origin: %q", c.pa.origin)
			}
			if string(c.pa.account) != "MY_ACCOUNT" {
				t.Fatalf("Invalid account: %q", c.pa.account)
			}
			if string(c.pa.subject) != "foo" {
				t.Fatalf("Invalid subject: %q", c.pa.subject)
			}
			if string(c.pa.reply) != test.reply {
				t.Fatalf("Invalid reply: %q", c.pa.reply)
			}
			if !reflect.DeepEqual(c.pa.queues, test.queues) {
				t.Fatalf("Invalid queues: %v", c.pa.queues)
			}
			if c.pa.hdr != 12 {
				t.Fatalf("Invalid header size: %v", c.pa.hdr)
			}
			if c.pa.size != 345 {
				t.Fatalf("Invalid size: %v", c.pa.size)
			}
		})
	}
}

func TestRoutePoolAndPerAccountOperatorMode(t *testing.T) {
	_, spub := createKey(t)
	sysClaim := jwt.NewAccountClaims(spub)
	sysClaim.Name = "SYS"
	sysJwt := encodeClaim(t, sysClaim, spub)

	akp, apub := createKey(t)
	claima := jwt.NewAccountClaims(apub)
	ajwt := encodeClaim(t, claima, apub)

	bkp, bpub := createKey(t)
	claimb := jwt.NewAccountClaims(bpub)
	bjwt := encodeClaim(t, claimb, bpub)

	ckp, cpub := createKey(t)
	claimc := jwt.NewAccountClaims(cpub)
	cjwt := encodeClaim(t, claimc, cpub)

	_, dpub := createKey(t)

	basePath := "/ngs/v1/accounts/jwt/"
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == basePath {
			w.Write([]byte("ok"))
		} else if strings.HasSuffix(r.URL.Path, spub) {
			w.Write([]byte(sysJwt))
		} else if strings.HasSuffix(r.URL.Path, apub) {
			w.Write([]byte(ajwt))
		} else if strings.HasSuffix(r.URL.Path, bpub) {
			w.Write([]byte(bjwt))
		} else if strings.HasSuffix(r.URL.Path, cpub) {
			w.Write([]byte(cjwt))
		}
	}))
	defer ts.Close()

	operator := fmt.Sprintf(`
		operator: %s
		system_account: %s
		resolver: URL("%s%s")
	`, ojwt, spub, ts.URL, basePath)

	tmpl := `
		listen: 127.0.0.1:-1
		server_name: %s
		cluster {
			port: -1
			name: "local"
			%s
			accounts: ["` + apub + `"%s]
		}
	` + operator

	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", _EMPTY_, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B", fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
		_EMPTY_)))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	checkRoute := func(s *Server, acc string, perAccount bool) {
		t.Helper()
		checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
			s.mu.RLock()
			_, ok := s.accRoutes[acc]
			s.mu.RUnlock()
			if perAccount && !ok {
				return fmt.Errorf("No dedicated route for account %q on server %q", acc, s)
			} else if !perAccount && ok {
				return fmt.Errorf("Dedicated route for account %q on server %q", acc, s)
			}
			return nil
		})
	}
	// Routes for accounts "apub" and "spub" should be dedicated routes
	checkRoute(s1, apub, true)
	checkRoute(s2, apub, true)
	checkRoute(s1, spub, true)
	checkRoute(s2, spub, true)
	// The route for account "bpub" should not be dedicated
	checkRoute(s1, bpub, false)
	checkRoute(s2, bpub, false)

	checkComm := func(acc string, kp nkeys.KeyPair, subj string) {
		t.Helper()
		usr := createUserCreds(t, nil, kp)
		ncAs2 := natsConnect(t, s2.ClientURL(), usr)
		defer ncAs2.Close()
		sub := natsSubSync(t, ncAs2, subj)
		checkSubInterest(t, s1, acc, subj, time.Second)

		ncAs1 := natsConnect(t, s1.ClientURL(), usr)
		defer ncAs1.Close()
		natsPub(t, ncAs1, subj, nil)
		natsNexMsg(t, sub, time.Second)
	}
	checkComm(apub, akp, "foo")
	checkComm(bpub, bkp, "bar")

	// Add account "bpub" to the accounts list with a configuration reload
	reloadUpdateConfig(t, s1, conf1, fmt.Sprintf(tmpl, "A", _EMPTY_, fmt.Sprintf(",\"%s\"", bpub)))
	// The route should already be moved to a dedicated route, even
	// before doing the config reload on s2.
	checkRoute(s1, bpub, true)
	checkRoute(s2, bpub, true)
	// Account "apub" should still have its dedicated route
	checkRoute(s1, apub, true)
	checkRoute(s2, apub, true)
	// So should the system account
	checkRoute(s1, spub, true)
	checkRoute(s2, spub, true)
	// Let's complete the config reload on s2
	reloadUpdateConfig(t, s2, conf2, fmt.Sprintf(tmpl, "B", fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
		fmt.Sprintf(",\"%s\"", bpub)))
	checkClusterFormed(t, s1, s2)
	// Check communication on account bpub again.
	checkComm(bpub, bkp, "baz")

	// Now add with config reload an account that has not been used yet (cpub).
	// We will also remove account bpub from the account list.
	reloadUpdateConfig(t, s1, conf1, fmt.Sprintf(tmpl, "A", _EMPTY_, fmt.Sprintf(",\"%s\"", cpub)))
	// Again, check before reloading s2.
	checkRoute(s1, cpub, true)
	checkRoute(s2, cpub, true)
	// Now reload s2 and do other checks.
	reloadUpdateConfig(t, s2, conf2, fmt.Sprintf(tmpl, "B", fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
		fmt.Sprintf(",\"%s\"", cpub)))
	checkClusterFormed(t, s1, s2)
	checkRoute(s1, bpub, false)
	checkRoute(s2, bpub, false)
	checkComm(cpub, ckp, "bat")

	// Finally, let's try to add an account that the account server rejects.
	err := os.WriteFile(conf1, []byte(fmt.Sprintf(tmpl, "A", _EMPTY_, fmt.Sprintf(",\"%s\",\"%s\"", cpub, dpub))), 0666)
	require_NoError(t, err)
	if err := s1.Reload(); err == nil || !strings.Contains(err.Error(), dpub) {
		t.Fatalf("Expected error about not being able to lookup this account, got %q", err)
	}
}

func TestRoutePoolAndPerAccountWithOlderServer(t *testing.T) {
	tmpl := `
		port: -1
		server_name: "%s"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: -1
			name: "local"
			pool_size: 5
			accounts: ["A"]
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B",
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	// Have s3 explicitly disable pooling (to behave as an old server)
	conf3 := createConfFile(t, []byte(fmt.Sprintf(`
		port: -1
		server_name: "C"
		accounts {
			A { users: [{user: "A", password: "pwd"}] }
			B { users: [{user: "B", password: "pwd"}] }
		}
		cluster {
			port: -1
			name: "local"
			pool_size: -1
			routes: ["nats://127.0.0.1:%d"]
		}
	`, o1.Cluster.Port)))
	s3, _ := RunServerWithConfig(conf3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	check := func(acc, subj string, subSrv, pubSrv1, pubSrv2 *Server) {
		t.Helper()

		ncSub := natsConnect(t, subSrv.ClientURL(), nats.UserInfo(acc, "pwd"))
		defer ncSub.Close()
		sub := natsSubSync(t, ncSub, subj)

		checkSubInterest(t, pubSrv1, acc, subj, time.Second)
		checkSubInterest(t, pubSrv2, acc, subj, time.Second)

		pub := func(s *Server) {
			t.Helper()
			nc := natsConnect(t, s.ClientURL(), nats.UserInfo(acc, "pwd"))
			defer nc.Close()

			natsPub(t, nc, subj, []byte("hello"))
			natsNexMsg(t, sub, time.Second)
		}
		pub(pubSrv1)
		pub(pubSrv2)
	}
	check("A", "subj1", s1, s2, s3)
	check("A", "subj2", s2, s1, s3)
	check("A", "subj3", s3, s1, s2)
	check("B", "subj4", s1, s2, s3)
	check("B", "subj5", s2, s1, s3)
	check("B", "subj6", s3, s1, s2)
}

type testDuplicateRouteLogger struct {
	DummyLogger
	ch chan struct{}
}

func (l *testDuplicateRouteLogger) Noticef(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	if !strings.Contains(msg, DuplicateRoute.String()) {
		return
	}
	select {
	case l.ch <- struct{}{}:
	default:
	}
}

// This test makes sure that a server with pooling does not keep trying to
// connect to a non-pooled (for instance, older) server. It also makes sure
// that if the old server is simply accepting connections and is restarted,
// the server with pooling will reconnect.
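// The repeat-connect detection below relies on the shortened routeConnectDelay
// that this test file sets up (15ms in tests), so waiting a few multiples of
// that delay without a new duplicate-route notice is enough to conclude that
// the pooled server has stopped soliciting the rejected extra connections.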
func TestRoutePoolWithOlderServerConnectAndReconnect(t *testing.T) {
	tmplA := `
		port: -1
		server_name: "A"
		cluster {
			port: -1
			name: "local"
			pool_size: 3
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmplA, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	l := &testDuplicateRouteLogger{ch: make(chan struct{}, 50)}
	s1.SetLogger(l, false, false)

	tmplB := `
		port: -1
		server_name: "B"
		cluster {
			port: %d
			name: "local"
			pool_size: -1
			%s
		}
	`
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmplB, -1,
		fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
	s2, o2 := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	// Now reload configuration of s1 to point to s2.
	reloadUpdateConfig(t, s1, conf1, fmt.Sprintf(tmplA, fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o2.Cluster.Port)))
	checkClusterFormed(t, s1, s2)

	// We could get some duplicate-route notices, but they should settle.
	checkRepeatConnect := func() {
		t.Helper()
		tm := time.NewTimer(4 * routeConnectDelay)
		var last time.Time
		for done := false; !done; {
			select {
			case <-l.ch:
				last = time.Now()
			case <-tm.C:
				done = true
			}
		}
		if dur := time.Since(last); dur <= routeConnectDelay {
			t.Fatalf("Still attempted to connect %v ago", dur)
		}
	}
	checkRepeatConnect()

	// Now shut down s2 and restart it without an active route to s1.
	// Check that the cluster can still be formed: that is, s1 is
	// still trying to connect to s2.
	s2.Shutdown()
	// Wait for more than a regular reconnect delay attempt.
	// Note that in tests it is set to 15ms.
	time.Sleep(50 * time.Millisecond)
	// Restart the server s2 with the same cluster port otherwise
	// s1 would not be able to reconnect.
	conf2 = createConfFile(t, []byte(fmt.Sprintf(tmplB, o2.Cluster.Port, _EMPTY_)))
	s2, _ = RunServerWithConfig(conf2)
	defer s2.Shutdown()
	// Make sure reconnect occurs
	checkClusterFormed(t, s1, s2)
	// And again, make sure there is no repeat-connect
	checkRepeatConnect()
}

func TestRouteCompressionOptions(t *testing.T) {
	org := testDefaultClusterCompression
	testDefaultClusterCompression = _EMPTY_
	defer func() { testDefaultClusterCompression = org }()

	tmpl := `
		port: -1
		cluster {
			port: -1
			compression: %s
		}
	`
	for _, test := range []struct {
		name     string
		mode     string
		rttVals  []int
		expected string
		rtts     []time.Duration
	}{
		{"boolean enabled", "true", nil, CompressionS2Fast, nil},
		{"string enabled", "enabled", nil, CompressionS2Fast, nil},
		{"string EnaBled", "EnaBled", nil, CompressionS2Fast, nil},
		{"string on", "on", nil, CompressionS2Fast, nil},
		{"string ON", "ON", nil, CompressionS2Fast, nil},
		{"string fast", "fast", nil, CompressionS2Fast, nil},
		{"string Fast", "Fast", nil, CompressionS2Fast, nil},
		{"string s2_fast", "s2_fast", nil, CompressionS2Fast, nil},
		{"string s2_Fast", "s2_Fast", nil, CompressionS2Fast, nil},
		{"boolean disabled", "false", nil, CompressionOff, nil},
		{"string disabled", "disabled", nil, CompressionOff, nil},
		{"string DisableD", "DisableD", nil, CompressionOff, nil},
		{"string off", "off", nil, CompressionOff, nil},
		{"string OFF", "OFF", nil, CompressionOff, nil},
		{"better", "better", nil, CompressionS2Better, nil},
		{"Better", "Better", nil, CompressionS2Better, nil},
		{"s2_better", "s2_better", nil, CompressionS2Better, nil},
		{"S2_BETTER", "S2_BETTER", nil, CompressionS2Better, nil},
		{"best", "best", nil, CompressionS2Best, nil},
		{"BEST", "BEST", nil, CompressionS2Best, nil},
		{"s2_best", "s2_best", nil, CompressionS2Best, nil},
		{"S2_BEST", "S2_BEST", nil, CompressionS2Best, nil},
		{"auto no rtts", "auto", nil, CompressionS2Auto, defaultCompressionS2AutoRTTThresholds},
		{"s2_auto no rtts", "s2_auto", nil, CompressionS2Auto, defaultCompressionS2AutoRTTThresholds},
		{"auto", "{mode: auto, rtt_thresholds: [%s]}", []int{1}, CompressionS2Auto, []time.Duration{time.Millisecond}},
		{"Auto", "{Mode: Auto, thresholds: [%s]}", []int{1, 2}, CompressionS2Auto, []time.Duration{time.Millisecond, 2 * time.Millisecond}},
		{"s2_auto", "{mode: s2_auto, thresholds: [%s]}", []int{1, 2, 3}, CompressionS2Auto, []time.Duration{time.Millisecond, 2 * time.Millisecond, 3 * time.Millisecond}},
		{"s2_AUTO", "{mode: s2_AUTO, thresholds: [%s]}", []int{1, 2, 3, 4}, CompressionS2Auto, []time.Duration{time.Millisecond, 2 * time.Millisecond, 3 * time.Millisecond, 4 * time.Millisecond}},
		{"s2_auto:-10,5,10", "{mode: s2_auto, thresholds: [%s]}", []int{-10, 5, 10}, CompressionS2Auto, []time.Duration{0, 5 * time.Millisecond, 10 * time.Millisecond}},
		{"s2_auto:5,10,15", "{mode: s2_auto, thresholds: [%s]}", []int{5, 10, 15}, CompressionS2Auto, []time.Duration{5 * time.Millisecond, 10 * time.Millisecond, 15 * time.Millisecond}},
		{"s2_auto:0,5,10", "{mode: s2_auto, thresholds: [%s]}", []int{0, 5, 10}, CompressionS2Auto, []time.Duration{0, 5 * time.Millisecond, 10 * time.Millisecond}},
		{"s2_auto:5,10,0,20", "{mode: s2_auto, thresholds: [%s]}", []int{5, 10, 0, 20}, CompressionS2Auto, []time.Duration{5 * time.Millisecond, 10 * time.Millisecond, 0, 20 * time.Millisecond}},
		{"s2_auto:0,10,0,20", "{mode: s2_auto, thresholds: [%s]}", []int{0, 10, 0, 20}, CompressionS2Auto, []time.Duration{0, 10 * time.Millisecond, 0, 20 * time.Millisecond}},
		{"s2_auto:0,0,0,20", "{mode: s2_auto, thresholds: [%s]}", []int{0, 0, 0, 20}, CompressionS2Auto, []time.Duration{0, 0, 0, 20 * time.Millisecond}},
		{"s2_auto:0,10,0,0", "{mode: s2_auto, rtt_thresholds: [%s]}", []int{0, 10, 0, 0}, CompressionS2Auto, []time.Duration{0, 10 * time.Millisecond}},
	} {
		t.Run(test.name, func(t *testing.T) {
			var val string
			if len(test.rttVals) > 0 {
				var rtts string
				for i, v := range test.rttVals {
					if i > 0 {
						rtts += ", "
					}
					rtts += fmt.Sprintf("%dms", v)
				}
				val = fmt.Sprintf(test.mode, rtts)
			} else {
				val = test.mode
			}
			conf := createConfFile(t, []byte(fmt.Sprintf(tmpl, val)))
			s, o := RunServerWithConfig(conf)
			defer s.Shutdown()

			if cm := o.Cluster.Compression.Mode; cm != test.expected {
				t.Fatalf("Expected compression value to be %q, got %q", test.expected, cm)
			}
			if !reflect.DeepEqual(test.rtts, o.Cluster.Compression.RTTThresholds) {
				t.Fatalf("Expected RTT thresholds to be %+v, got %+v", test.rtts, o.Cluster.Compression.RTTThresholds)
			}
			s.Shutdown()

			o.Cluster.Port = -1
			o.Cluster.Compression.Mode = test.mode
			if len(test.rttVals) > 0 {
				o.Cluster.Compression.Mode = CompressionS2Auto
				o.Cluster.Compression.RTTThresholds = o.Cluster.Compression.RTTThresholds[:0]
				for _, v := range test.rttVals {
					o.Cluster.Compression.RTTThresholds = append(o.Cluster.Compression.RTTThresholds, time.Duration(v)*time.Millisecond)
				}
			}
			s = RunServer(o)
			defer s.Shutdown()
			if cm := o.Cluster.Compression.Mode; cm != test.expected {
				t.Fatalf("Expected compression value to be %q, got %q", test.expected, cm)
			}
			if !reflect.DeepEqual(test.rtts, o.Cluster.Compression.RTTThresholds) {
				t.Fatalf("Expected RTT thresholds to be %+v, got %+v", test.rtts, o.Cluster.Compression.RTTThresholds)
			}
		})
	}
	// Test that with no compression specified, we default to "accept"
	conf := createConfFile(t, []byte(`
		port: -1
		cluster {
			port: -1
		}
	`))
	s, o := RunServerWithConfig(conf)
	defer s.Shutdown()
	if cm := o.Cluster.Compression.Mode; cm != CompressionAccept {
		t.Fatalf("Expected compression value to be %q, got %q", CompressionAccept, cm)
	}
	for _, test := range []struct {
		name string
		mode string
		rtts []time.Duration
		err  string
	}{
		{"unsupported mode", "gzip", nil, "unsupported"},
		{"not ascending order", "s2_auto", []time.Duration{
			5 * time.Millisecond,
			10 * time.Millisecond,
			2 * time.Millisecond,
		}, "ascending"},
		{"too many thresholds", "s2_auto", []time.Duration{
			5 * time.Millisecond,
			10 * time.Millisecond,
			20 * time.Millisecond,
			40 * time.Millisecond,
			60 * time.Millisecond,
		}, "more than 4"},
		{"all 0", "s2_auto", []time.Duration{0, 0, 0, 0}, "at least one"},
		{"single 0", "s2_auto", []time.Duration{0}, "at least one"},
	} {
		t.Run(test.name, func(t *testing.T) {
			o := DefaultOptions()
			o.Cluster.Port = -1
			o.Cluster.Compression = CompressionOpts{test.mode, test.rtts}
			if _, err := NewServer(o); err == nil || !strings.Contains(err.Error(), test.err) {
				t.Fatalf("Unexpected error: %v", err)
			}
		})
	}
}

type testConnSentBytes struct {
	net.Conn
	sync.RWMutex
	sent int
}

func (c *testConnSentBytes) Write(p []byte) (int, error) {
	n, err := c.Conn.Write(p)
	c.Lock()
	c.sent += n
	c.Unlock()
	return n, err
}

func TestRouteCompression(t *testing.T) {
	tmpl := `
		port: -1
		server_name: "%s"
		accounts {
			A { users: [{user: "a", pass: "pwd"}] }
		}
		cluster {
			name: "local"
			port: -1
			compression: true
			pool_size: %d
			%s
			%s
		}
	`
	for _, test := range []struct {
		name     string
		poolSize int
		accounts string
	}{
		{"no pooling", -1, _EMPTY_},
		{"pooling", 3, _EMPTY_},
		{"per account", 1, "accounts: [\"A\"]"},
	} {
		t.Run(test.name, func(t *testing.T) {
			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "S1", test.poolSize, test.accounts, _EMPTY_)))
			s1, o1 := RunServerWithConfig(conf1)
			defer s1.Shutdown()

			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "S2", test.poolSize, test.accounts,
				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
			s2, _ := RunServerWithConfig(conf2)
			defer s2.Shutdown()

			checkClusterFormed(t, s1, s2)

			s1.mu.RLock()
			s1.forEachRoute(func(r *client) {
				r.mu.Lock()
				r.nc = &testConnSentBytes{Conn: r.nc}
				r.mu.Unlock()
			})
			s1.mu.RUnlock()

			nc2 := natsConnect(t, s2.ClientURL(), nats.UserInfo("a", "pwd"))
			defer nc2.Close()
			sub := natsSubSync(t, nc2, "foo")
			natsFlush(t, nc2)
			checkSubInterest(t, s1, "A", "foo", time.Second)

			nc1 := natsConnect(t, s1.ClientURL(), nats.UserInfo("a", "pwd"))
			defer nc1.Close()

			var payloads [][]byte
			count := 26
			for i := 0; i < count; i++ {
				n := rand.Intn(2048) + 1
				p := make([]byte, n)
				for j := 0; j < n; j++ {
					p[j] = byte(i) + 'A'
				}
				payloads = append(payloads, p)
				natsPub(t, nc1, "foo", p)
			}

			totalPayloadSize := 0
			for i := 0; i < count; i++ {
				m := natsNexMsg(t, sub, time.Second)
				if !bytes.Equal(m.Data, payloads[i]) {
					t.Fatalf("Expected payload %q - got %q", payloads[i], m.Data)
				}
				totalPayloadSize += len(m.Data)
			}

			// Also check that the route stats show that compression likely occurred
			var out int
			s1.mu.RLock()
			if len(test.accounts) > 0 {
				rems := s1.accRoutes["A"]
				if rems == nil {
					t.Fatal("Did not find route for account")
				}
				for _, r := range rems {
					r.mu.Lock()
					if r.nc != nil {
						nc := r.nc.(*testConnSentBytes)
						nc.RLock()
						out = nc.sent
						nc.RUnlock()
					}
					r.mu.Unlock()
					break
				}
			} else {
				ai, _ := s1.accounts.Load("A")
				acc := ai.(*Account)
				acc.mu.RLock()
				pi := acc.routePoolIdx
				acc.mu.RUnlock()
				s1.forEachRouteIdx(pi, func(r *client) bool {
					r.mu.Lock()
					if r.nc != nil {
						nc := r.nc.(*testConnSentBytes)
						nc.RLock()
						out = nc.sent
						nc.RUnlock()
					}
					r.mu.Unlock()
					return false
				})
			}
			s1.mu.RUnlock()
			// The bytes sent over the route should be at least 20% smaller
			// than the total payload size, hence the 80% limit below.
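			// Each payload is a run of a single repeated byte, which S2 should
			// compress very well, so this limit is quite conservative.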
3727 limit := totalPayloadSize * 80 / 100 3728 if int(out) > limit { 3729 t.Fatalf("Expected s1's outBytes to be less than %v, got %v", limit, out) 3730 } 3731 }) 3732 } 3733 } 3734 3735 func TestRouteCompressionMatrixModes(t *testing.T) { 3736 tmpl := ` 3737 port: -1 3738 server_name: "%s" 3739 cluster { 3740 name: "local" 3741 port: -1 3742 compression: %s 3743 pool_size: -1 3744 %s 3745 } 3746 ` 3747 for _, test := range []struct { 3748 name string 3749 s1 string 3750 s2 string 3751 s1Expected string 3752 s2Expected string 3753 }{ 3754 {"off off", "off", "off", CompressionOff, CompressionOff}, 3755 {"off accept", "off", "accept", CompressionOff, CompressionOff}, 3756 {"off on", "off", "on", CompressionOff, CompressionOff}, 3757 {"off better", "off", "better", CompressionOff, CompressionOff}, 3758 {"off best", "off", "best", CompressionOff, CompressionOff}, 3759 3760 {"accept off", "accept", "off", CompressionOff, CompressionOff}, 3761 {"accept accept", "accept", "accept", CompressionOff, CompressionOff}, 3762 {"accept on", "accept", "on", CompressionS2Fast, CompressionS2Fast}, 3763 {"accept better", "accept", "better", CompressionS2Better, CompressionS2Better}, 3764 {"accept best", "accept", "best", CompressionS2Best, CompressionS2Best}, 3765 3766 {"on off", "on", "off", CompressionOff, CompressionOff}, 3767 {"on accept", "on", "accept", CompressionS2Fast, CompressionS2Fast}, 3768 {"on on", "on", "on", CompressionS2Fast, CompressionS2Fast}, 3769 {"on better", "on", "better", CompressionS2Fast, CompressionS2Better}, 3770 {"on best", "on", "best", CompressionS2Fast, CompressionS2Best}, 3771 3772 {"better off", "better", "off", CompressionOff, CompressionOff}, 3773 {"better accept", "better", "accept", CompressionS2Better, CompressionS2Better}, 3774 {"better on", "better", "on", CompressionS2Better, CompressionS2Fast}, 3775 {"better better", "better", "better", CompressionS2Better, CompressionS2Better}, 3776 {"better best", "better", "best", CompressionS2Better, CompressionS2Best}, 3777 3778 {"best off", "best", "off", CompressionOff, CompressionOff}, 3779 {"best accept", "best", "accept", CompressionS2Best, CompressionS2Best}, 3780 {"best on", "best", "on", CompressionS2Best, CompressionS2Fast}, 3781 {"best better", "best", "better", CompressionS2Best, CompressionS2Better}, 3782 {"best best", "best", "best", CompressionS2Best, CompressionS2Best}, 3783 } { 3784 t.Run(test.name, func(t *testing.T) { 3785 conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", test.s1, _EMPTY_))) 3786 s1, o1 := RunServerWithConfig(conf1) 3787 defer s1.Shutdown() 3788 3789 conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B", test.s2, fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port)))) 3790 s2, _ := RunServerWithConfig(conf2) 3791 defer s2.Shutdown() 3792 3793 checkClusterFormed(t, s1, s2) 3794 3795 nc1 := natsConnect(t, s1.ClientURL()) 3796 defer nc1.Close() 3797 3798 nc2 := natsConnect(t, s2.ClientURL()) 3799 defer nc2.Close() 3800 3801 payload := make([]byte, 128) 3802 check := func(ncp, ncs *nats.Conn, subj string, s *Server) { 3803 t.Helper() 3804 sub := natsSubSync(t, ncs, subj) 3805 checkSubInterest(t, s, globalAccountName, subj, time.Second) 3806 natsPub(t, ncp, subj, payload) 3807 natsNexMsg(t, sub, time.Second) 3808 3809 for _, srv := range []*Server{s1, s2} { 3810 rz, err := srv.Routez(nil) 3811 require_NoError(t, err) 3812 var expected string 3813 if srv == s1 { 3814 expected = test.s1Expected 3815 } else { 3816 expected = test.s2Expected 3817 } 3818 if cm := 
func TestRouteCompressionMatrixModes(t *testing.T) {
	tmpl := `
		port: -1
		server_name: "%s"
		cluster {
			name: "local"
			port: -1
			compression: %s
			pool_size: -1
			%s
		}
	`
	// Each case lists the mode configured on s1 and s2, and the mode each
	// server is expected to report once the route is established.
	for _, test := range []struct {
		name       string
		s1         string
		s2         string
		s1Expected string
		s2Expected string
	}{
		{"off off", "off", "off", CompressionOff, CompressionOff},
		{"off accept", "off", "accept", CompressionOff, CompressionOff},
		{"off on", "off", "on", CompressionOff, CompressionOff},
		{"off better", "off", "better", CompressionOff, CompressionOff},
		{"off best", "off", "best", CompressionOff, CompressionOff},

		{"accept off", "accept", "off", CompressionOff, CompressionOff},
		{"accept accept", "accept", "accept", CompressionOff, CompressionOff},
		{"accept on", "accept", "on", CompressionS2Fast, CompressionS2Fast},
		{"accept better", "accept", "better", CompressionS2Better, CompressionS2Better},
		{"accept best", "accept", "best", CompressionS2Best, CompressionS2Best},

		{"on off", "on", "off", CompressionOff, CompressionOff},
		{"on accept", "on", "accept", CompressionS2Fast, CompressionS2Fast},
		{"on on", "on", "on", CompressionS2Fast, CompressionS2Fast},
		{"on better", "on", "better", CompressionS2Fast, CompressionS2Better},
		{"on best", "on", "best", CompressionS2Fast, CompressionS2Best},

		{"better off", "better", "off", CompressionOff, CompressionOff},
		{"better accept", "better", "accept", CompressionS2Better, CompressionS2Better},
		{"better on", "better", "on", CompressionS2Better, CompressionS2Fast},
		{"better better", "better", "better", CompressionS2Better, CompressionS2Better},
		{"better best", "better", "best", CompressionS2Better, CompressionS2Best},

		{"best off", "best", "off", CompressionOff, CompressionOff},
		{"best accept", "best", "accept", CompressionS2Best, CompressionS2Best},
		{"best on", "best", "on", CompressionS2Best, CompressionS2Fast},
		{"best better", "best", "better", CompressionS2Best, CompressionS2Better},
		{"best best", "best", "best", CompressionS2Best, CompressionS2Best},
	} {
		t.Run(test.name, func(t *testing.T) {
			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", test.s1, _EMPTY_)))
			s1, o1 := RunServerWithConfig(conf1)
			defer s1.Shutdown()

			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B", test.s2, fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port))))
			s2, _ := RunServerWithConfig(conf2)
			defer s2.Shutdown()

			checkClusterFormed(t, s1, s2)

			nc1 := natsConnect(t, s1.ClientURL())
			defer nc1.Close()

			nc2 := natsConnect(t, s2.ClientURL())
			defer nc2.Close()

			payload := make([]byte, 128)
			check := func(ncp, ncs *nats.Conn, subj string, s *Server) {
				t.Helper()
				sub := natsSubSync(t, ncs, subj)
				checkSubInterest(t, s, globalAccountName, subj, time.Second)
				natsPub(t, ncp, subj, payload)
				natsNexMsg(t, sub, time.Second)

				for _, srv := range []*Server{s1, s2} {
					rz, err := srv.Routez(nil)
					require_NoError(t, err)
					var expected string
					if srv == s1 {
						expected = test.s1Expected
					} else {
						expected = test.s2Expected
					}
					if cm := rz.Routes[0].Compression; cm != expected {
						t.Fatalf("Server %s - expected compression %q, got %q", srv, expected, cm)
					}
				}
			}
			check(nc1, nc2, "foo", s1)
			check(nc2, nc1, "bar", s2)
		})
	}
}

func TestRouteCompressionWithOlderServer(t *testing.T) {
	tmpl := `
		port: -1
		server_name: "%s"
		cluster {
			port: -1
			name: "local"
			%s
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", _EMPTY_, "compression: \"on\"")))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	routes := fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port)
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B", routes, "compression: \"not supported\"")))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	// Make sure that s1's route compression is "not supported", since s2
	// behaves like an older server that does not know about compression.
	s1.mu.RLock()
	s1.forEachRoute(func(r *client) {
		r.mu.Lock()
		cm := r.route.compression
		r.mu.Unlock()
		if cm != CompressionNotSupported {
			s1.mu.RUnlock()
			t.Fatalf("Compression should be %q, got %q", CompressionNotSupported, cm)
		}
	})
	s1.mu.RUnlock()
}

func TestRouteCompressionImplicitRoute(t *testing.T) {
	tmpl := `
		port: -1
		server_name: "%s"
		cluster {
			port: -1
			name: "local"
			%s
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", _EMPTY_, _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	routes := fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port)
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B", routes, "compression: \"fast\"")))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	conf3 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "C", routes, "compression: \"best\"")))
	s3, _ := RunServerWithConfig(conf3)
	defer s3.Shutdown()

	checkClusterFormed(t, s1, s2, s3)

	checkComp := func(s *Server, remoteID, expected string) {
		t.Helper()
		s.mu.RLock()
		defer s.mu.RUnlock()
		var err error
		s.forEachRoute(func(r *client) {
			if err != nil {
				return
			}
			var cm string
			ok := true
			r.mu.Lock()
			if r.route.remoteID == remoteID {
				cm = r.route.compression
				ok = cm == expected
			}
			r.mu.Unlock()
			if !ok {
				err = fmt.Errorf("Server %q - expected route to %q to use compression %q, got %q",
					s, remoteID, expected, cm)
			}
		})
		// Surface the error: without this check the collected error
		// would be silently dropped.
		if err != nil {
			t.Fatal(err)
		}
	}
	checkComp(s1, s2.ID(), CompressionS2Fast)
	checkComp(s1, s3.ID(), CompressionS2Best)
	checkComp(s2, s1.ID(), CompressionS2Fast)
	checkComp(s2, s3.ID(), CompressionS2Best)
	checkComp(s3, s1.ID(), CompressionS2Best)
	checkComp(s3, s2.ID(), CompressionS2Best)
}

func TestRouteCompressionAuto(t *testing.T) {
	tmpl := `
		port: -1
		server_name: "%s"
		ping_interval: "%s"
		cluster {
			port: -1
			name: "local"
			compression: %s
			%s
		}
	`
	conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A", "10s", "s2_fast", _EMPTY_)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	// Start with 0ms RTT.
	np := createNetProxy(0, 1024*1024*1024, 1024*1024*1024, fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port), true)
	routes := fmt.Sprintf("routes: [\"%s\"]", np.routeURL())

	rtts := "{mode: s2_auto, rtt_thresholds: [100ms, 200ms, 300ms]}"
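	// The thresholds are ordered: below the first one the route stays
	// uncompressed, and crossing each subsequent one bumps the level to
	// "fast", "better" and "best" respectively, as the checks below exercise.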
	conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B", "500ms", rtts, routes)))
	s2, _ := RunServerWithConfig(conf2)
	defer s2.Shutdown()
	defer np.stop()

	checkClusterFormed(t, s1, s2)

	checkComp := func(expected string) {
		t.Helper()
		checkFor(t, 4*time.Second, 50*time.Millisecond, func() error {
			s2.mu.RLock()
			defer s2.mu.RUnlock()
			// With default pooling we expect 4 routes: the pool of 3 plus
			// the dedicated route for the system account.
			if n := s2.numRoutes(); n != 4 {
				return fmt.Errorf("Cluster not formed properly, got %v routes", n)
			}
			var err error
			s2.forEachRoute(func(r *client) {
				if err != nil {
					return
				}
				r.mu.Lock()
				cm := r.route.compression
				r.mu.Unlock()
				if cm != expected {
					err = fmt.Errorf("Route %v compression mode expected to be %q, got %q", r, expected, cm)
				}
			})
			return err
		})
	}
	checkComp(CompressionS2Uncompressed)

	// Bump the proxy RTT to 150ms and we should get compression "fast".
	np.updateRTT(150 * time.Millisecond)
	checkComp(CompressionS2Fast)

	// Now 250ms, and we should get "better".
	np.updateRTT(250 * time.Millisecond)
	checkComp(CompressionS2Better)

	// At 350ms, above the last threshold, we should get "best".
	np.updateRTT(350 * time.Millisecond)
	checkComp(CompressionS2Best)

	// Back down to 1ms we should again get "uncompressed".
	np.updateRTT(1 * time.Millisecond)
	checkComp(CompressionS2Uncompressed)

	// Do a config reload that disables the "uncompressed" and "better"
	// levels (a 0ms threshold disables that slot).
	reloadUpdateConfig(t, s2, conf2, fmt.Sprintf(tmpl, "B", "500ms", "{mode: s2_auto, rtt_thresholds: [0ms, 100ms, 0ms, 300ms]}", routes))
	// Change the RTT back down to 1ms: we should not go uncompressed,
	// we should have "fast" compression.
	np.updateRTT(1 * time.Millisecond)
	checkComp(CompressionS2Fast)
	// Now bump to 150ms and we should be using "best", not the "better"
	// mode, since that slot is disabled.
	np.updateRTT(150 * time.Millisecond)
	checkComp(CompressionS2Best)
	// Try 400ms and we should still be using "best".
	np.updateRTT(400 * time.Millisecond)
	checkComp(CompressionS2Best)

	// Try other variations.
	reloadUpdateConfig(t, s2, conf2, fmt.Sprintf(tmpl, "B", "500ms", "{mode: s2_auto, rtt_thresholds: [50ms, 150ms, 0ms, 0ms]}", routes))
	np.updateRTT(0 * time.Millisecond)
	checkComp(CompressionS2Uncompressed)
	np.updateRTT(100 * time.Millisecond)
	checkComp(CompressionS2Fast)
	// Since we expect the same compression level, just wait before doing
	// the update and the next check.
	time.Sleep(100 * time.Millisecond)
	np.updateRTT(250 * time.Millisecond)
	checkComp(CompressionS2Fast)

	// Now disable compression on s1.
	reloadUpdateConfig(t, s1, conf1, fmt.Sprintf(tmpl, "A", "10s", "off", _EMPTY_))
	// Wait a bit to make sure we don't check for the cluster too soon,
	// since we expect a disconnect.
	time.Sleep(100 * time.Millisecond)
	checkClusterFormed(t, s1, s2)
	// Now change the RTT values in the proxy.
	np.updateRTT(0 * time.Millisecond)
	// Now check that s2 also shows as "off". Wait for some ping intervals.
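	// s2 is in s2_auto mode, so allow a moment for it to observe the
	// reconnected, uncompressed route before asserting.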
	time.Sleep(200 * time.Millisecond)
	checkComp(CompressionOff)
}

func TestRoutePings(t *testing.T) {
	routeMaxPingInterval = 50 * time.Millisecond
	defer func() { routeMaxPingInterval = defaultRouteMaxPingInterval }()

	o1 := DefaultOptions()
	s1 := RunServer(o1)
	defer s1.Shutdown()

	o2 := DefaultOptions()
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	ch := make(chan struct{}, 1)
	s1.mu.RLock()
	s1.forEachRemote(func(r *client) {
		r.mu.Lock()
		r.nc = &capturePingConn{r.nc, ch}
		r.mu.Unlock()
	})
	s1.mu.RUnlock()

	for i := 0; i < 5; i++ {
		select {
		case <-ch:
		case <-time.After(250 * time.Millisecond):
			t.Fatalf("Did not send PING")
		}
	}
}

func TestRouteCustomPing(t *testing.T) {
	pingInterval := 50 * time.Millisecond
	o1 := DefaultOptions()
	o1.Cluster.PingInterval = pingInterval
	o1.Cluster.MaxPingsOut = 2
	s1 := RunServer(o1)
	defer s1.Shutdown()

	o2 := DefaultOptions()
	o2.Cluster.PingInterval = pingInterval
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	ch := make(chan struct{}, 1)
	s1.mu.RLock()
	s1.forEachRemote(func(r *client) {
		r.mu.Lock()
		r.nc = &capturePingConn{r.nc, ch}
		r.mu.Unlock()
	})
	s1.mu.RUnlock()

	for i := 0; i < 5; i++ {
		select {
		case <-ch:
		case <-time.After(250 * time.Millisecond):
			t.Fatalf("Did not send PING")
		}
	}
}

func TestRouteNoLeakOnSlowConsumer(t *testing.T) {
	o1 := DefaultOptions()
	o1.Cluster.PoolSize = -1
	s1 := RunServer(o1)
	defer s1.Shutdown()

	o2 := DefaultOptions()
	o2.Cluster.PoolSize = -1
	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
	s2 := RunServer(o2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	// For any route connections on the first server, drop the write
	// deadline down and then get the client to try sending something.
	// This should result in an effectively immediate write timeout,
	// which will surface as a slow consumer.
	s1.mu.Lock()
	for _, cl := range s1.routes {
		for _, c := range cl {
			c.mu.Lock()
			c.out.wdl = time.Nanosecond
			c.mu.Unlock()
			c.sendRTTPing()
		}
	}
	s1.mu.Unlock()

	// By now the routes should have gone down, so check that there
	// aren't any routes listed still.
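	// The write timeout marks the route as a slow consumer and closes it;
	// the server keeps cumulative per-kind slow consumer counters (clients
	// vs routes), which the assertions below rely on.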
	checkFor(t, time.Millisecond*500, time.Millisecond*25, func() error {
		if nc := s1.NumRoutes(); nc != 0 {
			return fmt.Errorf("Server 1 should have no route connections, got %v", nc)
		}
		if nc := s2.NumRoutes(); nc != 0 {
			return fmt.Errorf("Server 2 should have no route connections, got %v", nc)
		}
		return nil
	})
	var got, expected int64
	got = s1.NumSlowConsumers()
	expected = 1
	if got != expected {
		t.Errorf("got: %d, expected: %d", got, expected)
	}
	got = int64(s1.NumSlowConsumersRoutes())
	if got != expected {
		t.Errorf("got: %d, expected: %d", got, expected)
	}
	got = int64(s1.NumSlowConsumersClients())
	expected = 0
	if got != expected {
		t.Errorf("got: %d, expected: %d", got, expected)
	}
	varz, err := s1.Varz(nil)
	if err != nil {
		t.Fatal(err)
	}
	if varz.SlowConsumersStats.Clients != 0 {
		t.Error("Expected no slow consumer clients")
	}
	if varz.SlowConsumersStats.Routes != 1 {
		t.Error("Expected a slow consumer route")
	}
}

func TestRouteSlowConsumerRecover(t *testing.T) {
	o1 := DefaultOptions()
	o1.Cluster.PoolSize = -1
	s1 := RunServer(o1)
	defer s1.Shutdown()

	rtt := 1500 * time.Nanosecond
	upRate := 1024 * 1024
	downRate := 128 * 1024
	np := createNetProxy(rtt, upRate, downRate, fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port), true)
	defer np.stop()

	o2 := DefaultOptions()
	o2.Cluster.PoolSize = -1
	o2.Routes = RoutesFromStr(np.routeURL())
	s2 := RunServer(o2)
	defer s2.Shutdown()

	checkClusterFormed(t, s1, s2)

	changeWriteDeadline := func(s *Server, duration time.Duration) {
		s.mu.Lock()
		for _, cl := range s.routes {
			for _, c := range cl {
				c.mu.Lock()
				c.out.wdl = duration
				c.mu.Unlock()
			}
		}
		s.mu.Unlock()
	}
	hasSlowConsumerRoutes := func(s *Server) bool {
		var sc bool
		s.mu.Lock()
	Loop:
		for _, cl := range s.routes {
			for _, c := range cl {
				c.mu.Lock()
				sc = c.flags.isSet(isSlowConsumer)
				c.mu.Unlock()
				if sc {
					break Loop
				}
			}
		}
		s.mu.Unlock()
		return sc
	}

	// Start with a shorter write deadline to cause errors,
	// then bump it again later to let it recover.
	changeWriteDeadline(s1, 1*time.Second)

	ncA, err := nats.Connect(s1.Addr().String())
	require_NoError(t, err)
	defer ncA.Close()

	ncB, err := nats.Connect(s2.Addr().String())
	require_NoError(t, err)
	defer ncB.Close()

	var wg sync.WaitGroup
	// The subscriber closes its connection on the first message so the
	// receiving side stops draining, helping the route back up.
	ncB.Subscribe("test", func(*nats.Msg) {
		ncB.Close()
	})
	ncB.Flush()

	ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
	defer cancel()

	wg.Add(1)
	go func() {
		defer wg.Done()
		ticker := time.NewTicker(30 * time.Millisecond)
		defer ticker.Stop()
		payload := []byte(strings.Repeat("A", 132*1024))
		for range ticker.C {
			select {
			case <-ctx.Done():
				return
			default:
			}
			ncA.Publish("test", payload)
			ncA.Flush()
		}
	}()

	checkFor(t, 20*time.Second, 2*time.Millisecond, func() error {
		if s1.NumRoutes() < 1 {
			return fmt.Errorf("No routes connected")
		}
		if !hasSlowConsumerRoutes(s1) {
			if s1.NumSlowConsumersRoutes() > 0 {
				// In case it has recovered already.
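				// NumSlowConsumersRoutes is a cumulative counter, while the
				// isSlowConsumer flag is per-connection, so a route that was
				// flagged, closed and reconnected no longer shows the flag.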
				return nil
			}
			return fmt.Errorf("Expected Slow Consumer routes")
		}
		return nil
	})
	cancel()
	changeWriteDeadline(s1, 5*time.Second)
	np.updateRTT(0)
	checkFor(t, 20*time.Second, 10*time.Millisecond, func() error {
		if s1.NumRoutes() < 1 {
			return fmt.Errorf("No routes connected")
		}
		if hasSlowConsumerRoutes(s1) {
			return fmt.Errorf("Expected Slow Consumer routes to recover")
		}
		return nil
	})

	checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
		var got, expected int64
		got = int64(s1.NumSlowConsumersRoutes())
		expected = 1
		if got != expected {
			return fmt.Errorf("got: %d, expected: %d", got, expected)
		}
		return nil
	})
	wg.Wait()
}

func TestRouteNoLeakOnAuthTimeout(t *testing.T) {
	opts := DefaultOptions()
	opts.Cluster.Username = "foo"
	opts.Cluster.Password = "bar"
	opts.AuthTimeout = 0.01 // Deliberately short timeout
	s := RunServer(opts)
	defer s.Shutdown()

	c, err := net.Dial("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Cluster.Port))
	if err != nil {
		t.Fatalf("Error connecting: %v", err)
	}
	defer c.Close()

	cr := bufio.NewReader(c)

	// Wait for INFO...
	line, _, _ := cr.ReadLine()
	var info serverInfo
	if err = json.Unmarshal(line[5:], &info); err != nil {
		t.Fatalf("Could not parse INFO json: %v\n", err)
	}

	// Wait out the clock so we hit the auth timeout.
	time.Sleep(secondsToDuration(opts.AuthTimeout) * 2)
	line, _, _ = cr.ReadLine()
	if string(line) != "-ERR 'Authentication Timeout'" {
		t.Fatalf("Expected '-ERR 'Authentication Timeout'' but got %q", line)
	}

	// There shouldn't be a route entry since we never completed the
	// CONNECT/auth handshake.
	if nc := s.NumRoutes(); nc != 0 {
		t.Fatalf("Server should have no route connections, got %v", nc)
	}
}
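// The raw "dial the cluster port and read the INFO line" sequence used in
// TestRouteNoLeakOnAuthTimeout above is a pattern a shared helper could
// capture. The sketch below is illustrative only: dialRouteInfo is a
// hypothetical name, not an existing helper in this file. It assumes the
// server always greets a route connection with an "INFO {...}" line.
func dialRouteInfo(t *testing.T, host string, port int) (net.Conn, *bufio.Reader, serverInfo) {
	t.Helper()
	c, err := net.Dial("tcp", fmt.Sprintf("%s:%d", host, port))
	if err != nil {
		t.Fatalf("Error connecting to route port: %v", err)
	}
	cr := bufio.NewReader(c)
	line, _, err := cr.ReadLine()
	if err != nil {
		c.Close()
		t.Fatalf("Error reading INFO: %v", err)
	}
	// The line is "INFO {...}": skip the 5-byte prefix before unmarshaling.
	var info serverInfo
	if err := json.Unmarshal(line[5:], &info); err != nil {
		c.Close()
		t.Fatalf("Could not parse INFO json: %v", err)
	}
	// Callers must keep reading from cr (not c) so as not to lose any
	// bytes already buffered by the reader.
	return c, cr, info
}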