github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/gateway_test.go (about) 1 // Copyright 2018-2020 The NATS Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package server 15 16 import ( 17 "bufio" 18 "bytes" 19 "context" 20 "crypto/tls" 21 "encoding/json" 22 "fmt" 23 "net" 24 "net/url" 25 "os" 26 "runtime" 27 "strconv" 28 "strings" 29 "sync" 30 "sync/atomic" 31 "testing" 32 "time" 33 34 . "github.com/nats-io/nats-server/v2/internal/ocsp" 35 "github.com/nats-io/nats-server/v2/logger" 36 "github.com/nats-io/nats.go" 37 "golang.org/x/crypto/ocsp" 38 ) 39 40 func init() { 41 gatewayConnectDelay = 15 * time.Millisecond 42 gatewayReconnectDelay = 15 * time.Millisecond 43 } 44 45 // Wait for the expected number of outbound gateways, or fails. 46 func waitForOutboundGateways(t *testing.T, s *Server, expected int, timeout time.Duration) { 47 t.Helper() 48 if timeout < 2*time.Second { 49 timeout = 2 * time.Second 50 } 51 checkFor(t, timeout, 15*time.Millisecond, func() error { 52 if n := s.numOutboundGateways(); n != expected { 53 return fmt.Errorf("Expected %v outbound gateway(s), got %v", expected, n) 54 } 55 return nil 56 }) 57 } 58 59 // Wait for the expected number of inbound gateways, or fails. 
60 func waitForInboundGateways(t *testing.T, s *Server, expected int, timeout time.Duration) { 61 t.Helper() 62 if timeout < 2*time.Second { 63 timeout = 2 * time.Second 64 } 65 checkFor(t, timeout, 15*time.Millisecond, func() error { 66 if n := s.numInboundGateways(); n != expected { 67 return fmt.Errorf("Expected %v inbound gateway(s), got %v", expected, n) 68 } 69 return nil 70 }) 71 } 72 73 func waitForGatewayFailedConnect(t *testing.T, s *Server, gwName string, expectFailure bool, timeout time.Duration) { 74 t.Helper() 75 checkFor(t, timeout, 15*time.Millisecond, func() error { 76 var c int 77 cfg := s.getRemoteGateway(gwName) 78 if cfg != nil { 79 c = cfg.getConnAttempts() 80 } 81 if expectFailure && c <= 1 { 82 return fmt.Errorf("Expected several attempts to connect, got %v", c) 83 } else if !expectFailure && c > 1 { 84 return fmt.Errorf("Expected single attempt to connect, got %v", c) 85 } 86 return nil 87 }) 88 } 89 90 func checkForRegisteredQSubInterest(t *testing.T, s *Server, gwName, acc, subj string, expected int, timeout time.Duration) { 91 t.Helper() 92 checkFor(t, timeout, 15*time.Millisecond, func() error { 93 count := 0 94 c := s.getOutboundGatewayConnection(gwName) 95 ei, _ := c.gw.outsim.Load(acc) 96 if ei != nil { 97 sl := ei.(*outsie).sl 98 r := sl.Match(subj) 99 for _, qsubs := range r.qsubs { 100 count += len(qsubs) 101 } 102 } 103 if count == expected { 104 return nil 105 } 106 return fmt.Errorf("Expected %v qsubs in sublist, got %v", expected, count) 107 }) 108 } 109 110 func checkForSubjectNoInterest(t *testing.T, c *client, account, subject string, expectNoInterest bool, timeout time.Duration) { 111 t.Helper() 112 checkFor(t, timeout, 15*time.Millisecond, func() error { 113 ei, _ := c.gw.outsim.Load(account) 114 if ei == nil { 115 return fmt.Errorf("Did not receive subject no-interest") 116 } 117 e := ei.(*outsie) 118 e.RLock() 119 _, inMap := e.ni[subject] 120 e.RUnlock() 121 if expectNoInterest { 122 if inMap { 123 return nil 124 } 125 
return fmt.Errorf("Did not receive subject no-interest on %q", subject) 126 } 127 if inMap { 128 return fmt.Errorf("No-interest on subject %q was not cleared", subject) 129 } 130 return nil 131 }) 132 } 133 134 func checkForAccountNoInterest(t *testing.T, c *client, account string, expectedNoInterest bool, timeout time.Duration) { 135 t.Helper() 136 checkFor(t, timeout, 15*time.Millisecond, func() error { 137 ei, ok := c.gw.outsim.Load(account) 138 if !ok && expectedNoInterest { 139 return fmt.Errorf("No-interest for account %q not yet registered", account) 140 } else if ok && !expectedNoInterest { 141 return fmt.Errorf("Account %q should not have a no-interest", account) 142 } 143 if ei != nil { 144 return fmt.Errorf("Account %q should have a global no-interest, not subject no-interest", account) 145 } 146 return nil 147 }) 148 } 149 150 func checkGWInterestOnlyMode(t *testing.T, s *Server, outboundGWName, accName string) { 151 t.Helper() 152 checkGWInterestOnlyModeOrNotPresent(t, s, outboundGWName, accName, false) 153 } 154 155 func checkGWInterestOnlyModeOrNotPresent(t *testing.T, s *Server, outboundGWName, accName string, notPresentOk bool) { 156 t.Helper() 157 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 158 gwc := s.getOutboundGatewayConnection(outboundGWName) 159 if gwc == nil { 160 return fmt.Errorf("No outbound gateway connection %q for server %v", outboundGWName, s) 161 } 162 gwc.mu.Lock() 163 defer gwc.mu.Unlock() 164 out, ok := gwc.gw.outsim.Load(accName) 165 if !ok { 166 if notPresentOk { 167 return nil 168 } else { 169 return fmt.Errorf("Server %v - outbound gateway connection %q: no account %q found in map", 170 s, outboundGWName, accName) 171 } 172 } 173 if out == nil { 174 return fmt.Errorf("Server %v - outbound gateway connection %q: interest map not found for account %q", 175 s, outboundGWName, accName) 176 } 177 e := out.(*outsie) 178 e.RLock() 179 defer e.RUnlock() 180 if e.mode != InterestOnly { 181 return fmt.Errorf( 182 
"Server %v - outbound gateway connection %q: account %q mode shoule be InterestOnly but is %v", 183 s, outboundGWName, accName, e.mode) 184 } 185 return nil 186 }) 187 } 188 189 func checkGWInterestOnlyModeInterestOn(t *testing.T, s *Server, outboundGWName, accName, subject string) { 190 t.Helper() 191 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 192 c := s.getOutboundGatewayConnection(outboundGWName) 193 outsiei, _ := c.gw.outsim.Load(accName) 194 if outsiei == nil { 195 return fmt.Errorf("Server %s - outbound gateway connection %q: no map entry found for account %q", 196 s, outboundGWName, accName) 197 } 198 outsie := outsiei.(*outsie) 199 r := outsie.sl.Match(subject) 200 if len(r.psubs) == 0 { 201 return fmt.Errorf("Server %s - outbound gateway connection %q - account %q: no subject interest for %q", 202 s, outboundGWName, accName, subject) 203 } 204 return nil 205 }) 206 } 207 208 func waitCh(t *testing.T, ch chan bool, errTxt string) { 209 t.Helper() 210 select { 211 case <-ch: 212 return 213 case <-time.After(5 * time.Second): 214 t.Fatalf(errTxt) 215 } 216 } 217 218 var noOpErrHandler = func(_ *nats.Conn, _ *nats.Subscription, _ error) {} 219 220 func natsConnect(t testing.TB, url string, options ...nats.Option) *nats.Conn { 221 t.Helper() 222 opts := nats.GetDefaultOptions() 223 for _, opt := range options { 224 if err := opt(&opts); err != nil { 225 t.Fatalf("Error applying client option: %v", err) 226 } 227 } 228 nc, err := nats.Connect(url, options...) 229 if err != nil { 230 t.Fatalf("Error on connect: %v", err) 231 } 232 if opts.AsyncErrorCB == nil { 233 // Set this up to not pollute the logs when running tests. 
234 nc.SetErrorHandler(noOpErrHandler) 235 } 236 237 return nc 238 } 239 240 func natsSub(t *testing.T, nc *nats.Conn, subj string, cb nats.MsgHandler) *nats.Subscription { 241 t.Helper() 242 sub, err := nc.Subscribe(subj, cb) 243 if err != nil { 244 t.Fatalf("Error on subscribe: %v", err) 245 } 246 return sub 247 } 248 249 func natsSubSync(t *testing.T, nc *nats.Conn, subj string) *nats.Subscription { 250 t.Helper() 251 sub, err := nc.SubscribeSync(subj) 252 if err != nil { 253 t.Fatalf("Error on subscribe: %v", err) 254 } 255 return sub 256 } 257 258 func natsNexMsg(t *testing.T, sub *nats.Subscription, timeout time.Duration) *nats.Msg { 259 t.Helper() 260 msg, err := sub.NextMsg(timeout) 261 if err != nil { 262 t.Fatalf("Failed getting next message: %v", err) 263 } 264 return msg 265 } 266 267 func natsQueueSub(t *testing.T, nc *nats.Conn, subj, queue string, cb nats.MsgHandler) *nats.Subscription { 268 t.Helper() 269 sub, err := nc.QueueSubscribe(subj, queue, cb) 270 if err != nil { 271 t.Fatalf("Error on subscribe: %v", err) 272 } 273 return sub 274 } 275 276 func natsQueueSubSync(t *testing.T, nc *nats.Conn, subj, queue string) *nats.Subscription { 277 t.Helper() 278 sub, err := nc.QueueSubscribeSync(subj, queue) 279 if err != nil { 280 t.Fatalf("Error on subscribe: %v", err) 281 } 282 return sub 283 } 284 285 func natsFlush(t *testing.T, nc *nats.Conn) { 286 t.Helper() 287 if err := nc.Flush(); err != nil { 288 t.Fatalf("Error on flush: %v", err) 289 } 290 } 291 292 func natsPub(t testing.TB, nc *nats.Conn, subj string, payload []byte) { 293 t.Helper() 294 if err := nc.Publish(subj, payload); err != nil { 295 t.Fatalf("Error on publish: %v", err) 296 } 297 } 298 299 func natsPubReq(t *testing.T, nc *nats.Conn, subj, reply string, payload []byte) { 300 t.Helper() 301 if err := nc.PublishRequest(subj, reply, payload); err != nil { 302 t.Fatalf("Error on publish: %v", err) 303 } 304 } 305 306 func natsUnsub(t *testing.T, sub *nats.Subscription) { 307 t.Helper() 
308 if err := sub.Unsubscribe(); err != nil { 309 t.Fatalf("Error on unsubscribe: %v", err) 310 } 311 } 312 313 func testDefaultOptionsForGateway(name string) *Options { 314 o := DefaultOptions() 315 o.NoSystemAccount = true 316 o.Cluster.Name = name 317 o.Gateway.Name = name 318 o.Gateway.Host = "127.0.0.1" 319 o.Gateway.Port = -1 320 o.gatewaysSolicitDelay = 15 * time.Millisecond 321 return o 322 } 323 324 func runGatewayServer(o *Options) *Server { 325 s := RunServer(o) 326 s.SetLogger(&DummyLogger{}, true, true) 327 return s 328 } 329 330 func testGatewayOptionsFromToWithServers(t *testing.T, org, dst string, servers ...*Server) *Options { 331 t.Helper() 332 o := testDefaultOptionsForGateway(org) 333 gw := &RemoteGatewayOpts{Name: dst} 334 for _, s := range servers { 335 us := fmt.Sprintf("nats://127.0.0.1:%d", s.GatewayAddr().Port) 336 u, err := url.Parse(us) 337 if err != nil { 338 t.Fatalf("Error parsing url: %v", err) 339 } 340 gw.URLs = append(gw.URLs, u) 341 } 342 o.Gateway.Gateways = append(o.Gateway.Gateways, gw) 343 return o 344 } 345 346 func testAddGatewayURLs(t *testing.T, o *Options, dst string, urls []string) { 347 t.Helper() 348 gw := &RemoteGatewayOpts{Name: dst} 349 for _, us := range urls { 350 u, err := url.Parse(us) 351 if err != nil { 352 t.Fatalf("Error parsing url: %v", err) 353 } 354 gw.URLs = append(gw.URLs, u) 355 } 356 o.Gateway.Gateways = append(o.Gateway.Gateways, gw) 357 } 358 359 func testGatewayOptionsFromToWithURLs(t *testing.T, org, dst string, urls []string) *Options { 360 o := testDefaultOptionsForGateway(org) 361 testAddGatewayURLs(t, o, dst, urls) 362 return o 363 } 364 365 func testGatewayOptionsWithTLS(t *testing.T, name string) *Options { 366 t.Helper() 367 o := testDefaultOptionsForGateway(name) 368 var ( 369 tc = &TLSConfigOpts{} 370 err error 371 ) 372 if name == "A" { 373 tc.CertFile = "../test/configs/certs/srva-cert.pem" 374 tc.KeyFile = "../test/configs/certs/srva-key.pem" 375 } else { 376 tc.CertFile = 
"../test/configs/certs/srvb-cert.pem" 377 tc.KeyFile = "../test/configs/certs/srvb-key.pem" 378 } 379 tc.CaFile = "../test/configs/certs/ca.pem" 380 o.Gateway.TLSConfig, err = GenTLSConfig(tc) 381 if err != nil { 382 t.Fatalf("Error generating TLS config: %v", err) 383 } 384 o.Gateway.TLSConfig.ClientAuth = tls.RequireAndVerifyClientCert 385 o.Gateway.TLSConfig.RootCAs = o.Gateway.TLSConfig.ClientCAs 386 o.Gateway.TLSTimeout = 2.0 387 return o 388 } 389 390 func testGatewayOptionsFromToWithTLS(t *testing.T, org, dst string, urls []string) *Options { 391 o := testGatewayOptionsWithTLS(t, org) 392 testAddGatewayURLs(t, o, dst, urls) 393 return o 394 } 395 396 func TestGatewayBasic(t *testing.T) { 397 o2 := testDefaultOptionsForGateway("B") 398 o2.Gateway.ConnectRetries = 0 399 s2 := runGatewayServer(o2) 400 defer s2.Shutdown() 401 402 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 403 s1 := runGatewayServer(o1) 404 defer s1.Shutdown() 405 406 // s1 should have an outbound gateway to s2. 407 waitForOutboundGateways(t, s1, 1, time.Second) 408 // s2 should have an inbound gateway 409 waitForInboundGateways(t, s2, 1, time.Second) 410 // and an outbound too 411 waitForOutboundGateways(t, s2, 1, time.Second) 412 413 // Stop s2 server 414 s2.Shutdown() 415 416 // gateway should go away 417 waitForOutboundGateways(t, s1, 0, time.Second) 418 waitForInboundGateways(t, s1, 0, time.Second) 419 420 // Restart server 421 s2 = runGatewayServer(o2) 422 defer s2.Shutdown() 423 424 // gateway should reconnect 425 waitForOutboundGateways(t, s1, 1, 2*time.Second) 426 waitForOutboundGateways(t, s2, 1, 2*time.Second) 427 waitForInboundGateways(t, s1, 1, 2*time.Second) 428 waitForInboundGateways(t, s2, 1, 2*time.Second) 429 430 // Shutdown s1, remove the gateway from A to B and restart. 431 s1.Shutdown() 432 // When s2 detects the connection is closed, it will attempt 433 // to reconnect once (even if the route is implicit). 
434 // We need to wait more than a dial timeout to make sure 435 // s1 does not restart too quickly and s2 can actually reconnect. 436 time.Sleep(DEFAULT_ROUTE_DIAL + 250*time.Millisecond) 437 // Restart s1 without gateway to B. 438 o1.Gateway.Gateways = nil 439 s1 = runGatewayServer(o1) 440 defer s1.Shutdown() 441 442 // s1 should not have any outbound nor inbound 443 waitForOutboundGateways(t, s1, 0, 2*time.Second) 444 waitForInboundGateways(t, s1, 0, 2*time.Second) 445 446 // Same for s2 447 waitForOutboundGateways(t, s2, 0, 2*time.Second) 448 waitForInboundGateways(t, s2, 0, 2*time.Second) 449 450 // Verify that s2 no longer has A gateway in its list 451 checkFor(t, time.Second, 15*time.Millisecond, func() error { 452 if s2.getRemoteGateway("A") != nil { 453 return fmt.Errorf("Gateway A should have been removed from s2") 454 } 455 return nil 456 }) 457 } 458 459 func TestGatewayIgnoreSelfReference(t *testing.T) { 460 o := testDefaultOptionsForGateway("A") 461 // To create a reference to itself before running the server 462 // it means that we have to assign an explicit port 463 o.Gateway.Port = 5222 464 o.gatewaysSolicitDelay = 0 465 u, _ := url.Parse(fmt.Sprintf("nats://%s:%d", o.Gateway.Host, o.Gateway.Port)) 466 cfg := &RemoteGatewayOpts{ 467 Name: "A", 468 URLs: []*url.URL{u}, 469 } 470 o.Gateway.Gateways = append(o.Gateway.Gateways, cfg) 471 o.NoSystemAccount = true 472 s := runGatewayServer(o) 473 defer s.Shutdown() 474 475 // Wait a bit to make sure that there is no attempt to connect. 476 time.Sleep(20 * time.Millisecond) 477 478 // No outbound connection expected, and no attempt to connect. 
479 if s.getRemoteGateway("A") != nil { 480 t.Fatalf("Should not have a remote gateway config for A") 481 } 482 if s.getOutboundGatewayConnection("A") != nil { 483 t.Fatalf("Should not have a gateway connection to A") 484 } 485 s.Shutdown() 486 487 // Now try with config files and include 488 s1, _ := RunServerWithConfig("configs/gwa.conf") 489 defer s1.Shutdown() 490 491 s2, _ := RunServerWithConfig("configs/gwb.conf") 492 defer s2.Shutdown() 493 494 waitForOutboundGateways(t, s1, 1, 2*time.Second) 495 waitForOutboundGateways(t, s2, 1, 2*time.Second) 496 waitForInboundGateways(t, s1, 1, 2*time.Second) 497 waitForInboundGateways(t, s2, 1, 2*time.Second) 498 499 if s1.getRemoteGateway("A") != nil { 500 t.Fatalf("Should not have a remote gateway config for A") 501 } 502 if s1.getOutboundGatewayConnection("A") != nil { 503 t.Fatalf("Should not have a gateway connection to A") 504 } 505 if s2.getRemoteGateway("B") != nil { 506 t.Fatalf("Should not have a remote gateway config for B") 507 } 508 if s2.getOutboundGatewayConnection("B") != nil { 509 t.Fatalf("Should not have a gateway connection to B") 510 } 511 } 512 513 func TestGatewayHeaderInfo(t *testing.T) { 514 o := testDefaultOptionsForGateway("A") 515 s := runGatewayServer(o) 516 defer s.Shutdown() 517 518 gwconn, err := net.Dial("tcp", fmt.Sprintf("%s:%d", o.Gateway.Host, o.Gateway.Port)) 519 if err != nil { 520 t.Fatalf("Error dialing server: %v\n", err) 521 } 522 defer gwconn.Close() 523 client := bufio.NewReaderSize(gwconn, maxBufSize) 524 l, err := client.ReadString('\n') 525 if err != nil { 526 t.Fatalf("Error receiving info from server: %v\n", err) 527 } 528 var info serverInfo 529 if err = json.Unmarshal([]byte(l[5:]), &info); err != nil { 530 t.Fatalf("Could not parse INFO json: %v\n", err) 531 } 532 if !info.Headers { 533 t.Fatalf("Expected by default for header support to be enabled") 534 } 535 536 s.Shutdown() 537 gwconn.Close() 538 539 // Now turn headers off. 
540 o.NoHeaderSupport = true 541 s = runGatewayServer(o) 542 defer s.Shutdown() 543 544 gwconn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", o.Gateway.Host, o.Gateway.Port)) 545 if err != nil { 546 t.Fatalf("Error dialing server: %v\n", err) 547 } 548 defer gwconn.Close() 549 client = bufio.NewReaderSize(gwconn, maxBufSize) 550 l, err = client.ReadString('\n') 551 if err != nil { 552 t.Fatalf("Error receiving info from server: %v\n", err) 553 } 554 if err = json.Unmarshal([]byte(l[5:]), &info); err != nil { 555 t.Fatalf("Could not parse INFO json: %v\n", err) 556 } 557 if info.Headers { 558 t.Fatalf("Expected header support to be disabled") 559 } 560 } 561 562 func TestGatewayHeaderSupport(t *testing.T) { 563 o2 := testDefaultOptionsForGateway("B") 564 o2.Gateway.ConnectRetries = 0 565 s2 := runGatewayServer(o2) 566 defer s2.Shutdown() 567 568 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 569 s1 := runGatewayServer(o1) 570 defer s1.Shutdown() 571 572 // s1 should have an outbound gateway to s2. 
573 waitForOutboundGateways(t, s1, 1, time.Second) 574 // and an inbound too 575 waitForInboundGateways(t, s1, 1, time.Second) 576 // s2 should have an inbound gateway 577 waitForInboundGateways(t, s2, 1, time.Second) 578 // and an outbound too 579 waitForOutboundGateways(t, s2, 1, time.Second) 580 581 c, cr, _ := newClientForServer(s1) 582 defer c.close() 583 584 connect := "CONNECT {\"headers\":true}" 585 subOp := "SUB foo 1" 586 pingOp := "PING\r\n" 587 cmd := strings.Join([]string{connect, subOp, pingOp}, "\r\n") 588 c.parseAsync(cmd) 589 if _, err := cr.ReadString('\n'); err != nil { 590 t.Fatalf("Error receiving msg from server: %v\n", err) 591 } 592 593 // Wait for interest to be registered on s2 594 checkGWInterestOnlyModeInterestOn(t, s2, "A", globalAccountName, "foo") 595 596 b, _, _ := newClientForServer(s2) 597 defer b.close() 598 599 pubOp := "HPUB foo 12 14\r\nName:Derek\r\nOK\r\n" 600 cmd = strings.Join([]string{connect, pubOp}, "\r\n") 601 b.parseAsync(cmd) 602 603 l, err := cr.ReadString('\n') 604 if err != nil { 605 t.Fatalf("Error receiving msg from server: %v\n", err) 606 } 607 608 am := hmsgPat.FindAllStringSubmatch(l, -1) 609 if len(am) == 0 { 610 t.Fatalf("Did not get a match for %q", l) 611 } 612 matches := am[0] 613 if len(matches) != 7 { 614 t.Fatalf("Did not get correct # matches: %d vs %d\n", len(matches), 7) 615 } 616 if matches[SUB_INDEX] != "foo" { 617 t.Fatalf("Did not get correct subject: '%s'\n", matches[SUB_INDEX]) 618 } 619 if matches[SID_INDEX] != "1" { 620 t.Fatalf("Did not get correct sid: '%s'\n", matches[SID_INDEX]) 621 } 622 if matches[HDR_INDEX] != "12" { 623 t.Fatalf("Did not get correct msg length: '%s'\n", matches[HDR_INDEX]) 624 } 625 if matches[TLEN_INDEX] != "14" { 626 t.Fatalf("Did not get correct msg length: '%s'\n", matches[TLEN_INDEX]) 627 } 628 checkPayload(cr, []byte("Name:Derek\r\nOK\r\n"), t) 629 } 630 631 func TestGatewayHeaderDeliverStrippedMsg(t *testing.T) { 632 o2 := testDefaultOptionsForGateway("B") 633 
o2.Gateway.ConnectRetries = 0 634 s2 := runGatewayServer(o2) 635 defer s2.Shutdown() 636 637 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 638 o1.NoHeaderSupport = true 639 s1 := runGatewayServer(o1) 640 defer s1.Shutdown() 641 642 // s1 should have an outbound gateway to s2. 643 waitForOutboundGateways(t, s1, 1, time.Second) 644 // and an inbound too 645 waitForInboundGateways(t, s1, 1, time.Second) 646 // s2 should have an inbound gateway 647 waitForInboundGateways(t, s2, 1, time.Second) 648 // and an outbound too 649 waitForOutboundGateways(t, s2, 1, time.Second) 650 651 c, cr, _ := newClientForServer(s1) 652 defer c.close() 653 654 connect := "CONNECT {\"headers\":true}" 655 subOp := "SUB foo 1" 656 pingOp := "PING\r\n" 657 cmd := strings.Join([]string{connect, subOp, pingOp}, "\r\n") 658 c.parseAsync(cmd) 659 if _, err := cr.ReadString('\n'); err != nil { 660 t.Fatalf("Error receiving msg from server: %v\n", err) 661 } 662 663 // Wait for interest to be registered on s2 664 checkGWInterestOnlyModeInterestOn(t, s2, "A", globalAccountName, "foo") 665 666 b, _, _ := newClientForServer(s2) 667 defer b.close() 668 669 pubOp := "HPUB foo 12 14\r\nName:Derek\r\nOK\r\n" 670 cmd = strings.Join([]string{connect, pubOp}, "\r\n") 671 b.parseAsync(cmd) 672 673 l, err := cr.ReadString('\n') 674 if err != nil { 675 t.Fatalf("Error receiving msg from server: %v\n", err) 676 } 677 am := smsgPat.FindAllStringSubmatch(l, -1) 678 if len(am) == 0 { 679 t.Fatalf("Did not get a correct match for %q", l) 680 } 681 matches := am[0] 682 if len(matches) != 6 { 683 t.Fatalf("Did not get correct # matches: %d vs %d\n", len(matches), 6) 684 } 685 if matches[SUB_INDEX] != "foo" { 686 t.Fatalf("Did not get correct subject: '%s'\n", matches[SUB_INDEX]) 687 } 688 if matches[SID_INDEX] != "1" { 689 t.Fatalf("Did not get correct sid: '%s'\n", matches[SID_INDEX]) 690 } 691 if matches[LEN_INDEX] != "2" { 692 t.Fatalf("Did not get correct msg length: '%s'\n", matches[LEN_INDEX]) 693 } 
// TestGatewaySolicitDelay covers two aspects of the gateway solicit delay:
//
//  1. With gatewaysSolicitDelay set to 0, server A must not have an outbound
//     gateway 500ms after start, but must have one eventually.
//  2. With a 2 second solicit delay, shutting the server down right after
//     start must complete in under 2 seconds.
func TestGatewaySolicitDelay(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	// Set the solicit delay to 0. This tests that server will use its
	// default value, currently set at 1 sec.
	o1.gatewaysSolicitDelay = 0
	start := time.Now()
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	// After 500ms, check outbound gateway. Should not be there.
	time.Sleep(500 * time.Millisecond)
	// Only assert if we are still within the default delay: if startup plus
	// the sleep already took longer than that, an outbound connection would
	// be legitimate and the check is skipped.
	if time.Since(start) < defaultSolicitGatewaysDelay {
		if s1.numOutboundGateways() > 0 {
			t.Fatalf("The outbound gateway was initiated sooner than expected (%v)", time.Since(start))
		}
	}
	// Ultimately, s1 should have an outbound gateway to s2.
	waitForOutboundGateways(t, s1, 1, 2*time.Second)
	// s2 should have an inbound gateway
	waitForInboundGateways(t, s2, 1, 2*time.Second)

	s1.Shutdown()
	// Make sure that the server can be shut down while waiting
	// for that initial solicit delay.
	o1.gatewaysSolicitDelay = 2 * time.Second
	s1 = runGatewayServer(o1)
	// Measure only the duration of Shutdown itself.
	start = time.Now()
	s1.Shutdown()
	if dur := time.Since(start); dur >= 2*time.Second {
		t.Fatalf("Looks like shutdown was delayed: %v", dur)
	}
}
// slowResolver is a test resolver whose LookupHost is deliberately slow.
type slowResolver struct {
	// inLookupCh, when non-nil, receives a (non-blocking) notification each
	// time LookupHost is entered.
	inLookupCh chan struct{}
	// releaseCh is what LookupHost waits on when inLookupCh is non-nil.
	releaseCh chan struct{}
}

// LookupHost always "resolves" h to itself and never returns an error; ctx
// is not consulted.
//
// When inLookupCh is nil the call simply sleeps for 500ms. Otherwise it
// makes a non-blocking send on inLookupCh and then blocks until a receive
// from releaseCh completes.
func (r *slowResolver) LookupHost(ctx context.Context, h string) ([]string, error) {
	if r.inLookupCh == nil {
		time.Sleep(500 * time.Millisecond)
		return []string{h}, nil
	}
	// Signal that a lookup is in progress, without blocking if nobody is
	// listening or the channel is full.
	select {
	case r.inLookupCh <- struct{}{}:
	default:
	}
	<-r.releaseCh
	return []string{h}, nil
}
s.Shutdown() 805 l := &captureFatalLogger{fatalCh: make(chan string, 1)} 806 s.SetLogger(l, false, false) 807 wg := sync.WaitGroup{} 808 wg.Add(1) 809 go func() { 810 s.Start() 811 wg.Done() 812 }() 813 select { 814 case e := <-l.fatalCh: 815 if !strings.Contains(e, errTxt) { 816 t.Fatalf("Error should contain %q, got %s", errTxt, e) 817 } 818 case <-time.After(time.Second): 819 t.Fatal("Should have got a fatal error") 820 } 821 s.Shutdown() 822 wg.Wait() 823 } 824 825 func TestGatewayListenError(t *testing.T) { 826 o2 := testDefaultOptionsForGateway("B") 827 s2 := runGatewayServer(o2) 828 defer s2.Shutdown() 829 830 o1 := testDefaultOptionsForGateway("A") 831 o1.Gateway.Port = s2.GatewayAddr().Port 832 testFatalErrorOnStart(t, o1, "listening on") 833 } 834 835 func TestGatewayWithListenToAny(t *testing.T) { 836 confB1 := createConfFile(t, []byte(` 837 listen: "127.0.0.1:-1" 838 cluster { 839 listen: "127.0.0.1:-1" 840 } 841 gateway { 842 name: "B" 843 listen: "0.0.0.0:-1" 844 } 845 `)) 846 sb1, ob1 := RunServerWithConfig(confB1) 847 defer sb1.Shutdown() 848 849 confB2 := createConfFile(t, []byte(fmt.Sprintf(` 850 listen: "127.0.0.1:-1" 851 cluster { 852 listen: "127.0.0.1:-1" 853 routes: ["%s"] 854 } 855 gateway { 856 name: "B" 857 listen: "0.0.0.0:-1" 858 } 859 `, fmt.Sprintf("nats://127.0.0.1:%d", sb1.ClusterAddr().Port)))) 860 sb2, ob2 := RunServerWithConfig(confB2) 861 defer sb2.Shutdown() 862 863 checkClusterFormed(t, sb1, sb2) 864 865 confA := createConfFile(t, []byte(fmt.Sprintf(` 866 listen: "127.0.0.1:-1" 867 cluster { 868 listen: "127.0.0.1:-1" 869 } 870 gateway { 871 name: "A" 872 listen: "0.0.0.0:-1" 873 gateways [ 874 { 875 name: "B" 876 urls: ["%s", "%s"] 877 } 878 ] 879 } 880 `, fmt.Sprintf("nats://127.0.0.1:%d", ob1.Gateway.Port), fmt.Sprintf("nats://127.0.0.1:%d", ob2.Gateway.Port)))) 881 oa := LoadConfig(confA) 882 oa.gatewaysSolicitDelay = 15 * time.Millisecond 883 sa := runGatewayServer(oa) 884 defer sa.Shutdown() 885 886 
// TestGatewayAdvertise runs three gateway servers (A, B, C). A is configured
// with a remote gateway to B, but its Gateway.Advertise is set to C's
// gateway address. The test then checks the resulting connection counts:
// A has an outbound to B and no inbound; B has one inbound and no outbound;
// C has neither.
func TestGatewayAdvertise(t *testing.T) {
	o3 := testDefaultOptionsForGateway("C")
	s3 := runGatewayServer(o3)
	defer s3.Shutdown()

	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	// Set the advertise so that this points to C
	o1.Gateway.Advertise = fmt.Sprintf("127.0.0.1:%d", s3.GatewayAddr().Port)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	// We should have outbound from s1 to s2
	waitForOutboundGateways(t, s1, 1, time.Second)
	// But no inbound from s2
	waitForInboundGateways(t, s1, 0, time.Second)

	// And since B tries to connect to A but reaches C, it should fail to connect,
	// and without connect retries, stop trying. So s2 keeps its single inbound
	// (the connection from s1) but has no outbound, and s3 has no
	// inbound/outbound at all.
	waitForInboundGateways(t, s2, 1, time.Second)
	waitForOutboundGateways(t, s2, 0, time.Second)
	waitForInboundGateways(t, s3, 0, time.Second)
	waitForOutboundGateways(t, s3, 0, time.Second)
}
1012 waitForGatewayFailedConnect(t, s1, "B", true, 2*time.Second) 1013 } 1014 1015 func TestGatewayTLS(t *testing.T) { 1016 o2 := testGatewayOptionsWithTLS(t, "B") 1017 s2 := runGatewayServer(o2) 1018 defer s2.Shutdown() 1019 1020 o1 := testGatewayOptionsFromToWithTLS(t, "A", "B", []string{fmt.Sprintf("nats://127.0.0.1:%d", s2.GatewayAddr().Port)}) 1021 s1 := runGatewayServer(o1) 1022 defer s1.Shutdown() 1023 1024 // s1 should have an outbound gateway to s2. 1025 waitForOutboundGateways(t, s1, 1, time.Second) 1026 // s2 should have an inbound gateway 1027 waitForInboundGateways(t, s2, 1, time.Second) 1028 // and vice-versa 1029 waitForOutboundGateways(t, s2, 1, time.Second) 1030 waitForInboundGateways(t, s1, 1, time.Second) 1031 1032 // Stop s2 server 1033 s2.Shutdown() 1034 1035 // gateway should go away 1036 waitForOutboundGateways(t, s1, 0, time.Second) 1037 waitForInboundGateways(t, s1, 0, time.Second) 1038 waitForOutboundGateways(t, s2, 0, time.Second) 1039 waitForInboundGateways(t, s2, 0, time.Second) 1040 1041 // Restart server 1042 s2 = runGatewayServer(o2) 1043 defer s2.Shutdown() 1044 1045 // gateway should reconnect 1046 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1047 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1048 waitForInboundGateways(t, s1, 1, 2*time.Second) 1049 waitForInboundGateways(t, s2, 1, 2*time.Second) 1050 1051 s1.Shutdown() 1052 // Wait for s2 to lose connections to s1. 1053 waitForOutboundGateways(t, s2, 0, 2*time.Second) 1054 waitForInboundGateways(t, s2, 0, 2*time.Second) 1055 1056 // Make an explicit TLS config for remote gateway config "B" 1057 // on cluster A. 1058 o1.Gateway.Gateways[0].TLSConfig = o1.Gateway.TLSConfig.Clone() 1059 u, _ := url.Parse(fmt.Sprintf("tls://localhost:%d", s2.GatewayAddr().Port)) 1060 o1.Gateway.Gateways[0].URLs = []*url.URL{u} 1061 // Make the TLSTimeout so small that it should fail to connect. 
1062 smallTimeout := 0.00000001 1063 o1.Gateway.Gateways[0].TLSTimeout = smallTimeout 1064 s1 = runGatewayServer(o1) 1065 defer s1.Shutdown() 1066 1067 // Check that s1 reports connection failures 1068 waitForGatewayFailedConnect(t, s1, "B", true, 2*time.Second) 1069 1070 // Check that TLSConfig from s1's remote "B" is based on 1071 // what we have configured. 1072 cfg := s1.getRemoteGateway("B") 1073 cfg.RLock() 1074 tlsName := cfg.tlsName 1075 timeout := cfg.TLSTimeout 1076 cfg.RUnlock() 1077 if tlsName != "localhost" { 1078 t.Fatalf("Expected server name to be localhost, got %v", tlsName) 1079 } 1080 if timeout != smallTimeout { 1081 t.Fatalf("Expected tls timeout to be %v, got %v", smallTimeout, timeout) 1082 } 1083 s1.Shutdown() 1084 // Wait for s2 to lose connections to s1. 1085 waitForOutboundGateways(t, s2, 0, 2*time.Second) 1086 waitForInboundGateways(t, s2, 0, 2*time.Second) 1087 1088 // Remove explicit TLSTimeout from gateway "B" and check that 1089 // we use the A's spec one. 
1090 o1.Gateway.Gateways[0].TLSTimeout = 0 1091 s1 = runGatewayServer(o1) 1092 defer s1.Shutdown() 1093 1094 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1095 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1096 waitForInboundGateways(t, s1, 1, 2*time.Second) 1097 waitForInboundGateways(t, s2, 1, 2*time.Second) 1098 1099 cfg = s1.getRemoteGateway("B") 1100 cfg.RLock() 1101 timeout = cfg.TLSTimeout 1102 cfg.RUnlock() 1103 if timeout != o1.Gateway.TLSTimeout { 1104 t.Fatalf("Expected tls timeout to be %v, got %v", o1.Gateway.TLSTimeout, timeout) 1105 } 1106 } 1107 1108 func TestGatewayTLSErrors(t *testing.T) { 1109 o2 := testDefaultOptionsForGateway("B") 1110 s2 := runGatewayServer(o2) 1111 defer s2.Shutdown() 1112 1113 o1 := testGatewayOptionsFromToWithTLS(t, "A", "B", []string{fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port)}) 1114 s1 := runGatewayServer(o1) 1115 defer s1.Shutdown() 1116 1117 // Expect s1 to have a failed to connect count > 0 1118 waitForGatewayFailedConnect(t, s1, "B", true, 2*time.Second) 1119 } 1120 1121 func TestGatewayServerNameInTLSConfig(t *testing.T) { 1122 o2 := testDefaultOptionsForGateway("B") 1123 var ( 1124 tc = &TLSConfigOpts{} 1125 err error 1126 ) 1127 tc.CertFile = "../test/configs/certs/server-noip.pem" 1128 tc.KeyFile = "../test/configs/certs/server-key-noip.pem" 1129 tc.CaFile = "../test/configs/certs/ca.pem" 1130 o2.Gateway.TLSConfig, err = GenTLSConfig(tc) 1131 if err != nil { 1132 t.Fatalf("Error generating TLS config: %v", err) 1133 } 1134 o2.Gateway.TLSConfig.ClientAuth = tls.RequireAndVerifyClientCert 1135 o2.Gateway.TLSConfig.RootCAs = o2.Gateway.TLSConfig.ClientCAs 1136 o2.Gateway.TLSTimeout = 2.0 1137 s2 := runGatewayServer(o2) 1138 defer s2.Shutdown() 1139 1140 o1 := testGatewayOptionsFromToWithTLS(t, "A", "B", []string{fmt.Sprintf("nats://127.0.0.1:%d", s2.GatewayAddr().Port)}) 1141 s1 := runGatewayServer(o1) 1142 defer s1.Shutdown() 1143 1144 // s1 should fail to connect since we don't have proper 
expected hostname. 1145 waitForGatewayFailedConnect(t, s1, "B", true, 2*time.Second) 1146 1147 // Now set server name, and it should work. 1148 s1.Shutdown() 1149 o1.Gateway.TLSConfig.ServerName = "localhost" 1150 s1 = runGatewayServer(o1) 1151 defer s1.Shutdown() 1152 1153 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1154 } 1155 1156 func TestGatewayWrongDestination(t *testing.T) { 1157 // Start a server with a gateway named "C" 1158 o2 := testDefaultOptionsForGateway("C") 1159 s2 := runGatewayServer(o2) 1160 defer s2.Shutdown() 1161 1162 // Configure a gateway to "B", but since we are connecting to "C"... 1163 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1164 s1 := runGatewayServer(o1) 1165 defer s1.Shutdown() 1166 1167 // we should not be able to connect. 1168 waitForGatewayFailedConnect(t, s1, "B", true, time.Second) 1169 1170 // Shutdown s2 and fix the gateway name. 1171 // s1 should then connect ok and failed connect should be cleared. 1172 s2.Shutdown() 1173 1174 // Reset the conn attempts 1175 cfg := s1.getRemoteGateway("B") 1176 cfg.resetConnAttempts() 1177 1178 o2.Gateway.Name = "B" 1179 o2.Cluster.Name = "B" 1180 s2 = runGatewayServer(o2) 1181 defer s2.Shutdown() 1182 1183 // At some point, the number of failed connect count should be reset to 0. 1184 waitForGatewayFailedConnect(t, s1, "B", false, 2*time.Second) 1185 } 1186 1187 func TestGatewayConnectToWrongPort(t *testing.T) { 1188 o2 := testDefaultOptionsForGateway("B") 1189 s2 := runGatewayServer(o2) 1190 defer s2.Shutdown() 1191 1192 // Configure a gateway to "B", but connect to the wrong port 1193 urls := []string{fmt.Sprintf("nats://127.0.0.1:%d", s2.Addr().(*net.TCPAddr).Port)} 1194 o1 := testGatewayOptionsFromToWithURLs(t, "A", "B", urls) 1195 s1 := runGatewayServer(o1) 1196 defer s1.Shutdown() 1197 1198 // we should not be able to connect. 
1199 waitForGatewayFailedConnect(t, s1, "B", true, time.Second) 1200 1201 s1.Shutdown() 1202 1203 // Repeat with route port 1204 urls = []string{fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port)} 1205 o1 = testGatewayOptionsFromToWithURLs(t, "A", "B", urls) 1206 s1 = runGatewayServer(o1) 1207 defer s1.Shutdown() 1208 1209 // we should not be able to connect. 1210 waitForGatewayFailedConnect(t, s1, "B", true, time.Second) 1211 1212 s1.Shutdown() 1213 1214 // Now have a client connect to s2's gateway port. 1215 nc, err := nats.Connect(fmt.Sprintf("nats://127.0.0.1:%d", s2.GatewayAddr().Port)) 1216 if err == nil { 1217 nc.Close() 1218 t.Fatal("Expected error, got none") 1219 } 1220 } 1221 1222 func TestGatewayCreateImplicit(t *testing.T) { 1223 // Create a regular cluster of 2 servers 1224 o2 := testDefaultOptionsForGateway("B") 1225 s2 := runGatewayServer(o2) 1226 defer s2.Shutdown() 1227 1228 o3 := testDefaultOptionsForGateway("B") 1229 o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port)) 1230 s3 := runGatewayServer(o3) 1231 defer s3.Shutdown() 1232 1233 checkClusterFormed(t, s2, s3) 1234 1235 // Now start s1 that creates a Gateway connection to s2 or s3 1236 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2, s3) 1237 s1 := runGatewayServer(o1) 1238 defer s1.Shutdown() 1239 1240 // We should have an outbound gateway connection on ALL servers. 1241 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1242 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1243 waitForOutboundGateways(t, s3, 1, 2*time.Second) 1244 1245 // Server s1 must have 2 inbound ones 1246 waitForInboundGateways(t, s1, 2, 2*time.Second) 1247 1248 // However, s1 may have created the outbound to s2 or s3. It is possible that 1249 // either s2 or s3 does not an inbound connection. 
1250 s2Inbound := s2.numInboundGateways() 1251 s3Inbound := s3.numInboundGateways() 1252 if (s2Inbound == 1 && s3Inbound != 0) || (s3Inbound == 1 && s2Inbound != 0) { 1253 t.Fatalf("Unexpected inbound for s2/s3: %v/%v", s2Inbound, s3Inbound) 1254 } 1255 } 1256 1257 func TestGatewayCreateImplicitOnNewRoute(t *testing.T) { 1258 // Start with only 2 clusters of 1 server each 1259 o2 := testDefaultOptionsForGateway("B") 1260 s2 := runGatewayServer(o2) 1261 defer s2.Shutdown() 1262 1263 // Now start s1 that creates a Gateway connection to s2 1264 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1265 s1 := runGatewayServer(o1) 1266 defer s1.Shutdown() 1267 1268 // Check outbounds 1269 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1270 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1271 1272 // Now add a server to cluster B 1273 o3 := testDefaultOptionsForGateway("B") 1274 o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port)) 1275 s3 := runGatewayServer(o3) 1276 defer s3.Shutdown() 1277 1278 // Wait for cluster between s2/s3 to form 1279 checkClusterFormed(t, s2, s3) 1280 1281 // s3 should have been notified about existence of A and create its gateway to A. 1282 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1283 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1284 waitForOutboundGateways(t, s3, 1, 2*time.Second) 1285 } 1286 1287 func TestGatewayImplicitReconnect(t *testing.T) { 1288 o2 := testDefaultOptionsForGateway("B") 1289 o2.Gateway.ConnectRetries = 5 1290 s2 := runGatewayServer(o2) 1291 defer s2.Shutdown() 1292 1293 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1294 s1 := runGatewayServer(o1) 1295 defer s1.Shutdown() 1296 1297 // s1 should have an outbound gateway to s2. 1298 waitForOutboundGateways(t, s1, 1, time.Second) 1299 // s2 should have an inbound gateway 1300 waitForInboundGateways(t, s2, 1, time.Second) 1301 // It will have also created an implicit outbound connection to s1. 
1302 // We need to wait for that implicit outbound connection to be made 1303 // to show that it will try to reconnect when we stop/restart s1 1304 // (without config to connect to B). 1305 waitForOutboundGateways(t, s2, 1, time.Second) 1306 1307 // Shutdown s1, remove the gateway from A to B and restart. 1308 s1.Shutdown() 1309 o1.Gateway.Gateways = o1.Gateway.Gateways[:0] 1310 s1 = runGatewayServer(o1) 1311 defer s1.Shutdown() 1312 1313 // s1 should have both outbound and inbound to s2 1314 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1315 waitForInboundGateways(t, s1, 1, 2*time.Second) 1316 1317 // Same for s2 1318 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1319 waitForInboundGateways(t, s2, 1, 2*time.Second) 1320 1321 // Verify that s2 still has "A" in its gateway config 1322 if s2.getRemoteGateway("A") == nil { 1323 t.Fatal("Gateway A should be in s2") 1324 } 1325 } 1326 1327 func TestGatewayImplicitReconnectRace(t *testing.T) { 1328 ob := testDefaultOptionsForGateway("B") 1329 resolver := &slowResolver{ 1330 inLookupCh: make(chan struct{}, 1), 1331 releaseCh: make(chan struct{}), 1332 } 1333 ob.Gateway.resolver = resolver 1334 sb := runGatewayServer(ob) 1335 defer sb.Shutdown() 1336 1337 oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 1338 sa1 := runGatewayServer(oa1) 1339 defer sa1.Shutdown() 1340 1341 // Wait for the proper connections 1342 waitForOutboundGateways(t, sa1, 1, time.Second) 1343 waitForOutboundGateways(t, sb, 1, time.Second) 1344 waitForInboundGateways(t, sa1, 1, time.Second) 1345 waitForInboundGateways(t, sb, 1, time.Second) 1346 1347 // On sb, change the URL to sa1 so that it is a name, instead of an IP, 1348 // so that we hit the slow resolver. 1349 cfg := sb.getRemoteGateway("A") 1350 cfg.updateURLs([]string{fmt.Sprintf("localhost:%d", sa1.GatewayAddr().Port)}) 1351 1352 // Shutdown sa1 now... 
1353 sa1.Shutdown() 1354 1355 // Wait to be notified that B has detected the connection close 1356 // and it is trying to resolve the host during the reconnect. 1357 <-resolver.inLookupCh 1358 1359 // Start a new "A" server (sa2). 1360 oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 1361 sa2 := runGatewayServer(oa2) 1362 defer sa2.Shutdown() 1363 1364 // Make sure we have our outbound to sb registered on sa2 and inbound 1365 // from sa2 on sb before releasing the resolver. 1366 waitForOutboundGateways(t, sa2, 1, 2*time.Second) 1367 waitForInboundGateways(t, sb, 1, 2*time.Second) 1368 1369 // Now release the resolver and ensure we have all connections. 1370 close(resolver.releaseCh) 1371 1372 waitForOutboundGateways(t, sb, 1, 2*time.Second) 1373 waitForInboundGateways(t, sa2, 1, 2*time.Second) 1374 } 1375 1376 type gwReconnAttemptLogger struct { 1377 DummyLogger 1378 errCh chan string 1379 } 1380 1381 func (l *gwReconnAttemptLogger) Errorf(format string, v ...any) { 1382 msg := fmt.Sprintf(format, v...) 1383 if strings.Contains(msg, `Error connecting to implicit gateway "A"`) { 1384 select { 1385 case l.errCh <- msg: 1386 default: 1387 } 1388 } 1389 } 1390 1391 func TestGatewayImplicitReconnectHonorConnectRetries(t *testing.T) { 1392 ob := testDefaultOptionsForGateway("B") 1393 ob.ReconnectErrorReports = 1 1394 ob.Gateway.ConnectRetries = 2 1395 sb := runGatewayServer(ob) 1396 defer sb.Shutdown() 1397 1398 l := &gwReconnAttemptLogger{errCh: make(chan string, 3)} 1399 sb.SetLogger(l, true, false) 1400 1401 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 1402 sa := runGatewayServer(oa) 1403 defer sa.Shutdown() 1404 1405 // Wait for the proper connections 1406 waitForOutboundGateways(t, sa, 1, time.Second) 1407 waitForOutboundGateways(t, sb, 1, time.Second) 1408 waitForInboundGateways(t, sa, 1, time.Second) 1409 waitForInboundGateways(t, sb, 1, time.Second) 1410 1411 // Now have C connect to B. 
1412 oc := testGatewayOptionsFromToWithServers(t, "C", "B", sb) 1413 sc := runGatewayServer(oc) 1414 defer sc.Shutdown() 1415 1416 // Wait for the proper connections 1417 waitForOutboundGateways(t, sa, 2, time.Second) 1418 waitForOutboundGateways(t, sb, 2, time.Second) 1419 waitForOutboundGateways(t, sc, 2, time.Second) 1420 waitForInboundGateways(t, sa, 2, time.Second) 1421 waitForInboundGateways(t, sb, 2, time.Second) 1422 waitForInboundGateways(t, sc, 2, time.Second) 1423 1424 // Shutdown sa now... 1425 sa.Shutdown() 1426 1427 // B will try to reconnect to A 3 times (we stop after attempts > ConnectRetries) 1428 timeout := time.NewTimer(time.Second) 1429 for i := 0; i < 3; i++ { 1430 select { 1431 case <-l.errCh: 1432 // OK 1433 case <-timeout.C: 1434 t.Fatal("Did not get debug trace about reconnect") 1435 } 1436 } 1437 // If we get 1 more, we have an issue! 1438 select { 1439 case e := <-l.errCh: 1440 t.Fatalf("Should not have attempted to reconnect: %q", e) 1441 case <-time.After(250 * time.Millisecond): 1442 // OK! 
1443 } 1444 1445 waitForOutboundGateways(t, sb, 1, 2*time.Second) 1446 waitForInboundGateways(t, sb, 1, 2*time.Second) 1447 waitForOutboundGateways(t, sc, 1, 2*time.Second) 1448 waitForInboundGateways(t, sc, 1, 2*time.Second) 1449 } 1450 1451 func TestGatewayURLsFromClusterSentInINFO(t *testing.T) { 1452 o2 := testDefaultOptionsForGateway("B") 1453 s2 := runGatewayServer(o2) 1454 defer s2.Shutdown() 1455 1456 o3 := testDefaultOptionsForGateway("B") 1457 o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port)) 1458 s3 := runGatewayServer(o3) 1459 defer s3.Shutdown() 1460 1461 checkClusterFormed(t, s2, s3) 1462 1463 // Now start s1 that creates a Gateway connection to s2 1464 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1465 s1 := runGatewayServer(o1) 1466 defer s1.Shutdown() 1467 1468 // Make sure we have proper outbound/inbound 1469 waitForOutboundGateways(t, s1, 1, time.Second) 1470 waitForOutboundGateways(t, s2, 1, time.Second) 1471 waitForOutboundGateways(t, s3, 1, time.Second) 1472 1473 // Although s1 connected to s2 and knew only about s2, it should have 1474 // received the list of gateway URLs in the B cluster. So if we shutdown 1475 // server s2, it should be able to reconnect to s3. 1476 s2.Shutdown() 1477 // Wait for s3 to register that there s2 is gone. 1478 checkNumRoutes(t, s3, 0) 1479 // s1 should have reconnected to s3 because it learned about it 1480 // when connecting earlier to s2. 1481 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1482 // Also make sure that the gateway's urls map has 2 urls. 1483 gw := s1.getRemoteGateway("B") 1484 if gw == nil { 1485 t.Fatal("Did not find gateway B") 1486 } 1487 gw.RLock() 1488 l := len(gw.urls) 1489 gw.RUnlock() 1490 if l != 2 { 1491 t.Fatalf("S1 should have 2 urls, got %v", l) 1492 } 1493 } 1494 1495 func TestGatewayUseUpdatedURLs(t *testing.T) { 1496 // For this test, we have cluster B with an explicit gateway to cluster A 1497 // on a given URL. 
Then we create cluster A with a gateway to B with server B's 1498 // GW url, and we expect server B to ultimately create an outbound GW connection 1499 // to server A (with the URL it will get from server A connecting to it). 1500 1501 ob := testGatewayOptionsFromToWithURLs(t, "B", "A", []string{"nats://127.0.0.1:1234"}) 1502 sb := runGatewayServer(ob) 1503 defer sb.Shutdown() 1504 1505 // Add a delay before starting server A to make sure that server B start 1506 // initiating the connection to A on inexistant server at :1234. 1507 time.Sleep(100 * time.Millisecond) 1508 1509 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 1510 sa := runGatewayServer(oa) 1511 defer sa.Shutdown() 1512 1513 // sa should have no problem creating outbound connection to sb 1514 waitForOutboundGateways(t, sa, 1, time.Second) 1515 1516 // Make sure that since sb learns about sa's GW URL, it can successfully 1517 // connect to it. 1518 waitForOutboundGateways(t, sb, 1, 3*time.Second) 1519 waitForInboundGateways(t, sb, 1, time.Second) 1520 waitForInboundGateways(t, sa, 1, time.Second) 1521 } 1522 1523 func TestGatewayAutoDiscovery(t *testing.T) { 1524 o4 := testDefaultOptionsForGateway("D") 1525 s4 := runGatewayServer(o4) 1526 defer s4.Shutdown() 1527 1528 o3 := testGatewayOptionsFromToWithServers(t, "C", "D", s4) 1529 s3 := runGatewayServer(o3) 1530 defer s3.Shutdown() 1531 1532 o2 := testGatewayOptionsFromToWithServers(t, "B", "C", s3) 1533 s2 := runGatewayServer(o2) 1534 defer s2.Shutdown() 1535 1536 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1537 s1 := runGatewayServer(o1) 1538 defer s1.Shutdown() 1539 1540 // Each server should have 3 outbound gateway connections. 
1541 waitForOutboundGateways(t, s1, 3, 2*time.Second) 1542 waitForOutboundGateways(t, s2, 3, 2*time.Second) 1543 waitForOutboundGateways(t, s3, 3, 2*time.Second) 1544 waitForOutboundGateways(t, s4, 3, 2*time.Second) 1545 1546 s1.Shutdown() 1547 s2.Shutdown() 1548 s3.Shutdown() 1549 s4.Shutdown() 1550 1551 o2 = testDefaultOptionsForGateway("B") 1552 s2 = runGatewayServer(o2) 1553 defer s2.Shutdown() 1554 1555 o4 = testGatewayOptionsFromToWithServers(t, "D", "B", s2) 1556 s4 = runGatewayServer(o4) 1557 defer s4.Shutdown() 1558 1559 o3 = testGatewayOptionsFromToWithServers(t, "C", "B", s2) 1560 s3 = runGatewayServer(o3) 1561 defer s3.Shutdown() 1562 1563 o1 = testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1564 s1 = runGatewayServer(o1) 1565 defer s1.Shutdown() 1566 1567 // Each server should have 3 outbound gateway connections. 1568 waitForOutboundGateways(t, s1, 3, 2*time.Second) 1569 waitForOutboundGateways(t, s2, 3, 2*time.Second) 1570 waitForOutboundGateways(t, s3, 3, 2*time.Second) 1571 waitForOutboundGateways(t, s4, 3, 2*time.Second) 1572 1573 s1.Shutdown() 1574 s2.Shutdown() 1575 s3.Shutdown() 1576 s4.Shutdown() 1577 1578 o1 = testDefaultOptionsForGateway("A") 1579 s1 = runGatewayServer(o1) 1580 defer s1.Shutdown() 1581 1582 o2 = testDefaultOptionsForGateway("A") 1583 o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s1.ClusterAddr().Port)) 1584 s2 = runGatewayServer(o2) 1585 defer s2.Shutdown() 1586 1587 o3 = testDefaultOptionsForGateway("A") 1588 o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s1.ClusterAddr().Port)) 1589 s3 = runGatewayServer(o3) 1590 defer s3.Shutdown() 1591 1592 checkClusterFormed(t, s1, s2, s3) 1593 1594 o4 = testGatewayOptionsFromToWithServers(t, "B", "A", s1) 1595 s4 = runGatewayServer(o4) 1596 defer s4.Shutdown() 1597 1598 waitForOutboundGateways(t, s1, 1, 2*time.Second) 1599 waitForOutboundGateways(t, s2, 1, 2*time.Second) 1600 waitForOutboundGateways(t, s3, 1, 2*time.Second) 1601 
waitForOutboundGateways(t, s4, 1, 2*time.Second) 1602 waitForInboundGateways(t, s4, 3, 2*time.Second) 1603 1604 o5 := testGatewayOptionsFromToWithServers(t, "C", "B", s4) 1605 s5 := runGatewayServer(o5) 1606 defer s5.Shutdown() 1607 1608 waitForOutboundGateways(t, s1, 2, 2*time.Second) 1609 waitForOutboundGateways(t, s2, 2, 2*time.Second) 1610 waitForOutboundGateways(t, s3, 2, 2*time.Second) 1611 waitForOutboundGateways(t, s4, 2, 2*time.Second) 1612 waitForInboundGateways(t, s4, 4, 2*time.Second) 1613 waitForOutboundGateways(t, s5, 2, 2*time.Second) 1614 waitForInboundGateways(t, s5, 4, 2*time.Second) 1615 } 1616 1617 func TestGatewayRejectUnknown(t *testing.T) { 1618 o2 := testDefaultOptionsForGateway("B") 1619 s2 := runGatewayServer(o2) 1620 defer s2.Shutdown() 1621 1622 // Create a gateway from A to B, but configure B to reject non configured ones. 1623 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1624 o1.Gateway.RejectUnknown = true 1625 s1 := runGatewayServer(o1) 1626 defer s1.Shutdown() 1627 1628 // Wait for outbound/inbound to be created. 1629 waitForOutboundGateways(t, s1, 1, time.Second) 1630 waitForOutboundGateways(t, s2, 1, time.Second) 1631 waitForInboundGateways(t, s1, 1, time.Second) 1632 waitForInboundGateways(t, s2, 1, time.Second) 1633 1634 // Create gateway C to B. B will tell C to connect to A, 1635 // which A should reject. 1636 o3 := testGatewayOptionsFromToWithServers(t, "C", "B", s2) 1637 s3 := runGatewayServer(o3) 1638 defer s3.Shutdown() 1639 1640 // s3 should have outbound to B, but not to A 1641 waitForOutboundGateways(t, s3, 1, time.Second) 1642 // s2 should have 2 inbounds (one from s1 one from s3) 1643 waitForInboundGateways(t, s2, 2, time.Second) 1644 1645 // s1 should have single outbound/inbound with s2. 
1646 waitForOutboundGateways(t, s1, 1, time.Second) 1647 waitForInboundGateways(t, s1, 1, time.Second) 1648 1649 // It should not have a registered remote gateway with C (s3) 1650 if s1.getOutboundGatewayConnection("C") != nil { 1651 t.Fatalf("A should not have outbound gateway to C") 1652 } 1653 if s1.getRemoteGateway("C") != nil { 1654 t.Fatalf("A should not have a registered remote gateway to C") 1655 } 1656 1657 // Restart s1 and this time, B will tell A to connect to C. 1658 // But A will not even attempt that since it does not have 1659 // C configured. 1660 s1.Shutdown() 1661 waitForOutboundGateways(t, s2, 1, time.Second) 1662 waitForInboundGateways(t, s2, 1, time.Second) 1663 s1 = runGatewayServer(o1) 1664 defer s1.Shutdown() 1665 waitForOutboundGateways(t, s2, 2, time.Second) 1666 waitForInboundGateways(t, s2, 2, time.Second) 1667 waitForOutboundGateways(t, s1, 1, time.Second) 1668 waitForInboundGateways(t, s1, 1, time.Second) 1669 waitForOutboundGateways(t, s3, 1, time.Second) 1670 waitForInboundGateways(t, s3, 1, time.Second) 1671 // It should not have a registered remote gateway with C (s3) 1672 if s1.getOutboundGatewayConnection("C") != nil { 1673 t.Fatalf("A should not have outbound gateway to C") 1674 } 1675 if s1.getRemoteGateway("C") != nil { 1676 t.Fatalf("A should not have a registered remote gateway to C") 1677 } 1678 } 1679 1680 func TestGatewayNoReconnectOnClose(t *testing.T) { 1681 o2 := testDefaultOptionsForGateway("B") 1682 s2 := runGatewayServer(o2) 1683 defer s2.Shutdown() 1684 1685 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1686 s1 := runGatewayServer(o1) 1687 defer s1.Shutdown() 1688 1689 waitForOutboundGateways(t, s1, 1, time.Second) 1690 waitForOutboundGateways(t, s2, 1, time.Second) 1691 1692 // Shutdown s1, and check that there is no attempt to reconnect. 
1693 s1.Shutdown() 1694 time.Sleep(250 * time.Millisecond) 1695 waitForOutboundGateways(t, s1, 0, time.Second) 1696 waitForOutboundGateways(t, s2, 0, time.Second) 1697 waitForInboundGateways(t, s2, 0, time.Second) 1698 } 1699 1700 func TestGatewayDontSendSubInterest(t *testing.T) { 1701 o2 := testDefaultOptionsForGateway("B") 1702 s2 := runGatewayServer(o2) 1703 defer s2.Shutdown() 1704 1705 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1706 s1 := runGatewayServer(o1) 1707 defer s1.Shutdown() 1708 1709 waitForOutboundGateways(t, s1, 1, time.Second) 1710 waitForOutboundGateways(t, s2, 1, time.Second) 1711 1712 s2Url := fmt.Sprintf("nats://127.0.0.1:%d", o2.Port) 1713 subnc := natsConnect(t, s2Url) 1714 defer subnc.Close() 1715 natsSub(t, subnc, "foo", func(_ *nats.Msg) {}) 1716 natsFlush(t, subnc) 1717 1718 checkExpectedSubs(t, 1, s2) 1719 // Subscription should not be sent to s1 1720 checkExpectedSubs(t, 0, s1) 1721 1722 // Restart s1 1723 s1.Shutdown() 1724 s1 = runGatewayServer(o1) 1725 defer s1.Shutdown() 1726 waitForOutboundGateways(t, s1, 1, time.Second) 1727 waitForOutboundGateways(t, s2, 1, time.Second) 1728 1729 checkExpectedSubs(t, 1, s2) 1730 checkExpectedSubs(t, 0, s1) 1731 } 1732 1733 func setAccountUserPassInOptions(o *Options, accName, username, password string) { 1734 acc := NewAccount(accName) 1735 o.Accounts = append(o.Accounts, acc) 1736 o.Users = append(o.Users, &User{Username: username, Password: password, Account: acc}) 1737 } 1738 1739 func TestGatewayAccountInterest(t *testing.T) { 1740 GatewayDoNotForceInterestOnlyMode(true) 1741 defer GatewayDoNotForceInterestOnlyMode(false) 1742 1743 o2 := testDefaultOptionsForGateway("B") 1744 // Add users to cause s2 to require auth. Will add an account with user later. 
1745 o2.Users = append([]*User(nil), &User{Username: "test", Password: "pwd"}) 1746 s2 := runGatewayServer(o2) 1747 defer s2.Shutdown() 1748 1749 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 1750 setAccountUserPassInOptions(o1, "$foo", "ivan", "password") 1751 s1 := runGatewayServer(o1) 1752 defer s1.Shutdown() 1753 1754 // Make this server initiate connection to A, so it is faster 1755 // when restarting it at the end of this test. 1756 o3 := testGatewayOptionsFromToWithServers(t, "C", "A", s1) 1757 setAccountUserPassInOptions(o3, "$foo", "ivan", "password") 1758 s3 := runGatewayServer(o3) 1759 defer s3.Shutdown() 1760 1761 waitForOutboundGateways(t, s1, 2, time.Second) 1762 waitForOutboundGateways(t, s2, 2, time.Second) 1763 waitForOutboundGateways(t, s3, 2, time.Second) 1764 1765 s1Url := fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o1.Port) 1766 nc := natsConnect(t, s1Url) 1767 defer nc.Close() 1768 natsPub(t, nc, "foo", []byte("hello")) 1769 natsFlush(t, nc) 1770 1771 // On first send, the message should be sent. 1772 checkCount := func(t *testing.T, c *client, expected int) { 1773 t.Helper() 1774 c.mu.Lock() 1775 out := c.outMsgs 1776 c.mu.Unlock() 1777 if int(out) != expected { 1778 t.Fatalf("Expected %d message(s) to be sent over, got %v", expected, out) 1779 } 1780 } 1781 gwcb := s1.getOutboundGatewayConnection("B") 1782 checkCount(t, gwcb, 1) 1783 gwcc := s1.getOutboundGatewayConnection("C") 1784 checkCount(t, gwcc, 1) 1785 1786 // S2 and S3 should have sent a protocol indicating no account interest. 1787 checkForAccountNoInterest(t, gwcb, "$foo", true, 2*time.Second) 1788 checkForAccountNoInterest(t, gwcc, "$foo", true, 2*time.Second) 1789 // Second send should not go to B nor C. 1790 natsPub(t, nc, "foo", []byte("hello")) 1791 natsFlush(t, nc) 1792 checkCount(t, gwcb, 1) 1793 checkCount(t, gwcc, 1) 1794 1795 // Add account to S2 and a client, this should clear the no-interest 1796 // for that account. 
1797 s2FooAcc, err := s2.RegisterAccount("$foo") 1798 if err != nil { 1799 t.Fatalf("Error registering account: %v", err) 1800 } 1801 s2.mu.Lock() 1802 s2.users["ivan"] = &User{Account: s2FooAcc, Username: "ivan", Password: "password"} 1803 s2.mu.Unlock() 1804 s2Url := fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o2.Port) 1805 ncS2 := natsConnect(t, s2Url) 1806 defer ncS2.Close() 1807 // Any subscription should cause s2 to send an A+ 1808 natsSubSync(t, ncS2, "asub") 1809 // Wait for the A+ 1810 checkForAccountNoInterest(t, gwcb, "$foo", false, 2*time.Second) 1811 1812 // Now publish a message that should go to B 1813 natsPub(t, nc, "foo", []byte("hello")) 1814 natsFlush(t, nc) 1815 checkCount(t, gwcb, 2) 1816 // Still won't go to C since there is no sub interest 1817 checkCount(t, gwcc, 1) 1818 1819 // We should have received a subject no interest for foo 1820 checkForSubjectNoInterest(t, gwcb, "$foo", "foo", true, 2*time.Second) 1821 1822 // Now if we close the client, which removed the sole subscription, 1823 // and publish to a new subject, we should then get an A- 1824 ncS2.Close() 1825 // Wait a bit... 1826 time.Sleep(20 * time.Millisecond) 1827 // Publish on new subject 1828 natsPub(t, nc, "bar", []byte("hello")) 1829 natsFlush(t, nc) 1830 // It should go out to B... 1831 checkCount(t, gwcb, 3) 1832 // But then we should get a A- 1833 checkForAccountNoInterest(t, gwcb, "$foo", true, 2*time.Second) 1834 1835 // Restart C and that should reset the no-interest 1836 s3.Shutdown() 1837 s3 = runGatewayServer(o3) 1838 defer s3.Shutdown() 1839 1840 waitForOutboundGateways(t, s1, 2, 2*time.Second) 1841 waitForOutboundGateways(t, s2, 2, 2*time.Second) 1842 waitForOutboundGateways(t, s3, 2, 2*time.Second) 1843 1844 // First refresh gwcc 1845 gwcc = s1.getOutboundGatewayConnection("C") 1846 // Verify that it's count is 0 1847 checkCount(t, gwcc, 0) 1848 // Publish and now... 
natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	// it should not go to B (no sub interest)
	checkCount(t, gwcb, 3)
	// but will go to C
	checkCount(t, gwcc, 1)
}

// TestGatewayAccountUnsub checks that after the only client connection on B
// is closed, A stops forwarding most of the messages it publishes on "foo":
// the outbound message count from A to B must stay below 80% of what was sent.
func TestGatewayAccountUnsub(t *testing.T) {
	ob := testDefaultOptionsForGateway("B")
	sb := runGatewayServer(ob)
	defer sb.Shutdown()

	oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb)
	sa := runGatewayServer(oa)
	defer sa.Shutdown()

	waitForOutboundGateways(t, sa, 1, time.Second)
	waitForOutboundGateways(t, sb, 1, time.Second)
	waitForInboundGateways(t, sa, 1, time.Second)
	waitForInboundGateways(t, sb, 1, time.Second)

	// Connect on B
	ncb := natsConnect(t, fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port))
	defer ncb.Close()
	// Create subscription
	natsSub(t, ncb, "foo", func(m *nats.Msg) {
		ncb.Publish(m.Reply, []byte("reply"))
	})
	natsFlush(t, ncb)

	// Connect on A
	nca := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port))
	defer nca.Close()
	// Send a request
	if _, err := nca.Request("foo", []byte("req"), time.Second); err != nil {
		t.Fatalf("Error getting reply: %v", err)
	}

	// Now close connection on B
	ncb.Close()

	// Publish lots of messages on "foo" from A.
	// We should receive an A- shortly and the number
	// of outbound messages from A to B should not be
	// close to the number of messages sent here.
	total := 5000
	for i := 0; i < total; i++ {
		natsPub(t, nca, "foo", []byte("hello"))
		// Try to slow down things a bit to give a chance
		// to srvB to send the A- and to srvA to be able
		// to process it, which will then suppress the sends.
		if i%100 == 0 {
			natsFlush(t, nca)
		}
	}
	natsFlush(t, nca)

	c := sa.getOutboundGatewayConnection("B")
	c.mu.Lock()
	out := c.outMsgs
	c.mu.Unlock()

	// Fail if 80% or more of the published messages were still sent to B.
	if out >= int64(80*total)/100 {
		t.Fatalf("Unexpected number of messages sent from A to B: %v", out)
	}
}

// TestGatewaySubjectInterest follows the per-subject no-interest state kept on
// A's outbound gateway connection to B, together with that connection's
// outbound message count, while subscriptions on B are created and removed,
// after B is restarted, and after a subscription is created on a second
// server (s2bis) routed to B. It runs with GatewayDoNotForceInterestOnlyMode
// set to true for the duration of the test.
func TestGatewaySubjectInterest(t *testing.T) {
	GatewayDoNotForceInterestOnlyMode(true)
	defer GatewayDoNotForceInterestOnlyMode(false)

	o1 := testDefaultOptionsForGateway("A")
	setAccountUserPassInOptions(o1, "$foo", "ivan", "password")
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	o2 := testGatewayOptionsFromToWithServers(t, "B", "A", s1)
	setAccountUserPassInOptions(o2, "$foo", "ivan", "password")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	// We will create a subscription that we are not testing so
	// that we don't get an A- in this test.
	s2Url := fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o2.Port)
	ncb := natsConnect(t, s2Url)
	defer ncb.Close()
	natsSubSync(t, ncb, "not.used")
	checkExpectedSubs(t, 1, s2)

	s1Url := fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o1.Port)
	nc := natsConnect(t, s1Url)
	defer nc.Close()
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)

	// On first send, the message should be sent.
	// checkCount fails the test unless c.outMsgs equals expected.
	checkCount := func(t *testing.T, c *client, expected int) {
		t.Helper()
		c.mu.Lock()
		out := c.outMsgs
		c.mu.Unlock()
		if int(out) != expected {
			t.Fatalf("Expected %d message(s) to be sent over, got %v", expected, out)
		}
	}
	gwcb := s1.getOutboundGatewayConnection("B")
	checkCount(t, gwcb, 1)

	// S2 should have sent a protocol indicating no subject interest.
	// The closure reads gwcb when called, so it follows the reassignment
	// of gwcb made after B is restarted below.
	checkNoInterest := func(t *testing.T, subject string, expectedNoInterest bool) {
		t.Helper()
		checkForSubjectNoInterest(t, gwcb, "$foo", subject, expectedNoInterest, 2*time.Second)
	}
	checkNoInterest(t, "foo", true)
	// Second send should not go through to B
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 1)

	// Now create subscription interest on B (s2)
	ch := make(chan bool, 1)
	sub := natsSub(t, ncb, "foo", func(_ *nats.Msg) {
		ch <- true
	})
	natsFlush(t, ncb)
	checkExpectedSubs(t, 2, s2)
	checkExpectedSubs(t, 0, s1)

	// This should clear the no interest for this subject
	checkNoInterest(t, "foo", false)
	// Third send should go to B
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 2)

	// Make sure message is received
	waitCh(t, ch, "Did not get our message")
	// Now unsubscribe, there won't be an UNSUB sent to the gateway.
	natsUnsub(t, sub)
	natsFlush(t, ncb)
	checkExpectedSubs(t, 1, s2)
	checkExpectedSubs(t, 0, s1)

	// So now sending a message should go over, but then we should get an RS-
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 3)

	checkNoInterest(t, "foo", true)

	// Send one more time and now it should not go to B
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 3)

	// Send on bar, message should go over.
	natsPub(t, nc, "bar", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 4)

	// But now we should have received an RS- on bar.
	checkNoInterest(t, "bar", true)

	// Check that wildcards are supported. Create a subscription on '*' on B.
	// This should clear the no-interest on both "foo" and "bar"
	natsSub(t, ncb, "*", func(_ *nats.Msg) {})
	natsFlush(t, ncb)
	checkExpectedSubs(t, 2, s2)
	checkExpectedSubs(t, 0, s1)
	checkNoInterest(t, "foo", false)
	checkNoInterest(t, "bar", false)
	// Publish one message on foo and one on bar and they should go.
	natsPub(t, nc, "foo", []byte("hello"))
	natsPub(t, nc, "bar", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 6)

	// Restart B and that should clear everything on A
	ncb.Close()
	s2.Shutdown()
	s2 = runGatewayServer(o2)
	defer s2.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	ncb = natsConnect(t, s2Url)
	defer ncb.Close()
	natsSubSync(t, ncb, "not.used")
	checkExpectedSubs(t, 1, s2)

	gwcb = s1.getOutboundGatewayConnection("B")
	checkCount(t, gwcb, 0)
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 1)

	checkNoInterest(t, "foo", true)

	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 1)

	// Add a node to B cluster and subscribe there.
	// We want to ensure that the no-interest is cleared
	// when s2 receives remote SUB from s2bis
	o2bis := testGatewayOptionsFromToWithServers(t, "B", "A", s1)
	setAccountUserPassInOptions(o2bis, "$foo", "ivan", "password")
	o2bis.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port))
	s2bis := runGatewayServer(o2bis)
	defer s2bis.Shutdown()

	checkClusterFormed(t, s2, s2bis)

	// Make sure all outbound gateway connections are setup
	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)
	waitForOutboundGateways(t, s2bis, 1, time.Second)

	// A should have 2 inbound
	waitForInboundGateways(t, s1, 2, time.Second)

	// Create sub on s2bis
	ncb2bis := natsConnect(t, fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o2bis.Port))
	defer ncb2bis.Close()
	natsSub(t, ncb2bis, "foo", func(_ *nats.Msg) {})
	natsFlush(t, ncb2bis)

	// Wait for subscriptions to be registered locally on s2bis and remotely on s2
	checkExpectedSubs(t, 2, s2, s2bis)

	// Check that subject no-interest on A was cleared.
	checkNoInterest(t, "foo", false)

	// Now publish. Remember, s1 has outbound gateway to s2, and s2 does not
	// have a local subscription and has previously sent a no-interest on "foo".
	// We check that this has been cleared due to the interest on s2bis.
	natsPub(t, nc, "foo", []byte("hello"))
	natsFlush(t, nc)
	checkCount(t, gwcb, 2)
}

// TestGatewayDoesntSendBackToItself subscribes to "foo" on both A and B with a
// shared counting callback, publishes one message on A, and expects exactly
// two deliveries after a short pause (more would indicate a forwarding cycle).
func TestGatewayDoesntSendBackToItself(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	s2Url := fmt.Sprintf("nats://127.0.0.1:%d", o2.Port)
	nc2 := natsConnect(t, s2Url)
	defer nc2.Close()

	count := int32(0)
	cb := func(_ *nats.Msg) {
		atomic.AddInt32(&count, 1)
	}
	natsSub(t, nc2, "foo", cb)
	natsFlush(t, nc2)

	s1Url := fmt.Sprintf("nats://127.0.0.1:%d", o1.Port)
	nc1 := natsConnect(t, s1Url)
	defer nc1.Close()

	natsSub(t, nc1, "foo", cb)
	natsFlush(t, nc1)

	// Now send 1 message. If there is a cycle, after few ms we
	// should have tons of messages...
	natsPub(t, nc1, "foo", []byte("cycle"))
	natsFlush(t, nc1)
	time.Sleep(100 * time.Millisecond)
	if c := atomic.LoadInt32(&count); c != 2 {
		t.Fatalf("Expected only 2 messages, got %v", c)
	}
}

// TestGatewayOrderedOutbounds checks that the outbound gateway connections
// returned by s2 are ordered by RTT (first not greater than second) after a
// ping is sent on one of them, and that the returned list shrinks to one and
// then zero entries as the remote servers are shut down.
func TestGatewayOrderedOutbounds(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	o3 := testGatewayOptionsFromToWithServers(t, "C", "B", s2)
	s3 := runGatewayServer(o3)
	defer s3.Shutdown()

	waitForOutboundGateways(t, s1, 2, time.Second)
	waitForOutboundGateways(t, s2, 2, time.Second)
	waitForOutboundGateways(t, s3, 2, time.Second)

	gws := make([]*client, 0, 2)
	s2.getOutboundGatewayConnections(&gws)

	// RTTs are expected to be initially 0. So update RTT of first
	// in the array so that its value is no longer 0, this should
	// cause order to be flipped.
	c := gws[0]
	c.mu.Lock()
	c.sendPing()
	c.mu.Unlock()

	// Wait a tiny bit
	time.Sleep(15 * time.Millisecond)
	// Get the ordering again.
	gws = gws[:0]
	s2.getOutboundGatewayConnections(&gws)
	// Verify order is correct.
	fRTT := gws[0].getRTTValue()
	sRTT := gws[1].getRTTValue()
	if fRTT > sRTT {
		t.Fatalf("Wrong ordering: %v, %v", fRTT, sRTT)
	}

	// What is the first in the array?
	gws[0].mu.Lock()
	gwName := gws[0].gw.name
	gws[0].mu.Unlock()
	if gwName == "A" {
		s1.Shutdown()
	} else {
		s3.Shutdown()
	}
	waitForOutboundGateways(t, s2, 1, time.Second)
	gws = gws[:0]
	s2.getOutboundGatewayConnections(&gws)
	if len(gws) != 1 {
		t.Fatalf("Expected size of outo to be 1, got %v", len(gws))
	}
	gws[0].mu.Lock()
	name := gws[0].gw.name
	gws[0].mu.Unlock()
	if gwName == name {
		t.Fatalf("Gateway %q should have been removed", gwName)
	}
	// Stop the remaining gateway
	if gwName == "A" {
		s3.Shutdown()
	} else {
		s1.Shutdown()
	}
	waitForOutboundGateways(t, s2, 0, time.Second)
	gws = gws[:0]
	s2.getOutboundGatewayConnections(&gws)
	if len(gws) != 0 {
		t.Fatalf("Expected size of outo to be 0, got %v", len(gws))
	}
}

// TestGatewayQueueSub exercises queue-subscription delivery across gateways
// A, B and (later) C: which queue member receives messages depending on where
// they are published, how additional queue groups are served, and how the
// destination is picked when RTTs are set artificially on A's outbound
// connections.
func TestGatewayQueueSub(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	sBUrl := fmt.Sprintf("nats://127.0.0.1:%d", o2.Port)
	ncB := natsConnect(t, sBUrl)
	defer ncB.Close()

	count2 := int32(0)
	cb2 := func(_ *nats.Msg) {
		atomic.AddInt32(&count2, 1)
	}
	qsubOnB := natsQueueSub(t, ncB, "foo", "bar", cb2)
	natsFlush(t, ncB)

	sAUrl := fmt.Sprintf("nats://127.0.0.1:%d", o1.Port)
	ncA := natsConnect(t, sAUrl)
	defer ncA.Close()

	count1 := int32(0)
	cb1 := func(_ *nats.Msg) {
		atomic.AddInt32(&count1, 1)
	}
	qsubOnA := natsQueueSub(t, ncA, "foo", "bar", cb1)
	natsFlush(t, ncA)

	// Make sure subs are registered on each server
	checkExpectedSubs(t, 1, s1, s2)
	checkForRegisteredQSubInterest(t, s1, "B", globalAccountName, "foo", 1, time.Second)
	checkForRegisteredQSubInterest(t, s2, "A", globalAccountName, "foo", 1, time.Second)

	total := 100
	// send publishes total messages on "foo" through nc, then flushes.
	send := func(t *testing.T, nc *nats.Conn) {
		t.Helper()
		for i := 0; i < total; i++ {
			// Alternate with adding a reply
			if i%2 == 0 {
				natsPubReq(t, nc, "foo", "reply", []byte("msg"))
			} else {
				natsPub(t, nc, "foo", []byte("msg"))
			}
		}
		natsFlush(t, nc)
	}
	// Send from client connecting to S1 (cluster A)
	send(t, ncA)

	// check waits up to 2 seconds for *count to equal expected.
	check := func(t *testing.T, count *int32, expected int) {
		t.Helper()
		checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
			if n := int(atomic.LoadInt32(count)); n != expected {
				return fmt.Errorf("Expected to get %v messages, got %v", expected, n)
			}
			return nil
		})
	}
	// Check that all messages stay on S1 (cluster A)
	check(t, &count1, total)
	check(t, &count2, 0)

	// Now send from the other side
	send(t, ncB)
	check(t, &count1, total)
	check(t, &count2, total)

	// Reset counters
	atomic.StoreInt32(&count1, 0)
	atomic.StoreInt32(&count2, 0)

	// Add different queue group and make sure that messages are received
	count3 := int32(0)
	cb3 := func(_ *nats.Msg) {
		atomic.AddInt32(&count3, 1)
	}
	batQSub := natsQueueSub(t, ncB, "foo", "bat", cb3)
	natsFlush(t, ncB)
	checkExpectedSubs(t, 2, s2)

	checkForRegisteredQSubInterest(t, s1, "B", globalAccountName, "foo", 2, time.Second)

	send(t, ncA)
	check(t, &count1, total)
	check(t, &count2, 0)
	check(t, &count3, total)

	// Reset counters
	atomic.StoreInt32(&count1, 0)
	atomic.StoreInt32(&count2, 0)
	atomic.StoreInt32(&count3, 0)

	natsUnsub(t, batQSub)
	natsFlush(t, ncB)
	checkExpectedSubs(t, 1, s2)

	checkForRegisteredQSubInterest(t, s1, "B", globalAccountName, "foo", 1, time.Second)

	// Stop qsub on A, and send messages to A, they should
	// be routed to B.
	qsubOnA.Unsubscribe()
	checkExpectedSubs(t, 0, s1)
	send(t, ncA)
	check(t, &count1, 0)
	check(t, &count2, total)

	// Reset counters
	atomic.StoreInt32(&count1, 0)
	atomic.StoreInt32(&count2, 0)

	// Create a C gateway now
	o3 := testGatewayOptionsFromToWithServers(t, "C", "B", s2)
	s3 := runGatewayServer(o3)
	defer s3.Shutdown()

	waitForOutboundGateways(t, s1, 2, time.Second)
	waitForOutboundGateways(t, s2, 2, time.Second)
	waitForOutboundGateways(t, s3, 2, time.Second)

	waitForInboundGateways(t, s1, 2, time.Second)
	waitForInboundGateways(t, s2, 2, time.Second)
	waitForInboundGateways(t, s3, 2, time.Second)

	// Create another qsub "bar"
	sCUrl := fmt.Sprintf("nats://127.0.0.1:%d", o3.Port)
	ncC := natsConnect(t, sCUrl)
	defer ncC.Close()
	// Associate this with count1 (since A qsub is no longer running)
	natsQueueSub(t, ncC, "foo", "bar", cb1)
	natsFlush(t, ncC)
	checkExpectedSubs(t, 1, s3)
	checkForRegisteredQSubInterest(t, s1, "C", globalAccountName, "foo", 1, time.Second)
	checkForRegisteredQSubInterest(t, s2, "C", globalAccountName, "foo", 1, time.Second)

	// Artificially bump the RTT from A to C so that
	// the code should favor sending to B.
	gwcC := s1.getOutboundGatewayConnection("C")
	gwcC.mu.Lock()
	gwcC.rtt = 10 * time.Second
	gwcC.mu.Unlock()
	s1.gateway.orderOutboundConnections()

	send(t, ncA)
	check(t, &count1, 0)
	check(t, &count2, total)

	// Add a new group on s3 that should receive all messages
	natsQueueSub(t, ncC, "foo", "baz", cb3)
	natsFlush(t, ncC)
	checkExpectedSubs(t, 2, s3)
	checkForRegisteredQSubInterest(t, s1, "C", globalAccountName, "foo", 2, time.Second)
	checkForRegisteredQSubInterest(t, s2, "C", globalAccountName, "foo", 2, time.Second)

	// Reset counters
	atomic.StoreInt32(&count1, 0)
	atomic.StoreInt32(&count2, 0)

	// Make the RTTs equal
	gwcC.mu.Lock()
	gwcC.rtt = time.Second
	gwcC.mu.Unlock()

	gwcB := s1.getOutboundGatewayConnection("B")
	gwcB.mu.Lock()
	gwcB.rtt = time.Second
	gwcB.mu.Unlock()

	s1.gateway.Lock()
	s1.gateway.orderOutboundConnectionsLocked()
	destName := s1.gateway.outo[0].gw.name
	s1.gateway.Unlock()

	send(t, ncA)
	// Group baz should receive all messages
	check(t, &count3, total)

	// Ordering is normally re-evaluated when processing PONGs,
	// but rest of the time order will remain the same.
	// Since RTT are equal, messages will go to the first
	// GW in the array.
	if destName == "B" {
		check(t, &count2, total)
	} else if destName == "C" && int(atomic.LoadInt32(&count2)) != total {
		check(t, &count1, total)
	}

	// Unsubscribe qsub on B and C should receive
	// all messages on count1 and count3.
	qsubOnB.Unsubscribe()
	checkExpectedSubs(t, 0, s2)

	// gwcB should have the qsubs interest map empty now.
	checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
		ei, _ := gwcB.gw.outsim.Load(globalAccountName)
		if ei != nil {
			sl := ei.(*outsie).sl
			if sl.Count() == 0 {
				return nil
			}
		}
		return fmt.Errorf("Qsub interest for account should have been removed")
	})

	// Reset counters
	atomic.StoreInt32(&count1, 0)
	atomic.StoreInt32(&count2, 0)
	atomic.StoreInt32(&count3, 0)

	send(t, ncA)
	check(t, &count1, total)
	check(t, &count3, total)
}

// TestGatewayTotalQSubs tracks sa's gateway totalQSubs counter while queue
// subscriptions are created on cluster B, while the clients fail over from
// sb1 to sb2, after sb1 is restarted, and as the subscriptions and client
// connections are removed.
func TestGatewayTotalQSubs(t *testing.T) {
	ob1 := testDefaultOptionsForGateway("B")
	sb1 := runGatewayServer(ob1)
	defer sb1.Shutdown()

	ob2 := testDefaultOptionsForGateway("B")
	ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sb1.ClusterAddr().Port))
	sb2 := runGatewayServer(ob2)
	defer sb2.Shutdown()

	checkClusterFormed(t, sb1, sb2)

	sb1URL := fmt.Sprintf("nats://%s:%d", ob1.Host, ob1.Port)
	ncb1 := natsConnect(t, sb1URL, nats.ReconnectWait(50*time.Millisecond))
	defer ncb1.Close()

	sb2URL := fmt.Sprintf("nats://%s:%d", ob2.Host, ob2.Port)
	ncb2 := natsConnect(t, sb2URL, nats.ReconnectWait(50*time.Millisecond))
	defer ncb2.Close()

	oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb1)
	sa := runGatewayServer(oa)
	defer sa.Shutdown()

	waitForOutboundGateways(t, sa, 1, 2*time.Second)
	waitForOutboundGateways(t, sb1, 1, 2*time.Second)
	waitForOutboundGateways(t, sb2, 1, 2*time.Second)
	waitForInboundGateways(t, sa, 2, 2*time.Second)
	waitForInboundGateways(t, sb1, 1, 2*time.Second)

	// checkTotalQSubs waits up to a second for s.gateway.totalQSubs to
	// equal expected.
	checkTotalQSubs := func(t *testing.T, s *Server, expected int) {
		t.Helper()
		checkFor(t, time.Second, 15*time.Millisecond, func() error {
			if n := int(atomic.LoadInt64(&s.gateway.totalQSubs)); n != expected {
				return fmt.Errorf("Expected TotalQSubs to be %v, got %v", expected, n)
			}
			return nil
		})
	}

	cb := func(_ *nats.Msg) {}

	natsQueueSub(t, ncb1, "foo", "bar", cb)
	checkTotalQSubs(t, sa, 1)
	qsub2 := natsQueueSub(t, ncb1, "foo", "baz", cb)
	checkTotalQSubs(t, sa, 2)
	qsub3 := natsQueueSub(t, ncb1, "foo", "baz", cb)
	checkTotalQSubs(t, sa, 2)

	// Shutdown sb1, there should be a failover from clients
	// to sb2. sb2 will then send the queue subs to sa.
	sb1.Shutdown()

	checkClientsCount(t, sb2, 2)
	checkExpectedSubs(t, 3, sb2)

	waitForOutboundGateways(t, sa, 1, 2*time.Second)
	waitForOutboundGateways(t, sb2, 1, 2*time.Second)
	waitForInboundGateways(t, sa, 1, 2*time.Second)
	waitForInboundGateways(t, sb2, 1, 2*time.Second)

	// When sb1 is shutdown, the total qsubs on sa should fall
	// down to 0, but will be updated as soon as sa and sb2
	// connect to each other. So instead we will verify by
	// making sure that the count is 2 instead of 4 if there
	// was a bug.
	// (note that there are 2 qsubs on same group, so only
	// 1 RS+ would have been sent for that group)
	checkTotalQSubs(t, sa, 2)

	// Restart sb1
	sb1 = runGatewayServer(ob1)
	defer sb1.Shutdown()

	checkClusterFormed(t, sb1, sb2)

	waitForOutboundGateways(t, sa, 1, 2*time.Second)
	waitForOutboundGateways(t, sb1, 1, 2*time.Second)
	waitForOutboundGateways(t, sb2, 1, 2*time.Second)
	waitForInboundGateways(t, sa, 2, 2*time.Second)
	waitForInboundGateways(t, sb1, 0, 2*time.Second)
	waitForInboundGateways(t, sb2, 1, 2*time.Second)

	// Now start unsubscribing. Start with one of the duplicate
	// and check that count stays same.
	natsUnsub(t, qsub3)
	checkTotalQSubs(t, sa, 2)
	// Now the other, which would cause an RS-
	natsUnsub(t, qsub2)
	checkTotalQSubs(t, sa, 1)
	// Now test that if connections are closed, things are updated
	// properly.
	ncb1.Close()
	ncb2.Close()
	checkTotalQSubs(t, sa, 0)
}

// TestGatewaySendQSubsOnGatewayConnect creates a queue subscription on B
// before A exists, then checks that A's outbound gateway to B registers the
// queue interest and that a message published on A is received, both on A's
// first start and after A is restarted.
func TestGatewaySendQSubsOnGatewayConnect(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	s2Url := fmt.Sprintf("nats://127.0.0.1:%d", o2.Port)
	subnc := natsConnect(t, s2Url)
	defer subnc.Close()

	ch := make(chan bool, 1)
	cb := func(_ *nats.Msg) {
		ch <- true
	}
	natsQueueSub(t, subnc, "foo", "bar", cb)
	natsFlush(t, subnc)

	// Now start s1 that creates a gateway to s2
	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	checkForRegisteredQSubInterest(t, s1, "B", globalAccountName, "foo", 1, time.Second)

	// Publish from s1, message should be received on s2.
	pubnc := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", o1.Port))
	defer pubnc.Close()
	// Publish 1 message
	natsPub(t, pubnc, "foo", []byte("hello"))
	waitCh(t, ch, "Did not get out message")
	pubnc.Close()

	s1.Shutdown()
	s1 = runGatewayServer(o1)
	defer s1.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	checkForRegisteredQSubInterest(t, s1, "B", globalAccountName, "foo", 1, time.Second)

	pubnc = natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", o1.Port))
	defer pubnc.Close()
	// Publish 1 message
	natsPub(t, pubnc, "foo", []byte("hello"))
	waitCh(t, ch, "Did not get out message")
}

// TestGatewaySendRemoteQSubs places two queue subscribers of the same group
// on sb2 while A's gateway points at sb1 (the routed peer without the local
// queue sub). It checks that A sees the queue interest, that messages reach
// the subscriber, and that the interest on A disappears once both
// subscriptions are removed and stays absent after A is restarted. It runs
// with GatewayDoNotForceInterestOnlyMode set to true for the duration of the
// test.
func TestGatewaySendRemoteQSubs(t *testing.T) {
	GatewayDoNotForceInterestOnlyMode(true)
	defer GatewayDoNotForceInterestOnlyMode(false)

	ob1 := testDefaultOptionsForGateway("B")
	sb1 := runGatewayServer(ob1)
	defer sb1.Shutdown()

	ob2 := testDefaultOptionsForGateway("B")
	ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", ob1.Cluster.Host, ob1.Cluster.Port))
	sb2 := runGatewayServer(ob2)
	defer sb2.Shutdown()

	checkClusterFormed(t, sb1, sb2)

	sbURL := fmt.Sprintf("nats://127.0.0.1:%d", ob2.Port)
	subnc := natsConnect(t, sbURL)
	defer subnc.Close()

	ch := make(chan bool, 1)
	cb := func(_ *nats.Msg) {
		ch <- true
	}
	qsub1 := natsQueueSub(t, subnc, "foo", "bar", cb)
	qsub2 := natsQueueSub(t, subnc, "foo", "bar", cb)
	natsFlush(t, subnc)

	// There will be 2 local qsubs on the sb2 server where the client is connected
	checkExpectedSubs(t, 2, sb2)
	// But only 1 remote on sb1
	checkExpectedSubs(t, 1, sb1)

	// Now start s1 that creates a gateway to sb1 (the one that does not have the local QSub)
	oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb1)
	sa := runGatewayServer(oa)
	defer sa.Shutdown()

	waitForOutboundGateways(t, sa, 1, time.Second)
	waitForOutboundGateways(t, sb1, 1, time.Second)
	waitForOutboundGateways(t, sb2, 1, time.Second)

	checkForRegisteredQSubInterest(t, sa, "B", globalAccountName, "foo", 1, time.Second)

	// Publish from s1, message should be received on s2.
	saURL := fmt.Sprintf("nats://127.0.0.1:%d", oa.Port)
	pubnc := natsConnect(t, saURL)
	defer pubnc.Close()
	// Publish 1 message
	natsPub(t, pubnc, "foo", []byte("hello"))
	natsFlush(t, pubnc)
	waitCh(t, ch, "Did not get out message")

	// Note that since cluster B has no plain sub, an "RS- $G foo" will have been sent.
	// Wait for the no interest to be received by A
	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		gw := sa.getOutboundGatewayConnection("B").gw
		ei, _ := gw.outsim.Load(globalAccountName)
		if ei != nil {
			e := ei.(*outsie)
			e.RLock()
			defer e.RUnlock()
			if _, inMap := e.ni["foo"]; inMap {
				return nil
			}
		}
		return fmt.Errorf("No-interest still not registered")
	})

	// Unsubscribe 1 qsub
	natsUnsub(t, qsub1)
	natsFlush(t, subnc)
	// There should be only 1 local qsub on sb2 now, and the remote should still exist on sb1
	checkExpectedSubs(t, 1, sb1, sb2)

	// Publish 1 message
	natsPub(t, pubnc, "foo", []byte("hello"))
	natsFlush(t, pubnc)
	waitCh(t, ch, "Did not get out message")

	// Unsubscribe the remaining
	natsUnsub(t, qsub2)
	natsFlush(t, subnc)

	// No more subs now on both sb1 and sb2
	checkExpectedSubs(t, 0, sb1, sb2)

	// Server sb1 should not have qsub in its sub interest map
	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		var entry *sitally
		var err error
		sb1.gateway.pasi.Lock()
		asim := sb1.gateway.pasi.m[globalAccountName]
		if asim != nil {
			entry = asim["foo bar"]
		}
		if entry != nil {
			err = fmt.Errorf("Map should not have an entry, got %#v", entry)
		}
		sb1.gateway.pasi.Unlock()
		return err
	})

	// Let's wait for A to receive the unsubscribe
	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		gw := sa.getOutboundGatewayConnection("B").gw
		ei, _ := gw.outsim.Load(globalAccountName)
		if ei != nil {
			sl := ei.(*outsie).sl
			if sl.Count() == 0 {
				return nil
			}
		}
		return fmt.Errorf("Interest still present")
	})

	// Now send a message, it won't be sent because A received an RS-
	// on the first published message since there was no plain sub interest.
	natsPub(t, pubnc, "foo", []byte("hello"))
	natsFlush(t, pubnc)

	// Get the gateway connection from A (sa) to B (sb1)
	gw := sa.getOutboundGatewayConnection("B")
	gw.mu.Lock()
	out := gw.outMsgs
	gw.mu.Unlock()
	if out != 2 {
		t.Fatalf("Expected 2 out messages, got %v", out)
	}

	// Restart A
	pubnc.Close()
	sa.Shutdown()
	sa = runGatewayServer(oa)
	defer sa.Shutdown()

	waitForOutboundGateways(t, sa, 1, time.Second)

	// Check qsubs interest should be empty
	checkFor(t, time.Second, 15*time.Millisecond, func() error {
		gw := sa.getOutboundGatewayConnection("B").gw
		if ei, _ := gw.outsim.Load(globalAccountName); ei == nil {
			return nil
		}
		return fmt.Errorf("Interest still present")
	})
}

// TestGatewayComplexSetup builds two clusters of two servers each (sa1/sa2 and
// sb1/sb2) joined by gateways as drawn below, then checks plain and queue
// subscription delivery between the clusters. Set doLog to true to attach
// test loggers to the servers.
func TestGatewayComplexSetup(t *testing.T) {
	doLog := false

	// This test will have the following setup:
	// --- means route connection
	// === means gateway connection
	// [o] is outbound
	// [i] is inbound
	// Each server has an outbound connection to the other cluster.
	// It may have 0 or more inbound connection(s).
	//
	//  Cluster A            Cluster B
	//   sa1 [o]===========>[i]
	//    |  [i]<===========[o]
	//    |                 sb1 ------- sb2
	//    |                 [i]         [o]
	//   sa2 [o]=============^           ||
	//       [i]<========================||
	ob1 := testDefaultOptionsForGateway("B")
	sb1 := runGatewayServer(ob1)
	defer sb1.Shutdown()
	if doLog {
		sb1.SetLogger(logger.NewTestLogger("[B1] - ", true), true, true)
	}

	oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb1)
	sa1 := runGatewayServer(oa1)
	defer sa1.Shutdown()
	if doLog {
		sa1.SetLogger(logger.NewTestLogger("[A1] - ", true), true, true)
	}

	waitForOutboundGateways(t, sa1, 1, time.Second)
	waitForOutboundGateways(t, sb1, 1, time.Second)

	waitForInboundGateways(t, sa1, 1, time.Second)
	waitForInboundGateways(t, sb1, 1, time.Second)

	oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb1)
	oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sa1.ClusterAddr().Port))
	sa2 := runGatewayServer(oa2)
	defer sa2.Shutdown()
	if doLog {
		sa2.SetLogger(logger.NewTestLogger("[A2] - ", true), true, true)
	}

	checkClusterFormed(t, sa1, sa2)

	waitForOutboundGateways(t, sa2, 1, time.Second)
	waitForInboundGateways(t, sb1, 2, time.Second)

	ob2 := testGatewayOptionsFromToWithServers(t, "B", "A", sa2)
	ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sb1.ClusterAddr().Port))
	var sb2 *Server
	for {
		sb2 = runGatewayServer(ob2)
		// The deferred call binds the server started in this iteration;
		// it runs when the test function returns, not at the end of the
		// iteration.
		defer sb2.Shutdown()

		checkClusterFormed(t, sb1, sb2)

		waitForOutboundGateways(t, sb2, 1, time.Second)
		waitForInboundGateways(t, sb2, 0, time.Second)
		// For this test, we want the outbound to be to sa2, so if we don't have that,
		// restart sb2 until we get lucky.
		time.Sleep(100 * time.Millisecond)
		if sa2.numInboundGateways() == 0 {
			sb2.Shutdown()
			sb2 = nil
		} else {
			break
		}
	}
	if doLog {
		sb2.SetLogger(logger.NewTestLogger("[B2] - ", true), true, true)
	}

	ch := make(chan bool, 1)
	cb := func(_ *nats.Msg) {
		ch <- true
	}

	// Create a subscription on sa1 and sa2.
	ncsa1 := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", oa1.Port))
	defer ncsa1.Close()
	sub1 := natsSub(t, ncsa1, "foo", cb)
	natsFlush(t, ncsa1)

	ncsa2 := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", oa2.Port))
	defer ncsa2.Close()
	sub2 := natsSub(t, ncsa2, "foo", cb)
	natsFlush(t, ncsa2)

	// sa1 will have 1 local, one remote (from sa2), same for sa2.
	checkExpectedSubs(t, 2, sa1, sa2)

	// Connect to sb2 and send 1 message
	ncsb2 := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", ob2.Port))
	defer ncsb2.Close()
	natsPub(t, ncsb2, "foo", []byte("hello"))
	natsFlush(t, ncsb2)

	for i := 0; i < 2; i++ {
		waitCh(t, ch, "Did not get our message")
	}

	// Unsubscribe sub2, and send 1, should still get it.
	natsUnsub(t, sub2)
	natsFlush(t, ncsa2)
	natsPub(t, ncsb2, "foo", []byte("hello"))
	natsFlush(t, ncsb2)
	waitCh(t, ch, "Did not get our message")

	// Unsubscribe sub1, all server's sublist should be empty
	sub1.Unsubscribe()
	natsFlush(t, ncsa1)

	checkExpectedSubs(t, 0, sa1, sa2, sb1, sb2)

	// Create queue subs
	// c1, c2 and c3 count deliveries per queue member; tc counts all
	// deliveries and ch is signaled when it reaches total.
	total := 100
	c1 := int32(0)
	c2 := int32(0)
	c3 := int32(0)
	tc := int32(0)
	natsQueueSub(t, ncsa1, "foo", "bar", func(_ *nats.Msg) {
		atomic.AddInt32(&c1, 1)
		if c := atomic.AddInt32(&tc, 1); int(c) == total {
			ch <- true
		}
	})
	natsFlush(t, ncsa1)
	natsQueueSub(t, ncsa2, "foo", "bar", func(_ *nats.Msg) {
		atomic.AddInt32(&c2, 1)
		if c := atomic.AddInt32(&tc, 1); int(c) == total {
			ch <- true
		}
	})
	natsFlush(t, ncsa2)
	checkExpectedSubs(t, 2, sa1, sa2)

	qsubOnB2 := natsQueueSub(t, ncsb2, "foo", "bar", func(_ *nats.Msg) {
		atomic.AddInt32(&c3, 1)
		if c := atomic.AddInt32(&tc, 1); int(c) == total {
			ch <- true
		}
	})
	natsFlush(t, ncsb2)
	checkExpectedSubs(t, 1, sb2)

	checkForRegisteredQSubInterest(t, sb1, "A", globalAccountName, "foo", 1, time.Second)

	// Publish all messages. The queue sub on cluster B should receive all
	// messages.
	for i := 0; i < total; i++ {
		natsPub(t, ncsb2, "foo", []byte("msg"))
	}
	natsFlush(t, ncsb2)

	waitCh(t, ch, "Did not get all our queue messages")
	if n := int(atomic.LoadInt32(&c1)); n != 0 {
		t.Fatalf("No message should have been received by qsub1, got %v", n)
	}
	if n := int(atomic.LoadInt32(&c2)); n != 0 {
		t.Fatalf("No message should have been received by qsub2, got %v", n)
	}
	if n := int(atomic.LoadInt32(&c3)); n != total {
		t.Fatalf("All messages should have been delivered to qsub on B, got %v", n)
	}

	// Reset counters
	atomic.StoreInt32(&c1, 0)
	atomic.StoreInt32(&c2, 0)
	atomic.StoreInt32(&c3, 0)
	atomic.StoreInt32(&tc, 0)

	// Now send from cluster A, messages should be distributed to qsubs on A.
	for i := 0; i < total; i++ {
		natsPub(t, ncsa1, "foo", []byte("msg"))
	}
	natsFlush(t, ncsa1)

	// Each of the two queue members on A is expected to get half of the
	// messages, within a +/-40% tolerance.
	expectedLow := int(float32(total/2) * 0.6)
	expectedHigh := int(float32(total/2) * 1.4)
	checkCount := func(t *testing.T, count *int32) {
		t.Helper()
		c := int(atomic.LoadInt32(count))
		if c < expectedLow || c > expectedHigh {
			t.Fatalf("Expected value to be between %v/%v, got %v", expectedLow, expectedHigh, c)
		}
	}
	waitCh(t, ch, "Did not get all our queue messages")
	checkCount(t, &c1)
	checkCount(t, &c2)

	// Now unsubscribe sub on B and reset counters
	natsUnsub(t, qsubOnB2)
	checkExpectedSubs(t, 0, sb2)
	atomic.StoreInt32(&c1, 0)
	atomic.StoreInt32(&c2, 0)
	atomic.StoreInt32(&c3, 0)
	atomic.StoreInt32(&tc, 0)
	// Publish from cluster B, messages should be delivered to cluster A.
	for i := 0; i < total; i++ {
		natsPub(t, ncsb2, "foo", []byte("msg"))
	}
	natsFlush(t, ncsb2)

	waitCh(t, ch, "Did not get all our queue messages")
	if n := int(atomic.LoadInt32(&c3)); n != 0 {
		t.Fatalf("There should not have been messages on unsubscribed sub, got %v", n)
	}
	checkCount(t, &c1)
	checkCount(t, &c2)
}

// TestGatewayMsgSentOnlyOnce creates several plain and queue subscriptions on
// A that all match "foo", publishes one message from B, and checks that the
// callback fires four times while only a single message is counted on B's
// outbound and on A's inbound gateway connection.
func TestGatewayMsgSentOnlyOnce(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	s2Url := fmt.Sprintf("nats://127.0.0.1:%d", o2.Port)
	nc2 := natsConnect(t, s2Url)
	defer nc2.Close()

	s1Url := fmt.Sprintf("nats://127.0.0.1:%d", o1.Port)
	nc1 := natsConnect(t, s1Url)
	defer nc1.Close()

	ch := make(chan bool, 1)
	count := int32(0)
	expected := int32(4)
	cb := func(_ *nats.Msg) {
		if c := atomic.AddInt32(&count, 1); c == expected {
			ch <- true
		}
	}

	// On s1, create 2 plain subs, 2 queue members for group
	// "bar" and 1 for group "baz".
	natsSub(t, nc1, ">", cb)
	natsSub(t, nc1, "foo", cb)
	natsQueueSub(t, nc1, "foo", "bar", cb)
	natsQueueSub(t, nc1, "foo", "bar", cb)
	natsQueueSub(t, nc1, "foo", "baz", cb)
	natsFlush(t, nc1)

	// Ensure subs registered in S1
	checkExpectedSubs(t, 5, s1)

	// Also need to wait for qsubs to be registered on s2.
	checkForRegisteredQSubInterest(t, s2, "A", globalAccountName, "foo", 2, time.Second)

	// From s2, send 1 message, s1 should receive 1 only,
	// and total we should get the callback notified 4 times.
	natsPub(t, nc2, "foo", []byte("hello"))
	natsFlush(t, nc2)

	waitCh(t, ch, "Did not get our messages")
	// Verify that count is still 4
	if c := atomic.LoadInt32(&count); c != expected {
		t.Fatalf("Expected %v messages, got %v", expected, c)
	}
	// Check s2 outbound connection stats. It should say that it
	// sent only 1 message.
	c := s2.getOutboundGatewayConnection("A")
	if c == nil {
		t.Fatalf("S2 outbound gateway not found")
	}
	c.mu.Lock()
	out := c.outMsgs
	c.mu.Unlock()
	if out != 1 {
		t.Fatalf("Expected s2's outbound gateway to have sent a single message, got %v", out)
	}
	// Now check s1's inbound gateway
	s1.gateway.RLock()
	c = nil
	// Take any one entry of the inbound map.
	for _, ci := range s1.gateway.in {
		c = ci
		break
	}
	s1.gateway.RUnlock()
	if c == nil {
		t.Fatalf("S1 inbound gateway not found")
	}
	if in := atomic.LoadInt64(&c.inMsgs); in != 1 {
		t.Fatalf("Expected s1's inbound gateway to have received a single message, got %v", in)
	}
}

// checkErrorLogger embeds DummyLogger and records, in gotError, whether an
// error message containing checkErrorStr has been logged.
type checkErrorLogger struct {
	DummyLogger
	checkErrorStr string
	gotError      bool
}

// Errorf forwards to the embedded DummyLogger's Errorf and then, under the
// logger's lock, sets gotError if l.Msg contains checkErrorStr.
func (l *checkErrorLogger) Errorf(format string, args ...any) {
	l.DummyLogger.Errorf(format, args...)
	l.Lock()
	if strings.Contains(l.Msg, l.checkErrorStr) {
		l.gotError = true
	}
	l.Unlock()
}

func TestGatewayRoutedServerWithoutGatewayConfigured(t *testing.T) {
	o2 := testDefaultOptionsForGateway("B")
	s2 := runGatewayServer(o2)
	defer s2.Shutdown()

	o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2)
	s1 := runGatewayServer(o1)
	defer s1.Shutdown()

	waitForOutboundGateways(t, s1, 1, time.Second)
	waitForOutboundGateways(t, s2, 1, time.Second)

	o3 := DefaultOptions()
	o3.NoSystemAccount = true
	o3.Cluster.Name = "B"
	o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s2.ClusterAddr().Port))
	s3 := New(o3)
	defer s3.Shutdown()
	l := &checkErrorLogger{checkErrorStr: "not configured"}
	s3.SetLogger(l, true, true)
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		s3.Start()
		wg.Done()
	}()

	checkClusterFormed(t, s2, s3)

	// Check that server s3 does not panic when being notified
	// about the A gateway, but report an error.
3066 deadline := time.Now().Add(2 * time.Second) 3067 gotIt := false 3068 for time.Now().Before(deadline) { 3069 l.Lock() 3070 gotIt = l.gotError 3071 l.Unlock() 3072 if gotIt { 3073 break 3074 } 3075 time.Sleep(15 * time.Millisecond) 3076 } 3077 if !gotIt { 3078 t.Fatalf("Should have reported error about gateway not configured") 3079 } 3080 3081 s3.Shutdown() 3082 wg.Wait() 3083 } 3084 3085 func TestGatewaySendsToNonLocalSubs(t *testing.T) { 3086 ob1 := testDefaultOptionsForGateway("B") 3087 sb1 := runGatewayServer(ob1) 3088 defer sb1.Shutdown() 3089 3090 oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb1) 3091 sa1 := runGatewayServer(oa1) 3092 defer sa1.Shutdown() 3093 3094 waitForOutboundGateways(t, sa1, 1, time.Second) 3095 waitForOutboundGateways(t, sb1, 1, time.Second) 3096 3097 waitForInboundGateways(t, sa1, 1, time.Second) 3098 waitForInboundGateways(t, sb1, 1, time.Second) 3099 3100 oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb1) 3101 oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sa1.ClusterAddr().Port)) 3102 sa2 := runGatewayServer(oa2) 3103 defer sa2.Shutdown() 3104 3105 checkClusterFormed(t, sa1, sa2) 3106 3107 waitForOutboundGateways(t, sa2, 1, time.Second) 3108 waitForInboundGateways(t, sb1, 2, time.Second) 3109 3110 ch := make(chan bool, 1) 3111 // Create an interest of sa2 3112 ncSub := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", oa2.Port)) 3113 defer ncSub.Close() 3114 natsSub(t, ncSub, "foo", func(_ *nats.Msg) { ch <- true }) 3115 natsFlush(t, ncSub) 3116 checkExpectedSubs(t, 1, sa1, sa2) 3117 3118 // Produce a message from sb1, make sure it can be received. 
3119 ncPub := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", ob1.Port)) 3120 defer ncPub.Close() 3121 natsPub(t, ncPub, "foo", []byte("hello")) 3122 waitCh(t, ch, "Did not get our message") 3123 3124 ncSub.Close() 3125 ncPub.Close() 3126 checkExpectedSubs(t, 0, sa1, sa2) 3127 3128 // Now create sb2 that has a route to sb1 and gateway connects to sa2. 3129 ob2 := testGatewayOptionsFromToWithServers(t, "B", "A", sa2) 3130 ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sb1.ClusterAddr().Port)) 3131 sb2 := runGatewayServer(ob2) 3132 defer sb2.Shutdown() 3133 3134 checkClusterFormed(t, sb1, sb2) 3135 waitForOutboundGateways(t, sb2, 1, time.Second) 3136 3137 ncSub = natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", oa1.Port)) 3138 defer ncSub.Close() 3139 natsSub(t, ncSub, "foo", func(_ *nats.Msg) { ch <- true }) 3140 natsFlush(t, ncSub) 3141 checkExpectedSubs(t, 1, sa1, sa2) 3142 3143 ncPub = natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", ob2.Port)) 3144 defer ncPub.Close() 3145 natsPub(t, ncPub, "foo", []byte("hello")) 3146 waitCh(t, ch, "Did not get our message") 3147 } 3148 3149 func TestGatewayUnknownGatewayCommand(t *testing.T) { 3150 o1 := testDefaultOptionsForGateway("A") 3151 s1 := runGatewayServer(o1) 3152 defer s1.Shutdown() 3153 3154 l := &checkErrorLogger{checkErrorStr: "Unknown command"} 3155 s1.SetLogger(l, true, true) 3156 3157 o2 := testDefaultOptionsForGateway("A") 3158 o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", s1.ClusterAddr().Port)) 3159 s2 := runGatewayServer(o2) 3160 defer s2.Shutdown() 3161 3162 checkClusterFormed(t, s1, s2) 3163 3164 var route *client 3165 s2.mu.Lock() 3166 if r := getFirstRoute(s2); r != nil { 3167 route = r 3168 } 3169 s2.mu.Unlock() 3170 3171 route.mu.Lock() 3172 info := &Info{ 3173 Gateway: "B", 3174 GatewayCmd: 255, 3175 } 3176 b, _ := json.Marshal(info) 3177 route.enqueueProto([]byte(fmt.Sprintf(InfoProto, b))) 3178 route.mu.Unlock() 3179 3180 checkFor(t, time.Second, 
15*time.Millisecond, func() error { 3181 l.Lock() 3182 gotIt := l.gotError 3183 l.Unlock() 3184 if gotIt { 3185 return nil 3186 } 3187 return fmt.Errorf("Did not get expected error") 3188 }) 3189 } 3190 3191 func TestGatewayRandomIP(t *testing.T) { 3192 ob := testDefaultOptionsForGateway("B") 3193 sb := runGatewayServer(ob) 3194 defer sb.Shutdown() 3195 3196 oa := testGatewayOptionsFromToWithURLs(t, "A", "B", 3197 []string{ 3198 "nats://noport", 3199 fmt.Sprintf("nats://localhost:%d", sb.GatewayAddr().Port), 3200 }) 3201 // Create a dummy resolver that returns error since we 3202 // don't provide any IP. The code should then use the configured 3203 // url (localhost:port) and try with that, which in this case 3204 // should work. 3205 oa.Gateway.resolver = &myDummyDNSResolver{} 3206 sa := runGatewayServer(oa) 3207 defer sa.Shutdown() 3208 3209 waitForOutboundGateways(t, sa, 1, 2*time.Second) 3210 waitForOutboundGateways(t, sb, 1, 2*time.Second) 3211 } 3212 3213 func TestGatewaySendQSubsBufSize(t *testing.T) { 3214 for _, test := range []struct { 3215 name string 3216 bufSize int 3217 }{ 3218 { 3219 name: "Bufsize 45, more than one at a time", 3220 bufSize: 45, 3221 }, 3222 { 3223 name: "Bufsize 15, one at a time", 3224 bufSize: 15, 3225 }, 3226 { 3227 name: "Bufsize 0, default to maxBufSize, all at once", 3228 bufSize: 0, 3229 }, 3230 } { 3231 t.Run(test.name, func(t *testing.T) { 3232 3233 o2 := testDefaultOptionsForGateway("B") 3234 o2.Gateway.sendQSubsBufSize = test.bufSize 3235 s2 := runGatewayServer(o2) 3236 defer s2.Shutdown() 3237 3238 s2Url := fmt.Sprintf("nats://%s:%d", o2.Host, o2.Port) 3239 nc := natsConnect(t, s2Url) 3240 defer nc.Close() 3241 natsQueueSub(t, nc, "foo", "bar", func(_ *nats.Msg) {}) 3242 natsQueueSub(t, nc, "foo", "baz", func(_ *nats.Msg) {}) 3243 natsQueueSub(t, nc, "foo", "bat", func(_ *nats.Msg) {}) 3244 natsQueueSub(t, nc, "foo", "bax", func(_ *nats.Msg) {}) 3245 3246 checkExpectedSubs(t, 4, s2) 3247 3248 o1 := 
testGatewayOptionsFromToWithServers(t, "A", "B", s2) 3249 s1 := runGatewayServer(o1) 3250 defer s1.Shutdown() 3251 3252 waitForOutboundGateways(t, s1, 1, time.Second) 3253 waitForOutboundGateways(t, s2, 1, time.Second) 3254 3255 checkForRegisteredQSubInterest(t, s1, "B", globalAccountName, "foo", 4, time.Second) 3256 3257 // Make sure we have the 4 we expected 3258 c := s1.getOutboundGatewayConnection("B") 3259 ei, _ := c.gw.outsim.Load(globalAccountName) 3260 if ei == nil { 3261 t.Fatalf("No interest found") 3262 } 3263 sl := ei.(*outsie).sl 3264 r := sl.Match("foo") 3265 if len(r.qsubs) != 4 { 3266 t.Fatalf("Expected 4 groups, got %v", len(r.qsubs)) 3267 } 3268 var gotBar, gotBaz, gotBat, gotBax bool 3269 for _, qs := range r.qsubs { 3270 if len(qs) != 1 { 3271 t.Fatalf("Unexpected number of subs for group %s: %v", qs[0].queue, len(qs)) 3272 } 3273 q := qs[0].queue 3274 switch string(q) { 3275 case "bar": 3276 gotBar = true 3277 case "baz": 3278 gotBaz = true 3279 case "bat": 3280 gotBat = true 3281 case "bax": 3282 gotBax = true 3283 default: 3284 t.Fatalf("Unexpected group name: %s", q) 3285 } 3286 } 3287 if !gotBar || !gotBaz || !gotBat || !gotBax { 3288 t.Fatalf("Did not get all we wanted: bar=%v baz=%v bat=%v bax=%v", 3289 gotBar, gotBaz, gotBat, gotBax) 3290 } 3291 3292 nc.Close() 3293 s1.Shutdown() 3294 s2.Shutdown() 3295 3296 waitForOutboundGateways(t, s1, 0, time.Second) 3297 waitForOutboundGateways(t, s2, 0, time.Second) 3298 }) 3299 } 3300 } 3301 3302 func TestGatewayRaceBetweenPubAndSub(t *testing.T) { 3303 o2 := testDefaultOptionsForGateway("B") 3304 s2 := runGatewayServer(o2) 3305 defer s2.Shutdown() 3306 3307 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 3308 s1 := runGatewayServer(o1) 3309 defer s1.Shutdown() 3310 3311 waitForOutboundGateways(t, s1, 1, time.Second) 3312 waitForOutboundGateways(t, s2, 1, time.Second) 3313 3314 s2Url := fmt.Sprintf("nats://127.0.0.1:%d", o2.Port) 3315 nc2 := natsConnect(t, s2Url) 3316 defer nc2.Close() 
3317 3318 s1Url := fmt.Sprintf("nats://127.0.0.1:%d", o1.Port) 3319 var ncaa [5]*nats.Conn 3320 var nca = ncaa[:0] 3321 for i := 0; i < 5; i++ { 3322 nc := natsConnect(t, s1Url) 3323 defer nc.Close() 3324 nca = append(nca, nc) 3325 } 3326 3327 ch := make(chan bool, 1) 3328 wg := sync.WaitGroup{} 3329 wg.Add(5) 3330 for _, nc := range nca { 3331 nc := nc 3332 go func(n *nats.Conn) { 3333 defer wg.Done() 3334 for { 3335 n.Publish("foo", []byte("hello")) 3336 select { 3337 case <-ch: 3338 return 3339 default: 3340 } 3341 } 3342 }(nc) 3343 } 3344 time.Sleep(100 * time.Millisecond) 3345 natsQueueSub(t, nc2, "foo", "bar", func(m *nats.Msg) { 3346 natsUnsub(t, m.Sub) 3347 close(ch) 3348 }) 3349 wg.Wait() 3350 } 3351 3352 // Returns the first (if any) of the inbound connections for this name. 3353 func getInboundGatewayConnection(s *Server, name string) *client { 3354 var gwsa [4]*client 3355 var gws = gwsa[:0] 3356 s.getInboundGatewayConnections(&gws) 3357 for _, gw := range gws { 3358 gw.mu.Lock() 3359 ok := gw.gw.name == name 3360 gw.mu.Unlock() 3361 if ok { 3362 return gw 3363 } 3364 } 3365 return nil 3366 } 3367 3368 func TestGatewaySendAllSubs(t *testing.T) { 3369 GatewayDoNotForceInterestOnlyMode(true) 3370 defer GatewayDoNotForceInterestOnlyMode(false) 3371 3372 gatewayMaxRUnsubBeforeSwitch = 100 3373 defer func() { gatewayMaxRUnsubBeforeSwitch = defaultGatewayMaxRUnsubBeforeSwitch }() 3374 3375 ob := testDefaultOptionsForGateway("B") 3376 sb := runGatewayServer(ob) 3377 defer sb.Shutdown() 3378 3379 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 3380 sa := runGatewayServer(oa) 3381 defer sa.Shutdown() 3382 3383 oc := testGatewayOptionsFromToWithServers(t, "C", "B", sb) 3384 sc := runGatewayServer(oc) 3385 defer sc.Shutdown() 3386 3387 waitForOutboundGateways(t, sa, 2, time.Second) 3388 waitForOutboundGateways(t, sb, 2, time.Second) 3389 waitForOutboundGateways(t, sc, 2, time.Second) 3390 waitForInboundGateways(t, sa, 2, time.Second) 3391 
waitForInboundGateways(t, sb, 2, time.Second) 3392 waitForInboundGateways(t, sc, 2, time.Second) 3393 3394 // On A, create a sub to register some interest 3395 aURL := fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port) 3396 ncA := natsConnect(t, aURL) 3397 defer ncA.Close() 3398 natsSub(t, ncA, "sub.on.a.*", func(m *nats.Msg) {}) 3399 natsFlush(t, ncA) 3400 checkExpectedSubs(t, 1, sa) 3401 3402 // On C, have some sub activity while it receives 3403 // unwanted messages and switches to interestOnly mode. 3404 cURL := fmt.Sprintf("nats://%s:%d", oc.Host, oc.Port) 3405 ncC := natsConnect(t, cURL) 3406 defer ncC.Close() 3407 wg := sync.WaitGroup{} 3408 wg.Add(2) 3409 done := make(chan bool) 3410 consCount := 0 3411 accsCount := 0 3412 go func() { 3413 defer wg.Done() 3414 for i := 0; ; i++ { 3415 // Create subs and qsubs on same subject 3416 natsSub(t, ncC, fmt.Sprintf("foo.%d", i+1), func(_ *nats.Msg) {}) 3417 natsQueueSub(t, ncC, fmt.Sprintf("foo.%d", i+1), fmt.Sprintf("bar.%d", i+1), func(_ *nats.Msg) {}) 3418 // Create psubs and qsubs on unique subjects 3419 natsSub(t, ncC, fmt.Sprintf("foox.%d", i+1), func(_ *nats.Msg) {}) 3420 natsQueueSub(t, ncC, fmt.Sprintf("fooy.%d", i+1), fmt.Sprintf("bar.%d", i+1), func(_ *nats.Msg) {}) 3421 consCount += 4 3422 // Register account 3423 sc.RegisterAccount(fmt.Sprintf("acc.%d", i+1)) 3424 accsCount++ 3425 select { 3426 case <-done: 3427 return 3428 case <-time.After(15 * time.Millisecond): 3429 } 3430 } 3431 }() 3432 3433 // From B publish on subjects for which C has an interest 3434 bURL := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port) 3435 ncB := natsConnect(t, bURL) 3436 defer ncB.Close() 3437 3438 go func() { 3439 defer wg.Done() 3440 time.Sleep(10 * time.Millisecond) 3441 for { 3442 for i := 0; i < 10; i++ { 3443 natsPub(t, ncB, fmt.Sprintf("foo.%d", i+1), []byte("hello")) 3444 } 3445 select { 3446 case <-done: 3447 return 3448 case <-time.After(5 * time.Millisecond): 3449 } 3450 } 3451 }() 3452 3453 // From B, send a lot of 
messages that A is interested in, 3454 // but not C. 3455 // TODO(ik): May need to change that if we change the threshold 3456 // for when the switch happens. 3457 total := 300 3458 for i := 0; i < total; i++ { 3459 if err := ncB.Publish(fmt.Sprintf("sub.on.a.%d", i), []byte("hi")); err != nil { 3460 t.Fatalf("Error waiting for reply: %v", err) 3461 } 3462 } 3463 close(done) 3464 3465 // Normally, C would receive a message for each req inbox and 3466 // would send and RS- on that to B, making both have an unbounded 3467 // growth of the no-interest map. But after a certain amount 3468 // of RS-, C will send all its sub for the given account and 3469 // instruct B to send only if there is explicit interest. 3470 checkFor(t, 2*time.Second, 50*time.Millisecond, func() error { 3471 // Check C inbound connection from B 3472 c := getInboundGatewayConnection(sc, "B") 3473 c.mu.Lock() 3474 var switchedMode bool 3475 e := c.gw.insim[globalAccountName] 3476 if e != nil { 3477 switchedMode = e.ni == nil && e.mode == InterestOnly 3478 } 3479 c.mu.Unlock() 3480 if !switchedMode { 3481 return fmt.Errorf("C has still not switched mode") 3482 } 3483 return nil 3484 }) 3485 checkGWInterestOnlyMode(t, sb, "C", globalAccountName) 3486 wg.Wait() 3487 3488 // Check consCount and accsCount on C 3489 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 3490 sc.gateway.pasi.Lock() 3491 scount := len(sc.gateway.pasi.m[globalAccountName]) 3492 sc.gateway.pasi.Unlock() 3493 if scount != consCount { 3494 return fmt.Errorf("Expected %v consumers for global account, got %v", consCount, scount) 3495 } 3496 acount := sc.numAccounts() 3497 if acount != accsCount+1 { 3498 return fmt.Errorf("Expected %v accounts, got %v", accsCount+1, acount) 3499 } 3500 return nil 3501 }) 3502 3503 // Also, after all that, if a sub is created on C, it should 3504 // be sent to B (but not A). Check that this is the case. 
3505 // So first send from A on the subject that we are going to 3506 // use for this new sub. 3507 natsPub(t, ncA, "newsub", []byte("hello")) 3508 natsFlush(t, ncA) 3509 aOutboundToC := sa.getOutboundGatewayConnection("C") 3510 checkForSubjectNoInterest(t, aOutboundToC, globalAccountName, "newsub", true, 2*time.Second) 3511 3512 newSubSub := natsSub(t, ncC, "newsub", func(_ *nats.Msg) {}) 3513 natsFlush(t, ncC) 3514 checkExpectedSubs(t, consCount+1) 3515 checkFor(t, time.Second, 15*time.Millisecond, func() error { 3516 c := sb.getOutboundGatewayConnection("C") 3517 ei, _ := c.gw.outsim.Load(globalAccountName) 3518 if ei != nil { 3519 sl := ei.(*outsie).sl 3520 r := sl.Match("newsub") 3521 if len(r.psubs) == 1 { 3522 return nil 3523 } 3524 } 3525 return fmt.Errorf("Newsub not registered on B") 3526 }) 3527 checkForSubjectNoInterest(t, aOutboundToC, globalAccountName, "newsub", false, 2*time.Second) 3528 3529 natsUnsub(t, newSubSub) 3530 natsFlush(t, ncC) 3531 checkExpectedSubs(t, consCount) 3532 checkFor(t, time.Second, 15*time.Millisecond, func() error { 3533 c := sb.getOutboundGatewayConnection("C") 3534 ei, _ := c.gw.outsim.Load(globalAccountName) 3535 if ei != nil { 3536 sl := ei.(*outsie).sl 3537 r := sl.Match("newsub") 3538 if len(r.psubs) == 0 { 3539 return nil 3540 } 3541 } 3542 return fmt.Errorf("Newsub still registered on B") 3543 }) 3544 } 3545 3546 func TestGatewaySendAllSubsBadProtocol(t *testing.T) { 3547 ob := testDefaultOptionsForGateway("B") 3548 sb := runGatewayServer(ob) 3549 defer sb.Shutdown() 3550 3551 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 3552 sa := runGatewayServer(oa) 3553 defer sa.Shutdown() 3554 3555 waitForOutboundGateways(t, sa, 1, time.Second) 3556 waitForOutboundGateways(t, sb, 1, time.Second) 3557 waitForInboundGateways(t, sa, 1, time.Second) 3558 waitForInboundGateways(t, sb, 1, time.Second) 3559 3560 // For this test, make sure to use inbound from A so 3561 // A will reconnect when we send bad proto that 3562 
// causes connection to be closed. 3563 c := getInboundGatewayConnection(sa, "B") 3564 // Mock an invalid protocol (account name missing) 3565 info := &Info{ 3566 Gateway: "B", 3567 GatewayCmd: gatewayCmdAllSubsStart, 3568 } 3569 b, _ := json.Marshal(info) 3570 c.mu.Lock() 3571 c.enqueueProto([]byte(fmt.Sprintf("INFO %s\r\n", b))) 3572 c.mu.Unlock() 3573 3574 orgConn := c 3575 checkFor(t, 3*time.Second, 100*time.Millisecond, func() error { 3576 curConn := getInboundGatewayConnection(sa, "B") 3577 if orgConn == curConn { 3578 return fmt.Errorf("Not reconnected") 3579 } 3580 return nil 3581 }) 3582 3583 waitForOutboundGateways(t, sa, 1, 2*time.Second) 3584 waitForOutboundGateways(t, sb, 1, 2*time.Second) 3585 3586 // Refresh 3587 c = nil 3588 checkFor(t, 3*time.Second, 15*time.Millisecond, func() error { 3589 c = getInboundGatewayConnection(sa, "B") 3590 if c == nil { 3591 return fmt.Errorf("Did not reconnect") 3592 } 3593 return nil 3594 }) 3595 // Do correct start 3596 info.GatewayCmdPayload = []byte(globalAccountName) 3597 b, _ = json.Marshal(info) 3598 c.mu.Lock() 3599 c.enqueueProto([]byte(fmt.Sprintf("INFO %s\r\n", b))) 3600 c.mu.Unlock() 3601 // But incorrect end. 
3602 info.GatewayCmd = gatewayCmdAllSubsComplete 3603 info.GatewayCmdPayload = nil 3604 b, _ = json.Marshal(info) 3605 c.mu.Lock() 3606 c.enqueueProto([]byte(fmt.Sprintf("INFO %s\r\n", b))) 3607 c.mu.Unlock() 3608 3609 orgConn = c 3610 checkFor(t, 3*time.Second, 100*time.Millisecond, func() error { 3611 curConn := getInboundGatewayConnection(sa, "B") 3612 if orgConn == curConn { 3613 return fmt.Errorf("Not reconnected") 3614 } 3615 return nil 3616 }) 3617 } 3618 3619 func TestGatewayRaceOnClose(t *testing.T) { 3620 ob := testDefaultOptionsForGateway("B") 3621 sb := runGatewayServer(ob) 3622 defer sb.Shutdown() 3623 3624 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 3625 sa := runGatewayServer(oa) 3626 defer sa.Shutdown() 3627 3628 waitForOutboundGateways(t, sa, 1, time.Second) 3629 waitForOutboundGateways(t, sb, 1, time.Second) 3630 waitForInboundGateways(t, sa, 1, time.Second) 3631 waitForInboundGateways(t, sb, 1, time.Second) 3632 3633 bURL := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port) 3634 ncB := natsConnect(t, bURL, nats.NoReconnect()) 3635 defer ncB.Close() 3636 3637 wg := sync.WaitGroup{} 3638 wg.Add(1) 3639 go func() { 3640 defer wg.Done() 3641 cb := func(_ *nats.Msg) {} 3642 for { 3643 // Expect failure at one point and just return. 3644 qsub, err := ncB.QueueSubscribe("foo", "bar", cb) 3645 if err != nil { 3646 return 3647 } 3648 if err := qsub.Unsubscribe(); err != nil { 3649 return 3650 } 3651 } 3652 }() 3653 // Wait a bit and kill B 3654 time.Sleep(200 * time.Millisecond) 3655 sb.Shutdown() 3656 wg.Wait() 3657 } 3658 3659 // Similar to TestNewRoutesServiceImport but with 2 GW servers instead 3660 // of a cluster of 2 servers. 
3661 func TestGatewayServiceImport(t *testing.T) { 3662 GatewayDoNotForceInterestOnlyMode(true) 3663 defer GatewayDoNotForceInterestOnlyMode(false) 3664 3665 oa := testDefaultOptionsForGateway("A") 3666 setAccountUserPassInOptions(oa, "$foo", "clientA", "password") 3667 setAccountUserPassInOptions(oa, "$bar", "yyyyyyy", "password") 3668 sa := runGatewayServer(oa) 3669 defer sa.Shutdown() 3670 3671 ob := testGatewayOptionsFromToWithServers(t, "B", "A", sa) 3672 setAccountUserPassInOptions(ob, "$foo", "clientBFoo", "password") 3673 setAccountUserPassInOptions(ob, "$bar", "clientB", "password") 3674 sb := runGatewayServer(ob) 3675 defer sb.Shutdown() 3676 3677 waitForOutboundGateways(t, sa, 1, time.Second) 3678 waitForOutboundGateways(t, sb, 1, time.Second) 3679 waitForInboundGateways(t, sa, 1, time.Second) 3680 waitForInboundGateways(t, sb, 1, time.Second) 3681 3682 // Get accounts 3683 fooA, _ := sa.LookupAccount("$foo") 3684 barA, _ := sa.LookupAccount("$bar") 3685 fooB, _ := sb.LookupAccount("$foo") 3686 barB, _ := sb.LookupAccount("$bar") 3687 3688 // Add in the service export for the requests. Make it public. 3689 fooA.AddServiceExport("test.request", nil) 3690 fooB.AddServiceExport("test.request", nil) 3691 3692 // Add import abilities to server B's bar account from foo. 3693 if err := barB.AddServiceImport(fooB, "foo.request", "test.request"); err != nil { 3694 t.Fatalf("Error adding service import: %v", err) 3695 } 3696 // Same on A. 3697 if err := barA.AddServiceImport(fooA, "foo.request", "test.request"); err != nil { 3698 t.Fatalf("Error adding service import: %v", err) 3699 } 3700 3701 // clientA will be connected to srvA and be the service endpoint and responder. 
3702 aURL := fmt.Sprintf("nats://clientA:password@127.0.0.1:%d", oa.Port) 3703 clientA := natsConnect(t, aURL) 3704 defer clientA.Close() 3705 3706 subA := natsSubSync(t, clientA, "test.request") 3707 natsFlush(t, clientA) 3708 3709 // Now setup client B on srvB who will do a sub from account $bar 3710 // that should map account $foo's foo subject. 3711 bURL := fmt.Sprintf("nats://clientB:password@127.0.0.1:%d", ob.Port) 3712 clientB := natsConnect(t, bURL) 3713 defer clientB.Close() 3714 3715 subB := natsSubSync(t, clientB, "reply") 3716 natsFlush(t, clientB) 3717 3718 for i := 1; i <= 2; i++ { 3719 // Send the request from clientB on foo.request, 3720 natsPubReq(t, clientB, "foo.request", "reply", []byte("hi")) 3721 natsFlush(t, clientB) 3722 3723 // Expect the request on A 3724 msg, err := subA.NextMsg(time.Second) 3725 if err != nil { 3726 t.Fatalf("subA failed to get request: %v", err) 3727 } 3728 if msg.Subject != "test.request" || string(msg.Data) != "hi" { 3729 t.Fatalf("Unexpected message: %v", msg) 3730 } 3731 if msg.Reply == "reply" { 3732 t.Fatalf("Expected randomized reply, but got original") 3733 } 3734 3735 // Check for duplicate message 3736 if msg, err := subA.NextMsg(250 * time.Millisecond); err != nats.ErrTimeout { 3737 t.Fatalf("Unexpected msg: %v", msg) 3738 } 3739 3740 // Send reply 3741 natsPub(t, clientA, msg.Reply, []byte("ok")) 3742 natsFlush(t, clientA) 3743 3744 msg, err = subB.NextMsg(time.Second) 3745 if err != nil { 3746 t.Fatalf("subB failed to get reply: %v", err) 3747 } 3748 if msg.Subject != "reply" || string(msg.Data) != "ok" { 3749 t.Fatalf("Unexpected message: %v", msg) 3750 } 3751 3752 // Check for duplicate message 3753 if msg, err := subB.NextMsg(250 * time.Millisecond); err != nats.ErrTimeout { 3754 t.Fatalf("Unexpected msg: %v", msg) 3755 } 3756 3757 expected := int64(i * 2) 3758 vz, _ := sa.Varz(nil) 3759 if vz.OutMsgs != expected { 3760 t.Fatalf("Expected %d outMsgs for A, got %v", expected, vz.OutMsgs) 3761 } 3762 3763 
// For B, we expect it to send to gateway on the two subjects: test.request 3764 // and foo.request then send the reply to the client and optimistically 3765 // to the other gateway. 3766 if i == 1 { 3767 expected = 4 3768 } else { 3769 // The second time, one of the accounts will be suppressed and the reply going 3770 // back so we should only get 2 more messages. 3771 expected = 6 3772 } 3773 vz, _ = sb.Varz(nil) 3774 if vz.OutMsgs != expected { 3775 t.Fatalf("Expected %d outMsgs for B, got %v", expected, vz.OutMsgs) 3776 } 3777 } 3778 3779 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 3780 if ts := fooA.TotalSubs(); ts != 1 { 3781 return fmt.Errorf("Expected one sub to be left on fooA, but got %d", ts) 3782 } 3783 return nil 3784 }) 3785 3786 // Speed up exiration 3787 err := fooA.SetServiceExportResponseThreshold("test.request", 50*time.Millisecond) 3788 if err != nil { 3789 t.Fatalf("Error setting response threshold: %v", err) 3790 } 3791 3792 // Send 100 requests from clientB on foo.request, 3793 for i := 0; i < 100; i++ { 3794 natsPubReq(t, clientB, "foo.request", "reply", []byte("hi")) 3795 } 3796 natsFlush(t, clientB) 3797 3798 // Consume the requests, but don't reply to them... 3799 for i := 0; i < 100; i++ { 3800 if _, err := subA.NextMsg(time.Second); err != nil { 3801 t.Fatalf("subA did not receive request: %v", err) 3802 } 3803 } 3804 3805 // These reply subjects will be dangling off of $foo account on serverA. 3806 // Remove our service endpoint and wait for the dangling replies to go to zero. 3807 natsUnsub(t, subA) 3808 natsFlush(t, clientA) 3809 3810 checkFor(t, 2*time.Second, 10*time.Millisecond, func() error { 3811 if ts := fooA.TotalSubs(); ts != 0 { 3812 return fmt.Errorf("Number of subs is %d, should be zero", ts) 3813 } 3814 return nil 3815 }) 3816 3817 // Repeat similar test but without the small TTL and verify 3818 // that if B is shutdown, the dangling subs for replies are 3819 // cleared from the account sublist. 
3820 err = fooA.SetServiceExportResponseThreshold("test.request", 10*time.Second) 3821 if err != nil { 3822 t.Fatalf("Error setting response threshold: %v", err) 3823 } 3824 3825 subA = natsSubSync(t, clientA, "test.request") 3826 natsFlush(t, clientA) 3827 3828 // Send 100 requests from clientB on foo.request, 3829 for i := 0; i < 100; i++ { 3830 natsPubReq(t, clientB, "foo.request", "reply", []byte("hi")) 3831 } 3832 natsFlush(t, clientB) 3833 3834 // Consume the requests, but don't reply to them... 3835 for i := 0; i < 100; i++ { 3836 if _, err := subA.NextMsg(time.Second); err != nil { 3837 t.Fatalf("subA did not receive request: %v", err) 3838 } 3839 } 3840 3841 // Shutdown B 3842 clientB.Close() 3843 sb.Shutdown() 3844 3845 // Close our last sub 3846 natsUnsub(t, subA) 3847 natsFlush(t, clientA) 3848 3849 // Verify that they are gone before the 10 sec TTL 3850 checkFor(t, 2*time.Second, 10*time.Millisecond, func() error { 3851 if ts := fooA.TotalSubs(); ts != 0 { 3852 return fmt.Errorf("Number of subs is %d, should be zero", ts) 3853 } 3854 return nil 3855 }) 3856 3857 // Check that this all work in interest-only mode 3858 sb = runGatewayServer(ob) 3859 defer sb.Shutdown() 3860 3861 fooB, _ = sb.LookupAccount("$foo") 3862 barB, _ = sb.LookupAccount("$bar") 3863 3864 // Add in the service export for the requests. Make it public. 3865 fooB.AddServiceExport("test.request", nil) 3866 // Add import abilities to server B's bar account from foo. 
3867 if err := barB.AddServiceImport(fooB, "foo.request", "test.request"); err != nil { 3868 t.Fatalf("Error adding service import: %v", err) 3869 } 3870 3871 waitForOutboundGateways(t, sa, 1, 2*time.Second) 3872 waitForOutboundGateways(t, sb, 1, 2*time.Second) 3873 waitForInboundGateways(t, sa, 1, 2*time.Second) 3874 waitForInboundGateways(t, sb, 1, 2*time.Second) 3875 3876 // We need at least a subscription on A otherwise when publishing 3877 // to subjects with no interest we would simply get an A- 3878 natsSubSync(t, clientA, "not.used") 3879 3880 // Create a client on B that will use account $foo 3881 bURL = fmt.Sprintf("nats://clientBFoo:password@127.0.0.1:%d", ob.Port) 3882 clientB = natsConnect(t, bURL) 3883 defer clientB.Close() 3884 3885 // First flood with subjects that remote gw is not interested 3886 // so we switch to interest-only. 3887 for i := 0; i < 1100; i++ { 3888 natsPub(t, clientB, fmt.Sprintf("no.interest.%d", i), []byte("hello")) 3889 } 3890 natsFlush(t, clientB) 3891 3892 checkGWInterestOnlyMode(t, sb, "A", "$foo") 3893 3894 // Go back to clientB on $bar. 
// TestGatewayServiceImportWithQueue checks request/reply through a service
// import across two gateways ("A" and "B") when the responder on A uses a
// queue subscription. Account $bar imports "foo.request" from account $foo,
// mapped to $foo's exported "test.request".
//
// The test runs in three phases:
//  1. two request/reply round trips, verifying no duplicates and the
//     OutMsgs counters reported by Varz on both servers;
//  2. unanswered requests, verifying that the subscription count on A's
//     $foo account drops to zero (first with a 10ms response threshold,
//     then with a 10s threshold but with server B shut down);
//  3. the same round trip after B is restarted and the $foo account has
//     been switched to interest-only mode.
func TestGatewayServiceImportWithQueue(t *testing.T) {
	GatewayDoNotForceInterestOnlyMode(true)
	defer GatewayDoNotForceInterestOnlyMode(false)

	oa := testDefaultOptionsForGateway("A")
	setAccountUserPassInOptions(oa, "$foo", "clientA", "password")
	setAccountUserPassInOptions(oa, "$bar", "yyyyyyy", "password")
	sa := runGatewayServer(oa)
	defer sa.Shutdown()

	ob := testGatewayOptionsFromToWithServers(t, "B", "A", sa)
	setAccountUserPassInOptions(ob, "$foo", "clientBFoo", "password")
	setAccountUserPassInOptions(ob, "$bar", "clientB", "password")
	sb := runGatewayServer(ob)
	defer sb.Shutdown()

	waitForOutboundGateways(t, sa, 1, time.Second)
	waitForOutboundGateways(t, sb, 1, time.Second)
	waitForInboundGateways(t, sa, 1, time.Second)
	waitForInboundGateways(t, sb, 1, time.Second)

	// Get accounts
	fooA, _ := sa.LookupAccount("$foo")
	barA, _ := sa.LookupAccount("$bar")
	fooB, _ := sb.LookupAccount("$foo")
	barB, _ := sb.LookupAccount("$bar")

	// Add in the service export for the requests. Make it public.
	fooA.AddServiceExport("test.request", nil)
	fooB.AddServiceExport("test.request", nil)

	// Add import abilities to server B's bar account from foo.
	if err := barB.AddServiceImport(fooB, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}
	// Same on A.
	if err := barA.AddServiceImport(fooA, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}

	// clientA will be connected to srvA and be the service endpoint and responder.
	aURL := fmt.Sprintf("nats://clientA:password@127.0.0.1:%d", oa.Port)
	clientA := natsConnect(t, aURL)
	defer clientA.Close()

	subA := natsQueueSubSync(t, clientA, "test.request", "queue")
	natsFlush(t, clientA)

	// Now setup client B on srvB who will do a sub from account $bar
	// that should map account $foo's foo subject.
	bURL := fmt.Sprintf("nats://clientB:password@127.0.0.1:%d", ob.Port)
	clientB := natsConnect(t, bURL)
	defer clientB.Close()

	subB := natsQueueSubSync(t, clientB, "reply", "queue2")
	natsFlush(t, clientB)

	// Wait for queue interest on test.request from A to be registered
	// on server B.
	checkForRegisteredQSubInterest(t, sb, "A", "$foo", "test.request", 1, time.Second)

	// Phase 1: two full request/reply round trips.
	for i := 0; i < 2; i++ {
		// Send the request from clientB on foo.request,
		natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
		natsFlush(t, clientB)

		// Expect the request on A
		msg, err := subA.NextMsg(time.Second)
		if err != nil {
			t.Fatalf("subA failed to get request: %v", err)
		}
		if msg.Subject != "test.request" || string(msg.Data) != "hi" {
			t.Fatalf("Unexpected message: %v", msg)
		}
		// The responder must not see the requestor's original reply subject.
		if msg.Reply == "reply" {
			t.Fatalf("Expected randomized reply, but got original")
		}
		// Check for duplicate message
		if msg, err := subA.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
			t.Fatalf("Unexpected msg: %v", msg)
		}

		// Send reply
		natsPub(t, clientA, msg.Reply, []byte("ok"))
		natsFlush(t, clientA)

		msg, err = subB.NextMsg(time.Second)
		if err != nil {
			t.Fatalf("subB failed to get reply: %v", err)
		}
		if msg.Subject != "reply" || string(msg.Data) != "ok" {
			t.Fatalf("Unexpected message: %v", msg)
		}
		// Check for duplicate message
		if msg, err := subB.NextMsg(250 * time.Millisecond); err != nats.ErrTimeout {
			t.Fatalf("Unexpected msg: %v", msg)
		}

		// A is expected to have sent 2 messages per completed round trip.
		expected := int64((i + 1) * 2)
		vz, _ := sa.Varz(nil)
		if vz.OutMsgs != expected {
			t.Fatalf("Expected %d outMsgs for A, got %v", expected, vz.OutMsgs)
		}

		// For B, we expect it to send to gateway on the two subjects: test.request
		// and foo.request then send the reply to the client and optimistically
		// to the other gateway.
		if i == 0 {
			expected = 4
		} else {
			// The second time, one of the accounts will be suppressed and the reply going
			// back so we should get only 2 more messages.
			expected = 6
		}
		vz, _ = sb.Varz(nil)
		if vz.OutMsgs != expected {
			t.Fatalf("Expected %d outMsgs for B, got %v", expected, vz.OutMsgs)
		}
	}

	checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
		if ts := fooA.TotalSubs(); ts != 1 {
			return fmt.Errorf("Expected one sub to be left on fooA, but got %d", ts)
		}
		return nil
	})

	// Phase 2a: speed up expiration of pending responses.
	err := fooA.SetServiceExportResponseThreshold("test.request", 10*time.Millisecond)
	if err != nil {
		t.Fatalf("Error setting response threshold: %v", err)
	}

	// Send 100 requests from clientB on foo.request,
	for i := 0; i < 100; i++ {
		natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
	}
	natsFlush(t, clientB)

	// Consume the requests, but don't reply to them...
	for i := 0; i < 100; i++ {
		if _, err := subA.NextMsg(time.Second); err != nil {
			t.Fatalf("subA did not receive request: %v", err)
		}
	}

	// These reply subjects will be dangling off of $foo account on serverA.
	// Remove our service endpoint and wait for the dangling replies to go to zero.
	natsUnsub(t, subA)
	natsFlush(t, clientA)

	checkFor(t, 2*time.Second, 10*time.Millisecond, func() error {
		if ts := fooA.TotalSubs(); ts != 0 {
			return fmt.Errorf("Number of subs is %d, should be zero", ts)
		}
		return nil
	})
	checkForRegisteredQSubInterest(t, sb, "A", "$foo", "test.request", 0, time.Second)

	// Phase 2b: repeat similar test but without the small TTL and verify
	// that if B is shutdown, the dangling subs for replies are
	// cleared from the account sublist.
	err = fooA.SetServiceExportResponseThreshold("test.request", 10*time.Second)
	if err != nil {
		t.Fatalf("Error setting response threshold: %v", err)
	}

	subA = natsQueueSubSync(t, clientA, "test.request", "queue")
	natsFlush(t, clientA)
	checkForRegisteredQSubInterest(t, sb, "A", "$foo", "test.request", 1, time.Second)

	// Send 100 requests from clientB on foo.request,
	for i := 0; i < 100; i++ {
		natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
	}
	natsFlush(t, clientB)

	// Consume the requests, but don't reply to them...
	for i := 0; i < 100; i++ {
		if _, err := subA.NextMsg(time.Second); err != nil {
			t.Fatalf("subA did not receive request %d: %v", i+1, err)
		}
	}

	// Shutdown B
	clientB.Close()
	sb.Shutdown()

	// Close our last sub
	natsUnsub(t, subA)
	natsFlush(t, clientA)

	// Verify that they are gone before the 10 sec TTL
	checkFor(t, 2*time.Second, 10*time.Millisecond, func() error {
		if ts := fooA.TotalSubs(); ts != 0 {
			return fmt.Errorf("Number of subs is %d, should be zero", ts)
		}
		return nil
	})

	// Phase 3: check that this all works in interest-only mode.
	// B is restarted from the same options, so its accounts, export and
	// import have to be looked up and configured again.
	sb = runGatewayServer(ob)
	defer sb.Shutdown()

	fooB, _ = sb.LookupAccount("$foo")
	barB, _ = sb.LookupAccount("$bar")

	// Add in the service export for the requests. Make it public.
	fooB.AddServiceExport("test.request", nil)
	// Add import abilities to server B's bar account from foo.
	if err := barB.AddServiceImport(fooB, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}

	waitForOutboundGateways(t, sa, 1, 2*time.Second)
	waitForOutboundGateways(t, sb, 1, 2*time.Second)
	waitForInboundGateways(t, sa, 1, 2*time.Second)
	waitForInboundGateways(t, sb, 1, 2*time.Second)

	// We need at least a subscription on A otherwise when publishing
	// to subjects with no interest we would simply get an A-
	natsSubSync(t, clientA, "not.used")

	// Create a client on B that will use account $foo
	bURL = fmt.Sprintf("nats://clientBFoo:password@127.0.0.1:%d", ob.Port)
	clientB = natsConnect(t, bURL)
	defer clientB.Close()

	// First flood with subjects that remote gw is not interested
	// so we switch to interest-only.
	for i := 0; i < 1100; i++ {
		natsPub(t, clientB, fmt.Sprintf("no.interest.%d", i), []byte("hello"))
	}
	natsFlush(t, clientB)

	checkGWInterestOnlyMode(t, sb, "A", "$foo")

	// Go back to clientB on $bar.
	clientB.Close()
	bURL = fmt.Sprintf("nats://clientB:password@127.0.0.1:%d", ob.Port)
	clientB = natsConnect(t, bURL)
	defer clientB.Close()

	// Note: plain (non-queue) subscriptions are used for this last phase.
	subA = natsSubSync(t, clientA, "test.request")
	natsFlush(t, clientA)

	subB = natsSubSync(t, clientB, "reply")
	natsFlush(t, clientB)

	// Since it is interest-only, B should receive an interest
	// on $foo test.request
	checkGWInterestOnlyModeInterestOn(t, sb, "A", "$foo", "test.request")

	// Send the request from clientB on foo.request,
	natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
	natsFlush(t, clientB)

	// Expect the request on A
	msg, err := subA.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("subA failed to get request: %v", err)
	}
	if msg.Subject != "test.request" || string(msg.Data) != "hi" {
		t.Fatalf("Unexpected message: %v", msg)
	}
	if msg.Reply == "reply" {
		t.Fatalf("Expected randomized reply, but got original")
	}

	// Check for duplicate message
	if msg, err := subA.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
		t.Fatalf("Unexpected msg: %v", msg)
	}

	// Send reply
	natsPub(t, clientA, msg.Reply, []byte("ok"))
	natsFlush(t, clientA)

	msg, err = subB.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("subB failed to get reply: %v", err)
	}
	if msg.Subject != "reply" || string(msg.Data) != "ok" {
		t.Fatalf("Unexpected message: %v", msg)
	}

	// Check for duplicate message
	if msg, err := subB.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
		t.Fatalf("Unexpected msg: %v", msg)
	}
}
{ 4247 return fmt.Errorf("Still no gw outbound connection") 4248 } 4249 return nil 4250 }) 4251 ogc := s.getOutboundGatewayConnection(remoteGWName) 4252 ogc.mu.Lock() 4253 name := ogc.opts.Name 4254 nc := ogc.nc 4255 ogc.mu.Unlock() 4256 if name != remoteGWServer.ID() { 4257 rg := s.getRemoteGateway(remoteGWName) 4258 goodURL := remoteGWServer.getGatewayURL() 4259 rg.Lock() 4260 for u := range rg.urls { 4261 if u != goodURL { 4262 delete(rg.urls, u) 4263 } 4264 } 4265 rg.Unlock() 4266 if nc != nil { 4267 nc.Close() 4268 } 4269 } else { 4270 good = true 4271 } 4272 } 4273 if !good { 4274 t.Fatalf("Could not ensure that server connects to remote gateway %q at URL %q", 4275 remoteGWName, remoteGWServer.getGatewayURL()) 4276 } 4277 } 4278 4279 func TestGatewayServiceImportComplexSetup(t *testing.T) { 4280 // This test will have following setup: 4281 // 4282 // |- responder (subs to "$foo test.request") 4283 // | (sends to "$foo _R_.xxxx") 4284 // route v 4285 // [A1]<----------------->[A2] 4286 // ^ |^ | 4287 // |gw| \______gw________ gw| 4288 // | v \ v 4289 // [B1]<----------------->[B2] 4290 // ^ route 4291 // | 4292 // |_ requestor (sends "$bar foo.request reply") 4293 // 4294 4295 // Setup first A1 and B1 to ensure that they have GWs 4296 // connections as described above. 
// TestGatewayServiceImportComplexSetup checks request/reply through a
// service import when requestor and responder sit on servers that are not
// directly connected by a gateway connection in the request direction:
// the responder is connected to A2, the requestor to B1 (see diagram below).
// It then verifies subscription counts on every account/server, that pending
// responses expire, and that the round trip still works after the $foo
// account has been switched to interest-only mode.
func TestGatewayServiceImportComplexSetup(t *testing.T) {
	// This test will have following setup:
	//
	//                           |- responder (subs  to "$foo test.request")
	//                           |            (sends to "$foo _R_.xxxx")
	//              route        v
	//   [A1]<----------------->[A2]
	//   ^  |^                    |
	//   |gw| \______gw________ gw|
	//   |  v                  \  v
	//   [B1]<----------------->[B2]
	//    ^         route
	//    |
	//    |_ requestor (sends "$bar foo.request reply")
	//

	// Setup first A1 and B1 to ensure that they have GWs
	// connections as described above.

	oa1 := testDefaultOptionsForGateway("A")
	setAccountUserPassInOptions(oa1, "$foo", "clientA", "password")
	setAccountUserPassInOptions(oa1, "$bar", "yyyyyyy", "password")
	sa1 := runGatewayServer(oa1)
	defer sa1.Shutdown()

	ob1 := testGatewayOptionsFromToWithServers(t, "B", "A", sa1)
	setAccountUserPassInOptions(ob1, "$foo", "xxxxxxx", "password")
	setAccountUserPassInOptions(ob1, "$bar", "clientB", "password")
	sb1 := runGatewayServer(ob1)
	defer sb1.Shutdown()

	waitForOutboundGateways(t, sa1, 1, time.Second)
	waitForOutboundGateways(t, sb1, 1, time.Second)

	waitForInboundGateways(t, sa1, 1, time.Second)
	waitForInboundGateways(t, sb1, 1, time.Second)

	// B2: routed to B1, gateway configured towards A1.
	ob2 := testGatewayOptionsFromToWithServers(t, "B", "A", sa1)
	ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sb1.ClusterAddr().Port))
	setAccountUserPassInOptions(ob2, "$foo", "clientBFoo", "password")
	setAccountUserPassInOptions(ob2, "$bar", "clientB", "password")
	ob2.gatewaysSolicitDelay = time.Nanosecond // 0 would be default, so nano to connect asap
	sb2 := runGatewayServer(ob2)
	defer sb2.Shutdown()

	waitForOutboundGateways(t, sa1, 1, time.Second)
	waitForOutboundGateways(t, sb1, 1, time.Second)
	waitForOutboundGateways(t, sb2, 1, 2*time.Second)

	waitForInboundGateways(t, sa1, 2, time.Second)
	waitForInboundGateways(t, sb1, 1, time.Second)
	waitForInboundGateways(t, sb2, 0, time.Second)

	// A2: routed to A1, gateway configured towards B2.
	oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2)
	oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sa1.ClusterAddr().Port))
	setAccountUserPassInOptions(oa2, "$foo", "clientA", "password")
	setAccountUserPassInOptions(oa2, "$bar", "yyyyyyy", "password")
	oa2.gatewaysSolicitDelay = time.Nanosecond // 0 would be default, so nano to connect asap
	sa2 := runGatewayServer(oa2)
	defer sa2.Shutdown()

	ensureGWConnectTo(t, sa2, "B", sb2)

	checkClusterFormed(t, sa1, sa2)
	checkClusterFormed(t, sb1, sb2)

	waitForOutboundGateways(t, sa1, 1, time.Second)
	waitForOutboundGateways(t, sb1, 1, time.Second)
	waitForOutboundGateways(t, sb2, 1, time.Second)
	waitForOutboundGateways(t, sa2, 1, 2*time.Second)

	waitForInboundGateways(t, sa1, 2, time.Second)
	waitForInboundGateways(t, sb1, 1, time.Second)
	waitForInboundGateways(t, sb2, 1, 2*time.Second)
	waitForInboundGateways(t, sa2, 0, time.Second)

	// Verification that we have what we wanted
	c := sa2.getOutboundGatewayConnection("B")
	if c == nil || c.opts.Name != sb2.ID() {
		t.Fatalf("A2 does not have outbound to B2")
	}
	c = getInboundGatewayConnection(sa2, "B")
	if c != nil {
		t.Fatalf("Bad setup")
	}
	c = sb2.getOutboundGatewayConnection("A")
	if c == nil || c.opts.Name != sa1.ID() {
		t.Fatalf("B2 does not have outbound to A1")
	}
	c = getInboundGatewayConnection(sb2, "A")
	if c == nil || c.opts.Name != sa2.ID() {
		t.Fatalf("Bad setup")
	}

	// Ok, so now that we have proper setup, do actual test!

	// Get accounts
	fooA1, _ := sa1.LookupAccount("$foo")
	barA1, _ := sa1.LookupAccount("$bar")
	fooA2, _ := sa2.LookupAccount("$foo")
	barA2, _ := sa2.LookupAccount("$bar")

	fooB1, _ := sb1.LookupAccount("$foo")
	barB1, _ := sb1.LookupAccount("$bar")
	fooB2, _ := sb2.LookupAccount("$foo")
	barB2, _ := sb2.LookupAccount("$bar")

	// Add in the service export for the requests. Make it public.
	fooA1.AddServiceExport("test.request", nil)
	fooA2.AddServiceExport("test.request", nil)
	fooB1.AddServiceExport("test.request", nil)
	fooB2.AddServiceExport("test.request", nil)

	// Add import abilities to server B's bar account from foo.
	if err := barB1.AddServiceImport(fooB1, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}
	if err := barB2.AddServiceImport(fooB2, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}
	// Same on A.
	if err := barA1.AddServiceImport(fooA1, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}
	if err := barA2.AddServiceImport(fooA2, "foo.request", "test.request"); err != nil {
		t.Fatalf("Error adding service import: %v", err)
	}

	// clientA will be connected to A2 and be the service endpoint and responder.
	a2URL := fmt.Sprintf("nats://clientA:password@127.0.0.1:%d", oa2.Port)
	clientA := natsConnect(t, a2URL)
	defer clientA.Close()

	subA := natsSubSync(t, clientA, "test.request")
	natsFlush(t, clientA)

	// Now setup client B on B1 who will do a sub from account $bar
	// that should map account $foo's foo subject.
	b1URL := fmt.Sprintf("nats://clientB:password@127.0.0.1:%d", ob1.Port)
	clientB := natsConnect(t, b1URL)
	defer clientB.Close()

	subB := natsSubSync(t, clientB, "reply")
	natsFlush(t, clientB)

	var msg *nats.Msg
	var err error
	// NOTE(review): there is no break on success, so when the first attempt
	// succeeds a second request is still published and consumed, and msg
	// ends up holding the second request. Confirm this is intended.
	for attempts := 1; attempts <= 2; attempts++ {
		// Send the request from clientB on foo.request,
		natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
		natsFlush(t, clientB)

		// Expect the request on A
		msg, err = subA.NextMsg(time.Second)
		if err != nil {
			if attempts == 1 {
				// Since we are in interestOnly mode, it is possible
				// that server B did not receive the subscription
				// interest yet, so try again.
				continue
			}
			t.Fatalf("subA failed to get request: %v", err)
		}
		if msg.Subject != "test.request" || string(msg.Data) != "hi" {
			t.Fatalf("Unexpected message: %v", msg)
		}
		if msg.Reply == "reply" {
			t.Fatalf("Expected randomized reply, but got original")
		}
	}
	// Make sure we don't receive a second copy
	if msg, err := subA.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
		t.Fatalf("Received unexpected message: %v", msg)
	}

	// Send reply
	natsPub(t, clientA, msg.Reply, []byte("ok"))
	natsFlush(t, clientA)

	msg, err = subB.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("subB failed to get reply: %v", err)
	}
	if msg.Subject != "reply" || string(msg.Data) != "ok" {
		t.Fatalf("Unexpected message: %v", msg)
	}
	// Make sure we don't receive a second copy
	if msg, err := subB.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
		t.Fatalf("Received unexpected message: %v", msg)
	}

	// checkSubs waits (up to 2 seconds) for the account's total number of
	// subscriptions to equal expected; srvName is only used in the error.
	checkSubs := func(t *testing.T, acc *Account, srvName string, expected int) {
		t.Helper()
		checkFor(t, 2*time.Second, 10*time.Millisecond, func() error {
			if ts := acc.TotalSubs(); ts != expected {
				return fmt.Errorf("Number of subs is %d on acc=%s srv=%s, should be %v", ts, acc.Name, srvName, expected)
			}
			return nil
		})
	}
	checkSubs(t, fooA1, "A1", 1)
	checkSubs(t, barA1, "A1", 1)
	checkSubs(t, fooA2, "A2", 1)
	checkSubs(t, barA2, "A2", 1)
	checkSubs(t, fooB1, "B1", 1)
	checkSubs(t, barB1, "B1", 2)
	checkSubs(t, fooB2, "B2", 1)
	checkSubs(t, barB2, "B2", 2)

	// Speed up expiration
	err = fooA2.SetServiceExportResponseThreshold("test.request", 10*time.Millisecond)
	if err != nil {
		t.Fatalf("Error setting response threshold: %v", err)
	}
	err = fooB1.SetServiceExportResponseThreshold("test.request", 10*time.Millisecond)
	if err != nil {
		t.Fatalf("Error setting response threshold: %v", err)
	}

	// Send 100 requests from clientB on foo.request,
	for i := 0; i < 100; i++ {
		natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
	}
	natsFlush(t, clientB)

	// Consume the requests, but don't reply to them...
	for i := 0; i < 100; i++ {
		if _, err := subA.NextMsg(time.Second); err != nil {
			t.Fatalf("subA did not receive request: %v", err)
		}
	}

	// Unsubscribe all and ensure counts go to 0.
	natsUnsub(t, subA)
	natsFlush(t, clientA)
	natsUnsub(t, subB)
	natsFlush(t, clientB)

	// We should expire because ttl.
	checkFor(t, 2*time.Second, 10*time.Millisecond, func() error {
		if nr := len(fooA1.exports.responses); nr != 0 {
			return fmt.Errorf("Number of responses is %d", nr)
		}
		return nil
	})

	checkSubs(t, fooA1, "A1", 0)
	checkSubs(t, fooA2, "A2", 0)
	checkSubs(t, fooB1, "B1", 1)
	checkSubs(t, fooB2, "B2", 1)

	checkSubs(t, barA1, "A1", 1)
	checkSubs(t, barA2, "A2", 1)
	checkSubs(t, barB1, "B1", 1)
	checkSubs(t, barB2, "B2", 1)

	// Check that this all works in interest-only mode.

	// We need at least a subscription on B2 otherwise when publishing
	// to subjects with no interest we would simply get an A-
	b2URL := fmt.Sprintf("nats://clientBFoo:password@127.0.0.1:%d", ob2.Port)
	clientB2 := natsConnect(t, b2URL)
	defer clientB2.Close()
	natsSubSync(t, clientB2, "not.used")
	natsFlush(t, clientB2)

	// Make A2 flood B2 with subjects that B2 is not interested in.
	for i := 0; i < 1100; i++ {
		natsPub(t, clientA, fmt.Sprintf("no.interest.%d", i), []byte("hello"))
	}
	natsFlush(t, clientA)
	// Wait for B2 to switch to interest-only
	checkGWInterestOnlyMode(t, sa2, "B", "$foo")

	subA = natsSubSync(t, clientA, "test.request")
	natsFlush(t, clientA)

	subB = natsSubSync(t, clientB, "reply")
	natsFlush(t, clientB)

	// Same retry loop as above (and, likewise, no break on success).
	for attempts := 1; attempts <= 2; attempts++ {
		// Send the request from clientB on foo.request,
		natsPubReq(t, clientB, "foo.request", "reply", []byte("hi"))
		natsFlush(t, clientB)

		// Expect the request on A
		msg, err = subA.NextMsg(time.Second)
		if err != nil {
			if attempts == 1 {
				// Since we are in interestOnly mode, it is possible
				// that server B did not receive the subscription
				// interest yet, so try again.
				continue
			}
			t.Fatalf("subA failed to get request: %v", err)
		}
		if msg.Subject != "test.request" || string(msg.Data) != "hi" {
			t.Fatalf("Unexpected message: %v", msg)
		}
		if msg.Reply == "reply" {
			t.Fatalf("Expected randomized reply, but got original")
		}
	}

	// Check for duplicate message
	if msg, err := subA.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
		t.Fatalf("Unexpected msg: %v", msg)
	}

	// Send reply
	natsPub(t, clientA, msg.Reply, []byte("ok"))
	natsFlush(t, clientA)

	msg, err = subB.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("subB failed to get reply: %v", err)
	}
	if msg.Subject != "reply" || string(msg.Data) != "ok" {
		t.Fatalf("Unexpected message: %v", msg)
	}

	// Check for duplicate message
	if msg, err := subB.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
		t.Fatalf("Unexpected msg: %v", msg)
	}
}
// TestGatewayServiceExportWithWildcards runs the same four-server scenario
// as TestGatewayServiceImportComplexSetup, but with a wildcard service
// export ("ngs.update.*") that account $bar imports as "ngs.update" mapped
// to "ngs.update.$bar". It is run twice: once with a public export (nil
// account list) and once with a private export restricted to the local
// $bar account.
func TestGatewayServiceExportWithWildcards(t *testing.T) {
	// This test will have following setup:
	//
	//                           |- responder
	//                           |
	//              route        v
	//   [A1]<----------------->[A2]
	//   ^  |^                    |
	//   |gw| \______gw________ gw|
	//   |  v                  \  v
	//   [B1]<----------------->[B2]
	//    ^         route
	//    |
	//    |_ requestor
	//

	for _, test := range []struct {
		name   string
		public bool
	}{
		{
			name:   "public",
			public: true,
		},
		{
			name:   "private",
			public: false,
		},
	} {
		t.Run(test.name, func(t *testing.T) {

			// Setup first A1 and B1 to ensure that they have GWs
			// connections as described above.

			oa1 := testDefaultOptionsForGateway("A")
			setAccountUserPassInOptions(oa1, "$foo", "clientA", "password")
			setAccountUserPassInOptions(oa1, "$bar", "yyyyyyy", "password")
			sa1 := runGatewayServer(oa1)
			defer sa1.Shutdown()

			ob1 := testGatewayOptionsFromToWithServers(t, "B", "A", sa1)
			setAccountUserPassInOptions(ob1, "$foo", "xxxxxxx", "password")
			setAccountUserPassInOptions(ob1, "$bar", "clientB", "password")
			sb1 := runGatewayServer(ob1)
			defer sb1.Shutdown()

			waitForOutboundGateways(t, sa1, 1, time.Second)
			waitForOutboundGateways(t, sb1, 1, time.Second)

			waitForInboundGateways(t, sa1, 1, time.Second)
			waitForInboundGateways(t, sb1, 1, time.Second)

			// B2: routed to B1, gateway configured towards A1.
			ob2 := testGatewayOptionsFromToWithServers(t, "B", "A", sa1)
			ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sb1.ClusterAddr().Port))
			setAccountUserPassInOptions(ob2, "$foo", "clientBFoo", "password")
			setAccountUserPassInOptions(ob2, "$bar", "clientB", "password")
			ob2.gatewaysSolicitDelay = time.Nanosecond // 0 would be default, so nano to connect asap
			sb2 := runGatewayServer(ob2)
			defer sb2.Shutdown()

			waitForOutboundGateways(t, sa1, 1, time.Second)
			waitForOutboundGateways(t, sb1, 1, time.Second)
			waitForOutboundGateways(t, sb2, 1, 2*time.Second)

			waitForInboundGateways(t, sa1, 2, time.Second)
			waitForInboundGateways(t, sb1, 1, time.Second)
			waitForInboundGateways(t, sb2, 0, time.Second)

			// A2: routed to A1, gateway configured towards B2.
			oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2)
			oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", sa1.ClusterAddr().Port))
			setAccountUserPassInOptions(oa2, "$foo", "clientA", "password")
			setAccountUserPassInOptions(oa2, "$bar", "yyyyyyy", "password")
			oa2.gatewaysSolicitDelay = time.Nanosecond // 0 would be default, so nano to connect asap
			sa2 := runGatewayServer(oa2)
			defer sa2.Shutdown()

			ensureGWConnectTo(t, sa2, "B", sb2)

			checkClusterFormed(t, sa1, sa2)
			checkClusterFormed(t, sb1, sb2)

			waitForOutboundGateways(t, sa1, 1, time.Second)
			waitForOutboundGateways(t, sb1, 1, time.Second)
			waitForOutboundGateways(t, sb2, 1, time.Second)
			waitForOutboundGateways(t, sa2, 1, 2*time.Second)

			waitForInboundGateways(t, sa1, 2, time.Second)
			waitForInboundGateways(t, sb1, 1, time.Second)
			waitForInboundGateways(t, sb2, 1, 2*time.Second)
			waitForInboundGateways(t, sa2, 0, time.Second)

			// Verification that we have what we wanted
			c := sa2.getOutboundGatewayConnection("B")
			if c == nil || c.opts.Name != sb2.ID() {
				t.Fatalf("A2 does not have outbound to B2")
			}
			c = getInboundGatewayConnection(sa2, "B")
			if c != nil {
				t.Fatalf("Bad setup")
			}
			c = sb2.getOutboundGatewayConnection("A")
			if c == nil || c.opts.Name != sa1.ID() {
				t.Fatalf("B2 does not have outbound to A1")
			}
			c = getInboundGatewayConnection(sb2, "A")
			if c == nil || c.opts.Name != sa2.ID() {
				t.Fatalf("Bad setup")
			}

			// Ok, so now that we have proper setup, do actual test!

			// Get accounts
			fooA1, _ := sa1.LookupAccount("$foo")
			barA1, _ := sa1.LookupAccount("$bar")
			fooA2, _ := sa2.LookupAccount("$foo")
			barA2, _ := sa2.LookupAccount("$bar")

			fooB1, _ := sb1.LookupAccount("$foo")
			barB1, _ := sb1.LookupAccount("$bar")
			fooB2, _ := sb2.LookupAccount("$foo")
			barB2, _ := sb2.LookupAccount("$bar")

			// accs stays nil for the public case; for the private case it is
			// set to the $bar account of the server being configured.
			var accs []*Account
			// Add in the service export for the requests.
			if !test.public {
				accs = []*Account{barA1}
			}
			fooA1.AddServiceExport("ngs.update.*", accs)
			if !test.public {
				accs = []*Account{barA2}
			}
			fooA2.AddServiceExport("ngs.update.*", accs)
			if !test.public {
				accs = []*Account{barB1}
			}
			fooB1.AddServiceExport("ngs.update.*", accs)
			if !test.public {
				accs = []*Account{barB2}
			}
			fooB2.AddServiceExport("ngs.update.*", accs)

			// Add import abilities to server B's bar account from foo.
			if err := barB1.AddServiceImport(fooB1, "ngs.update", "ngs.update.$bar"); err != nil {
				t.Fatalf("Error adding service import: %v", err)
			}
			if err := barB2.AddServiceImport(fooB2, "ngs.update", "ngs.update.$bar"); err != nil {
				t.Fatalf("Error adding service import: %v", err)
			}
			// Same on A.
			if err := barA1.AddServiceImport(fooA1, "ngs.update", "ngs.update.$bar"); err != nil {
				t.Fatalf("Error adding service import: %v", err)
			}
			if err := barA2.AddServiceImport(fooA2, "ngs.update", "ngs.update.$bar"); err != nil {
				t.Fatalf("Error adding service import: %v", err)
			}

			// clientA will be connected to A2 and be the service endpoint and responder.
			a2URL := fmt.Sprintf("nats://clientA:password@127.0.0.1:%d", oa2.Port)
			clientA := natsConnect(t, a2URL)
			defer clientA.Close()

			subA := natsSubSync(t, clientA, "ngs.update.$bar")
			natsFlush(t, clientA)

			// Now setup client B on B1 who will do a sub from account $bar
			// that should map account $foo's foo subject.
			b1URL := fmt.Sprintf("nats://clientB:password@127.0.0.1:%d", ob1.Port)
			clientB := natsConnect(t, b1URL)
			defer clientB.Close()

			subB := natsSubSync(t, clientB, "reply")
			natsFlush(t, clientB)

			var msg *nats.Msg
			var err error
			// NOTE(review): there is no break on success, so when the first
			// attempt succeeds a second request is still published and
			// consumed. Confirm this is intended.
			for attempts := 1; attempts <= 2; attempts++ {
				// Send the request from clientB on ngs.update,
				natsPubReq(t, clientB, "ngs.update", "reply", []byte("hi"))
				natsFlush(t, clientB)

				// Expect the request on A
				msg, err = subA.NextMsg(time.Second)
				if err != nil {
					if attempts == 1 {
						// Since we are in interestOnly mode, it is possible
						// that server B did not receive the subscription
						// interest yet, so try again.
						continue
					}
					t.Fatalf("subA failed to get request: %v", err)
				}
				if msg.Subject != "ngs.update.$bar" || string(msg.Data) != "hi" {
					t.Fatalf("Unexpected message: %v", msg)
				}
				if msg.Reply == "reply" {
					t.Fatalf("Expected randomized reply, but got original")
				}
			}
			// Make sure we don't receive a second copy
			if msg, err := subA.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
				t.Fatalf("Received unexpected message: %v", msg)
			}

			// Send reply
			natsPub(t, clientA, msg.Reply, []byte("ok"))
			natsFlush(t, clientA)

			msg, err = subB.NextMsg(time.Second)
			if err != nil {
				t.Fatalf("subB failed to get reply: %v", err)
			}
			if msg.Subject != "reply" || string(msg.Data) != "ok" {
				t.Fatalf("Unexpected message: %v", msg)
			}
			// Make sure we don't receive a second copy
			if msg, err := subB.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
				t.Fatalf("Received unexpected message: %v", msg)
			}

			// checkSubs waits (up to 2 seconds) for the account's total number
			// of subscriptions to equal expected; srvName is only used in the
			// error message.
			checkSubs := func(t *testing.T, acc *Account, srvName string, expected int) {
				t.Helper()
				checkFor(t, 2*time.Second, 10*time.Millisecond, func() error {
					if ts := acc.TotalSubs(); ts != expected {
						return fmt.Errorf("Number of subs is %d on acc=%s srv=%s, should be %v", ts, acc.Name, srvName, expected)
					}
					return nil
				})
			}
			checkSubs(t, fooA1, "A1", 1)
			checkSubs(t, barA1, "A1", 1)
			checkSubs(t, fooA2, "A2", 1)
			checkSubs(t, barA2, "A2", 1)
			checkSubs(t, fooB1, "B1", 1)
			checkSubs(t, barB1, "B1", 2)
			checkSubs(t, fooB2, "B2", 1)
			checkSubs(t, barB2, "B2", 2)

			// Speed up expiration
			// NOTE(review): the responder is connected to A2, yet the threshold
			// is set on A1's $foo account here. Confirm this is intended.
			err = fooA1.SetServiceExportResponseThreshold("ngs.update.*", 10*time.Millisecond)
			if err != nil {
				t.Fatalf("Error setting response threshold: %v", err)
			}
			err = fooB1.SetServiceExportResponseThreshold("ngs.update.*", 10*time.Millisecond)
			if err != nil {
				t.Fatalf("Error setting response threshold: %v", err)
			}

			// Send 100 requests from clientB on ngs.update,
			for i := 0; i < 100; i++ {
				natsPubReq(t, clientB, "ngs.update", "reply", []byte("hi"))
			}
			natsFlush(t, clientB)

			// Consume the requests, but don't reply to them...
			for i := 0; i < 100; i++ {
				if _, err := subA.NextMsg(time.Second); err != nil {
					t.Fatalf("subA did not receive request: %v", err)
				}
			}

			// Unsubscribe all and ensure counts go to 0.
			natsUnsub(t, subA)
			natsFlush(t, clientA)
			natsUnsub(t, subB)
			natsFlush(t, clientB)

			// We should expire because ttl.
			checkFor(t, 2*time.Second, 10*time.Millisecond, func() error {
				if nr := len(fooA1.exports.responses); nr != 0 {
					return fmt.Errorf("Number of responses is %d", nr)
				}
				return nil
			})

			checkSubs(t, fooA1, "A1", 0)
			checkSubs(t, fooA2, "A2", 0)
			checkSubs(t, fooB1, "B1", 1)
			checkSubs(t, fooB2, "B2", 1)

			checkSubs(t, barA1, "A1", 1)
			checkSubs(t, barA2, "A2", 1)
			checkSubs(t, barB1, "B1", 1)
			checkSubs(t, barB2, "B2", 1)

			// Check that this all works in interest-only mode.

			// We need at least a subscription on B2 otherwise when publishing
			// to subjects with no interest we would simply get an A-
			b2URL := fmt.Sprintf("nats://clientBFoo:password@127.0.0.1:%d", ob2.Port)
			clientB2 := natsConnect(t, b2URL)
			defer clientB2.Close()
			natsSubSync(t, clientB2, "not.used")
			natsFlush(t, clientB2)

			// Make A2 flood B2 with subjects that B2 is not interested in.
			for i := 0; i < 1100; i++ {
				natsPub(t, clientA, fmt.Sprintf("no.interest.%d", i), []byte("hello"))
			}
			natsFlush(t, clientA)

			// Wait for B2 to switch to interest-only
			checkGWInterestOnlyMode(t, sa2, "B", "$foo")

			// Note: the responder now subscribes with the wildcard subject.
			subA = natsSubSync(t, clientA, "ngs.update.*")
			natsFlush(t, clientA)

			subB = natsSubSync(t, clientB, "reply")
			natsFlush(t, clientB)

			// Same retry loop as above (and, likewise, no break on success).
			for attempts := 1; attempts <= 2; attempts++ {
				// Send the request from clientB on ngs.update,
				natsPubReq(t, clientB, "ngs.update", "reply", []byte("hi"))
				natsFlush(t, clientB)

				// Expect the request on A
				msg, err = subA.NextMsg(time.Second)
				if err != nil {
					if attempts == 1 {
						// Since we are in interestOnly mode, it is possible
						// that server B did not receive the subscription
						// interest yet, so try again.
						continue
					}
					t.Fatalf("subA failed to get request: %v", err)
				}
				if msg.Subject != "ngs.update.$bar" || string(msg.Data) != "hi" {
					t.Fatalf("Unexpected message: %v", msg)
				}
				if msg.Reply == "reply" {
					t.Fatalf("Expected randomized reply, but got original")
				}
			}

			// Check for duplicate message
			if msg, err := subA.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
				t.Fatalf("Unexpected msg: %v", msg)
			}

			// Send reply
			natsPub(t, clientA, msg.Reply, []byte("ok"))
			natsFlush(t, clientA)

			msg, err = subB.NextMsg(time.Second)
			if err != nil {
				t.Fatalf("subB failed to get reply: %v", err)
			}
			if msg.Subject != "reply" || string(msg.Data) != "ok" {
				t.Fatalf("Unexpected message: %v", msg)
			}

			// Check for duplicate message
			if msg, err := subB.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout {
				t.Fatalf("Unexpected msg: %v", msg)
			}
		})
	}
}
files > 256. 4965 func TestGatewayMemUsage(t *testing.T) { 4966 // Try to clean up. 4967 runtime.GC() 4968 var m runtime.MemStats 4969 runtime.ReadMemStats(&m) 4970 pta := m.TotalAlloc 4971 4972 o := testDefaultOptionsForGateway("A") 4973 s := runGatewayServer(o) 4974 defer s.Shutdown() 4975 4976 var servers []*Server 4977 servers = append(servers, s) 4978 4979 numServers := 10 4980 for i := 0; i < numServers; i++ { 4981 rn := fmt.Sprintf("RG_%d", i+1) 4982 o := testGatewayOptionsFromToWithServers(t, rn, "A", s) 4983 s := runGatewayServer(o) 4984 defer s.Shutdown() 4985 servers = append(servers, s) 4986 } 4987 4988 // Each server should have an outbound 4989 for _, s := range servers { 4990 waitForOutboundGateways(t, s, numServers, 2*time.Second) 4991 } 4992 // The first started server should have numServers inbounds (since 4993 // they all connect to it). 4994 waitForInboundGateways(t, s, numServers, 2*time.Second) 4995 4996 // Calculate in MB what we are using now. 4997 const max = 50 * 1024 * 1024 // 50MB 4998 runtime.ReadMemStats(&m) 4999 used := m.TotalAlloc - pta 5000 if used > max { 5001 t.Fatalf("Cluster using too much memory, expect < 50MB, got %dMB", used/(1024*1024)) 5002 } 5003 5004 for _, s := range servers { 5005 s.Shutdown() 5006 } 5007 } 5008 5009 func TestGatewayMapReplyOnlyForRecentSub(t *testing.T) { 5010 o2 := testDefaultOptionsForGateway("B") 5011 s2 := runGatewayServer(o2) 5012 defer s2.Shutdown() 5013 5014 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 5015 s1 := runGatewayServer(o1) 5016 defer s1.Shutdown() 5017 5018 waitForOutboundGateways(t, s1, 1, time.Second) 5019 waitForOutboundGateways(t, s2, 1, time.Second) 5020 5021 // Change s1's recent sub expiration default value 5022 s1.mu.Lock() 5023 s1.gateway.pasi.Lock() 5024 s1.gateway.recSubExp = 100 * time.Millisecond 5025 s1.gateway.pasi.Unlock() 5026 s1.mu.Unlock() 5027 5028 // Setup a replier on s2 5029 nc2 := natsConnect(t, fmt.Sprintf("nats://%s:%d", o2.Host, o2.Port)) 5030 
defer nc2.Close() 5031 errCh := make(chan error, 1) 5032 natsSub(t, nc2, "foo", func(m *nats.Msg) { 5033 // Send reply regardless.. 5034 nc2.Publish(m.Reply, []byte("reply")) 5035 // Check that reply given to application is not mapped. 5036 if !strings.HasPrefix(m.Reply, nats.InboxPrefix) { 5037 errCh <- fmt.Errorf("Reply expected to have normal inbox, got %v", m.Reply) 5038 return 5039 } 5040 errCh <- nil 5041 }) 5042 natsFlush(t, nc2) 5043 checkExpectedSubs(t, 1, s2) 5044 5045 // Create requestor on s1 5046 nc1 := natsConnect(t, fmt.Sprintf("nats://%s:%d", o1.Host, o1.Port)) 5047 defer nc1.Close() 5048 // Send first request, reply should be mapped 5049 nc1.Request("foo", []byte("msg1"), time.Second) 5050 // Wait more than the recent sub expiration (that we have set to 100ms) 5051 time.Sleep(200 * time.Millisecond) 5052 // Send second request (reply should not be mapped) 5053 nc1.Request("foo", []byte("msg2"), time.Second) 5054 5055 select { 5056 case e := <-errCh: 5057 if e != nil { 5058 t.Fatalf(e.Error()) 5059 } 5060 case <-time.After(time.Second): 5061 t.Fatalf("Did not get replies") 5062 } 5063 } 5064 5065 type delayedWriteConn struct { 5066 sync.Mutex 5067 net.Conn 5068 bytes [][]byte 5069 delay bool 5070 wg sync.WaitGroup 5071 } 5072 5073 func (c *delayedWriteConn) Write(b []byte) (int, error) { 5074 c.Lock() 5075 defer c.Unlock() 5076 if c.delay || len(c.bytes) > 0 { 5077 c.bytes = append(c.bytes, append([]byte(nil), b...)) 5078 c.wg.Add(1) 5079 go func() { 5080 defer c.wg.Done() 5081 c.Lock() 5082 defer c.Unlock() 5083 if c.delay { 5084 c.Unlock() 5085 time.Sleep(100 * time.Millisecond) 5086 c.Lock() 5087 } 5088 if len(c.bytes) > 0 { 5089 b = c.bytes[0] 5090 c.bytes = c.bytes[1:] 5091 c.Conn.Write(b) 5092 } 5093 }() 5094 return len(b), nil 5095 } 5096 return c.Conn.Write(b) 5097 } 5098 5099 // This test uses a single account and makes sure that when 5100 // a reply subject is prefixed with $GR it comes back to 5101 // the origin cluster and delivered to 
proper reply subject 5102 // there, but also to subscribers on that reply subject 5103 // on the other cluster. 5104 func TestGatewaySendReplyAcrossGateways(t *testing.T) { 5105 for _, test := range []struct { 5106 name string 5107 poolSize int 5108 peracc bool 5109 }{ 5110 {"no pooling", -1, false}, 5111 {"pooling", 5, false}, 5112 {"per account", 0, true}, 5113 } { 5114 t.Run(test.name, func(t *testing.T) { 5115 ob := testDefaultOptionsForGateway("B") 5116 ob.Accounts = []*Account{NewAccount("ACC")} 5117 ob.Users = []*User{{Username: "user", Password: "pwd", Account: ob.Accounts[0]}} 5118 sb := runGatewayServer(ob) 5119 defer sb.Shutdown() 5120 5121 oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5122 oa1.Cluster.PoolSize = test.poolSize 5123 if test.peracc { 5124 oa1.Cluster.PinnedAccounts = []string{"ACC"} 5125 } 5126 oa1.Accounts = []*Account{NewAccount("ACC")} 5127 oa1.Users = []*User{{Username: "user", Password: "pwd", Account: oa1.Accounts[0]}} 5128 sa1 := runGatewayServer(oa1) 5129 defer sa1.Shutdown() 5130 5131 waitForOutboundGateways(t, sb, 1, time.Second) 5132 waitForInboundGateways(t, sb, 1, time.Second) 5133 waitForOutboundGateways(t, sa1, 1, time.Second) 5134 waitForInboundGateways(t, sa1, 1, time.Second) 5135 5136 // Now start another server in cluster "A". This will allow us 5137 // to test the reply from cluster "B" coming back directly to 5138 // the server where the request originates, and indirectly through 5139 // route. 
5140 oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5141 oa2.Cluster.PoolSize = test.poolSize 5142 if test.peracc { 5143 oa2.Cluster.PinnedAccounts = []string{"ACC"} 5144 } 5145 oa2.Accounts = []*Account{NewAccount("ACC")} 5146 oa2.Users = []*User{{Username: "user", Password: "pwd", Account: oa2.Accounts[0]}} 5147 oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa1.Cluster.Host, oa1.Cluster.Port)) 5148 sa2 := runGatewayServer(oa2) 5149 defer sa2.Shutdown() 5150 5151 waitForOutboundGateways(t, sa2, 1, time.Second) 5152 waitForInboundGateways(t, sb, 2, time.Second) 5153 checkClusterFormed(t, sa1, sa2) 5154 5155 replySubj := "bar" 5156 5157 // Setup a responder on sb 5158 ncb := natsConnect(t, fmt.Sprintf("nats://user:pwd@%s:%d", ob.Host, ob.Port)) 5159 defer ncb.Close() 5160 natsSub(t, ncb, "foo", func(m *nats.Msg) { 5161 m.Respond([]byte("reply")) 5162 }) 5163 // Set a subscription on the reply subject on sb 5164 subSB := natsSubSync(t, ncb, replySubj) 5165 natsFlush(t, ncb) 5166 checkExpectedSubs(t, 2, sb) 5167 5168 testReqReply := func(t *testing.T, host string, port int, createSubOnA bool) { 5169 t.Helper() 5170 nca := natsConnect(t, fmt.Sprintf("nats://user:pwd@%s:%d", host, port)) 5171 defer nca.Close() 5172 if createSubOnA { 5173 subSA := natsSubSync(t, nca, replySubj) 5174 natsPubReq(t, nca, "foo", replySubj, []byte("hello")) 5175 natsNexMsg(t, subSA, time.Second) 5176 // Check for duplicates 5177 if _, err := subSA.NextMsg(50 * time.Millisecond); err == nil { 5178 t.Fatalf("Received duplicate message on subSA!") 5179 } 5180 } else { 5181 natsPubReq(t, nca, "foo", replySubj, []byte("hello")) 5182 } 5183 natsNexMsg(t, subSB, time.Second) 5184 // Check for duplicates 5185 if _, err := subSB.NextMsg(50 * time.Millisecond); err == nil { 5186 t.Fatalf("Received duplicate message on subSB!") 5187 } 5188 } 5189 // Create requestor on sa1 to check for direct reply from GW: 5190 testReqReply(t, oa1.Host, oa1.Port, true) 5191 // Wait for 
subscription to be gone... 5192 checkExpectedSubs(t, 0, sa1) 5193 // Now create requestor on sa2, it will receive reply through sa1. 5194 testReqReply(t, oa2.Host, oa2.Port, true) 5195 checkExpectedSubs(t, 0, sa1) 5196 checkExpectedSubs(t, 0, sa2) 5197 5198 // Now issue requests but without any interest in the requestor's 5199 // origin cluster and make sure the other cluster gets the reply. 5200 testReqReply(t, oa1.Host, oa1.Port, false) 5201 testReqReply(t, oa2.Host, oa2.Port, false) 5202 5203 // There is a possible race between sa2 sending the RS+ for the 5204 // subscription on the reply subject, and the GW reply making it 5205 // to sa1 before the RS+ is processed there. 5206 // We are going to force this race by making the route connection 5207 // block as needed. 5208 5209 acc, _ := sa2.LookupAccount("ACC") 5210 acc.mu.RLock() 5211 api := acc.routePoolIdx 5212 acc.mu.RUnlock() 5213 5214 var route *client 5215 sa2.mu.Lock() 5216 if test.peracc { 5217 if conns, ok := sa2.accRoutes["ACC"]; ok { 5218 for _, r := range conns { 5219 route = r 5220 break 5221 } 5222 } 5223 } else if test.poolSize > 0 { 5224 sa2.forEachRoute(func(r *client) { 5225 r.mu.Lock() 5226 if r.route.poolIdx == api { 5227 route = r 5228 } 5229 r.mu.Unlock() 5230 }) 5231 } else if r := getFirstRoute(sa2); r != nil { 5232 route = r 5233 } 5234 sa2.mu.Unlock() 5235 route.mu.Lock() 5236 routeConn := &delayedWriteConn{ 5237 Conn: route.nc, 5238 wg: sync.WaitGroup{}, 5239 } 5240 route.nc = routeConn 5241 route.mu.Unlock() 5242 5243 delayRoute := func() { 5244 routeConn.Lock() 5245 routeConn.delay = true 5246 routeConn.Unlock() 5247 } 5248 stopDelayRoute := func() { 5249 routeConn.Lock() 5250 routeConn.delay = false 5251 wg := &routeConn.wg 5252 routeConn.Unlock() 5253 wg.Wait() 5254 } 5255 5256 delayRoute() 5257 testReqReply(t, oa2.Host, oa2.Port, true) 5258 stopDelayRoute() 5259 5260 // Same test but now we have a local interest on the reply subject 5261 // on sa1 to make sure that interest there 
does not prevent sending 5262 // the RMSG to sa2, which is the origin of the request. 5263 checkExpectedSubs(t, 0, sa1) 5264 checkExpectedSubs(t, 0, sa2) 5265 nca1 := natsConnect(t, fmt.Sprintf("nats://user:pwd@%s:%d", oa1.Host, oa1.Port)) 5266 defer nca1.Close() 5267 subSA1 := natsSubSync(t, nca1, replySubj) 5268 natsFlush(t, nca1) 5269 checkExpectedSubs(t, 1, sa1) 5270 checkExpectedSubs(t, 1, sa2) 5271 5272 delayRoute() 5273 testReqReply(t, oa2.Host, oa2.Port, true) 5274 stopDelayRoute() 5275 5276 natsNexMsg(t, subSA1, time.Second) 5277 }) 5278 } 5279 } 5280 5281 // This test will have a requestor on cluster A and responder 5282 // on cluster B, but when the responder sends the response, 5283 // it will also have a reply subject to receive a response 5284 // for the response. 5285 func TestGatewayPingPongReplyAcrossGateways(t *testing.T) { 5286 ob := testDefaultOptionsForGateway("B") 5287 ob.Accounts = []*Account{NewAccount("ACC")} 5288 ob.Users = []*User{{Username: "user", Password: "pwd", Account: ob.Accounts[0]}} 5289 sb := runGatewayServer(ob) 5290 defer sb.Shutdown() 5291 5292 oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5293 oa1.Accounts = []*Account{NewAccount("ACC")} 5294 oa1.Users = []*User{{Username: "user", Password: "pwd", Account: oa1.Accounts[0]}} 5295 sa1 := runGatewayServer(oa1) 5296 defer sa1.Shutdown() 5297 5298 waitForOutboundGateways(t, sb, 1, time.Second) 5299 waitForInboundGateways(t, sb, 1, time.Second) 5300 waitForOutboundGateways(t, sa1, 1, time.Second) 5301 waitForInboundGateways(t, sa1, 1, time.Second) 5302 5303 // Now start another server in cluster "A". This will allow us 5304 // to test the reply from cluster "B" coming back directly to 5305 // the server where the request originates, and indirectly through 5306 // route. 
5307 oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5308 oa2.Accounts = []*Account{NewAccount("ACC")} 5309 oa2.Users = []*User{{Username: "user", Password: "pwd", Account: oa2.Accounts[0]}} 5310 oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa1.Cluster.Host, oa1.Cluster.Port)) 5311 sa2 := runGatewayServer(oa2) 5312 defer sa2.Shutdown() 5313 5314 waitForOutboundGateways(t, sa2, 1, time.Second) 5315 waitForInboundGateways(t, sb, 2, time.Second) 5316 checkClusterFormed(t, sa1, sa2) 5317 5318 // Setup a responder on sb 5319 ncb := natsConnect(t, fmt.Sprintf("nats://user:pwd@%s:%d", ob.Host, ob.Port)) 5320 defer ncb.Close() 5321 sbReplySubj := "sbreply" 5322 subSB := natsSubSync(t, ncb, sbReplySubj) 5323 natsSub(t, ncb, "foo", func(m *nats.Msg) { 5324 ncb.PublishRequest(m.Reply, sbReplySubj, []byte("sb reply")) 5325 }) 5326 natsFlush(t, ncb) 5327 checkExpectedSubs(t, 2, sb) 5328 5329 testReqReply := func(t *testing.T, host string, port int) { 5330 t.Helper() 5331 nca := natsConnect(t, fmt.Sprintf("nats://user:pwd@%s:%d", host, port)) 5332 defer nca.Close() 5333 msg, err := nca.Request("foo", []byte("sa request"), time.Second) 5334 if err != nil { 5335 t.Fatalf("Did not get response: %v", err) 5336 } 5337 // Check response from sb, it should have content "sb reply" and 5338 // reply subject should not have GW prefix 5339 if string(msg.Data) != "sb reply" || msg.Reply != sbReplySubj { 5340 t.Fatalf("Unexpected message from sb: %+v", msg) 5341 } 5342 // Now send our own reply: 5343 nca.Publish(msg.Reply, []byte("sa reply")) 5344 // And make sure that subS2 receives it... 
5345 msg = natsNexMsg(t, subSB, time.Second) 5346 if string(msg.Data) != "sa reply" || msg.Reply != _EMPTY_ { 5347 t.Fatalf("Unexpected message from sa: %v", msg) 5348 } 5349 } 5350 // Create requestor on sa1 to check for direct reply from GW: 5351 testReqReply(t, oa1.Host, oa1.Port) 5352 // Now from sa2 to see reply coming from route (sa1) 5353 testReqReply(t, oa2.Host, oa2.Port) 5354 } 5355 5356 // Similar to TestGatewaySendReplyAcrossGateways, but this time 5357 // with service import. 5358 func TestGatewaySendReplyAcrossGatewaysServiceImport(t *testing.T) { 5359 ob := testDefaultOptionsForGateway("B") 5360 setAccountUserPassInOptions(ob, "$foo", "clientBFoo", "password") 5361 setAccountUserPassInOptions(ob, "$bar", "clientBBar", "password") 5362 sb := runGatewayServer(ob) 5363 defer sb.Shutdown() 5364 5365 oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5366 oa1.Cluster.PoolSize = 1 5367 setAccountUserPassInOptions(oa1, "$foo", "clientAFoo", "password") 5368 setAccountUserPassInOptions(oa1, "$bar", "clientABar", "password") 5369 sa1 := runGatewayServer(oa1) 5370 defer sa1.Shutdown() 5371 5372 waitForOutboundGateways(t, sb, 1, time.Second) 5373 waitForInboundGateways(t, sb, 1, time.Second) 5374 waitForOutboundGateways(t, sa1, 1, time.Second) 5375 waitForInboundGateways(t, sa1, 1, time.Second) 5376 5377 oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5378 oa2.Cluster.PoolSize = 1 5379 setAccountUserPassInOptions(oa2, "$foo", "clientAFoo", "password") 5380 setAccountUserPassInOptions(oa2, "$bar", "clientABar", "password") 5381 oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa1.Cluster.Host, oa1.Cluster.Port)) 5382 sa2 := runGatewayServer(oa2) 5383 defer sa2.Shutdown() 5384 5385 oa3 := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5386 oa3.Cluster.PoolSize = 1 5387 setAccountUserPassInOptions(oa3, "$foo", "clientAFoo", "password") 5388 setAccountUserPassInOptions(oa3, "$bar", "clientABar", "password") 5389 oa3.Routes = 
RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa1.Cluster.Host, oa1.Cluster.Port)) 5390 sa3 := runGatewayServer(oa3) 5391 defer sa3.Shutdown() 5392 5393 waitForOutboundGateways(t, sa2, 1, time.Second) 5394 waitForOutboundGateways(t, sa3, 1, time.Second) 5395 waitForInboundGateways(t, sb, 3, time.Second) 5396 checkClusterFormed(t, sa1, sa2, sa3) 5397 5398 // Setup account on B 5399 fooB, _ := sb.LookupAccount("$foo") 5400 // Add in the service export for the requests. Make it public. 5401 fooB.AddServiceExport("foo.request", nil) 5402 5403 // Setup accounts on sa1, sa2 and sa3 5404 setupAccsOnA := func(s *Server) { 5405 // Get accounts 5406 fooA, _ := s.LookupAccount("$foo") 5407 barA, _ := s.LookupAccount("$bar") 5408 // Add in the service export for the requests. Make it public. 5409 fooA.AddServiceExport("foo.request", nil) 5410 // Add import abilities to server A's bar account from foo. 5411 if err := barA.AddServiceImport(fooA, "bar.request", "foo.request"); err != nil { 5412 t.Fatalf("Error adding service import: %v", err) 5413 } 5414 } 5415 setupAccsOnA(sa1) 5416 setupAccsOnA(sa2) 5417 setupAccsOnA(sa3) 5418 5419 // clientB will be connected to sb and be the service endpoint and responder. 5420 bURL := fmt.Sprintf("nats://clientBFoo:password@127.0.0.1:%d", ob.Port) 5421 clientBFoo := natsConnect(t, bURL) 5422 defer clientBFoo.Close() 5423 subBFoo := natsSubSync(t, clientBFoo, "foo.request") 5424 natsFlush(t, clientBFoo) 5425 5426 // Create another client on B for account $bar that will listen to 5427 // the reply subject. 
5428 bURL = fmt.Sprintf("nats://clientBBar:password@127.0.0.1:%d", ob.Port) 5429 clientBBar := natsConnect(t, bURL) 5430 defer clientBBar.Close() 5431 replySubj := "reply" 5432 subBReply := natsSubSync(t, clientBBar, replySubj) 5433 natsFlush(t, clientBBar) 5434 5435 testServiceImport := func(t *testing.T, host string, port int) { 5436 t.Helper() 5437 bURL := fmt.Sprintf("nats://clientABar:password@%s:%d", host, port) 5438 clientABar := natsConnect(t, bURL) 5439 defer clientABar.Close() 5440 subAReply := natsSubSync(t, clientABar, replySubj) 5441 natsFlush(t, clientABar) 5442 5443 // Send the request from clientA on bar.request, which 5444 // will be translated to foo.request and sent over. 5445 natsPubReq(t, clientABar, "bar.request", replySubj, []byte("hi")) 5446 natsFlush(t, clientABar) 5447 5448 // Expect the request to be received on subAFoo 5449 msg, err := subBFoo.NextMsg(time.Second) 5450 if err != nil { 5451 t.Fatalf("subBFoo failed to get request: %v", err) 5452 } 5453 if msg.Subject != "foo.request" || string(msg.Data) != "hi" { 5454 t.Fatalf("Unexpected message: %v", msg) 5455 } 5456 if msg.Reply == replySubj { 5457 t.Fatalf("Expected randomized reply, but got original") 5458 } 5459 5460 // Check for duplicate message 5461 if msg, err := subBFoo.NextMsg(100 * time.Millisecond); err != nats.ErrTimeout { 5462 t.Fatalf("Unexpected msg: %v", msg) 5463 } 5464 5465 // Send reply 5466 natsPub(t, clientBFoo, msg.Reply, []byte("ok-42")) 5467 natsFlush(t, clientBFoo) 5468 5469 // Now check that the subscription on the reply receives the message... 
5470 checkReply := func(t *testing.T, sub *nats.Subscription) { 5471 t.Helper() 5472 msg, err = sub.NextMsg(time.Second) 5473 if err != nil { 5474 t.Fatalf("sub failed to get reply: %v", err) 5475 } 5476 if msg.Subject != replySubj || string(msg.Data) != "ok-42" { 5477 t.Fatalf("Unexpected message: %v", msg) 5478 } 5479 } 5480 // Check subscription on A (where the request originated) 5481 checkReply(t, subAReply) 5482 // And the subscription on B (where the responder is located) 5483 checkReply(t, subBReply) 5484 } 5485 5486 // We check the service import with GW working ok with either 5487 // direct connection between the responder's server to the 5488 // requestor's server and also through routes. 5489 testServiceImport(t, oa1.Host, oa1.Port) 5490 testServiceImport(t, oa2.Host, oa2.Port) 5491 // sa1 is the one receiving the reply from GW between B and A. 5492 // Check that the server routes directly to the server 5493 // with the interest. 5494 checkRoute := func(t *testing.T, s *Server, expected int64) { 5495 t.Helper() 5496 s.mu.Lock() 5497 defer s.mu.Unlock() 5498 s.forEachRoute(func(r *client) { 5499 r.mu.Lock() 5500 if r.route.remoteID != sa1.ID() { 5501 r.mu.Unlock() 5502 return 5503 } 5504 inMsgs := atomic.LoadInt64(&r.inMsgs) 5505 r.mu.Unlock() 5506 if inMsgs != expected { 5507 t.Fatalf("Expected %v incoming msgs, got %v", expected, inMsgs) 5508 } 5509 }) 5510 } 5511 // Wait a bit to make sure that we don't have a loop that 5512 // cause messages to be routed more than needed. 5513 time.Sleep(100 * time.Millisecond) 5514 checkRoute(t, sa2, 1) 5515 checkRoute(t, sa3, 0) 5516 5517 testServiceImport(t, oa3.Host, oa3.Port) 5518 // Wait a bit to make sure that we don't have a loop that 5519 // cause messages to be routed more than needed. 
5520 time.Sleep(100 * time.Millisecond) 5521 checkRoute(t, sa2, 1) 5522 checkRoute(t, sa3, 1) 5523 } 5524 5525 func TestGatewayClientsDontReceiveMsgsOnGWPrefix(t *testing.T) { 5526 ob := testDefaultOptionsForGateway("B") 5527 sb := runGatewayServer(ob) 5528 defer sb.Shutdown() 5529 5530 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5531 sa := runGatewayServer(oa) 5532 defer sa.Shutdown() 5533 5534 waitForOutboundGateways(t, sa, 1, time.Second) 5535 waitForInboundGateways(t, sa, 1, time.Second) 5536 waitForOutboundGateways(t, sb, 1, time.Second) 5537 waitForInboundGateways(t, sb, 1, time.Second) 5538 5539 // Setup a responder on sb 5540 ncb := natsConnect(t, fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)) 5541 defer ncb.Close() 5542 natsSub(t, ncb, "foo", func(m *nats.Msg) { 5543 if strings.HasPrefix(m.Reply, gwReplyPrefix) { 5544 m.Respond([]byte(fmt.Sprintf("-ERR: received request with mapped reply subject %q", m.Reply))) 5545 } else { 5546 m.Respond([]byte("+OK: reply")) 5547 } 5548 }) 5549 // And create a sub on ">" that should not get the $GR reply. 5550 subSB := natsSubSync(t, ncb, ">") 5551 natsFlush(t, ncb) 5552 checkExpectedSubs(t, 2, sb) 5553 5554 nca := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port)) 5555 defer nca.Close() 5556 msg, err := nca.Request("foo", []byte("request"), time.Second) 5557 if err != nil { 5558 t.Fatalf("Did not get response: %v", err) 5559 } 5560 if string(msg.Data) != "+OK: reply" { 5561 t.Fatalf("Error from responder: %q", msg.Data) 5562 } 5563 5564 // subSB would have also received the request, so drop that one. 5565 msg = natsNexMsg(t, subSB, time.Second) 5566 if string(msg.Data) != "request" { 5567 t.Fatalf("Wrong request: %q", msg.Data) 5568 } 5569 // Once sa gets the direct reply, it should resend the reply 5570 // with normal subject. So subSB should get the message with 5571 // a subject that does not start with $GNR prefix. 
5572 msg = natsNexMsg(t, subSB, time.Second) 5573 if string(msg.Data) != "+OK: reply" || strings.HasPrefix(msg.Subject, gwReplyPrefix) { 5574 t.Fatalf("Unexpected message from sa: %v", msg) 5575 } 5576 // Check no more message... 5577 if m, err := subSB.NextMsg(100 * time.Millisecond); m != nil || err == nil { 5578 t.Fatalf("Expected only 1 message, got %+v", m) 5579 } 5580 } 5581 5582 func TestGatewayNoAccInterestThenQSubThenRegularSub(t *testing.T) { 5583 GatewayDoNotForceInterestOnlyMode(true) 5584 defer GatewayDoNotForceInterestOnlyMode(false) 5585 5586 ob := testDefaultOptionsForGateway("B") 5587 sb := runGatewayServer(ob) 5588 defer sb.Shutdown() 5589 5590 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5591 sa := runGatewayServer(oa) 5592 defer sa.Shutdown() 5593 5594 waitForOutboundGateways(t, sa, 1, time.Second) 5595 waitForInboundGateways(t, sa, 1, time.Second) 5596 waitForOutboundGateways(t, sb, 1, time.Second) 5597 waitForInboundGateways(t, sb, 1, time.Second) 5598 5599 // Connect on A and send a message 5600 ncA := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port)) 5601 defer ncA.Close() 5602 natsPub(t, ncA, "foo", []byte("hello")) 5603 natsFlush(t, ncA) 5604 5605 // expect an A- on return 5606 gwb := sa.getOutboundGatewayConnection("B") 5607 checkForAccountNoInterest(t, gwb, globalAccountName, true, time.Second) 5608 5609 // Create a connection o B, and create a queue sub first 5610 ncB := natsConnect(t, fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)) 5611 defer ncB.Close() 5612 qsub := natsQueueSubSync(t, ncB, "bar", "queue") 5613 natsFlush(t, ncB) 5614 5615 // A should have received a queue interest 5616 checkForRegisteredQSubInterest(t, sa, "B", globalAccountName, "bar", 1, time.Second) 5617 5618 // Now on B, create a regular sub 5619 sub := natsSubSync(t, ncB, "baz") 5620 natsFlush(t, ncB) 5621 5622 // From A now, produce a message on each subject and 5623 // expect both subs to receive their message. 
5624 msgForQSub := []byte("msg_qsub") 5625 natsPub(t, ncA, "bar", msgForQSub) 5626 natsFlush(t, ncA) 5627 5628 if msg := natsNexMsg(t, qsub, time.Second); !bytes.Equal(msgForQSub, msg.Data) { 5629 t.Fatalf("Expected msg for queue sub to be %q, got %q", msgForQSub, msg.Data) 5630 } 5631 5632 // Publish for the regular sub 5633 msgForSub := []byte("msg_sub") 5634 natsPub(t, ncA, "baz", msgForSub) 5635 natsFlush(t, ncA) 5636 5637 if msg := natsNexMsg(t, sub, time.Second); !bytes.Equal(msgForSub, msg.Data) { 5638 t.Fatalf("Expected msg for sub to be %q, got %q", msgForSub, msg.Data) 5639 } 5640 } 5641 5642 // Similar to TestGatewayNoAccInterestThenQSubThenRegularSub but simulate 5643 // older incorrect behavior. 5644 func TestGatewayHandleUnexpectedASubUnsub(t *testing.T) { 5645 GatewayDoNotForceInterestOnlyMode(true) 5646 defer GatewayDoNotForceInterestOnlyMode(false) 5647 5648 ob := testDefaultOptionsForGateway("B") 5649 sb := runGatewayServer(ob) 5650 defer sb.Shutdown() 5651 5652 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5653 sa := runGatewayServer(oa) 5654 defer sa.Shutdown() 5655 5656 waitForOutboundGateways(t, sa, 1, time.Second) 5657 waitForInboundGateways(t, sa, 1, time.Second) 5658 waitForOutboundGateways(t, sb, 1, time.Second) 5659 waitForInboundGateways(t, sb, 1, time.Second) 5660 5661 // Connect on A and send a message 5662 ncA := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port)) 5663 defer ncA.Close() 5664 natsPub(t, ncA, "foo", []byte("hello")) 5665 natsFlush(t, ncA) 5666 5667 // expect an A- on return 5668 gwb := sa.getOutboundGatewayConnection("B") 5669 checkForAccountNoInterest(t, gwb, globalAccountName, true, time.Second) 5670 5671 // Create a connection o B, and create a queue sub first 5672 ncB := natsConnect(t, fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)) 5673 defer ncB.Close() 5674 qsub := natsQueueSubSync(t, ncB, "bar", "queue") 5675 natsFlush(t, ncB) 5676 5677 // A should have received a queue interest 5678 
checkForRegisteredQSubInterest(t, sa, "B", globalAccountName, "bar", 1, time.Second) 5679 5680 // Now on B, create a regular sub 5681 sub := natsSubSync(t, ncB, "baz") 5682 natsFlush(t, ncB) 5683 // and reproduce old, wrong, behavior that would have resulted in sending an A- 5684 gwA := getInboundGatewayConnection(sb, "A") 5685 gwA.mu.Lock() 5686 gwA.enqueueProto([]byte("A- $G\r\n")) 5687 gwA.mu.Unlock() 5688 5689 // From A now, produce a message on each subject and 5690 // expect both subs to receive their message. 5691 msgForQSub := []byte("msg_qsub") 5692 natsPub(t, ncA, "bar", msgForQSub) 5693 natsFlush(t, ncA) 5694 5695 if msg := natsNexMsg(t, qsub, time.Second); !bytes.Equal(msgForQSub, msg.Data) { 5696 t.Fatalf("Expected msg for queue sub to be %q, got %q", msgForQSub, msg.Data) 5697 } 5698 5699 // Publish for the regular sub 5700 msgForSub := []byte("msg_sub") 5701 natsPub(t, ncA, "baz", msgForSub) 5702 natsFlush(t, ncA) 5703 5704 if msg := natsNexMsg(t, sub, time.Second); !bytes.Equal(msgForSub, msg.Data) { 5705 t.Fatalf("Expected msg for sub to be %q, got %q", msgForSub, msg.Data) 5706 } 5707 5708 // Remove all subs on B. 5709 qsub.Unsubscribe() 5710 sub.Unsubscribe() 5711 ncB.Flush() 5712 5713 // Produce a message from A expect A- 5714 natsPub(t, ncA, "foo", []byte("hello")) 5715 natsFlush(t, ncA) 5716 5717 // expect an A- on return 5718 checkForAccountNoInterest(t, gwb, globalAccountName, true, time.Second) 5719 5720 // Simulate B sending another A-, on A account no interest should remain same. 
5721 gwA.mu.Lock() 5722 gwA.enqueueProto([]byte("A- $G\r\n")) 5723 gwA.mu.Unlock() 5724 5725 checkForAccountNoInterest(t, gwb, globalAccountName, true, time.Second) 5726 5727 // Create a queue sub on B 5728 qsub = natsQueueSubSync(t, ncB, "bar", "queue") 5729 natsFlush(t, ncB) 5730 5731 checkForRegisteredQSubInterest(t, sa, "B", globalAccountName, "bar", 1, time.Second) 5732 5733 // Make B send an A+ and verify that we sitll have the registered qsub interest 5734 gwA.mu.Lock() 5735 gwA.enqueueProto([]byte("A+ $G\r\n")) 5736 gwA.mu.Unlock() 5737 5738 // Give a chance to A to possibly misbehave when receiving this proto 5739 time.Sleep(250 * time.Millisecond) 5740 // Now check interest is still there 5741 checkForRegisteredQSubInterest(t, sa, "B", globalAccountName, "bar", 1, time.Second) 5742 5743 qsub.Unsubscribe() 5744 natsFlush(t, ncB) 5745 checkForRegisteredQSubInterest(t, sa, "B", globalAccountName, "bar", 0, time.Second) 5746 5747 // Send A-, server A should set entry to nil 5748 gwA.mu.Lock() 5749 gwA.enqueueProto([]byte("A- $G\r\n")) 5750 gwA.mu.Unlock() 5751 checkForAccountNoInterest(t, gwb, globalAccountName, true, time.Second) 5752 5753 // Send A+ and entry should be removed since there is no longer reason to 5754 // keep the entry. 5755 gwA.mu.Lock() 5756 gwA.enqueueProto([]byte("A+ $G\r\n")) 5757 gwA.mu.Unlock() 5758 checkForAccountNoInterest(t, gwb, globalAccountName, false, time.Second) 5759 5760 // Last A+ should not change because account already removed from map. 5761 gwA.mu.Lock() 5762 gwA.enqueueProto([]byte("A+ $G\r\n")) 5763 gwA.mu.Unlock() 5764 checkForAccountNoInterest(t, gwb, globalAccountName, false, time.Second) 5765 } 5766 5767 type captureGWInterestSwitchLogger struct { 5768 DummyLogger 5769 imss []string 5770 } 5771 5772 func (l *captureGWInterestSwitchLogger) Debugf(format string, args ...any) { 5773 l.Lock() 5774 msg := fmt.Sprintf(format, args...) 
5775 if strings.Contains(msg, fmt.Sprintf("switching account %q to %s mode", globalAccountName, InterestOnly)) || 5776 strings.Contains(msg, fmt.Sprintf("switching account %q to %s mode complete", globalAccountName, InterestOnly)) { 5777 l.imss = append(l.imss, msg) 5778 } 5779 l.Unlock() 5780 } 5781 5782 func TestGatewayLogAccountInterestModeSwitch(t *testing.T) { 5783 GatewayDoNotForceInterestOnlyMode(true) 5784 defer GatewayDoNotForceInterestOnlyMode(false) 5785 5786 ob := testDefaultOptionsForGateway("B") 5787 sb := runGatewayServer(ob) 5788 defer sb.Shutdown() 5789 5790 logB := &captureGWInterestSwitchLogger{} 5791 sb.SetLogger(logB, true, true) 5792 5793 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5794 sa := runGatewayServer(oa) 5795 defer sa.Shutdown() 5796 5797 logA := &captureGWInterestSwitchLogger{} 5798 sa.SetLogger(logA, true, true) 5799 5800 waitForOutboundGateways(t, sa, 1, 2*time.Second) 5801 waitForInboundGateways(t, sa, 1, 2*time.Second) 5802 waitForOutboundGateways(t, sb, 1, 2*time.Second) 5803 waitForInboundGateways(t, sb, 1, 2*time.Second) 5804 5805 ncB := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", ob.Port)) 5806 defer ncB.Close() 5807 natsSubSync(t, ncB, "foo") 5808 natsFlush(t, ncB) 5809 5810 ncA := natsConnect(t, fmt.Sprintf("nats://127.0.0.1:%d", oa.Port)) 5811 defer ncA.Close() 5812 for i := 0; i < gatewayMaxRUnsubBeforeSwitch+10; i++ { 5813 subj := fmt.Sprintf("bar.%d", i) 5814 natsPub(t, ncA, subj, []byte("hello")) 5815 } 5816 natsFlush(t, ncA) 5817 5818 gwA := getInboundGatewayConnection(sb, "A") 5819 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 5820 mode := Optimistic 5821 gwA.mu.Lock() 5822 e := gwA.gw.insim[globalAccountName] 5823 if e != nil { 5824 mode = e.mode 5825 } 5826 gwA.mu.Unlock() 5827 if mode != InterestOnly { 5828 return fmt.Errorf("not switched yet") 5829 } 5830 return nil 5831 }) 5832 5833 checkGWInterestOnlyMode(t, sa, "B", globalAccountName) 5834 5835 checkLog := func(t 
*testing.T, l *captureGWInterestSwitchLogger) { 5836 t.Helper() 5837 l.Lock() 5838 logs := append([]string(nil), l.imss...) 5839 l.Unlock() 5840 5841 if len(logs) != 2 { 5842 t.Fatalf("Expected 2 logs about switching to interest-only, got %v", logs) 5843 } 5844 if !strings.Contains(logs[0], "switching account") { 5845 t.Fatalf("First log statement should have been about switching, got %v", logs[0]) 5846 } 5847 if !strings.Contains(logs[1], "complete") { 5848 t.Fatalf("Second log statement should have been about having switched, got %v", logs[1]) 5849 } 5850 } 5851 checkLog(t, logB) 5852 checkLog(t, logA) 5853 5854 // Clear log of server B 5855 logB.Lock() 5856 logB.imss = nil 5857 logB.Unlock() 5858 5859 // Force a switch on B to inbound gateway from A and make sure that it is 5860 // a no-op since this gateway connection has already been switched. 5861 sb.switchAccountToInterestMode(globalAccountName) 5862 5863 logB.Lock() 5864 didSwitch := len(logB.imss) > 0 5865 logB.Unlock() 5866 if didSwitch { 5867 t.Fatalf("Attempted to switch while it was already in interest mode only") 5868 } 5869 } 5870 5871 func TestGatewayAccountInterestModeSwitchOnlyOncePerAccount(t *testing.T) { 5872 GatewayDoNotForceInterestOnlyMode(true) 5873 defer GatewayDoNotForceInterestOnlyMode(false) 5874 5875 ob := testDefaultOptionsForGateway("B") 5876 sb := runGatewayServer(ob) 5877 defer sb.Shutdown() 5878 5879 logB := &captureGWInterestSwitchLogger{} 5880 sb.SetLogger(logB, true, true) 5881 5882 nc := natsConnect(t, sb.ClientURL()) 5883 defer nc.Close() 5884 natsSubSync(t, nc, "foo") 5885 natsQueueSubSync(t, nc, "bar", "baz") 5886 5887 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5888 sa := runGatewayServer(oa) 5889 defer sa.Shutdown() 5890 5891 waitForOutboundGateways(t, sa, 1, 2*time.Second) 5892 waitForInboundGateways(t, sa, 1, 2*time.Second) 5893 waitForOutboundGateways(t, sb, 1, 2*time.Second) 5894 waitForInboundGateways(t, sb, 1, 2*time.Second) 5895 5896 wg := 
sync.WaitGroup{} 5897 total := 20 5898 wg.Add(total) 5899 for i := 0; i < total; i++ { 5900 go func() { 5901 sb.switchAccountToInterestMode(globalAccountName) 5902 wg.Done() 5903 }() 5904 } 5905 wg.Wait() 5906 time.Sleep(50 * time.Millisecond) 5907 logB.Lock() 5908 nl := len(logB.imss) 5909 logB.Unlock() 5910 // There should be a trace for switching and when switch is complete 5911 if nl != 2 { 5912 t.Fatalf("Attempted to switch account too many times, number lines=%v", nl) 5913 } 5914 } 5915 5916 func TestGatewaySingleOutbound(t *testing.T) { 5917 l, err := natsListen("tcp", "127.0.0.1:0") 5918 if err != nil { 5919 t.Fatalf("Error on listen: %v", err) 5920 } 5921 defer l.Close() 5922 port := l.Addr().(*net.TCPAddr).Port 5923 5924 oa := testGatewayOptionsFromToWithTLS(t, "A", "B", []string{fmt.Sprintf("nats://127.0.0.1:%d", port)}) 5925 oa.Gateway.TLSTimeout = 0.1 5926 sa := runGatewayServer(oa) 5927 defer sa.Shutdown() 5928 5929 // Wait a bit for reconnections 5930 time.Sleep(500 * time.Millisecond) 5931 5932 // Now prepare gateway B to take place of the bare listener. 5933 ob := testGatewayOptionsWithTLS(t, "B") 5934 // There is a risk that when stopping the listener and starting 5935 // the actual server, that port is being reused by some other process. 5936 ob.Gateway.Port = port 5937 l.Close() 5938 sb := runGatewayServer(ob) 5939 defer sb.Shutdown() 5940 5941 // To make sure that we don't fail, bump the TLSTimeout now. 5942 cfg := sa.getRemoteGateway("B") 5943 cfg.Lock() 5944 cfg.TLSTimeout = 2.0 5945 cfg.Unlock() 5946 5947 waitForOutboundGateways(t, sa, 1, time.Second) 5948 sa.gateway.Lock() 5949 lm := len(sa.gateway.out) 5950 sa.gateway.Unlock() 5951 if lm != 1 { 5952 t.Fatalf("Expected 1 outbound, got %v", lm) 5953 } 5954 } 5955 5956 func TestGatewayReplyMapTracking(t *testing.T) { 5957 // Increase the recSubExp value on servers so we have time 5958 // to check the replies mapping structures. 
5959 subExp := 400 * time.Millisecond 5960 setRecSub := func(s *Server) { 5961 s.gateway.pasi.Lock() 5962 s.gateway.recSubExp = subExp 5963 s.gateway.pasi.Unlock() 5964 } 5965 5966 ob := testDefaultOptionsForGateway("B") 5967 sb := runGatewayServer(ob) 5968 defer sb.Shutdown() 5969 setRecSub(sb) 5970 5971 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 5972 sa := runGatewayServer(oa) 5973 defer sa.Shutdown() 5974 setRecSub(sa) 5975 5976 waitForOutboundGateways(t, sa, 1, 2*time.Second) 5977 waitForInboundGateways(t, sa, 1, 2*time.Second) 5978 waitForOutboundGateways(t, sb, 1, 2*time.Second) 5979 waitForInboundGateways(t, sb, 1, 2*time.Second) 5980 5981 ncb := natsConnect(t, sb.ClientURL()) 5982 defer ncb.Close() 5983 count := 0 5984 total := 100 5985 ch := make(chan bool, 1) 5986 natsSub(t, ncb, "foo", func(m *nats.Msg) { 5987 m.Respond([]byte("reply")) 5988 if count++; count == total { 5989 ch <- true 5990 } 5991 }) 5992 natsFlush(t, ncb) 5993 5994 var bc *client 5995 sb.mu.Lock() 5996 for _, c := range sb.clients { 5997 bc = c 5998 break 5999 } 6000 sb.mu.Unlock() 6001 6002 nca := natsConnect(t, sa.ClientURL()) 6003 defer nca.Close() 6004 6005 replySub := natsSubSync(t, nca, "bar.>") 6006 for i := 0; i < total; i++ { 6007 nca.PublishRequest("foo", fmt.Sprintf("bar.%d", i), []byte("request")) 6008 } 6009 6010 waitCh(t, ch, "Did not receive all requests") 6011 6012 check := func(t *testing.T, expectedIndicator int32, expectLenMap int, expectedSrvMapEmpty bool) { 6013 t.Helper() 6014 bc.mu.Lock() 6015 mapIndicator := atomic.LoadInt32(&bc.gwReplyMapping.check) 6016 var lenMap int 6017 if bc.gwReplyMapping.mapping != nil { 6018 lenMap = len(bc.gwReplyMapping.mapping) 6019 } 6020 bc.mu.Unlock() 6021 if mapIndicator != expectedIndicator { 6022 t.Fatalf("Client should map indicator should be %v, got %v", expectedIndicator, mapIndicator) 6023 } 6024 if lenMap != expectLenMap { 6025 t.Fatalf("Client map should have %v entries, got %v", expectLenMap, lenMap) 6026 
} 6027 srvMapEmpty := true 6028 sb.gwrm.m.Range(func(_, _ any) bool { 6029 srvMapEmpty = false 6030 return false 6031 }) 6032 if srvMapEmpty != expectedSrvMapEmpty { 6033 t.Fatalf("Expected server map to be empty=%v, got %v", expectedSrvMapEmpty, srvMapEmpty) 6034 } 6035 } 6036 // Check that indicator is set and that there "total" entries in the map 6037 // and that srv map is not empty 6038 check(t, 1, total, false) 6039 6040 // Receive all replies 6041 for i := 0; i < total; i++ { 6042 natsNexMsg(t, replySub, time.Second) 6043 } 6044 6045 // Wait until entries expire 6046 time.Sleep(2*subExp + 100*time.Millisecond) 6047 6048 // Now check again. 6049 check(t, 0, 0, true) 6050 } 6051 6052 func TestGatewayNoAccountUnsubWhenServiceReplyInUse(t *testing.T) { 6053 oa := testDefaultOptionsForGateway("A") 6054 setAccountUserPassInOptions(oa, "$foo", "clientFoo", "password") 6055 setAccountUserPassInOptions(oa, "$bar", "clientBar", "password") 6056 sa := runGatewayServer(oa) 6057 defer sa.Shutdown() 6058 6059 ob := testGatewayOptionsFromToWithServers(t, "B", "A", sa) 6060 setAccountUserPassInOptions(ob, "$foo", "clientFoo", "password") 6061 setAccountUserPassInOptions(ob, "$bar", "clientBar", "password") 6062 sb := runGatewayServer(ob) 6063 defer sb.Shutdown() 6064 6065 waitForOutboundGateways(t, sa, 1, time.Second) 6066 waitForOutboundGateways(t, sb, 1, time.Second) 6067 waitForInboundGateways(t, sa, 1, time.Second) 6068 waitForInboundGateways(t, sb, 1, time.Second) 6069 6070 // Get accounts 6071 fooA, _ := sa.LookupAccount("$foo") 6072 barA, _ := sa.LookupAccount("$bar") 6073 fooB, _ := sb.LookupAccount("$foo") 6074 barB, _ := sb.LookupAccount("$bar") 6075 6076 // Add in the service export for the requests. Make it public. 6077 fooA.AddServiceExport("test.request", nil) 6078 fooB.AddServiceExport("test.request", nil) 6079 6080 // Add import abilities to server B's bar account from foo. 
6081 if err := barB.AddServiceImport(fooB, "foo.request", "test.request"); err != nil { 6082 t.Fatalf("Error adding service import: %v", err) 6083 } 6084 // Same on A. 6085 if err := barA.AddServiceImport(fooA, "foo.request", "test.request"); err != nil { 6086 t.Fatalf("Error adding service import: %v", err) 6087 } 6088 6089 // clientA will be connected to srvA and be the service endpoint and responder. 6090 aURL := fmt.Sprintf("nats://clientFoo:password@127.0.0.1:%d", oa.Port) 6091 clientA := natsConnect(t, aURL) 6092 defer clientA.Close() 6093 6094 natsSub(t, clientA, "test.request", func(m *nats.Msg) { 6095 m.Respond([]byte("reply")) 6096 }) 6097 natsFlush(t, clientA) 6098 6099 // Now setup client B on srvB who will send the requests. 6100 bURL := fmt.Sprintf("nats://clientBar:password@127.0.0.1:%d", ob.Port) 6101 clientB := natsConnect(t, bURL) 6102 defer clientB.Close() 6103 6104 if _, err := clientB.Request("foo.request", []byte("request"), time.Second); err != nil { 6105 t.Fatalf("Did not get the reply: %v", err) 6106 } 6107 6108 quitCh := make(chan bool, 1) 6109 wg := sync.WaitGroup{} 6110 wg.Add(1) 6111 go func() { 6112 defer wg.Done() 6113 6114 for { 6115 select { 6116 case <-quitCh: 6117 return 6118 default: 6119 clientA.Publish("any.subject", []byte("any message")) 6120 time.Sleep(time.Millisecond) 6121 } 6122 } 6123 }() 6124 for i := 0; i < 1000; i++ { 6125 if _, err := clientB.Request("foo.request", []byte("request"), time.Second); err != nil { 6126 t.Fatalf("Did not get the reply: %v", err) 6127 } 6128 } 6129 close(quitCh) 6130 wg.Wait() 6131 } 6132 6133 func TestGatewayCloseTLSConnection(t *testing.T) { 6134 oa := testGatewayOptionsWithTLS(t, "A") 6135 oa.DisableShortFirstPing = true 6136 oa.Gateway.TLSConfig.ClientAuth = tls.NoClientCert 6137 oa.Gateway.TLSTimeout = 100 6138 sa := runGatewayServer(oa) 6139 defer sa.Shutdown() 6140 6141 ob1 := testGatewayOptionsFromToWithTLS(t, "B", "A", []string{fmt.Sprintf("nats://127.0.0.1:%d", 
sa.GatewayAddr().Port)}) 6142 sb1 := runGatewayServer(ob1) 6143 defer sb1.Shutdown() 6144 6145 waitForOutboundGateways(t, sa, 1, 2*time.Second) 6146 waitForInboundGateways(t, sa, 1, 2*time.Second) 6147 waitForOutboundGateways(t, sb1, 1, 2*time.Second) 6148 waitForInboundGateways(t, sb1, 1, 2*time.Second) 6149 6150 endpoint := fmt.Sprintf("%s:%d", oa.Gateway.Host, oa.Gateway.Port) 6151 conn, err := net.DialTimeout("tcp", endpoint, 2*time.Second) 6152 if err != nil { 6153 t.Fatalf("Unexpected error on dial: %v", err) 6154 } 6155 defer conn.Close() 6156 6157 tlsConn := tls.Client(conn, &tls.Config{InsecureSkipVerify: true}) 6158 defer tlsConn.Close() 6159 if err := tlsConn.Handshake(); err != nil { 6160 t.Fatalf("Unexpected error during handshake: %v", err) 6161 } 6162 connectOp := []byte("CONNECT {\"name\":\"serverID\",\"verbose\":false,\"pedantic\":false,\"tls_required\":true,\"gateway\":\"B\"}\r\n") 6163 if _, err := tlsConn.Write(connectOp); err != nil { 6164 t.Fatalf("Unexpected error writing CONNECT: %v", err) 6165 } 6166 infoOp := []byte("INFO {\"server_id\":\"serverID\",\"tls_required\":true,\"gateway\":\"B\",\"gateway_nrp\":true}\r\n") 6167 if _, err := tlsConn.Write(infoOp); err != nil { 6168 t.Fatalf("Unexpected error writing CONNECT: %v", err) 6169 } 6170 if _, err := tlsConn.Write([]byte("PING\r\n")); err != nil { 6171 t.Fatalf("Unexpected error writing PING: %v", err) 6172 } 6173 6174 // Get gw connection 6175 var gw *client 6176 checkFor(t, time.Second, 15*time.Millisecond, func() error { 6177 sa.gateway.RLock() 6178 for _, g := range sa.gateway.in { 6179 g.mu.Lock() 6180 if g.opts.Name == "serverID" { 6181 gw = g 6182 } 6183 g.mu.Unlock() 6184 break 6185 } 6186 sa.gateway.RUnlock() 6187 if gw == nil { 6188 return fmt.Errorf("No gw registered yet") 6189 } 6190 return nil 6191 }) 6192 // Fill the buffer. We want to timeout on write so that nc.Close() 6193 // would block due to a write that cannot complete. 
6194 buf := make([]byte, 64*1024) 6195 done := false 6196 for !done { 6197 gw.nc.SetWriteDeadline(time.Now().Add(time.Second)) 6198 if _, err := gw.nc.Write(buf); err != nil { 6199 done = true 6200 } 6201 gw.nc.SetWriteDeadline(time.Time{}) 6202 } 6203 ch := make(chan bool) 6204 go func() { 6205 select { 6206 case <-ch: 6207 return 6208 case <-time.After(3 * time.Second): 6209 fmt.Println("!!!! closeConnection is blocked, test will hang !!!") 6210 return 6211 } 6212 }() 6213 // Close the gateway 6214 gw.closeConnection(SlowConsumerWriteDeadline) 6215 ch <- true 6216 } 6217 6218 func TestGatewayNoCrashOnInvalidSubject(t *testing.T) { 6219 ob := testDefaultOptionsForGateway("B") 6220 sb := runGatewayServer(ob) 6221 defer sb.Shutdown() 6222 6223 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 6224 sa := runGatewayServer(oa) 6225 defer sa.Shutdown() 6226 6227 waitForOutboundGateways(t, sa, 1, 2*time.Second) 6228 waitForInboundGateways(t, sa, 1, 2*time.Second) 6229 waitForOutboundGateways(t, sb, 1, 2*time.Second) 6230 waitForInboundGateways(t, sb, 1, 2*time.Second) 6231 6232 ncB := natsConnect(t, sb.ClientURL()) 6233 defer ncB.Close() 6234 6235 natsSubSync(t, ncB, "foo") 6236 natsFlush(t, ncB) 6237 6238 ncA := natsConnect(t, sa.ClientURL()) 6239 defer ncA.Close() 6240 6241 // Send on an invalid subject. Since there is interest on B, 6242 // we will receive an RS- instead of A- 6243 natsPub(t, ncA, "bar..baz", []byte("bad subject")) 6244 natsFlush(t, ncA) 6245 6246 // Now create on B a sub on a wildcard subject 6247 sub := natsSubSync(t, ncB, "bar.*") 6248 natsFlush(t, ncB) 6249 6250 // Server should not have crashed... 
6251 natsPub(t, ncA, "bar.baz", []byte("valid subject")) 6252 if _, err := sub.NextMsg(time.Second); err != nil { 6253 t.Fatalf("Error getting message: %v", err) 6254 } 6255 } 6256 6257 func TestGatewayUpdateURLsFromRemoteCluster(t *testing.T) { 6258 ob1 := testDefaultOptionsForGateway("B") 6259 sb1 := RunServer(ob1) 6260 defer sb1.Shutdown() 6261 6262 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb1) 6263 sa := RunServer(oa) 6264 defer sa.Shutdown() 6265 6266 waitForOutboundGateways(t, sa, 1, 2*time.Second) 6267 waitForOutboundGateways(t, sb1, 1, 2*time.Second) 6268 6269 // Add a server to cluster B. 6270 ob2 := testDefaultOptionsForGateway("B") 6271 ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", ob1.Cluster.Port)) 6272 sb2 := RunServer(ob2) 6273 defer sb2.Shutdown() 6274 6275 checkClusterFormed(t, sb1, sb2) 6276 waitForOutboundGateways(t, sb2, 1, 2*time.Second) 6277 waitForInboundGateways(t, sa, 2, 2*time.Second) 6278 6279 pmap := make(map[int]string) 6280 pmap[ob1.Gateway.Port] = "B1" 6281 pmap[ob2.Gateway.Port] = "B2" 6282 6283 checkURLs := func(eurls map[string]string) { 6284 t.Helper() 6285 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 6286 rg := sa.getRemoteGateway("B") 6287 urls := rg.getURLsAsStrings() 6288 for _, u := range urls { 6289 if _, ok := eurls[u]; !ok { 6290 _, sport, _ := net.SplitHostPort(u) 6291 port, _ := strconv.Atoi(sport) 6292 return fmt.Errorf("URL %q (%s) should not be in the list of urls (%q)", u, pmap[port], eurls) 6293 } 6294 } 6295 return nil 6296 }) 6297 } 6298 expected := make(map[string]string) 6299 expected[fmt.Sprintf("127.0.0.1:%d", ob1.Gateway.Port)] = "B1" 6300 expected[fmt.Sprintf("127.0.0.1:%d", ob2.Gateway.Port)] = "B2" 6301 checkURLs(expected) 6302 6303 // Add another in cluster B 6304 ob3 := testDefaultOptionsForGateway("B") 6305 ob3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", ob1.Cluster.Port)) 6306 sb3 := RunServer(ob3) 6307 defer sb3.Shutdown() 6308 6309 
checkClusterFormed(t, sb1, sb2, sb3) 6310 waitForOutboundGateways(t, sb3, 1, 2*time.Second) 6311 waitForInboundGateways(t, sa, 3, 2*time.Second) 6312 6313 pmap[ob3.Gateway.Port] = "B3" 6314 6315 expected = make(map[string]string) 6316 expected[fmt.Sprintf("127.0.0.1:%d", ob1.Gateway.Port)] = "B1" 6317 expected[fmt.Sprintf("127.0.0.1:%d", ob2.Gateway.Port)] = "B2" 6318 expected[fmt.Sprintf("127.0.0.1:%d", ob3.Gateway.Port)] = "B3" 6319 checkURLs(expected) 6320 6321 // Now stop server SB2, which should cause SA to remove it from its list. 6322 sb2.Shutdown() 6323 6324 expected = make(map[string]string) 6325 expected[fmt.Sprintf("127.0.0.1:%d", ob1.Gateway.Port)] = "B1" 6326 expected[fmt.Sprintf("127.0.0.1:%d", ob3.Gateway.Port)] = "B3" 6327 checkURLs(expected) 6328 } 6329 6330 type capturePingConn struct { 6331 net.Conn 6332 ch chan struct{} 6333 } 6334 6335 func (c *capturePingConn) Write(b []byte) (int, error) { 6336 if bytes.Contains(b, []byte(pingProto)) { 6337 select { 6338 case c.ch <- struct{}{}: 6339 default: 6340 } 6341 } 6342 return c.Conn.Write(b) 6343 } 6344 6345 func TestGatewayPings(t *testing.T) { 6346 gatewayMaxPingInterval = 50 * time.Millisecond 6347 defer func() { gatewayMaxPingInterval = gwMaxPingInterval }() 6348 6349 ob := testDefaultOptionsForGateway("B") 6350 sb := RunServer(ob) 6351 defer sb.Shutdown() 6352 6353 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 6354 sa := RunServer(oa) 6355 defer sa.Shutdown() 6356 6357 waitForInboundGateways(t, sa, 1, 2*time.Second) 6358 waitForOutboundGateways(t, sa, 1, 2*time.Second) 6359 waitForInboundGateways(t, sb, 1, 2*time.Second) 6360 waitForOutboundGateways(t, sb, 1, 2*time.Second) 6361 6362 c := sa.getOutboundGatewayConnection("B") 6363 ch := make(chan struct{}, 1) 6364 c.mu.Lock() 6365 c.nc = &capturePingConn{c.nc, ch} 6366 c.mu.Unlock() 6367 6368 for i := 0; i < 5; i++ { 6369 select { 6370 case <-ch: 6371 case <-time.After(250 * time.Millisecond): 6372 t.Fatalf("Did not send PING") 6373 
} 6374 } 6375 } 6376 6377 func TestGatewayTLSConfigReload(t *testing.T) { 6378 template := ` 6379 listen: 127.0.0.1:-1 6380 gateway { 6381 name: "A" 6382 listen: "127.0.0.1:-1" 6383 tls { 6384 cert_file: "../test/configs/certs/server-cert.pem" 6385 key_file: "../test/configs/certs/server-key.pem" 6386 %s 6387 timeout: 2 6388 } 6389 } 6390 ` 6391 confA := createConfFile(t, []byte(fmt.Sprintf(template, ""))) 6392 6393 srvA, optsA := RunServerWithConfig(confA) 6394 defer srvA.Shutdown() 6395 6396 optsB := testGatewayOptionsFromToWithTLS(t, "B", "A", []string{fmt.Sprintf("nats://127.0.0.1:%d", optsA.Gateway.Port)}) 6397 srvB := runGatewayServer(optsB) 6398 defer srvB.Shutdown() 6399 6400 waitForGatewayFailedConnect(t, srvB, "A", true, time.Second) 6401 6402 reloadUpdateConfig(t, srvA, confA, fmt.Sprintf(template, `ca_file: "../test/configs/certs/ca.pem"`)) 6403 6404 waitForInboundGateways(t, srvA, 1, time.Second) 6405 waitForOutboundGateways(t, srvA, 1, time.Second) 6406 waitForInboundGateways(t, srvB, 1, time.Second) 6407 waitForOutboundGateways(t, srvB, 1, time.Second) 6408 } 6409 6410 func TestGatewayTLSConfigReloadForRemote(t *testing.T) { 6411 SetGatewaysSolicitDelay(5 * time.Millisecond) 6412 defer ResetGatewaysSolicitDelay() 6413 6414 optsA := testGatewayOptionsWithTLS(t, "A") 6415 srvA := runGatewayServer(optsA) 6416 defer srvA.Shutdown() 6417 6418 template := ` 6419 listen: 127.0.0.1:-1 6420 gateway { 6421 name: "B" 6422 listen: "127.0.0.1:-1" 6423 tls { 6424 cert_file: "../test/configs/certs/server-cert.pem" 6425 key_file: "../test/configs/certs/server-key.pem" 6426 ca_file: "../test/configs/certs/ca.pem" 6427 timeout: 2 6428 } 6429 gateways [ 6430 { 6431 name: "A" 6432 url: "nats://127.0.0.1:%d" 6433 tls { 6434 cert_file: "../test/configs/certs/server-cert.pem" 6435 key_file: "../test/configs/certs/server-key.pem" 6436 %s 6437 timeout: 2 6438 } 6439 } 6440 ] 6441 } 6442 ` 6443 confB := createConfFile(t, []byte(fmt.Sprintf(template, optsA.Gateway.Port, ""))) 
6444 6445 srvB, _ := RunServerWithConfig(confB) 6446 defer srvB.Shutdown() 6447 6448 waitForGatewayFailedConnect(t, srvB, "A", true, time.Second) 6449 6450 reloadUpdateConfig(t, srvB, confB, fmt.Sprintf(template, optsA.Gateway.Port, `ca_file: "../test/configs/certs/ca.pem"`)) 6451 6452 waitForInboundGateways(t, srvA, 1, time.Second) 6453 waitForOutboundGateways(t, srvA, 1, time.Second) 6454 waitForInboundGateways(t, srvB, 1, time.Second) 6455 waitForOutboundGateways(t, srvB, 1, time.Second) 6456 } 6457 6458 func TestGatewayAuthDiscovered(t *testing.T) { 6459 SetGatewaysSolicitDelay(5 * time.Millisecond) 6460 defer ResetGatewaysSolicitDelay() 6461 6462 confA := createConfFile(t, []byte(` 6463 listen: 127.0.0.1:-1 6464 gateway { 6465 name: "A" 6466 listen: 127.0.0.1:-1 6467 authorization: { user: gwuser, password: changeme } 6468 } 6469 `)) 6470 srvA, optsA := RunServerWithConfig(confA) 6471 defer srvA.Shutdown() 6472 6473 confB := createConfFile(t, []byte(fmt.Sprintf(` 6474 listen: 127.0.0.1:-1 6475 gateway { 6476 name: "B" 6477 listen: 127.0.0.1:-1 6478 authorization: { user: gwuser, password: changeme } 6479 gateways: [ 6480 { name: A, url: nats://gwuser:changeme@127.0.0.1:%d } 6481 ] 6482 } 6483 `, optsA.Gateway.Port))) 6484 srvB, _ := RunServerWithConfig(confB) 6485 defer srvB.Shutdown() 6486 6487 waitForInboundGateways(t, srvA, 1, time.Second) 6488 waitForOutboundGateways(t, srvA, 1, time.Second) 6489 waitForInboundGateways(t, srvB, 1, time.Second) 6490 waitForOutboundGateways(t, srvB, 1, time.Second) 6491 } 6492 6493 func TestTLSGatewaysCertificateImplicitAllowPass(t *testing.T) { 6494 testTLSGatewaysCertificateImplicitAllow(t, true) 6495 } 6496 6497 func TestTLSGatewaysCertificateImplicitAllowFail(t *testing.T) { 6498 testTLSGatewaysCertificateImplicitAllow(t, false) 6499 } 6500 6501 func testTLSGatewaysCertificateImplicitAllow(t *testing.T, pass bool) { 6502 // Base config for the servers 6503 cfg := createTempFile(t, "cfg") 6504 
cfg.WriteString(fmt.Sprintf(` 6505 gateway { 6506 tls { 6507 cert_file = "../test/configs/certs/tlsauth/server.pem" 6508 key_file = "../test/configs/certs/tlsauth/server-key.pem" 6509 ca_file = "../test/configs/certs/tlsauth/ca.pem" 6510 verify_cert_and_check_known_urls = true 6511 insecure = %t 6512 timeout = 1 6513 } 6514 } 6515 `, !pass)) // set insecure to skip verification on the outgoing end 6516 if err := cfg.Sync(); err != nil { 6517 t.Fatal(err) 6518 } 6519 cfg.Close() 6520 6521 optsA := LoadConfig(cfg.Name()) 6522 optsB := LoadConfig(cfg.Name()) 6523 6524 urlA := "nats://localhost:9995" 6525 urlB := "nats://localhost:9996" 6526 if !pass { 6527 urlA = "nats://127.0.0.1:9995" 6528 urlB = "nats://127.0.0.1:9996" 6529 } 6530 6531 gwA, err := url.Parse(urlA) 6532 if err != nil { 6533 t.Fatal(err) 6534 } 6535 gwB, err := url.Parse(urlB) 6536 if err != nil { 6537 t.Fatal(err) 6538 } 6539 6540 optsA.Host = "127.0.0.1" 6541 optsA.Port = -1 6542 optsA.Gateway.Name = "A" 6543 optsA.Gateway.Port = 9995 6544 optsA.Gateway.resolver = &localhostResolver{} 6545 6546 optsB.Host = "127.0.0.1" 6547 optsB.Port = -1 6548 optsB.Gateway.Name = "B" 6549 optsB.Gateway.Port = 9996 6550 optsB.Gateway.resolver = &localhostResolver{} 6551 6552 gateways := make([]*RemoteGatewayOpts, 2) 6553 gateways[0] = &RemoteGatewayOpts{ 6554 Name: optsA.Gateway.Name, 6555 URLs: []*url.URL{gwA}, 6556 } 6557 gateways[1] = &RemoteGatewayOpts{ 6558 Name: optsB.Gateway.Name, 6559 URLs: []*url.URL{gwB}, 6560 } 6561 6562 optsA.Gateway.Gateways = gateways 6563 optsB.Gateway.Gateways = gateways 6564 6565 SetGatewaysSolicitDelay(100 * time.Millisecond) 6566 defer ResetGatewaysSolicitDelay() 6567 6568 srvA := RunServer(optsA) 6569 defer srvA.Shutdown() 6570 6571 srvB := RunServer(optsB) 6572 defer srvB.Shutdown() 6573 6574 if pass { 6575 waitForOutboundGateways(t, srvA, 1, 5*time.Second) 6576 waitForOutboundGateways(t, srvB, 1, 5*time.Second) 6577 } else { 6578 time.Sleep(1 * time.Second) // the fail case 
uses the IP, so a short wait is sufficient 6579 checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { 6580 if srvA.NumOutboundGateways() != 0 || srvB.NumOutboundGateways() != 0 { 6581 return fmt.Errorf("No outbound gateway connection expected") 6582 } 6583 return nil 6584 }) 6585 } 6586 } 6587 6588 func TestGatewayURLsNotRemovedOnDuplicateRoute(t *testing.T) { 6589 // For this test, we need to have servers in cluster B creating routes 6590 // to each other to help produce the "duplicate route" situation, so 6591 // we are forced to use deterministic ports. 6592 getEphemeralPort := func() int { 6593 t.Helper() 6594 l, err := net.Listen("tcp", "127.0.0.1:0") 6595 if err != nil { 6596 t.Fatalf("Error getting a port: %v", err) 6597 } 6598 p := l.Addr().(*net.TCPAddr).Port 6599 l.Close() 6600 return p 6601 } 6602 p1 := getEphemeralPort() 6603 p2 := getEphemeralPort() 6604 routeURLs := fmt.Sprintf("nats://127.0.0.1:%d,nats://127.0.0.1:%d", p1, p2) 6605 6606 ob1 := testDefaultOptionsForGateway("B") 6607 ob1.Cluster.Port = p1 6608 ob1.Routes = RoutesFromStr(routeURLs) 6609 sb1 := RunServer(ob1) 6610 defer sb1.Shutdown() 6611 6612 ob2 := testDefaultOptionsForGateway("B") 6613 ob2.Cluster.Port = p2 6614 ob2.Routes = RoutesFromStr(routeURLs) 6615 sb2 := RunServer(ob2) 6616 defer sb2.Shutdown() 6617 6618 checkClusterFormed(t, sb1, sb2) 6619 6620 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb1) 6621 sa := RunServer(oa) 6622 defer sa.Shutdown() 6623 6624 waitForOutboundGateways(t, sb1, 1, 2*time.Second) 6625 waitForOutboundGateways(t, sb2, 1, 2*time.Second) 6626 waitForOutboundGateways(t, sa, 1, 2*time.Second) 6627 waitForInboundGateways(t, sa, 2, 2*time.Second) 6628 6629 checkURLs := func(s *Server) { 6630 t.Helper() 6631 s.mu.Lock() 6632 urls := s.gateway.URLs.getAsStringSlice() 6633 s.mu.Unlock() 6634 if len(urls) != 2 { 6635 t.Fatalf("Expected 2 urls, got %v", urls) 6636 } 6637 } 6638 checkURLs(sb1) 6639 checkURLs(sb2) 6640 6641 // As for sa, we should 
have both sb1 and sb2 urls in its outbound urls map 6642 c := sa.getOutboundGatewayConnection("B") 6643 if c == nil { 6644 t.Fatal("No outound connection found!") 6645 } 6646 c.mu.Lock() 6647 urls := c.gw.cfg.urls 6648 c.mu.Unlock() 6649 if len(urls) != 2 { 6650 t.Fatalf("Expected 2 urls to B, got %v", urls) 6651 } 6652 } 6653 6654 func TestGatewayDuplicateServerName(t *testing.T) { 6655 // We will have 2 servers per cluster names "nats1" and "nats2", and have 6656 // the servers in the second cluster with the same name, but we will make 6657 // sure to connect "A/nats1" to "B/nats2" and "A/nats2" to "B/nats1" and 6658 // verify that we still discover the duplicate names. 6659 ob1 := testDefaultOptionsForGateway("B") 6660 ob1.ServerName = "nats1" 6661 sb1 := RunServer(ob1) 6662 defer sb1.Shutdown() 6663 6664 ob2 := testDefaultOptionsForGateway("B") 6665 ob2.ServerName = "nats2" 6666 ob2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", ob1.Cluster.Port)) 6667 sb2 := RunServer(ob2) 6668 defer sb2.Shutdown() 6669 6670 checkClusterFormed(t, sb1, sb2) 6671 6672 oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2) 6673 oa1.ServerName = "nats1" 6674 // Needed later in the test 6675 oa1.Gateway.RejectUnknown = true 6676 sa1 := RunServer(oa1) 6677 defer sa1.Shutdown() 6678 sa1l := &captureErrorLogger{errCh: make(chan string, 100)} 6679 sa1.SetLogger(sa1l, false, false) 6680 6681 oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb1) 6682 oa2.ServerName = "nats2" 6683 // Needed later in the test 6684 oa2.Gateway.RejectUnknown = true 6685 oa2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", oa1.Cluster.Port)) 6686 sa2 := RunServer(oa2) 6687 defer sa2.Shutdown() 6688 sa2l := &captureErrorLogger{errCh: make(chan string, 100)} 6689 sa2.SetLogger(sa2l, false, false) 6690 6691 checkClusterFormed(t, sa1, sa2) 6692 6693 checkForDupError := func(errCh chan string) { 6694 t.Helper() 6695 timeout := time.NewTimer(time.Second) 6696 for done := false; 
!done; { 6697 select { 6698 case err := <-errCh: 6699 if strings.Contains(err, "server has a duplicate name") { 6700 done = true 6701 } 6702 case <-timeout.C: 6703 t.Fatal("Did not get error about servers in super-cluster with same name") 6704 } 6705 } 6706 } 6707 6708 // Since only servers from "A" have configured outbound to 6709 // cluster "B", only servers on "A" are expected to report error. 6710 for _, errCh := range []chan string{sa1l.errCh, sa2l.errCh} { 6711 checkForDupError(errCh) 6712 } 6713 6714 // So now we are going to fix names and wait for the super cluster to form. 6715 sa2.Shutdown() 6716 sa1.Shutdown() 6717 6718 // Drain the error channels 6719 for _, errCh := range []chan string{sa1l.errCh, sa2l.errCh} { 6720 for done := false; !done; { 6721 select { 6722 case <-errCh: 6723 default: 6724 done = true 6725 } 6726 } 6727 } 6728 6729 oa1.ServerName = "a_nats1" 6730 oa2.ServerName = "a_nats2" 6731 sa1 = RunServer(oa1) 6732 defer sa1.Shutdown() 6733 sa2 = RunServer(oa2) 6734 defer sa2.Shutdown() 6735 6736 checkClusterFormed(t, sa1, sa2) 6737 6738 waitForOutboundGateways(t, sa1, 1, 2*time.Second) 6739 waitForOutboundGateways(t, sa2, 1, 2*time.Second) 6740 waitForOutboundGateways(t, sb1, 1, 2*time.Second) 6741 waitForOutboundGateways(t, sb2, 1, 2*time.Second) 6742 6743 // Now add a server on cluster B (that does not have outbound 6744 // gateway connections explicitly defined) and use the name 6745 // of one of the cluster A's server. We should get an error. 
6746 ob3 := testDefaultOptionsForGateway("B") 6747 ob3.ServerName = "a_nats2" 6748 ob3.Accounts = []*Account{NewAccount("sys")} 6749 ob3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", ob2.Cluster.Port)) 6750 sb3 := RunServer(ob3) 6751 defer sb3.Shutdown() 6752 sb3l := &captureErrorLogger{errCh: make(chan string, 100)} 6753 sb3.SetLogger(sb3l, false, false) 6754 6755 checkClusterFormed(t, sb1, sb2, sb3) 6756 6757 // It should report the error when trying to create the GW connection 6758 checkForDupError(sb3l.errCh) 6759 6760 // Stop this node 6761 sb3.Shutdown() 6762 checkClusterFormed(t, sb1, sb2) 6763 6764 // Now create a GW "C" with a server that uses the same name than one of 6765 // the server on "A", say "a_nats2". 6766 // This server will connect to "B", and "B" will gossip "A" back to "C" 6767 // and "C" will then try to connect to "A", but "A" rejects unknown, so 6768 // connection will be refused. However, we want to make sure that the 6769 // duplicate server name is still detected. 
6770 oc := testGatewayOptionsFromToWithServers(t, "C", "B", sb1) 6771 oc.ServerName = "a_nats2" 6772 oc.Accounts = []*Account{NewAccount("sys")} 6773 sc := RunServer(oc) 6774 defer sc.Shutdown() 6775 scl := &captureErrorLogger{errCh: make(chan string, 100)} 6776 sc.SetLogger(scl, false, false) 6777 6778 // It should report the error when trying to create the GW connection 6779 // to cluster "A" 6780 checkForDupError(scl.errCh) 6781 } 6782 6783 func TestGatewayNoPanicOnStartupWithMonitoring(t *testing.T) { 6784 o := testDefaultOptionsForGateway("B") 6785 o.HTTPHost = "127.0.0.1" 6786 o.HTTPPort = 8888 6787 s, err := NewServer(o) 6788 require_NoError(t, err) 6789 6790 wg := sync.WaitGroup{} 6791 wg.Add(1) 6792 go func() { 6793 defer wg.Done() 6794 6795 time.Sleep(50 * time.Millisecond) 6796 s.Start() 6797 s.WaitForShutdown() 6798 }() 6799 6800 for { 6801 g, err := s.Gatewayz(nil) 6802 if err != nil { 6803 continue 6804 } 6805 if g.Port != 0 && g.Port != s.GatewayAddr().Port { 6806 t.Fatalf("Unexpected port: %v vs %v", g.Port, s.GatewayAddr().Port) 6807 } 6808 break 6809 } 6810 s.Shutdown() 6811 wg.Wait() 6812 } 6813 6814 func TestGatewaySwitchToInterestOnlyModeImmediately(t *testing.T) { 6815 o2 := testDefaultOptionsForGateway("B") 6816 // Add users to cause s2 to require auth. Will add an account with user later. 
6817 o2.Users = append([]*User(nil), &User{Username: "test", Password: "pwd"}) 6818 s2 := runGatewayServer(o2) 6819 defer s2.Shutdown() 6820 6821 o1 := testGatewayOptionsFromToWithServers(t, "A", "B", s2) 6822 setAccountUserPassInOptions(o1, "$foo", "ivan", "password") 6823 s1 := runGatewayServer(o1) 6824 defer s1.Shutdown() 6825 6826 waitForOutboundGateways(t, s1, 1, time.Second) 6827 waitForOutboundGateways(t, s2, 1, time.Second) 6828 6829 s1Url := fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o1.Port) 6830 nc := natsConnect(t, s1Url) 6831 defer nc.Close() 6832 natsPub(t, nc, "foo", []byte("hello")) 6833 natsFlush(t, nc) 6834 6835 checkCount := func(t *testing.T, c *client, expected int) { 6836 t.Helper() 6837 c.mu.Lock() 6838 out := c.outMsgs 6839 c.mu.Unlock() 6840 if int(out) != expected { 6841 t.Fatalf("Expected %d message(s) to be sent over, got %v", expected, out) 6842 } 6843 } 6844 // No message should be sent 6845 gwcb := s1.getOutboundGatewayConnection("B") 6846 checkCount(t, gwcb, 0) 6847 6848 // Check that we are in interest-only mode, but in this case, since s2 does 6849 // have the account, we should have the account not even present in the map. 6850 checkGWInterestOnlyModeOrNotPresent(t, s1, "B", "$foo", true) 6851 6852 // Add account to S2 and a client. 
6853 s2FooAcc, err := s2.RegisterAccount("$foo") 6854 if err != nil { 6855 t.Fatalf("Error registering account: %v", err) 6856 } 6857 s2.mu.Lock() 6858 s2.users["ivan"] = &User{Account: s2FooAcc, Username: "ivan", Password: "password"} 6859 s2.mu.Unlock() 6860 s2Url := fmt.Sprintf("nats://ivan:password@127.0.0.1:%d", o2.Port) 6861 ncS2 := natsConnect(t, s2Url) 6862 defer ncS2.Close() 6863 natsSubSync(t, ncS2, "asub") 6864 // This time we will have the account in the map and it will be interest-only 6865 checkGWInterestOnlyMode(t, s1, "B", "$foo") 6866 6867 // Now publish a message, still should not go because the sub is on "asub" 6868 natsPub(t, nc, "foo", []byte("hello")) 6869 natsFlush(t, nc) 6870 checkCount(t, gwcb, 0) 6871 6872 natsSubSync(t, ncS2, "foo") 6873 natsFlush(t, ncS2) 6874 6875 checkGWInterestOnlyModeInterestOn(t, s1, "B", "$foo", "foo") 6876 6877 // Publish on foo 6878 natsPub(t, nc, "foo", []byte("hello")) 6879 natsFlush(t, nc) 6880 checkCount(t, gwcb, 1) 6881 } 6882 6883 func TestGatewaySlowConsumer(t *testing.T) { 6884 gatewayMaxPingInterval = 50 * time.Millisecond 6885 defer func() { gatewayMaxPingInterval = gwMaxPingInterval }() 6886 6887 ob := testDefaultOptionsForGateway("B") 6888 sb := RunServer(ob) 6889 defer sb.Shutdown() 6890 6891 oa := testGatewayOptionsFromToWithServers(t, "A", "B", sb) 6892 sa := RunServer(oa) 6893 defer sa.Shutdown() 6894 6895 waitForInboundGateways(t, sa, 1, 2*time.Second) 6896 waitForOutboundGateways(t, sa, 1, 2*time.Second) 6897 waitForInboundGateways(t, sb, 1, 2*time.Second) 6898 waitForOutboundGateways(t, sb, 1, 2*time.Second) 6899 6900 c := sa.getOutboundGatewayConnection("B") 6901 c.mu.Lock() 6902 c.out.wdl = time.Nanosecond 6903 c.mu.Unlock() 6904 6905 <-time.After(250 * time.Millisecond) 6906 got := sa.NumSlowConsumersGateways() 6907 expected := uint64(1) 6908 if got != 1 { 6909 t.Errorf("got: %d, expected: %d", got, expected) 6910 } 6911 got = sb.NumSlowConsumersGateways() 6912 expected = 0 6913 if got != 
expected { 6914 t.Errorf("got: %d, expected: %d", got, expected) 6915 } 6916 } 6917 6918 // https://github.com/nats-io/nats-server/issues/5187 6919 func TestGatewayConnectEvents(t *testing.T) { 6920 checkEvents := func(t *testing.T, name string, queue bool) { 6921 t.Run(name, func(t *testing.T) { 6922 ca := createClusterEx(t, true, 5*time.Millisecond, true, "A", 2) 6923 defer shutdownCluster(ca) 6924 6925 cb := createClusterEx(t, true, 5*time.Millisecond, true, "B", 2, ca) 6926 defer shutdownCluster(cb) 6927 6928 sysA, err := nats.Connect(ca.randomServer().ClientURL(), nats.UserInfo("sys", "pass")) 6929 require_NoError(t, err) 6930 defer sysA.Close() 6931 6932 var sub1 *nats.Subscription 6933 if queue { 6934 sub1, err = sysA.QueueSubscribeSync("$SYS.ACCOUNT.FOO.CONNECT", "myqueue") 6935 } else { 6936 sub1, err = sysA.SubscribeSync("$SYS.ACCOUNT.FOO.CONNECT") 6937 } 6938 require_NoError(t, err) 6939 6940 cA, err := nats.Connect(ca.randomServer().ClientURL(), nats.UserInfo("foo", "pass")) 6941 require_NoError(t, err) 6942 defer cA.Close() 6943 6944 msg, err := sub1.NextMsg(time.Second) 6945 require_NoError(t, err) 6946 require_Equal(t, msg.Subject, "$SYS.ACCOUNT.FOO.CONNECT") 6947 6948 cB, err := nats.Connect(cb.randomServer().ClientURL(), nats.UserInfo("foo", "pass")) 6949 require_NoError(t, err) 6950 defer cB.Close() 6951 6952 msg, err = sub1.NextMsg(time.Second) 6953 require_NoError(t, err) 6954 require_Equal(t, msg.Subject, "$SYS.ACCOUNT.FOO.CONNECT") 6955 }) 6956 } 6957 6958 checkEvents(t, "Unqueued", false) 6959 checkEvents(t, "Queued", true) 6960 } 6961 6962 func disconnectInboundGateways(s *Server) { 6963 s.gateway.RLock() 6964 in := s.gateway.in 6965 s.gateway.RUnlock() 6966 6967 s.gateway.RLock() 6968 for _, client := range in { 6969 s.gateway.RUnlock() 6970 client.closeConnection(ClientClosed) 6971 s.gateway.RLock() 6972 } 6973 s.gateway.RUnlock() 6974 } 6975 6976 type testMissingOCSPStapleLogger struct { 6977 DummyLogger 6978 ch chan string 6979 } 6980 
6981 func (l *testMissingOCSPStapleLogger) Errorf(format string, v ...any) { 6982 msg := fmt.Sprintf(format, v...) 6983 if strings.Contains(msg, "peer missing OCSP Staple") { 6984 select { 6985 case l.ch <- msg: 6986 default: 6987 } 6988 } 6989 } 6990 6991 func TestOCSPGatewayMissingPeerStapleIssue(t *testing.T) { 6992 const ( 6993 caCert = "../test/configs/certs/ocsp/ca-cert.pem" 6994 caKey = "../test/configs/certs/ocsp/ca-key.pem" 6995 ) 6996 ctx, cancel := context.WithCancel(context.Background()) 6997 defer cancel() 6998 ocspr := NewOCSPResponderCustomTimeout(t, caCert, caKey, 10*time.Minute) 6999 defer ocspr.Shutdown(ctx) 7000 addr := fmt.Sprintf("http://%s", ocspr.Addr) 7001 7002 // Node A 7003 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-01-cert.pem", ocsp.Good) 7004 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-02-cert.pem", ocsp.Good) 7005 7006 // Node B 7007 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-03-cert.pem", ocsp.Good) 7008 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-04-cert.pem", ocsp.Good) 7009 7010 // Node C 7011 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-05-cert.pem", ocsp.Good) 7012 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-06-cert.pem", ocsp.Good) 7013 7014 // Node A rotated certs 7015 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-07-cert.pem", ocsp.Good) 7016 SetOCSPStatus(t, addr, "../test/configs/certs/ocsp/server-status-request-url-08-cert.pem", ocsp.Good) 7017 7018 // Store Dirs 7019 storeDirA := t.TempDir() 7020 storeDirB := t.TempDir() 7021 storeDirC := t.TempDir() 7022 7023 // Gateway server configuration 7024 srvConfA := ` 7025 host: "127.0.0.1" 7026 port: -1 7027 7028 server_name: "AAA" 7029 7030 ocsp { mode = always } 7031 7032 system_account = sys 7033 accounts { 7034 sys { users = [{ user: sys, pass: sys 
}]} 7035 guest { users = [{ user: guest, pass: guest }]} 7036 } 7037 no_auth_user = guest 7038 7039 store_dir: '%s' 7040 gateway { 7041 name: A 7042 host: "127.0.0.1" 7043 port: -1 7044 advertise: "127.0.0.1" 7045 7046 tls { 7047 cert_file: "../test/configs/certs/ocsp/server-status-request-url-02-cert.pem" 7048 key_file: "../test/configs/certs/ocsp/server-status-request-url-02-key.pem" 7049 ca_file: "../test/configs/certs/ocsp/ca-cert.pem" 7050 timeout: 5 7051 } 7052 } 7053 ` 7054 srvConfA = fmt.Sprintf(srvConfA, storeDirA) 7055 sconfA := createConfFile(t, []byte(srvConfA)) 7056 srvA, optsA := RunServerWithConfig(sconfA) 7057 defer srvA.Shutdown() 7058 7059 // Gateway B connects to Gateway A. 7060 srvConfB := ` 7061 host: "127.0.0.1" 7062 port: -1 7063 7064 server_name: "BBB" 7065 7066 ocsp { mode = always } 7067 7068 system_account = sys 7069 accounts { 7070 sys { users = [{ user: sys, pass: sys }]} 7071 guest { users = [{ user: guest, pass: guest }]} 7072 } 7073 no_auth_user = guest 7074 7075 store_dir: '%s' 7076 gateway { 7077 name: B 7078 host: "127.0.0.1" 7079 advertise: "127.0.0.1" 7080 port: -1 7081 gateways: [{ 7082 name: "A" 7083 url: "nats://127.0.0.1:%d" 7084 }] 7085 7086 tls { 7087 cert_file: "../test/configs/certs/ocsp/server-status-request-url-04-cert.pem" 7088 key_file: "../test/configs/certs/ocsp/server-status-request-url-04-key.pem" 7089 ca_file: "../test/configs/certs/ocsp/ca-cert.pem" 7090 timeout: 5 7091 } 7092 } 7093 ` 7094 srvConfB = fmt.Sprintf(srvConfB, storeDirB, optsA.Gateway.Port) 7095 conf := createConfFile(t, []byte(srvConfB)) 7096 srvB, optsB := RunServerWithConfig(conf) 7097 defer srvB.Shutdown() 7098 7099 // Client connects to server A. 7100 cA, err := nats.Connect(fmt.Sprintf("nats://127.0.0.1:%d", optsA.Port), 7101 nats.ErrorHandler(noOpErrHandler), 7102 ) 7103 if err != nil { 7104 t.Fatal(err) 7105 } 7106 defer cA.Close() 7107 7108 // Wait for connectivity between A and B. 
7109 waitForOutboundGateways(t, srvB, 1, 5*time.Second) 7110 7111 // Gateway C also connects to Gateway A. 7112 srvConfC := ` 7113 host: "127.0.0.1" 7114 port: -1 7115 7116 server_name: "CCC" 7117 7118 ocsp { mode = always } 7119 7120 system_account = sys 7121 accounts { 7122 sys { users = [{ user: sys, pass: sys }]} 7123 guest { users = [{ user: guest, pass: guest }]} 7124 } 7125 no_auth_user = guest 7126 7127 store_dir: '%s' 7128 gateway { 7129 name: C 7130 host: "127.0.0.1" 7131 advertise: "127.0.0.1" 7132 port: -1 7133 gateways: [{name: "A", url: "nats://127.0.0.1:%d" }] 7134 7135 tls { 7136 cert_file: "../test/configs/certs/ocsp/server-status-request-url-06-cert.pem" 7137 key_file: "../test/configs/certs/ocsp/server-status-request-url-06-key.pem" 7138 ca_file: "../test/configs/certs/ocsp/ca-cert.pem" 7139 timeout: 5 7140 } 7141 } 7142 ` 7143 srvConfC = fmt.Sprintf(srvConfC, storeDirC, optsA.Gateway.Port) 7144 conf = createConfFile(t, []byte(srvConfC)) 7145 srvC, optsC := RunServerWithConfig(conf) 7146 defer srvC.Shutdown() 7147 7148 //////////////////////////////////////////////////////////////////////////// 7149 // // 7150 // A and B are connected at this point and A is starting with certs that // 7151 // will be rotated. 
7152 // // 7153 //////////////////////////////////////////////////////////////////////////// 7154 cB, err := nats.Connect(fmt.Sprintf("nats://127.0.0.1:%d", optsB.Port), 7155 nats.ErrorHandler(noOpErrHandler), 7156 ) 7157 require_NoError(t, err) 7158 defer cB.Close() 7159 7160 cC, err := nats.Connect(fmt.Sprintf("nats://127.0.0.1:%d", optsC.Port), 7161 nats.ErrorHandler(noOpErrHandler), 7162 ) 7163 require_NoError(t, err) 7164 defer cC.Close() 7165 7166 _, err = cA.Subscribe("foo", func(m *nats.Msg) { 7167 m.Respond(nil) 7168 }) 7169 require_NoError(t, err) 7170 7171 cA.Flush() 7172 7173 _, err = cB.Subscribe("bar", func(m *nats.Msg) { 7174 m.Respond(nil) 7175 }) 7176 require_NoError(t, err) 7177 cB.Flush() 7178 7179 waitForOutboundGateways(t, srvB, 1, 10*time.Second) 7180 waitForOutboundGateways(t, srvC, 2, 10*time.Second) 7181 7182 ///////////////////////////////////////////////////////////////////////////////// 7183 // // 7184 // Switch all the certs from server A, all OCSP monitors should be restarted // 7185 // so it should have new staples. 
// 7186 // // 7187 ///////////////////////////////////////////////////////////////////////////////// 7188 srvConfA = ` 7189 host: "127.0.0.1" 7190 port: -1 7191 7192 server_name: "AAA" 7193 7194 ocsp { mode = always } 7195 7196 system_account = sys 7197 accounts { 7198 sys { users = [{ user: sys, pass: sys }]} 7199 guest { users = [{ user: guest, pass: guest }]} 7200 } 7201 no_auth_user = guest 7202 7203 store_dir: '%s' 7204 gateway { 7205 name: A 7206 host: "127.0.0.1" 7207 port: -1 7208 advertise: "127.0.0.1" 7209 7210 tls { 7211 cert_file: "../test/configs/certs/ocsp/server-status-request-url-08-cert.pem" 7212 key_file: "../test/configs/certs/ocsp/server-status-request-url-08-key.pem" 7213 ca_file: "../test/configs/certs/ocsp/ca-cert.pem" 7214 timeout: 5 7215 7216 } 7217 } 7218 ` 7219 7220 srvConfA = fmt.Sprintf(srvConfA, storeDirA) 7221 if err := os.WriteFile(sconfA, []byte(srvConfA), 0666); err != nil { 7222 t.Fatalf("Error writing config: %v", err) 7223 } 7224 if err := srvA.Reload(); err != nil { 7225 t.Fatal(err) 7226 } 7227 waitForOutboundGateways(t, srvA, 2, 5*time.Second) 7228 waitForOutboundGateways(t, srvB, 2, 5*time.Second) 7229 waitForOutboundGateways(t, srvC, 2, 5*time.Second) 7230 7231 // Now clients connect to C can communicate with B and A. 7232 _, err = cC.Request("foo", nil, 2*time.Second) 7233 require_NoError(t, err) 7234 7235 _, err = cC.Request("bar", nil, 2*time.Second) 7236 require_NoError(t, err) 7237 7238 // Reload and disconnect very fast trying to produce the race. 7239 ctx, cancel = context.WithTimeout(context.Background(), 15*time.Second) 7240 defer cancel() 7241 7242 // Swap logger from server to capture the missing peer log. 
7243 lA := &testMissingOCSPStapleLogger{ch: make(chan string, 30)} 7244 srvA.SetLogger(lA, false, false) 7245 7246 lB := &testMissingOCSPStapleLogger{ch: make(chan string, 30)} 7247 srvB.SetLogger(lB, false, false) 7248 7249 lC := &testMissingOCSPStapleLogger{ch: make(chan string, 30)} 7250 srvC.SetLogger(lC, false, false) 7251 7252 // Start with a reload from the last server that connected directly to A. 7253 err = srvC.Reload() 7254 require_NoError(t, err) 7255 7256 // Stress reconnections and reloading servers without getting 7257 // missing OCSP peer staple errors. 7258 var wg sync.WaitGroup 7259 7260 wg.Add(1) 7261 go func() { 7262 for range time.NewTicker(500 * time.Millisecond).C { 7263 select { 7264 case <-ctx.Done(): 7265 wg.Done() 7266 return 7267 default: 7268 } 7269 disconnectInboundGateways(srvA) 7270 } 7271 }() 7272 7273 wg.Add(1) 7274 go func() { 7275 for range time.NewTicker(500 * time.Millisecond).C { 7276 select { 7277 case <-ctx.Done(): 7278 wg.Done() 7279 return 7280 default: 7281 } 7282 disconnectInboundGateways(srvB) 7283 } 7284 }() 7285 7286 wg.Add(1) 7287 go func() { 7288 for range time.NewTicker(500 * time.Millisecond).C { 7289 select { 7290 case <-ctx.Done(): 7291 wg.Done() 7292 return 7293 default: 7294 } 7295 disconnectInboundGateways(srvC) 7296 } 7297 }() 7298 7299 wg.Add(1) 7300 go func() { 7301 for range time.NewTicker(700 * time.Millisecond).C { 7302 select { 7303 case <-ctx.Done(): 7304 wg.Done() 7305 return 7306 default: 7307 } 7308 srvC.Reload() 7309 } 7310 }() 7311 7312 wg.Add(1) 7313 go func() { 7314 for range time.NewTicker(800 * time.Millisecond).C { 7315 select { 7316 case <-ctx.Done(): 7317 wg.Done() 7318 return 7319 default: 7320 } 7321 srvB.Reload() 7322 } 7323 }() 7324 7325 wg.Add(1) 7326 go func() { 7327 for range time.NewTicker(900 * time.Millisecond).C { 7328 select { 7329 case <-ctx.Done(): 7330 wg.Done() 7331 return 7332 default: 7333 } 7334 srvA.Reload() 7335 } 7336 }() 7337 7338 select { 7339 case <-ctx.Done(): 
7340 case msg := <-lA.ch: 7341 t.Fatalf("Server A: Got OCSP Staple error: %v", msg) 7342 case msg := <-lB.ch: 7343 t.Fatalf("Server B: Got OCSP Staple error: %v", msg) 7344 case msg := <-lC.ch: 7345 t.Fatalf("Server C: Got OCSP Staple error: %v", msg) 7346 } 7347 waitForOutboundGateways(t, srvA, 2, 5*time.Second) 7348 waitForOutboundGateways(t, srvB, 2, 5*time.Second) 7349 waitForOutboundGateways(t, srvC, 2, 5*time.Second) 7350 wg.Wait() 7351 }