get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/server/norace_test.go

     1  // Copyright 2018-2024 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  //go:build !race && !skip_no_race_tests
    15  // +build !race,!skip_no_race_tests
    16  
    17  package server
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"compress/gzip"
    23  	"context"
    24  	"encoding/binary"
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"math"
    30  	"math/rand"
    31  	"net"
    32  	"net/http"
    33  	"net/url"
    34  	"path/filepath"
    35  	"reflect"
    36  	"runtime"
    37  	"runtime/debug"
    38  	"strconv"
    39  	"strings"
    40  	"sync"
    41  	"sync/atomic"
    42  	"testing"
    43  	"time"
    44  
    45  	"crypto/hmac"
    46  	crand "crypto/rand"
    47  	"crypto/sha256"
    48  
    49  	"get.pme.sh/pnats/server/avl"
    50  	"github.com/klauspost/compress/s2"
    51  	"github.com/nats-io/jwt/v2"
    52  	"github.com/nats-io/nats.go"
    53  	"github.com/nats-io/nkeys"
    54  	"github.com/nats-io/nuid"
    55  )
    56  
    57  // IMPORTANT: Tests in this file are not executed when running with the -race flag.
    58  //            The test name should be prefixed with TestNoRace so we can run only
    59  //            those tests: go test -run=TestNoRace ...
    60  
    61  func TestNoRaceAvoidSlowConsumerBigMessages(t *testing.T) {
    62  	opts := DefaultOptions() // Use defaults to make sure they avoid pending slow consumer.
    63  	opts.NoSystemAccount = true
    64  	s := RunServer(opts)
    65  	defer s.Shutdown()
    66  
    67  	nc1, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
    68  	if err != nil {
    69  		t.Fatalf("Error on connect: %v", err)
    70  	}
    71  	defer nc1.Close()
    72  
    73  	nc2, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
    74  	if err != nil {
    75  		t.Fatalf("Error on connect: %v", err)
    76  	}
    77  	defer nc2.Close()
    78  
    79  	data := make([]byte, 1024*1024) // 1MB payload
    80  	crand.Read(data)
    81  
    82  	expected := int32(500)
    83  	received := int32(0)
    84  
    85  	done := make(chan bool)
    86  
    87  	// Create Subscription.
    88  	nc1.Subscribe("slow.consumer", func(m *nats.Msg) {
    89  		// Just eat it so that we are not measuring
    90  		// code time, just delivery.
     91  		// Use the value returned by AddInt32 to avoid a non-atomic read of received.
     92  		if atomic.AddInt32(&received, 1) >= expected {
    93  			done <- true
    94  		}
    95  	})
    96  
    97  	// Create Error handler
    98  	nc1.SetErrorHandler(func(c *nats.Conn, s *nats.Subscription, err error) {
    99  		t.Fatalf("Received an error on the subscription's connection: %v\n", err)
   100  	})
   101  
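         	// Flush so the subscription above is registered on the server before we start publishing.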
   102  	nc1.Flush()
   103  
   104  	for i := 0; i < int(expected); i++ {
   105  		nc2.Publish("slow.consumer", data)
   106  	}
   107  	nc2.Flush()
   108  
   109  	select {
   110  	case <-done:
   111  		return
   112  	case <-time.After(10 * time.Second):
   113  		r := atomic.LoadInt32(&received)
   114  		if s.NumSlowConsumers() > 0 {
   115  			t.Fatalf("Did not receive all large messages due to slow consumer status: %d of %d", r, expected)
   116  		}
   117  		t.Fatalf("Failed to receive all large messages: %d of %d\n", r, expected)
   118  	}
   119  }
   120  
   121  func TestNoRaceRoutedQueueAutoUnsubscribe(t *testing.T) {
   122  	optsA, err := ProcessConfigFile("./configs/seed.conf")
   123  	require_NoError(t, err)
   124  	optsA.NoSigs, optsA.NoLog = true, true
   125  	optsA.NoSystemAccount = true
   126  	srvA := RunServer(optsA)
   127  	defer srvA.Shutdown()
   128  
   129  	srvARouteURL := fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host, srvA.ClusterAddr().Port)
   130  	optsB := nextServerOpts(optsA)
   131  	optsB.Routes = RoutesFromStr(srvARouteURL)
   132  
   133  	srvB := RunServer(optsB)
   134  	defer srvB.Shutdown()
   135  
   136  	// Wait for these 2 to connect to each other
   137  	checkClusterFormed(t, srvA, srvB)
   138  
   139  	// Have a client connection to each server
   140  	ncA, err := nats.Connect(fmt.Sprintf("nats://%s:%d", optsA.Host, optsA.Port))
   141  	if err != nil {
   142  		t.Fatalf("Error on connect: %v", err)
   143  	}
   144  	defer ncA.Close()
   145  
   146  	ncB, err := nats.Connect(fmt.Sprintf("nats://%s:%d", optsB.Host, optsB.Port))
   147  	if err != nil {
   148  		t.Fatalf("Error on connect: %v", err)
   149  	}
   150  	defer ncB.Close()
   151  
   152  	rbar := int32(0)
   153  	barCb := func(m *nats.Msg) {
   154  		atomic.AddInt32(&rbar, 1)
   155  	}
   156  	rbaz := int32(0)
   157  	bazCb := func(m *nats.Msg) {
   158  		atomic.AddInt32(&rbaz, 1)
   159  	}
   160  
    161  	// Create 100 queue subs with auto-unsubscribe on each server for
    162  	// group bar and group baz, so 200 total per queue group.
   163  	cons := []*nats.Conn{ncA, ncB}
   164  	for _, c := range cons {
   165  		for i := 0; i < 100; i++ {
   166  			qsub, err := c.QueueSubscribe("foo", "bar", barCb)
   167  			if err != nil {
   168  				t.Fatalf("Error on subscribe: %v", err)
   169  			}
   170  			if err := qsub.AutoUnsubscribe(1); err != nil {
   171  				t.Fatalf("Error on auto-unsubscribe: %v", err)
   172  			}
   173  			qsub, err = c.QueueSubscribe("foo", "baz", bazCb)
   174  			if err != nil {
   175  				t.Fatalf("Error on subscribe: %v", err)
   176  			}
   177  			if err := qsub.AutoUnsubscribe(1); err != nil {
   178  				t.Fatalf("Error on auto-unsubscribe: %v", err)
   179  			}
   180  		}
   181  		c.Subscribe("TEST.COMPLETE", func(m *nats.Msg) {})
   182  	}
   183  
    184  	// Queue subs are coalesced across the route, so each server sees its 200
    185  	// local queue subs, one remote sub per queue group (2), plus the local and
    186  	// remote TEST.COMPLETE subs (2 more). We wait until each server has all 204
    187  	// subscriptions to make sure everything is set up.
   188  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
   189  		subsA := srvA.NumSubscriptions()
   190  		subsB := srvB.NumSubscriptions()
   191  		if subsA != 204 || subsB != 204 {
   192  			return fmt.Errorf("Not all subs processed yet: %d and %d", subsA, subsB)
   193  		}
   194  		return nil
   195  	})
   196  
   197  	expected := int32(200)
   198  	// Now send messages from each server
   199  	for i := int32(0); i < expected; i++ {
   200  		c := cons[i%2]
   201  		c.Publish("foo", []byte("Don't Drop Me!"))
   202  	}
   203  	for _, c := range cons {
   204  		c.Flush()
   205  	}
   206  
   207  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
   208  		nbar := atomic.LoadInt32(&rbar)
   209  		nbaz := atomic.LoadInt32(&rbaz)
   210  		if nbar == expected && nbaz == expected {
   211  			return nil
   212  		}
   213  		return fmt.Errorf("Did not receive all %d queue messages, received %d for 'bar' and %d for 'baz'",
   214  			expected, atomic.LoadInt32(&rbar), atomic.LoadInt32(&rbaz))
   215  	})
   216  }
   217  
   218  func TestNoRaceClosedSlowConsumerWriteDeadline(t *testing.T) {
   219  	opts := DefaultOptions()
   220  	opts.NoSystemAccount = true
   221  	opts.WriteDeadline = 10 * time.Millisecond // Make very small to trip.
   222  	opts.MaxPending = 500 * 1024 * 1024        // Set high so it will not trip here.
   223  	s := RunServer(opts)
   224  	defer s.Shutdown()
   225  
   226  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   227  	if err != nil {
   228  		t.Fatalf("Error on connect: %v", err)
   229  	}
   230  	defer c.Close()
   231  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   232  		t.Fatalf("Error sending protocols to server: %v", err)
   233  	}
   234  	// Reduce socket buffer to increase reliability of data backing up in the server destined
   235  	// for our subscribed client.
   236  	c.(*net.TCPConn).SetReadBuffer(128)
   237  
   238  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   239  	sender, err := nats.Connect(url)
   240  	if err != nil {
   241  		t.Fatalf("Error on connect: %v", err)
   242  	}
   243  	defer sender.Close()
   244  
   245  	payload := make([]byte, 1024*1024)
   246  	for i := 0; i < 100; i++ {
   247  		if err := sender.Publish("foo", payload); err != nil {
   248  			t.Fatalf("Error on publish: %v", err)
   249  		}
   250  	}
   251  
   252  	// Flush sender connection to ensure that all data has been sent.
   253  	if err := sender.Flush(); err != nil {
   254  		t.Fatalf("Error on flush: %v", err)
   255  	}
   256  
   257  	// At this point server should have closed connection c.
   258  	checkClosedConns(t, s, 1, 2*time.Second)
   259  	conns := s.closedClients()
   260  	if lc := len(conns); lc != 1 {
   261  		t.Fatalf("len(conns) expected to be %d, got %d\n", 1, lc)
   262  	}
   263  	checkReason(t, conns[0].Reason, SlowConsumerWriteDeadline)
   264  }
   265  
   266  func TestNoRaceClosedSlowConsumerPendingBytes(t *testing.T) {
   267  	opts := DefaultOptions()
   268  	opts.NoSystemAccount = true
   269  	opts.WriteDeadline = 30 * time.Second // Wait for long time so write deadline does not trigger slow consumer.
   270  	opts.MaxPending = 1 * 1024 * 1024     // Set to low value (1MB) to allow SC to trip.
   271  	s := RunServer(opts)
   272  	defer s.Shutdown()
   273  
   274  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   275  	if err != nil {
   276  		t.Fatalf("Error on connect: %v", err)
   277  	}
   278  	defer c.Close()
   279  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   280  		t.Fatalf("Error sending protocols to server: %v", err)
   281  	}
   282  	// Reduce socket buffer to increase reliability of data backing up in the server destined
   283  	// for our subscribed client.
   284  	c.(*net.TCPConn).SetReadBuffer(128)
   285  
   286  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   287  	sender, err := nats.Connect(url)
   288  	if err != nil {
   289  		t.Fatalf("Error on connect: %v", err)
   290  	}
   291  	defer sender.Close()
   292  
   293  	payload := make([]byte, 1024*1024)
   294  	for i := 0; i < 100; i++ {
   295  		if err := sender.Publish("foo", payload); err != nil {
   296  			t.Fatalf("Error on publish: %v", err)
   297  		}
   298  	}
   299  
   300  	// Flush sender connection to ensure that all data has been sent.
   301  	if err := sender.Flush(); err != nil {
   302  		t.Fatalf("Error on flush: %v", err)
   303  	}
   304  
   305  	// At this point server should have closed connection c.
   306  	checkClosedConns(t, s, 1, 2*time.Second)
   307  	conns := s.closedClients()
   308  	if lc := len(conns); lc != 1 {
   309  		t.Fatalf("len(conns) expected to be %d, got %d\n", 1, lc)
   310  	}
   311  	checkReason(t, conns[0].Reason, SlowConsumerPendingBytes)
   312  }
   313  
   314  func TestNoRaceSlowConsumerPendingBytes(t *testing.T) {
   315  	opts := DefaultOptions()
   316  	opts.NoSystemAccount = true
   317  	opts.WriteDeadline = 30 * time.Second // Wait for long time so write deadline does not trigger slow consumer.
   318  	opts.MaxPending = 1 * 1024 * 1024     // Set to low value (1MB) to allow SC to trip.
   319  	s := RunServer(opts)
   320  	defer s.Shutdown()
   321  
   322  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   323  	if err != nil {
   324  		t.Fatalf("Error on connect: %v", err)
   325  	}
   326  	defer c.Close()
   327  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   328  		t.Fatalf("Error sending protocols to server: %v", err)
   329  	}
   330  	// Reduce socket buffer to increase reliability of data backing up in the server destined
   331  	// for our subscribed client.
   332  	c.(*net.TCPConn).SetReadBuffer(128)
   333  
   334  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   335  	sender, err := nats.Connect(url)
   336  	if err != nil {
   337  		t.Fatalf("Error on connect: %v", err)
   338  	}
   339  	defer sender.Close()
   340  
   341  	payload := make([]byte, 1024*1024)
   342  	for i := 0; i < 100; i++ {
   343  		if err := sender.Publish("foo", payload); err != nil {
   344  			t.Fatalf("Error on publish: %v", err)
   345  		}
   346  	}
   347  
   348  	// Flush sender connection to ensure that all data has been sent.
   349  	if err := sender.Flush(); err != nil {
   350  		t.Fatalf("Error on flush: %v", err)
   351  	}
   352  
   353  	// At this point server should have closed connection c.
   354  
   355  	// On certain platforms, it may take more than one call before
   356  	// getting the error.
   357  	for i := 0; i < 100; i++ {
   358  		if _, err := c.Write([]byte("PUB bar 5\r\nhello\r\n")); err != nil {
   359  			// ok
   360  			return
   361  		}
   362  	}
   363  	t.Fatal("Connection should have been closed")
   364  }
   365  
   366  func TestNoRaceGatewayNoMissingReplies(t *testing.T) {
    367  	// This test will have the following setup:
   368  	//
   369  	// responder1		         requestor
   370  	//    |                          |
   371  	//    v                          v
   372  	//   [A1]<-------gw------------[B1]
   373  	//    |  \                      |
   374  	//    |   \______gw__________   | route
   375  	//    |                     _\| |
   376  	//   [  ]--------gw----------->[  ]
   377  	//   [A2]<-------gw------------[B2]
   378  	//   [  ]                      [  ]
   379  	//    ^
   380  	//    |
   381  	// responder2
   382  	//
   383  	// There is a possible race that when the requestor creates
   384  	// a subscription on the reply subject, the subject interest
   385  	// being sent from the inbound gateway, and B1 having none,
   386  	// the SUB first goes to B2 before being sent to A1 from
   387  	// B2's inbound GW. But the request can go from B1 to A1
   388  	// right away and the responder1 connecting to A1 may send
   389  	// back the reply before the interest on the reply makes it
   390  	// to A1 (from B2).
   391  	// This test will also verify that if the responder is instead
   392  	// connected to A2, the reply is properly received by requestor
   393  	// on B1.
   394  
   395  	// For this test we want to be in interestOnly mode, so
   396  	// make it happen quickly
   397  	gatewayMaxRUnsubBeforeSwitch = 1
   398  	defer func() { gatewayMaxRUnsubBeforeSwitch = defaultGatewayMaxRUnsubBeforeSwitch }()
   399  
   400  	// Start with setting up A2 and B2.
   401  	ob2 := testDefaultOptionsForGateway("B")
   402  	sb2 := runGatewayServer(ob2)
   403  	defer sb2.Shutdown()
   404  
   405  	oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2)
   406  	sa2 := runGatewayServer(oa2)
   407  	defer sa2.Shutdown()
   408  
   409  	waitForOutboundGateways(t, sa2, 1, time.Second)
   410  	waitForInboundGateways(t, sa2, 1, time.Second)
   411  	waitForOutboundGateways(t, sb2, 1, time.Second)
   412  	waitForInboundGateways(t, sb2, 1, time.Second)
   413  
   414  	// Now start A1 which will connect to B2
   415  	oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2)
   416  	oa1.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa2.Cluster.Host, oa2.Cluster.Port))
   417  	sa1 := runGatewayServer(oa1)
   418  	defer sa1.Shutdown()
   419  
   420  	waitForOutboundGateways(t, sa1, 1, time.Second)
   421  	waitForInboundGateways(t, sb2, 2, time.Second)
   422  
   423  	checkClusterFormed(t, sa1, sa2)
   424  
   425  	// Finally, start B1 that will connect to A1.
   426  	ob1 := testGatewayOptionsFromToWithServers(t, "B", "A", sa1)
   427  	ob1.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", ob2.Cluster.Host, ob2.Cluster.Port))
   428  	sb1 := runGatewayServer(ob1)
   429  	defer sb1.Shutdown()
   430  
   431  	// Check that we have the outbound gateway from B1 to A1
   432  	checkFor(t, 3*time.Second, 15*time.Millisecond, func() error {
   433  		c := sb1.getOutboundGatewayConnection("A")
   434  		if c == nil {
   435  			return fmt.Errorf("Outbound connection to A not created yet")
   436  		}
   437  		c.mu.Lock()
   438  		name := c.opts.Name
   439  		nc := c.nc
   440  		c.mu.Unlock()
   441  		if name != sa1.ID() {
   442  			// Force a disconnect
   443  			nc.Close()
   444  			return fmt.Errorf("Was unable to have B1 connect to A1")
   445  		}
   446  		return nil
   447  	})
   448  
   449  	waitForInboundGateways(t, sa1, 1, time.Second)
   450  	checkClusterFormed(t, sb1, sb2)
   451  
   452  	a1URL := fmt.Sprintf("nats://%s:%d", oa1.Host, oa1.Port)
   453  	a2URL := fmt.Sprintf("nats://%s:%d", oa2.Host, oa2.Port)
   454  	b1URL := fmt.Sprintf("nats://%s:%d", ob1.Host, ob1.Port)
   455  	b2URL := fmt.Sprintf("nats://%s:%d", ob2.Host, ob2.Port)
   456  
   457  	ncb1 := natsConnect(t, b1URL)
   458  	defer ncb1.Close()
   459  
   460  	ncb2 := natsConnect(t, b2URL)
   461  	defer ncb2.Close()
   462  
   463  	natsSubSync(t, ncb1, "just.a.sub")
   464  	natsSubSync(t, ncb2, "just.a.sub")
   465  	checkExpectedSubs(t, 2, sb1, sb2)
   466  
   467  	// For this test, we want A to be checking B's interest in order
   468  	// to send messages (which would cause replies to be dropped if
   469  	// there is no interest registered on A). So from A servers,
    470  	// send to various subjects and cause the B servers to switch to interestOnly
   471  	// mode.
   472  	nca1 := natsConnect(t, a1URL)
   473  	defer nca1.Close()
   474  	for i := 0; i < 10; i++ {
   475  		natsPub(t, nca1, fmt.Sprintf("reject.%d", i), []byte("hello"))
   476  	}
   477  	nca2 := natsConnect(t, a2URL)
   478  	defer nca2.Close()
   479  	for i := 0; i < 10; i++ {
   480  		natsPub(t, nca2, fmt.Sprintf("reject.%d", i), []byte("hello"))
   481  	}
   482  
   483  	checkSwitchedMode := func(t *testing.T, s *Server) {
   484  		t.Helper()
   485  		checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
   486  			var switchedMode bool
   487  			c := s.getOutboundGatewayConnection("B")
   488  			ei, _ := c.gw.outsim.Load(globalAccountName)
   489  			if ei != nil {
   490  				e := ei.(*outsie)
   491  				e.RLock()
   492  				switchedMode = e.ni == nil && e.mode == InterestOnly
   493  				e.RUnlock()
   494  			}
   495  			if !switchedMode {
   496  				return fmt.Errorf("Still not switched mode")
   497  			}
   498  			return nil
   499  		})
   500  	}
   501  	checkSwitchedMode(t, sa1)
   502  	checkSwitchedMode(t, sa2)
   503  
    504  	// Setup a subscriber on myreply.> on each of A's servers (plain sub on A1, queue sub on A2).
   505  	total := 1000
   506  	expected := int32(total)
   507  	rcvOnA := int32(0)
   508  	qrcvOnA := int32(0)
   509  	natsSub(t, nca1, "myreply.>", func(_ *nats.Msg) {
   510  		atomic.AddInt32(&rcvOnA, 1)
   511  	})
   512  	natsQueueSub(t, nca2, "myreply.>", "bar", func(_ *nats.Msg) {
   513  		atomic.AddInt32(&qrcvOnA, 1)
   514  	})
   515  	checkExpectedSubs(t, 2, sa1, sa2)
   516  
   517  	// Ok.. so now we will run the actual test where we
   518  	// create a responder on A1 and make sure that every
   519  	// single request from B1 gets the reply. Will repeat
   520  	// test with responder connected to A2.
   521  	sendReqs := func(t *testing.T, subConn *nats.Conn) {
   522  		t.Helper()
   523  		responder := natsSub(t, subConn, "foo", func(m *nats.Msg) {
   524  			m.Respond([]byte("reply"))
   525  		})
   526  		natsFlush(t, subConn)
   527  		checkExpectedSubs(t, 3, sa1, sa2)
   528  
    529  		// We are not going to use Request() because it sets
    530  		// a wildcard subscription on an INBOX and is less likely
    531  		// to produce the race. Instead we will explicitly create
    532  		// the subscription on the reply subject, one subscription
    533  		// per request.
   534  		for i := 0; i < total/2; i++ {
   535  			reply := fmt.Sprintf("myreply.%d", i)
   536  			replySub := natsQueueSubSync(t, ncb1, reply, "bar")
   537  			natsFlush(t, ncb1)
   538  
   539  			// Let's make sure we have interest on B2.
   540  			if r := sb2.globalAccount().sl.Match(reply); len(r.qsubs) == 0 {
   541  				checkFor(t, time.Second, time.Millisecond, func() error {
   542  					if r := sb2.globalAccount().sl.Match(reply); len(r.qsubs) == 0 {
   543  						return fmt.Errorf("B still not registered interest on %s", reply)
   544  					}
   545  					return nil
   546  				})
   547  			}
   548  			natsPubReq(t, ncb1, "foo", reply, []byte("request"))
   549  			if _, err := replySub.NextMsg(time.Second); err != nil {
   550  				t.Fatalf("Did not receive reply: %v", err)
   551  			}
   552  			natsUnsub(t, replySub)
   553  		}
   554  
   555  		responder.Unsubscribe()
   556  		natsFlush(t, subConn)
   557  		checkExpectedSubs(t, 2, sa1, sa2)
   558  	}
   559  	sendReqs(t, nca1)
   560  	sendReqs(t, nca2)
   561  
   562  	checkFor(t, time.Second, 15*time.Millisecond, func() error {
   563  		if n := atomic.LoadInt32(&rcvOnA); n != expected {
   564  			return fmt.Errorf("Subs on A expected to get %v replies, got %v", expected, n)
   565  		}
   566  		return nil
   567  	})
   568  
   569  	// We should not have received a single message on the queue sub
   570  	// on cluster A because messages will have been delivered to
   571  	// the member on cluster B.
   572  	if n := atomic.LoadInt32(&qrcvOnA); n != 0 {
   573  		t.Fatalf("Queue sub on A should not have received message, got %v", n)
   574  	}
   575  }
   576  
   577  func TestNoRaceRouteMemUsage(t *testing.T) {
   578  	oa := DefaultOptions()
   579  	sa := RunServer(oa)
   580  	defer sa.Shutdown()
   581  
   582  	ob := DefaultOptions()
   583  	ob.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa.Cluster.Host, oa.Cluster.Port))
   584  	sb := RunServer(ob)
   585  	defer sb.Shutdown()
   586  
   587  	checkClusterFormed(t, sa, sb)
   588  
   589  	responder := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port))
   590  	defer responder.Close()
   591  	for i := 0; i < 10; i++ {
   592  		natsSub(t, responder, "foo", func(m *nats.Msg) {
   593  			m.Respond(m.Data)
   594  		})
   595  	}
   596  	natsFlush(t, responder)
   597  
   598  	payload := make([]byte, 50*1024)
   599  
   600  	bURL := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)
   601  
   602  	// Capture mem usage
   603  	mem := runtime.MemStats{}
   604  	runtime.ReadMemStats(&mem)
   605  	inUseBefore := mem.HeapInuse
   606  
   607  	for i := 0; i < 100; i++ {
   608  		requestor := natsConnect(t, bURL)
    609  		// Don't use a defer here, otherwise that will make the memory check fail!
    610  		// We close the connection right after these few instructions, none of
    611  		// which call t.Fatal() anyway.
   612  		inbox := nats.NewInbox()
   613  		sub := natsSubSync(t, requestor, inbox)
   614  		natsPubReq(t, requestor, "foo", inbox, payload)
   615  		for j := 0; j < 10; j++ {
   616  			natsNexMsg(t, sub, time.Second)
   617  		}
   618  		requestor.Close()
   619  	}
   620  
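         	// Force a GC and return freed memory to the OS before re-sampling heap usage.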
   621  	runtime.GC()
   622  	debug.FreeOSMemory()
   623  	runtime.ReadMemStats(&mem)
   624  	inUseNow := mem.HeapInuse
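         	// Only fail if heap in-use grew to more than 3x the baseline captured before the requests.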
   625  	if inUseNow > 3*inUseBefore {
   626  		t.Fatalf("Heap in-use before was %v, now %v: too high", inUseBefore, inUseNow)
   627  	}
   628  }
   629  
   630  func TestNoRaceRouteCache(t *testing.T) {
   631  	maxPerAccountCacheSize = 20
   632  	prunePerAccountCacheSize = 5
   633  	closedSubsCheckInterval = 250 * time.Millisecond
   634  
   635  	defer func() {
   636  		maxPerAccountCacheSize = defaultMaxPerAccountCacheSize
   637  		prunePerAccountCacheSize = defaultPrunePerAccountCacheSize
   638  		closedSubsCheckInterval = defaultClosedSubsCheckInterval
   639  	}()
   640  
   641  	for _, test := range []struct {
   642  		name     string
   643  		useQueue bool
   644  	}{
   645  		{"plain_sub", false},
   646  		{"queue_sub", true},
   647  	} {
   648  		t.Run(test.name, func(t *testing.T) {
   649  
   650  			oa := DefaultOptions()
   651  			oa.NoSystemAccount = true
   652  			oa.Cluster.PoolSize = -1
   653  			sa := RunServer(oa)
   654  			defer sa.Shutdown()
   655  
   656  			ob := DefaultOptions()
   657  			ob.NoSystemAccount = true
   658  			ob.Cluster.PoolSize = -1
   659  			ob.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa.Cluster.Host, oa.Cluster.Port))
   660  			sb := RunServer(ob)
   661  			defer sb.Shutdown()
   662  
   663  			checkClusterFormed(t, sa, sb)
   664  
   665  			responder := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port))
   666  			defer responder.Close()
   667  			natsSub(t, responder, "foo", func(m *nats.Msg) {
   668  				m.Respond(m.Data)
   669  			})
   670  			natsFlush(t, responder)
   671  
   672  			checkExpectedSubs(t, 1, sa)
   673  			checkExpectedSubs(t, 1, sb)
   674  
   675  			bURL := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)
   676  			requestor := natsConnect(t, bURL)
   677  			defer requestor.Close()
   678  
   679  			ch := make(chan struct{}, 1)
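         			// Signal replies without blocking: if a signal is already pending, drop this one.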
   680  			cb := func(_ *nats.Msg) {
   681  				select {
   682  				case ch <- struct{}{}:
   683  				default:
   684  				}
   685  			}
   686  
   687  			sendReqs := func(t *testing.T, nc *nats.Conn, count int, unsub bool) {
   688  				t.Helper()
   689  				for i := 0; i < count; i++ {
   690  					inbox := nats.NewInbox()
   691  					var sub *nats.Subscription
   692  					if test.useQueue {
   693  						sub = natsQueueSub(t, nc, inbox, "queue", cb)
   694  					} else {
   695  						sub = natsSub(t, nc, inbox, cb)
   696  					}
   697  					natsPubReq(t, nc, "foo", inbox, []byte("hello"))
   698  					select {
   699  					case <-ch:
   700  					case <-time.After(time.Second):
   701  						t.Fatalf("Failed to get reply")
   702  					}
   703  					if unsub {
   704  						natsUnsub(t, sub)
   705  					}
   706  				}
   707  			}
   708  			sendReqs(t, requestor, maxPerAccountCacheSize+1, true)
   709  
   710  			var route *client
   711  			sb.mu.Lock()
   712  			route = getFirstRoute(sb)
   713  			sb.mu.Unlock()
   714  
   715  			checkExpected := func(t *testing.T, expected int) {
   716  				t.Helper()
   717  				checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
   718  					route.mu.Lock()
   719  					n := len(route.in.pacache)
   720  					route.mu.Unlock()
   721  					if n != expected {
   722  						return fmt.Errorf("Expected %v subs in the cache, got %v", expected, n)
   723  					}
   724  					return nil
   725  				})
   726  			}
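         			// With the values set above, this expects (20+1) - (5+1) = 15 entries to remain in the cache.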
   727  			checkExpected(t, (maxPerAccountCacheSize+1)-(prunePerAccountCacheSize+1))
   728  
   729  			// Wait for more than the orphan check
   730  			time.Sleep(2 * closedSubsCheckInterval)
   731  
    732  			// Add new subs up to the point where a new prune would occur.
   733  			sendReqs(t, requestor, prunePerAccountCacheSize+1, false)
   734  
   735  			// Now closed subs should have been removed, so expected
   736  			// subs in the cache should be the new ones.
   737  			checkExpected(t, prunePerAccountCacheSize+1)
   738  
    739  			// Now try with implicit unsubscribe (due to connection close).
   740  			sendReqs(t, requestor, maxPerAccountCacheSize+1, false)
   741  			requestor.Close()
   742  
   743  			checkExpected(t, maxPerAccountCacheSize-prunePerAccountCacheSize)
   744  
   745  			// Wait for more than the orphan check
   746  			time.Sleep(2 * closedSubsCheckInterval)
   747  
    748  			// Now create a new connection and send prunePerAccountCacheSize+1
    749  			// requests; that should cause all subs from the previous connection
    750  			// to be removed from the cache.
   751  			requestor = natsConnect(t, bURL)
   752  			defer requestor.Close()
   753  
   754  			sendReqs(t, requestor, prunePerAccountCacheSize+1, false)
   755  			checkExpected(t, prunePerAccountCacheSize+1)
   756  		})
   757  	}
   758  }
   759  
   760  func TestNoRaceFetchAccountDoesNotRegisterAccountTwice(t *testing.T) {
   761  	sa, oa, sb, ob, _ := runTrustedGateways(t)
   762  	defer sa.Shutdown()
   763  	defer sb.Shutdown()
   764  
   765  	// Let's create a user account.
   766  	okp, _ := nkeys.FromSeed(oSeed)
   767  	akp, _ := nkeys.CreateAccount()
   768  	pub, _ := akp.PublicKey()
   769  	nac := jwt.NewAccountClaims(pub)
   770  	jwt, _ := nac.Encode(okp)
   771  	userAcc := pub
   772  
   773  	// Replace B's account resolver with one that introduces
   774  	// delay during the Fetch()
   775  	sac := &slowAccResolver{AccountResolver: sb.AccountResolver()}
   776  	sb.SetAccountResolver(sac)
   777  
   778  	// Add the account in sa and sb
   779  	addAccountToMemResolver(sa, userAcc, jwt)
   780  	addAccountToMemResolver(sb, userAcc, jwt)
   781  
   782  	// Tell the slow account resolver which account to slow down
   783  	sac.Lock()
   784  	sac.acc = userAcc
   785  	sac.Unlock()
   786  
   787  	urlA := fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port)
   788  	urlB := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)
   789  
   790  	nca, err := nats.Connect(urlA, createUserCreds(t, sa, akp))
   791  	if err != nil {
   792  		t.Fatalf("Error connecting to A: %v", err)
   793  	}
   794  	defer nca.Close()
   795  
   796  	// Since there is an optimistic send, this message will go to B
   797  	// and on processing this message, B will lookup/fetch this
    798  	// account, which can produce a race with the fetch of this
   799  	// account from A's system account that sent a notification
   800  	// about this account, or with the client connect just after
   801  	// that.
   802  	nca.Publish("foo", []byte("hello"))
   803  
   804  	// Now connect and create a subscription on B
   805  	ncb, err := nats.Connect(urlB, createUserCreds(t, sb, akp))
   806  	if err != nil {
    807  		t.Fatalf("Error connecting to B: %v", err)
   808  	}
   809  	defer ncb.Close()
   810  	sub, err := ncb.SubscribeSync("foo")
   811  	if err != nil {
   812  		t.Fatalf("Error on subscribe: %v", err)
   813  	}
   814  	ncb.Flush()
   815  
    816  	// Now send messages from A, and B should ultimately start to receive
    817  	// them (once the subscription has been correctly registered).
   818  	ok := false
   819  	for i := 0; i < 10; i++ {
   820  		nca.Publish("foo", []byte("hello"))
   821  		if _, err := sub.NextMsg(100 * time.Millisecond); err != nil {
   822  			continue
   823  		}
   824  		ok = true
   825  		break
   826  	}
   827  	if !ok {
   828  		t.Fatalf("B should be able to receive messages")
   829  	}
   830  
   831  	checkTmpAccounts := func(t *testing.T, s *Server) {
   832  		t.Helper()
   833  		empty := true
   834  		s.tmpAccounts.Range(func(_, _ interface{}) bool {
   835  			empty = false
   836  			return false
   837  		})
   838  		if !empty {
   839  			t.Fatalf("tmpAccounts is not empty")
   840  		}
   841  	}
   842  	checkTmpAccounts(t, sa)
   843  	checkTmpAccounts(t, sb)
   844  }
   845  
   846  func TestNoRaceWriteDeadline(t *testing.T) {
   847  	opts := DefaultOptions()
   848  	opts.NoSystemAccount = true
   849  	opts.WriteDeadline = 30 * time.Millisecond
   850  	s := RunServer(opts)
   851  	defer s.Shutdown()
   852  
   853  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   854  	if err != nil {
   855  		t.Fatalf("Error on connect: %v", err)
   856  	}
   857  	defer c.Close()
   858  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   859  		t.Fatalf("Error sending protocols to server: %v", err)
   860  	}
   861  	// Reduce socket buffer to increase reliability of getting
   862  	// write deadline errors.
   863  	c.(*net.TCPConn).SetReadBuffer(4)
   864  
   865  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   866  	sender, err := nats.Connect(url)
   867  	if err != nil {
   868  		t.Fatalf("Error on connect: %v", err)
   869  	}
   870  	defer sender.Close()
   871  
   872  	payload := make([]byte, 1000000)
   873  	total := 1000
   874  	for i := 0; i < total; i++ {
   875  		if err := sender.Publish("foo", payload); err != nil {
   876  			t.Fatalf("Error on publish: %v", err)
   877  		}
   878  	}
   879  	// Flush sender connection to ensure that all data has been sent.
   880  	if err := sender.Flush(); err != nil {
   881  		t.Fatalf("Error on flush: %v", err)
   882  	}
   883  
   884  	// At this point server should have closed connection c.
   885  
   886  	// On certain platforms, it may take more than one call before
   887  	// getting the error.
   888  	for i := 0; i < 100; i++ {
   889  		if _, err := c.Write([]byte("PUB bar 5\r\nhello\r\n")); err != nil {
   890  			// ok
   891  			return
   892  		}
   893  	}
   894  	t.Fatal("Connection should have been closed")
   895  }
   896  
   897  func TestNoRaceLeafNodeClusterNameConflictDeadlock(t *testing.T) {
   898  	o := DefaultOptions()
   899  	o.LeafNode.Port = -1
   900  	s := RunServer(o)
   901  	defer s.Shutdown()
   902  
   903  	u, err := url.Parse(fmt.Sprintf("nats://127.0.0.1:%d", o.LeafNode.Port))
   904  	if err != nil {
   905  		t.Fatalf("Error parsing url: %v", err)
   906  	}
   907  
   908  	o1 := DefaultOptions()
   909  	o1.ServerName = "A1"
   910  	o1.Cluster.Name = "clusterA"
   911  	o1.LeafNode.Remotes = []*RemoteLeafOpts{{URLs: []*url.URL{u}}}
   912  	s1 := RunServer(o1)
   913  	defer s1.Shutdown()
   914  
   915  	checkLeafNodeConnected(t, s1)
   916  
   917  	o2 := DefaultOptions()
   918  	o2.ServerName = "A2"
   919  	o2.Cluster.Name = "clusterA"
   920  	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
   921  	o2.LeafNode.Remotes = []*RemoteLeafOpts{{URLs: []*url.URL{u}}}
   922  	s2 := RunServer(o2)
   923  	defer s2.Shutdown()
   924  
   925  	checkLeafNodeConnected(t, s2)
   926  	checkClusterFormed(t, s1, s2)
   927  
   928  	o3 := DefaultOptions()
   929  	o3.ServerName = "A3"
   930  	o3.Cluster.Name = "" // intentionally not set
   931  	o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
   932  	o3.LeafNode.Remotes = []*RemoteLeafOpts{{URLs: []*url.URL{u}}}
   933  	s3 := RunServer(o3)
   934  	defer s3.Shutdown()
   935  
   936  	checkLeafNodeConnected(t, s3)
   937  	checkClusterFormed(t, s1, s2, s3)
   938  }
   939  
    940  // This test is the same as TestAccountAddServiceImportRace but runs without
    941  // the -race flag, which more easily catches a possible duplicate sid,
    942  // resulting in fewer subscriptions than expected in the account's internal
    943  // subscriptions map.
   944  func TestNoRaceAccountAddServiceImportRace(t *testing.T) {
   945  	TestAccountAddServiceImportRace(t)
   946  }
   947  
   948  // Similar to the routed version. Make sure we receive all of the
   949  // messages with auto-unsubscribe enabled.
   950  func TestNoRaceQueueAutoUnsubscribe(t *testing.T) {
   951  	opts := DefaultOptions()
   952  	s := RunServer(opts)
   953  	defer s.Shutdown()
   954  
   955  	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
   956  	if err != nil {
   957  		t.Fatalf("Error on connect: %v", err)
   958  	}
   959  	defer nc.Close()
   960  
   961  	rbar := int32(0)
   962  	barCb := func(m *nats.Msg) {
   963  		atomic.AddInt32(&rbar, 1)
   964  	}
   965  	rbaz := int32(0)
   966  	bazCb := func(m *nats.Msg) {
   967  		atomic.AddInt32(&rbaz, 1)
   968  	}
   969  
   970  	// Create 1000 subscriptions with auto-unsubscribe of 1.
   971  	// Do two groups, one bar and one baz.
   972  	total := 1000
   973  	for i := 0; i < total; i++ {
   974  		qsub, err := nc.QueueSubscribe("foo", "bar", barCb)
   975  		if err != nil {
   976  			t.Fatalf("Error on subscribe: %v", err)
   977  		}
   978  		if err := qsub.AutoUnsubscribe(1); err != nil {
   979  			t.Fatalf("Error on auto-unsubscribe: %v", err)
   980  		}
   981  		qsub, err = nc.QueueSubscribe("foo", "baz", bazCb)
   982  		if err != nil {
   983  			t.Fatalf("Error on subscribe: %v", err)
   984  		}
   985  		if err := qsub.AutoUnsubscribe(1); err != nil {
   986  			t.Fatalf("Error on auto-unsubscribe: %v", err)
   987  		}
   988  	}
   989  	nc.Flush()
   990  
   991  	expected := int32(total)
   992  	for i := int32(0); i < expected; i++ {
   993  		nc.Publish("foo", []byte("Don't Drop Me!"))
   994  	}
   995  	nc.Flush()
   996  
   997  	checkFor(t, 5*time.Second, 10*time.Millisecond, func() error {
   998  		nbar := atomic.LoadInt32(&rbar)
   999  		nbaz := atomic.LoadInt32(&rbaz)
  1000  		if nbar == expected && nbaz == expected {
  1001  			return nil
  1002  		}
  1003  		return fmt.Errorf("Did not receive all %d queue messages, received %d for 'bar' and %d for 'baz'",
  1004  			expected, atomic.LoadInt32(&rbar), atomic.LoadInt32(&rbaz))
  1005  	})
  1006  }
  1007  
  1008  func TestNoRaceAcceptLoopsDoNotLeaveOpenedConn(t *testing.T) {
  1009  	for _, test := range []struct {
  1010  		name string
  1011  		url  func(o *Options) (string, int)
  1012  	}{
  1013  		{"client", func(o *Options) (string, int) { return o.Host, o.Port }},
  1014  		{"route", func(o *Options) (string, int) { return o.Cluster.Host, o.Cluster.Port }},
  1015  		{"gateway", func(o *Options) (string, int) { return o.Gateway.Host, o.Gateway.Port }},
  1016  		{"leafnode", func(o *Options) (string, int) { return o.LeafNode.Host, o.LeafNode.Port }},
  1017  		{"websocket", func(o *Options) (string, int) { return o.Websocket.Host, o.Websocket.Port }},
  1018  	} {
  1019  		t.Run(test.name, func(t *testing.T) {
  1020  			o := DefaultOptions()
  1021  			o.DisableShortFirstPing = true
  1022  			o.Accounts = []*Account{NewAccount("$SYS")}
  1023  			o.SystemAccount = "$SYS"
  1024  			o.Cluster.Name = "abc"
  1025  			o.Cluster.Host = "127.0.0.1"
  1026  			o.Cluster.Port = -1
  1027  			o.Gateway.Name = "abc"
  1028  			o.Gateway.Host = "127.0.0.1"
  1029  			o.Gateway.Port = -1
  1030  			o.LeafNode.Host = "127.0.0.1"
  1031  			o.LeafNode.Port = -1
  1032  			o.Websocket.Host = "127.0.0.1"
  1033  			o.Websocket.Port = -1
  1034  			o.Websocket.HandshakeTimeout = 1
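         			// HandshakeTimeout is a time.Duration, so 1 here means 1ns: websocket handshakes time out essentially immediately.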
  1035  			o.Websocket.NoTLS = true
  1036  			s := RunServer(o)
  1037  			defer s.Shutdown()
  1038  
  1039  			host, port := test.url(o)
  1040  			url := fmt.Sprintf("%s:%d", host, port)
  1041  			var conns []net.Conn
  1042  
  1043  			wg := sync.WaitGroup{}
  1044  			wg.Add(1)
  1045  			done := make(chan struct{}, 1)
  1046  			go func() {
  1047  				defer wg.Done()
  1048  				// Have an upper limit
  1049  				for i := 0; i < 200; i++ {
  1050  					c, err := net.Dial("tcp", url)
  1051  					if err != nil {
  1052  						return
  1053  					}
  1054  					conns = append(conns, c)
  1055  					select {
  1056  					case <-done:
  1057  						return
  1058  					default:
  1059  					}
  1060  				}
  1061  			}()
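         			// Give the dialer goroutine a moment to open some connections before shutting the server down.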
  1062  			time.Sleep(15 * time.Millisecond)
  1063  			s.Shutdown()
  1064  			close(done)
  1065  			wg.Wait()
  1066  			for _, c := range conns {
  1067  				c.SetReadDeadline(time.Now().Add(2 * time.Second))
  1068  				br := bufio.NewReader(c)
  1069  				// Read INFO for connections that were accepted
  1070  				_, _, err := br.ReadLine()
  1071  				if err == nil {
  1072  					// After that, the connection should be closed,
  1073  					// so we should get an error here.
  1074  					_, _, err = br.ReadLine()
  1075  				}
  1076  				// We expect an io.EOF or any other error indicating the use of a closed
  1077  				// connection, but we should not get the timeout error.
  1078  				if ne, ok := err.(net.Error); ok && ne.Timeout() {
  1079  					err = nil
  1080  				}
  1081  				if err == nil {
  1082  					var buf [10]byte
  1083  					c.SetDeadline(time.Now().Add(2 * time.Second))
  1084  					c.Write([]byte("C"))
  1085  					_, err = c.Read(buf[:])
  1086  					if ne, ok := err.(net.Error); ok && ne.Timeout() {
  1087  						err = nil
  1088  					}
  1089  				}
  1090  				if err == nil {
  1091  					t.Fatalf("Connection should have been closed")
  1092  				}
  1093  				c.Close()
  1094  			}
  1095  		})
  1096  	}
  1097  }
  1098  
  1099  func TestNoRaceJetStreamDeleteStreamManyConsumers(t *testing.T) {
  1100  	s := RunBasicJetStreamServer(t)
  1101  	defer s.Shutdown()
  1102  
  1103  	mname := "MYS"
  1104  	mset, err := s.GlobalAccount().addStream(&StreamConfig{Name: mname, Storage: FileStorage})
  1105  	if err != nil {
  1106  		t.Fatalf("Unexpected error adding stream: %v", err)
  1107  	}
  1108  
  1109  	// This number needs to be higher than the internal sendq size to trigger what this test is testing.
  1110  	for i := 0; i < 2000; i++ {
  1111  		_, err := mset.addConsumer(&ConsumerConfig{
  1112  			Durable:        fmt.Sprintf("D-%d", i),
  1113  			DeliverSubject: fmt.Sprintf("deliver.%d", i),
  1114  		})
  1115  		if err != nil {
  1116  			t.Fatalf("Error creating consumer: %v", err)
  1117  		}
  1118  	}
   1119  	// With the bug this would not return and would hang.
  1120  	mset.delete()
  1121  }
  1122  
  1123  // We used to swap accounts on an inbound message when processing service imports.
   1124  // Until JetStream this was mostly ok, but with JetStream pull consumers can now
   1125  // access the client's account from another goroutine, which causes issues.
   1126  // This is not limited to the case above, it's just the one that exposed it.
   1127  // This test shows that issue and that the fix works, meaning we no longer swap c.acc.
  1128  func TestNoRaceJetStreamServiceImportAccountSwapIssue(t *testing.T) {
  1129  	s := RunBasicJetStreamServer(t)
  1130  	defer s.Shutdown()
  1131  
  1132  	// Client based API
  1133  	nc, js := jsClientConnect(t, s)
  1134  	defer nc.Close()
  1135  
  1136  	_, err := js.AddStream(&nats.StreamConfig{
  1137  		Name:     "TEST",
  1138  		Subjects: []string{"foo", "bar"},
  1139  	})
  1140  	if err != nil {
  1141  		t.Fatalf("Unexpected error: %v", err)
  1142  	}
  1143  
  1144  	sub, err := js.PullSubscribe("foo", "dlc")
  1145  	if err != nil {
  1146  		t.Fatalf("Unexpected error: %v", err)
  1147  	}
  1148  
  1149  	beforeSubs := s.NumSubscriptions()
  1150  
  1151  	// How long we want both sides to run.
  1152  	timeout := time.Now().Add(3 * time.Second)
  1153  	errs := make(chan error, 1)
  1154  
   1155  	// Publishing side, which will signal the waiting consumer and which will access c.acc. If the publish
   1156  	// operation runs concurrently, we will catch c.acc being $SYS some of the time.
  1157  	go func() {
  1158  		time.Sleep(100 * time.Millisecond)
  1159  		for time.Now().Before(timeout) {
  1160  			// This will signal the delivery of the pull messages.
  1161  			js.Publish("foo", []byte("Hello"))
  1162  			// This will swap the account because of JetStream service import.
  1163  			// We can get an error here with the bug or not.
  1164  			if _, err := js.StreamInfo("TEST"); err != nil {
  1165  				errs <- err
  1166  				return
  1167  			}
  1168  		}
  1169  		errs <- nil
  1170  	}()
  1171  
  1172  	// Pull messages flow.
  1173  	var received int
  1174  	for time.Now().Before(timeout.Add(2 * time.Second)) {
  1175  		if msgs, err := sub.Fetch(1, nats.MaxWait(200*time.Millisecond)); err == nil {
  1176  			for _, m := range msgs {
  1177  				received++
  1178  				m.AckSync()
  1179  			}
  1180  		} else {
  1181  			break
  1182  		}
  1183  	}
  1184  	// Wait on publisher Go routine and check for errors.
  1185  	if err := <-errs; err != nil {
  1186  		t.Fatalf("Unexpected error: %v", err)
  1187  	}
  1188  	// Double check all received.
  1189  	si, err := js.StreamInfo("TEST")
  1190  	if err != nil {
  1191  		t.Fatalf("Unexpected error: %v", err)
  1192  	}
  1193  	if int(si.State.Msgs) != received {
  1194  		t.Fatalf("Expected to receive %d msgs, only got %d", si.State.Msgs, received)
  1195  	}
  1196  	// Now check for leaked subs from the fetch call above. That is what we first saw from the bug.
  1197  	if afterSubs := s.NumSubscriptions(); afterSubs != beforeSubs {
  1198  		t.Fatalf("Leaked subscriptions: %d before, %d after", beforeSubs, afterSubs)
  1199  	}
  1200  }
  1201  
  1202  func TestNoRaceJetStreamAPIStreamListPaging(t *testing.T) {
  1203  	s := RunBasicJetStreamServer(t)
  1204  	defer s.Shutdown()
  1205  
  1206  	// Create 2X limit
  1207  	streamsNum := 2 * JSApiNamesLimit
  1208  	for i := 1; i <= streamsNum; i++ {
  1209  		name := fmt.Sprintf("STREAM-%06d", i)
  1210  		cfg := StreamConfig{Name: name, Storage: MemoryStorage}
  1211  		_, err := s.GlobalAccount().addStream(&cfg)
  1212  		if err != nil {
  1213  			t.Fatalf("Unexpected error adding stream: %v", err)
  1214  		}
  1215  	}
  1216  
  1217  	// Client for API requests.
  1218  	nc := clientConnectToServer(t, s)
  1219  	defer nc.Close()
  1220  
  1221  	reqList := func(offset int) []byte {
  1222  		t.Helper()
  1223  		var req []byte
  1224  		if offset > 0 {
  1225  			req, _ = json.Marshal(&ApiPagedRequest{Offset: offset})
  1226  		}
  1227  		resp, err := nc.Request(JSApiStreams, req, time.Second)
  1228  		if err != nil {
  1229  			t.Fatalf("Unexpected error getting stream list: %v", err)
  1230  		}
  1231  		return resp.Data
  1232  	}
  1233  
  1234  	checkResp := func(resp []byte, expectedLen, expectedOffset int) {
  1235  		t.Helper()
  1236  		var listResponse JSApiStreamNamesResponse
  1237  		if err := json.Unmarshal(resp, &listResponse); err != nil {
  1238  			t.Fatalf("Unexpected error: %v", err)
  1239  		}
  1240  		if len(listResponse.Streams) != expectedLen {
  1241  			t.Fatalf("Expected only %d streams but got %d", expectedLen, len(listResponse.Streams))
  1242  		}
  1243  		if listResponse.Total != streamsNum {
  1244  			t.Fatalf("Expected total to be %d but got %d", streamsNum, listResponse.Total)
  1245  		}
  1246  		if listResponse.Offset != expectedOffset {
  1247  			t.Fatalf("Expected offset to be %d but got %d", expectedOffset, listResponse.Offset)
  1248  		}
  1249  		if expectedLen < 1 {
  1250  			return
  1251  		}
  1252  		// Make sure we get the right stream.
  1253  		sname := fmt.Sprintf("STREAM-%06d", expectedOffset+1)
  1254  		if listResponse.Streams[0] != sname {
  1255  			t.Fatalf("Expected stream %q to be first, got %q", sname, listResponse.Streams[0])
  1256  		}
  1257  	}
  1258  
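         	// streamsNum is 2*JSApiNamesLimit, so both full pages return JSApiNamesLimit names,
         	// an offset of streamsNum-22 leaves exactly 22, and offsets at or past the end return none.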
  1259  	checkResp(reqList(0), JSApiNamesLimit, 0)
  1260  	checkResp(reqList(JSApiNamesLimit), JSApiNamesLimit, JSApiNamesLimit)
  1261  	checkResp(reqList(streamsNum), 0, streamsNum)
  1262  	checkResp(reqList(streamsNum-22), 22, streamsNum-22)
  1263  	checkResp(reqList(streamsNum+22), 0, streamsNum)
  1264  }
  1265  
  1266  func TestNoRaceJetStreamAPIConsumerListPaging(t *testing.T) {
  1267  	s := RunBasicJetStreamServer(t)
  1268  	defer s.Shutdown()
  1269  
  1270  	sname := "MYSTREAM"
  1271  	mset, err := s.GlobalAccount().addStream(&StreamConfig{Name: sname})
  1272  	if err != nil {
  1273  		t.Fatalf("Unexpected error adding stream: %v", err)
  1274  	}
  1275  
  1276  	// Client for API requests.
  1277  	nc := clientConnectToServer(t, s)
  1278  	defer nc.Close()
  1279  
  1280  	consumersNum := JSApiNamesLimit
  1281  	for i := 1; i <= consumersNum; i++ {
  1282  		dsubj := fmt.Sprintf("d.%d", i)
  1283  		sub, _ := nc.SubscribeSync(dsubj)
  1284  		defer sub.Unsubscribe()
  1285  		nc.Flush()
  1286  
  1287  		_, err := mset.addConsumer(&ConsumerConfig{DeliverSubject: dsubj})
  1288  		if err != nil {
  1289  			t.Fatalf("Unexpected error: %v", err)
  1290  		}
  1291  	}
  1292  
  1293  	reqListSubject := fmt.Sprintf(JSApiConsumersT, sname)
  1294  	reqList := func(offset int) []byte {
  1295  		t.Helper()
  1296  		var req []byte
  1297  		if offset > 0 {
  1298  			req, _ = json.Marshal(&JSApiConsumersRequest{ApiPagedRequest: ApiPagedRequest{Offset: offset}})
  1299  		}
  1300  		resp, err := nc.Request(reqListSubject, req, time.Second)
  1301  		if err != nil {
   1302  			t.Fatalf("Unexpected error getting consumer list: %v", err)
  1303  		}
  1304  		return resp.Data
  1305  	}
  1306  
  1307  	checkResp := func(resp []byte, expectedLen, expectedOffset int) {
  1308  		t.Helper()
  1309  		var listResponse JSApiConsumerNamesResponse
  1310  		if err := json.Unmarshal(resp, &listResponse); err != nil {
  1311  			t.Fatalf("Unexpected error: %v", err)
  1312  		}
  1313  		if len(listResponse.Consumers) != expectedLen {
   1314  		t.Fatalf("Expected only %d consumers but got %d", expectedLen, len(listResponse.Consumers))
  1315  		}
  1316  		if listResponse.Total != consumersNum {
  1317  			t.Fatalf("Expected total to be %d but got %d", consumersNum, listResponse.Total)
  1318  		}
  1319  		if listResponse.Offset != expectedOffset {
  1320  			t.Fatalf("Expected offset to be %d but got %d", expectedOffset, listResponse.Offset)
  1321  		}
  1322  	}
  1323  
  1324  	checkResp(reqList(0), JSApiNamesLimit, 0)
  1325  	checkResp(reqList(consumersNum-22), 22, consumersNum-22)
  1326  	checkResp(reqList(consumersNum+22), 0, consumersNum)
  1327  }
  1328  
  1329  func TestNoRaceJetStreamWorkQueueLoadBalance(t *testing.T) {
  1330  	s := RunBasicJetStreamServer(t)
  1331  	defer s.Shutdown()
  1332  
  1333  	mname := "MY_MSG_SET"
  1334  	mset, err := s.GlobalAccount().addStream(&StreamConfig{Name: mname, Subjects: []string{"foo", "bar"}})
  1335  	if err != nil {
  1336  		t.Fatalf("Unexpected error adding message set: %v", err)
  1337  	}
  1338  	defer mset.delete()
  1339  
  1340  	// Create basic work queue mode consumer.
  1341  	oname := "WQ"
  1342  	o, err := mset.addConsumer(&ConsumerConfig{Durable: oname, AckPolicy: AckExplicit})
  1343  	if err != nil {
  1344  		t.Fatalf("Expected no error with durable, got %v", err)
  1345  	}
  1346  	defer o.delete()
  1347  
  1348  	// To send messages.
  1349  	nc := clientConnectToServer(t, s)
  1350  	defer nc.Close()
  1351  
  1352  	// For normal work queue semantics, you send requests to the subject with stream and consumer name.
  1353  	reqMsgSubj := o.requestNextMsgSubject()
  1354  
  1355  	numWorkers := 25
  1356  	counts := make([]int32, numWorkers)
  1357  	var received int32
  1358  
  1359  	rwg := &sync.WaitGroup{}
  1360  	rwg.Add(numWorkers)
  1361  
  1362  	wg := &sync.WaitGroup{}
  1363  	wg.Add(numWorkers)
  1364  	ch := make(chan bool)
  1365  
  1366  	toSend := 1000
  1367  
  1368  	for i := 0; i < numWorkers; i++ {
  1369  		nc := clientConnectToServer(t, s)
  1370  		defer nc.Close()
  1371  
  1372  		go func(index int32) {
  1373  			rwg.Done()
  1374  			defer wg.Done()
  1375  			<-ch
  1376  
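         			// Keep requesting the next message until the shared total reaches toSend or a request times out.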
  1377  			for counter := &counts[index]; ; {
  1378  				m, err := nc.Request(reqMsgSubj, nil, 100*time.Millisecond)
  1379  				if err != nil {
  1380  					return
  1381  				}
  1382  				m.Respond(nil)
  1383  				atomic.AddInt32(counter, 1)
  1384  				if total := atomic.AddInt32(&received, 1); total >= int32(toSend) {
  1385  					return
  1386  				}
  1387  			}
  1388  		}(int32(i))
  1389  	}
  1390  
  1391  	// Wait for requestors to be ready
  1392  	rwg.Wait()
  1393  	close(ch)
  1394  
  1395  	sendSubj := "bar"
  1396  	for i := 0; i < toSend; i++ {
  1397  		sendStreamMsg(t, nc, sendSubj, "Hello World!")
  1398  	}
  1399  
  1400  	// Wait for test to complete.
  1401  	wg.Wait()
  1402  
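         	// Each of the 25 workers should get roughly 1000/25 = 40 messages; allow a spread of target/2 + 5.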
  1403  	target := toSend / numWorkers
  1404  	delta := target/2 + 5
  1405  	low, high := int32(target-delta), int32(target+delta)
  1406  
  1407  	for i := 0; i < numWorkers; i++ {
  1408  		if msgs := atomic.LoadInt32(&counts[i]); msgs < low || msgs > high {
  1409  			t.Fatalf("Messages received for worker [%d] too far off from target of %d, got %d", i, target, msgs)
  1410  		}
  1411  	}
  1412  }
  1413  
  1414  func TestNoRaceJetStreamClusterLargeStreamInlineCatchup(t *testing.T) {
  1415  	c := createJetStreamClusterExplicit(t, "LSS", 3)
  1416  	defer c.shutdown()
  1417  
  1418  	// Client based API
  1419  	s := c.randomServer()
  1420  	nc, js := jsClientConnect(t, s)
  1421  	defer nc.Close()
  1422  
  1423  	_, err := js.AddStream(&nats.StreamConfig{
  1424  		Name:     "TEST",
  1425  		Subjects: []string{"foo"},
  1426  		Replicas: 3,
  1427  	})
  1428  	if err != nil {
  1429  		t.Fatalf("Unexpected error: %v", err)
  1430  	}
  1431  
  1432  	sr := c.randomNonStreamLeader("$G", "TEST")
  1433  	sr.Shutdown()
  1434  
  1435  	// In case sr was meta leader.
  1436  	c.waitOnLeader()
  1437  
  1438  	msg, toSend := []byte("Hello JS Clustering"), 5000
  1439  
  1440  	// Now fill up stream.
  1441  	for i := 0; i < toSend; i++ {
  1442  		if _, err = js.Publish("foo", msg); err != nil {
  1443  			t.Fatalf("Unexpected publish error: %v", err)
  1444  		}
  1445  	}
  1446  	si, err := js.StreamInfo("TEST")
  1447  	if err != nil {
  1448  		t.Fatalf("Unexpected error: %v", err)
  1449  	}
  1450  	// Check active state as well, shows that the owner answered.
  1451  	if si.State.Msgs != uint64(toSend) {
  1452  		t.Fatalf("Expected %d msgs, got bad state: %+v", toSend, si.State)
  1453  	}
  1454  
  1455  	// Kill our current leader to make just 2.
  1456  	c.streamLeader("$G", "TEST").Shutdown()
  1457  
  1458  	// Now restart the shutdown peer and wait for it to be current.
  1459  	sr = c.restartServer(sr)
  1460  	c.waitOnStreamCurrent(sr, "$G", "TEST")
  1461  
   1462  	// Ask the other servers to step down as leader so that sr becomes the leader.
  1463  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  1464  		c.waitOnStreamLeader("$G", "TEST")
  1465  		if sl := c.streamLeader("$G", "TEST"); sl != sr {
  1466  			sl.JetStreamStepdownStream("$G", "TEST")
  1467  			return fmt.Errorf("Server %s is not leader yet", sr)
  1468  		}
  1469  		return nil
  1470  	})
  1471  
  1472  	si, err = js.StreamInfo("TEST")
  1473  	if err != nil {
  1474  		t.Fatalf("Unexpected error: %v", err)
  1475  	}
   1476  	// Check that we have all of our messages stored.
   1477  	// Wait a bit for the upper layers to process.
  1478  	checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
  1479  		if si.State.Msgs != uint64(toSend) {
  1480  			return fmt.Errorf("Expected %d msgs, got %d", toSend, si.State.Msgs)
  1481  		}
  1482  		return nil
  1483  	})
  1484  }
  1485  
  1486  func TestNoRaceJetStreamClusterStreamCreateAndLostQuorum(t *testing.T) {
  1487  	c := createJetStreamClusterExplicit(t, "R5S", 3)
  1488  	defer c.shutdown()
  1489  
  1490  	// Client based API
  1491  	s := c.randomServer()
  1492  	nc, js := jsClientConnect(t, s)
  1493  	defer nc.Close()
  1494  
  1495  	sub, err := nc.SubscribeSync(JSAdvisoryStreamQuorumLostPre + ".*")
  1496  	if err != nil {
  1497  		t.Fatalf("Unexpected error: %v", err)
  1498  	}
  1499  
  1500  	if _, err := js.AddStream(&nats.StreamConfig{Name: "NO-LQ-START", Replicas: 3}); err != nil {
  1501  		t.Fatalf("Unexpected error: %v", err)
  1502  	}
  1503  	c.waitOnStreamLeader("$G", "NO-LQ-START")
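         	// We should not have received any quorum-lost advisories for this stream.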
  1504  	checkSubsPending(t, sub, 0)
  1505  
  1506  	c.stopAll()
  1507  	// Start up the one we were connected to first and wait for it to be connected.
  1508  	s = c.restartServer(s)
  1509  	nc, err = nats.Connect(s.ClientURL())
  1510  	if err != nil {
  1511  		t.Fatalf("Failed to create client: %v", err)
  1512  	}
  1513  	defer nc.Close()
  1514  
  1515  	sub, err = nc.SubscribeSync(JSAdvisoryStreamQuorumLostPre + ".*")
  1516  	if err != nil {
  1517  		t.Fatalf("Unexpected error: %v", err)
  1518  	}
  1519  	nc.Flush()
  1520  
  1521  	c.restartAll()
  1522  	c.waitOnStreamLeader("$G", "NO-LQ-START")
  1523  
  1524  	checkSubsPending(t, sub, 0)
  1525  }
  1526  
  1527  func TestNoRaceJetStreamSuperClusterMirrors(t *testing.T) {
  1528  	sc := createJetStreamSuperCluster(t, 3, 3)
  1529  	defer sc.shutdown()
  1530  
  1531  	// Client based API
  1532  	s := sc.clusterForName("C2").randomServer()
  1533  	nc, js := jsClientConnect(t, s)
  1534  	defer nc.Close()
  1535  
  1536  	// Create source stream.
  1537  	_, err := js.AddStream(&nats.StreamConfig{Name: "S1", Subjects: []string{"foo", "bar"}, Replicas: 3, Placement: &nats.Placement{Cluster: "C2"}})
  1538  	if err != nil {
  1539  		t.Fatalf("Unexpected error: %v", err)
  1540  	}
  1541  
  1542  	// Needed while the Go client does not have mirror support.
  1543  	createStream := func(cfg *nats.StreamConfig) {
  1544  		t.Helper()
  1545  		if _, err := js.AddStream(cfg); err != nil {
  1546  			t.Fatalf("Unexpected error: %+v", err)
  1547  		}
  1548  	}
  1549  
  1550  	// Send 100 messages.
  1551  	for i := 0; i < 100; i++ {
  1552  		if _, err := js.Publish("foo", []byte("MIRRORS!")); err != nil {
  1553  			t.Fatalf("Unexpected publish error: %v", err)
  1554  		}
  1555  	}
  1556  
  1557  	createStream(&nats.StreamConfig{
  1558  		Name:      "M1",
  1559  		Mirror:    &nats.StreamSource{Name: "S1"},
  1560  		Placement: &nats.Placement{Cluster: "C1"},
  1561  	})
  1562  
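        	// The mirror placed in C1 should catch up with all 100 messages from S1 across the gateways.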
  1563  	checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
  1564  		si, err := js.StreamInfo("M1")
  1565  		if err != nil {
  1566  			t.Fatalf("Unexpected error: %v", err)
  1567  		}
  1568  		if si.State.Msgs != 100 {
  1569  			return fmt.Errorf("Expected 100 msgs, got state: %+v", si.State)
  1570  		}
  1571  		return nil
  1572  	})
  1573  
  1574  	// Purge the source stream.
  1575  	if err := js.PurgeStream("S1"); err != nil {
  1576  		t.Fatalf("Unexpected purge error: %v", err)
  1577  	}
  1578  	// Send 50 more msgs now.
  1579  	for i := 0; i < 50; i++ {
  1580  		if _, err := js.Publish("bar", []byte("OK")); err != nil {
  1581  			t.Fatalf("Unexpected publish error: %v", err)
  1582  		}
  1583  	}
  1584  
  1585  	createStream(&nats.StreamConfig{
  1586  		Name:      "M2",
  1587  		Mirror:    &nats.StreamSource{Name: "S1"},
  1588  		Replicas:  3,
  1589  		Placement: &nats.Placement{Cluster: "C3"},
  1590  	})
  1591  
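        	// Since S1 was purged after the first 100 messages, the new mirror should only
        	// carry the 50 later messages and start at sequence 101.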
  1592  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1593  		si, err := js.StreamInfo("M2")
  1594  		if err != nil {
  1595  			t.Fatalf("Unexpected error: %v", err)
  1596  		}
  1597  		if si.State.Msgs != 50 {
  1598  			return fmt.Errorf("Expected 50 msgs, got state: %+v", si.State)
  1599  		}
  1600  		if si.State.FirstSeq != 101 {
  1601  			return fmt.Errorf("Expected start seq of 101, got state: %+v", si.State)
  1602  		}
  1603  		return nil
  1604  	})
  1605  
  1606  	sl := sc.clusterForName("C3").streamLeader("$G", "M2")
  1607  	doneCh := make(chan bool)
  1608  
  1609  	// Now test that if the mirror gets interrupted it picks up where it left off, etc.
  1610  	go func() {
  1611  		// Send 100 more messages.
  1612  		for i := 0; i < 100; i++ {
  1613  			if _, err := js.Publish("foo", []byte("MIRRORS!")); err != nil {
  1614  				t.Errorf("Unexpected publish on %d error: %v", i, err)
  1615  			}
  1616  			time.Sleep(2 * time.Millisecond)
  1617  		}
  1618  		doneCh <- true
  1619  	}()
  1620  
  1621  	time.Sleep(20 * time.Millisecond)
  1622  	sl.Shutdown()
  1623  
  1624  	<-doneCh
  1625  	sc.clusterForName("C3").waitOnStreamLeader("$G", "M2")
  1626  
  1627  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1628  		si, err := js.StreamInfo("M2")
  1629  		if err != nil {
  1630  			t.Fatalf("Unexpected error: %v", err)
  1631  		}
  1632  		if si.State.Msgs != 150 {
  1633  			return fmt.Errorf("Expected 150 msgs, got state: %+v", si.State)
  1634  		}
  1635  		if si.State.FirstSeq != 101 {
  1636  			return fmt.Errorf("Expected start seq of 101, got state: %+v", si.State)
  1637  		}
  1638  		return nil
  1639  	})
  1640  }
  1641  
  1642  func TestNoRaceJetStreamSuperClusterMixedModeMirrors(t *testing.T) {
  1643  	// Unlike the similar sources test, this test does not reliably catch the bug
  1644  	// that would cause mirrors to not have the expected message count.
  1645  	// Still, adding this test in case we have a regression and we get lucky
  1646  	// catching the failure while running this.
  1647  
  1648  	tmpl := `
  1649  		listen: 127.0.0.1:-1
  1650  		server_name: %s
  1651  		jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  1652  		leaf: { listen: 127.0.0.1:-1 }
  1653  
  1654  		cluster {
  1655  			name: %s
  1656  			listen: 127.0.0.1:%d
  1657  			routes = [%s]
  1658  		}
  1659  
  1660  		accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  1661  	`
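        	// Comment out JetStream on servers S5-S7 (keeping only their leafnode config) so the
        	// supercluster runs in mixed mode with both JS and non-JS servers.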
  1662  	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 7, 4,
  1663  		func(serverName, clusterName, storeDir, conf string) string {
  1664  			sname := serverName[strings.Index(serverName, "-")+1:]
  1665  			switch sname {
  1666  			case "S5", "S6", "S7":
  1667  				conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ")
  1668  			default:
  1669  				conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ")
  1670  			}
  1671  			return conf
  1672  		}, nil)
  1673  	defer sc.shutdown()
  1674  
  1675  	// Connect our client to a non-JS server.
  1676  	c := sc.randomCluster()
  1677  	var s *Server
  1678  	for s == nil {
  1679  		if as := c.randomServer(); !as.JetStreamEnabled() {
  1680  			s = as
  1681  			break
  1682  		}
  1683  	}
  1684  	nc, js := jsClientConnect(t, s)
  1685  	defer nc.Close()
  1686  
  1687  	toSend := 1000
  1688  	// Create 10 origin streams
  1689  	for i := 0; i < 10; i++ {
  1690  		name := fmt.Sprintf("S%d", i+1)
  1691  		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  1692  			t.Fatalf("Unexpected error: %v", err)
  1693  		}
  1694  		c.waitOnStreamLeader(globalAccountName, name)
  1695  		// Load them up with a bunch of messages.
  1696  		for n := 0; n < toSend; n++ {
  1697  			m := nats.NewMsg(name)
  1698  			m.Header.Set("stream", name)
  1699  			m.Header.Set("idx", strconv.FormatInt(int64(n+1), 10))
  1700  			if err := nc.PublishMsg(m); err != nil {
  1701  				t.Fatalf("Unexpected publish error: %v", err)
  1702  			}
  1703  		}
  1704  	}
  1705  
  1706  	for i := 0; i < 3; i++ {
  1707  		// Now create our mirrors
  1708  		wg := sync.WaitGroup{}
  1709  		mirrorsCount := 10
  1710  		wg.Add(mirrorsCount)
  1711  		errCh := make(chan error, 1)
  1712  		for m := 0; m < mirrorsCount; m++ {
  1713  			sname := fmt.Sprintf("S%d", rand.Intn(10)+1)
  1714  			go func(sname string, mirrorIdx int) {
  1715  				defer wg.Done()
  1716  				if _, err := js.AddStream(&nats.StreamConfig{
  1717  					Name:     fmt.Sprintf("M%d", mirrorIdx),
  1718  					Mirror:   &nats.StreamSource{Name: sname},
  1719  					Replicas: 3,
  1720  				}); err != nil {
  1721  					select {
  1722  					case errCh <- err:
  1723  					default:
  1724  					}
  1725  				}
  1726  			}(sname, m+1)
  1727  		}
  1728  		wg.Wait()
  1729  		select {
  1730  		case err := <-errCh:
  1731  			t.Fatalf("Error creating mirrors: %v", err)
  1732  		default:
  1733  		}
  1734  		// Now check the mirrors have all expected messages
  1735  		for m := 0; m < mirrorsCount; m++ {
  1736  			name := fmt.Sprintf("M%d", m+1)
  1737  			checkFor(t, 15*time.Second, 500*time.Millisecond, func() error {
  1738  				si, err := js.StreamInfo(name)
  1739  				if err != nil {
  1740  					t.Fatalf("Could not retrieve stream info: %v", err)
  1741  				}
  1742  				if si.State.Msgs != uint64(toSend) {
  1743  					return fmt.Errorf("Expected %d msgs, got state: %+v", toSend, si.State)
  1744  				}
  1745  				return nil
  1746  			})
  1747  			err := js.DeleteStream(name)
  1748  			require_NoError(t, err)
  1749  		}
  1750  	}
  1751  }
  1752  
  1753  func TestNoRaceJetStreamSuperClusterSources(t *testing.T) {
  1754  	sc := createJetStreamSuperCluster(t, 3, 3)
  1755  	defer sc.shutdown()
  1756  
  1757  	// Client based API
  1758  	s := sc.clusterForName("C1").randomServer()
  1759  	nc, js := jsClientConnect(t, s)
  1760  	defer nc.Close()
  1761  
  1762  	// Create our source streams.
  1763  	for _, sname := range []string{"foo", "bar", "baz"} {
  1764  		if _, err := js.AddStream(&nats.StreamConfig{Name: sname, Replicas: 1}); err != nil {
  1765  			t.Fatalf("Unexpected error: %v", err)
  1766  		}
  1767  	}
  1768  
  1769  	sendBatch := func(subject string, n int) {
  1770  		for i := 0; i < n; i++ {
  1771  			msg := fmt.Sprintf("MSG-%d", i+1)
  1772  			if _, err := js.Publish(subject, []byte(msg)); err != nil {
  1773  				t.Fatalf("Unexpected publish error: %v", err)
  1774  			}
  1775  		}
  1776  	}
  1777  	// Populate each one.
  1778  	sendBatch("foo", 10)
  1779  	sendBatch("bar", 15)
  1780  	sendBatch("baz", 25)
  1781  
  1782  	// Needed while the Go client does not have mirror support for creating mirror or source streams.
  1783  	createStream := func(cfg *nats.StreamConfig) {
  1784  		t.Helper()
  1785  		if _, err := js.AddStream(cfg); err != nil {
  1786  			t.Fatalf("Unexpected error: %+v", err)
  1787  		}
  1788  	}
  1789  
  1790  	cfg := &nats.StreamConfig{
  1791  		Name: "MS",
  1792  		Sources: []*nats.StreamSource{
  1793  			{Name: "foo"},
  1794  			{Name: "bar"},
  1795  			{Name: "baz"},
  1796  		},
  1797  	}
  1798  
  1799  	createStream(cfg)
  1800  	time.Sleep(time.Second)
  1801  
  1802  	// Faster timeout since we loop below checking for condition.
  1803  	js2, err := nc.JetStream(nats.MaxWait(50 * time.Millisecond))
  1804  	if err != nil {
  1805  		t.Fatalf("Unexpected error: %v", err)
  1806  	}
  1807  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1808  		si, err := js2.StreamInfo("MS")
  1809  		if err != nil {
  1810  			return err
  1811  		}
  1812  		if si.State.Msgs != 50 {
  1813  			return fmt.Errorf("Expected 50 msgs, got state: %+v", si.State)
  1814  		}
  1815  		return nil
  1816  	})
  1817  
  1818  	// Purge the source streams.
  1819  	for _, sname := range []string{"foo", "bar", "baz"} {
  1820  		if err := js.PurgeStream(sname); err != nil {
  1821  			t.Fatalf("Unexpected purge error: %v", err)
  1822  		}
  1823  	}
  1824  
  1825  	if err := js.DeleteStream("MS"); err != nil {
  1826  		t.Fatalf("Unexpected delete error: %v", err)
  1827  	}
  1828  
  1829  	// Send more msgs now.
  1830  	sendBatch("foo", 10)
  1831  	sendBatch("bar", 15)
  1832  	sendBatch("baz", 25)
  1833  
  1834  	cfg = &nats.StreamConfig{
  1835  		Name: "MS2",
  1836  		Sources: []*nats.StreamSource{
  1837  			{Name: "foo"},
  1838  			{Name: "bar"},
  1839  			{Name: "baz"},
  1840  		},
  1841  		Replicas:  3,
  1842  		Placement: &nats.Placement{Cluster: "C3"},
  1843  	}
  1844  
  1845  	createStream(cfg)
  1846  
  1847  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  1848  		si, err := js2.StreamInfo("MS2")
  1849  		if err != nil {
  1850  			t.Fatalf("Unexpected error: %v", err)
  1851  		}
  1852  		if si.State.Msgs != 50 {
  1853  			return fmt.Errorf("Expected 50 msgs, got state: %+v", si.State)
  1854  		}
  1855  		if si.State.FirstSeq != 1 {
  1856  			return fmt.Errorf("Expected start seq of 1, got state: %+v", si.State)
  1857  		}
  1858  		return nil
  1859  	})
  1860  
  1861  	sl := sc.clusterForName("C3").streamLeader("$G", "MS2")
  1862  	doneCh := make(chan bool)
  1863  
  1864  	if sl == sc.leader() {
  1865  		nc.Request(JSApiLeaderStepDown, nil, time.Second)
  1866  		sc.waitOnLeader()
  1867  	}
  1868  
  1869  	// Now test that if the sourcing stream gets interrupted it picks up where it left off, etc.
  1870  	go func() {
  1871  		// Send 50 more messages each.
  1872  		for i := 0; i < 50; i++ {
  1873  			msg := fmt.Sprintf("R-MSG-%d", i+1)
  1874  			for _, sname := range []string{"foo", "bar", "baz"} {
  1875  				m := nats.NewMsg(sname)
  1876  				m.Data = []byte(msg)
  1877  				if _, err := js.PublishMsg(m); err != nil {
  1878  					t.Errorf("Unexpected publish error: %v", err)
  1879  				}
  1880  			}
  1881  			time.Sleep(2 * time.Millisecond)
  1882  		}
  1883  		doneCh <- true
  1884  	}()
  1885  
  1886  	time.Sleep(20 * time.Millisecond)
  1887  	sl.Shutdown()
  1888  
  1889  	sc.clusterForName("C3").waitOnStreamLeader("$G", "MS2")
  1890  	<-doneCh
  1891  
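        	// The 50 initial messages plus the 3x50 published during the interruption should all
        	// land in MS2, for 200 total.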
  1892  	checkFor(t, 20*time.Second, time.Second, func() error {
  1893  		si, err := js2.StreamInfo("MS2")
  1894  		if err != nil {
  1895  			return err
  1896  		}
  1897  		if si.State.Msgs != 200 {
  1898  			return fmt.Errorf("Expected 200 msgs, got state: %+v", si.State)
  1899  		}
  1900  		return nil
  1901  	})
  1902  }
  1903  
  1904  func TestNoRaceJetStreamClusterSourcesMuxd(t *testing.T) {
  1905  	c := createJetStreamClusterExplicit(t, "SMUX", 3)
  1906  	defer c.shutdown()
  1907  
  1908  	// Client for API requests.
  1909  	nc, js := jsClientConnect(t, c.randomServer())
  1910  	defer nc.Close()
  1911  
  1912  	// Send in 10000 messages.
  1913  	msg, toSend := make([]byte, 1024), 10000
  1914  	crand.Read(msg)
  1915  
  1916  	var sources []*nats.StreamSource
  1917  	// Create 10 origin streams.
  1918  	for i := 1; i <= 10; i++ {
  1919  		name := fmt.Sprintf("O-%d", i)
  1920  		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  1921  			t.Fatalf("Unexpected error: %v", err)
  1922  		}
  1923  		// Make sure we have a leader before publishing. Since we use a
  1924  		// non-JS publisher, we would otherwise not know if the messages
  1925  		// made it to those streams or not.
  1926  		c.waitOnStreamLeader(globalAccountName, name)
  1927  		// Load them up with a bunch of messages.
  1928  		for n := 0; n < toSend; n++ {
  1929  			if err := nc.Publish(name, msg); err != nil {
  1930  				t.Fatalf("Unexpected publish error: %v", err)
  1931  			}
  1932  		}
  1933  		sources = append(sources, &nats.StreamSource{Name: name})
  1934  	}
  1935  
  1936  	// Now create our downstream stream that sources from all of them.
  1937  	if _, err := js.AddStream(&nats.StreamConfig{Name: "S", Replicas: 2, Sources: sources}); err != nil {
  1938  		t.Fatalf("Unexpected error: %v", err)
  1939  	}
  1940  
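        	// The sourcing stream should eventually hold all 100k messages from the 10 origin streams.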
  1941  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  1942  		si, err := js.StreamInfo("S")
  1943  		if err != nil {
  1944  			t.Fatalf("Could not retrieve stream info: %v", err)
  1945  		}
  1946  		if si.State.Msgs != uint64(10*toSend) {
  1947  			return fmt.Errorf("Expected %d msgs, got state: %+v", toSend*10, si.State)
  1948  		}
  1949  		return nil
  1950  	})
  1951  
  1952  }
  1953  
  1954  func TestNoRaceJetStreamSuperClusterMixedModeSources(t *testing.T) {
  1955  	tmpl := `
  1956  		listen: 127.0.0.1:-1
  1957  		server_name: %s
  1958  		jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  1959  		leaf: { listen: 127.0.0.1:-1 }
  1960  
  1961  		cluster {
  1962  			name: %s
  1963  			listen: 127.0.0.1:%d
  1964  			routes = [%s]
  1965  		}
  1966  
  1967  		accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  1968  	`
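        	// As above, disable JetStream on servers S5-S7 so this supercluster also runs in mixed mode.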
  1969  	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 7, 2,
  1970  		func(serverName, clusterName, storeDir, conf string) string {
  1971  			sname := serverName[strings.Index(serverName, "-")+1:]
  1972  			switch sname {
  1973  			case "S5", "S6", "S7":
  1974  				conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ")
  1975  			default:
  1976  				conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ")
  1977  			}
  1978  			return conf
  1979  		}, nil)
  1980  	defer sc.shutdown()
  1981  
  1982  	// Connect our client to a non JS server
  1983  	c := sc.randomCluster()
  1984  	var s *Server
  1985  	for s == nil {
  1986  		if as := c.randomServer(); !as.JetStreamEnabled() {
  1987  			s = as
  1988  			break
  1989  		}
  1990  	}
  1991  	nc, js := jsClientConnect(t, s)
  1992  	defer nc.Close()
  1993  
  1994  	toSend := 1000
  1995  	var sources []*nats.StreamSource
  1996  	// Create 100 origin streams.
  1997  	for i := 1; i <= 100; i++ {
  1998  		name := fmt.Sprintf("O-%d", i)
  1999  		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  2000  			t.Fatalf("Unexpected error: %v", err)
  2001  		}
  2002  		c.waitOnStreamLeader(globalAccountName, name)
  2003  		// Load them up with a bunch of messages.
  2004  		for n := 0; n < toSend; n++ {
  2005  			m := nats.NewMsg(name)
  2006  			m.Header.Set("stream", name)
  2007  			m.Header.Set("idx", strconv.FormatInt(int64(n+1), 10))
  2008  			if err := nc.PublishMsg(m); err != nil {
  2009  				t.Fatalf("Unexpected publish error: %v", err)
  2010  			}
  2011  		}
  2012  		sources = append(sources, &nats.StreamSource{Name: name})
  2013  	}
  2014  
  2015  	for i := 0; i < 3; i++ {
  2016  		// Now create our downstream stream that sources from all of them.
  2017  		if _, err := js.AddStream(&nats.StreamConfig{Name: "S", Replicas: 3, Sources: sources}); err != nil {
  2018  			t.Fatalf("Unexpected error: %v", err)
  2019  		}
  2020  
  2021  		checkFor(t, 15*time.Second, 1000*time.Millisecond, func() error {
  2022  			si, err := js.StreamInfo("S")
  2023  			if err != nil {
  2024  				t.Fatalf("Could not retrieve stream info: %v", err)
  2025  			}
  2026  			if si.State.Msgs != uint64(100*toSend) {
  2027  				return fmt.Errorf("Expected %d msgs, got state: %+v", toSend*100, si.State)
  2028  			}
  2029  			return nil
  2030  		})
  2031  
  2032  		err := js.DeleteStream("S")
  2033  		require_NoError(t, err)
  2034  	}
  2035  }
  2036  
  2037  func TestNoRaceJetStreamClusterExtendedStreamPurgeStall(t *testing.T) {
  2038  	// Uncomment to run. Needs to be on a big machine. We do not want it as part of the Travis tests at the moment.
  2039  	skip(t)
  2040  
  2041  	cerr := func(t *testing.T, err error) {
  2042  		t.Helper()
  2043  		if err != nil {
  2044  			t.Fatalf("unexpected err: %s", err)
  2045  		}
  2046  	}
  2047  
  2048  	s := RunBasicJetStreamServer(t)
  2049  	defer s.Shutdown()
  2050  
  2051  	nc, js := jsClientConnect(t, s)
  2052  	defer nc.Close()
  2053  
  2054  	si, err := js.AddStream(&nats.StreamConfig{
  2055  		Name:     "KV",
  2056  		Subjects: []string{"kv.>"},
  2057  		Storage:  nats.FileStorage,
  2058  	})
  2059  	cerr(t, err)
  2060  
  2061  	// 100kb messages spread over 1000 different subjects
  2062  	body := make([]byte, 100*1024)
  2063  	for i := 0; i < 50000; i++ {
  2064  		if _, err := js.PublishAsync(fmt.Sprintf("kv.%d", i%1000), body); err != nil {
  2065  			cerr(t, err)
  2066  		}
  2067  	}
  2068  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  2069  		if si, err = js.StreamInfo("KV"); err != nil {
  2070  			return err
  2071  		}
  2072  		if si.State.Msgs == 50000 {
  2073  			return nil
  2074  		}
  2075  		return fmt.Errorf("waiting for more")
  2076  	})
  2077  
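        	// Purge a single subject out of the 1000 and make sure it both completes quickly
        	// and does not balloon memory usage.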
  2078  	jp, _ := json.Marshal(&JSApiStreamPurgeRequest{Subject: "kv.20"})
  2079  	start := time.Now()
  2080  	res, err := nc.Request(fmt.Sprintf(JSApiStreamPurgeT, "KV"), jp, time.Minute)
  2081  	elapsed := time.Since(start)
  2082  	cerr(t, err)
  2083  	pres := JSApiStreamPurgeResponse{}
  2084  	err = json.Unmarshal(res.Data, &pres)
  2085  	cerr(t, err)
  2086  	if !pres.Success {
  2087  		t.Fatalf("purge failed: %#v", pres)
  2088  	}
  2089  	if elapsed > time.Second {
  2090  		t.Fatalf("Purge took too long %s", elapsed)
  2091  	}
  2092  	v, _ := s.Varz(nil)
  2093  	if v.Mem > 100*1024*1024 { // 100MB limit but in practice < 100MB -> Was ~7GB when failing.
  2094  		t.Fatalf("Used too much memory: %v", friendlyBytes(v.Mem))
  2095  	}
  2096  }
  2097  
  2098  func TestNoRaceJetStreamClusterMirrorExpirationAndMissingSequences(t *testing.T) {
  2099  	c := createJetStreamClusterExplicit(t, "MMS", 9)
  2100  	defer c.shutdown()
  2101  
  2102  	// Client for API requests.
  2103  	nc, js := jsClientConnect(t, c.randomServer())
  2104  	defer nc.Close()
  2105  
  2106  	sendBatch := func(n int) {
  2107  		t.Helper()
  2108  		// Send a batch to a given subject.
  2109  		for i := 0; i < n; i++ {
  2110  			if _, err := js.Publish("TEST", []byte("OK")); err != nil {
  2111  				t.Fatalf("Unexpected publish error: %v", err)
  2112  			}
  2113  		}
  2114  	}
  2115  
  2116  	checkStream := func(stream string, num uint64) {
  2117  		t.Helper()
  2118  		checkFor(t, 20*time.Second, 20*time.Millisecond, func() error {
  2119  			si, err := js.StreamInfo(stream)
  2120  			if err != nil {
  2121  				return err
  2122  			}
  2123  			if si.State.Msgs != num {
  2124  				return fmt.Errorf("Expected %d msgs, got %d", num, si.State.Msgs)
  2125  			}
  2126  			return nil
  2127  		})
  2128  	}
  2129  
  2130  	checkMirror := func(num uint64) { t.Helper(); checkStream("M", num) }
  2131  	checkTest := func(num uint64) { t.Helper(); checkStream("TEST", num) }
  2132  
  2133  	// Origin
  2134  	_, err := js.AddStream(&nats.StreamConfig{
  2135  		Name:   "TEST",
  2136  		MaxAge: 500 * time.Millisecond,
  2137  	})
  2138  	if err != nil {
  2139  		t.Fatalf("Unexpected error: %v", err)
  2140  	}
  2141  
  2142  	ts := c.streamLeader("$G", "TEST")
  2143  	ml := c.leader()
  2144  
  2145  	// Create mirror now.
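        	// Keep re-creating it until its leader is neither the origin stream leader nor the
        	// meta leader, since we will shut the mirror leader down below.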
  2146  	for ms := ts; ms == ts || ms == ml; {
  2147  		_, err = js.AddStream(&nats.StreamConfig{
  2148  			Name:     "M",
  2149  			Mirror:   &nats.StreamSource{Name: "TEST"},
  2150  			Replicas: 2,
  2151  		})
  2152  		if err != nil {
  2153  			t.Fatalf("Unexpected error: %v", err)
  2154  		}
  2155  		ms = c.streamLeader("$G", "M")
  2156  		if ts == ms || ms == ml {
  2157  			// Delete and retry.
  2158  			js.DeleteStream("M")
  2159  		}
  2160  	}
  2161  
  2162  	sendBatch(10)
  2163  	checkMirror(10)
  2164  
  2165  	// Now shutdown the server with the mirror.
  2166  	ms := c.streamLeader("$G", "M")
  2167  	ms.Shutdown()
  2168  	c.waitOnLeader()
  2169  
  2170  	// Send more messages but let them expire.
  2171  	sendBatch(10)
  2172  	checkTest(0)
  2173  
  2174  	c.restartServer(ms)
  2175  	c.checkClusterFormed()
  2176  	c.waitOnStreamLeader("$G", "M")
  2177  
  2178  	sendBatch(10)
  2179  	checkMirror(20)
  2180  }
  2181  
  2182  func TestNoRaceJetStreamClusterLargeActiveOnReplica(t *testing.T) {
  2183  	// Uncomment to run.
  2184  	skip(t)
  2185  
  2186  	c := createJetStreamClusterExplicit(t, "LAG", 3)
  2187  	defer c.shutdown()
  2188  
  2189  	// Client for API requests.
  2190  	nc, js := jsClientConnect(t, c.randomServer())
  2191  	defer nc.Close()
  2192  
  2193  	timeout := time.Now().Add(60 * time.Second)
  2194  	for time.Now().Before(timeout) {
  2195  		si, err := js.AddStream(&nats.StreamConfig{
  2196  			Name:     "TEST",
  2197  			Subjects: []string{"foo", "bar"},
  2198  			Replicas: 3,
  2199  		})
  2200  		if err != nil {
  2201  			t.Fatalf("Unexpected error: %v", err)
  2202  		}
  2203  		for _, r := range si.Cluster.Replicas {
  2204  			if r.Active > 5*time.Second {
  2205  				t.Fatalf("Bad Active value: %+v", r)
  2206  			}
  2207  		}
  2208  		if err := js.DeleteStream("TEST"); err != nil {
  2209  			t.Fatalf("Unexpected delete error: %v", err)
  2210  		}
  2211  	}
  2212  }
  2213  
  2214  func TestNoRaceJetStreamSuperClusterRIPStress(t *testing.T) {
  2215  	// Uncomment to run. Needs to be on a big machine.
  2216  	skip(t)
  2217  
  2218  	sc := createJetStreamSuperCluster(t, 3, 3)
  2219  	defer sc.shutdown()
  2220  
  2221  	// Client based API
  2222  	s := sc.clusterForName("C2").randomServer()
  2223  	nc, js := jsClientConnect(t, s)
  2224  	defer nc.Close()
  2225  
  2226  	scm := make(map[string][]string)
  2227  
  2228  	// Create 50 streams per cluster.
  2229  	for _, cn := range []string{"C1", "C2", "C3"} {
  2230  		var streams []string
  2231  		for i := 0; i < 50; i++ {
  2232  			sn := fmt.Sprintf("%s-S%d", cn, i+1)
  2233  			streams = append(streams, sn)
  2234  			_, err := js.AddStream(&nats.StreamConfig{
  2235  				Name:      sn,
  2236  				Replicas:  3,
  2237  				Placement: &nats.Placement{Cluster: cn},
  2238  				MaxAge:    2 * time.Minute,
  2239  				MaxMsgs:   50_000,
  2240  			})
  2241  			if err != nil {
  2242  				t.Fatalf("Unexpected error: %v", err)
  2243  			}
  2244  		}
  2245  		scm[cn] = streams
  2246  	}
  2247  
  2248  	sourceForCluster := func(cn string) []*nats.StreamSource {
  2249  		var sns []string
  2250  		switch cn {
  2251  		case "C1":
  2252  			sns = scm["C2"]
  2253  		case "C2":
  2254  			sns = scm["C3"]
  2255  		case "C3":
  2256  			sns = scm["C1"]
  2257  		default:
  2258  			t.Fatalf("Unknown cluster %q", cn)
  2259  		}
  2260  		var ss []*nats.StreamSource
  2261  		for _, sn := range sns {
  2262  			ss = append(ss, &nats.StreamSource{Name: sn})
  2263  		}
  2264  		return ss
  2265  	}
  2266  
  2267  	// Mux all 50 streams from one cluster to a single stream across a GW connection to another cluster.
  2268  	_, err := js.AddStream(&nats.StreamConfig{
  2269  		Name:      "C1-S-MUX",
  2270  		Replicas:  2,
  2271  		Placement: &nats.Placement{Cluster: "C1"},
  2272  		Sources:   sourceForCluster("C2"),
  2273  		MaxAge:    time.Minute,
  2274  		MaxMsgs:   20_000,
  2275  	})
  2276  	if err != nil {
  2277  		t.Fatalf("Unexpected error: %v", err)
  2278  	}
  2279  
  2280  	_, err = js.AddStream(&nats.StreamConfig{
  2281  		Name:      "C2-S-MUX",
  2282  		Replicas:  2,
  2283  		Placement: &nats.Placement{Cluster: "C2"},
  2284  		Sources:   sourceForCluster("C3"),
  2285  		MaxAge:    time.Minute,
  2286  		MaxMsgs:   20_000,
  2287  	})
  2288  	if err != nil {
  2289  		t.Fatalf("Unexpected error: %v", err)
  2290  	}
  2291  
  2292  	_, err = js.AddStream(&nats.StreamConfig{
  2293  		Name:      "C3-S-MUX",
  2294  		Replicas:  2,
  2295  		Placement: &nats.Placement{Cluster: "C3"},
  2296  		Sources:   sourceForCluster("C1"),
  2297  		MaxAge:    time.Minute,
  2298  		MaxMsgs:   20_000,
  2299  	})
  2300  	if err != nil {
  2301  		t.Fatalf("Unexpected error: %v", err)
  2302  	}
  2303  
  2304  	// Now create mirrors for our mux'd streams.
  2305  	_, err = js.AddStream(&nats.StreamConfig{
  2306  		Name:      "C1-MIRROR",
  2307  		Replicas:  3,
  2308  		Placement: &nats.Placement{Cluster: "C1"},
  2309  		Mirror:    &nats.StreamSource{Name: "C3-S-MUX"},
  2310  		MaxAge:    5 * time.Minute,
  2311  		MaxMsgs:   10_000,
  2312  	})
  2313  	if err != nil {
  2314  		t.Fatalf("Unexpected error: %v", err)
  2315  	}
  2316  
  2317  	_, err = js.AddStream(&nats.StreamConfig{
  2318  		Name:      "C2-MIRROR",
  2319  		Replicas:  3,
  2320  		Placement: &nats.Placement{Cluster: "C2"},
  2321  		Mirror:    &nats.StreamSource{Name: "C2-S-MUX"},
  2322  		MaxAge:    5 * time.Minute,
  2323  		MaxMsgs:   10_000,
  2324  	})
  2325  	if err != nil {
  2326  		t.Fatalf("Unexpected error: %v", err)
  2327  	}
  2328  
  2329  	_, err = js.AddStream(&nats.StreamConfig{
  2330  		Name:      "C3-MIRROR",
  2331  		Replicas:  3,
  2332  		Placement: &nats.Placement{Cluster: "C3"},
  2333  		Mirror:    &nats.StreamSource{Name: "C1-S-MUX"},
  2334  		MaxAge:    5 * time.Minute,
  2335  		MaxMsgs:   10_000,
  2336  	})
  2337  	if err != nil {
  2338  		t.Fatalf("Unexpected error: %v", err)
  2339  	}
  2340  
  2341  	var jsc []nats.JetStream
  2342  
  2343  	// Create 64 clients.
  2344  	for i := 0; i < 64; i++ {
  2345  		s := sc.randomCluster().randomServer()
  2346  		nc, _ := jsClientConnect(t, s)
  2347  		defer nc.Close()
  2348  		js, err := nc.JetStream(nats.PublishAsyncMaxPending(8 * 1024))
  2349  		if err != nil {
  2350  			t.Fatalf("Unexpected error: %v", err)
  2351  		}
  2352  		jsc = append(jsc, js)
  2353  	}
  2354  
  2355  	msg := make([]byte, 1024)
  2356  	crand.Read(msg)
  2357  
  2358  	// Run for 8 minutes.
  2359  	expires := time.Now().Add(480 * time.Second)
  2360  	for time.Now().Before(expires) {
  2361  		for _, sns := range scm {
  2362  			rand.Shuffle(len(sns), func(i, j int) { sns[i], sns[j] = sns[j], sns[i] })
  2363  			for _, sn := range sns {
  2364  				js := jsc[rand.Intn(len(jsc))]
  2365  				if _, err = js.PublishAsync(sn, msg); err != nil {
  2366  					t.Fatalf("Unexpected publish error: %v", err)
  2367  				}
  2368  			}
  2369  		}
  2370  		time.Sleep(10 * time.Millisecond)
  2371  	}
  2372  }
  2373  
  2374  func TestNoRaceJetStreamSlowFilteredInititalPendingAndFirstMsg(t *testing.T) {
  2375  	s := RunBasicJetStreamServer(t)
  2376  	defer s.Shutdown()
  2377  
  2378  	// Create directly here to force multiple blocks, etc.
  2379  	a, err := s.LookupAccount("$G")
  2380  	if err != nil {
  2381  		t.Fatalf("Unexpected error: %v", err)
  2382  	}
  2383  	mset, err := a.addStreamWithStore(
  2384  		&StreamConfig{
  2385  			Name:     "S",
  2386  			Subjects: []string{"foo", "bar", "baz", "foo.bar.baz", "foo.*"},
  2387  		},
  2388  		&FileStoreConfig{
  2389  			BlockSize:  4 * 1024 * 1024,
  2390  			AsyncFlush: true,
  2391  		},
  2392  	)
  2393  	if err != nil {
  2394  		t.Fatalf("Unexpected error: %v", err)
  2395  	}
  2396  
  2397  	nc, js := jsClientConnect(t, s)
  2398  	defer nc.Close()
  2399  
  2400  	toSend := 100_000 // 500k total though.
  2401  
  2402  	// Messages will be 'foo' 'bar' 'baz' repeated 100k times.
  2403  	// Then 'foo.bar.baz' all contiguous for 100k.
  2404  	// Then foo.N for 1-100000
  2405  	for i := 0; i < toSend; i++ {
  2406  		js.PublishAsync("foo", []byte("HELLO"))
  2407  		js.PublishAsync("bar", []byte("WORLD"))
  2408  		js.PublishAsync("baz", []byte("AGAIN"))
  2409  	}
  2410  	// Make contiguous block of same subject.
  2411  	for i := 0; i < toSend; i++ {
  2412  		js.PublishAsync("foo.bar.baz", []byte("ALL-TOGETHER"))
  2413  	}
  2414  	// Now add some more at the end.
  2415  	for i := 0; i < toSend; i++ {
  2416  		js.PublishAsync(fmt.Sprintf("foo.%d", i+1), []byte("LATER"))
  2417  	}
  2418  
  2419  	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
  2420  		si, err := js.StreamInfo("S")
  2421  		if err != nil {
  2422  			return err
  2423  		}
  2424  		if si.State.Msgs != uint64(5*toSend) {
  2425  			return fmt.Errorf("Expected %d msgs, got %d", 5*toSend, si.State.Msgs)
  2426  		}
  2427  		return nil
  2428  	})
  2429  
  2430  	// Threshold for taking too long.
  2431  	const thresh = 150 * time.Millisecond
  2432  
  2433  	var dindex int
  2434  	testConsumerCreate := func(subj string, startSeq, expectedNumPending uint64) {
  2435  		t.Helper()
  2436  		dindex++
  2437  		dname := fmt.Sprintf("dur-%d", dindex)
  2438  		cfg := ConsumerConfig{FilterSubject: subj, Durable: dname, AckPolicy: AckExplicit}
  2439  		if startSeq > 1 {
  2440  			cfg.OptStartSeq, cfg.DeliverPolicy = startSeq, DeliverByStartSequence
  2441  		}
  2442  		start := time.Now()
  2443  		o, err := mset.addConsumer(&cfg)
  2444  		if err != nil {
  2445  			t.Fatalf("Unexpected error: %v", err)
  2446  		}
  2447  		if delta := time.Since(start); delta > thresh {
  2448  			t.Fatalf("Creating consumer for %q and start: %d took too long: %v", subj, startSeq, delta)
  2449  		}
  2450  		if ci := o.info(); ci.NumPending != expectedNumPending {
  2451  			t.Fatalf("Expected NumPending of %d, got %d", expectedNumPending, ci.NumPending)
  2452  		}
  2453  	}
  2454  
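        	// Each case below asserts both that consumer creation stays under the threshold and
        	// that the initial NumPending matches the per-subject layout created above.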
  2455  	testConsumerCreate("foo.100000", 1, 1)
  2456  	testConsumerCreate("foo.100000", 222_000, 1)
  2457  	testConsumerCreate("foo", 1, 100_000)
  2458  	testConsumerCreate("foo", 4, 100_000-1)
  2459  	testConsumerCreate("foo.bar.baz", 1, 100_000)
  2460  	testConsumerCreate("foo.bar.baz", 350_001, 50_000)
  2461  	testConsumerCreate("*", 1, 300_000)
  2462  	testConsumerCreate("*", 4, 300_000-3)
  2463  	testConsumerCreate(">", 1, 500_000)
  2464  	testConsumerCreate(">", 50_000, 500_000-50_000+1)
  2465  	testConsumerCreate("foo.10", 1, 1)
  2466  
  2467  	// Also test that we do not take long if the start sequence is later in the stream.
  2468  	sub, err := js.PullSubscribe("foo.100000", "dlc")
  2469  	if err != nil {
  2470  		t.Fatalf("Unexpected error: %v", err)
  2471  	}
  2472  	start := time.Now()
  2473  	fetchMsgs(t, sub, 1, time.Second)
  2474  	if delta := time.Since(start); delta > thresh {
  2475  		t.Fatalf("Took too long for pull subscriber to fetch the message: %v", delta)
  2476  	}
  2477  
  2478  	// Now do some deletes and make sure these are handled correctly.
  2479  	// Delete 3 foo messages.
  2480  	mset.removeMsg(1)
  2481  	mset.removeMsg(4)
  2482  	mset.removeMsg(7)
  2483  	testConsumerCreate("foo", 1, 100_000-3)
  2484  
  2485  	// Make sure wider scoped subjects do the right thing from a pending perspective.
  2486  	o, err := mset.addConsumer(&ConsumerConfig{FilterSubject: ">", Durable: "cat", AckPolicy: AckExplicit})
  2487  	if err != nil {
  2488  		t.Fatalf("Unexpected error: %v", err)
  2489  	}
  2490  	ci, expected := o.info(), uint64(500_000-3)
  2491  	if ci.NumPending != expected {
  2492  		t.Fatalf("Expected NumPending of %d, got %d", expected, ci.NumPending)
  2493  	}
  2494  	// Send another and make sure it is captured by our wide scope consumer.
  2495  	js.Publish("foo", []byte("HELLO AGAIN"))
  2496  	if ci = o.info(); ci.NumPending != expected+1 {
  2497  		t.Fatalf("Expected the wide scoped consumer to see the new message, wanted pending of %d, got %d", expected+1, ci.NumPending)
  2498  	}
  2499  
  2500  	// Stop current server and test restart.
  2501  	sd := s.JetStreamConfig().StoreDir
  2502  	s.Shutdown()
  2503  	// Restart.
  2504  	s = RunJetStreamServerOnPort(-1, sd)
  2505  	defer s.Shutdown()
  2506  
  2507  	a, err = s.LookupAccount("$G")
  2508  	if err != nil {
  2509  		t.Fatalf("Unexpected error: %v", err)
  2510  	}
  2511  	mset, err = a.lookupStream("S")
  2512  	if err != nil {
  2513  		t.Fatalf("Unexpected error: %v", err)
  2514  	}
  2515  
  2516  	// Make sure we recovered our per subject state on restart.
  2517  	testConsumerCreate("foo.100000", 1, 1)
  2518  	testConsumerCreate("foo", 1, 100_000-2)
  2519  }
  2520  
  2521  func TestNoRaceJetStreamFileStoreBufferReuse(t *testing.T) {
  2522  	// Uncomment to run. Needs to be on a big machine.
  2523  	skip(t)
  2524  
  2525  	s := RunBasicJetStreamServer(t)
  2526  	defer s.Shutdown()
  2527  
  2528  	cfg := &StreamConfig{Name: "TEST", Subjects: []string{"foo", "bar", "baz"}, Storage: FileStorage}
  2529  	if _, err := s.GlobalAccount().addStreamWithStore(cfg, nil); err != nil {
  2530  		t.Fatalf("Unexpected error adding stream: %v", err)
  2531  	}
  2532  
  2533  	// Client for API requests.
  2534  	nc, js := jsClientConnect(t, s)
  2535  	defer nc.Close()
  2536  
  2537  	toSend := 200_000
  2538  
  2539  	m := nats.NewMsg("foo")
  2540  	m.Data = make([]byte, 8*1024)
  2541  	crand.Read(m.Data)
  2542  
  2543  	start := time.Now()
  2544  	for i := 0; i < toSend; i++ {
  2545  		m.Reply = _EMPTY_
  2546  		switch i % 3 {
  2547  		case 0:
  2548  			m.Subject = "foo"
  2549  		case 1:
  2550  			m.Subject = "bar"
  2551  		case 2:
  2552  			m.Subject = "baz"
  2553  		}
  2554  		m.Header.Set("X-ID2", fmt.Sprintf("XXXXX-%d", i))
  2555  		if _, err := js.PublishMsgAsync(m); err != nil {
  2556  			t.Fatalf("Err on publish: %v", err)
  2557  		}
  2558  	}
  2559  	<-js.PublishAsyncComplete()
  2560  	fmt.Printf("TOOK %v to publish\n", time.Since(start))
  2561  
  2562  	v, err := s.Varz(nil)
  2563  	if err != nil {
  2564  		t.Fatalf("Unexpected error: %v", err)
  2565  	}
  2566  	fmt.Printf("MEM AFTER PUBLISH is %v\n", friendlyBytes(v.Mem))
  2567  
  2568  	si, _ := js.StreamInfo("TEST")
  2569  	fmt.Printf("si is %+v\n", si.State)
  2570  
  2571  	received := 0
  2572  	done := make(chan bool)
  2573  
  2574  	cb := func(m *nats.Msg) {
  2575  		received++
  2576  		if received >= toSend {
  2577  			done <- true
  2578  		}
  2579  	}
  2580  
  2581  	start = time.Now()
  2582  	sub, err := js.Subscribe("*", cb, nats.EnableFlowControl(), nats.IdleHeartbeat(time.Second), nats.AckNone())
  2583  	if err != nil {
  2584  		t.Fatalf("Unexpected error: %v", err)
  2585  	}
  2586  	defer sub.Unsubscribe()
  2587  	<-done
  2588  	fmt.Printf("TOOK %v to consume\n", time.Since(start))
  2589  
  2590  	v, err = s.Varz(nil)
  2591  	if err != nil {
  2592  		t.Fatalf("Unexpected error: %v", err)
  2593  	}
  2594  	fmt.Printf("MEM AFTER SUBSCRIBE is %v\n", friendlyBytes(v.Mem))
  2595  }
  2596  
  2597  // Report of slow restart for a server that has many messages that have expired while it was not running.
  2598  func TestNoRaceJetStreamSlowRestartWithManyExpiredMsgs(t *testing.T) {
  2599  	opts := DefaultTestOptions
  2600  	opts.Port = -1
  2601  	opts.JetStream = true
  2602  	s := RunServer(&opts)
  2603  	if config := s.JetStreamConfig(); config != nil {
  2604  		defer removeDir(t, config.StoreDir)
  2605  	}
  2606  	defer s.Shutdown()
  2607  
  2608  	// Client for API requests.
  2609  	nc, js := jsClientConnect(t, s)
  2610  	defer nc.Close()
  2611  
  2612  	ttl := 2 * time.Second
  2613  	_, err := js.AddStream(&nats.StreamConfig{
  2614  		Name:     "ORDERS",
  2615  		Subjects: []string{"orders.*"},
  2616  		MaxAge:   ttl,
  2617  	})
  2618  	if err != nil {
  2619  		t.Fatalf("Unexpected error: %v", err)
  2620  	}
  2621  
  2622  	// Attach a consumer that is filtering on a wildcard subject as well.
  2623  	// This does not affect it like originally thought, but we will keep it here.
  2624  	_, err = js.AddConsumer("ORDERS", &nats.ConsumerConfig{
  2625  		Durable:       "c22",
  2626  		FilterSubject: "orders.*",
  2627  		AckPolicy:     nats.AckExplicitPolicy,
  2628  	})
  2629  	if err != nil {
  2630  		t.Fatalf("Unexpected error: %v", err)
  2631  	}
  2632  
  2633  	// Now fill up with messages.
  2634  	toSend := 100_000
  2635  	for i := 1; i <= toSend; i++ {
  2636  		js.PublishAsync(fmt.Sprintf("orders.%d", i), []byte("OK"))
  2637  	}
  2638  	<-js.PublishAsyncComplete()
  2639  
  2640  	sdir := strings.TrimSuffix(s.JetStreamConfig().StoreDir, JetStreamStoreDir)
  2641  	s.Shutdown()
  2642  
  2643  	// Let them expire while not running.
  2644  	time.Sleep(ttl + 500*time.Millisecond)
  2645  
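        	// The restart should stay fast even though all 100k messages have expired
        	// and need to be cleaned up on recovery.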
  2646  	start := time.Now()
  2647  	opts.Port = -1
  2648  	opts.StoreDir = sdir
  2649  	s = RunServer(&opts)
  2650  	elapsed := time.Since(start)
  2651  	defer s.Shutdown()
  2652  
  2653  	if elapsed > 2*time.Second {
  2654  		t.Fatalf("Took %v for restart which is too long", elapsed)
  2655  	}
  2656  
  2657  	// Check everything is correct.
  2658  	nc, js = jsClientConnect(t, s)
  2659  	defer nc.Close()
  2660  
  2661  	si, err := js.StreamInfo("ORDERS")
  2662  	if err != nil {
  2663  		t.Fatalf("Unexpected error: %v", err)
  2664  	}
  2665  	if si.State.Msgs != 0 {
  2666  		t.Fatalf("Expected no msgs after restart, got %d", si.State.Msgs)
  2667  	}
  2668  }
  2669  
  2670  func TestNoRaceJetStreamStalledMirrorsAfterExpire(t *testing.T) {
  2671  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  2672  	defer c.shutdown()
  2673  
  2674  	nc, js := jsClientConnect(t, c.randomServer())
  2675  	defer nc.Close()
  2676  
  2677  	cfg := &nats.StreamConfig{
  2678  		Name:     "TEST",
  2679  		Subjects: []string{"foo.*"},
  2680  		Replicas: 1,
  2681  		MaxAge:   100 * time.Millisecond,
  2682  	}
  2683  
  2684  	if _, err := js.AddStream(cfg); err != nil {
  2685  		t.Fatalf("Error creating stream: %v", err)
  2686  	}
  2687  
  2688  	if _, err := js.AddStream(&nats.StreamConfig{
  2689  		Name:     "M",
  2690  		Replicas: 2,
  2691  		Mirror:   &nats.StreamSource{Name: "TEST"},
  2692  	}); err != nil {
  2693  		t.Fatalf("Unexpected error: %v", err)
  2694  	}
  2695  
  2696  	sendBatch := func(batch int) {
  2697  		t.Helper()
  2698  		for i := 0; i < batch; i++ {
  2699  			js.PublishAsync("foo.bar", []byte("Hello"))
  2700  		}
  2701  		select {
  2702  		case <-js.PublishAsyncComplete():
  2703  		case <-time.After(5 * time.Second):
  2704  			t.Fatalf("Did not receive completion signal")
  2705  		}
  2706  	}
  2707  
  2708  	numMsgs := 10_000
  2709  	sendBatch(numMsgs)
  2710  
  2711  	// Turn off expiration so we can test we did not stall.
  2712  	cfg.MaxAge = 0
  2713  	if _, err := js.UpdateStream(cfg); err != nil {
  2714  		t.Fatalf("Unexpected error: %v", err)
  2715  	}
  2716  
  2717  	sendBatch(numMsgs)
  2718  
  2719  	// Wait for mirror to be caught up.
  2720  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  2721  		si, err := js.StreamInfo("M")
  2722  		if err != nil {
  2723  			t.Fatalf("Unexpected error: %v", err)
  2724  		}
  2725  		if si.State.LastSeq != uint64(2*numMsgs) {
  2726  			return fmt.Errorf("Expected %d as last sequence, got state: %+v", 2*numMsgs, si.State)
  2727  		}
  2728  		return nil
  2729  	})
  2730  }
  2731  
  2732  // We will use JetStream helpers to create the supercluster, but this test is about exposing the ability to access
  2733  // account-scoped connz with subject interest filtering.
  2734  func TestNoRaceJetStreamSuperClusterAccountConnz(t *testing.T) {
  2735  	// This has 4 different accounts, 3 general plus the system account.
  2736  	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 3, 3)
  2737  	defer sc.shutdown()
  2738  
  2739  	// Create 20 connections on accounts one and two.
  2740  	// Create JetStream assets for each as well to make sure by default we do not report them.
  2741  	num := 20
  2742  	for i := 0; i < num; i++ {
  2743  		nc, _ := jsClientConnect(t, sc.randomServer(), nats.UserInfo("one", "p"), nats.Name("one"))
  2744  		defer nc.Close()
  2745  
  2746  		if i%2 == 0 {
  2747  			nc.SubscribeSync("foo")
  2748  		} else {
  2749  			nc.SubscribeSync("bar")
  2750  		}
  2751  
  2752  		nc, js := jsClientConnect(t, sc.randomServer(), nats.UserInfo("two", "p"), nats.Name("two"))
  2753  		defer nc.Close()
  2754  		nc.SubscribeSync("baz")
  2755  		nc.SubscribeSync("foo.bar.*")
  2756  		nc.SubscribeSync(fmt.Sprintf("id.%d", i+1))
  2757  
  2758  		js.AddStream(&nats.StreamConfig{Name: fmt.Sprintf("TEST:%d", i+1)})
  2759  	}
  2760  
  2761  	type czapi struct {
  2762  		Server *ServerInfo
  2763  		Data   *Connz
  2764  		Error  *ApiError
  2765  	}
  2766  
  2767  	parseConnz := func(buf []byte) *Connz {
  2768  		t.Helper()
  2769  		var cz czapi
  2770  		if err := json.Unmarshal(buf, &cz); err != nil {
  2771  			t.Fatalf("Unexpected error: %v", err)
  2772  		}
  2773  		if cz.Error != nil {
  2774  			t.Fatalf("Unexpected error: %+v", cz.Error)
  2775  		}
  2776  		return cz.Data
  2777  	}
  2778  
  2779  	doRequest := func(reqSubj, acc, filter string, expected int) {
  2780  		t.Helper()
  2781  		nc, _ := jsClientConnect(t, sc.randomServer(), nats.UserInfo(acc, "p"), nats.Name(acc))
  2782  		defer nc.Close()
  2783  
  2784  		mch := make(chan *nats.Msg, 9)
  2785  		sub, _ := nc.ChanSubscribe(nats.NewInbox(), mch)
  2786  
  2787  		var req []byte
  2788  		if filter != _EMPTY_ {
  2789  			req, _ = json.Marshal(&ConnzOptions{FilterSubject: filter})
  2790  		}
  2791  
  2792  		if err := nc.PublishRequest(reqSubj, sub.Subject, req); err != nil {
  2793  			t.Fatalf("Unexpected error: %v", err)
  2794  		}
  2795  
  2796  		// So we can ignore ourselves.
  2797  		cid, _ := nc.GetClientID()
  2798  		sid := nc.ConnectedServerId()
  2799  
  2800  		wt := time.NewTimer(200 * time.Millisecond)
  2801  		var conns []*ConnInfo
  2802  	LOOP:
  2803  		for {
  2804  			select {
  2805  			case m := <-mch:
  2806  				if len(m.Data) == 0 {
  2807  					t.Fatalf("No responders")
  2808  				}
  2809  				cr := parseConnz(m.Data)
  2810  				// For account scoped requests, NumConns and Total should be the same (sans limits and offsets).
  2811  				// The Total should not include other accounts since that would leak information about the system.
  2812  				if filter == _EMPTY_ && cr.NumConns != cr.Total {
  2813  					t.Fatalf("NumConns and Total should be same with account scoped connz, got %+v", cr)
  2814  				}
  2815  				for _, c := range cr.Conns {
  2816  					if c.Name != acc {
  2817  						t.Fatalf("Got wrong connection name: %q vs %q for %+v", acc, c.Name, c)
  2818  					}
  2819  					if !(c.Cid == cid && cr.ID == sid) {
  2820  						conns = append(conns, c)
  2821  					}
  2822  				}
  2823  				wt.Reset(200 * time.Millisecond)
  2824  			case <-wt.C:
  2825  				break LOOP
  2826  			}
  2827  		}
  2828  		if len(conns) != expected {
  2829  			t.Fatalf("Expected to see %d conns but got %d", expected, len(conns))
  2830  		}
  2831  	}
  2832  
  2833  	doSysRequest := func(acc string, expected int) {
  2834  		t.Helper()
  2835  		doRequest("$SYS.REQ.SERVER.PING.CONNZ", acc, _EMPTY_, expected)
  2836  	}
  2837  	doAccRequest := func(acc string, expected int) {
  2838  		t.Helper()
  2839  		doRequest("$SYS.REQ.ACCOUNT.PING.CONNZ", acc, _EMPTY_, expected)
  2840  	}
  2841  	doFiltered := func(acc, filter string, expected int) {
  2842  		t.Helper()
  2843  		doRequest("$SYS.REQ.SERVER.PING.CONNZ", acc, filter, expected)
  2844  	}
  2845  
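        	// Each account created 20 connections above, so both the server-wide and the
        	// account-scoped endpoints should report exactly those.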
  2846  	doSysRequest("one", 20)
  2847  	doAccRequest("one", 20)
  2848  
  2849  	doSysRequest("two", 20)
  2850  	doAccRequest("two", 20)
  2851  
  2852  	// Now check filtering.
  2853  	doFiltered("one", _EMPTY_, 20)
  2854  	doFiltered("one", ">", 20)
  2855  	doFiltered("one", "bar", 10)
  2856  	doFiltered("two", "bar", 0)
  2857  	doFiltered("two", "id.1", 1)
  2858  	doFiltered("two", "id.*", 20)
  2859  	doFiltered("two", "foo.bar.*", 20)
  2860  	doFiltered("two", "foo.>", 20)
  2861  }
  2862  
  2863  func TestNoRaceCompressedConnz(t *testing.T) {
  2864  	s := RunBasicJetStreamServer(t)
  2865  	defer s.Shutdown()
  2866  
  2867  	nc, _ := jsClientConnect(t, s)
  2868  	defer nc.Close()
  2869  
  2870  	doRequest := func(compress string) {
  2871  		t.Helper()
  2872  		m := nats.NewMsg("$SYS.REQ.ACCOUNT.PING.CONNZ")
  2873  		m.Header.Add("Accept-Encoding", compress)
  2874  		resp, err := nc.RequestMsg(m, time.Second)
  2875  		if err != nil {
  2876  			t.Fatalf("Unexpected error: %v", err)
  2877  		}
  2878  		buf := resp.Data
  2879  
  2880  		// Make sure we have an encoding header.
  2881  		ce := resp.Header.Get("Content-Encoding")
  2882  		switch strings.ToLower(ce) {
  2883  		case "gzip":
  2884  			zr, err := gzip.NewReader(bytes.NewReader(buf))
  2885  			if err != nil {
  2886  				t.Fatalf("Unexpected error: %v", err)
  2887  			}
  2888  			defer zr.Close()
  2889  			buf, err = io.ReadAll(zr)
  2890  			if err != nil && err != io.ErrUnexpectedEOF {
  2891  				t.Fatalf("Unexpected error: %v", err)
  2892  			}
  2893  		case "snappy", "s2":
  2894  			sr := s2.NewReader(bytes.NewReader(buf))
  2895  			buf, err = io.ReadAll(sr)
  2896  			if err != nil && err != io.ErrUnexpectedEOF {
  2897  				t.Fatalf("Unexpected error: %v", err)
  2898  			}
  2899  		default:
  2900  			t.Fatalf("Unknown content-encoding of %q", ce)
  2901  		}
  2902  
  2903  		var cz ServerAPIConnzResponse
  2904  		if err := json.Unmarshal(buf, &cz); err != nil {
  2905  			t.Fatalf("Unexpected error: %v", err)
  2906  		}
  2907  		if cz.Error != nil {
  2908  			t.Fatalf("Unexpected error: %+v", cz.Error)
  2909  		}
  2910  	}
  2911  
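        	// Exercise each Accept-Encoding value handled above.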
  2912  	doRequest("gzip")
  2913  	doRequest("snappy")
  2914  	doRequest("s2")
  2915  }
  2916  
  2917  func TestNoRaceJetStreamClusterExtendedStreamPurge(t *testing.T) {
  2918  	for _, st := range []StorageType{FileStorage, MemoryStorage} {
  2919  		t.Run(st.String(), func(t *testing.T) {
  2920  			c := createJetStreamClusterExplicit(t, "JSC", 3)
  2921  			defer c.shutdown()
  2922  
  2923  			nc, js := jsClientConnect(t, c.randomServer())
  2924  			defer nc.Close()
  2925  
  2926  			cfg := StreamConfig{
  2927  				Name:       "KV",
  2928  				Subjects:   []string{"kv.>"},
  2929  				Storage:    st,
  2930  				Replicas:   2,
  2931  				MaxMsgsPer: 100,
  2932  			}
  2933  			req, err := json.Marshal(cfg)
  2934  			if err != nil {
  2935  				t.Fatalf("Unexpected error: %v", err)
  2936  			}
  2937  			// Do manually for now.
  2938  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  2939  			c.waitOnStreamLeader("$G", "KV")
  2940  
  2941  			si, err := js.StreamInfo("KV")
  2942  			if err != nil {
  2943  				t.Fatalf("Unexpected error: %v", err)
  2944  			}
  2945  			if si == nil || si.Config.Name != "KV" {
  2946  				t.Fatalf("StreamInfo is not correct %+v", si)
  2947  			}
  2948  
  2949  			for i := 0; i < 1000; i++ {
  2950  				js.PublishAsync("kv.foo", []byte("OK")) // 1 * i
  2951  				js.PublishAsync("kv.bar", []byte("OK")) // 2 * i
  2952  				js.PublishAsync("kv.baz", []byte("OK")) // 3 * i
  2953  			}
  2954  			// First is 2700, last is 3000
  2955  			for i := 0; i < 700; i++ {
  2956  				js.PublishAsync(fmt.Sprintf("kv.%d", i+1), []byte("OK"))
  2957  			}
  2958  			// Now first is 2700, last is 3700
  2959  			select {
  2960  			case <-js.PublishAsyncComplete():
  2961  			case <-time.After(10 * time.Second):
  2962  				t.Fatalf("Did not receive completion signal")
  2963  			}
  2964  
  2965  			si, err = js.StreamInfo("KV")
  2966  			if err != nil {
  2967  				t.Fatalf("Unexpected error: %v", err)
  2968  			}
  2969  			if si.State.Msgs != 1000 {
  2970  				t.Fatalf("Expected %d msgs, got %d", 1000, si.State.Msgs)
  2971  			}
  2972  
  2973  			shouldFail := func(preq *JSApiStreamPurgeRequest) {
  2974  				req, _ := json.Marshal(preq)
  2975  				resp, err := nc.Request(fmt.Sprintf(JSApiStreamPurgeT, "KV"), req, time.Second)
  2976  				if err != nil {
  2977  					t.Fatalf("Unexpected error: %v", err)
  2978  				}
  2979  				var pResp JSApiStreamPurgeResponse
  2980  				if err = json.Unmarshal(resp.Data, &pResp); err != nil {
  2981  					t.Fatalf("Unexpected error: %v", err)
  2982  				}
  2983  				if pResp.Success || pResp.Error == nil {
  2984  					t.Fatalf("Expected an error response but got none")
  2985  				}
  2986  			}
  2987  
  2988  			// Sequence and Keep should be mutually exclusive.
  2989  			shouldFail(&JSApiStreamPurgeRequest{Sequence: 10, Keep: 10})
  2990  
  2991  			purge := func(preq *JSApiStreamPurgeRequest, newTotal uint64) {
  2992  				t.Helper()
  2993  				req, _ := json.Marshal(preq)
  2994  				resp, err := nc.Request(fmt.Sprintf(JSApiStreamPurgeT, "KV"), req, time.Second)
  2995  				if err != nil {
  2996  					t.Fatalf("Unexpected error: %v", err)
  2997  				}
  2998  				var pResp JSApiStreamPurgeResponse
  2999  				if err = json.Unmarshal(resp.Data, &pResp); err != nil {
  3000  					t.Fatalf("Unexpected error: %v", err)
  3001  				}
  3002  				if !pResp.Success || pResp.Error != nil {
  3003  					t.Fatalf("Got a bad response %+v", pResp)
  3004  				}
  3005  				si, err = js.StreamInfo("KV")
  3006  				if err != nil {
  3007  					t.Fatalf("Unexpected error: %v", err)
  3008  				}
  3009  				if si.State.Msgs != newTotal {
  3010  					t.Fatalf("Expected total after purge to be %d but got %d", newTotal, si.State.Msgs)
  3011  				}
  3012  			}
  3013  			expectLeft := func(subject string, expected uint64) {
  3014  				t.Helper()
  3015  				ci, err := js.AddConsumer("KV", &nats.ConsumerConfig{Durable: "dlc", FilterSubject: subject, AckPolicy: nats.AckExplicitPolicy})
  3016  				if err != nil {
  3017  					t.Fatalf("Unexpected error: %v", err)
  3018  				}
  3019  				defer js.DeleteConsumer("KV", "dlc")
  3020  				if ci.NumPending != expected {
  3021  					t.Fatalf("Expected %d remaining but got %d", expected, ci.NumPending)
  3022  				}
  3023  			}
  3024  
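        			// With MaxMsgsPer of 100, kv.foo, kv.bar and kv.baz each retain their latest 100
        			// messages plus the 700 unique kv.N subjects, for 1000 messages before purging.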
  3025  			purge(&JSApiStreamPurgeRequest{Subject: "kv.foo"}, 900)
  3026  			expectLeft("kv.foo", 0)
  3027  
  3028  			purge(&JSApiStreamPurgeRequest{Subject: "kv.bar", Keep: 1}, 801)
  3029  			expectLeft("kv.bar", 1)
  3030  
  3031  			purge(&JSApiStreamPurgeRequest{Subject: "kv.baz", Sequence: 2851}, 751)
  3032  			expectLeft("kv.baz", 50)
  3033  
  3034  			purge(&JSApiStreamPurgeRequest{Subject: "kv.*"}, 0)
  3035  
  3036  			// RESET
  3037  			js.DeleteStream("KV")
  3038  			// Do manually for now.
  3039  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  3040  			c.waitOnStreamLeader("$G", "KV")
  3041  
  3042  			if _, err := js.StreamInfo("KV"); err != nil {
  3043  				t.Fatalf("Unexpected error: %v", err)
  3044  			}
  3045  			// Put in 100.
  3046  			for i := 0; i < 100; i++ {
  3047  				js.PublishAsync("kv.foo", []byte("OK"))
  3048  			}
  3049  			select {
  3050  			case <-js.PublishAsyncComplete():
  3051  			case <-time.After(time.Second):
  3052  				t.Fatalf("Did not receive completion signal")
  3053  			}
  3054  			purge(&JSApiStreamPurgeRequest{Subject: "kv.foo", Keep: 10}, 10)
  3055  			purge(&JSApiStreamPurgeRequest{Subject: "kv.foo", Keep: 10}, 10)
  3056  			expectLeft("kv.foo", 10)
  3057  
  3058  			// RESET AGAIN
  3059  			js.DeleteStream("KV")
  3060  			// Do manually for now.
  3061  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  3062  			c.waitOnStreamLeader("$G", "KV")
  3063  
  3064  			if _, err := js.StreamInfo("KV"); err != nil {
  3065  				t.Fatalf("Unexpected error: %v", err)
  3066  			}
  3067  			// Put in 100.
  3068  			for i := 0; i < 100; i++ {
  3069  				js.Publish("kv.foo", []byte("OK"))
  3070  			}
  3071  			purge(&JSApiStreamPurgeRequest{Keep: 10}, 10)
  3072  			expectLeft(">", 10)
  3073  
  3074  			// RESET AGAIN
  3075  			js.DeleteStream("KV")
  3076  			// Do manually for now.
  3077  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  3078  			if _, err := js.StreamInfo("KV"); err != nil {
  3079  				t.Fatalf("Unexpected error: %v", err)
  3080  			}
  3081  			// Put in 100.
  3082  			for i := 0; i < 100; i++ {
  3083  				js.Publish("kv.foo", []byte("OK"))
  3084  			}
  3085  			purge(&JSApiStreamPurgeRequest{Sequence: 90}, 11) // Up to 90 so we keep that, hence the 11.
  3086  			expectLeft(">", 11)
  3087  		})
  3088  	}
  3089  }
  3090  
  3091  func TestNoRaceJetStreamFileStoreCompaction(t *testing.T) {
  3092  	s := RunBasicJetStreamServer(t)
  3093  	defer s.Shutdown()
  3094  
  3095  	nc, js := jsClientConnect(t, s)
  3096  	defer nc.Close()
  3097  
  3098  	cfg := &nats.StreamConfig{
  3099  		Name:              "KV",
  3100  		Subjects:          []string{"KV.>"},
  3101  		MaxMsgsPerSubject: 1,
  3102  	}
  3103  	if _, err := js.AddStream(cfg); err != nil {
  3104  		t.Fatalf("Unexpected error: %v", err)
  3105  	}
  3106  
  3107  	toSend := 10_000
  3108  	data := make([]byte, 4*1024)
  3109  	crand.Read(data)
  3110  
  3111  	// First one.
  3112  	js.PublishAsync("KV.FM", data)
  3113  
  3114  	for i := 0; i < toSend; i++ {
  3115  		js.PublishAsync(fmt.Sprintf("KV.%d", i+1), data)
  3116  	}
  3117  	// Do again and overwrite the previous batch.
  3118  	for i := 0; i < toSend; i++ {
  3119  		js.PublishAsync(fmt.Sprintf("KV.%d", i+1), data)
  3120  	}
  3121  	select {
  3122  	case <-js.PublishAsyncComplete():
  3123  	case <-time.After(10 * time.Second):
  3124  		t.Fatalf("Did not receive completion signal")
  3125  	}
  3126  
  3127  	// Now check by hand the utilization level.
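        	// Since every KV.N subject was overwritten once, compaction should have reclaimed most
        	// of the space held by the replaced messages, keeping utilization high.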
  3128  	mset, err := s.GlobalAccount().lookupStream("KV")
  3129  	if err != nil {
  3130  		t.Fatalf("Unexpected error: %v", err)
  3131  	}
  3132  	total, used, _ := mset.Store().Utilization()
  3133  	if pu := 100.0 * float32(used) / float32(total); pu < 80.0 {
  3134  		t.Fatalf("Utilization is less than 80%%, got %.2f", pu)
  3135  	}
  3136  }
  3137  
  3138  func TestNoRaceJetStreamEncryptionEnabledOnRestartWithExpire(t *testing.T) {
  3139  	conf := createConfFile(t, []byte(fmt.Sprintf(`
  3140  		listen: 127.0.0.1:-1
  3141  		jetstream {
  3142  			store_dir = %q
  3143  		}
  3144  	`, t.TempDir())))
  3145  
  3146  	s, _ := RunServerWithConfig(conf)
  3147  	defer s.Shutdown()
  3148  
  3149  	config := s.JetStreamConfig()
  3150  	if config == nil {
  3151  		t.Fatalf("Expected config but got none")
  3152  	}
  3153  	defer removeDir(t, config.StoreDir)
  3154  
  3155  	nc, js := jsClientConnect(t, s)
  3156  	defer nc.Close()
  3157  
  3158  	toSend := 10_000
  3159  
  3160  	cfg := &nats.StreamConfig{
  3161  		Name:     "TEST",
  3162  		Subjects: []string{"foo", "bar"},
  3163  		MaxMsgs:  int64(toSend),
  3164  	}
  3165  	if _, err := js.AddStream(cfg); err != nil {
  3166  		t.Fatalf("Unexpected error: %v", err)
  3167  	}
  3168  
  3169  	data := make([]byte, 4*1024) // 4K payload
  3170  	crand.Read(data)
  3171  
  3172  	for i := 0; i < toSend; i++ {
  3173  		js.PublishAsync("foo", data)
  3174  		js.PublishAsync("bar", data)
  3175  	}
  3176  	select {
  3177  	case <-js.PublishAsyncComplete():
  3178  	case <-time.After(5 * time.Second):
  3179  		t.Fatalf("Did not receive completion signal")
  3180  	}
  3181  
  3182  	_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "dlc", AckPolicy: nats.AckExplicitPolicy})
  3183  	if err != nil {
  3184  		t.Fatalf("Unexpected error: %v", err)
  3185  	}
  3186  
  3187  	// Restart
  3188  	nc.Close()
  3189  	s.Shutdown()
  3190  
  3191  	ncs := fmt.Sprintf("\nlisten: 127.0.0.1:-1\njetstream: {key: %q, store_dir: %q}\n", "s3cr3t!", config.StoreDir)
  3192  	conf = createConfFile(t, []byte(ncs))
  3193  
  3194  	// Try to drain entropy to see if it affects startup time.
  3195  	drain := make([]byte, 32*1024*1024) // Pull 32Mb of crypto rand.
  3196  	crand.Read(drain)
  3197  
  3198  	start := time.Now()
  3199  	s, _ = RunServerWithConfig(conf)
  3200  	defer s.Shutdown()
  3201  	dd := time.Since(start)
  3202  	if dd > 5*time.Second {
  3203  		t.Fatalf("Restart took longer than expected: %v", dd)
  3204  	}
  3205  }
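
        // For reference, enabling encryption at rest as done above only needs a key in
        // the jetstream block of the config; a minimal sketch (placeholder values):
        //
        //	jetstream {
        //		key: "s3cr3t!"
        //		store_dir: "/path/to/store"
        //	}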
  3206  
  3207  // This test was from Ivan K. and showed a bug in the filestore implementation.
  3208  // This is skipped by default since it takes >40s to run.
  3209  func TestNoRaceJetStreamOrderedConsumerMissingMsg(t *testing.T) {
  3210  	// Remove the skip below to run. Needs to be on a big machine; we do not want this as part of Travis tests atm.
  3211  	skip(t)
  3212  
  3213  	s := RunBasicJetStreamServer(t)
  3214  	defer s.Shutdown()
  3215  
  3216  	nc, js := jsClientConnect(t, s)
  3217  	defer nc.Close()
  3218  
  3219  	if _, err := js.AddStream(&nats.StreamConfig{
  3220  		Name:     "benchstream",
  3221  		Subjects: []string{"testsubject"},
  3222  		Replicas: 1,
  3223  	}); err != nil {
  3224  		t.Fatalf("add stream failed: %s", err)
  3225  	}
  3226  
  3227  	total := 1_000_000
  3228  
  3229  	numSubs := 10
  3230  	ch := make(chan struct{}, numSubs)
  3231  	wg := sync.WaitGroup{}
  3232  	wg.Add(numSubs)
  3233  	errCh := make(chan error, 1)
  3234  	for i := 0; i < numSubs; i++ {
  3235  		nc, js := jsClientConnect(t, s)
  3236  		defer nc.Close()
  3237  		go func(nc *nats.Conn, js nats.JetStreamContext) {
  3238  			defer wg.Done()
  3239  			received := 0
  3240  			_, err := js.Subscribe("testsubject", func(m *nats.Msg) {
  3241  				meta, _ := m.Metadata()
  3242  				if meta.Sequence.Consumer != meta.Sequence.Stream {
  3243  					nc.Close()
  3244  					errCh <- fmt.Errorf("Bad meta: %+v", meta)
  3245  				}
  3246  				received++
  3247  				if received == total {
  3248  					ch <- struct{}{}
  3249  				}
  3250  			}, nats.OrderedConsumer())
  3251  			if err != nil {
  3252  				select {
  3253  				case errCh <- fmt.Errorf("Error creating sub: %v", err):
  3254  				default:
  3255  				}
  3257  			}
  3258  		}(nc, js)
  3259  	}
  3260  	wg.Wait()
  3261  	select {
  3262  	case e := <-errCh:
  3263  		t.Fatal(e)
  3264  	default:
  3265  	}
  3266  
  3267  	payload := make([]byte, 500)
  3268  	for i := 1; i <= total; i++ {
  3269  		js.PublishAsync("testsubject", payload)
  3270  	}
  3271  	select {
  3272  	case <-js.PublishAsyncComplete():
  3273  	case <-time.After(10 * time.Second):
  3274  		t.Fatalf("Did not send all messages")
  3275  	}
  3276  
  3277  	// Now wait for consumers to be done:
  3278  	for i := 0; i < numSubs; i++ {
  3279  		select {
  3280  		case <-ch:
  3281  		case <-time.After(10 * time.Second):
  3282  			t.Fatal("Did not receive all messages for all consumers in time")
  3283  		}
  3284  	}
  3286  }
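
        // orderedGapCheck is a minimal sketch (not used by the test above) of the
        // invariant that test relies on: with an ordered consumer the consumer and
        // stream sequences should track each other exactly, so any divergence means a
        // message was missed or delivered out of order.
        func orderedGapCheck(m *nats.Msg) error {
        	meta, err := m.Metadata()
        	if err != nil {
        		return err
        	}
        	if meta.Sequence.Consumer != meta.Sequence.Stream {
        		return fmt.Errorf("sequence mismatch: consumer=%d stream=%d",
        			meta.Sequence.Consumer, meta.Sequence.Stream)
        	}
        	return nil
        }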
  3287  
  3288  // Issue #2488 - Bad accounting, cannot reproduce the stalled consumers after the last several PRs.
  3289  // The issue did show a bug in the ack logic for no-ack and interest-based retention.
  3290  func TestNoRaceJetStreamClusterInterestPolicyAckNone(t *testing.T) {
  3291  	for _, test := range []struct {
  3292  		name    string
  3293  		durable string
  3294  	}{
  3295  		{"durable", "dlc"},
  3296  		{"ephemeral", _EMPTY_},
  3297  	} {
  3298  		t.Run(test.name, func(t *testing.T) {
  3299  			c := createJetStreamClusterExplicit(t, "R3S", 3)
  3300  			defer c.shutdown()
  3301  
  3302  			// Client based API
  3303  			nc, js := jsClientConnect(t, c.randomServer())
  3304  			defer nc.Close()
  3305  
  3306  			_, err := js.AddStream(&nats.StreamConfig{
  3307  				Name:      "cluster",
  3308  				Subjects:  []string{"cluster.*"},
  3309  				Retention: nats.InterestPolicy,
  3310  				Discard:   nats.DiscardOld,
  3311  				Replicas:  3,
  3312  			})
  3313  			if err != nil {
  3314  				t.Fatalf("Unexpected error: %v", err)
  3315  			}
  3316  
  3317  			var received uint32
  3318  			mh := func(m *nats.Msg) {
  3319  				atomic.AddUint32(&received, 1)
  3320  			}
  3321  
  3322  			opts := []nats.SubOpt{nats.DeliverNew(), nats.AckNone()}
  3323  			if test.durable != _EMPTY_ {
  3324  				opts = append(opts, nats.Durable(test.durable))
  3325  			}
  3326  			_, err = js.Subscribe("cluster.created", mh, opts...)
  3327  			if err != nil {
  3328  				t.Fatalf("Unexpected error: %v", err)
  3329  			}
  3330  
  3331  			msg := []byte("ACK ME")
  3332  			const total = uint32(1_000)
  3333  			for i := 0; i < int(total); i++ {
  3334  				if _, err := js.Publish("cluster.created", msg); err != nil {
  3335  					t.Fatalf("Unexpected error: %v", err)
  3336  				}
  3337  				//time.Sleep(100 * time.Microsecond)
  3338  			}
  3339  
  3340  			// Wait for all messages to be received.
  3341  			checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
  3342  				r := atomic.LoadUint32(&received)
  3343  				if r == total {
  3344  					return nil
  3345  				}
  3346  				return fmt.Errorf("Received only %d out of %d", r, total)
  3347  			})
  3348  
  3349  			checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3350  				si, err := js.StreamInfo("cluster")
  3351  				if err != nil {
  3352  					t.Fatalf("Error getting stream info: %v", err)
  3353  				}
  3354  				if si.State.Msgs != 0 {
  3355  					return fmt.Errorf("Expected no messages, got %d", si.State.Msgs)
  3356  				}
  3357  				return nil
  3358  			})
  3359  		})
  3360  	}
  3361  }
  3362  
  3363  // There was a bug in the filestore compact code that would cause a store operation
  3364  // with JSExpectedLastSubjSeq set to fail with "wrong last sequence: 0".
  3365  func TestNoRaceJetStreamLastSubjSeqAndFilestoreCompact(t *testing.T) {
  3366  	s := RunBasicJetStreamServer(t)
  3367  	defer s.Shutdown()
  3368  
  3369  	// Client based API
  3370  	nc, js := jsClientConnect(t, s)
  3371  	defer nc.Close()
  3372  
  3373  	_, err := js.AddStream(&nats.StreamConfig{
  3374  		Name:              "MQTT_sess",
  3375  		Subjects:          []string{"MQTT.sess.>"},
  3376  		Storage:           nats.FileStorage,
  3377  		Retention:         nats.LimitsPolicy,
  3378  		Replicas:          1,
  3379  		MaxMsgsPerSubject: 1,
  3380  	})
  3381  	if err != nil {
  3382  		t.Fatalf("Unexpected error: %v", err)
  3383  	}
  3384  
  3385  	firstPayload := make([]byte, 40)
  3386  	secondPayload := make([]byte, 380)
  3387  	for iter := 0; iter < 2; iter++ {
  3388  		for i := 0; i < 4000; i++ {
  3389  			subj := "MQTT.sess." + getHash(fmt.Sprintf("client_%d", i))
  3390  			pa, err := js.Publish(subj, firstPayload)
  3391  			if err != nil {
  3392  				t.Fatalf("Error on publish: %v", err)
  3393  			}
  3394  			m := nats.NewMsg(subj)
  3395  			m.Data = secondPayload
  3396  			eseq := strconv.FormatInt(int64(pa.Sequence), 10)
  3397  			m.Header.Set(JSExpectedLastSubjSeq, eseq)
  3398  			if _, err := js.PublishMsg(m); err != nil {
  3399  				t.Fatalf("Error on publish (iter=%v seq=%v): %v", iter+1, pa.Sequence, err)
  3400  			}
  3401  		}
  3402  	}
  3403  }
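
        // publishWithExpectedSubjSeq is an illustrative sketch of the optimistic
        // concurrency pattern exercised above: set the JSExpectedLastSubjSeq header to
        // the last known sequence for the subject so the server rejects the publish if
        // the subject was written to in the meantime. Not used by other tests.
        func publishWithExpectedSubjSeq(js nats.JetStreamContext, subj string, data []byte, lastSeq uint64) (*nats.PubAck, error) {
        	m := nats.NewMsg(subj)
        	m.Data = data
        	m.Header.Set(JSExpectedLastSubjSeq, strconv.FormatUint(lastSeq, 10))
        	return js.PublishMsg(m)
        }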
  3404  
  3405  // Issue #2548
  3406  func TestNoRaceJetStreamClusterMemoryStreamConsumerRaftGrowth(t *testing.T) {
  3407  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3408  	defer c.shutdown()
  3409  
  3410  	nc, js := jsClientConnect(t, c.randomServer())
  3411  	defer nc.Close()
  3412  
  3413  	_, err := js.AddStream(&nats.StreamConfig{
  3414  		Name:      "memory-leak",
  3415  		Subjects:  []string{"memory-leak"},
  3416  		Retention: nats.LimitsPolicy,
  3417  		MaxMsgs:   1000,
  3418  		Discard:   nats.DiscardOld,
  3419  		MaxAge:    time.Minute,
  3420  		Storage:   nats.MemoryStorage,
  3421  		Replicas:  3,
  3422  	})
  3423  	if err != nil {
  3424  		t.Fatalf("Unexpected error: %v", err)
  3425  	}
  3426  
  3427  	_, err = js.QueueSubscribe("memory-leak", "q1", func(msg *nats.Msg) {
  3428  		time.Sleep(1 * time.Second)
  3429  		msg.AckSync()
  3430  	})
  3431  	if err != nil {
  3432  		t.Fatalf("Unexpected error: %v", err)
  3433  	}
  3434  
  3435  	// Send 10k messages (must be > 8192, which is compactNumMin from monitorConsumer).
  3436  	msg := []byte("NATS is a connective technology that powers modern distributed systems.")
  3437  	for i := 0; i < 10_000; i++ {
  3438  		if _, err := js.Publish("memory-leak", msg); err != nil {
  3439  			t.Fatalf("Unexpected error: %v", err)
  3440  		}
  3441  	}
  3442  
  3443  	// We will verify here that the underlying raft WAL for the leader does not hold more than 8192 messages.
  3444  	cl := c.consumerLeader("$G", "memory-leak", "q1")
  3445  	mset, err := cl.GlobalAccount().lookupStream("memory-leak")
  3446  	if err != nil {
  3447  		t.Fatalf("Unexpected error: %v", err)
  3448  	}
  3449  	o := mset.lookupConsumer("q1")
  3450  	if o == nil {
  3451  		t.Fatalf("Error looking up consumer %q", "q1")
  3452  	}
  3453  	node := o.raftNode().(*raft)
  3454  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  3455  		if ms := node.wal.(*memStore); ms.State().Msgs > 8192 {
  3456  			return fmt.Errorf("Did not compact the raft memory WAL")
  3457  		}
  3458  		return nil
  3459  	})
  3460  }
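
        // walUnderLimit is a minimal sketch of the assertion made above: poll the
        // consumer's raft WAL until its message count is at or below the expected
        // compaction threshold. It assumes the WAL is backed by a *memStore, as in the
        // test above, and is not used elsewhere.
        func walUnderLimit(t *testing.T, node *raft, limit uint64) {
        	t.Helper()
        	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
        		if ms := node.wal.(*memStore); ms.State().Msgs > limit {
        			return fmt.Errorf("Did not compact the raft memory WAL: %d msgs > %d", ms.State().Msgs, limit)
        		}
        		return nil
        	})
        }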
  3461  
  3462  func TestNoRaceJetStreamClusterCorruptWAL(t *testing.T) {
  3463  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3464  	defer c.shutdown()
  3465  
  3466  	nc, js := jsClientConnect(t, c.randomServer())
  3467  	defer nc.Close()
  3468  
  3469  	if _, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Subjects: []string{"foo"}, Replicas: 3}); err != nil {
  3470  		t.Fatalf("Unexpected error: %v", err)
  3471  	}
  3472  
  3473  	sub, err := js.PullSubscribe("foo", "dlc")
  3474  	if err != nil {
  3475  		t.Fatalf("Unexpected error: %v", err)
  3476  	}
  3477  
  3478  	numMsgs := 1000
  3479  	for i := 0; i < numMsgs; i++ {
  3480  		js.PublishAsync("foo", []byte("WAL"))
  3481  	}
  3482  	select {
  3483  	case <-js.PublishAsyncComplete():
  3484  	case <-time.After(5 * time.Second):
  3485  		t.Fatalf("Did not receive completion signal")
  3486  	}
  3487  
  3488  	for i, m := range fetchMsgs(t, sub, 200, 5*time.Second) {
  3489  		// Ack the first 50 and then every other even one after that.
  3490  		if i < 50 || i%2 == 1 {
  3491  			m.AckSync()
  3492  		}
  3493  	}
  3494  	// Make sure acks processed.
  3495  	time.Sleep(200 * time.Millisecond)
  3496  	nc.Close()
  3497  
  3498  	// Check consumer consistency.
  3499  	checkConsumerWith := func(delivered, ackFloor uint64, ackPending int) {
  3500  		t.Helper()
  3501  		nc, js := jsClientConnect(t, c.randomServer())
  3502  		defer nc.Close()
  3503  
  3504  		checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3505  			ci, err := js.ConsumerInfo("TEST", "dlc")
  3506  			if err != nil {
  3507  				return fmt.Errorf("Unexpected error: %v", err)
  3508  			}
  3509  			if ci.Delivered.Consumer != ci.Delivered.Stream || ci.Delivered.Consumer != delivered {
  3510  				return fmt.Errorf("Expected %d for delivered, got %+v", delivered, ci.Delivered)
  3511  			}
  3512  			if ci.AckFloor.Consumer != ci.AckFloor.Stream || ci.AckFloor.Consumer != ackFloor {
  3513  				return fmt.Errorf("Expected %d for ack floor, got %+v", ackFloor, ci.AckFloor)
  3514  			}
  3515  			nm := uint64(numMsgs)
  3516  			if ci.NumPending != nm-delivered {
  3517  				return fmt.Errorf("Expected num pending to be %d, got %d", nm-delivered, ci.NumPending)
  3518  			}
  3519  			if ci.NumAckPending != ackPending {
  3520  				return fmt.Errorf("Expected num ack pending to be %d, got %d", ackPending, ci.NumAckPending)
  3521  			}
  3522  			return nil
  3523  		})
  3524  	}
  3525  
  3526  	checkConsumer := func() {
  3527  		t.Helper()
  3528  		checkConsumerWith(200, 50, 75)
  3529  	}
  3530  
  3531  	checkConsumer()
  3532  
  3533  	// Grab the consumer leader.
  3534  	cl := c.consumerLeader("$G", "TEST", "dlc")
  3535  	mset, err := cl.GlobalAccount().lookupStream("TEST")
  3536  	if err != nil {
  3537  		t.Fatalf("Unexpected error: %v", err)
  3538  	}
  3539  	o := mset.lookupConsumer("dlc")
  3540  	if o == nil {
  3541  		t.Fatalf("Error looking up consumer %q", "dlc")
  3542  	}
  3543  	// Grab the underlying raft node and the WAL (filestore); we will attempt to "corrupt" it.
  3544  	node := o.raftNode().(*raft)
  3545  	// We are doing a stop here to prevent the internal consumer snapshot from happening on exit
  3546  	node.Stop()
  3547  	fs := node.wal.(*fileStore)
  3548  	fcfg, cfg := fs.fcfg, fs.cfg.StreamConfig
  3549  	// Stop all the servers.
  3550  	c.stopAll()
  3551  
  3552  	// Manipulate directly with cluster down.
  3553  	fs, err = newFileStore(fcfg, cfg)
  3554  	if err != nil {
  3555  		t.Fatalf("Unexpected error: %v", err)
  3556  	}
  3557  	state := fs.State()
  3558  	sm, err := fs.LoadMsg(state.LastSeq, nil)
  3559  	if err != nil {
  3560  		t.Fatalf("Unexpected error: %v", err)
  3561  	}
  3562  	ae, err := node.decodeAppendEntry(sm.msg, nil, _EMPTY_)
  3563  	if err != nil {
  3564  		t.Fatalf("Unexpected error: %v", err)
  3565  	}
  3566  
  3567  	dentry := func(dseq, sseq, dc uint64, ts int64) []byte {
  3568  		b := make([]byte, 4*binary.MaxVarintLen64+1)
  3569  		b[0] = byte(updateDeliveredOp)
  3570  		n := 1
  3571  		n += binary.PutUvarint(b[n:], dseq)
  3572  		n += binary.PutUvarint(b[n:], sseq)
  3573  		n += binary.PutUvarint(b[n:], dc)
  3574  		n += binary.PutVarint(b[n:], ts)
  3575  		return b[:n]
  3576  	}
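
        	// For reference, the delivered record built above has the layout:
        	//   [updateDeliveredOp][uvarint dseq][uvarint sseq][uvarint dc][varint ts]
        	// A rough decode sketch (illustrative only, not executed here) would be:
        	//   dseq, n1 := binary.Uvarint(b[1:])
        	//   sseq, n2 := binary.Uvarint(b[1+n1:])
        	//   dc, n3 := binary.Uvarint(b[1+n1+n2:])
        	//   ts, _ := binary.Varint(b[1+n1+n2+n3:])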
  3577  
  3578  	// Let's put a non-contiguous AppendEntry into the system.
  3579  	ae.pindex += 10
  3580  	// Add in delivered record.
  3581  	ae.entries = []*Entry{{EntryNormal, dentry(1000, 1000, 1, time.Now().UnixNano())}}
  3582  	encoded, err := ae.encode(nil)
  3583  	if err != nil {
  3584  		t.Fatalf("Unexpected error: %v", err)
  3585  	}
  3586  	if _, _, err := fs.StoreMsg(_EMPTY_, nil, encoded); err != nil {
  3587  		t.Fatalf("Unexpected error: %v", err)
  3588  	}
  3589  	fs.Stop()
  3590  
  3591  	c.restartAllSamePorts()
  3592  	c.waitOnStreamLeader("$G", "TEST")
  3593  	c.waitOnConsumerLeader("$G", "TEST", "dlc")
  3594  
  3595  	checkConsumer()
  3596  
  3597  	// Now we will truncate the WAL out from underneath the leader.
  3598  	// Grab the consumer leader.
  3599  
  3600  	nc, js = jsClientConnect(t, c.randomServer())
  3601  	defer nc.Close()
  3602  
  3603  	cl = c.consumerLeader("$G", "TEST", "dlc")
  3604  	mset, err = cl.GlobalAccount().lookupStream("TEST")
  3605  	require_NoError(t, err)
  3606  	o = mset.lookupConsumer("dlc")
  3607  	require_True(t, o != nil)
  3608  
  3609  	// Grab underlying raft node and the WAL (filestore) and truncate it.
  3610  	// This will simulate the WAL losing state due to truncate and we want to make sure it recovers.
  3611  
  3612  	fs = o.raftNode().(*raft).wal.(*fileStore)
  3613  	state = fs.State()
  3614  	err = fs.Truncate(state.FirstSeq)
  3615  	require_True(t, err == nil || err == ErrInvalidSequence)
  3616  	state = fs.State()
  3617  
  3618  	sub, err = js.PullSubscribe("foo", "dlc")
  3619  	require_NoError(t, err)
  3620  
  3621  	// This will cause us to step down and truncate our WAL.
  3622  	sub.Fetch(100)
  3623  	c.waitOnConsumerLeader("$G", "TEST", "dlc")
  3624  	// We can't trust the full results here, other than that we have a leader back in place and that the ack floor is correct.
  3625  	ci, err := js.ConsumerInfo("TEST", "dlc")
  3626  	require_NoError(t, err)
  3627  	if ci.AckFloor.Consumer != ci.AckFloor.Stream || ci.AckFloor.Consumer != 50 {
  3628  		t.Fatalf("Expected %d for ack floor, got %+v", 50, ci.AckFloor)
  3629  	}
  3630  }
  3631  
  3632  func TestNoRaceJetStreamClusterInterestRetentionDeadlock(t *testing.T) {
  3633  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3634  	defer c.shutdown()
  3635  
  3636  	// Client based API
  3637  	s := c.randomServer()
  3638  	nc, js := jsClientConnect(t, s)
  3639  	defer nc.Close()
  3640  
  3641  	// This can trigger a deadlock with the current architecture.
  3642  	// If the stream is !limitsRetention and the consumer is DIRECT with ack none, we will try to place the msg seq
  3643  	// onto a chan for the stream to consider removing. All conditions above must hold to trigger.
  3644  
  3645  	// We will attempt to trigger here with a stream mirror setup which uses an R=1 DIRECT consumer to replicate msgs.
  3646  	_, err := js.AddStream(&nats.StreamConfig{Name: "S", Retention: nats.InterestPolicy, Storage: nats.MemoryStorage})
  3647  	if err != nil {
  3648  		t.Fatalf("Unexpected error: %v", err)
  3649  	}
  3650  
  3651  	// Create a mirror which will create the consumer profile to trigger.
  3652  	_, err = js.AddStream(&nats.StreamConfig{Name: "M", Mirror: &nats.StreamSource{Name: "S"}})
  3653  	if err != nil {
  3654  		t.Fatalf("Unexpected error: %v", err)
  3655  	}
  3656  
  3657  	// Queue up a lot of messages.
  3658  	numRequests := 20_000
  3659  	for i := 0; i < numRequests; i++ {
  3660  		js.PublishAsync("S", []byte("Q"))
  3661  	}
  3662  	select {
  3663  	case <-js.PublishAsyncComplete():
  3664  	case <-time.After(5 * time.Second):
  3665  		t.Fatalf("Did not receive completion signal")
  3666  	}
  3667  
  3668  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3669  		si, err := js.StreamInfo("S")
  3670  		if err != nil {
  3671  			t.Fatalf("Unexpected error: %v", err)
  3672  		}
  3673  		if si.State.Msgs != 0 {
  3674  			return fmt.Errorf("Expected 0 msgs, got state: %+v", si.State)
  3675  		}
  3676  		return nil
  3677  	})
  3678  }
  3679  
  3680  func TestNoRaceJetStreamClusterMaxConsumersAndDirect(t *testing.T) {
  3681  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3682  	defer c.shutdown()
  3683  
  3684  	// Client based API
  3685  	s := c.randomServer()
  3686  	nc, js := jsClientConnect(t, s)
  3687  	defer nc.Close()
  3688  
  3689  	// We want to make sure max consumer limits do not affect mirrors or sources, etc.
  3690  	_, err := js.AddStream(&nats.StreamConfig{Name: "S", Storage: nats.MemoryStorage, MaxConsumers: 1})
  3691  	if err != nil {
  3692  		t.Fatalf("Unexpected error: %v", err)
  3693  	}
  3694  
  3695  	var mirrors []string
  3696  	for i := 0; i < 10; i++ {
  3697  		// Create a mirror.
  3698  		mname := fmt.Sprintf("M-%d", i+1)
  3699  		mirrors = append(mirrors, mname)
  3700  		_, err = js.AddStream(&nats.StreamConfig{Name: mname, Mirror: &nats.StreamSource{Name: "S"}})
  3701  		if err != nil {
  3702  			t.Fatalf("Unexpected error: %v", err)
  3703  		}
  3704  	}
  3705  
  3706  	// Queue up messages.
  3707  	numRequests := 20
  3708  	for i := 0; i < numRequests; i++ {
  3709  		js.Publish("S", []byte("Q"))
  3710  	}
  3711  
  3712  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3713  		for _, mname := range mirrors {
  3714  			si, err := js.StreamInfo(mname)
  3715  			if err != nil {
  3716  				t.Fatalf("Unexpected error: %v", err)
  3717  			}
  3718  			if si.State.Msgs != uint64(numRequests) {
  3719  				return fmt.Errorf("Expected %d msgs for %q, got state: %+v", numRequests, mname, si.State)
  3720  			}
  3721  		}
  3722  		return nil
  3723  	})
  3724  }
  3725  
  3726  // Make sure when we try to hard reset a stream state in a cluster that we also re-create the consumers.
  3727  func TestNoRaceJetStreamClusterStreamReset(t *testing.T) {
  3728  	// Speed up raft
  3729  	omin, omax, ohb := minElectionTimeout, maxElectionTimeout, hbInterval
  3730  	minElectionTimeout = 250 * time.Millisecond
  3731  	maxElectionTimeout = time.Second
  3732  	hbInterval = 50 * time.Millisecond
  3733  	defer func() {
  3734  		minElectionTimeout = omin
  3735  		maxElectionTimeout = omax
  3736  		hbInterval = ohb
  3737  	}()
  3738  
  3739  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3740  	defer c.shutdown()
  3741  
  3742  	// Client based API
  3743  	s := c.randomServer()
  3744  	nc, js := jsClientConnect(t, s)
  3745  	defer nc.Close()
  3746  
  3747  	_, err := js.AddStream(&nats.StreamConfig{
  3748  		Name:      "TEST",
  3749  		Subjects:  []string{"foo.*"},
  3750  		Replicas:  2,
  3751  		Retention: nats.WorkQueuePolicy,
  3752  	})
  3753  	if err != nil {
  3754  		t.Fatalf("Unexpected error: %v", err)
  3755  	}
  3756  
  3757  	numRequests := 20
  3758  	for i := 0; i < numRequests; i++ {
  3759  		js.Publish("foo.created", []byte("REQ"))
  3760  	}
  3761  
  3762  	// Durable.
  3763  	sub, err := js.SubscribeSync("foo.created", nats.Durable("d1"))
  3764  	if err != nil {
  3765  		t.Fatalf("Unexpected error: %v", err)
  3766  	}
  3767  	defer sub.Unsubscribe()
  3768  
  3769  	si, err := js.StreamInfo("TEST")
  3770  	require_NoError(t, err)
  3771  	require_True(t, si.State.Msgs == uint64(numRequests))
  3772  
  3773  	// Let things settle a bit for the goroutine checks.
  3774  	time.Sleep(500 * time.Millisecond)
  3775  
  3776  	// Grab the number of goroutines.
  3777  	base := runtime.NumGoroutine()
  3778  
  3779  	// Make the consumer busy here by async sending a bunch of messages.
  3780  	for i := 0; i < numRequests*10; i++ {
  3781  		js.PublishAsync("foo.created", []byte("REQ"))
  3782  	}
  3783  
  3784  	// Grab a server that is the consumer leader for the durable.
  3785  	cl := c.consumerLeader("$G", "TEST", "d1")
  3786  	mset, err := cl.GlobalAccount().lookupStream("TEST")
  3787  	if err != nil {
  3788  		t.Fatalf("Unexpected error: %v", err)
  3789  	}
  3790  	// Do a hard reset here by hand.
  3791  	mset.resetClusteredState(nil)
  3792  
  3793  	// Wait until we have the consumer leader re-elected.
  3794  	c.waitOnConsumerLeader("$G", "TEST", "d1")
  3795  
  3796  	// Make sure we can get the consumer info eventually.
  3797  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  3798  		_, err := js.ConsumerInfo("TEST", "d1", nats.MaxWait(250*time.Millisecond))
  3799  		return err
  3800  	})
  3801  
  3802  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  3803  		if after := runtime.NumGoroutine(); base > after {
  3804  			return fmt.Errorf("Expected %d go routines, got %d", base, after)
  3805  		}
  3806  		return nil
  3807  	})
  3808  
  3809  	// Simulate a low-level write error on our consumer and make sure we can recover, etc.
  3810  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  3811  		if cl = c.consumerLeader("$G", "TEST", "d1"); cl != nil {
  3812  			return nil
  3813  		}
  3814  		return errors.New("waiting on consumer leader")
  3815  	})
  3816  
  3817  	mset, err = cl.GlobalAccount().lookupStream("TEST")
  3818  	if err != nil {
  3819  		t.Fatalf("Unexpected error: %v", err)
  3820  	}
  3821  	o := mset.lookupConsumer("d1")
  3822  	if o == nil {
  3823  		t.Fatalf("Did not retrieve consumer")
  3824  	}
  3825  	node := o.raftNode().(*raft)
  3826  	if node == nil {
  3827  		t.Fatalf("could not retrieve the raft node for consumer")
  3828  	}
  3829  
  3830  	nc.Close()
  3831  	node.setWriteErr(io.ErrShortWrite)
  3832  
  3833  	c.stopAll()
  3834  	c.restartAll()
  3835  
  3836  	c.waitOnStreamLeader("$G", "TEST")
  3837  	c.waitOnConsumerLeader("$G", "TEST", "d1")
  3838  }
  3839  
  3840  // Reports of high CPU on compaction for a KV store.
  3841  func TestNoRaceJetStreamKeyValueCompaction(t *testing.T) {
  3842  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3843  	defer c.shutdown()
  3844  
  3845  	// Client based API
  3846  	nc, js := jsClientConnect(t, c.randomServer())
  3847  	defer nc.Close()
  3848  
  3849  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  3850  		Bucket:   "COMPACT",
  3851  		Replicas: 3,
  3852  	})
  3853  	if err != nil {
  3854  		t.Fatalf("Unexpected error: %v", err)
  3855  	}
  3856  
  3857  	value := strings.Repeat("A", 128*1024)
  3858  	for i := 0; i < 5_000; i++ {
  3859  		key := fmt.Sprintf("K-%d", rand.Intn(256)+1)
  3860  		if _, err := kv.PutString(key, value); err != nil {
  3861  			t.Fatalf("Unexpected error: %v", err)
  3862  		}
  3863  	}
  3864  }
  3865  
  3866  // Trying to recreate an issue rip saw with KV and server restarts complaining about a sequence
  3867  // mismatch for a few minutes and growing memory.
  3868  func TestNoRaceJetStreamClusterStreamSeqMismatchIssue(t *testing.T) {
  3869  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3870  	defer c.shutdown()
  3871  
  3872  	// Client based API
  3873  	nc, js := jsClientConnect(t, c.randomServer())
  3874  	defer nc.Close()
  3875  
  3876  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  3877  		Bucket:   "MM",
  3878  		Replicas: 3,
  3879  		TTL:      500 * time.Millisecond,
  3880  	})
  3881  	require_NoError(t, err)
  3882  
  3883  	for i := 1; i <= 10; i++ {
  3884  		if _, err := kv.PutString("k", "1"); err != nil {
  3885  			t.Fatalf("Unexpected error: %v", err)
  3886  		}
  3887  	}
  3888  	// Close in case we are connected to the server we are about to shut down. We will recreate.
  3889  	nc.Close()
  3890  
  3891  	// Shutdown a non-leader.
  3892  	s := c.randomNonStreamLeader("$G", "KV_MM")
  3893  	s.Shutdown()
  3894  
  3895  	nc, js = jsClientConnect(t, c.randomServer())
  3896  	defer nc.Close()
  3897  
  3898  	kv, err = js.KeyValue("MM")
  3899  	require_NoError(t, err)
  3900  
  3901  	// Now change the state of the stream such that we have to do a compact upon restart
  3902  	// of the downed server.
  3903  	for i := 1; i <= 10; i++ {
  3904  		if _, err := kv.PutString("k", "2"); err != nil {
  3905  			t.Fatalf("Unexpected error: %v", err)
  3906  		}
  3907  	}
  3908  
  3909  	// Raft could save us here, so we need to run a compact (snapshot) on the leader.
  3910  	snapshotLeader := func() {
  3911  		sl := c.streamLeader("$G", "KV_MM")
  3912  		if sl == nil {
  3913  			t.Fatalf("Did not get the leader")
  3914  		}
  3915  		mset, err := sl.GlobalAccount().lookupStream("KV_MM")
  3916  		require_NoError(t, err)
  3917  		node := mset.raftNode()
  3918  		if node == nil {
  3919  			t.Fatalf("Could not get stream group")
  3920  		}
  3921  		if err := node.InstallSnapshot(mset.stateSnapshot()); err != nil {
  3922  			t.Fatalf("Error installing snapshot: %v", err)
  3923  		}
  3924  	}
  3925  
  3926  	// Now wait for expiration
  3927  	time.Sleep(time.Second)
  3928  
  3929  	snapshotLeader()
  3930  
  3931  	s = c.restartServer(s)
  3932  	c.waitOnServerCurrent(s)
  3933  
  3934  	// We want to make sure we do not reset the raft state on a catchup that yields no requests.
  3935  	// The bug was that if we did not actually request any help from the snapshot we did not set mset.lseq properly,
  3936  	// so when we sent the next batch it would cause a raft reset due to a cluster reset for our stream.
  3937  	mset, err := s.GlobalAccount().lookupStream("KV_MM")
  3938  	require_NoError(t, err)
  3939  
  3940  	for i := 1; i <= 10; i++ {
  3941  		if _, err := kv.PutString("k1", "X"); err != nil {
  3942  			t.Fatalf("Unexpected error: %v", err)
  3943  		}
  3944  	}
  3945  
  3946  	c.waitOnStreamCurrent(s, "$G", "KV_MM")
  3947  
  3948  	// Make sure we did not reset our stream.
  3949  	msetNew, err := s.GlobalAccount().lookupStream("KV_MM")
  3950  	require_NoError(t, err)
  3951  	if msetNew != mset {
  3952  		t.Fatalf("Stream was reset")
  3953  	}
  3954  }
  3955  
  3956  func TestNoRaceJetStreamClusterStreamDropCLFS(t *testing.T) {
  3957  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3958  	defer c.shutdown()
  3959  
  3960  	// Client based API
  3961  	nc, js := jsClientConnect(t, c.randomServer())
  3962  	defer nc.Close()
  3963  
  3964  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  3965  		Bucket:   "CLFS",
  3966  		Replicas: 3,
  3967  	})
  3968  	require_NoError(t, err)
  3969  
  3970  	// Will work
  3971  	_, err = kv.Create("k.1", []byte("X"))
  3972  	require_NoError(t, err)
  3973  	// Drive up CLFS state on leader.
  3974  	for i := 0; i < 10; i++ {
  3975  		_, err = kv.Create("k.1", []byte("X"))
  3976  		require_Error(t, err)
  3977  	}
  3978  	// Bookend with new key success.
  3979  	_, err = kv.Create("k.2", []byte("Z"))
  3980  	require_NoError(t, err)
  3981  
  3982  	// Close in case we are connected to the server we are about to shut down. We will recreate.
  3983  	nc.Close()
  3984  
  3985  	// Shutdown, which will also clear clfs.
  3986  	s := c.randomNonStreamLeader("$G", "KV_CLFS")
  3987  	s.Shutdown()
  3988  
  3989  	nc, js = jsClientConnect(t, c.randomServer())
  3990  	defer nc.Close()
  3991  
  3992  	kv, err = js.KeyValue("CLFS")
  3993  	require_NoError(t, err)
  3994  
  3995  	// Drive up CLFS state on leader.
  3996  	for i := 0; i < 10; i++ {
  3997  		_, err = kv.Create("k.1", []byte("X"))
  3998  		require_Error(t, err)
  3999  	}
  4000  
  4001  	sl := c.streamLeader("$G", "KV_CLFS")
  4002  	if sl == nil {
  4003  		t.Fatalf("Did not get the leader")
  4004  	}
  4005  	mset, err := sl.GlobalAccount().lookupStream("KV_CLFS")
  4006  	require_NoError(t, err)
  4007  	node := mset.raftNode()
  4008  	if node == nil {
  4009  		t.Fatalf("Could not get stream group")
  4010  	}
  4011  	if err := node.InstallSnapshot(mset.stateSnapshot()); err != nil {
  4012  		t.Fatalf("Error installing snapshot: %v", err)
  4013  	}
  4014  
  4015  	_, err = kv.Create("k.3", []byte("ZZZ"))
  4016  	require_NoError(t, err)
  4017  
  4018  	s = c.restartServer(s)
  4019  	c.waitOnServerCurrent(s)
  4020  
  4021  	mset, err = s.GlobalAccount().lookupStream("KV_CLFS")
  4022  	require_NoError(t, err)
  4023  
  4024  	_, err = kv.Create("k.4", []byte("YYY"))
  4025  	require_NoError(t, err)
  4026  
  4027  	c.waitOnStreamCurrent(s, "$G", "KV_CLFS")
  4028  
  4029  	// Make sure we did not reset our stream.
  4030  	msetNew, err := s.GlobalAccount().lookupStream("KV_CLFS")
  4031  	require_NoError(t, err)
  4032  	if msetNew != mset {
  4033  		t.Fatalf("Stream was reset")
  4034  	}
  4035  }
  4036  
  4037  func TestNoRaceJetStreamMemstoreWithLargeInteriorDeletes(t *testing.T) {
  4038  	s := RunBasicJetStreamServer(t)
  4039  	defer s.Shutdown()
  4040  
  4041  	// Client for API requests.
  4042  	nc, js := jsClientConnect(t, s)
  4043  	defer nc.Close()
  4044  
  4045  	_, err := js.AddStream(&nats.StreamConfig{
  4046  		Name:              "TEST",
  4047  		Subjects:          []string{"foo", "bar"},
  4048  		MaxMsgsPerSubject: 1,
  4049  		Storage:           nats.MemoryStorage,
  4050  	})
  4051  	require_NoError(t, err)
  4052  
  4053  	acc, err := s.lookupAccount("$G")
  4054  	require_NoError(t, err)
  4055  	mset, err := acc.lookupStream("TEST")
  4056  	require_NoError(t, err)
  4057  
  4058  	msg := []byte("Hello World!")
  4059  	if _, err := js.PublishAsync("foo", msg); err != nil {
  4060  		t.Fatalf("Unexpected publish error: %v", err)
  4061  	}
  4062  	for i := 1; i <= 1_000_000; i++ {
  4063  		if _, err := js.PublishAsync("bar", msg); err != nil {
  4064  			t.Fatalf("Unexpected publish error: %v", err)
  4065  		}
  4066  	}
  4067  	select {
  4068  	case <-js.PublishAsyncComplete():
  4069  	case <-time.After(5 * time.Second):
  4070  		t.Fatalf("Did not receive completion signal")
  4071  	}
  4072  
  4073  	now := time.Now()
  4074  	ss := mset.stateWithDetail(true)
  4075  	// Before the fix the snapshot for this test would be > 200ms on my setup.
  4076  	if elapsed := time.Since(now); elapsed > 100*time.Millisecond {
  4077  		t.Fatalf("Took too long to snapshot: %v", elapsed)
  4078  	} else if elapsed > 50*time.Millisecond {
  4079  		t.Logf("WRN: Took longer than usual to snapshot: %v", elapsed)
  4080  	}
  4081  
  4082  	if ss.Msgs != 2 || ss.FirstSeq != 1 || ss.LastSeq != 1_000_001 || ss.NumDeleted != 999999 {
  4083  		// Nil this out so we do not print the large deleted list on error.
  4084  		ss.Deleted = nil
  4085  		t.Fatalf("Bad State: %+v", ss)
  4086  	}
  4087  }
  4088  
  4089  // This is related to an issue reported where we were exhausting threads by trying to
  4090  // clean up too many consumers at the same time.
  4091  // https://github.com/nats-io/nats-server/issues/2742
  4092  func TestNoRaceJetStreamConsumerFileStoreConcurrentDiskIO(t *testing.T) {
  4093  	storeDir := t.TempDir()
  4094  
  4095  	// Artificially adjust our environment for this test.
  4096  	gmp := runtime.GOMAXPROCS(32)
  4097  	defer runtime.GOMAXPROCS(gmp)
  4098  
  4099  	maxT := debug.SetMaxThreads(1050) // 1024 now
  4100  	defer debug.SetMaxThreads(maxT)
  4101  
  4102  	fs, err := newFileStore(FileStoreConfig{StoreDir: storeDir}, StreamConfig{Name: "MT", Storage: FileStorage})
  4103  	require_NoError(t, err)
  4104  	defer fs.Stop()
  4105  
  4106  	startCh := make(chan bool)
  4107  	var wg sync.WaitGroup
  4108  	var swg sync.WaitGroup
  4109  
  4110  	ts := time.Now().UnixNano()
  4111  
  4112  	// Create 1000 consumerStores
  4113  	n := 1000
  4114  	swg.Add(n)
  4115  
  4116  	for i := 1; i <= n; i++ {
  4117  		name := fmt.Sprintf("o%d", i)
  4118  		o, err := fs.ConsumerStore(name, &ConsumerConfig{AckPolicy: AckExplicit})
  4119  		require_NoError(t, err)
  4120  		wg.Add(1)
  4121  		swg.Done()
  4122  
  4123  		go func() {
  4124  			defer wg.Done()
  4125  			// Will make everyone run concurrently.
  4126  			<-startCh
  4127  			o.UpdateDelivered(22, 22, 1, ts)
  4128  			buf, _ := o.(*consumerFileStore).encodeState()
  4129  			o.(*consumerFileStore).writeState(buf)
  4130  			o.Delete()
  4131  		}()
  4132  	}
  4133  
  4134  	swg.Wait()
  4135  	close(startCh)
  4136  	wg.Wait()
  4137  }
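
        // runConcurrently is a minimal sketch of the start-gate pattern used above:
        // every worker first blocks on a shared channel, so closing that channel
        // releases them all at (roughly) the same instant. Illustrative only; the
        // tests do not call this helper.
        func runConcurrently(n int, work func(i int)) {
        	startCh := make(chan struct{})
        	var wg sync.WaitGroup
        	wg.Add(n)
        	for i := 0; i < n; i++ {
        		go func(i int) {
        			defer wg.Done()
        			// Block until the gate opens so everyone runs concurrently.
        			<-startCh
        			work(i)
        		}(i)
        	}
        	close(startCh)
        	wg.Wait()
        }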
  4138  
  4139  func TestNoRaceJetStreamClusterHealthz(t *testing.T) {
  4140  	c := createJetStreamCluster(t, jsClusterAccountsTempl, "HZ", _EMPTY_, 3, 23033, true)
  4141  	defer c.shutdown()
  4142  
  4143  	nc1, js1 := jsClientConnect(t, c.randomServer(), nats.UserInfo("one", "p"))
  4144  	defer nc1.Close()
  4145  
  4146  	nc2, js2 := jsClientConnect(t, c.randomServer(), nats.UserInfo("two", "p"))
  4147  	defer nc2.Close()
  4148  
  4149  	var err error
  4150  	for _, sname := range []string{"foo", "bar", "baz"} {
  4151  		_, err = js1.AddStream(&nats.StreamConfig{Name: sname, Replicas: 3})
  4152  		require_NoError(t, err)
  4153  		_, err = js2.AddStream(&nats.StreamConfig{Name: sname, Replicas: 3})
  4154  		require_NoError(t, err)
  4155  	}
  4156  	// R1
  4157  	_, err = js1.AddStream(&nats.StreamConfig{Name: "r1", Replicas: 1})
  4158  	require_NoError(t, err)
  4159  
  4160  	// Now shut down a server, then send a bunch of data.
  4161  	s := c.servers[0]
  4162  	s.Shutdown()
  4163  
  4164  	for i := 0; i < 5_000; i++ {
  4165  		_, err = js1.PublishAsync("foo", []byte("OK"))
  4166  		require_NoError(t, err)
  4167  		_, err = js2.PublishAsync("bar", []byte("OK"))
  4168  		require_NoError(t, err)
  4169  	}
  4170  	select {
  4171  	case <-js1.PublishAsyncComplete():
  4172  	case <-time.After(5 * time.Second):
  4173  		t.Fatalf("Did not receive completion signal")
  4174  	}
  4175  	select {
  4176  	case <-js2.PublishAsyncComplete():
  4177  	case <-time.After(5 * time.Second):
  4178  		t.Fatalf("Did not receive completion signal")
  4179  	}
  4180  
  4181  	s = c.restartServer(s)
  4182  	opts := s.getOpts()
  4183  	opts.HTTPHost = "127.0.0.1"
  4184  	opts.HTTPPort = 11222
  4185  	err = s.StartMonitoring()
  4186  	require_NoError(t, err)
  4187  	url := fmt.Sprintf("http://127.0.0.1:%d/healthz", opts.HTTPPort)
  4188  
  4189  	getHealth := func() (int, *HealthStatus) {
  4190  		resp, err := http.Get(url)
  4191  		require_NoError(t, err)
  4192  		defer resp.Body.Close()
  4193  		body, err := io.ReadAll(resp.Body)
  4194  		require_NoError(t, err)
  4195  		var hs HealthStatus
  4196  		err = json.Unmarshal(body, &hs)
  4197  		require_NoError(t, err)
  4198  		return resp.StatusCode, &hs
  4199  	}
  4200  
  4201  	errors := 0
  4202  	checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
  4203  		code, hs := getHealth()
  4204  		if code >= 200 && code < 300 {
  4205  			return nil
  4206  		}
  4207  		errors++
  4208  		return fmt.Errorf("Got %d status with %+v", code, hs)
  4209  	})
  4210  	if errors == 0 {
  4211  		t.Fatalf("Expected to have some errors until we became current, got none")
  4212  	}
  4213  }
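
        // waitForHealthy is a minimal sketch of the healthz polling done above: hit a
        // server's monitoring endpoint until it reports a 2xx status. The endpoint is
        // assumed to point at /healthz. Illustrative only; not used by the tests.
        func waitForHealthy(t *testing.T, endpoint string, timeout time.Duration) {
        	t.Helper()
        	checkFor(t, timeout, 100*time.Millisecond, func() error {
        		resp, err := http.Get(endpoint)
        		if err != nil {
        			return err
        		}
        		defer resp.Body.Close()
        		// Drain the body so the underlying connection can be reused.
        		_, _ = io.Copy(io.Discard, resp.Body)
        		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
        			return fmt.Errorf("healthz returned status %d", resp.StatusCode)
        		}
        		return nil
        	})
        }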
  4214  
  4215  // Test that we can receive larger messages with stream subject details.
  4216  // Also test that we will fail at some point and the user can fall back to
  4217  // an ordered consumer, like we do with watch for the KV Keys() call.
  4218  func TestNoRaceJetStreamStreamInfoSubjectDetailsLimits(t *testing.T) {
  4219  	conf := createConfFile(t, []byte(fmt.Sprintf(`
  4220  		listen: 127.0.0.1:-1
  4221  		jetstream {
  4222  			store_dir = %q
  4223  		}
  4224  		accounts: {
  4225  		  default: {
  4226  			jetstream: true
  4227  			users: [ {user: me, password: pwd} ]
  4228  			limits { max_payload: 512 }
  4229  		  }
  4230  		}
  4231  	`, t.TempDir())))
  4232  
  4233  	s, _ := RunServerWithConfig(conf)
  4234  	if config := s.JetStreamConfig(); config != nil {
  4235  		defer removeDir(t, config.StoreDir)
  4236  	}
  4237  	defer s.Shutdown()
  4238  
  4239  	nc, js := jsClientConnect(t, s, nats.UserInfo("me", "pwd"))
  4240  	defer nc.Close()
  4241  
  4242  	// Make sure to flush so we process the 2nd INFO.
  4243  	nc.Flush()
  4244  
  4245  	// Make sure we cannot send larger than 512 bytes.
  4246  	// But we can receive larger.
  4247  	sub, err := nc.SubscribeSync("foo")
  4248  	require_NoError(t, err)
  4249  	err = nc.Publish("foo", []byte(strings.Repeat("A", 600)))
  4250  	require_Error(t, err, nats.ErrMaxPayload)
  4251  	sub.Unsubscribe()
  4252  
  4253  	_, err = js.AddStream(&nats.StreamConfig{
  4254  		Name:     "TEST",
  4255  		Subjects: []string{"*", "X.*"},
  4256  	})
  4257  	require_NoError(t, err)
  4258  
  4259  	n := JSMaxSubjectDetails
  4260  	for i := 0; i < n; i++ {
  4261  		_, err := js.PublishAsync(fmt.Sprintf("X.%d", i), []byte("OK"))
  4262  		require_NoError(t, err)
  4263  	}
  4264  	select {
  4265  	case <-js.PublishAsyncComplete():
  4266  	case <-time.After(5 * time.Second):
  4267  		t.Fatalf("Did not receive completion signal")
  4268  	}
  4269  
  4270  	// Need to grab StreamInfo by hand for now.
  4271  	req, err := json.Marshal(&JSApiStreamInfoRequest{SubjectsFilter: "X.*"})
  4272  	require_NoError(t, err)
  4273  	resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "TEST"), req, 5*time.Second)
  4274  	require_NoError(t, err)
  4275  	var si StreamInfo
  4276  	err = json.Unmarshal(resp.Data, &si)
  4277  	require_NoError(t, err)
  4278  	if len(si.State.Subjects) != n {
  4279  		t.Fatalf("Expected to get %d subject details, got %d", n, len(si.State.Subjects))
  4280  	}
  4281  
  4282  	// Now add one more message to check pagination
  4283  	_, err = js.Publish("foo", []byte("TOO MUCH"))
  4284  	require_NoError(t, err)
  4285  
  4286  	req, err = json.Marshal(&JSApiStreamInfoRequest{ApiPagedRequest: ApiPagedRequest{Offset: n}, SubjectsFilter: nats.AllKeys})
  4287  	require_NoError(t, err)
  4288  	resp, err = nc.Request(fmt.Sprintf(JSApiStreamInfoT, "TEST"), req, 5*time.Second)
  4289  	require_NoError(t, err)
  4290  	var sir JSApiStreamInfoResponse
  4291  	err = json.Unmarshal(resp.Data, &sir)
  4292  	require_NoError(t, err)
  4293  	if len(sir.State.Subjects) != 1 {
  4294  		t.Fatalf("Expected to get 1 extra subject detail, got %d", len(sir.State.Subjects))
  4295  	}
  4296  }
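
        // pagedStreamInfo is an illustrative sketch of the raw, paged StreamInfo
        // request used above: filter the subject details and page through them with an
        // offset. It reuses the request/response types the test already exercises and
        // is not called by other tests.
        func pagedStreamInfo(nc *nats.Conn, stream, filter string, offset int) (*JSApiStreamInfoResponse, error) {
        	req, err := json.Marshal(&JSApiStreamInfoRequest{
        		ApiPagedRequest: ApiPagedRequest{Offset: offset},
        		SubjectsFilter:  filter,
        	})
        	if err != nil {
        		return nil, err
        	}
        	resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, stream), req, 5*time.Second)
        	if err != nil {
        		return nil, err
        	}
        	var sir JSApiStreamInfoResponse
        	if err := json.Unmarshal(resp.Data, &sir); err != nil {
        		return nil, err
        	}
        	return &sir, nil
        }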
  4297  
  4298  func TestNoRaceJetStreamSparseConsumers(t *testing.T) {
  4299  	s := RunBasicJetStreamServer(t)
  4300  	defer s.Shutdown()
  4301  
  4302  	nc, js := jsClientConnect(t, s)
  4303  	defer nc.Close()
  4304  
  4305  	msg := []byte("ok")
  4306  
  4307  	cases := []struct {
  4308  		name    string
  4309  		mconfig *nats.StreamConfig
  4310  	}{
  4311  		{"MemoryStore", &nats.StreamConfig{Name: "TEST", Storage: nats.MemoryStorage, MaxMsgsPerSubject: 25_000_000,
  4312  			Subjects: []string{"*"}}},
  4313  		{"FileStore", &nats.StreamConfig{Name: "TEST", Storage: nats.FileStorage, MaxMsgsPerSubject: 25_000_000,
  4314  			Subjects: []string{"*"}}},
  4315  	}
  4316  	for _, c := range cases {
  4317  		t.Run(c.name, func(t *testing.T) {
  4318  			js.DeleteStream("TEST")
  4319  			_, err := js.AddStream(c.mconfig)
  4320  			require_NoError(t, err)
  4321  
  4322  			// We will purposely place foo msgs near the beginning, then in the middle, then at the end.
  4323  			for n := 0; n < 2; n++ {
  4324  				_, err = js.PublishAsync("foo", msg, nats.StallWait(800*time.Millisecond))
  4325  				require_NoError(t, err)
  4326  
  4327  				for i := 0; i < 1_000_000; i++ {
  4328  					_, err = js.PublishAsync("bar", msg, nats.StallWait(800*time.Millisecond))
  4329  					require_NoError(t, err)
  4330  				}
  4331  				_, err = js.PublishAsync("foo", msg, nats.StallWait(800*time.Millisecond))
  4332  				require_NoError(t, err)
  4333  			}
  4334  			select {
  4335  			case <-js.PublishAsyncComplete():
  4336  			case <-time.After(5 * time.Second):
  4337  				t.Fatalf("Did not receive completion signal")
  4338  			}
  4339  
  4340  			// Now create a consumer on foo.
  4341  			ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{DeliverSubject: "x.x", FilterSubject: "foo", AckPolicy: nats.AckNonePolicy})
  4342  			require_NoError(t, err)
  4343  
  4344  			done, received := make(chan bool), uint64(0)
  4345  
  4346  			cb := func(m *nats.Msg) {
  4347  				received++
  4348  				if received >= ci.NumPending {
  4349  					done <- true
  4350  				}
  4351  			}
  4352  
  4353  			sub, err := nc.Subscribe("x.x", cb)
  4354  			require_NoError(t, err)
  4355  			defer sub.Unsubscribe()
  4356  			start := time.Now()
  4357  			var elapsed time.Duration
  4358  
  4359  			select {
  4360  			case <-done:
  4361  				elapsed = time.Since(start)
  4362  			case <-time.After(10 * time.Second):
  4363  				t.Fatal("Did not receive all messages for all consumers in time")
  4364  			}
  4365  
  4366  			if elapsed > 500*time.Millisecond {
  4367  				t.Fatalf("Getting all messages took longer than expected: %v", elapsed)
  4368  			}
  4369  		})
  4370  	}
  4371  }
  4372  
  4373  func TestNoRaceJetStreamConsumerFilterPerfDegradation(t *testing.T) {
  4374  	s := RunBasicJetStreamServer(t)
  4375  	defer s.Shutdown()
  4376  
  4377  	nc, _ := jsClientConnect(t, s)
  4378  	defer nc.Close()
  4379  
  4380  	js, err := nc.JetStream(nats.PublishAsyncMaxPending(256))
  4381  	require_NoError(t, err)
  4382  
  4383  	_, err = js.AddStream(&nats.StreamConfig{
  4384  		Name:     "test",
  4385  		Subjects: []string{"test.*.subj"},
  4386  		Replicas: 1,
  4387  	})
  4388  	require_NoError(t, err)
  4389  
  4390  	toSend := 50_000
  4391  	count := 0
  4392  	ch := make(chan struct{}, 6)
  4393  	_, err = js.Subscribe("test.*.subj", func(m *nats.Msg) {
  4394  		m.Ack()
  4395  		if count++; count == toSend {
  4396  			ch <- struct{}{}
  4397  		}
  4398  	}, nats.DeliverNew(), nats.ManualAck())
  4399  	require_NoError(t, err)
  4400  
  4401  	msg := make([]byte, 1024)
  4402  	sent := int32(0)
  4403  	send := func() {
  4404  		defer func() { ch <- struct{}{} }()
  4405  		for i := 0; i < toSend/5; i++ {
  4406  			msgID := atomic.AddInt32(&sent, 1)
  4407  			_, err := js.Publish(fmt.Sprintf("test.%d.subj", msgID), msg)
  4408  			if err != nil {
  4409  				t.Error(err)
  4410  				return
  4411  			}
  4412  		}
  4413  	}
  4414  	for i := 0; i < 5; i++ {
  4415  		go send()
  4416  	}
  4417  	timeout := time.NewTimer(10 * time.Second)
  4418  	for i := 0; i < 6; i++ {
  4419  		select {
  4420  		case <-ch:
  4421  		case <-timeout.C:
  4422  			t.Fatal("Took too long")
  4423  		}
  4424  	}
  4425  }
  4426  
  4427  func TestNoRaceJetStreamFileStoreKeyFileCleanup(t *testing.T) {
  4428  	storeDir := t.TempDir()
  4429  
  4430  	prf := func(context []byte) ([]byte, error) {
  4431  		h := hmac.New(sha256.New, []byte("dlc22"))
  4432  		if _, err := h.Write(context); err != nil {
  4433  			return nil, err
  4434  		}
  4435  		return h.Sum(nil), nil
  4436  	}
  4437  
  4438  	fs, err := newFileStoreWithCreated(
  4439  		FileStoreConfig{StoreDir: storeDir, BlockSize: 1024 * 1024},
  4440  		StreamConfig{Name: "TEST", Storage: FileStorage},
  4441  		time.Now(),
  4442  		prf, nil)
  4443  	require_NoError(t, err)
  4444  	defer fs.Stop()
  4445  
  4446  	n, msg := 10_000, []byte(strings.Repeat("Z", 1024))
  4447  	for i := 0; i < n; i++ {
  4448  		_, _, err := fs.StoreMsg(fmt.Sprintf("X.%d", i), nil, msg)
  4449  		require_NoError(t, err)
  4450  	}
  4451  
  4452  	var seqs []uint64
  4453  	for i := 1; i <= n; i++ {
  4454  		seqs = append(seqs, uint64(i))
  4455  	}
  4456  	// Randomly delete msgs, and make sure we clean up as we empty the message blocks.
  4457  	rand.Shuffle(len(seqs), func(i, j int) { seqs[i], seqs[j] = seqs[j], seqs[i] })
  4458  
  4459  	for _, seq := range seqs {
  4460  		_, err := fs.RemoveMsg(seq)
  4461  		require_NoError(t, err)
  4462  	}
  4463  
  4464  	// We will have cleaned up the main .blk and .idx files except for the lmb, but we should not have any *.fss files.
  4465  	kms, err := filepath.Glob(filepath.Join(storeDir, msgDir, keyScanAll))
  4466  	require_NoError(t, err)
  4467  
  4468  	if len(kms) > 1 {
  4469  		t.Fatalf("Expected to find only 1 key file, found %d", len(kms))
  4470  	}
  4471  }
  4472  
  4473  func TestNoRaceJetStreamMsgIdPerfDuringCatchup(t *testing.T) {
  4474  	// Remove the skip below to run. Needs to be on a bigger machine; we do not want this as part of Travis tests atm.
  4475  	skip(t)
  4476  
  4477  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  4478  	defer c.shutdown()
  4479  
  4480  	nc, js := jsClientConnect(t, c.serverByName("S-1"))
  4481  	defer nc.Close()
  4482  
  4483  	_, err := js.AddStream(&nats.StreamConfig{
  4484  		Name:     "TEST",
  4485  		Replicas: 3,
  4486  	})
  4487  	require_NoError(t, err)
  4488  
  4489  	// This will be the one we restart.
  4490  	sl := c.streamLeader("$G", "TEST")
  4491  	// Now move leader.
  4492  	_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  4493  	require_NoError(t, err)
  4494  	c.waitOnStreamLeader("$G", "TEST")
  4495  
  4496  	// Connect to new leader.
  4497  	nc, _ = jsClientConnect(t, c.streamLeader("$G", "TEST"))
  4498  	defer nc.Close()
  4499  
  4500  	js, err = nc.JetStream(nats.PublishAsyncMaxPending(1024))
  4501  	require_NoError(t, err)
  4502  
  4503  	n, ss, sr := 1_000_000, 250_000, 800_000
  4504  	m := nats.NewMsg("TEST")
  4505  	m.Data = []byte(strings.Repeat("Z", 2048))
  4506  
  4507  	// Target rate 10k msgs/sec
  4508  	start := time.Now()
  4509  
  4510  	for i := 0; i < n; i++ {
  4511  		m.Header.Set(JSMsgId, strconv.Itoa(i))
  4512  		_, err := js.PublishMsgAsync(m)
  4513  		require_NoError(t, err)
  4514  		//time.Sleep(42 * time.Microsecond)
  4515  		if i == ss {
  4516  			fmt.Printf("SD")
  4517  			sl.Shutdown()
  4518  		} else if i == sr {
  4519  			nc.Flush()
  4520  			select {
  4521  			case <-js.PublishAsyncComplete():
  4522  			case <-time.After(10 * time.Second):
  4523  			}
  4524  			fmt.Printf("RS")
  4525  			sl = c.restartServer(sl)
  4526  		}
  4527  		if i%10_000 == 0 {
  4528  			fmt.Print("#")
  4529  		}
  4530  	}
  4531  	fmt.Println()
  4532  
  4533  	// Wait to receive all messages.
  4534  	select {
  4535  	case <-js.PublishAsyncComplete():
  4536  	case <-time.After(20 * time.Second):
  4537  		t.Fatalf("Did not receive completion signal")
  4538  	}
  4539  
  4540  	tt := time.Since(start)
  4541  	si, err := js.StreamInfo("TEST")
  4542  	require_NoError(t, err)
  4543  
  4544  	fmt.Printf("Took %v to send %d msgs\n", tt, n)
  4545  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4546  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4547  
  4548  	c.waitOnStreamCurrent(sl, "$G", "TEST")
  4549  	for _, s := range c.servers {
  4550  		mset, _ := s.GlobalAccount().lookupStream("TEST")
  4551  		if state := mset.store.State(); state.Msgs != uint64(n) {
  4552  			t.Fatalf("Expected server %v to have correct number of msgs %d but got %d", s, n, state.Msgs)
  4553  		}
  4554  	}
  4555  }
  4556  
  4557  func TestNoRaceJetStreamRebuildDeDupeAndMemoryPerf(t *testing.T) {
  4558  	skip(t)
  4559  
  4560  	s := RunBasicJetStreamServer(t)
  4561  	defer s.Shutdown()
  4562  
  4563  	nc, js := jsClientConnect(t, s)
  4564  	defer nc.Close()
  4565  
  4566  	_, err := js.AddStream(&nats.StreamConfig{Name: "DD"})
  4567  	require_NoError(t, err)
  4568  
  4569  	m := nats.NewMsg("DD")
  4570  	m.Data = []byte(strings.Repeat("Z", 2048))
  4571  
  4572  	start := time.Now()
  4573  
  4574  	n := 1_000_000
  4575  	for i := 0; i < n; i++ {
  4576  		m.Header.Set(JSMsgId, strconv.Itoa(i))
  4577  		_, err := js.PublishMsgAsync(m)
  4578  		require_NoError(t, err)
  4579  	}
  4580  
  4581  	select {
  4582  	case <-js.PublishAsyncComplete():
  4583  	case <-time.After(20 * time.Second):
  4584  		t.Fatalf("Did not receive completion signal")
  4585  	}
  4586  
  4587  	tt := time.Since(start)
  4588  	si, err := js.StreamInfo("DD")
  4589  	require_NoError(t, err)
  4590  
  4591  	fmt.Printf("Took %v to send %d msgs\n", tt, n)
  4592  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4593  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4594  
  4595  	v, _ := s.Varz(nil)
  4596  	fmt.Printf("Memory AFTER SEND: %v\n", friendlyBytes(v.Mem))
  4597  
  4598  	mset, err := s.GlobalAccount().lookupStream("DD")
  4599  	require_NoError(t, err)
  4600  
  4601  	mset.mu.Lock()
  4602  	mset.ddloaded = false
  4603  	start = time.Now()
  4604  	mset.rebuildDedupe()
  4605  	fmt.Printf("TOOK %v to rebuild dd\n", time.Since(start))
  4606  	mset.mu.Unlock()
  4607  
  4608  	v, _ = s.Varz(nil)
  4609  	fmt.Printf("Memory: %v\n", friendlyBytes(v.Mem))
  4610  
  4611  	// Now do an ephemeral consumer and whip through every message, doing the same calculations.
  4612  	start = time.Now()
  4613  	received, done := 0, make(chan bool)
  4614  	sub, err := js.Subscribe("DD", func(m *nats.Msg) {
  4615  		received++
  4616  		if received >= n {
  4617  			done <- true
  4618  		}
  4619  	}, nats.OrderedConsumer())
  4620  	require_NoError(t, err)
  4621  
  4622  	select {
  4623  	case <-done:
  4624  	case <-time.After(10 * time.Second):
  4625  		if s.NumSlowConsumers() > 0 {
  4626  			t.Fatalf("Did not receive all large messages due to slow consumer status: %d of %d", received, n)
  4627  		}
  4628  		t.Fatalf("Failed to receive all large messages: %d of %d\n", received, n)
  4629  	}
  4630  
  4631  	fmt.Printf("TOOK %v to receive all %d msgs\n", time.Since(start), n)
  4632  	sub.Unsubscribe()
  4633  
  4634  	v, _ = s.Varz(nil)
  4635  	fmt.Printf("Memory: %v\n", friendlyBytes(v.Mem))
  4636  }
  4637  
  4638  func TestNoRaceJetStreamMemoryUsageOnLimitedStreamWithMirror(t *testing.T) {
  4639  	skip(t)
  4640  
  4641  	s := RunBasicJetStreamServer(t)
  4642  	defer s.Shutdown()
  4643  
  4644  	nc, js := jsClientConnect(t, s)
  4645  	defer nc.Close()
  4646  
  4647  	_, err := js.AddStream(&nats.StreamConfig{Name: "DD", Subjects: []string{"ORDERS.*"}, MaxMsgs: 10_000})
  4648  	require_NoError(t, err)
  4649  
  4650  	_, err = js.AddStream(&nats.StreamConfig{
  4651  		Name:    "M",
  4652  		Mirror:  &nats.StreamSource{Name: "DD"},
  4653  		MaxMsgs: 10_000,
  4654  	})
  4655  	require_NoError(t, err)
  4656  
  4657  	m := nats.NewMsg("ORDERS.0")
  4658  	m.Data = []byte(strings.Repeat("Z", 2048))
  4659  
  4660  	start := time.Now()
  4661  
  4662  	n := 1_000_000
  4663  	for i := 0; i < n; i++ {
  4664  		m.Subject = fmt.Sprintf("ORDERS.%d", i)
  4665  		m.Header.Set(JSMsgId, strconv.Itoa(i))
  4666  		_, err := js.PublishMsgAsync(m)
  4667  		require_NoError(t, err)
  4668  	}
  4669  
  4670  	select {
  4671  	case <-js.PublishAsyncComplete():
  4672  	case <-time.After(20 * time.Second):
  4673  		t.Fatalf("Did not receive completion signal")
  4674  	}
  4675  
  4676  	tt := time.Since(start)
  4677  	si, err := js.StreamInfo("DD")
  4678  	require_NoError(t, err)
  4679  
  4680  	fmt.Printf("Took %v to send %d msgs\n", tt, n)
  4681  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4682  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4683  
  4684  	v, _ := s.Varz(nil)
  4685  	fmt.Printf("Memory AFTER SEND: %v\n", friendlyBytes(v.Mem))
  4686  }
  4687  
  4688  func TestNoRaceJetStreamOrderedConsumerLongRTTPerformance(t *testing.T) {
  4689  	skip(t)
  4690  
  4691  	s := RunBasicJetStreamServer(t)
  4692  	defer s.Shutdown()
  4693  
  4694  	nc, _ := jsClientConnect(t, s)
  4695  	defer nc.Close()
  4696  
  4697  	js, err := nc.JetStream(nats.PublishAsyncMaxPending(1000))
  4698  	require_NoError(t, err)
  4699  
  4700  	_, err = js.AddStream(&nats.StreamConfig{Name: "OCP"})
  4701  	require_NoError(t, err)
  4702  
  4703  	n, msg := 100_000, []byte(strings.Repeat("D", 30_000))
  4704  
  4705  	for i := 0; i < n; i++ {
  4706  		_, err := js.PublishAsync("OCP", msg)
  4707  		require_NoError(t, err)
  4708  	}
  4709  	select {
  4710  	case <-js.PublishAsyncComplete():
  4711  	case <-time.After(5 * time.Second):
  4712  		t.Fatalf("Did not receive completion signal")
  4713  	}
  4714  
  4715  	// Approximately 3GB
  4716  	si, err := js.StreamInfo("OCP")
  4717  	require_NoError(t, err)
  4718  
  4719  	start := time.Now()
  4720  	received, done := 0, make(chan bool)
  4721  	sub, err := js.Subscribe("OCP", func(m *nats.Msg) {
  4722  		received++
  4723  		if received >= n {
  4724  			done <- true
  4725  		}
  4726  	}, nats.OrderedConsumer())
  4727  	require_NoError(t, err)
  4728  	defer sub.Unsubscribe()
  4729  
  4730  	// Wait to receive all messages.
  4731  	select {
  4732  	case <-done:
  4733  	case <-time.After(30 * time.Second):
  4734  		t.Fatalf("Did not receive all of our messages")
  4735  	}
  4736  
  4737  	tt := time.Since(start)
  4738  	fmt.Printf("Took %v to receive %d msgs\n", tt, n)
  4739  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4740  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4741  
  4742  	sub.Unsubscribe()
  4743  
  4744  	rtt := 10 * time.Millisecond
  4745  	bw := 10 * 1024 * 1024 * 1024
  4746  	proxy := newNetProxy(rtt, bw, bw, s.ClientURL())
  4747  	defer proxy.stop()
  4748  
  4749  	nc, err = nats.Connect(proxy.clientURL())
  4750  	require_NoError(t, err)
  4751  	defer nc.Close()
  4752  	js, err = nc.JetStream()
  4753  	require_NoError(t, err)
  4754  
  4755  	start, received = time.Now(), 0
  4756  	sub, err = js.Subscribe("OCP", func(m *nats.Msg) {
  4757  		received++
  4758  		if received >= n {
  4759  			done <- true
  4760  		}
  4761  	}, nats.OrderedConsumer())
  4762  	require_NoError(t, err)
  4763  	defer sub.Unsubscribe()
  4764  
  4765  	// Wait to receive all messages.
  4766  	select {
  4767  	case <-done:
  4768  	case <-time.After(60 * time.Second):
  4769  		t.Fatalf("Did not receive all of our messages")
  4770  	}
  4771  
  4772  	tt = time.Since(start)
  4773  	fmt.Printf("Proxy RTT: %v, UP: %d, DOWN: %d\n", rtt, bw, bw)
  4774  	fmt.Printf("Took %v to receive %d msgs\n", tt, n)
  4775  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4776  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4777  }
  4778  
  4779  var jsClusterStallCatchupTempl = `
  4780  	listen: 127.0.0.1:-1
  4781  	server_name: %s
  4782  	jetstream: {max_mem_store: 256MB, max_file_store: 32GB, store_dir: '%s'}
  4783  
  4784  	leaf {
  4785  		listen: 127.0.0.1:-1
  4786  	}
  4787  
  4788  	cluster {
  4789  		name: %s
  4790  		listen: 127.0.0.1:%d
  4791  		routes = [%s]
  4792  	}
  4793  
  4794  	# For access to system account.
  4795  	accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  4796  `
  4797  
  4798  // Test our global stall gate for outstanding catchup bytes.
  4799  func TestNoRaceJetStreamClusterCatchupStallGate(t *testing.T) {
  4800  	skip(t)
  4801  
  4802  	c := createJetStreamClusterWithTemplate(t, jsClusterStallCatchupTempl, "GSG", 3)
  4803  	defer c.shutdown()
  4804  
  4805  	nc, js := jsClientConnect(t, c.randomServer())
  4806  	defer nc.Close()
  4807  
  4808  	// ~100k per message.
  4809  	msg := []byte(strings.Repeat("A", 99_960))
  4810  
  4811  	// Create 200 R3 streams of ~10MB each (100 msgs x ~100KB per stream).
  4812  	// With R3 across the 3 servers, each server ends up with ~2GB.
  4813  	var wg sync.WaitGroup
  4814  	for i := 0; i < 20; i++ {
  4815  		wg.Add(1)
  4816  		go func(x int) {
  4817  			defer wg.Done()
  4818  			for n := 1; n <= 10; n++ {
  4819  				sn := fmt.Sprintf("S-%d", n+x)
  4820  				_, err := js.AddStream(&nats.StreamConfig{
  4821  					Name:     sn,
  4822  					Replicas: 3,
  4823  				})
  4824  				require_NoError(t, err)
  4825  				for i := 0; i < 100; i++ {
  4826  					_, err := js.Publish(sn, msg)
  4827  					require_NoError(t, err)
  4828  				}
  4829  			}
  4830  		}(i * 20)
  4831  	}
  4832  	wg.Wait()
  4833  
  4834  	info, err := js.AccountInfo()
  4835  	require_NoError(t, err)
  4836  	require_True(t, info.Streams == 200)
  4837  
  4838  	runtime.GC()
  4839  	debug.FreeOSMemory()
  4840  
  4841  	// Now bring a server down and wipe its storage.
  4842  	s := c.servers[0]
  4843  	vz, err := s.Varz(nil)
  4844  	require_NoError(t, err)
  4845  	fmt.Printf("MEM BEFORE is %v\n", friendlyBytes(vz.Mem))
  4846  
  4847  	sd := s.JetStreamConfig().StoreDir
  4848  	s.Shutdown()
  4849  	removeDir(t, sd)
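        	// Restarting with an empty store dir forces a full catchup of all 200 streams from the
        	// other peers; the stall gate should keep outstanding catchup bytes (and memory) bounded.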
  4850  	s = c.restartServer(s)
  4851  
  4852  	c.waitOnServerHealthz(s)
  4853  
  4854  	runtime.GC()
  4855  	debug.FreeOSMemory()
  4856  
  4857  	vz, err = s.Varz(nil)
  4858  	require_NoError(t, err)
  4859  	fmt.Printf("MEM AFTER is %v\n", friendlyBytes(vz.Mem))
  4860  }
  4861  
  4862  func TestNoRaceJetStreamClusterCatchupBailMidway(t *testing.T) {
  4863  	skip(t)
  4864  
  4865  	c := createJetStreamClusterWithTemplate(t, jsClusterStallCatchupTempl, "GSG", 3)
  4866  	defer c.shutdown()
  4867  
  4868  	ml := c.leader()
  4869  	nc, js := jsClientConnect(t, ml)
  4870  	defer nc.Close()
  4871  
  4872  	msg := []byte(strings.Repeat("A", 480))
  4873  
  4874  	for i := 0; i < maxConcurrentSyncRequests*2; i++ {
  4875  		sn := fmt.Sprintf("CUP-%d", i+1)
  4876  		_, err := js.AddStream(&nats.StreamConfig{
  4877  			Name:     sn,
  4878  			Replicas: 3,
  4879  		})
  4880  		require_NoError(t, err)
  4881  
  4882  		for i := 0; i < 10_000; i++ {
  4883  			_, err := js.PublishAsync(sn, msg)
  4884  			require_NoError(t, err)
  4885  		}
  4886  		select {
  4887  		case <-js.PublishAsyncComplete():
  4888  		case <-time.After(10 * time.Second):
  4889  			t.Fatalf("Did not receive completion signal")
  4890  		}
  4891  	}
  4892  
  4893  	jsz, _ := ml.Jsz(nil)
  4894  	expectedMsgs := jsz.Messages
  4895  
  4896  	// Now select a server and shut it down, removing the storage directory.
  4897  	s := c.randomNonLeader()
  4898  	sd := s.JetStreamConfig().StoreDir
  4899  	s.Shutdown()
  4900  	removeDir(t, sd)
  4901  
  4902  	// Now restart the server.
  4903  	s = c.restartServer(s)
  4904  
  4905  	// We want to force the follower to bail out before the catchup, driven through the
  4906  	// upper level catchup logic, completes.
  4907  	checkFor(t, 5*time.Second, 10*time.Millisecond, func() error {
  4908  		jsz, _ := s.Jsz(nil)
  4909  		if jsz.Messages > expectedMsgs/2 {
  4910  			s.Shutdown()
  4911  			return nil
  4912  		}
  4913  		return fmt.Errorf("Not enough yet")
  4914  	})
  4915  
  4916  	// Now restart the server.
  4917  	s = c.restartServer(s)
  4918  
  4919  	checkFor(t, 5*time.Second, 500*time.Millisecond, func() error {
  4920  		jsz, _ := s.Jsz(nil)
  4921  		if jsz.Messages == expectedMsgs {
  4922  			return nil
  4923  		}
  4924  		return fmt.Errorf("Not enough yet")
  4925  	})
  4926  }
  4927  
  4928  func TestNoRaceJetStreamAccountLimitsAndRestart(t *testing.T) {
  4929  	c := createJetStreamClusterWithTemplate(t, jsClusterAccountLimitsTempl, "A3S", 3)
  4930  	defer c.shutdown()
  4931  
  4932  	nc, js := jsClientConnect(t, c.randomServer())
  4933  	defer nc.Close()
  4934  
  4935  	if _, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Replicas: 3}); err != nil {
  4936  		t.Fatalf("Unexpected error: %v", err)
  4937  	}
  4938  
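        	// Publish until a publish fails, which is expected once the account limits are hit.
        	// Partway through, shut down a non stream leader so the remaining peers keep taking writes.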
  4939  	for i := 0; i < 20_000; i++ {
  4940  		if _, err := js.Publish("TEST", []byte("A")); err != nil {
  4941  			break
  4942  		}
  4943  		if i == 5_000 {
  4944  			snl := c.randomNonStreamLeader("$JS", "TEST")
  4945  			snl.Shutdown()
  4946  		}
  4947  	}
  4948  
  4949  	c.stopAll()
  4950  	c.restartAll()
  4951  	c.waitOnLeader()
  4952  	c.waitOnStreamLeader("$JS", "TEST")
  4953  
  4954  	for _, cs := range c.servers {
  4955  		c.waitOnStreamCurrent(cs, "$JS", "TEST")
  4956  	}
  4957  }
  4958  
  4959  func TestNoRaceJetStreamPullConsumersAndInteriorDeletes(t *testing.T) {
  4960  	c := createJetStreamClusterExplicit(t, "ID", 3)
  4961  	defer c.shutdown()
  4962  
  4963  	nc, js := jsClientConnect(t, c.randomServer())
  4964  	defer nc.Close()
  4965  
  4966  	_, err := js.AddStream(&nats.StreamConfig{
  4967  		Name:      "foo",
  4968  		Replicas:  3,
  4969  		MaxMsgs:   50000,
  4970  		Retention: nats.InterestPolicy,
  4971  	})
  4972  	require_NoError(t, err)
  4973  
  4974  	c.waitOnStreamLeader(globalAccountName, "foo")
  4975  
  4976  	_, err = js.AddConsumer("foo", &nats.ConsumerConfig{
  4977  		Durable:       "foo",
  4978  		FilterSubject: "foo",
  4979  		MaxAckPending: 20000,
  4980  		AckWait:       time.Minute,
  4981  		AckPolicy:     nats.AckExplicitPolicy,
  4982  	})
  4983  	require_NoError(t, err)
  4984  
  4985  	c.waitOnConsumerLeader(globalAccountName, "foo", "foo")
  4986  
  4987  	rcv := int32(0)
  4988  	prods := 5
  4989  	cons := 5
  4990  	wg := sync.WaitGroup{}
  4991  	wg.Add(prods + cons)
  4992  	toSend := 100000
  4993  
  4994  	for i := 0; i < cons; i++ {
  4995  		go func() {
  4996  			defer wg.Done()
  4997  
  4998  			sub, err := js.PullSubscribe("foo", "foo")
  4999  			if err != nil {
  5000  				return
  5001  			}
  5002  			for {
  5003  				msgs, err := sub.Fetch(200, nats.MaxWait(250*time.Millisecond))
  5004  				if err != nil {
  5005  					if n := int(atomic.LoadInt32(&rcv)); n >= toSend {
  5006  						return
  5007  					}
  5008  					continue
  5009  				}
  5010  				for _, m := range msgs {
  5011  					m.Ack()
  5012  					atomic.AddInt32(&rcv, 1)
  5013  				}
  5014  			}
  5015  		}()
  5016  	}
  5017  
  5018  	for i := 0; i < prods; i++ {
  5019  		go func() {
  5020  			defer wg.Done()
  5021  
  5022  			for i := 0; i < toSend/prods; i++ {
  5023  				js.Publish("foo", []byte("hello"))
  5024  			}
  5025  		}()
  5026  	}
  5027  
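        	// Let the producers and pull consumers run briefly, then force a consumer leader
        	// step-down while traffic is still in flight.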
  5028  	time.Sleep(time.Second)
  5029  	resp, err := nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "foo", "foo"), nil, time.Second)
  5030  	if err != nil {
  5031  		t.Fatalf("Unexpected error: %v", err)
  5032  	}
  5033  	var cdResp JSApiConsumerLeaderStepDownResponse
  5034  	if err := json.Unmarshal(resp.Data, &cdResp); err != nil {
  5035  		t.Fatalf("Unexpected error: %v", err)
  5036  	}
  5037  	if cdResp.Error != nil {
  5038  		t.Fatalf("Unexpected error: %+v", cdResp.Error)
  5039  	}
  5040  	ch := make(chan struct{})
  5041  	go func() {
  5042  		wg.Wait()
  5043  		close(ch)
  5044  	}()
  5045  	select {
  5046  	case <-ch:
  5047  		// OK
  5048  	case <-time.After(30 * time.Second):
  5049  		t.Fatalf("Consumers took too long to consume all messages")
  5050  	}
  5051  }
  5052  
  5053  func TestNoRaceJetStreamClusterInterestPullConsumerStreamLimitBug(t *testing.T) {
  5054  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  5055  	defer c.shutdown()
  5056  
  5057  	nc, js := jsClientConnect(t, c.randomServer())
  5058  	defer nc.Close()
  5059  
  5060  	limit := uint64(1000)
  5061  
  5062  	_, err := js.AddStream(&nats.StreamConfig{
  5063  		Name:      "TEST",
  5064  		Subjects:  []string{"foo"},
  5065  		Retention: nats.InterestPolicy,
  5066  		MaxMsgs:   int64(limit),
  5067  		Replicas:  3,
  5068  	})
  5069  	require_NoError(t, err)
  5070  
  5071  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "dur", AckPolicy: nats.AckExplicitPolicy})
  5072  	require_NoError(t, err)
  5073  
  5074  	qch := make(chan bool)
  5075  	var wg sync.WaitGroup
  5076  
  5077  	// Publisher
  5078  	wg.Add(1)
  5079  	go func() {
  5080  		defer wg.Done()
  5081  		for {
  5082  			pt := time.NewTimer(time.Duration(rand.Intn(2)) * time.Millisecond)
  5083  			select {
  5084  			case <-pt.C:
  5085  				_, err := js.Publish("foo", []byte("BUG!"))
  5086  				require_NoError(t, err)
  5087  			case <-qch:
  5088  				pt.Stop()
  5089  				return
  5090  			}
  5091  		}
  5092  	}()
  5093  
  5094  	time.Sleep(time.Second)
  5095  
  5096  	// Pull Consumers
  5097  	wg.Add(100)
  5098  	for i := 0; i < 100; i++ {
  5099  		go func() {
  5100  			defer wg.Done()
  5101  			nc := natsConnect(t, c.randomServer().ClientURL())
  5102  			defer nc.Close()
  5103  
  5104  			js, err := nc.JetStream(nats.MaxWait(time.Second))
  5105  			require_NoError(t, err)
  5106  
  5107  			var sub *nats.Subscription
  5108  			for j := 0; j < 5; j++ {
  5109  				sub, err = js.PullSubscribe("foo", "dur")
  5110  				if err == nil {
  5111  					break
  5112  				}
  5113  			}
  5114  			require_NoError(t, err)
  5115  
  5116  			for {
  5117  				pt := time.NewTimer(time.Duration(rand.Intn(300)) * time.Millisecond)
  5118  				select {
  5119  				case <-pt.C:
  5120  					msgs, err := sub.Fetch(1)
  5121  					if err != nil {
  5122  						t.Logf("Got a Fetch error: %v", err)
  5123  						return
  5124  					}
  5125  					if len(msgs) > 0 {
  5126  						go func() {
  5127  							ackDelay := time.Duration(rand.Intn(375)+15) * time.Millisecond
  5128  							m := msgs[0]
  5129  							time.AfterFunc(ackDelay, func() { m.AckSync() })
  5130  						}()
  5131  					}
  5132  				case <-qch:
  5133  					return
  5134  				}
  5135  			}
  5136  		}()
  5137  	}
  5138  
  5139  	// Make sure we have hit the limit for the number of messages we expected.
  5140  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5141  		si, err := js.StreamInfo("TEST")
  5142  		require_NoError(t, err)
  5143  		if si.State.Msgs < limit {
  5144  			return fmt.Errorf("Not hit limit yet")
  5145  		}
  5146  		return nil
  5147  	})
  5148  
  5149  	close(qch)
  5150  	wg.Wait()
  5151  
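        	// With interest retention capped by MaxMsgs, the consumer's pending plus ack pending
        	// should account for exactly the messages still present in the stream.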
  5152  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5153  		si, err := js.StreamInfo("TEST")
  5154  		require_NoError(t, err)
  5155  		ci, err := js.ConsumerInfo("TEST", "dur")
  5156  		require_NoError(t, err)
  5157  
  5158  		np := ci.NumPending + uint64(ci.NumAckPending)
  5159  		if np != si.State.Msgs {
  5160  			return fmt.Errorf("Expected NumPending to be %d got %d", si.State.Msgs-uint64(ci.NumAckPending), ci.NumPending)
  5161  		}
  5162  		return nil
  5163  	})
  5164  }
  5165  
  5166  // Test that all peers have the direct access subs that participate in a queue group,
  5167  // but only once they are current and ready. So we will start with R1, add in messages,
  5168  // then scale up while still adding more messages.
  5169  func TestNoRaceJetStreamClusterDirectAccessAllPeersSubs(t *testing.T) {
  5170  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  5171  	defer c.shutdown()
  5172  
  5173  	nc, js := jsClientConnect(t, c.randomServer())
  5174  	defer nc.Close()
  5175  
  5176  	// Start as R1
  5177  	cfg := &StreamConfig{
  5178  		Name:        "TEST",
  5179  		Subjects:    []string{"kv.>"},
  5180  		MaxMsgsPer:  10,
  5181  		AllowDirect: true,
  5182  		Replicas:    1,
  5183  		Storage:     FileStorage,
  5184  	}
  5185  	addStream(t, nc, cfg)
  5186  
  5187  	// Seed with enough messages to start; then we will scale up while still adding more messages.
  5188  	num, msg := 1000, bytes.Repeat([]byte("XYZ"), 64)
  5189  	for i := 0; i < num; i++ {
  5190  		js.PublishAsync(fmt.Sprintf("kv.%d", i), msg)
  5191  	}
  5192  	select {
  5193  	case <-js.PublishAsyncComplete():
  5194  	case <-time.After(5 * time.Second):
  5195  		t.Fatalf("Did not receive completion signal")
  5196  	}
  5197  
  5198  	getSubj := fmt.Sprintf(JSDirectMsgGetT, "TEST")
  5199  	getMsg := func(key string) *nats.Msg {
  5200  		t.Helper()
  5201  		req := []byte(fmt.Sprintf(`{"last_by_subj":%q}`, key))
  5202  		m, err := nc.Request(getSubj, req, time.Second)
  5203  		require_NoError(t, err)
  5204  		require_True(t, m.Header.Get(JSSubject) == key)
  5205  		return m
  5206  	}
  5207  
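        	// Direct gets are served off the direct access subs, which form a queue group across the peers.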
  5208  	// Just make sure we can succeed here.
  5209  	getMsg("kv.22")
  5210  
  5211  	// Now crank up goroutines to continue sending more messages.
  5212  	qch := make(chan bool)
  5213  	var wg sync.WaitGroup
  5214  
  5215  	for i := 0; i < 5; i++ {
  5216  		wg.Add(1)
  5217  		go func() {
  5218  			defer wg.Done()
  5219  			nc, js := jsClientConnect(t, c.randomServer())
  5220  			defer nc.Close()
  5221  			for {
  5222  				select {
  5223  				case <-qch:
  5224  					select {
  5225  					case <-js.PublishAsyncComplete():
  5226  					case <-time.After(10 * time.Second):
  5227  					}
  5228  					return
  5229  				default:
  5230  					// Send as fast as we can.
  5231  					js.Publish(fmt.Sprintf("kv.%d", rand.Intn(1000)), msg)
  5232  				}
  5233  			}
  5234  		}()
  5235  	}
  5236  
  5237  	time.Sleep(200 * time.Millisecond)
  5238  
  5239  	// Now let's scale up to an R3.
  5240  	cfg.Replicas = 3
  5241  	updateStream(t, nc, cfg)
  5242  
  5243  	// Wait for the stream to register the new replicas and have a leader.
  5244  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5245  		si, err := js.StreamInfo("TEST")
  5246  		if err != nil {
  5247  			return err
  5248  		}
  5249  		if si.Cluster == nil {
  5250  			return fmt.Errorf("No cluster yet")
  5251  		}
  5252  		if si.Cluster.Leader == _EMPTY_ || len(si.Cluster.Replicas) != 2 {
  5253  			return fmt.Errorf("Cluster not ready yet")
  5254  		}
  5255  		return nil
  5256  	})
  5257  
  5258  	close(qch)
  5259  	wg.Wait()
  5260  
  5261  	// Just make sure we can succeed here.
  5262  	getMsg("kv.22")
  5263  
  5264  	// For each non-leader check that the direct sub fires up.
  5265  	// We just test all, the leader will already have a directSub.
  5266  	for _, s := range c.servers {
  5267  		mset, err := s.GlobalAccount().lookupStream("TEST")
  5268  		require_NoError(t, err)
  5269  		checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5270  			mset.mu.RLock()
  5271  			ok := mset.directSub != nil
  5272  			mset.mu.RUnlock()
  5273  			if ok {
  5274  				return nil
  5275  			}
  5276  			return fmt.Errorf("No directSub yet")
  5277  		})
  5278  	}
  5279  
  5280  	si, err := js.StreamInfo("TEST")
  5281  	require_NoError(t, err)
  5282  
  5283  	if si.State.Msgs == uint64(num) {
  5284  		t.Fatalf("Expected to see messages increase, got %d", si.State.Msgs)
  5285  	}
  5286  
  5287  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  5288  		// Make sure they are all the same from a state perspective.
  5289  		// Leader will have the expected state.
  5290  		lmset, err := c.streamLeader("$G", "TEST").GlobalAccount().lookupStream("TEST")
  5291  		require_NoError(t, err)
  5292  		expected := lmset.state()
  5293  
  5294  		for _, s := range c.servers {
  5295  			mset, err := s.GlobalAccount().lookupStream("TEST")
  5296  			require_NoError(t, err)
  5297  			if state := mset.state(); !reflect.DeepEqual(expected, state) {
  5298  				return fmt.Errorf("Expected %+v, got %+v", expected, state)
  5299  			}
  5300  		}
  5301  		return nil
  5302  	})
  5303  
  5305  
  5306  func TestNoRaceJetStreamClusterStreamNamesAndInfosMoreThanAPILimit(t *testing.T) {
  5307  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5308  	defer c.shutdown()
  5309  
  5310  	s := c.randomServer()
  5311  	nc, js := jsClientConnect(t, s)
  5312  	defer nc.Close()
  5313  
  5314  	createStream := func(name string) {
  5315  		t.Helper()
  5316  		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  5317  			t.Fatalf("Unexpected error: %v", err)
  5318  		}
  5319  	}
  5320  
  5321  	max := JSApiListLimit
  5322  	if JSApiNamesLimit > max {
  5323  		max = JSApiNamesLimit
  5324  	}
  5325  	max += 10
  5326  
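        	// Create a few more streams than the larger of the two API limits so that paging kicks in.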
  5327  	for i := 0; i < max; i++ {
  5328  		name := fmt.Sprintf("foo_%d", i)
  5329  		createStream(name)
  5330  	}
  5331  
  5332  	// Not using the JS API here because we want to make sure that the
  5333  	// server returns the proper Total count, but also that it does not
  5334  	// send more than the API limit in one go.
  5335  	check := func(subj string, limit int) {
  5336  		t.Helper()
  5337  
  5338  		nreq := JSApiStreamNamesRequest{}
  5339  		b, _ := json.Marshal(nreq)
  5340  		msg, err := nc.Request(subj, b, 2*time.Second)
  5341  		require_NoError(t, err)
  5342  
  5343  		nresp := JSApiStreamNamesResponse{}
  5344  		json.Unmarshal(msg.Data, &nresp)
  5345  		if n := nresp.ApiPaged.Total; n != max {
  5346  			t.Fatalf("Expected total to be %v, got %v", max, n)
  5347  		}
  5348  		if n := nresp.ApiPaged.Limit; n != limit {
  5349  			t.Fatalf("Expected limit to be %v, got %v", limit, n)
  5350  		}
  5351  		if n := len(nresp.Streams); n != limit {
  5352  			t.Fatalf("Expected number of streams to be %v, got %v", limit, n)
  5353  		}
  5354  	}
  5355  
  5356  	check(JSApiStreams, JSApiNamesLimit)
  5357  	check(JSApiStreamList, JSApiListLimit)
  5358  }
  5359  
  5360  func TestNoRaceJetStreamClusterConsumerListPaging(t *testing.T) {
  5361  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5362  	defer c.shutdown()
  5363  
  5364  	s := c.randomNonLeader()
  5365  	nc, js := jsClientConnect(t, s)
  5366  	defer nc.Close()
  5367  
  5368  	_, err := js.AddStream(&nats.StreamConfig{
  5369  		Name:     "TEST",
  5370  		Subjects: []string{"foo"},
  5371  		Replicas: 3,
  5372  	})
  5373  	require_NoError(t, err)
  5374  	c.waitOnStreamLeader(globalAccountName, "TEST")
  5375  
  5376  	cfg := &nats.ConsumerConfig{
  5377  		Replicas:      1,
  5378  		MemoryStorage: true,
  5379  		AckPolicy:     nats.AckExplicitPolicy,
  5380  	}
  5381  
  5382  	// create 3000 consumers.
  5383  	numConsumers := 3000
  5384  	for i := 1; i <= numConsumers; i++ {
  5385  		cfg.Durable = fmt.Sprintf("d-%.4d", i)
  5386  		_, err := js.AddConsumer("TEST", cfg)
  5387  		require_NoError(t, err)
  5388  	}
  5389  
  5390  	// Test both names and list operations.
  5391  
  5392  	// Names
  5393  	reqSubj := fmt.Sprintf(JSApiConsumersT, "TEST")
  5394  	grabConsumerNames := func(offset int) []string {
  5395  		req := fmt.Sprintf(`{"offset":%d}`, offset)
  5396  		respMsg, err := nc.Request(reqSubj, []byte(req), time.Second)
  5397  		require_NoError(t, err)
  5398  		var resp JSApiConsumerNamesResponse
  5399  		err = json.Unmarshal(respMsg.Data, &resp)
  5400  		require_NoError(t, err)
  5401  		// Sanity check that we are actually paging properly around limits.
  5402  		if resp.Limit < len(resp.Consumers) {
  5403  			t.Fatalf("Expected total limited to %d but got %d", resp.Limit, len(resp.Consumers))
  5404  		}
  5405  		if resp.Total != numConsumers {
  5406  			t.Fatalf("Invalid total response: expected %d got %d", numConsumers, resp.Total)
  5407  		}
  5408  		return resp.Consumers
  5409  	}
  5410  
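        	// Page through by advancing the offset by the number of names returned,
        	// checking for duplicates, until we have collected every consumer.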
  5411  	results := make(map[string]bool)
  5412  
  5413  	for offset := 0; len(results) < numConsumers; {
  5414  		consumers := grabConsumerNames(offset)
  5415  		offset += len(consumers)
  5416  		for _, name := range consumers {
  5417  			if results[name] {
  5418  				t.Fatalf("Found duplicate %q", name)
  5419  			}
  5420  			results[name] = true
  5421  		}
  5422  	}
  5423  
  5424  	// List
  5425  	reqSubj = fmt.Sprintf(JSApiConsumerListT, "TEST")
  5426  	grabConsumerList := func(offset int) []*ConsumerInfo {
  5427  		req := fmt.Sprintf(`{"offset":%d}`, offset)
  5428  		respMsg, err := nc.Request(reqSubj, []byte(req), time.Second)
  5429  		require_NoError(t, err)
  5430  		var resp JSApiConsumerListResponse
  5431  		err = json.Unmarshal(respMsg.Data, &resp)
  5432  		require_NoError(t, err)
  5433  		// Sanity check that we are actually paging properly around limits.
  5434  		if resp.Limit < len(resp.Consumers) {
  5435  			t.Fatalf("Expected total limited to %d but got %d", resp.Limit, len(resp.Consumers))
  5436  		}
  5437  		if resp.Total != numConsumers {
  5438  			t.Fatalf("Invalid total response: expected %d got %d", numConsumers, resp.Total)
  5439  		}
  5440  		return resp.Consumers
  5441  	}
  5442  
  5443  	results = make(map[string]bool)
  5444  
  5445  	for offset := 0; len(results) < numConsumers; {
  5446  		consumers := grabConsumerList(offset)
  5447  		offset += len(consumers)
  5448  		for _, ci := range consumers {
  5449  			name := ci.Config.Durable
  5450  			if results[name] {
  5451  				t.Fatalf("Found duplicate %q", name)
  5452  			}
  5453  			results[name] = true
  5454  		}
  5455  	}
  5456  
  5457  	if len(results) != numConsumers {
  5458  		t.Fatalf("Received %d / %d consumers", len(results), numConsumers)
  5459  	}
  5460  }
  5461  
  5462  func TestNoRaceJetStreamFileStoreLargeKVAccessTiming(t *testing.T) {
  5463  	storeDir := t.TempDir()
  5464  
  5465  	blkSize := uint64(4 * 1024)
  5466  	// Compensate for slower IO on MacOSX
  5467  	if runtime.GOOS == "darwin" {
  5468  		blkSize *= 4
  5469  	}
  5470  
  5471  	fs, err := newFileStore(
  5472  		FileStoreConfig{StoreDir: storeDir, BlockSize: blkSize, CacheExpire: 30 * time.Second},
  5473  		StreamConfig{Name: "zzz", Subjects: []string{"KV.STREAM_NAME.*"}, Storage: FileStorage, MaxMsgsPer: 1},
  5474  	)
  5475  	require_NoError(t, err)
  5476  	defer fs.Stop()
  5477  
  5478  	tmpl := "KV.STREAM_NAME.%d"
  5479  	nkeys, val := 100_000, bytes.Repeat([]byte("Z"), 1024)
  5480  
  5481  	for i := 1; i <= nkeys; i++ {
  5482  		subj := fmt.Sprintf(tmpl, i)
  5483  		_, _, err := fs.StoreMsg(subj, nil, val)
  5484  		require_NoError(t, err)
  5485  	}
  5486  
  5487  	first := fmt.Sprintf(tmpl, 1)
  5488  	last := fmt.Sprintf(tmpl, nkeys)
  5489  
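        	// With MaxMsgsPer set to 1 each subject holds exactly one message, so looking up the
        	// first key should not be materially slower than looking up the last.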
  5490  	start := time.Now()
  5491  	sm, err := fs.LoadLastMsg(last, nil)
  5492  	require_NoError(t, err)
  5493  	base := time.Since(start)
  5494  
  5495  	if !bytes.Equal(sm.msg, val) {
  5496  		t.Fatalf("Retrieved value did not match")
  5497  	}
  5498  
  5499  	start = time.Now()
  5500  	_, err = fs.LoadLastMsg(first, nil)
  5501  	require_NoError(t, err)
  5502  	slow := time.Since(start)
  5503  
  5504  	if slow > 4*base || slow > time.Millisecond {
  5505  		t.Fatalf("Took too long to look up first key vs last: %v vs %v", slow, base)
  5506  	}
  5507  
  5508  	// time first seq lookup for both as well.
  5509  	// Base will be first in this case.
  5510  	fs.mu.RLock()
  5511  	start = time.Now()
  5512  	fs.firstSeqForSubj(first)
  5513  	base = time.Since(start)
  5514  	start = time.Now()
  5515  	fs.firstSeqForSubj(last)
  5516  	slow = time.Since(start)
  5517  	fs.mu.RUnlock()
  5518  
  5519  	if slow > 4*base || slow > time.Millisecond {
  5520  		t.Fatalf("Took too long to look up last key by subject vs first: %v vs %v", slow, base)
  5521  	}
  5522  }
  5523  
  5524  func TestNoRaceJetStreamKVLock(t *testing.T) {
  5525  	s := RunBasicJetStreamServer(t)
  5526  	defer s.Shutdown()
  5527  
  5528  	nc, js := jsClientConnect(t, s)
  5529  	defer nc.Close()
  5530  
  5531  	_, err := js.CreateKeyValue(&nats.KeyValueConfig{Bucket: "LOCKS"})
  5532  	require_NoError(t, err)
  5533  
  5534  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  5535  	defer cancel()
  5536  
  5537  	var wg sync.WaitGroup
  5538  	start := make(chan bool)
  5539  
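        	// tracker counts how many goroutines currently believe they hold the lock; it should never exceed 1.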
  5540  	var tracker int64
  5541  
  5542  	for i := 0; i < 100; i++ {
  5543  		wg.Add(1)
  5544  		go func() {
  5545  			defer wg.Done()
  5546  
  5547  			nc, js := jsClientConnect(t, s)
  5548  			defer nc.Close()
  5549  			kv, err := js.KeyValue("LOCKS")
  5550  			require_NoError(t, err)
  5551  
  5552  			<-start
  5553  
  5554  			for {
  5555  				last, err := kv.Create("MY_LOCK", []byte("Z"))
  5556  				if err != nil {
  5557  					select {
  5558  					case <-time.After(10 * time.Millisecond):
  5559  						continue
  5560  					case <-ctx.Done():
  5561  						return
  5562  					}
  5563  				}
  5564  
  5565  				if v := atomic.AddInt64(&tracker, 1); v != 1 {
  5566  					t.Logf("TRACKER NOT 1 -> %d\n", v)
  5567  					cancel()
  5568  				}
  5569  
  5570  				time.Sleep(10 * time.Millisecond)
  5571  				if v := atomic.AddInt64(&tracker, -1); v != 0 {
  5572  					t.Logf("TRACKER NOT 0 AFTER RELEASE -> %d\n", v)
  5573  					cancel()
  5574  				}
  5575  
  5576  				err = kv.Delete("MY_LOCK", nats.LastRevision(last))
  5577  				if err != nil {
  5578  					t.Logf("Could not unlock for last %d: %v", last, err)
  5579  				}
  5580  
  5581  				if ctx.Err() != nil {
  5582  					return
  5583  				}
  5584  			}
  5585  		}()
  5586  	}
  5587  
  5588  	close(start)
  5589  	wg.Wait()
  5590  }
  5591  
  5592  func TestNoRaceJetStreamSuperClusterStreamMoveLongRTT(t *testing.T) {
  5593  	// Make C2 far away.
  5594  	gwm := gwProxyMap{
  5595  		"C2": &gwProxy{
  5596  			rtt:  20 * time.Millisecond,
  5597  			up:   1 * 1024 * 1024 * 1024, // 1gbit
  5598  			down: 1 * 1024 * 1024 * 1024, // 1gbit
  5599  		},
  5600  	}
  5601  	sc := createJetStreamTaggedSuperClusterWithGWProxy(t, gwm)
  5602  	defer sc.shutdown()
  5603  
  5604  	nc, js := jsClientConnect(t, sc.randomServer())
  5605  	defer nc.Close()
  5606  
  5607  	cfg := &nats.StreamConfig{
  5608  		Name:      "TEST",
  5609  		Subjects:  []string{"chunk.*"},
  5610  		Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}},
  5611  		Replicas:  3,
  5612  	}
  5613  
  5614  	// Place a stream in C1.
  5615  	_, err := js.AddStream(cfg, nats.MaxWait(10*time.Second))
  5616  	require_NoError(t, err)
  5617  
  5618  	chunk := bytes.Repeat([]byte("Z"), 1000*1024) // ~1MB
  5619  	// 256 MB
  5620  	for i := 0; i < 256; i++ {
  5621  		subj := fmt.Sprintf("chunk.%d", i)
  5622  		js.PublishAsync(subj, chunk)
  5623  	}
  5624  	select {
  5625  	case <-js.PublishAsyncComplete():
  5626  	case <-time.After(10 * time.Second):
  5627  		t.Fatalf("Did not receive completion signal")
  5628  	}
  5629  
  5630  	// C2, slow RTT.
  5631  	cfg.Placement = &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}}
  5632  	_, err = js.UpdateStream(cfg)
  5633  	require_NoError(t, err)
  5634  
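        	// Wait for the move: the leader should now be in C2 and the peer set should shrink
        	// back to the configured number of replicas.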
  5635  	checkFor(t, 20*time.Second, time.Second, func() error {
  5636  		si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second))
  5637  		if err != nil {
  5638  			return err
  5639  		}
  5640  		if si.Cluster.Name != "C2" {
  5641  			return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name)
  5642  		}
  5643  		if si.Cluster.Leader == _EMPTY_ {
  5644  			return fmt.Errorf("No leader yet")
  5645  		} else if !strings.HasPrefix(si.Cluster.Leader, "C2-") {
  5646  			return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader)
  5647  		}
  5648  		// Now we want to see that we shrink back to original.
  5649  		if len(si.Cluster.Replicas) != cfg.Replicas-1 {
  5650  			return fmt.Errorf("Expected %d replicas, got %d", cfg.Replicas-1, len(si.Cluster.Replicas))
  5651  		}
  5652  		return nil
  5653  	})
  5654  }
  5655  
  5656  // https://github.com/nats-io/nats-server/issues/3455
  5657  func TestNoRaceJetStreamConcurrentPullConsumerBatch(t *testing.T) {
  5658  	s := RunBasicJetStreamServer(t)
  5659  	defer s.Shutdown()
  5660  
  5661  	nc, js := jsClientConnect(t, s)
  5662  	defer nc.Close()
  5663  
  5664  	_, err := js.AddStream(&nats.StreamConfig{
  5665  		Name:      "TEST",
  5666  		Subjects:  []string{"ORDERS.*"},
  5667  		Storage:   nats.MemoryStorage,
  5668  		Retention: nats.WorkQueuePolicy,
  5669  	})
  5670  	require_NoError(t, err)
  5671  
  5672  	toSend := int32(100_000)
  5673  
  5674  	for i := 0; i < 100_000; i++ {
  5675  		subj := fmt.Sprintf("ORDERS.%d", i+1)
  5676  		js.PublishAsync(subj, []byte("BUY"))
  5677  	}
  5678  	select {
  5679  	case <-js.PublishAsyncComplete():
  5680  	case <-time.After(5 * time.Second):
  5681  		t.Fatalf("Did not receive completion signal")
  5682  	}
  5683  
  5684  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5685  		Durable:       "PROCESSOR",
  5686  		AckPolicy:     nats.AckExplicitPolicy,
  5687  		MaxAckPending: 5000,
  5688  	})
  5689  	require_NoError(t, err)
  5690  
  5691  	nc, js = jsClientConnect(t, s)
  5692  	defer nc.Close()
  5693  
  5694  	sub1, err := js.PullSubscribe(_EMPTY_, _EMPTY_, nats.Bind("TEST", "PROCESSOR"))
  5695  	require_NoError(t, err)
  5696  
  5697  	nc, js = jsClientConnect(t, s)
  5698  	defer nc.Close()
  5699  
  5700  	sub2, err := js.PullSubscribe(_EMPTY_, _EMPTY_, nats.Bind("TEST", "PROCESSOR"))
  5701  	require_NoError(t, err)
  5702  
  5703  	startCh := make(chan bool)
  5704  
  5705  	var received int32
  5706  
  5707  	wg := sync.WaitGroup{}
  5708  
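        	// Two subscriptions bound to the same durable compete for batches; between them they
        	// must receive all of the messages.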
  5709  	fetchSize := 1000
  5710  	fetch := func(sub *nats.Subscription) {
  5711  		<-startCh
  5712  		defer wg.Done()
  5713  
  5714  		for {
  5715  			msgs, err := sub.Fetch(fetchSize, nats.MaxWait(time.Second))
  5716  			if atomic.AddInt32(&received, int32(len(msgs))) >= toSend {
  5717  				break
  5718  			}
  5719  			// We should always receive a full batch here unless this is the last, competing fetch.
  5720  			if err != nil || len(msgs) != fetchSize {
  5721  				break
  5722  			}
  5723  			for _, m := range msgs {
  5724  				m.Ack()
  5725  			}
  5726  		}
  5727  	}
  5728  
  5729  	wg.Add(2)
  5730  
  5731  	go fetch(sub1)
  5732  	go fetch(sub2)
  5733  
  5734  	close(startCh)
  5735  
  5736  	wg.Wait()
  5737  	require_True(t, received == toSend)
  5738  }
  5739  
  5740  func TestNoRaceJetStreamManyPullConsumersNeedAckOptimization(t *testing.T) {
  5741  	// Uncomment to run. Do not want as part of Travis tests atm.
  5742  	// Run with cpu and memory profiling to make sure we have improved.
  5743  	skip(t)
  5744  
  5745  	s := RunBasicJetStreamServer(t)
  5746  	defer s.Shutdown()
  5747  
  5748  	nc, js := jsClientConnect(t, s)
  5749  	defer nc.Close()
  5750  
  5751  	_, err := js.AddStream(&nats.StreamConfig{
  5752  		Name:      "ORDERS",
  5753  		Subjects:  []string{"ORDERS.*"},
  5754  		Storage:   nats.MemoryStorage,
  5755  		Retention: nats.InterestPolicy,
  5756  	})
  5757  	require_NoError(t, err)
  5758  
  5759  	toSend := 100_000
  5760  	numConsumers := 500
  5761  
  5762  	// Create 500 consumers
  5763  	for i := 1; i <= numConsumers; i++ {
  5764  		_, err := js.AddConsumer("ORDERS", &nats.ConsumerConfig{
  5765  			Durable:       fmt.Sprintf("ORDERS_%d", i),
  5766  			FilterSubject: fmt.Sprintf("ORDERS.%d", i),
  5767  			AckPolicy:     nats.AckAllPolicy,
  5768  		})
  5769  		require_NoError(t, err)
  5770  	}
  5771  
  5772  	for i := 1; i <= toSend; i++ {
  5773  		subj := fmt.Sprintf("ORDERS.%d", i%numConsumers+1)
  5774  		js.PublishAsync(subj, []byte("HELLO"))
  5775  	}
  5776  	select {
  5777  	case <-js.PublishAsyncComplete():
  5778  	case <-time.After(5 * time.Second):
  5779  		t.Fatalf("Did not receive completion signal")
  5780  	}
  5781  
  5782  	sub, err := js.PullSubscribe("ORDERS.500", "ORDERS_500")
  5783  	require_NoError(t, err)
  5784  
  5785  	fetchSize := toSend / numConsumers
  5786  	msgs, err := sub.Fetch(fetchSize, nats.MaxWait(time.Second))
  5787  	require_NoError(t, err)
  5788  
  5789  	last := msgs[len(msgs)-1]
  5790  	last.AckSync()
  5791  }
  5792  
  5793  // https://github.com/nats-io/nats-server/issues/3499
  5794  func TestNoRaceJetStreamDeleteConsumerWithInterestStreamAndHighSeqs(t *testing.T) {
  5795  	s := RunBasicJetStreamServer(t)
  5796  	defer s.Shutdown()
  5797  
  5798  	// Client for API requests.
  5799  	nc, js := jsClientConnect(t, s)
  5800  	defer nc.Close()
  5801  
  5802  	_, err := js.AddStream(&nats.StreamConfig{
  5803  		Name:      "TEST",
  5804  		Subjects:  []string{"log.>"},
  5805  		Retention: nats.InterestPolicy,
  5806  	})
  5807  	require_NoError(t, err)
  5808  
  5809  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5810  		Durable:   "c",
  5811  		AckPolicy: nats.AckExplicitPolicy,
  5812  	})
  5813  	require_NoError(t, err)
  5814  
  5815  	// Set baseline for time to delete so we can see linear increase as sequence numbers increase.
  5816  	start := time.Now()
  5817  	err = js.DeleteConsumer("TEST", "c")
  5818  	require_NoError(t, err)
  5819  	elapsed := time.Since(start)
  5820  
  5821  	// Crank up sequence numbers.
  5822  	msg := []byte(strings.Repeat("ZZZ", 128))
  5823  	for i := 0; i < 5_000_000; i++ {
  5824  		nc.Publish("log.Z", msg)
  5825  	}
  5826  	nc.Flush()
  5827  
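        	// With interest retention and no consumers those publishes are dropped immediately, but
        	// the stream's last sequence is now ~5M. Recreate the consumer at this high sequence.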
  5828  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5829  		Durable:   "c",
  5830  		AckPolicy: nats.AckExplicitPolicy,
  5831  	})
  5832  	require_NoError(t, err)
  5833  
  5834  	// We have a bug that spins unnecessarily through all the sequences between this consumer's
  5835  	// ack floor (0) and the last sequence of the stream. We detect it by checking whether the time
  5836  	// to delete is 100x longer. It should be comparable, since in both cases no messages exist in the stream.
  5837  	start = time.Now()
  5838  	err = js.DeleteConsumer("TEST", "c")
  5839  	require_NoError(t, err)
  5840  
  5841  	if e := time.Since(start); e > 100*elapsed {
  5842  		t.Fatalf("Consumer delete took too long: %v vs baseline %v", e, elapsed)
  5843  	}
  5844  }
  5845  
  5846  // Bug where we encode a timestamp that causes an error on decode, which in turn causes the server to panic.
  5847  // This can happen on consumer redelivery since the adjusted timestamps can be in the future and result
  5848  // in a negative encoding. If that encoding was exactly -1 seconds, it would cause decodeConsumerState to fail
  5849  // and the server to panic.
  5850  func TestNoRaceEncodeConsumerStateBug(t *testing.T) {
  5851  	for i := 0; i < 200_000; i++ {
  5852  		// Pretend we redelivered and updated the timestamp to reflect the new start time for expiration.
  5853  		// The bug will trip when time.Now() rounded to seconds in encode is 1 second below the truncated version
  5854  		// of pending.
  5855  		pending := Pending{Sequence: 1, Timestamp: time.Now().Add(time.Second).UnixNano()}
  5856  		state := ConsumerState{
  5857  			Delivered: SequencePair{Consumer: 1, Stream: 1},
  5858  			Pending:   map[uint64]*Pending{1: &pending},
  5859  		}
  5860  		buf := encodeConsumerState(&state)
  5861  		_, err := decodeConsumerState(buf)
  5862  		require_NoError(t, err)
  5863  	}
  5864  }
  5865  
  5866  // Performance impact on stream ingress with large number of consumers.
  5867  func TestNoRaceJetStreamLargeNumConsumersPerfImpact(t *testing.T) {
  5868  	skip(t)
  5869  
  5870  	s := RunBasicJetStreamServer(t)
  5871  	defer s.Shutdown()
  5872  
  5873  	// Client for API requests.
  5874  	nc, js := jsClientConnect(t, s)
  5875  	defer nc.Close()
  5876  
  5877  	_, err := js.AddStream(&nats.StreamConfig{
  5878  		Name:     "TEST",
  5879  		Subjects: []string{"foo"},
  5880  	})
  5881  	require_NoError(t, err)
  5882  
  5883  	// Baseline with no consumers.
  5884  	toSend := 1_000_000
  5885  	start := time.Now()
  5886  	for i := 0; i < toSend; i++ {
  5887  		js.PublishAsync("foo", []byte("OK"))
  5888  	}
  5889  	<-js.PublishAsyncComplete()
  5890  	tt := time.Since(start)
  5891  	fmt.Printf("Base time is %v\n", tt)
  5892  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5893  
  5894  	err = js.PurgeStream("TEST")
  5895  	require_NoError(t, err)
  5896  
  5897  	// Now add in 10 idle consumers.
  5898  	for i := 1; i <= 10; i++ {
  5899  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5900  			Durable:   fmt.Sprintf("d-%d", i),
  5901  			AckPolicy: nats.AckExplicitPolicy,
  5902  		})
  5903  		require_NoError(t, err)
  5904  	}
  5905  
  5906  	start = time.Now()
  5907  	for i := 0; i < toSend; i++ {
  5908  		js.PublishAsync("foo", []byte("OK"))
  5909  	}
  5910  	<-js.PublishAsyncComplete()
  5911  	tt = time.Since(start)
  5912  	fmt.Printf("\n10 consumers time is %v\n", tt)
  5913  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5914  
  5915  	err = js.PurgeStream("TEST")
  5916  	require_NoError(t, err)
  5917  
  5918  	// Now add in 90 more idle consumers.
  5919  	for i := 11; i <= 100; i++ {
  5920  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5921  			Durable:   fmt.Sprintf("d-%d", i),
  5922  			AckPolicy: nats.AckExplicitPolicy,
  5923  		})
  5924  		require_NoError(t, err)
  5925  	}
  5926  
  5927  	start = time.Now()
  5928  	for i := 0; i < toSend; i++ {
  5929  		js.PublishAsync("foo", []byte("OK"))
  5930  	}
  5931  	<-js.PublishAsyncComplete()
  5932  	tt = time.Since(start)
  5933  	fmt.Printf("\n100 consumers time is %v\n", tt)
  5934  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5935  
  5936  	err = js.PurgeStream("TEST")
  5937  	require_NoError(t, err)
  5938  
  5939  	// Now add in 900 more
  5940  	for i := 101; i <= 1000; i++ {
  5941  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5942  			Durable:   fmt.Sprintf("d-%d", i),
  5943  			AckPolicy: nats.AckExplicitPolicy,
  5944  		})
  5945  		require_NoError(t, err)
  5946  	}
  5947  
  5948  	start = time.Now()
  5949  	for i := 0; i < toSend; i++ {
  5950  		js.PublishAsync("foo", []byte("OK"))
  5951  	}
  5952  	<-js.PublishAsyncComplete()
  5953  	tt = time.Since(start)
  5954  	fmt.Printf("\n1000 consumers time is %v\n", tt)
  5955  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5956  }
  5957  
  5958  // Performance impact on large number of consumers but sparse delivery.
  5959  func TestNoRaceJetStreamLargeNumConsumersSparseDelivery(t *testing.T) {
  5960  	skip(t)
  5961  
  5962  	s := RunBasicJetStreamServer(t)
  5963  	defer s.Shutdown()
  5964  
  5965  	// Client for API requests.
  5966  	nc, js := jsClientConnect(t, s)
  5967  	defer nc.Close()
  5968  
  5969  	_, err := js.AddStream(&nats.StreamConfig{
  5970  		Name:     "TEST",
  5971  		Subjects: []string{"ID.*"},
  5972  	})
  5973  	require_NoError(t, err)
  5974  
  5975  	// Now add in ~10k consumers on different subjects.
  5976  	for i := 3; i <= 10_000; i++ {
  5977  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5978  			Durable:       fmt.Sprintf("d-%d", i),
  5979  			FilterSubject: fmt.Sprintf("ID.%d", i),
  5980  			AckPolicy:     nats.AckNonePolicy,
  5981  		})
  5982  		require_NoError(t, err)
  5983  	}
  5984  
  5985  	toSend := 100_000
  5986  
  5987  	// Bind a consumer to ID.2.
  5988  	var received int
  5989  	done := make(chan bool)
  5990  
  5991  	nc, js = jsClientConnect(t, s)
  5992  	defer nc.Close()
  5993  
  5994  	mh := func(m *nats.Msg) {
  5995  		received++
  5996  		if received >= toSend {
  5997  			close(done)
  5998  		}
  5999  	}
  6000  	_, err = js.Subscribe("ID.2", mh)
  6001  	require_NoError(t, err)
  6002  
  6003  	last := make(chan bool)
  6004  	_, err = js.Subscribe("ID.1", func(_ *nats.Msg) { close(last) })
  6005  	require_NoError(t, err)
  6006  
  6007  	nc, _ = jsClientConnect(t, s)
  6008  	defer nc.Close()
  6009  	js, err = nc.JetStream(nats.PublishAsyncMaxPending(8 * 1024))
  6010  	require_NoError(t, err)
  6011  
  6012  	start := time.Now()
  6013  	for i := 0; i < toSend; i++ {
  6014  		js.PublishAsync("ID.2", []byte("ok"))
  6015  	}
  6016  	// Check latency for this one message.
  6017  	// This will show the issue better than throughput which can bypass signal processing.
  6018  	js.PublishAsync("ID.1", []byte("ok"))
  6019  
  6020  	select {
  6021  	case <-done:
  6022  		break
  6023  	case <-time.After(10 * time.Second):
  6024  		t.Fatalf("Failed to receive all messages: %d of %d\n", received, toSend)
  6025  	}
  6026  
  6027  	tt := time.Since(start)
  6028  	fmt.Printf("Took %v to receive %d msgs\n", tt, toSend)
  6029  	fmt.Printf("%.0f msgs/s\n", float64(toSend)/tt.Seconds())
  6030  
  6031  	select {
  6032  	case <-last:
  6033  		break
  6034  	case <-time.After(30 * time.Second):
  6035  		t.Fatalf("Failed to receive last message\n")
  6036  	}
  6037  	lt := time.Since(start)
  6038  
  6039  	fmt.Printf("Took %v to receive last msg\n", lt)
  6040  }
  6041  
  6042  func TestNoRaceJetStreamEndToEndLatency(t *testing.T) {
  6043  	s := RunBasicJetStreamServer(t)
  6044  	defer s.Shutdown()
  6045  
  6046  	// Client for API requests.
  6047  	nc, js := jsClientConnect(t, s)
  6048  	defer nc.Close()
  6049  
  6050  	_, err := js.AddStream(&nats.StreamConfig{
  6051  		Name:     "TEST",
  6052  		Subjects: []string{"foo"},
  6053  	})
  6054  	require_NoError(t, err)
  6055  
  6056  	nc, js = jsClientConnect(t, s)
  6057  	defer nc.Close()
  6058  
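        	// Publish one message at a time and track the worst case publish-to-delivery latency.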
  6059  	var sent time.Time
  6060  	var max time.Duration
  6061  	next := make(chan struct{})
  6062  
  6063  	mh := func(m *nats.Msg) {
  6064  		received := time.Now()
  6065  		tt := received.Sub(sent)
  6066  		if max == 0 || tt > max {
  6067  			max = tt
  6068  		}
  6069  		next <- struct{}{}
  6070  	}
  6071  	sub, err := js.Subscribe("foo", mh)
  6072  	require_NoError(t, err)
  6073  
  6074  	nc, js = jsClientConnect(t, s)
  6075  	defer nc.Close()
  6076  
  6077  	toSend := 50_000
  6078  	for i := 0; i < toSend; i++ {
  6079  		sent = time.Now()
  6080  		js.Publish("foo", []byte("ok"))
  6081  		<-next
  6082  	}
  6083  	sub.Unsubscribe()
  6084  
  6085  	if max > 250*time.Millisecond {
  6086  		t.Fatalf("Expected max latency to be < 250ms, got %v", max)
  6087  	}
  6088  }
  6089  
  6090  func TestNoRaceJetStreamClusterEnsureWALCompact(t *testing.T) {
  6091  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6092  	defer c.shutdown()
  6093  
  6094  	nc, js := jsClientConnect(t, c.randomServer())
  6095  	defer nc.Close()
  6096  
  6097  	_, err := js.AddStream(&nats.StreamConfig{
  6098  		Name:     "TEST",
  6099  		Subjects: []string{"foo"},
  6100  		Replicas: 3,
  6101  	})
  6102  	require_NoError(t, err)
  6103  
  6104  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  6105  		Durable:        "dlc",
  6106  		DeliverSubject: "zz",
  6107  		Replicas:       3,
  6108  	})
  6109  	require_NoError(t, err)
  6110  
  6111  	// Force snapshot on stream leader.
  6112  	sl := c.streamLeader(globalAccountName, "TEST")
  6113  	mset, err := sl.GlobalAccount().lookupStream("TEST")
  6114  	require_NoError(t, err)
  6115  	node := mset.raftNode()
  6116  	require_True(t, node != nil)
  6117  
  6118  	err = node.InstallSnapshot(mset.stateSnapshot())
  6119  	require_NoError(t, err)
  6120  
  6121  	// Now publish more than should be needed to cause an additional snapshot.
  6122  	ns := 75_000
  6123  	for i := 0; i <= ns; i++ {
  6124  		_, err := js.Publish("foo", []byte("bar"))
  6125  		require_NoError(t, err)
  6126  	}
  6127  
  6128  	// Grab progress and use that to look into WAL entries.
  6129  	_, _, applied := node.Progress()
  6130  	// If ne == ns that means snapshots and compacts were not happening when
  6131  	// they should have been.
  6132  	if ne, _ := node.Applied(applied); ne >= uint64(ns) {
  6133  		t.Fatalf("Did not snapshot and compact the raft WAL, entries == %d", ne)
  6134  	}
  6135  
  6136  	// Now check the consumer.
  6137  	// Force a snapshot on the consumer leader.
  6138  	cl := c.consumerLeader(globalAccountName, "TEST", "dlc")
  6139  	mset, err = cl.GlobalAccount().lookupStream("TEST")
  6140  	require_NoError(t, err)
  6141  	o := mset.lookupConsumer("dlc")
  6142  	require_True(t, o != nil)
  6143  
  6144  	node = o.raftNode()
  6145  	require_True(t, node != nil)
  6146  
  6147  	snap, err := o.store.EncodedState()
  6148  	require_NoError(t, err)
  6149  	err = node.InstallSnapshot(snap)
  6150  	require_NoError(t, err)
  6151  
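        	// Drive the push consumer by receiving and acking everything on its deliver subject so
        	// its raft log grows enough to snapshot and compact again.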
  6152  	received, done := 0, make(chan bool, 1)
  6153  
  6154  	nc.Subscribe("zz", func(m *nats.Msg) {
  6155  		received++
  6156  		if received >= ns {
  6157  			select {
  6158  			case done <- true:
  6159  			default:
  6160  			}
  6161  		}
  6162  		m.Ack()
  6163  	})
  6164  
  6165  	select {
  6166  	case <-done:
  6167  	case <-time.After(10 * time.Second):
  6168  		t.Fatalf("Did not receive all %d msgs, only %d", ns, received)
  6170  	}
  6171  
  6172  	// Do same trick and check that WAL was compacted.
  6173  	// Grab progress and use that to look into WAL entries.
  6174  	_, _, applied = node.Progress()
  6175  	// If ne == ns that means snapshots and compacts were not happening when
  6176  	// they should have been.
  6177  	if ne, _ := node.Applied(applied); ne >= uint64(ns) {
  6178  		t.Fatalf("Did not snapshot and compact the raft WAL, entries == %d", ne)
  6179  	}
  6180  }
  6181  
  6182  func TestNoRaceFileStoreStreamMaxAgePerformance(t *testing.T) {
  6183  	// Uncomment to run.
  6184  	skip(t)
  6185  
  6186  	storeDir := t.TempDir()
  6187  	maxAge := 5 * time.Second
  6188  
  6189  	fs, err := newFileStore(
  6190  		FileStoreConfig{StoreDir: storeDir},
  6191  		StreamConfig{Name: "MA",
  6192  			Subjects: []string{"foo.*"},
  6193  			MaxAge:   maxAge,
  6194  			Storage:  FileStorage},
  6195  	)
  6196  	require_NoError(t, err)
  6197  	defer fs.Stop()
  6198  
  6199  	// Simulate a callback similar to consumers decrementing.
  6200  	var mu sync.RWMutex
  6201  	var pending int64
  6202  
  6203  	fs.RegisterStorageUpdates(func(md, bd int64, seq uint64, subj string) {
  6204  		mu.Lock()
  6205  		defer mu.Unlock()
  6206  		pending += md
  6207  	})
  6208  
  6209  	start, num, subj := time.Now(), 0, "foo.foo"
  6210  
  6211  	timeout := start.Add(maxAge)
  6212  	for time.Now().Before(timeout) {
  6213  		// We will store in blocks of 100.
  6214  		for i := 0; i < 100; i++ {
  6215  			_, _, err := fs.StoreMsg(subj, nil, []byte("Hello World"))
  6216  			require_NoError(t, err)
  6217  			num++
  6218  		}
  6219  	}
  6220  	elapsed := time.Since(start)
  6221  	fmt.Printf("Took %v to store %d\n", elapsed, num)
  6222  	fmt.Printf("%.0f msgs/sec\n", float64(num)/elapsed.Seconds())
  6223  
  6224  	// Now keep running for 2x longer knowing we are expiring messages in the background.
  6225  	// We want to see the effect on performance.
  6226  
  6227  	start = time.Now()
  6228  	timeout = start.Add(maxAge * 2)
  6229  
  6230  	for time.Now().Before(timeout) {
  6231  		// We will store in blocks of 100.
  6232  		for i := 0; i < 100; i++ {
  6233  			_, _, err := fs.StoreMsg(subj, nil, []byte("Hello World"))
  6234  			require_NoError(t, err)
  6235  			num++
  6236  		}
  6237  	}
  6238  	elapsed = time.Since(start)
  6239  	fmt.Printf("Took %v to store %d\n", elapsed, num)
  6240  	fmt.Printf("%.0f msgs/sec\n", float64(num)/elapsed.Seconds())
  6241  }
  6242  
  6243  // SequenceSet memory tests vs dmaps.
  6244  func TestNoRaceSeqSetSizeComparison(t *testing.T) {
  6245  	// Create 5M random entries (dupes possible but ok for this test) out of a ~7M range.
  6246  	num := 5_000_000
  6247  	max := 7_000_000
  6248  
  6249  	seqs := make([]uint64, 0, num)
  6250  	for i := 0; i < num; i++ {
  6251  		n := uint64(rand.Int63n(int64(max + 1)))
  6252  		seqs = append(seqs, n)
  6253  	}
  6254  
  6255  	runtime.GC()
  6256  	// Disable to get stable results.
  6257  	gcp := debug.SetGCPercent(-1)
  6258  	defer debug.SetGCPercent(gcp)
  6259  
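        	// Snapshot heap in use before and after building each structure; the deltas approximate
        	// the footprint of the plain map versus the SequenceSet.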
  6260  	mem := runtime.MemStats{}
  6261  	runtime.ReadMemStats(&mem)
  6262  	inUseBefore := mem.HeapInuse
  6263  
  6264  	dmap := make(map[uint64]struct{}, num)
  6265  	for _, n := range seqs {
  6266  		dmap[n] = struct{}{}
  6267  	}
  6268  	runtime.ReadMemStats(&mem)
  6269  	dmapUse := mem.HeapInuse - inUseBefore
  6270  	inUseBefore = mem.HeapInuse
  6271  
  6272  	// Now do SequenceSet on same dataset.
  6273  	var sset avl.SequenceSet
  6274  	for _, n := range seqs {
  6275  		sset.Insert(n)
  6276  	}
  6277  
  6278  	runtime.ReadMemStats(&mem)
  6279  	seqSetUse := mem.HeapInuse - inUseBefore
  6280  
  6281  	if seqSetUse > 2*1024*1024 {
  6282  		t.Fatalf("Expected SequenceSet size to be < 2M, got %v", friendlyBytes(int64(seqSetUse)))
  6283  	}
  6284  	if seqSetUse*50 > dmapUse {
  6285  		t.Fatalf("Expected SequenceSet to be at least 50x better than the dmap approach: %v vs %v",
  6286  			friendlyBytes(int64(seqSetUse)),
  6287  			friendlyBytes(int64(dmapUse)),
  6288  		)
  6289  	}
  6290  }
  6291  
  6292  // FilteredState for ">" with large interior deletes was very slow.
  6293  func TestNoRaceFileStoreFilteredStateWithLargeDeletes(t *testing.T) {
  6294  	storeDir := t.TempDir()
  6295  
  6296  	fs, err := newFileStore(
  6297  		FileStoreConfig{StoreDir: storeDir, BlockSize: 4096},
  6298  		StreamConfig{Name: "zzz", Subjects: []string{"foo"}, Storage: FileStorage},
  6299  	)
  6300  	require_NoError(t, err)
  6301  	defer fs.Stop()
  6302  
  6303  	subj, msg := "foo", []byte("Hello World")
  6304  
  6305  	toStore := 500_000
  6306  	for i := 0; i < toStore; i++ {
  6307  		_, _, err := fs.StoreMsg(subj, nil, msg)
  6308  		require_NoError(t, err)
  6309  	}
  6310  
  6311  	// Now delete every other one.
  6312  	for seq := 2; seq <= toStore; seq += 2 {
  6313  		_, err := fs.RemoveMsg(uint64(seq))
  6314  		require_NoError(t, err)
  6315  	}
  6316  
  6317  	runtime.GC()
  6318  	// Disable to get stable results.
  6319  	gcp := debug.SetGCPercent(-1)
  6320  	defer debug.SetGCPercent(gcp)
  6321  
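        	// With every other sequence deleted, computing the filtered state should still be fast
        	// (checked below against a 500µs budget).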
  6322  	start := time.Now()
  6323  	fss := fs.FilteredState(1, _EMPTY_)
  6324  	elapsed := time.Since(start)
  6325  
  6326  	require_True(t, fss.Msgs == uint64(toStore/2))
  6327  	require_True(t, elapsed < 500*time.Microsecond)
  6328  }
  6329  
  6330  // ConsumerInfo seems to be called quite a bit more than we had anticipated.
  6331  // Under certain circumstances, since we reset num pending, this can be very costly.
  6332  // We will use the fast path to alleviate that performance bottleneck but also make
  6333  // sure we are still being accurate.
  6334  func TestNoRaceJetStreamClusterConsumerInfoSpeed(t *testing.T) {
  6335  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6336  	defer c.shutdown()
  6337  
  6338  	c.waitOnLeader()
  6339  	server := c.randomNonLeader()
  6340  
  6341  	nc, js := jsClientConnect(t, server)
  6342  	defer nc.Close()
  6343  
  6344  	_, err := js.AddStream(&nats.StreamConfig{
  6345  		Name:     "TEST",
  6346  		Subjects: []string{"events.>"},
  6347  		Replicas: 3,
  6348  	})
  6349  	require_NoError(t, err)
  6350  
  6351  	// The issue is compounded when we have lots of different subjects captured
  6352  	// by a terminal full wildcard (fwc). The consumer will have a terminal partial wildcard (pwc).
  6353  	// Here we make all subjects unique.
  6354  
  6355  	sub, err := js.PullSubscribe("events.*", "DLC")
  6356  	require_NoError(t, err)
  6357  
  6358  	toSend := 250_000
  6359  	for i := 0; i < toSend; i++ {
  6360  		subj := fmt.Sprintf("events.%d", i+1)
  6361  		js.PublishAsync(subj, []byte("ok"))
  6362  	}
  6363  	select {
  6364  	case <-js.PublishAsyncComplete():
  6365  	case <-time.After(5 * time.Second):
  6366  		t.Fatalf("Did not receive completion signal")
  6367  	}
  6368  
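        	// ConsumerInfo recalculates num pending; with the fast path it should return within a
        	// few milliseconds and still report accurate numbers.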
  6369  	checkNumPending := func(expected int) {
  6370  		t.Helper()
  6371  		start := time.Now()
  6372  		ci, err := js.ConsumerInfo("TEST", "DLC")
  6373  		require_NoError(t, err)
  6374  		// Make sure these are fast now.
  6375  		if elapsed := time.Since(start); elapsed > 5*time.Millisecond {
  6376  			t.Fatalf("ConsumerInfo took too long: %v", elapsed)
  6377  		}
  6378  		// Make sure pending == expected.
  6379  		if ci.NumPending != uint64(expected) {
  6380  			t.Fatalf("Expected %d NumPending, got %d", expected, ci.NumPending)
  6381  		}
  6382  	}
  6383  	// Make sure in simple case it is correct.
  6384  	checkNumPending(toSend)
  6385  
  6386  	// Do a few acks.
  6387  	toAck := 25
  6388  	for _, m := range fetchMsgs(t, sub, 25, time.Second) {
  6389  		err = m.AckSync()
  6390  		require_NoError(t, err)
  6391  	}
  6392  	checkNumPending(toSend - toAck)
  6393  
  6394  	// Now do a purge such that we only keep so many.
  6395  	// We want to make sure we do the right thing here and have correct calculations.
  6396  	toKeep := 100_000
  6397  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Keep: uint64(toKeep)})
  6398  	require_NoError(t, err)
  6399  
  6400  	checkNumPending(toKeep)
  6401  }
  6402  
  6403  func TestNoRaceJetStreamKVAccountWithServerRestarts(t *testing.T) {
  6404  	// Uncomment to run. Needs fast machine to not time out on KeyValue lookup.
  6405  	skip(t)
  6406  
  6407  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6408  	defer c.shutdown()
  6409  
  6410  	nc, js := jsClientConnect(t, c.randomServer())
  6411  	defer nc.Close()
  6412  
  6413  	_, err := js.CreateKeyValue(&nats.KeyValueConfig{
  6414  		Bucket:   "TEST",
  6415  		Replicas: 3,
  6416  	})
  6417  	require_NoError(t, err)
  6418  
  6419  	npubs := 10_000
  6420  	par := 8
  6421  	iter := 2
  6422  	nsubjs := 250
  6423  
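        	// Workload: 8 writers x 10k puts spread over 250 keys, for 2 iterations; after kicking
        	// off each round of writers we roll-restart every server in the cluster.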
  6424  	wg := sync.WaitGroup{}
  6425  	putKeys := func() {
  6426  		wg.Add(1)
  6427  		go func() {
  6428  			defer wg.Done()
  6429  			nc, js := jsClientConnect(t, c.randomServer())
  6430  			defer nc.Close()
  6431  			kv, err := js.KeyValue("TEST")
  6432  			require_NoError(t, err)
  6433  
  6434  			for i := 0; i < npubs; i++ {
  6435  				subj := fmt.Sprintf("KEY-%d", rand.Intn(nsubjs))
  6436  				if _, err := kv.PutString(subj, "hello"); err != nil {
  6437  					nc, js := jsClientConnect(t, c.randomServer())
  6438  					defer nc.Close()
  6439  					kv, err = js.KeyValue("TEST")
  6440  					require_NoError(t, err)
  6441  				}
  6442  			}
  6443  		}()
  6444  	}
  6445  
  6446  	restartServers := func() {
  6447  		time.Sleep(2 * time.Second)
  6448  		// Rotate through and restart the servers.
  6449  		for _, server := range c.servers {
  6450  			server.Shutdown()
  6451  			restarted := c.restartServer(server)
  6452  			checkFor(t, time.Second, 200*time.Millisecond, func() error {
  6453  				hs := restarted.healthz(&HealthzOptions{
  6454  					JSEnabled:    true,
  6455  					JSServerOnly: true,
  6456  				})
  6457  				if hs.Error != _EMPTY_ {
  6458  					return errors.New(hs.Error)
  6459  				}
  6460  				return nil
  6461  			})
  6462  		}
  6463  		c.waitOnLeader()
  6464  		c.waitOnStreamLeader(globalAccountName, "KV_TEST")
  6465  	}
  6466  
  6467  	for n := 0; n < iter; n++ {
  6468  		for i := 0; i < par; i++ {
  6469  			putKeys()
  6470  		}
  6471  		restartServers()
  6472  	}
  6473  	wg.Wait()
  6474  
  6475  	nc, js = jsClientConnect(t, c.randomServer())
  6476  	defer nc.Close()
  6477  
  6478  	si, err := js.StreamInfo("KV_TEST")
  6479  	require_NoError(t, err)
  6480  	require_True(t, si.State.NumSubjects == uint64(nsubjs))
  6481  }
  6482  
  6483  // Test for consumer create when the subject cardinality is high and the
  6484  // consumer is filtered with a wildcard that forces linear scans.
  6485  // We have an optimization that uses in-memory structures in the filestore to speed this up,
  6486  // but only when asking to scan everything (DeliverAll).
  6487  func TestNoRaceJetStreamConsumerCreateTimeNumPending(t *testing.T) {
  6488  	s := RunBasicJetStreamServer(t)
  6489  	defer s.Shutdown()
  6490  
  6491  	nc, js := jsClientConnect(t, s)
  6492  	defer nc.Close()
  6493  
  6494  	_, err := js.AddStream(&nats.StreamConfig{
  6495  		Name:     "TEST",
  6496  		Subjects: []string{"events.>"},
  6497  	})
  6498  	require_NoError(t, err)
  6499  
  6500  	n := 500_000
  6501  	msg := bytes.Repeat([]byte("X"), 8*1024)
  6502  
  6503  	for i := 0; i < n; i++ {
  6504  		subj := fmt.Sprintf("events.%d", rand.Intn(100_000))
  6505  		js.PublishAsync(subj, msg)
  6506  	}
  6507  	select {
  6508  	case <-js.PublishAsyncComplete():
  6509  	case <-time.After(5 * time.Second):
  6510  	}
  6511  
  6512  	// Should stay under 5ms now, but for Travis variability say 50ms.
  6513  	threshold := 50 * time.Millisecond
  6514  
  6515  	start := time.Now()
  6516  	_, err = js.PullSubscribe("events.*", "dlc")
  6517  	require_NoError(t, err)
  6518  	if elapsed := time.Since(start); elapsed > threshold {
  6519  		t.Fatalf("Consumer create took longer than expected, %v vs %v", elapsed, threshold)
  6520  	}
  6521  
  6522  	start = time.Now()
  6523  	_, err = js.PullSubscribe("events.99999", "xxx")
  6524  	require_NoError(t, err)
  6525  	if elapsed := time.Since(start); elapsed > threshold {
  6526  		t.Fatalf("Consumer create took longer than expected, %v vs %v", elapsed, threshold)
  6527  	}
  6528  
  6529  	start = time.Now()
  6530  	_, err = js.PullSubscribe(">", "zzz")
  6531  	require_NoError(t, err)
  6532  	if elapsed := time.Since(start); elapsed > threshold {
  6533  		t.Fatalf("Consumer create took longer than expected, %v vs %v", elapsed, threshold)
  6534  	}
  6535  }
  6536  
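        // Exercises rapid create/fetch/unsubscribe of ephemeral R1 memory consumers against an R3
        // stream while the servers are restarted, then verifies the consumer listing reports no
        // missing (ghost) consumer assignments.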
  6537  func TestNoRaceJetStreamClusterGhostConsumers(t *testing.T) {
  6538  	c := createJetStreamClusterExplicit(t, "GHOST", 3)
  6539  	defer c.shutdown()
  6540  
  6541  	nc, js := jsClientConnect(t, c.randomServer())
  6542  	defer nc.Close()
  6543  
  6544  	_, err := js.AddStream(&nats.StreamConfig{
  6545  		Name:     "TEST",
  6546  		Subjects: []string{"events.>"},
  6547  		Replicas: 3,
  6548  	})
  6549  	require_NoError(t, err)
  6550  
  6551  	for i := 0; i < 10; i++ {
  6552  		for j := 0; j < 10; j++ {
  6553  			require_NoError(t, nc.Publish(fmt.Sprintf("events.%d.%d", i, j), []byte(`test`)))
  6554  		}
  6555  	}
  6556  
  6557  	fetch := func(id int) {
  6558  		subject := fmt.Sprintf("events.%d.*", id)
  6559  		subscription, err := js.PullSubscribe(subject,
  6560  			_EMPTY_, // ephemeral consumer
  6561  			nats.DeliverAll(),
  6562  			nats.ReplayInstant(),
  6563  			nats.BindStream("TEST"),
  6564  			nats.ConsumerReplicas(1),
  6565  			nats.ConsumerMemoryStorage(),
  6566  		)
  6567  		if err != nil {
  6568  			return
  6569  		}
  6570  		defer subscription.Unsubscribe()
  6571  
  6572  		info, err := subscription.ConsumerInfo()
  6573  		if err != nil {
  6574  			return
  6575  		}
  6576  
  6577  		subscription.Fetch(int(info.NumPending))
  6578  	}
  6579  
  6580  	replay := func(ctx context.Context, id int) {
  6581  		for {
  6582  			select {
  6583  			case <-ctx.Done():
  6584  				return
  6585  			default:
  6586  				fetch(id)
  6587  			}
  6588  		}
  6589  	}
  6590  
  6591  	ctx, cancel := context.WithCancel(context.Background())
  6592  
  6593  	go replay(ctx, 0)
  6594  	go replay(ctx, 1)
  6595  	go replay(ctx, 2)
  6596  	go replay(ctx, 3)
  6597  	go replay(ctx, 4)
  6598  	go replay(ctx, 5)
  6599  	go replay(ctx, 6)
  6600  	go replay(ctx, 7)
  6601  	go replay(ctx, 8)
  6602  	go replay(ctx, 9)
  6603  
  6604  	time.Sleep(5 * time.Second)
  6605  
  6606  	for _, server := range c.servers {
  6607  		server.Shutdown()
  6608  		restarted := c.restartServer(server)
  6609  		checkFor(t, time.Second, 200*time.Millisecond, func() error {
  6610  			hs := restarted.healthz(&HealthzOptions{
  6611  				JSEnabled:    true,
  6612  				JSServerOnly: true,
  6613  			})
  6614  			if hs.Error != _EMPTY_ {
  6615  				return errors.New(hs.Error)
  6616  			}
  6617  			return nil
  6618  		})
  6619  		c.waitOnStreamLeader(globalAccountName, "TEST")
  6620  		time.Sleep(time.Second * 2)
  6621  		go replay(ctx, 5)
  6622  		go replay(ctx, 6)
  6623  		go replay(ctx, 7)
  6624  		go replay(ctx, 8)
  6625  		go replay(ctx, 9)
  6626  	}
  6627  
  6628  	time.Sleep(5 * time.Second)
  6629  	cancel()
  6630  
  6631  	getMissing := func() []string {
  6632  		m, err := nc.Request("$JS.API.CONSUMER.LIST.TEST", nil, time.Second*10)
  6633  		require_NoError(t, err)
  6634  
  6635  		var resp JSApiConsumerListResponse
  6636  		err = json.Unmarshal(m.Data, &resp)
  6637  		require_NoError(t, err)
  6638  		return resp.Missing
  6639  	}
  6640  
  6641  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  6642  		missing := getMissing()
  6643  		if len(missing) == 0 {
  6644  			return nil
  6645  		}
  6646  		return fmt.Errorf("Still have missing: %+v", missing)
  6647  	})
  6648  }
  6649  
  6650  // This tests a publish slowdown and general instability experienced in a setup similar to the one below.
  6651  // We have feeder streams that are all sourced into an aggregate stream. All streams use interest retention.
  6652  // We want to monitor the avg publish time for the sync publishers to the feeder streams, the ingest rate to
  6653  // the aggregate stream, and the general health of the consumers on the aggregate stream.
  6654  // Target publish rate is ~2k/s with a publish time of ~40-60ms that remains stable.
  6655  // We can also simulate max redeliveries that create interior deletes in the streams.
  6656  func TestNoRaceJetStreamClusterF3Setup(t *testing.T) {
  6657  	// Comment out the skip below to run. Needs to be on a pretty big machine. Not wanted as part of the Travis tests atm.
  6658  	skip(t)
  6659  
  6660  	// The proxy settings here and the test params below achieve ~60ms avg publish time
  6661  	// and ~2k msgs per sec inbound to the aggregate stream (on my machine, at least).
  6662  	np := clusterProxy{
  6663  		rtt:  2 * time.Millisecond,
  6664  		up:   1 * 1024 * 1024 * 1024, // 1gbit
  6665  		down: 1 * 1024 * 1024 * 1024, // 1gbit
  6666  	}
  6667  
  6668  	// Test params.
  6669  	numSourceStreams := 20
  6670  	numConsumersPerSource := 1
  6671  	numPullersPerConsumer := 50
  6672  	numPublishers := 100
  6673  	setHighStartSequence := false
  6674  	simulateMaxRedeliveries := false
  6675  	maxBadPubTimes := uint32(20)
  6676  	badPubThresh := 500 * time.Millisecond
  6677  	testTime := 5 * time.Minute // make sure to do --timeout=65m
  6678  
  6679  	t.Logf("Starting Test: Total Test Time %v", testTime)
  6680  
  6681  	c := createJetStreamClusterWithNetProxy(t, "R3S", 3, &np)
  6682  	defer c.shutdown()
  6683  
  6684  	// Do some quick sanity checking for latency stuff.
  6685  	{
  6686  		nc, js := jsClientConnect(t, c.randomServer())
  6687  		defer nc.Close()
  6688  
  6689  		_, err := js.AddStream(&nats.StreamConfig{
  6690  			Name:      "TEST",
  6691  			Replicas:  3,
  6692  			Subjects:  []string{"foo"},
  6693  			Retention: nats.InterestPolicy,
  6694  		})
  6695  		require_NoError(t, err)
  6696  		defer js.DeleteStream("TEST")
  6697  
  6698  		sl := c.streamLeader(globalAccountName, "TEST")
  6699  		nc, js = jsClientConnect(t, sl)
  6700  		defer nc.Close()
  6701  		start := time.Now()
  6702  		_, err = js.Publish("foo", []byte("hello"))
  6703  		require_NoError(t, err)
  6704  		// This is the best case; with the client connection overhead being close to free, this should still be at least > rtt.
  6705  		if elapsed := time.Since(start); elapsed < np.rtt {
  6706  			t.Fatalf("Expected publish time to be > %v, got %v", np.rtt, elapsed)
  6707  		}
  6708  
  6709  		nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  6710  		nc, js = jsClientConnect(t, nl)
  6711  		defer nc.Close()
  6712  		start = time.Now()
  6713  		_, err = js.Publish("foo", []byte("hello"))
  6714  		require_NoError(t, err)
  6715  		// This is the worst case: the message has to travel to the leader, then to the fastest replica, then back.
  6716  		// That should be about 3x rtt, so check that it is at least > 2x rtt.
  6717  		if elapsed := time.Since(start); elapsed < 2*np.rtt {
  6718  			t.Fatalf("Expected publish time to be > %v, got %v", 2*np.rtt, elapsed)
  6719  		}
  6720  	}
  6721  
  6722  	// Setup source streams.
  6723  	nc, js := jsClientConnect(t, c.randomServer())
  6724  	defer nc.Close()
  6725  
  6726  	t.Logf("Creating %d Source Streams", numSourceStreams)
  6727  
  6728  	var sources []string
  6729  	wg := sync.WaitGroup{}
  6730  	for i := 0; i < numSourceStreams; i++ {
  6731  		sname := fmt.Sprintf("EVENT-%s", nuid.Next())
  6732  		sources = append(sources, sname)
  6733  		wg.Add(1)
  6734  		go func(stream string) {
  6735  			defer wg.Done()
  6736  			t.Logf("  %q", stream)
  6737  			subj := fmt.Sprintf("%s.>", stream)
  6738  			_, err := js.AddStream(&nats.StreamConfig{
  6739  				Name:      stream,
  6740  				Subjects:  []string{subj},
  6741  				Replicas:  3,
  6742  				Retention: nats.InterestPolicy,
  6743  			})
  6744  			require_NoError(t, err)
  6745  			for j := 0; j < numConsumersPerSource; j++ {
  6746  				consumer := fmt.Sprintf("C%d", j)
  6747  				_, err := js.Subscribe(_EMPTY_, func(msg *nats.Msg) {
  6748  					msg.Ack()
  6749  				}, nats.BindStream(stream), nats.Durable(consumer), nats.ManualAck())
  6750  				require_NoError(t, err)
  6751  			}
  6752  		}(sname)
  6753  	}
  6754  	wg.Wait()
  6755  
  6756  	var streamSources []*nats.StreamSource
  6757  	for _, src := range sources {
  6758  		streamSources = append(streamSources, &nats.StreamSource{Name: src})
  6760  	}
  6761  
  6762  	t.Log("Creating Aggregate Stream")
  6763  
  6764  	// Now create the aggregate stream.
  6765  	_, err := js.AddStream(&nats.StreamConfig{
  6766  		Name:      "EVENTS",
  6767  		Replicas:  3,
  6768  		Retention: nats.InterestPolicy,
  6769  		Sources:   streamSources,
  6770  	})
  6771  	require_NoError(t, err)
  6772  
  6773  	// Set first sequence to a high number.
  6774  	if setHighStartSequence {
  6775  		require_NoError(t, js.PurgeStream("EVENTS", &nats.StreamPurgeRequest{Sequence: 32_000_001}))
  6776  	}
  6777  
  6778  	// Now create 2 pull consumers.
  6779  	_, err = js.PullSubscribe(_EMPTY_, "C1",
  6780  		nats.BindStream("EVENTS"),
  6781  		nats.MaxDeliver(1),
  6782  		nats.AckWait(10*time.Second),
  6783  		nats.ManualAck(),
  6784  	)
  6785  	require_NoError(t, err)
  6786  
  6787  	_, err = js.PullSubscribe(_EMPTY_, "C2",
  6788  		nats.BindStream("EVENTS"),
  6789  		nats.MaxDeliver(1),
  6790  		nats.AckWait(10*time.Second),
  6791  		nats.ManualAck(),
  6792  	)
  6793  	require_NoError(t, err)
  6794  
  6795  	t.Logf("Creating %d x 2 Pull Subscribers", numPullersPerConsumer)
  6796  
  6797  	// Now create the pullers.
  6798  	for _, subName := range []string{"C1", "C2"} {
  6799  		for i := 0; i < numPullersPerConsumer; i++ {
  6800  			go func(subName string) {
  6801  				nc, js := jsClientConnect(t, c.randomServer())
  6802  				defer nc.Close()
  6803  
  6804  				sub, err := js.PullSubscribe(_EMPTY_, subName,
  6805  					nats.BindStream("EVENTS"),
  6806  					nats.MaxDeliver(1),
  6807  					nats.AckWait(10*time.Second),
  6808  					nats.ManualAck(),
  6809  				)
  6810  				require_NoError(t, err)
  6811  
  6812  				for {
  6813  					msgs, err := sub.Fetch(25, nats.MaxWait(2*time.Second))
  6814  					if err != nil && err != nats.ErrTimeout {
  6815  						t.Logf("Exiting pull subscriber %q: %v", subName, err)
  6816  						return
  6817  					}
  6818  					// Shuffle
  6819  					rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  6820  
  6821  					// Wait for a random interval up to 100ms.
  6822  					time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
  6823  
  6824  					for _, m := range msgs {
  6825  						// Optionally simulate max redeliveries being hit: skipping a single ack is enough
  6826  						// to trigger it given the subscriber setup (MaxDeliver(1)).
  6827  						// 1 in 100_000 == 0.001%
  6828  						if simulateMaxRedeliveries && rand.Intn(100_000) == 0 {
  6829  							md, err := m.Metadata()
  6830  							require_NoError(t, err)
  6831  							t.Logf("** Skipping Ack: %d **", md.Sequence.Stream)
  6832  						} else {
  6833  							m.Ack()
  6834  						}
  6835  					}
  6836  				}
  6837  			}(subName)
  6838  		}
  6839  	}
  6840  
  6841  	// Now create feeder publishers.
  6842  	eventTypes := []string{"PAYMENT", "SUBMISSION", "CANCEL"}
  6843  
  6844  	msg := make([]byte, 2*1024) // 2k payload
  6845  	crand.Read(msg)
  6846  
  6847  	// For tracking pub times.
  6848  	var pubs int
  6849  	var totalPubTime time.Duration
  6850  	var pmu sync.Mutex
  6851  	last := time.Now()
  6852  
  6853  	updatePubStats := func(elapsed time.Duration) {
  6854  		pmu.Lock()
  6855  		defer pmu.Unlock()
  6856  		// Reset every 5s
  6857  		if time.Since(last) > 5*time.Second {
  6858  			pubs = 0
  6859  			totalPubTime = 0
  6860  			last = time.Now()
  6861  		}
  6862  		pubs++
  6863  		totalPubTime += elapsed
  6864  	}
  6865  	avgPubTime := func() time.Duration {
  6866  		pmu.Lock()
  6867  		np := pubs
  6868  		tpt := totalPubTime
  6869  		pmu.Unlock()
  6870  		if np == 0 {
        			// Guard against a divide by zero if no publishes were recorded in this window.
        			return 0
        		}
        		return tpt / time.Duration(np)
  6871  	}
  6872  
  6873  	t.Logf("Creating %d Publishers", numPublishers)
  6874  
  6875  	var numLimitsExceeded atomic.Uint32
  6876  	errCh := make(chan error, 100)
  6877  
  6878  	for i := 0; i < numPublishers; i++ {
  6879  		go func() {
  6880  			nc, js := jsClientConnect(t, c.randomServer())
  6881  			defer nc.Close()
  6882  
  6883  			for {
  6884  				// Grab a random source stream
  6885  				stream := sources[rand.Intn(len(sources))]
  6886  				// Grab random event type.
  6887  				evt := eventTypes[rand.Intn(len(eventTypes))]
  6888  				subj := fmt.Sprintf("%s.%s", stream, evt)
  6889  				start := time.Now()
  6890  				_, err := js.Publish(subj, msg)
  6891  				if err != nil {
  6892  					t.Logf("Exiting publisher: %v", err)
  6893  					return
  6894  				}
  6895  				elapsed := time.Since(start)
  6896  				if elapsed > badPubThresh {
  6897  					t.Logf("Publish time took more than expected: %v", elapsed)
  6898  					numLimitsExceeded.Add(1)
  6899  					if ne := numLimitsExceeded.Load(); ne > maxBadPubTimes {
  6900  						errCh <- fmt.Errorf("Too many exceeded times on publish: %d", ne)
  6901  						return
  6902  					}
  6903  				}
  6904  				updatePubStats(elapsed)
  6905  			}
  6906  		}()
  6907  	}
  6908  
  6909  	t.Log("Creating Monitoring Routine - Data in ~10s")
  6910  
  6911  	// Create monitoring routine.
  6912  	go func() {
  6913  		nc, js := jsClientConnect(t, c.randomServer())
  6914  		defer nc.Close()
  6915  
  6916  		fseq, lseq := uint64(0), uint64(0)
  6917  		for {
  6918  			// Grab consumers
  6919  			var minAckFloor uint64 = math.MaxUint64
  6920  			for _, consumer := range []string{"C1", "C2"} {
  6921  				ci, err := js.ConsumerInfo("EVENTS", consumer)
  6922  				if err != nil {
  6923  					t.Logf("Exiting Monitor: %v", err)
  6924  					return
  6925  				}
  6926  				if lseq > 0 {
  6927  					t.Logf("%s:\n  Delivered:\t%d\n  AckFloor:\t%d\n  AckPending:\t%d\n  NumPending:\t%d",
  6928  						consumer, ci.Delivered.Stream, ci.AckFloor.Stream, ci.NumAckPending, ci.NumPending)
  6929  				}
  6930  				if ci.AckFloor.Stream < minAckFloor {
  6931  					minAckFloor = ci.AckFloor.Stream
  6932  				}
  6933  			}
  6934  			// Now grab aggregate stream state.
  6935  			si, err := js.StreamInfo("EVENTS")
  6936  			if err != nil {
  6937  				t.Logf("Exiting Monitor: %v", err)
  6938  				return
  6939  			}
  6940  			state := si.State
  6941  			if lseq != 0 {
  6942  				t.Logf("Stream:\n  Msgs: \t%d\n  First:\t%d\n  Last: \t%d\n  Deletes:\t%d\n",
  6943  					state.Msgs, state.FirstSeq, state.LastSeq, state.NumDeleted)
  6944  				t.Logf("Publish Stats:\n  Msgs/s:\t%0.2f\n  Avg Pub:\t%v\n\n", float64(si.State.LastSeq-lseq)/5.0, avgPubTime())
  6945  				if si.State.FirstSeq < minAckFloor && si.State.FirstSeq == fseq {
  6946  					t.Log("Stream first seq < minimum ack floor")
  6947  				}
  6948  			}
  6949  			fseq, lseq = si.State.FirstSeq, si.State.LastSeq
  6950  			time.Sleep(5 * time.Second)
  6951  		}
  6953  	}()
  6954  
  6955  	select {
  6956  	case e := <-errCh:
  6957  		t.Fatal(e)
  6958  	case <-time.After(testTime):
  6959  		t.Fatalf("Did not receive completion signal")
  6960  	}
  6961  }
  6962  
  6963  // Unbalanced stretch cluster.
  6964  // S2 (stream leader) will have a slow path to S1 (via proxy) and S3 (consumer leader) will have a fast path.
  6965  //
  6966  //	 Route Ports
  6967  //		"S1": 14622
  6968  //		"S2": 15622
  6969  //		"S3": 16622
  6970  func createStretchUnbalancedCluster(t testing.TB) (c *cluster, np *netProxy) {
  6971  	t.Helper()
  6972  
  6973  	tmpl := `
  6974  	listen: 127.0.0.1:-1
  6975  	server_name: %s
  6976  	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  6977  
  6978  	cluster {
  6979  		name: "F3"
  6980  		listen: 127.0.0.1:%d
  6981  		routes = [%s]
  6982  	}
  6983  
  6984  	accounts {
  6985  		$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
  6986  	}
  6987  	`
  6988  	// Do these in order, S1, S2 (proxy) then S3.
  6989  	c = &cluster{t: t, servers: make([]*Server, 3), opts: make([]*Options, 3), name: "F3"}
  6990  
  6991  	// S1
  6992  	conf := fmt.Sprintf(tmpl, "S1", t.TempDir(), 14622, "route://127.0.0.1:15622, route://127.0.0.1:16622")
  6993  	c.servers[0], c.opts[0] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  6994  
  6995  	// S2
  6996  	// Create the proxy first. Connect this to S1. Make it slow by adding RTT through the proxy (1ms here).
  6997  	np = createNetProxy(1*time.Millisecond, 1024*1024*1024, 1024*1024*1024, "route://127.0.0.1:14622", true)
  6998  	routes := fmt.Sprintf("%s, route://127.0.0.1:16622", np.routeURL())
  6999  	conf = fmt.Sprintf(tmpl, "S2", t.TempDir(), 15622, routes)
  7000  	c.servers[1], c.opts[1] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  7001  
  7002  	// S3
  7003  	conf = fmt.Sprintf(tmpl, "S3", t.TempDir(), 16622, "route://127.0.0.1:14622, route://127.0.0.1:15622")
  7004  	c.servers[2], c.opts[2] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  7005  
  7006  	c.checkClusterFormed()
  7007  	c.waitOnClusterReady()
  7008  
  7009  	return c, np
  7010  }
  7011  
  7012  // We test an interest based stream in a cluster that has a node with asymmetric paths from
  7013  // the stream leader and the consumer leader, such that the consumer leader path is fast and
  7014  // replicated acks arrive sooner than the actual message. This path was considered, but was
  7015  // categorized as very rare and was expensive since it tried to forward a new stream msg delete
  7016  // proposal to the original stream leader. It now deals with the issue locally and does not
  7017  // slow down the ingest rate for the stream's publishers.
  7018  func TestNoRaceJetStreamClusterDifferentRTTInterestBasedStreamSetup(t *testing.T) {
  7019  	// Comment out the skip below to run. Not wanted as part of the Travis tests atm.
  7020  	skip(t)
  7021  
  7022  	c, np := createStretchUnbalancedCluster(t)
  7023  	defer c.shutdown()
  7024  	defer np.stop()
  7025  
  7026  	nc, js := jsClientConnect(t, c.randomServer())
  7027  	defer nc.Close()
  7028  
  7029  	// Now create the stream.
  7030  	_, err := js.AddStream(&nats.StreamConfig{
  7031  		Name:      "EVENTS",
  7032  		Subjects:  []string{"EV.>"},
  7033  		Replicas:  3,
  7034  		Retention: nats.InterestPolicy,
  7035  	})
  7036  	require_NoError(t, err)
  7037  
  7038  	// Make sure its leader is on S2.
  7039  	sl := c.servers[1]
  7040  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7041  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  7042  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  7043  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  7044  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  7045  		}
  7046  		return nil
  7047  	})
  7048  
  7049  	// Now create the consumer.
  7050  	_, err = js.PullSubscribe(_EMPTY_, "C", nats.BindStream("EVENTS"), nats.ManualAck())
  7051  	require_NoError(t, err)
  7052  
  7053  	// Make sure the consumer leader is on S3.
  7054  	cl := c.servers[2]
  7055  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7056  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  7057  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  7058  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  7059  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7060  		}
  7061  		return nil
  7062  	})
  7063  
  7064  	go func(js nats.JetStream) {
  7065  		sub, err := js.PullSubscribe(_EMPTY_, "C", nats.BindStream("EVENTS"), nats.ManualAck())
  7066  		require_NoError(t, err)
  7067  
  7068  		for {
  7069  			msgs, err := sub.Fetch(100, nats.MaxWait(2*time.Second))
  7070  			if err != nil && err != nats.ErrTimeout {
  7071  				return
  7072  			}
  7073  			// Shuffle
  7074  			rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  7075  			for _, m := range msgs {
  7076  				m.Ack()
  7077  			}
  7078  		}
  7079  	}(js)
  7080  
  7081  	numPublishers := 25
  7082  	pubThresh := 2 * time.Second
  7083  	var maxExceeded atomic.Int64
  7084  	errCh := make(chan error, numPublishers)
  7085  	wg := sync.WaitGroup{}
  7086  
  7087  	msg := make([]byte, 2*1024) // 2k payload
  7088  	crand.Read(msg)
  7089  
  7090  	// Publishers.
  7091  	for i := 0; i < numPublishers; i++ {
  7092  		wg.Add(1)
  7093  		go func(iter int) {
  7094  			defer wg.Done()
  7095  
  7096  			// Connect to a random server; the slow publishers will be the ones connected to the slow node.
  7097  			// But if you connect them all there, the test will pass.
  7098  			s := c.randomServer()
  7099  			nc, js := jsClientConnect(t, s)
  7100  			defer nc.Close()
  7101  
  7102  			for i := 0; i < 1_000; i++ {
  7103  				start := time.Now()
  7104  				_, err := js.Publish("EV.PAID", msg)
  7105  				if err != nil {
  7106  					errCh <- fmt.Errorf("Publish error: %v", err)
  7107  					return
  7108  				}
  7109  				if elapsed := time.Since(start); elapsed > pubThresh {
  7110  					errCh <- fmt.Errorf("Publish time exceeded")
  7111  					if int64(elapsed) > maxExceeded.Load() {
  7112  						maxExceeded.Store(int64(elapsed))
  7113  					}
  7114  					return
  7115  				}
  7116  			}
  7117  		}(i)
  7118  	}
  7119  
  7120  	wg.Wait()
  7121  
  7122  	select {
  7123  	case e := <-errCh:
  7124  		t.Fatalf("%v: threshold is %v, maximum seen: %v", e, pubThresh, time.Duration(maxExceeded.Load()))
  7125  	default:
  7126  	}
  7127  }
  7128  
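        // With 10 durable consumers all acking on an R3 interest stream, publish 10k messages and
        // verify that every consumer's ack floor reaches the last sequence and that the stream
        // fully drains (Msgs == 0, FirstSeq advanced past the last published message).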
  7129  func TestNoRaceJetStreamInterestStreamCheckInterestRaceBug(t *testing.T) {
  7130  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  7131  	defer c.shutdown()
  7132  
  7133  	nc, js := jsClientConnect(t, c.randomServer())
  7134  	defer nc.Close()
  7135  
  7136  	_, err := js.AddStream(&nats.StreamConfig{
  7137  		Name:      "TEST",
  7138  		Subjects:  []string{"foo"},
  7139  		Replicas:  3,
  7140  		Retention: nats.InterestPolicy,
  7141  	})
  7142  	require_NoError(t, err)
  7143  
  7144  	numConsumers := 10
  7145  	for i := 0; i < numConsumers; i++ {
  7146  		nc, js := jsClientConnect(t, c.randomServer())
  7147  		defer nc.Close()
  7148  
  7149  		_, err = js.Subscribe("foo", func(m *nats.Msg) {
  7150  			m.Ack()
  7151  		}, nats.Durable(fmt.Sprintf("C%d", i)), nats.ManualAck())
  7152  		require_NoError(t, err)
  7153  	}
  7154  
  7155  	numToSend := 10_000
  7156  	for i := 0; i < numToSend; i++ {
  7157  		_, err := js.PublishAsync("foo", nil, nats.StallWait(800*time.Millisecond))
  7158  		require_NoError(t, err)
  7159  	}
  7160  	select {
  7161  	case <-js.PublishAsyncComplete():
  7162  	case <-time.After(20 * time.Second):
  7163  		t.Fatalf("Did not receive completion signal")
  7164  	}
  7165  
  7166  	// Wait until the ack floor is correct for all consumers.
  7167  	checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
  7168  		for _, s := range c.servers {
  7169  			mset, err := s.GlobalAccount().lookupStream("TEST")
  7170  			require_NoError(t, err)
  7171  
  7172  			mset.mu.RLock()
  7173  			defer mset.mu.RUnlock()
  7174  
  7175  			require_True(t, len(mset.consumers) == numConsumers)
  7176  
  7177  			for _, o := range mset.consumers {
  7178  				state, err := o.store.State()
  7179  				require_NoError(t, err)
  7180  				if state.AckFloor.Stream != uint64(numToSend) {
  7181  					return fmt.Errorf("Ackfloor not correct yet")
  7182  				}
  7183  			}
  7184  		}
  7185  		return nil
  7186  	})
  7187  
  7188  	for _, s := range c.servers {
  7189  		mset, err := s.GlobalAccount().lookupStream("TEST")
  7190  		require_NoError(t, err)
  7191  
  7192  		mset.mu.RLock()
  7193  		defer mset.mu.RUnlock()
  7194  
  7195  		state := mset.state()
  7196  		require_True(t, state.Msgs == 0)
  7197  		require_True(t, state.FirstSeq == uint64(numToSend+1))
  7198  	}
  7199  }
  7200  
  7201  func TestNoRaceJetStreamClusterInterestStreamConsistencyAfterRollingRestart(t *testing.T) {
  7202  	// Comment out the skip below to run. Needs to be on a big machine. Not wanted as part of the Travis tests atm.
  7203  	skip(t)
  7204  
  7205  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  7206  	defer c.shutdown()
  7207  
  7208  	numStreams := 200
  7209  	numConsumersPer := 5
  7210  	numPublishers := 10
  7211  
  7212  	nc, js := jsClientConnect(t, c.randomServer())
  7213  	defer nc.Close()
  7214  
  7215  	qch := make(chan bool)
  7216  
  7217  	var mm sync.Mutex
  7218  	ackMap := make(map[string]map[uint64][]string)
  7219  
  7220  	addAckTracking := func(seq uint64, stream, consumer string) {
  7221  		mm.Lock()
  7222  		defer mm.Unlock()
  7223  		sam := ackMap[stream]
  7224  		if sam == nil {
  7225  			sam = make(map[uint64][]string)
  7226  			ackMap[stream] = sam
  7227  		}
  7228  		sam[seq] = append(sam[seq], consumer)
  7229  	}
  7230  
  7231  	doPullSubscriber := func(stream, consumer, filter string) {
  7232  		nc, js := jsClientConnect(t, c.randomServer())
  7233  		defer nc.Close()
  7234  
  7235  		var err error
  7236  		var sub *nats.Subscription
  7237  		timeout := time.Now().Add(5 * time.Second)
  7238  		for time.Now().Before(timeout) {
  7239  			sub, err = js.PullSubscribe(filter, consumer, nats.BindStream(stream), nats.ManualAck())
  7240  			if err == nil {
  7241  				break
  7242  			}
  7243  		}
  7244  		if err != nil {
  7245  			t.Logf("Error on pull subscriber: %v", err)
  7246  			return
  7247  		}
  7248  
  7249  		for {
  7250  			select {
  7251  			case <-time.After(500 * time.Millisecond):
  7252  				msgs, err := sub.Fetch(100, nats.MaxWait(time.Second))
  7253  				if err != nil {
  7254  					continue
  7255  				}
  7256  				// Shuffle
  7257  				rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  7258  				for _, m := range msgs {
  7259  					meta, err := m.Metadata()
  7260  					require_NoError(t, err)
  7261  					m.Ack()
  7262  					addAckTracking(meta.Sequence.Stream, stream, consumer)
  7263  					if meta.NumDelivered > 1 {
  7264  						t.Logf("Got a msg redelivered %d for sequence %d on %q %q\n", meta.NumDelivered, meta.Sequence.Stream, stream, consumer)
  7265  					}
  7266  				}
  7267  			case <-qch:
  7268  				nc.Flush()
  7269  				return
  7270  			}
  7271  		}
  7272  	}
  7273  
  7274  	// Setup
  7275  	wg := sync.WaitGroup{}
  7276  	for i := 0; i < numStreams; i++ {
  7277  		wg.Add(1)
  7278  		go func(stream string) {
  7279  			defer wg.Done()
  7280  			subj := fmt.Sprintf("%s.>", stream)
  7281  			_, err := js.AddStream(&nats.StreamConfig{
  7282  				Name:      stream,
  7283  				Subjects:  []string{subj},
  7284  				Replicas:  3,
  7285  				Retention: nats.InterestPolicy,
  7286  			})
  7287  			require_NoError(t, err)
  7288  			for i := 0; i < numConsumersPer; i++ {
  7289  				consumer := fmt.Sprintf("C%d", i)
  7290  				filter := fmt.Sprintf("%s.%d", stream, i)
  7291  				_, err = js.AddConsumer(stream, &nats.ConsumerConfig{
  7292  					Durable:       consumer,
  7293  					FilterSubject: filter,
  7294  					AckPolicy:     nats.AckExplicitPolicy,
  7295  					AckWait:       2 * time.Second,
  7296  				})
  7297  				require_NoError(t, err)
  7298  				c.waitOnConsumerLeader(globalAccountName, stream, consumer)
  7299  				go doPullSubscriber(stream, consumer, filter)
  7300  			}
  7301  		}(fmt.Sprintf("A-%d", i))
  7302  	}
  7303  	wg.Wait()
  7304  
  7305  	msg := make([]byte, 2*1024) // 2k payload
  7306  	crand.Read(msg)
  7307  
  7308  	// Controls if publishing is on or off.
  7309  	var pubActive atomic.Bool
  7310  
  7311  	doPublish := func() {
  7312  		nc, js := jsClientConnect(t, c.randomServer())
  7313  		defer nc.Close()
  7314  
  7315  		for {
  7316  			select {
  7317  			case <-time.After(100 * time.Millisecond):
  7318  				if pubActive.Load() {
  7319  					for i := 0; i < numStreams; i++ {
  7320  						for j := 0; j < numConsumersPer; j++ {
  7321  							subj := fmt.Sprintf("A-%d.%d", i, j)
  7322  							// Don't care about errors here for this test.
  7323  							js.Publish(subj, msg)
  7324  						}
  7325  					}
  7326  				}
  7327  			case <-qch:
  7328  				return
  7329  			}
  7330  		}
  7331  	}
  7332  
  7333  	pubActive.Store(true)
  7334  
  7335  	for i := 0; i < numPublishers; i++ {
  7336  		go doPublish()
  7337  	}
  7338  
  7339  	// Let run for a bit.
  7340  	time.Sleep(20 * time.Second)
  7341  
  7342  	// Do a rolling restart.
  7343  	for _, s := range c.servers {
  7344  		t.Logf("Shutdown %v\n", s)
  7345  		s.Shutdown()
  7346  		s.WaitForShutdown()
  7347  		time.Sleep(20 * time.Second)
  7348  		t.Logf("Restarting %v\n", s)
  7349  		s = c.restartServer(s)
  7350  		c.waitOnServerHealthz(s)
  7351  	}
  7352  
  7353  	// Let run for a bit longer.
  7354  	time.Sleep(10 * time.Second)
  7355  
  7356  	// Stop pubs.
  7357  	pubActive.Store(false)
  7358  
  7359  	// Let settle.
  7360  	time.Sleep(10 * time.Second)
  7361  	close(qch)
  7362  	time.Sleep(20 * time.Second)
  7363  
  7364  	nc, js = jsClientConnect(t, c.randomServer())
  7365  	defer nc.Close()
  7366  
  7367  	minAckFloor := func(stream string) (uint64, string) {
  7368  		var maf uint64 = math.MaxUint64
  7369  		var consumer string
  7370  		for i := 0; i < numConsumersPer; i++ {
  7371  			cname := fmt.Sprintf("C%d", i)
  7372  			ci, err := js.ConsumerInfo(stream, cname)
  7373  			require_NoError(t, err)
  7374  			if ci.AckFloor.Stream < maf {
  7375  				maf = ci.AckFloor.Stream
  7376  				consumer = cname
  7377  			}
  7378  		}
  7379  		return maf, consumer
  7380  	}
  7381  
  7382  	checkStreamAcks := func(stream string) {
  7383  		mm.Lock()
  7384  		defer mm.Unlock()
  7385  		if sam := ackMap[stream]; sam != nil {
  7386  			for seq := 1; ; seq++ {
  7387  				acks := sam[uint64(seq)]
  7388  				if acks == nil {
  7389  					if sam[uint64(seq+1)] != nil {
  7390  						t.Logf("Missing an ack on stream %q for sequence %d\n", stream, seq)
  7391  					} else {
  7392  						break
  7393  					}
  7394  				}
  7395  				if len(acks) > 1 {
  7396  					t.Logf("Multiple acks for %d which is not expected: %+v", seq, acks)
  7397  				}
  7398  			}
  7399  		}
  7400  	}
  7401  
  7402  	// Now check that each stream's first sequence is not behind the minimum ack floor across its consumers.
  7403  	for i := 0; i < numStreams; i++ {
  7404  		stream := fmt.Sprintf("A-%d", i)
  7405  		si, err := js.StreamInfo(stream)
  7406  		require_NoError(t, err)
  7407  
  7408  		if maf, consumer := minAckFloor(stream); maf > si.State.FirstSeq {
  7409  			t.Logf("\nBAD STATE DETECTED FOR %q, CHECKING OTHER SERVERS! ACK %d vs %+v LEADER %v, CL FOR %q %v\n",
  7410  				stream, maf, si.State, c.streamLeader(globalAccountName, stream), consumer, c.consumerLeader(globalAccountName, stream, consumer))
  7411  
  7412  			t.Logf("TEST ACKS %+v\n", ackMap)
  7413  
  7414  			checkStreamAcks(stream)
  7415  
  7416  			for _, s := range c.servers {
  7417  				mset, err := s.GlobalAccount().lookupStream(stream)
  7418  				require_NoError(t, err)
  7419  				state := mset.state()
  7420  				t.Logf("Server %v Stream STATE %+v\n", s, state)
  7421  
  7422  				var smv StoreMsg
  7423  				if sm, err := mset.store.LoadMsg(state.FirstSeq, &smv); err == nil {
  7424  					t.Logf("Subject for msg %d is %q", state.FirstSeq, sm.subj)
  7425  				} else {
  7426  					t.Logf("Could not retrieve msg for %d: %v", state.FirstSeq, err)
  7427  				}
  7428  
  7429  				if len(mset.preAcks) > 0 {
  7430  					t.Logf("%v preAcks %+v\n", s, mset.preAcks)
  7431  				}
  7432  
  7433  				for _, o := range mset.consumers {
  7434  					ostate, err := o.store.State()
  7435  					require_NoError(t, err)
  7436  					t.Logf("Consumer STATE for %q is %+v\n", o.name, ostate)
  7437  				}
  7438  			}
  7439  			t.Fatalf("BAD STATE: ACKFLOOR > FIRST %d vs %d\n", maf, si.State.FirstSeq)
  7440  		}
  7441  	}
  7442  }
  7443  
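        // Cross-checks filestore NumPending against FilteredState and a brute force per-sequence
        // scan for a variety of filters and start sequences, including the last-per-subject mode
        // (third argument to NumPending set to true).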
  7444  func TestNoRaceFileStoreNumPending(t *testing.T) {
  7445  	// No need for all permutations here.
  7446  	storeDir := t.TempDir()
  7447  	fcfg := FileStoreConfig{
  7448  		StoreDir:  storeDir,
  7449  		BlockSize: 2 * 1024, // Create many blocks on purpose.
  7450  	}
  7451  	fs, err := newFileStore(fcfg, StreamConfig{Name: "zzz", Subjects: []string{"*.*.*.*"}, Storage: FileStorage})
  7452  	require_NoError(t, err)
  7453  	defer fs.Stop()
  7454  
  7455  	tokens := []string{"foo", "bar", "baz"}
  7456  	genSubj := func() string {
  7457  		return fmt.Sprintf("%s.%s.%s.%s",
  7458  			tokens[rand.Intn(len(tokens))],
  7459  			tokens[rand.Intn(len(tokens))],
  7460  			tokens[rand.Intn(len(tokens))],
  7461  			tokens[rand.Intn(len(tokens))],
  7462  		)
  7463  	}
  7464  
  7465  	for i := 0; i < 50_000; i++ {
  7466  		subj := genSubj()
  7467  		_, _, err := fs.StoreMsg(subj, nil, []byte("Hello World"))
  7468  		require_NoError(t, err)
  7469  	}
  7470  
  7471  	state := fs.State()
  7472  
  7473  	// Scan one by one for sanity check against other calculations.
  7474  	sanityCheck := func(sseq uint64, filter string) SimpleState {
  7475  		t.Helper()
  7476  		var ss SimpleState
  7477  		var smv StoreMsg
  7478  		// Here we know 0 is invalid, so set it to 1.
  7479  		if sseq == 0 {
  7480  			sseq = 1
  7481  		}
  7482  		for seq := sseq; seq <= state.LastSeq; seq++ {
  7483  			sm, err := fs.LoadMsg(seq, &smv)
  7484  			if err != nil {
  7485  				t.Logf("Encountered error %v loading sequence: %d", err, seq)
  7486  				continue
  7487  			}
  7488  			if subjectIsSubsetMatch(sm.subj, filter) {
  7489  				ss.Msgs++
  7490  				ss.Last = seq
  7491  				if ss.First == 0 || seq < ss.First {
  7492  					ss.First = seq
  7493  				}
  7494  			}
  7495  		}
  7496  		return ss
  7497  	}
  7498  
  7499  	check := func(sseq uint64, filter string) {
  7500  		t.Helper()
  7501  		np, lvs := fs.NumPending(sseq, filter, false)
  7502  		ss := fs.FilteredState(sseq, filter)
  7503  		sss := sanityCheck(sseq, filter)
  7504  		if lvs != state.LastSeq {
  7505  			t.Fatalf("Expected NumPending to return valid through last of %d but got %d", state.LastSeq, lvs)
  7506  		}
  7507  		if ss.Msgs != np {
  7508  			t.Fatalf("NumPending of %d did not match ss.Msgs of %d", np, ss.Msgs)
  7509  		}
  7510  		if ss != sss {
  7511  			t.Fatalf("Failed sanity check, expected %+v got %+v", sss, ss)
  7512  		}
  7513  	}
  7514  
  7515  	sanityCheckLastOnly := func(sseq uint64, filter string) SimpleState {
  7516  		t.Helper()
  7517  		var ss SimpleState
  7518  		var smv StoreMsg
  7519  		// Here we know 0 is invalid, so set it to 1.
  7520  		if sseq == 0 {
  7521  			sseq = 1
  7522  		}
  7523  		seen := make(map[string]bool)
  7524  		for seq := state.LastSeq; seq >= sseq; seq-- {
  7525  			sm, err := fs.LoadMsg(seq, &smv)
  7526  			if err != nil {
  7527  				t.Logf("Encountered error %v loading sequence: %d", err, seq)
  7528  				continue
  7529  			}
  7530  			if !seen[sm.subj] && subjectIsSubsetMatch(sm.subj, filter) {
  7531  				ss.Msgs++
  7532  				if ss.Last == 0 {
  7533  					ss.Last = seq
  7534  				}
  7535  				if ss.First == 0 || seq < ss.First {
  7536  					ss.First = seq
  7537  				}
  7538  				seen[sm.subj] = true
  7539  			}
  7540  		}
  7541  		return ss
  7542  	}
  7543  
  7544  	checkLastOnly := func(sseq uint64, filter string) {
  7545  		t.Helper()
  7546  		np, lvs := fs.NumPending(sseq, filter, true)
  7547  		ss := sanityCheckLastOnly(sseq, filter)
  7548  		if lvs != state.LastSeq {
  7549  			t.Fatalf("Expected NumPending to return valid through last of %d but got %d", state.LastSeq, lvs)
  7550  		}
  7551  		if ss.Msgs != np {
  7552  			t.Fatalf("NumPending of %d did not match ss.Msgs of %d", np, ss.Msgs)
  7553  		}
  7554  	}
  7555  
  7556  	startSeqs := []uint64{0, 1, 2, 200, 444, 555, 2222, 8888, 12_345, 28_222, 33_456, 44_400, 49_999}
  7557  	checkSubs := []string{"foo.>", "*.bar.>", "foo.bar.*.baz", "*.bar.>", "*.foo.bar.*", "foo.foo.bar.baz"}
  7558  
  7559  	for _, filter := range checkSubs {
  7560  		for _, start := range startSeqs {
  7561  			check(start, filter)
  7562  			checkLastOnly(start, filter)
  7563  		}
  7564  	}
  7565  }
  7566  
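        // Interest stream on the unbalanced stretch cluster with a fast-ack push consumer (C) and a
        // pull consumer (D) whose leaders are pinned to different servers. Verifies that messages are
        // not removed prematurely when C's acks can outrun the messages, and that no preAcks remain.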
  7567  func TestNoRaceJetStreamClusterUnbalancedInterestMultipleConsumers(t *testing.T) {
  7568  	c, np := createStretchUnbalancedCluster(t)
  7569  	defer c.shutdown()
  7570  	defer np.stop()
  7571  
  7572  	nc, js := jsClientConnect(t, c.randomServer())
  7573  	defer nc.Close()
  7574  
  7575  	// Now create the stream.
  7576  	_, err := js.AddStream(&nats.StreamConfig{
  7577  		Name:      "EVENTS",
  7578  		Subjects:  []string{"EV.>"},
  7579  		Replicas:  3,
  7580  		Retention: nats.InterestPolicy,
  7581  	})
  7582  	require_NoError(t, err)
  7583  
  7584  	// Make sure its leader is on S2.
  7585  	sl := c.servers[1]
  7586  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7587  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  7588  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  7589  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  7590  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  7591  		}
  7592  		return nil
  7593  	})
  7594  
  7595  	// Create a fast ack consumer.
  7596  	_, err = js.Subscribe("EV.NEW", func(m *nats.Msg) {
  7597  		m.Ack()
  7598  	}, nats.Durable("C"), nats.ManualAck())
  7599  	require_NoError(t, err)
  7600  
  7601  	// Make sure the consumer leader is on S3.
  7602  	cl := c.servers[2]
  7603  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7604  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  7605  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  7606  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  7607  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7608  		}
  7609  		return nil
  7610  	})
  7611  
  7612  	// Connect a client directly to the stream leader.
  7613  	nc, js = jsClientConnect(t, sl)
  7614  	defer nc.Close()
  7615  
  7616  	// Now create a pull subscriber.
  7617  	sub, err := js.PullSubscribe("EV.NEW", "D", nats.ManualAck())
  7618  	require_NoError(t, err)
  7619  
  7620  	// Make sure this consumer leader is on S1.
  7621  	cl = c.servers[0]
  7622  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7623  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "D")
  7624  		if s := c.consumerLeader(globalAccountName, "EVENTS", "D"); s != cl {
  7625  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "D")
  7626  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7627  		}
  7628  		return nil
  7629  	})
  7630  
  7631  	numToSend := 1000
  7632  	for i := 0; i < numToSend; i++ {
  7633  		_, err := js.PublishAsync("EV.NEW", nil)
  7634  		require_NoError(t, err)
  7635  	}
  7636  	select {
  7637  	case <-js.PublishAsyncComplete():
  7638  	case <-time.After(20 * time.Second):
  7639  		t.Fatalf("Did not receive completion signal")
  7640  	}
  7641  
  7642  	// Now make sure we can pull messages since we have not acked.
  7643  	// The bug is that the acks arrive on S1 faster than the messages, but we want to
  7644  	// make sure we do not remove them prematurely.
  7645  	msgs, err := sub.Fetch(100, nats.MaxWait(time.Second))
  7646  	require_NoError(t, err)
  7647  	require_True(t, len(msgs) == 100)
  7648  	for _, m := range msgs {
  7649  		m.AckSync()
  7650  	}
  7651  
  7652  	ci, err := js.ConsumerInfo("EVENTS", "D")
  7653  	require_NoError(t, err)
  7654  	require_True(t, ci.NumPending == uint64(numToSend-100))
  7655  	require_True(t, ci.NumAckPending == 0)
  7656  	require_True(t, ci.Delivered.Stream == 100)
  7657  	require_True(t, ci.AckFloor.Stream == 100)
  7658  
  7659  	// Check stream state on all servers.
  7660  	for _, s := range c.servers {
  7661  		mset, err := s.GlobalAccount().lookupStream("EVENTS")
  7662  		require_NoError(t, err)
  7663  		state := mset.state()
  7664  		require_True(t, state.Msgs == 900)
  7665  		require_True(t, state.FirstSeq == 101)
  7666  		require_True(t, state.LastSeq == 1000)
  7667  		require_True(t, state.Consumers == 2)
  7668  	}
  7669  
  7670  	msgs, err = sub.Fetch(900, nats.MaxWait(time.Second))
  7671  	require_NoError(t, err)
  7672  	require_True(t, len(msgs) == 900)
  7673  	for _, m := range msgs {
  7674  		m.AckSync()
  7675  	}
  7676  
  7677  	// Let acks propagate.
  7678  	time.Sleep(250 * time.Millisecond)
  7679  
  7680  	// Check final stream state on all servers.
  7681  	for _, s := range c.servers {
  7682  		mset, err := s.GlobalAccount().lookupStream("EVENTS")
  7683  		require_NoError(t, err)
  7684  		state := mset.state()
  7685  		require_True(t, state.Msgs == 0)
  7686  		require_True(t, state.FirstSeq == 1001)
  7687  		require_True(t, state.LastSeq == 1000)
  7688  		require_True(t, state.Consumers == 2)
  7689  		// Now check preAcks
  7690  		mset.mu.RLock()
  7691  		numPreAcks := len(mset.preAcks)
  7692  		mset.mu.RUnlock()
  7693  		require_True(t, numPreAcks == 0)
  7694  	}
  7695  }
  7696  
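        // Same unbalanced stretch cluster, but with two filtered fast-ack consumers (C on EV.NEW and
        // D on EV.UPDATED) whose leaders are pinned to different servers. Verifies delivered and ack
        // floors, that the interest stream fully drains, and that no preAcks remain.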
  7697  func TestNoRaceJetStreamClusterUnbalancedInterestMultipleFilteredConsumers(t *testing.T) {
  7698  	c, np := createStretchUnbalancedCluster(t)
  7699  	defer c.shutdown()
  7700  	defer np.stop()
  7701  
  7702  	nc, js := jsClientConnect(t, c.randomServer())
  7703  	defer nc.Close()
  7704  
  7705  	// Now create the stream.
  7706  	_, err := js.AddStream(&nats.StreamConfig{
  7707  		Name:      "EVENTS",
  7708  		Subjects:  []string{"EV.>"},
  7709  		Replicas:  3,
  7710  		Retention: nats.InterestPolicy,
  7711  	})
  7712  	require_NoError(t, err)
  7713  
  7714  	// Make sure its leader is on S2.
  7715  	sl := c.servers[1]
  7716  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7717  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  7718  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  7719  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  7720  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  7721  		}
  7722  		return nil
  7723  	})
  7724  
  7725  	// Create a fast ack consumer.
  7726  	_, err = js.Subscribe("EV.NEW", func(m *nats.Msg) {
  7727  		m.Ack()
  7728  	}, nats.Durable("C"), nats.ManualAck())
  7729  	require_NoError(t, err)
  7730  
  7731  	// Make sure the consumer leader is on S3.
  7732  	cl := c.servers[2]
  7733  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7734  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  7735  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  7736  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  7737  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7738  		}
  7739  		return nil
  7740  	})
  7741  
  7742  	// Connect a client directly to the stream leader.
  7743  	nc, js = jsClientConnect(t, sl)
  7744  	defer nc.Close()
  7745  
  7746  	// Now create another fast ack consumer.
  7747  	_, err = js.Subscribe("EV.UPDATED", func(m *nats.Msg) {
  7748  		m.Ack()
  7749  	}, nats.Durable("D"), nats.ManualAck())
  7750  	require_NoError(t, err)
  7751  
  7752  	// Make sure this consumer leader is on S1.
  7753  	cl = c.servers[0]
  7754  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7755  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "D")
  7756  		if s := c.consumerLeader(globalAccountName, "EVENTS", "D"); s != cl {
  7757  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "D")
  7758  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7759  		}
  7760  		return nil
  7761  	})
  7762  
  7763  	numToSend := 500
  7764  	for i := 0; i < numToSend; i++ {
  7765  		_, err := js.PublishAsync("EV.NEW", nil)
  7766  		require_NoError(t, err)
  7767  		_, err = js.PublishAsync("EV.UPDATED", nil)
  7768  		require_NoError(t, err)
  7769  	}
  7770  	select {
  7771  	case <-js.PublishAsyncComplete():
  7772  	case <-time.After(20 * time.Second):
  7773  		t.Fatalf("Did not receive completion signal")
  7774  	}
  7775  
  7776  	// Let acks propagate.
  7777  	time.Sleep(250 * time.Millisecond)
  7778  
  7779  	ci, err := js.ConsumerInfo("EVENTS", "D")
  7780  	require_NoError(t, err)
  7781  	require_True(t, ci.NumPending == 0)
  7782  	require_True(t, ci.NumAckPending == 0)
  7783  	require_True(t, ci.Delivered.Consumer == 500)
  7784  	require_True(t, ci.Delivered.Stream == 1000)
  7785  	require_True(t, ci.AckFloor.Consumer == 500)
  7786  	require_True(t, ci.AckFloor.Stream == 1000)
  7787  
  7788  	// Check final stream state on all servers.
  7789  	for _, s := range c.servers {
  7790  		mset, err := s.GlobalAccount().lookupStream("EVENTS")
  7791  		require_NoError(t, err)
  7792  		state := mset.state()
  7793  		require_True(t, state.Msgs == 0)
  7794  		require_True(t, state.FirstSeq == 1001)
  7795  		require_True(t, state.LastSeq == 1000)
  7796  		require_True(t, state.Consumers == 2)
  7797  		// Now check preAcks
  7798  		mset.mu.RLock()
  7799  		numPreAcks := len(mset.preAcks)
  7800  		mset.mu.RUnlock()
  7801  		require_True(t, numPreAcks == 0)
  7802  	}
  7803  }
  7804  
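        // Fires 1000 goroutines at the low level addStream/addConsumer APIs at once and verifies
        // that only a single stream and a single consumer are actually created, and that no
        // inflight entries are left over after stream creation.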
  7805  func TestNoRaceParallelStreamAndConsumerCreation(t *testing.T) {
  7806  	s := RunBasicJetStreamServer(t)
  7807  	defer s.Shutdown()
  7808  
  7809  	// stream config.
  7810  	scfg := &StreamConfig{
  7811  		Name:     "TEST",
  7812  		Subjects: []string{"foo", "bar"},
  7813  		MaxMsgs:  10,
  7814  		Storage:  FileStorage,
  7815  		Replicas: 1,
  7816  	}
  7817  
  7818  	// Will do these directly against the low level API to really make
  7819  	// sure parallel creation is handled correctly.
  7820  	np := 1000
  7821  	startCh := make(chan bool)
  7822  	errCh := make(chan error, np)
  7823  	wg := sync.WaitGroup{}
  7824  	wg.Add(np)
  7825  
  7826  	var streams sync.Map
  7827  
  7828  	for i := 0; i < np; i++ {
  7829  		go func() {
  7830  			defer wg.Done()
  7831  
  7832  			// Make them all fire at once.
  7833  			<-startCh
  7834  
  7835  			if mset, err := s.GlobalAccount().addStream(scfg); err != nil {
  7836  				t.Logf("Stream create got an error: %v", err)
  7837  				errCh <- err
  7838  			} else {
  7839  				streams.Store(mset, true)
  7840  			}
  7841  		}()
  7842  	}
  7843  	time.Sleep(100 * time.Millisecond)
  7844  	close(startCh)
  7845  	wg.Wait()
  7846  
  7847  	// Check for no errors.
  7848  	if len(errCh) > 0 {
  7849  		t.Fatalf("Expected no errors, got %d", len(errCh))
  7850  	}
  7851  
  7852  	// Now make sure we really only created one stream.
  7853  	var numStreams int
  7854  	streams.Range(func(k, v any) bool {
  7855  		numStreams++
  7856  		return true
  7857  	})
  7858  	if numStreams > 1 {
  7859  		t.Fatalf("Expected only one stream to be really created, got %d out of %d attempts", numStreams, np)
  7860  	}
  7861  
  7862  	// Also make sure we cleanup the inflight entries for streams.
  7863  	gacc := s.GlobalAccount()
  7864  	_, jsa, err := gacc.checkForJetStream()
  7865  	require_NoError(t, err)
  7866  	var numEntries int
  7867  	jsa.inflight.Range(func(k, v any) bool {
  7868  		numEntries++
  7869  		return true
  7870  	})
  7871  	if numEntries > 0 {
  7872  		t.Fatalf("Expected no inflight entries to be left over, got %d", numEntries)
  7873  	}
  7874  
  7875  	// Now do consumers.
  7876  	mset, err := gacc.lookupStream("TEST")
  7877  	require_NoError(t, err)
  7878  
  7879  	cfg := &ConsumerConfig{
  7880  		DeliverSubject: "to",
  7881  		Name:           "DLC",
  7882  		AckPolicy:      AckExplicit,
  7883  	}
  7884  
  7885  	startCh = make(chan bool)
  7886  	errCh = make(chan error, np)
  7887  	wg.Add(np)
  7888  
  7889  	var consumers sync.Map
  7890  
  7891  	for i := 0; i < np; i++ {
  7892  		go func() {
  7893  			defer wg.Done()
  7894  
  7895  			// Make them all fire at once.
  7896  			<-startCh
  7897  
  7898  			if o, err := mset.addConsumer(cfg); err != nil {
  7899  				t.Logf("Consumer create got an error: %v", err)
  7900  				errCh <- err
  7901  			} else {
  7902  				consumers.Store(o, true) // Track the consumer itself, not the stream.
  7903  			}
  7904  		}()
  7905  	}
  7906  	time.Sleep(100 * time.Millisecond)
  7907  	close(startCh)
  7908  	wg.Wait()
  7909  
  7910  	// Check for no errors.
  7911  	if len(errCh) > 0 {
  7912  		t.Fatalf("Expected no errors, got %d", len(errCh))
  7913  	}
  7914  
  7915  	// Now make sure we really only created one consumer.
  7916  	var numConsumers int
  7917  	consumers.Range(func(k, v any) bool {
  7918  		numConsumers++
  7919  		return true
  7920  	})
  7921  	if numConsumers > 1 {
  7922  		t.Fatalf("Expected only one consumer to be really created, got %d out of %d attempts", numConsumers, np)
  7923  	}
  7924  }
  7925  
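        // Rough benchmark comparing route throughput across 5 accounts with and without route
        // pooling (pool_size 5 vs 0); logs msgs/sec for each mode and the relative gain.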
  7926  func TestNoRaceRoutePool(t *testing.T) {
  7927  	var dur1 time.Duration
  7928  	var dur2 time.Duration
  7929  
  7930  	total := 1_000_000
  7931  
  7932  	for _, test := range []struct {
  7933  		name     string
  7934  		poolSize int
  7935  	}{
  7936  		{"no pooling", 0},
  7937  		{"pooling", 5},
  7938  	} {
  7939  		t.Run(test.name, func(t *testing.T) {
  7940  			tmpl := `
  7941  			port: -1
  7942  			accounts {
  7943  				A { users: [{user: "A", password: "A"}] }
  7944  				B { users: [{user: "B", password: "B"}] }
  7945  				C { users: [{user: "C", password: "C"}] }
  7946  				D { users: [{user: "D", password: "D"}] }
  7947  				E { users: [{user: "E", password: "E"}] }
  7948  			}
  7949  			cluster {
  7950  				port: -1
  7951  				name: "local"
  7952  				%s
  7953  				pool_size: %d
  7954  			}
  7955  		`
  7956  			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_, test.poolSize)))
  7957  			s1, o1 := RunServerWithConfig(conf1)
  7958  			defer s1.Shutdown()
  7959  
  7960  			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl,
  7961  				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
  7962  				test.poolSize)))
  7963  			s2, _ := RunServerWithConfig(conf2)
  7964  			defer s2.Shutdown()
  7965  
  7966  			checkClusterFormed(t, s1, s2)
  7967  
  7968  			wg := sync.WaitGroup{}
  7969  			wg.Add(5)
  7970  
  7971  			sendAndRecv := func(acc string) (*nats.Conn, *nats.Conn) {
  7972  				t.Helper()
  7973  
  7974  				s2nc := natsConnect(t, s2.ClientURL(), nats.UserInfo(acc, acc))
  7975  				count := 0
  7976  				natsSub(t, s2nc, "foo", func(_ *nats.Msg) {
  7977  					if count++; count == total {
  7978  						wg.Done()
  7979  					}
  7980  				})
  7981  				natsFlush(t, s2nc)
  7982  
  7983  				s1nc := natsConnect(t, s1.ClientURL(), nats.UserInfo(acc, acc))
  7984  
  7985  				checkSubInterest(t, s1, acc, "foo", time.Second)
  7986  				return s2nc, s1nc
  7987  			}
  7988  
  7989  			var rcv = [5]*nats.Conn{}
  7990  			var snd = [5]*nats.Conn{}
  7991  			accs := []string{"A", "B", "C", "D", "E"}
  7992  
  7993  			for i := 0; i < 5; i++ {
  7994  				rcv[i], snd[i] = sendAndRecv(accs[i])
  7995  				defer rcv[i].Close()
  7996  				defer snd[i].Close()
  7997  			}
  7998  
  7999  			payload := []byte("some message")
  8000  			start := time.Now()
  8001  			for i := 0; i < 5; i++ {
  8002  				go func(idx int) {
  8003  					for i := 0; i < total; i++ {
  8004  						snd[idx].Publish("foo", payload)
  8005  					}
  8006  				}(i)
  8007  			}
  8008  
  8009  			wg.Wait()
  8010  			dur := time.Since(start)
  8011  			if test.poolSize == 0 {
  8012  				dur1 = dur
  8013  			} else {
  8014  				dur2 = dur
  8015  			}
  8016  		})
  8017  	}
  8018  	perf1 := float64(total*5) / dur1.Seconds()
  8019  	t.Logf("No pooling: %.0f msgs/sec", perf1)
  8020  	perf2 := float64(total*5) / dur2.Seconds()
  8021  	t.Logf("Pooling   : %.0f msgs/sec", perf2)
  8022  	t.Logf("Gain      : %.2fx", perf2/perf1)
  8023  }
  8024  
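        // Shared helper used by the two tests below; compares a single route for all accounts versus
        // dedicated per-account routes across 5 accounts, optionally with wildcard subscriptions,
        // and logs msgs/sec and the relative gain.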
  8025  func testNoRaceRoutePerAccount(t *testing.T, useWildCard bool) {
  8026  	var dur1 time.Duration
  8027  	var dur2 time.Duration
  8028  
  8029  	accounts := make([]string, 5)
  8030  	for i := 0; i < 5; i++ {
  8031  		akp, _ := nkeys.CreateAccount()
  8032  		pub, _ := akp.PublicKey()
  8033  		accounts[i] = pub
  8034  	}
  8035  	routeAccs := fmt.Sprintf("accounts: [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"]",
  8036  		accounts[0], accounts[1], accounts[2], accounts[3], accounts[4])
  8037  
  8038  	total := 1_000_000
  8039  
  8040  	for _, test := range []struct {
  8041  		name      string
  8042  		dedicated bool
  8043  	}{
  8044  		{"route for all accounts", false},
  8045  		{"route per account", true},
  8046  	} {
  8047  		t.Run(test.name, func(t *testing.T) {
  8048  			tmpl := `
  8049  			server_name: "%s"
  8050  			port: -1
  8051  			accounts {
  8052  				%s { users: [{user: "0", password: "0"}] }
  8053  				%s { users: [{user: "1", password: "1"}] }
  8054  				%s { users: [{user: "2", password: "2"}] }
  8055  				%s { users: [{user: "3", password: "3"}] }
  8056  				%s { users: [{user: "4", password: "4"}] }
  8057  			}
  8058  			cluster {
  8059  				port: -1
  8060  				name: "local"
  8061  				%s
  8062  				%s
  8063  			}
  8064  		`
  8065  			var racc string
  8066  			if test.dedicated {
  8067  				racc = routeAccs
  8068  			} else {
  8069  				racc = _EMPTY_
  8070  			}
  8071  			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A",
  8072  				accounts[0], accounts[1], accounts[2], accounts[3],
  8073  				accounts[4], _EMPTY_, racc)))
  8074  			s1, o1 := RunServerWithConfig(conf1)
  8075  			defer s1.Shutdown()
  8076  
  8077  			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B",
  8078  				accounts[0], accounts[1], accounts[2], accounts[3], accounts[4],
  8079  				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
  8080  				racc)))
  8081  			s2, _ := RunServerWithConfig(conf2)
  8082  			defer s2.Shutdown()
  8083  
  8084  			checkClusterFormed(t, s1, s2)
  8085  
  8086  			wg := sync.WaitGroup{}
  8087  			wg.Add(5)
  8088  
  8089  			sendAndRecv := func(acc string, user string) (*nats.Conn, *nats.Conn) {
  8090  				t.Helper()
  8091  
  8092  				s2nc := natsConnect(t, s2.ClientURL(), nats.UserInfo(user, user))
  8093  				count := 0
  8094  				var subj string
  8095  				var checkSubj string
  8096  				if useWildCard {
  8097  					subj, checkSubj = "foo.*", "foo.0"
  8098  				} else {
  8099  					subj, checkSubj = "foo", "foo"
  8100  				}
  8101  				natsSub(t, s2nc, subj, func(_ *nats.Msg) {
  8102  					if count++; count == total {
  8103  						wg.Done()
  8104  					}
  8105  				})
  8106  				natsFlush(t, s2nc)
  8107  
  8108  				s1nc := natsConnect(t, s1.ClientURL(), nats.UserInfo(user, user))
  8109  
  8110  				checkSubInterest(t, s1, acc, checkSubj, time.Second)
  8111  				return s2nc, s1nc
  8112  			}
  8113  
  8114  			var rcv = [5]*nats.Conn{}
  8115  			var snd = [5]*nats.Conn{}
  8116  			users := []string{"0", "1", "2", "3", "4"}
  8117  
  8118  			for i := 0; i < 5; i++ {
  8119  				rcv[i], snd[i] = sendAndRecv(accounts[i], users[i])
  8120  				defer rcv[i].Close()
  8121  				defer snd[i].Close()
  8122  			}
  8123  
  8124  			payload := []byte("some message")
  8125  			start := time.Now()
  8126  			for i := 0; i < 5; i++ {
  8127  				go func(idx int) {
  8128  					for i := 0; i < total; i++ {
  8129  						var subj string
  8130  						if useWildCard {
  8131  							subj = fmt.Sprintf("foo.%d", i)
  8132  						} else {
  8133  							subj = "foo"
  8134  						}
  8135  						snd[idx].Publish(subj, payload)
  8136  					}
  8137  				}(i)
  8138  			}
  8139  
  8140  			wg.Wait()
  8141  			dur := time.Since(start)
  8142  			if !test.dedicated {
  8143  				dur1 = dur
  8144  			} else {
  8145  				dur2 = dur
  8146  			}
  8147  		})
  8148  	}
  8149  	perf1 := float64(total*5) / dur1.Seconds()
  8150  	t.Logf("Route for all accounts: %.0f msgs/sec", perf1)
  8151  	perf2 := float64(total*5) / dur2.Seconds()
  8152  	t.Logf("Route per account     : %.0f msgs/sec", perf2)
  8153  	t.Logf("Gain                  : %.2fx", perf2/perf1)
  8154  }
  8155  
  8156  func TestNoRaceRoutePerAccount(t *testing.T) {
  8157  	testNoRaceRoutePerAccount(t, false)
  8158  }
  8159  
  8160  func TestNoRaceRoutePerAccountSubWithWildcard(t *testing.T) {
  8161  	testNoRaceRoutePerAccount(t, true)
  8162  }
  8163  
  8164  // This test, which checks that messages are not duplicated when pooling or
  8165  // per-account routes are reloaded, would cause a DATA RACE that is not
  8166  // specific to the changes for pooling/per_account. For this reason, this
  8167  // test is located in the norace_test.go file.
  8168  func TestNoRaceRoutePoolAndPerAccountConfigReload(t *testing.T) {
  8169  	for _, test := range []struct {
  8170  		name           string
  8171  		poolSizeBefore string
  8172  		poolSizeAfter  string
  8173  		accountsBefore string
  8174  		accountsAfter  string
  8175  	}{
  8176  		{"from no pool to pool", _EMPTY_, "pool_size: 2", _EMPTY_, _EMPTY_},
  8177  		{"increase pool size", "pool_size: 2", "pool_size: 5", _EMPTY_, _EMPTY_},
  8178  		{"decrease pool size", "pool_size: 5", "pool_size: 2", _EMPTY_, _EMPTY_},
  8179  		{"from pool to no pool", "pool_size: 5", _EMPTY_, _EMPTY_, _EMPTY_},
  8180  		{"from no account to account", _EMPTY_, _EMPTY_, _EMPTY_, "accounts: [\"A\"]"},
  8181  		{"add account", _EMPTY_, _EMPTY_, "accounts: [\"B\"]", "accounts: [\"A\",\"B\"]"},
  8182  		{"remove account", _EMPTY_, _EMPTY_, "accounts: [\"A\",\"B\"]", "accounts: [\"B\"]"},
  8183  		{"from account to no account", _EMPTY_, _EMPTY_, "accounts: [\"A\"]", _EMPTY_},
  8184  		{"increase pool size and add account", "pool_size: 2", "pool_size: 3", "accounts: [\"B\"]", "accounts: [\"B\",\"A\"]"},
  8185  		{"decrease pool size and remove account", "pool_size: 3", "pool_size: 2", "accounts: [\"A\",\"B\"]", "accounts: [\"B\"]"},
  8186  	} {
  8187  		t.Run(test.name, func(t *testing.T) {
  8188  			tmplA := `
  8189  				port: -1
  8190  				server_name: "A"
  8191  				accounts {
  8192  					A { users: [{user: a, password: pwd}] }
  8193  					B { users: [{user: b, password: pwd}] }
  8194  				}
  8195  				cluster: {
  8196  					port: -1
  8197  					name: "local"
  8198  					%s
  8199  					%s
  8200  				}
  8201  			`
  8202  			confA := createConfFile(t, []byte(fmt.Sprintf(tmplA, test.poolSizeBefore, test.accountsBefore)))
  8203  			srva, optsA := RunServerWithConfig(confA)
  8204  			defer srva.Shutdown()
  8205  
  8206  			tmplB := `
  8207  				port: -1
  8208  				server_name: "B"
  8209  				accounts {
  8210  					A { users: [{user: a, password: pwd}] }
  8211  					B { users: [{user: b, password: pwd}] }
  8212  				}
  8213  				cluster: {
  8214  					port: -1
  8215  					name: "local"
  8216  					%s
  8217  					%s
  8218  					routes: ["nats://127.0.0.1:%d"]
  8219  				}
  8220  			`
  8221  			confB := createConfFile(t, []byte(fmt.Sprintf(tmplB, test.poolSizeBefore, test.accountsBefore, optsA.Cluster.Port)))
  8222  			srvb, _ := RunServerWithConfig(confB)
  8223  			defer srvb.Shutdown()
  8224  
  8225  			checkClusterFormed(t, srva, srvb)
  8226  
  8227  			ncA := natsConnect(t, srva.ClientURL(), nats.UserInfo("a", "pwd"))
  8228  			defer ncA.Close()
  8229  
  8230  			sub := natsSubSync(t, ncA, "foo")
  8231  			sub.SetPendingLimits(-1, -1)
  8232  			checkSubInterest(t, srvb, "A", "foo", time.Second)
  8233  
  8234  			ncB := natsConnect(t, srvb.ClientURL(), nats.UserInfo("a", "pwd"))
  8235  			defer ncB.Close()
  8236  
  8237  			wg := sync.WaitGroup{}
  8238  			wg.Add(1)
  8239  			ch := make(chan struct{})
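        			// Publisher: send monotonically increasing sequence numbers until signaled to
        			// stop, pausing briefly every 300 messages so the config reloads below
        			// interleave with live traffic.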
  8240  			go func() {
  8241  				defer wg.Done()
  8242  
  8243  				for i := 0; ; i++ {
  8244  					ncB.Publish("foo", []byte(fmt.Sprintf("%d", i)))
  8245  					select {
  8246  					case <-ch:
  8247  						return
  8248  					default:
  8249  					}
  8250  					if i%300 == 0 {
  8251  						time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond)
  8252  					}
  8253  				}
  8254  			}()
  8255  
  8256  			var l *captureErrorLogger
  8257  			if test.accountsBefore != _EMPTY_ && test.accountsAfter == _EMPTY_ {
  8258  				l = &captureErrorLogger{errCh: make(chan string, 100)}
  8259  				srva.SetLogger(l, false, false)
  8260  			}
  8261  
  8262  			time.Sleep(250 * time.Millisecond)
  8263  			reloadUpdateConfig(t, srva, confA, fmt.Sprintf(tmplA, test.poolSizeAfter, test.accountsAfter))
  8264  			time.Sleep(125 * time.Millisecond)
  8265  			reloadUpdateConfig(t, srvb, confB, fmt.Sprintf(tmplB, test.poolSizeAfter, test.accountsAfter, optsA.Cluster.Port))
  8266  
  8267  			checkClusterFormed(t, srva, srvb)
  8268  			checkSubInterest(t, srvb, "A", "foo", time.Second)
  8269  
  8270  			if l != nil {
  8271  				// Errors regarding "No route for account" should stop
  8272  				var ok bool
  8273  				for numErrs := 0; !ok && numErrs < 10; {
  8274  					select {
  8275  					case e := <-l.errCh:
  8276  						if strings.Contains(e, "No route for account") {
  8277  							numErrs++
  8278  						}
  8279  					case <-time.After(DEFAULT_ROUTE_RECONNECT + 250*time.Millisecond):
  8280  						ok = true
  8281  					}
  8282  				}
  8283  				if !ok {
  8284  					t.Fatalf("Still getting reports of no route for account")
  8285  				}
  8286  			}
  8287  
  8288  			close(ch)
  8289  			wg.Wait()
  8290  
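        			// Drain the subscription and verify the payloads are strictly increasing,
        			// which proves no message was duplicated or reordered during the reloads.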
  8291  			for prev := -1; ; {
  8292  				msg, err := sub.NextMsg(50 * time.Millisecond)
  8293  				if err != nil {
  8294  					break
  8295  				}
  8296  				cur, _ := strconv.Atoi(string(msg.Data))
  8297  				if cur <= prev {
  8298  					t.Fatalf("Previous was %d, got %d", prev, cur)
  8299  				}
  8300  				prev = cur
  8301  			}
  8302  		})
  8303  	}
  8304  }
  8305  
  8306  // This test ensures that outbound queues don't cause runaway memory
  8307  // growth when fanning a single message out to lots of clients.
  8308  func TestNoRaceClientOutboundQueueMemory(t *testing.T) {
  8309  	opts := DefaultOptions()
  8310  	s := RunServer(opts)
  8311  	defer s.Shutdown()
  8312  
  8313  	var before runtime.MemStats
  8314  	var after runtime.MemStats
  8315  
  8316  	var err error
  8317  	clients := make([]*nats.Conn, 50000)
  8318  	wait := &sync.WaitGroup{}
  8319  	wait.Add(len(clients))
  8320  
  8321  	for i := 0; i < len(clients); i++ {
  8322  		clients[i], err = nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port), nats.InProcessServer(s))
  8323  		if err != nil {
  8324  			t.Fatalf("Error on connect: %v", err)
  8325  		}
  8326  		defer clients[i].Close()
  8327  
  8328  		clients[i].Subscribe("test", func(m *nats.Msg) {
  8329  			wait.Done()
  8330  		})
  8331  	}
  8332  
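        	// Snapshot heap usage before fanning one large message out to all subscribers,
        	// then compare afterwards; the delta should stay small if the outbound queues
        	// release memory as expected.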
  8333  	runtime.GC()
  8334  	runtime.ReadMemStats(&before)
  8335  
  8336  	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port), nats.InProcessServer(s))
  8337  	if err != nil {
  8338  		t.Fatalf("Error on connect: %v", err)
  8339  	}
  8340  	defer nc.Close()
  8341  
  8342  	var m [48000]byte
  8343  	if err = nc.Publish("test", m[:]); err != nil {
  8344  		t.Fatal(err)
  8345  	}
  8346  
  8347  	wait.Wait()
  8348  
  8349  	runtime.GC()
  8350  	runtime.ReadMemStats(&after)
  8351  
  8352  	hb, ha := float64(before.HeapAlloc), float64(after.HeapAlloc)
  8353  	ms := float64(len(m))
  8354  	diff := ha - hb
  8355  	inc := (diff / hb) * 100
  8356  
  8357  	if inc > 10 {
  8358  		t.Logf("Message size:       %.1fKB\n", ms/1024)
  8359  		t.Logf("Subscribed clients: %d\n", len(clients))
  8360  		t.Logf("Heap allocs before: %.1fMB\n", hb/1024/1024)
  8361  		t.Logf("Heap allocs after:  %.1fMB\n", ha/1024/1024)
  8362  		t.Logf("Heap allocs delta:  %.1f%%\n", inc)
  8363  
  8364  		t.Fatalf("memory increase was %.1f%% (should be <= 10%%)", inc)
  8365  	}
  8366  }
  8367  
  8368  func TestNoRaceJetStreamClusterLeafnodeConnectPerf(t *testing.T) {
  8369  	// Comment out the skip below to run. Needs to be on a big machine; we do not want this as part of the Travis tests at the moment.
  8370  	skip(t)
  8371  
  8372  	tmpl := strings.Replace(jsClusterAccountsTempl, "store_dir:", "domain: cloud, store_dir:", 1)
  8373  	c := createJetStreamCluster(t, tmpl, "CLOUD", _EMPTY_, 3, 18033, true)
  8374  	defer c.shutdown()
  8375  
  8376  	nc, js := jsClientConnect(t, c.randomServer())
  8377  	defer nc.Close()
  8378  
  8379  	_, err := js.AddStream(&nats.StreamConfig{
  8380  		Name:     "STATE",
  8381  		Subjects: []string{"STATE.GLOBAL.CELL1.*.>"},
  8382  		Replicas: 3,
  8383  	})
  8384  	require_NoError(t, err)
  8385  
  8386  	tmpl = strings.Replace(jsClusterTemplWithSingleFleetLeafNode, "store_dir:", "domain: vehicle, store_dir:", 1)
  8387  
  8388  	var vinSerial int
  8389  	genVIN := func() string {
  8390  		vinSerial++
  8391  		return fmt.Sprintf("7PDSGAALXNN%06d", vinSerial)
  8392  	}
  8393  
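        	// Each simulated vehicle connects as a leafnode (using the "ws" protocol) and
        	// creates a local stream that sources its own VIN-filtered subset of the cloud
        	// STATE stream. The per-vehicle setup must finish within the 2s budget below.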
  8394  	numVehicles := 500
  8395  	for i := 0; i < numVehicles; i++ {
  8396  		start := time.Now()
  8397  		vin := genVIN()
  8398  		ln := c.createLeafNodeWithTemplateNoSystemWithProto(vin, tmpl, "ws")
  8399  		nc, js := jsClientConnect(t, ln)
  8400  		_, err := js.AddStream(&nats.StreamConfig{
  8401  			Name:     "VEHICLE",
  8402  			Subjects: []string{"STATE.GLOBAL.LOCAL.>"},
  8403  			Sources: []*nats.StreamSource{{
  8404  				Name:          "STATE",
  8405  				FilterSubject: fmt.Sprintf("STATE.GLOBAL.CELL1.%s.>", vin),
  8406  				External: &nats.ExternalStream{
  8407  					APIPrefix:     "$JS.cloud.API",
  8408  					DeliverPrefix: fmt.Sprintf("DELIVER.STATE.GLOBAL.CELL1.%s", vin),
  8409  				},
  8410  			}},
  8411  		})
  8412  		require_NoError(t, err)
  8413  		// Wait for the leafnode connection to be established.
  8414  		checkLeafNodeConnectedCount(t, ln, 1)
  8415  		if elapsed := time.Since(start); elapsed > 2*time.Second {
  8416  			t.Fatalf("Took too long to create leafnode %d connection: %v", i+1, elapsed)
  8417  		}
  8418  		nc.Close()
  8419  	}
  8420  }
  8421  
  8422  func TestNoRaceJetStreamClusterDifferentRTTInterestBasedStreamPreAck(t *testing.T) {
  8423  	tmpl := `
  8424  	listen: 127.0.0.1:-1
  8425  	server_name: %s
  8426  	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  8427  
  8428  	cluster {
  8429  		name: "F3"
  8430  		listen: 127.0.0.1:%d
  8431  		routes = [%s]
  8432  	}
  8433  
  8434  	accounts {
  8435  		$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
  8436  	}
  8437  	`
  8438  
  8439  	//  Route Ports
  8440  	//	"S1": 14622,
  8441  	//	"S2": 15622,
  8442  	//	"S3": 16622,
  8443  
  8444  	// S2 (stream leader) will have a slow path to S1 (via proxy) and S3 (consumer leader) will have a fast path.
  8445  
  8446  	// Do these in order, S1, S2 (proxy) then S3.
  8447  	c := &cluster{t: t, servers: make([]*Server, 3), opts: make([]*Options, 3), name: "F3"}
  8448  
  8449  	// S1
  8450  	conf := fmt.Sprintf(tmpl, "S1", t.TempDir(), 14622, "route://127.0.0.1:15622, route://127.0.0.1:16622")
  8451  	c.servers[0], c.opts[0] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  8452  
  8453  	// S2
  8454  	// Create the proxy first and connect it to S1. Add latency so this path is slower than the direct routes.
  8455  	np := createNetProxy(1*time.Millisecond, 1024*1024*1024, 1024*1024*1024, "route://127.0.0.1:14622", true)
  8456  	routes := fmt.Sprintf("%s, route://127.0.0.1:16622", np.routeURL())
  8457  	conf = fmt.Sprintf(tmpl, "S2", t.TempDir(), 15622, routes)
  8458  	c.servers[1], c.opts[1] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  8459  
  8460  	// S3
  8461  	conf = fmt.Sprintf(tmpl, "S3", t.TempDir(), 16622, "route://127.0.0.1:14622, route://127.0.0.1:15622")
  8462  	c.servers[2], c.opts[2] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  8463  
  8464  	c.checkClusterFormed()
  8465  	c.waitOnClusterReady()
  8466  	defer c.shutdown()
  8467  	defer np.stop()
  8468  
  8469  	nc, js := jsClientConnect(t, c.randomServer())
  8470  	defer nc.Close()
  8471  
  8472  	// Now create the stream.
  8473  	_, err := js.AddStream(&nats.StreamConfig{
  8474  		Name:      "EVENTS",
  8475  		Subjects:  []string{"EV.>"},
  8476  		Replicas:  3,
  8477  		Retention: nats.InterestPolicy,
  8478  	})
  8479  	require_NoError(t, err)
  8480  
  8481  	// Make sure its leader is on S2.
  8482  	sl := c.servers[1]
  8483  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  8484  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  8485  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  8486  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  8487  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  8488  		}
  8489  		return nil
  8490  	})
  8491  
  8492  	// Now create the consumer.
  8493  	_, err = js.AddConsumer("EVENTS", &nats.ConsumerConfig{
  8494  		Durable:        "C",
  8495  		AckPolicy:      nats.AckExplicitPolicy,
  8496  		DeliverSubject: "dx",
  8497  	})
  8498  	require_NoError(t, err)
  8499  
  8500  	// Make sure the consumer leader is on S3.
  8501  	cl := c.servers[2]
  8502  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  8503  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  8504  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  8505  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  8506  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  8507  		}
  8508  		return nil
  8509  	})
  8510  
  8511  	// Create the real consumer on the consumer leader to make it efficient.
  8512  	nc, js = jsClientConnect(t, cl)
  8513  	defer nc.Close()
  8514  
  8515  	_, err = js.Subscribe(_EMPTY_, func(msg *nats.Msg) {
  8516  		msg.Ack()
  8517  	}, nats.BindStream("EVENTS"), nats.Durable("C"), nats.ManualAck())
  8518  	require_NoError(t, err)
  8519  
  8520  	for i := 0; i < 1_000; i++ {
  8521  		_, err := js.PublishAsync("EVENTS.PAID", []byte("ok"))
  8522  		require_NoError(t, err)
  8523  	}
  8524  	select {
  8525  	case <-js.PublishAsyncComplete():
  8526  	case <-time.After(5 * time.Second):
  8527  		t.Fatalf("Did not receive completion signal")
  8528  	}
  8529  
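        	// S1 sits behind the slow proxy, so acks from the consumer can arrive there
        	// before the corresponding messages do; that is what should populate preAcks.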
  8530  	slow := c.servers[0]
  8531  	mset, err := slow.GlobalAccount().lookupStream("EVENTS")
  8532  	require_NoError(t, err)
  8533  
  8534  	// Make sure preAcks is non-nil, so we know the pre-ack logic has kicked in.
  8535  	mset.mu.RLock()
  8536  	preAcks := mset.preAcks
  8537  	mset.mu.RUnlock()
  8538  	require_NotNil(t, preAcks)
  8539  
  8540  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  8541  		state := mset.state()
  8542  		if state.Msgs == 0 {
  8543  			mset.mu.RLock()
  8544  			lp := len(mset.preAcks)
  8545  			mset.mu.RUnlock()
  8546  			if lp == 0 {
  8547  				return nil
  8548  			} else {
  8549  				t.Fatalf("Expected no preAcks with no msgs, but got %d", lp)
  8550  			}
  8551  		}
  8552  		return fmt.Errorf("Still have %d msgs left", state.Msgs)
  8553  	})
  8554  
  8555  }
  8556  
  8557  func TestNoRaceCheckAckFloorWithVeryLargeFirstSeqAndNewConsumers(t *testing.T) {
  8558  	s := RunBasicJetStreamServer(t)
  8559  	defer s.Shutdown()
  8560  
  8561  	nc, _ := jsClientConnect(t, s)
  8562  	defer nc.Close()
  8563  
  8564  	// Make sure to time bound the AckSync call below.
  8565  	js, err := nc.JetStream(nats.MaxWait(200 * time.Millisecond))
  8566  	require_NoError(t, err)
  8567  
  8568  	_, err = js.AddStream(&nats.StreamConfig{
  8569  		Name:      "TEST",
  8570  		Subjects:  []string{"wq-req"},
  8571  		Retention: nats.WorkQueuePolicy,
  8572  	})
  8573  	require_NoError(t, err)
  8574  
  8575  	largeFirstSeq := uint64(1_200_000_000)
  8576  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: largeFirstSeq})
  8577  	require_NoError(t, err)
  8578  	si, err := js.StreamInfo("TEST")
  8579  	require_NoError(t, err)
  8580  	require_True(t, si.State.FirstSeq == largeFirstSeq)
  8581  
  8582  	// Add a simple request to the stream.
  8583  	sendStreamMsg(t, nc, "wq-req", "HELP")
  8584  
  8585  	sub, err := js.PullSubscribe("wq-req", "dlc")
  8586  	require_NoError(t, err)
  8587  
  8588  	msgs, err := sub.Fetch(1)
  8589  	require_NoError(t, err)
  8590  	require_True(t, len(msgs) == 1)
  8591  
  8592  	// The bug is around checkAckFloor walking the sequences from the current ack floor
  8593  	// to the first sequence of the stream. We time bound the max wait with the js context
  8594  	// to 200ms. Since checkAckFloor spins and holds up the processing of acks, this would fail.
  8595  	// We will short circuit new consumers to fix this one.
  8596  	require_NoError(t, msgs[0].AckSync())
  8597  
  8598  	// Now do this again so we move past the "new consumer with no ack floor" situation.
  8599  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: 2 * largeFirstSeq})
  8600  	require_NoError(t, err)
  8601  	si, err = js.StreamInfo("TEST")
  8602  	require_NoError(t, err)
  8603  	require_True(t, si.State.FirstSeq == 2*largeFirstSeq)
  8604  
  8605  	sendStreamMsg(t, nc, "wq-req", "MORE HELP")
  8606  
  8607  	// We check this one directly for this use case.
  8608  	mset, err := s.GlobalAccount().lookupStream("TEST")
  8609  	require_NoError(t, err)
  8610  	o := mset.lookupConsumer("dlc")
  8611  	require_True(t, o != nil)
  8612  
  8613  	// Purge will move the stream floor by default, so force the consumer's ack floor back to largeFirstSeq.
  8614  	// This will not trigger the new consumer logic, but will trigger a walk of the sequence space.
  8615  	// Fix will be to walk the lesser of the two linear spaces.
  8616  	o.mu.Lock()
  8617  	o.asflr = largeFirstSeq
  8618  	o.mu.Unlock()
  8619  
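        	// checkAckFloor must return quickly even though the ack floor now sits far
        	// below the stream's first sequence; give it one second before failing.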
  8620  	done := make(chan bool)
  8621  	go func() {
  8622  		o.checkAckFloor()
  8623  		done <- true
  8624  	}()
  8625  
  8626  	select {
  8627  	case <-done:
  8628  		return
  8629  	case <-time.After(time.Second):
  8630  		t.Fatalf("Check ack floor taking too long!")
  8631  	}
  8632  }
  8633  
  8634  func TestNoRaceReplicatedMirrorWithLargeStartingSequenceOverLeafnode(t *testing.T) {
  8635  	// Cluster B
  8636  	tmpl := strings.Replace(jsClusterTempl, "store_dir:", "domain: B, store_dir:", 1)
  8637  	c := createJetStreamCluster(t, tmpl, "B", _EMPTY_, 3, 22020, true)
  8638  	defer c.shutdown()
  8639  
  8640  	// Cluster A
  8641  	// Domain is "A".
  8642  	lc := c.createLeafNodesWithStartPortAndDomain("A", 3, 22110, "A")
  8643  	defer lc.shutdown()
  8644  
  8645  	lc.waitOnClusterReady()
  8646  
  8647  	// Create a stream on B (HUB/CLOUD) and set its starting sequence very high.
  8648  	nc, js := jsClientConnect(t, c.randomServer())
  8649  	defer nc.Close()
  8650  
  8651  	_, err := js.AddStream(&nats.StreamConfig{
  8652  		Name:     "TEST",
  8653  		Subjects: []string{"foo"},
  8654  		Replicas: 3,
  8655  	})
  8656  	require_NoError(t, err)
  8657  
  8658  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: 1_000_000_000})
  8659  	require_NoError(t, err)
  8660  
  8661  	// Send in a small number of messages.
  8662  	for i := 0; i < 1000; i++ {
  8663  		sendStreamMsg(t, nc, "foo", "Hello")
  8664  	}
  8665  
  8666  	si, err := js.StreamInfo("TEST")
  8667  	require_NoError(t, err)
  8668  	require_True(t, si.State.FirstSeq == 1_000_000_000)
  8669  
  8670  	// Now try to create a replicated mirror on the leaf cluster.
  8671  	lnc, ljs := jsClientConnect(t, lc.randomServer())
  8672  	defer lnc.Close()
  8673  
  8674  	_, err = ljs.AddStream(&nats.StreamConfig{
  8675  		Name: "TEST",
  8676  		Mirror: &nats.StreamSource{
  8677  			Name:   "TEST",
  8678  			Domain: "B",
  8679  		},
  8680  	})
  8681  	require_NoError(t, err)
  8682  
  8683  	// Make sure we sync quickly.
  8684  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  8685  		si, err = ljs.StreamInfo("TEST")
  8686  		require_NoError(t, err)
  8687  		if si.State.Msgs == 1000 && si.State.FirstSeq == 1_000_000_000 {
  8688  			return nil
  8689  		}
  8690  		return fmt.Errorf("Mirror state not correct: %+v", si.State)
  8691  	})
  8692  }
  8693  
  8694  func TestNoRaceBinaryStreamSnapshotEncodingBasic(t *testing.T) {
  8695  	s := RunBasicJetStreamServer(t)
  8696  	defer s.Shutdown()
  8697  
  8698  	nc, js := jsClientConnect(t, s)
  8699  	defer nc.Close()
  8700  
  8701  	_, err := js.AddStream(&nats.StreamConfig{
  8702  		Name:              "TEST",
  8703  		Subjects:          []string{"*"},
  8704  		MaxMsgsPerSubject: 1,
  8705  	})
  8706  	require_NoError(t, err)
  8707  
  8708  	// Set first key
  8709  	sendStreamMsg(t, nc, "key:1", "hello")
  8710  
  8711  	// Set the second key but keep updating it, creating a laggard pattern.
  8712  	value := bytes.Repeat([]byte("Z"), 8*1024)
  8713  
  8714  	for i := 0; i <= 1000; i++ {
  8715  		_, err := js.PublishAsync("key:2", value)
  8716  		require_NoError(t, err)
  8717  	}
  8718  	select {
  8719  	case <-js.PublishAsyncComplete():
  8720  	case <-time.After(5 * time.Second):
  8721  		t.Fatalf("Did not receive completion signal")
  8722  	}
  8723  
  8724  	// Now create more of a swiss cheese pattern.
  8725  	for i := 3; i <= 1000; i++ {
  8726  		key := fmt.Sprintf("key:%d", i)
  8727  		_, err := js.PublishAsync(key, value)
  8728  		require_NoError(t, err)
  8729  		// Send it twice to create a hole right behind it, like swiss cheese.
  8730  		_, err = js.PublishAsync(key, value)
  8731  		require_NoError(t, err)
  8732  	}
  8733  	select {
  8734  	case <-js.PublishAsyncComplete():
  8735  	case <-time.After(5 * time.Second):
  8736  		t.Fatalf("Did not receive completion signal")
  8737  	}
  8738  
  8739  	// Make the stream state land on round numbers.
  8740  	sendStreamMsg(t, nc, "key:2", "hello")
  8741  	sendStreamMsg(t, nc, "key:2", "world")
  8742  
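        	// We have published 3000 messages total, but with MaxMsgsPerSubject=1 only the
        	// latest message per key survives: 1000 live messages and 2000 interior deletes.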
  8743  	si, err := js.StreamInfo("TEST")
  8744  	require_NoError(t, err)
  8745  	require_True(t, si.State.FirstSeq == 1)
  8746  	require_True(t, si.State.LastSeq == 3000)
  8747  	require_True(t, si.State.Msgs == 1000)
  8748  	require_True(t, si.State.NumDeleted == 2000)
  8749  
  8750  	mset, err := s.GlobalAccount().lookupStream("TEST")
  8751  	require_NoError(t, err)
  8752  
  8753  	snap, err := mset.store.EncodedStreamState(0)
  8754  	require_NoError(t, err)
  8755  
  8756  	// Now decode the snapshot.
  8757  	ss, err := DecodeStreamState(snap)
  8758  	require_NoError(t, err)
  8759  
  8760  	require_Equal(t, ss.FirstSeq, 1)
  8761  	require_Equal(t, ss.LastSeq, 3000)
  8762  	require_Equal(t, ss.Msgs, 1000)
  8763  	require_Equal(t, ss.Deleted.NumDeleted(), 2000)
  8764  }
  8765  
  8766  func TestNoRaceFilestoreBinaryStreamSnapshotEncodingLargeGaps(t *testing.T) {
  8767  	storeDir := t.TempDir()
  8768  	fcfg := FileStoreConfig{
  8769  		StoreDir:  storeDir,
  8770  		BlockSize: 512, // Small on purpose to create a lot of blocks.
  8771  	}
  8772  	fs, err := newFileStore(fcfg, StreamConfig{Name: "zzz", Subjects: []string{"zzz"}, Storage: FileStorage})
  8773  	require_NoError(t, err)
  8774  	defer fs.Stop()
  8775  
  8776  	subj, msg := "zzz", bytes.Repeat([]byte("X"), 128)
  8777  	numMsgs := 20_000
  8778  
  8779  	fs.StoreMsg(subj, nil, msg)
  8780  	for i := 2; i < numMsgs; i++ {
  8781  		seq, _, err := fs.StoreMsg(subj, nil, nil)
  8782  		require_NoError(t, err)
  8783  		fs.RemoveMsg(seq)
  8784  	}
  8785  	fs.StoreMsg(subj, nil, msg)
  8786  
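        	// Only the first and last messages remain; everything in between was stored and
        	// immediately removed, leaving one huge deleted run that should compress into a
        	// very small encoded stream state (checked to be under 512 bytes below).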
  8787  	snap, err := fs.EncodedStreamState(0)
  8788  	require_NoError(t, err)
  8789  	require_True(t, len(snap) < 512)
  8790  
  8791  	// Now decode the snapshot.
  8792  	ss, err := DecodeStreamState(snap)
  8793  	require_NoError(t, err)
  8794  
  8795  	require_True(t, ss.FirstSeq == 1)
  8796  	require_True(t, ss.LastSeq == 20_000)
  8797  	require_True(t, ss.Msgs == 2)
  8798  	require_True(t, len(ss.Deleted) <= 2)
  8799  	require_True(t, ss.Deleted.NumDeleted() == 19_998)
  8800  }
  8801  
  8802  func TestNoRaceJetStreamClusterStreamSnapshotCatchup(t *testing.T) {
  8803  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  8804  	defer c.shutdown()
  8805  
  8806  	// Client based API
  8807  	nc, js := jsClientConnect(t, c.randomServer())
  8808  	defer nc.Close()
  8809  
  8810  	_, err := js.AddStream(&nats.StreamConfig{
  8811  		Name:              "TEST",
  8812  		Subjects:          []string{"*"},
  8813  		MaxMsgsPerSubject: 1,
  8814  		Replicas:          3,
  8815  	})
  8816  	require_NoError(t, err)
  8817  
  8818  	msg := []byte("Hello World")
  8819  	_, err = js.Publish("foo", msg)
  8820  	require_NoError(t, err)
  8821  
  8822  	for i := 1; i < 1000; i++ {
  8823  		_, err := js.PublishAsync("bar", msg)
  8824  		require_NoError(t, err)
  8825  	}
  8826  	select {
  8827  	case <-js.PublishAsyncComplete():
  8828  	case <-time.After(5 * time.Second):
  8829  		t.Fatalf("Did not receive completion signal")
  8830  	}
  8831  
  8832  	sr := c.randomNonStreamLeader(globalAccountName, "TEST")
  8833  	sr.Shutdown()
  8834  
  8835  	// In case we were connected to sr.
  8836  	nc, js = jsClientConnect(t, c.randomServer())
  8837  	defer nc.Close()
  8838  
  8839  	// Now create a large gap.
  8840  	for i := 0; i < 50_000; i++ {
  8841  		_, err := js.PublishAsync("bar", msg)
  8842  		require_NoError(t, err)
  8843  	}
  8844  	select {
  8845  	case <-js.PublishAsyncComplete():
  8846  	case <-time.After(10 * time.Second):
  8847  		t.Fatalf("Did not receive completion signal")
  8848  	}
  8849  
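        	// Snapshot the stream on the leader, then restart the stopped replica; it should
        	// catch up from the snapshot rather than replaying ~50k interior deletes message
        	// by message, and end with the exact same state as the leader.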
  8850  	sl := c.streamLeader(globalAccountName, "TEST")
  8851  	sl.JetStreamSnapshotStream(globalAccountName, "TEST")
  8852  
  8853  	sr = c.restartServer(sr)
  8854  	c.checkClusterFormed()
  8855  	c.waitOnServerCurrent(sr)
  8856  	c.waitOnStreamCurrent(sr, globalAccountName, "TEST")
  8857  
  8858  	mset, err := sr.GlobalAccount().lookupStream("TEST")
  8859  	require_NoError(t, err)
  8860  
  8861  	// Make sure it's caught up
  8862  	var state StreamState
  8863  	mset.store.FastState(&state)
  8864  	require_Equal(t, state.Msgs, 2)
  8865  	require_Equal(t, state.FirstSeq, 1)
  8866  	require_Equal(t, state.LastSeq, 51_000)
  8867  	require_Equal(t, state.NumDeleted, 51_000-2)
  8868  
  8869  	sr.Shutdown()
  8870  
  8871  	_, err = js.Publish("baz", msg)
  8872  	require_NoError(t, err)
  8873  
  8874  	sl.JetStreamSnapshotStream(globalAccountName, "TEST")
  8875  
  8876  	sr = c.restartServer(sr)
  8877  	c.checkClusterFormed()
  8878  	c.waitOnServerCurrent(sr)
  8879  	c.waitOnStreamCurrent(sr, globalAccountName, "TEST")
  8880  
  8881  	mset, err = sr.GlobalAccount().lookupStream("TEST")
  8882  	require_NoError(t, err)
  8883  	mset.store.FastState(&state)
  8884  
  8885  	require_Equal(t, state.Msgs, 3)
  8886  	require_Equal(t, state.FirstSeq, 1)
  8887  	require_Equal(t, state.LastSeq, 51_001)
  8888  	require_Equal(t, state.NumDeleted, 51_001-3)
  8889  }
  8890  
  8891  func TestNoRaceStoreStreamEncoderDecoder(t *testing.T) {
  8892  	cfg := &StreamConfig{
  8893  		Name:       "zzz",
  8894  		Subjects:   []string{"*"},
  8895  		MaxMsgsPer: 1,
  8896  		Storage:    MemoryStorage,
  8897  	}
  8898  	ms, err := newMemStore(cfg)
  8899  	require_NoError(t, err)
  8900  
  8901  	fs, err := newFileStore(
  8902  		FileStoreConfig{StoreDir: t.TempDir()},
  8903  		StreamConfig{Name: "zzz", Subjects: []string{"*"}, MaxMsgsPer: 1, Storage: FileStorage},
  8904  	)
  8905  	require_NoError(t, err)
  8906  	defer fs.Stop()
  8907  
  8908  	const seed = 2222222
  8909  	msg := bytes.Repeat([]byte("ABC"), 33) // ~100bytes
  8910  
  8911  	maxEncodeTime := 2 * time.Second
  8912  	maxEncodeSize := 700 * 1024
  8913  
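        	// The workload below hammers the store with random keys (MaxMsgsPer=1 turns the
        	// overwrites into interior deletes) while periodically encoding and decoding the
        	// stream state, asserting the bounds above on encode time and snapshot size.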
  8914  	test := func(t *testing.T, gs StreamStore) {
  8915  		t.Parallel()
  8916  		prand := rand.New(rand.NewSource(seed))
  8917  		tick := time.NewTicker(time.Second)
  8918  		defer tick.Stop()
  8919  		done := time.NewTimer(10 * time.Second)
  8920  
  8921  		for running := true; running; {
  8922  			select {
  8923  			case <-tick.C:
  8924  				var state StreamState
  8925  				gs.FastState(&state)
  8926  				if state.NumDeleted == 0 {
  8927  					continue
  8928  				}
  8929  				start := time.Now()
  8930  				snap, err := gs.EncodedStreamState(0)
  8931  				require_NoError(t, err)
  8932  				elapsed := time.Since(start)
  8933  				// Should take <1ms without race but if CI/CD is slow we will give it a bit of room.
  8934  				if elapsed > maxEncodeTime {
  8935  					t.Logf("Encode took longer than expected: %v", elapsed)
  8936  				}
  8937  				if len(snap) > maxEncodeSize {
  8938  					t.Fatalf("Expected snapshot size < %v got %v", friendlyBytes(maxEncodeSize), friendlyBytes(len(snap)))
  8939  				}
  8940  				ss, err := DecodeStreamState(snap)
  8941  				require_NoError(t, err)
  8942  				require_True(t, len(ss.Deleted) > 0)
  8943  			case <-done.C:
  8944  				running = false
  8945  			default:
  8946  				key := strconv.Itoa(prand.Intn(256_000))
  8947  				gs.StoreMsg(key, nil, msg)
  8948  			}
  8949  		}
  8950  	}
  8951  
  8952  	for _, gs := range []StreamStore{ms, fs} {
  8953  		switch gs.(type) {
  8954  		case *memStore:
  8955  			t.Run("MemStore", func(t *testing.T) {
  8956  				test(t, gs)
  8957  			})
  8958  		case *fileStore:
  8959  			t.Run("FileStore", func(t *testing.T) {
  8960  				test(t, gs)
  8961  			})
  8962  		}
  8963  	}
  8964  }
  8965  
  8966  func TestNoRaceJetStreamClusterKVWithServerKill(t *testing.T) {
  8967  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  8968  	defer c.shutdown()
  8969  
  8970  	// Set up the KV bucket and use it for making assertions.
  8971  	nc, js := jsClientConnect(t, c.randomServer())
  8972  	defer nc.Close()
  8973  	_, err := js.CreateKeyValue(&nats.KeyValueConfig{
  8974  		Bucket:   "TEST",
  8975  		Replicas: 3,
  8976  		History:  10,
  8977  	})
  8978  	require_NoError(t, err)
  8979  
  8980  	// Total number of keys to range over.
  8981  	numKeys := 50
  8982  
  8983  	// ID is the server id to explicitly connect to.
  8984  	work := func(ctx context.Context, wg *sync.WaitGroup, id int) {
  8985  		defer wg.Done()
  8986  
  8987  		nc, js := jsClientConnect(t, c.servers[id])
  8988  		defer nc.Close()
  8989  
  8990  		kv, err := js.KeyValue("TEST")
  8991  		require_NoError(t, err)
  8992  
  8993  		// 100 messages a second for each single client.
  8994  		tk := time.NewTicker(10 * time.Millisecond)
  8995  		defer tk.Stop()
  8996  
  8997  		for {
  8998  			select {
  8999  			case <-ctx.Done():
  9000  				return
  9001  
  9002  			case <-tk.C:
  9003  				// Pick a random key within the range.
  9004  				k := fmt.Sprintf("key.%d", rand.Intn(numKeys))
  9005  				// Attempt to get a key.
  9006  				e, err := kv.Get(k)
  9007  				// If found, attempt to update or delete.
  9008  				if err == nil {
  9009  					if rand.Intn(10) < 3 {
  9010  						kv.Delete(k, nats.LastRevision(e.Revision()))
  9011  					} else {
  9012  						kv.Update(k, nil, e.Revision())
  9013  					}
  9014  				} else if errors.Is(err, nats.ErrKeyNotFound) {
  9015  					kv.Create(k, nil)
  9016  				}
  9017  			}
  9018  		}
  9019  	}
  9020  
  9021  	ctx, cancel := context.WithCancel(context.Background())
  9022  	defer cancel()
  9023  
  9024  	var wg sync.WaitGroup
  9025  	wg.Add(3)
  9026  
  9027  	go work(ctx, &wg, 0)
  9028  	go work(ctx, &wg, 1)
  9029  	go work(ctx, &wg, 2)
  9030  
  9031  	time.Sleep(time.Second)
  9032  
  9033  	// Simulate server stop and restart.
  9034  	for i := 0; i < 10; i++ {
  9035  		s := c.randomServer()
  9036  		s.Shutdown()
  9037  		c.waitOnLeader()
  9038  		c.waitOnStreamLeader(globalAccountName, "KV_TEST")
  9039  
  9040  		// Wait for a bit and then start the server again.
  9041  		time.Sleep(time.Duration(rand.Intn(1500)) * time.Millisecond)
  9042  		s = c.restartServer(s)
  9043  		c.waitOnServerCurrent(s)
  9044  		c.waitOnLeader()
  9045  		c.waitOnStreamLeader(globalAccountName, "KV_TEST")
  9046  		c.waitOnPeerCount(3)
  9047  	}
  9048  
  9049  	// Stop the workload.
  9050  	cancel()
  9051  	wg.Wait()
  9052  
  9053  	type fullState struct {
  9054  		state StreamState
  9055  		lseq  uint64
  9056  		clfs  uint64
  9057  	}
  9058  
  9059  	grabState := func(mset *stream) *fullState {
  9060  		mset.mu.RLock()
  9061  		defer mset.mu.RUnlock()
  9062  		var state StreamState
  9063  		mset.store.FastState(&state)
  9064  		return &fullState{state, mset.lseq, mset.clfs}
  9065  	}
  9066  
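        	// grabStore walks the full sequence range and records which sequences exist for
        	// each subject, so every replica's store contents can be compared against the
        	// leader's copy below.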
  9067  	grabStore := func(mset *stream) map[string][]uint64 {
  9068  		mset.mu.RLock()
  9069  		store := mset.store
  9070  		mset.mu.RUnlock()
  9071  		var state StreamState
  9072  		store.FastState(&state)
  9073  		storeMap := make(map[string][]uint64)
  9074  		for seq := state.FirstSeq; seq <= state.LastSeq; seq++ {
  9075  			if sm, err := store.LoadMsg(seq, nil); err == nil {
  9076  				storeMap[sm.subj] = append(storeMap[sm.subj], sm.seq)
  9077  			}
  9078  		}
  9079  		return storeMap
  9080  	}
  9081  
  9082  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  9083  		// Current stream leader.
  9084  		sl := c.streamLeader(globalAccountName, "KV_TEST")
  9085  		mset, err := sl.GlobalAccount().lookupStream("KV_TEST")
  9086  		require_NoError(t, err)
  9087  		lstate := grabState(mset)
  9088  		golden := grabStore(mset)
  9089  
  9090  		// Compare each follower's state and store against the leader's.
  9091  		for _, s := range c.servers {
  9092  			if s == sl {
  9093  				continue
  9094  			}
  9095  			mset, err := s.GlobalAccount().lookupStream("KV_TEST")
  9096  			require_NoError(t, err)
  9097  			state := grabState(mset)
  9098  			if !reflect.DeepEqual(state, lstate) {
  9099  				return fmt.Errorf("Expected follower state\n%+v\nto match leader's\n %+v", state, lstate)
  9100  			}
  9101  			sm := grabStore(mset)
  9102  			if !reflect.DeepEqual(sm, golden) {
  9103  				t.Fatalf("Expected follower store for %v\n%+v\nto match leader's %v\n %+v", s, sm, sl, golden)
  9104  			}
  9105  		}
  9106  		return nil
  9107  	})
  9108  }
  9109  
  9110  func TestNoRaceFileStoreLargeMsgsAndFirstMatching(t *testing.T) {
  9111  	sd := t.TempDir()
  9112  	fs, err := newFileStore(
  9113  		FileStoreConfig{StoreDir: sd, BlockSize: 8 * 1024 * 1024},
  9114  		StreamConfig{Name: "zzz", Subjects: []string{">"}, Storage: FileStorage})
  9115  	require_NoError(t, err)
  9116  	defer fs.Stop()
  9117  
  9118  	for i := 0; i < 150_000; i++ {
  9119  		fs.StoreMsg(fmt.Sprintf("foo.bar.%d", i), nil, nil)
  9120  	}
  9121  	for i := 0; i < 150_000; i++ {
  9122  		fs.StoreMsg(fmt.Sprintf("foo.baz.%d", i), nil, nil)
  9123  	}
  9124  	require_Equal(t, fs.numMsgBlocks(), 2)
  9125  	fs.mu.RLock()
  9126  	mb := fs.blks[1]
  9127  	fs.mu.RUnlock()
  9128  	fseq := atomic.LoadUint64(&mb.first.seq)
  9129  	// The -40 leaves enough mb.fss entries to kick in linear scan.
  9130  	for seq := fseq; seq < 300_000-40; seq++ {
  9131  		fs.RemoveMsg(uint64(seq))
  9132  	}
  9133  	start := time.Now()
  9134  	fs.LoadNextMsg("*.baz.*", true, fseq, nil)
  9135  	require_True(t, time.Since(start) < 200*time.Microsecond)
  9136  	// Now remove more to kick into non-linear logic.
  9137  	for seq := 300_000 - 40; seq < 300_000; seq++ {
  9138  		fs.RemoveMsg(uint64(seq))
  9139  	}
  9140  	start = time.Now()
  9141  	fs.LoadNextMsg("*.baz.*", true, fseq, nil)
  9142  	require_True(t, time.Since(start) < 200*time.Microsecond)
  9143  }
  9144  
  9145  func TestNoRaceWSNoCorruptionWithFrameSizeLimit(t *testing.T) {
  9146  	testWSNoCorruptionWithFrameSizeLimit(t, 50000)
  9147  }
  9148  
  9149  func TestNoRaceJetStreamAPIDispatchQueuePending(t *testing.T) {
  9150  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9151  	defer c.shutdown()
  9152  
  9153  	// Client based API
  9154  	nc, js := jsClientConnect(t, c.randomServer())
  9155  	defer nc.Close()
  9156  
  9157  	_, err := js.AddStream(&nats.StreamConfig{
  9158  		Name:     "TEST",
  9159  		Subjects: []string{"foo.*.*"},
  9160  	})
  9161  	require_NoError(t, err)
  9162  
  9163  	// Queue up 500k messages, all with different subjects.
  9164  	// We want to make calculating num pending for a consumer expensive, so a large
  9165  	// subject space combined with wildcard filters does the trick.
  9166  	toks := []string{"foo", "bar", "baz"} // for second token.
  9167  	for i := 1; i <= 500_000; i++ {
  9168  		subj := fmt.Sprintf("foo.%s.%d", toks[rand.Intn(len(toks))], i)
  9169  		_, err := js.PublishAsync(subj, nil, nats.StallWait(time.Second))
  9170  		require_NoError(t, err)
  9171  	}
  9172  	select {
  9173  	case <-js.PublishAsyncComplete():
  9174  	case <-time.After(20 * time.Second):
  9175  		t.Fatalf("Did not receive completion signal")
  9176  	}
  9177  
  9178  	// To back up our pending queue we will create lots of filtered (with wildcards) R1 consumers
  9179  	// from a different server than the one hosting the stream.
  9180  	// It is ok to share this connection here.
  9181  	sldr := c.streamLeader(globalAccountName, "TEST")
  9182  	for _, s := range c.servers {
  9183  		if s != sldr {
  9184  			nc, js = jsClientConnect(t, s)
  9185  			defer nc.Close()
  9186  			break
  9187  		}
  9188  	}
  9189  
  9190  	ngr, ncons := 100, 10
  9191  	startCh, errCh := make(chan bool), make(chan error, ngr)
  9192  	var wg, swg sync.WaitGroup
  9193  	wg.Add(ngr)
  9194  	swg.Add(ngr)
  9195  
  9196  	// The wildcard in the filter subject is the key.
  9197  	cfg := &nats.ConsumerConfig{FilterSubject: "foo.*.22"}
  9198  	var tt atomic.Int64
  9199  
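        	// Fire all goroutines at once, each creating consumers whose num pending
        	// calculation is expensive; this should back up the JS API dispatch queue,
        	// which we then sample via Jsz on the stream leader.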
  9200  	for i := 0; i < ngr; i++ {
  9201  		go func() {
  9202  			defer wg.Done()
  9203  			swg.Done()
  9204  			// Make them all fire at once.
  9205  			<-startCh
  9206  
  9207  			for i := 0; i < ncons; i++ {
  9208  				start := time.Now()
  9209  				if _, err := js.AddConsumer("TEST", cfg); err != nil {
  9210  					errCh <- err
  9211  					t.Logf("Got err creating consumer: %v", err)
  9212  				}
  9213  				elapsed := time.Since(start)
  9214  				tt.Add(int64(elapsed))
  9215  			}
  9216  		}()
  9217  	}
  9218  	swg.Wait()
  9219  	close(startCh)
  9220  	time.Sleep(time.Millisecond)
  9221  	jsz, _ := sldr.Jsz(nil)
  9222  	// This could legitimately be 0, so just log, don't fail.
  9223  	if jsz.JetStreamStats.API.Inflight == 0 {
  9224  		t.Log("Expected a non-zero inflight")
  9225  	}
  9226  	wg.Wait()
  9227  
  9228  	if len(errCh) > 0 {
  9229  		t.Fatalf("Expected no errors, got %d", len(errCh))
  9230  	}
  9231  }
  9232  
  9233  func TestNoRaceJetStreamMirrorAndSourceConsumerFailBackoff(t *testing.T) {
  9234  	// Check calculations first.
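        	// We expect the backoff to grow by 10s per failed attempt and to cap at
        	// retryMaximum from the 12th attempt on.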
  9235  	for i := 1; i <= 20; i++ {
  9236  		backoff := calculateRetryBackoff(i)
  9237  		if i < 12 {
  9238  			require_Equal(t, backoff, time.Duration(i)*10*time.Second)
  9239  		} else {
  9240  			require_Equal(t, backoff, retryMaximum)
  9241  		}
  9242  	}
  9243  
  9244  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9245  	defer c.shutdown()
  9246  
  9247  	// Client based API
  9248  	nc, js := jsClientConnect(t, c.randomServer())
  9249  	defer nc.Close()
  9250  
  9251  	_, err := js.AddStream(&nats.StreamConfig{
  9252  		Name:     "TEST",
  9253  		Subjects: []string{"foo.*.*"},
  9254  	})
  9255  	require_NoError(t, err)
  9256  	sl := c.streamLeader(globalAccountName, "TEST")
  9257  
  9258  	// Create a mirror.
  9259  	ml := sl
  9260  	// Make sure it is not on the same server. Should not happen in general, but it is possible.
  9261  	for ml == sl {
  9262  		js.DeleteStream("MIRROR")
  9263  		_, err = js.AddStream(&nats.StreamConfig{
  9264  			Name:   "MIRROR",
  9265  			Mirror: &nats.StreamSource{Name: "TEST"},
  9266  		})
  9267  		require_NoError(t, err)
  9268  		ml = c.streamLeader(globalAccountName, "MIRROR")
  9269  	}
  9270  
  9271  	// Create sub to watch for the consumer create requests.
  9272  	nc, _ = jsClientConnect(t, ml)
  9273  	defer nc.Close()
  9274  	sub, err := nc.SubscribeSync("$JS.API.CONSUMER.CREATE.>")
  9275  	require_NoError(t, err)
  9276  
  9277  	// Kill the server where the source stream is.
  9278  	sldr := c.streamLeader(globalAccountName, "TEST")
  9279  	sldr.Shutdown()
  9280  
  9281  	// Wait for just greater than 10s. We should only see 1 request during this time.
  9282  	time.Sleep(11 * time.Second)
  9283  	n, _, _ := sub.Pending()
  9284  	require_Equal(t, n, 1)
  9285  
  9286  	// Now make sure that the fail count is set properly.
  9287  	mset, err := ml.GlobalAccount().lookupStream("MIRROR")
  9288  	require_NoError(t, err)
  9289  	mset.mu.RLock()
  9290  	fails := mset.mirror.fails
  9291  	mset.mu.RUnlock()
  9292  	require_Equal(t, fails, 1)
  9293  
  9294  	js.DeleteStream("MIRROR")
  9295  	// Clear sub
  9296  	sub.NextMsg(time.Second)
  9297  	// Make sure sources behave similarly.
  9298  	_, err = js.AddStream(&nats.StreamConfig{
  9299  		Name:    "SOURCE",
  9300  		Sources: []*nats.StreamSource{{Name: "TEST"}},
  9301  	})
  9302  	require_NoError(t, err)
  9303  
  9304  	// Wait for just greater than 10s. We should only see 1 request during this time.
  9305  	time.Sleep(11 * time.Second)
  9306  	n, _, _ = sub.Pending()
  9307  	require_Equal(t, n, 1)
  9308  
  9309  	mset, err = c.streamLeader(globalAccountName, "SOURCE").GlobalAccount().lookupStream("SOURCE")
  9310  	require_NoError(t, err)
  9311  	mset.mu.RLock()
  9312  	si := mset.sources["TEST > >"]
  9313  	mset.mu.RUnlock()
  9314  	require_True(t, si != nil)
  9315  	require_Equal(t, si.fails, 1)
  9316  }
  9317  
  9318  func TestNoRaceJetStreamClusterStreamCatchupLargeInteriorDeletes(t *testing.T) {
  9319  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9320  	defer c.shutdown()
  9321  
  9322  	nc, js := jsClientConnect(t, c.randomServer())
  9323  	defer nc.Close()
  9324  
  9325  	cfg := &nats.StreamConfig{
  9326  		Name:              "TEST",
  9327  		Subjects:          []string{"foo.*"},
  9328  		MaxMsgsPerSubject: 100,
  9329  		Replicas:          1,
  9330  	}
  9331  
  9332  	_, err := js.AddStream(cfg)
  9333  	require_NoError(t, err)
  9334  
  9335  	msg := bytes.Repeat([]byte("Z"), 2*1024)
  9336  	// We will create lots of interior deletes on our R1 then scale up.
  9337  	_, err = js.Publish("foo.0", msg)
  9338  	require_NoError(t, err)
  9339  
  9340  	// Create 50k messages across random subjects foo.1 through foo.100.
  9341  	for i := 0; i < 50_000; i++ {
  9342  		subj := fmt.Sprintf("foo.%d", rand.Intn(100)+1)
  9343  		js.PublishAsync(subj, msg)
  9344  	}
  9345  	select {
  9346  	case <-js.PublishAsyncComplete():
  9347  	case <-time.After(5 * time.Second):
  9348  		t.Fatalf("Did not receive completion signal")
  9349  	}
  9350  	// Now create a large gap.
  9351  	for i := 0; i < 100_000; i++ {
  9352  		js.PublishAsync("foo.2", msg)
  9353  	}
  9354  	select {
  9355  	case <-js.PublishAsyncComplete():
  9356  	case <-time.After(5 * time.Second):
  9357  		t.Fatalf("Did not receive completion signal")
  9358  	}
  9359  	// Do 50k random subjects again at the end.
  9360  	for i := 0; i < 50_000; i++ {
  9361  		subj := fmt.Sprintf("foo.%d", rand.Intn(100)+1)
  9362  		js.PublishAsync(subj, msg)
  9363  	}
  9364  	select {
  9365  	case <-js.PublishAsyncComplete():
  9366  	case <-time.After(5 * time.Second):
  9367  		t.Fatalf("Did not receive completion signal")
  9368  	}
  9369  
  9370  	si, err := js.StreamInfo("TEST")
  9371  	require_NoError(t, err)
  9372  
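        	// Scale the stream up to R2; the new replica has to catch up over a state that
        	// contains a very large number of interior deletes.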
  9373  	cfg.Replicas = 2
  9374  	_, err = js.UpdateStream(cfg)
  9375  	require_NoError(t, err)
  9376  
  9377  	// Let catchup start.
  9378  	c.waitOnStreamLeader(globalAccountName, "TEST")
  9379  
  9380  	nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  9381  	require_True(t, nl != nil)
  9382  	mset, err := nl.GlobalAccount().lookupStream("TEST")
  9383  	require_NoError(t, err)
  9384  
  9385  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  9386  		state := mset.state()
  9387  		if state.Msgs == si.State.Msgs {
  9388  			return nil
  9389  		}
  9390  		return fmt.Errorf("Msgs not equal %d vs %d", state.Msgs, si.State.Msgs)
  9391  	})
  9392  }
  9393  
  9394  func TestNoRaceJetStreamClusterBadRestartsWithHealthzPolling(t *testing.T) {
  9395  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9396  	defer c.shutdown()
  9397  
  9398  	nc, js := jsClientConnect(t, c.randomServer())
  9399  	defer nc.Close()
  9400  
  9401  	cfg := &nats.StreamConfig{
  9402  		Name:     "TEST",
  9403  		Subjects: []string{"foo.>"},
  9404  		Replicas: 3,
  9405  	}
  9406  	_, err := js.AddStream(cfg)
  9407  	require_NoError(t, err)
  9408  
  9409  	// We will poll healthz at a decent clip and make sure any restart logic works
  9410  	// correctly with assets coming and going.
  9411  	ch := make(chan struct{})
  9412  	defer close(ch)
  9413  
  9414  	go func() {
  9415  		for {
  9416  			select {
  9417  			case <-ch:
  9418  				return
  9419  			case <-time.After(50 * time.Millisecond):
  9420  				for _, s := range c.servers {
  9421  					s.healthz(nil)
  9422  				}
  9423  			}
  9424  		}
  9425  	}()
  9426  
  9427  	numConsumers := 500
  9428  	consumers := make([]string, 0, numConsumers)
  9429  
  9430  	var wg sync.WaitGroup
  9431  
  9432  	for i := 0; i < numConsumers; i++ {
  9433  		cname := fmt.Sprintf("CONS-%d", i+1)
  9434  		consumers = append(consumers, cname)
  9435  		wg.Add(1)
  9436  		go func() {
  9437  			defer wg.Done()
  9438  			_, err := js.PullSubscribe("foo.>", cname, nats.BindStream("TEST"))
  9439  			require_NoError(t, err)
  9440  		}()
  9441  	}
  9442  	wg.Wait()
  9443  
  9444  	// Make sure all are reported.
  9445  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9446  		for _, s := range c.servers {
  9447  			jsz, _ := s.Jsz(nil)
  9448  			if jsz.Consumers != numConsumers {
  9449  				return fmt.Errorf("%v wrong number of consumers: %d vs %d", s, jsz.Consumers, numConsumers)
  9450  			}
  9451  		}
  9452  		return nil
  9453  	})
  9454  
  9455  	// Now do same for streams.
  9456  	numStreams := 200
  9457  	streams := make([]string, 0, numStreams)
  9458  
  9459  	for i := 0; i < numStreams; i++ {
  9460  		sname := fmt.Sprintf("TEST-%d", i+1)
  9461  		streams = append(streams, sname)
  9462  		wg.Add(1)
  9463  		go func() {
  9464  			defer wg.Done()
  9465  			_, err := js.AddStream(&nats.StreamConfig{Name: sname, Replicas: 3})
  9466  			require_NoError(t, err)
  9467  		}()
  9468  	}
  9469  	wg.Wait()
  9470  
  9471  	// Make sure all are reported.
  9472  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9473  		for _, s := range c.servers {
  9474  			jsz, _ := s.Jsz(nil)
  9475  			if jsz.Streams != numStreams+1 {
  9476  				return fmt.Errorf("%v wrong number of streams: %d vs %d", s, jsz.Streams, numStreams+1)
  9477  			}
  9478  		}
  9479  		return nil
  9480  	})
  9481  
  9482  	// Delete consumers.
  9483  	for _, cname := range consumers {
  9484  		err := js.DeleteConsumer("TEST", cname)
  9485  		require_NoError(t, err)
  9486  	}
  9487  	// Make sure reporting goes to zero.
  9488  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9489  		for _, s := range c.servers {
  9490  			jsz, _ := s.Jsz(nil)
  9491  			if jsz.Consumers != 0 {
  9492  				return fmt.Errorf("%v still has %d consumers", s, jsz.Consumers)
  9493  			}
  9494  		}
  9495  		return nil
  9496  	})
  9497  
  9498  	// Delete streams
  9499  	for _, sname := range streams {
  9500  		err := js.DeleteStream(sname)
  9501  		require_NoError(t, err)
  9502  	}
  9503  	err = js.DeleteStream("TEST")
  9504  	require_NoError(t, err)
  9505  
  9506  	// Make sure reporting goes to zero.
  9507  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9508  		for _, s := range c.servers {
  9509  			jsz, _ := s.Jsz(nil)
  9510  			if jsz.Streams != 0 {
  9511  				return fmt.Errorf("%v still has %d streams", s, jsz.Streams)
  9512  			}
  9513  		}
  9514  		return nil
  9515  	})
  9516  }
  9517  
  9518  func TestNoRaceJetStreamKVReplaceWithServerRestart(t *testing.T) {
  9519  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9520  	defer c.shutdown()
  9521  
  9522  	nc, _ := jsClientConnect(t, c.randomServer())
  9523  	defer nc.Close()
  9524  	// Shorten wait time for disconnects.
  9525  	js, err := nc.JetStream(nats.MaxWait(time.Second))
  9526  	require_NoError(t, err)
  9527  
  9528  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  9529  		Bucket:   "TEST",
  9530  		Replicas: 3,
  9531  	})
  9532  	require_NoError(t, err)
  9533  
  9534  	createData := func(n int) []byte {
  9535  		const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
  9536  		b := make([]byte, n)
  9537  		for i := range b {
  9538  			b[i] = letterBytes[rand.Intn(len(letterBytes))]
  9539  		}
  9540  		return b
  9541  	}
  9542  
  9543  	_, err = kv.Create("foo", createData(160))
  9544  	require_NoError(t, err)
  9545  
  9546  	ch := make(chan struct{})
  9547  	wg := sync.WaitGroup{}
  9548  
  9549  	// For counting errors that should not happen.
  9550  	errCh := make(chan error, 1024)
  9551  
  9552  	wg.Add(1)
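        	// Reader/updater loop: read the key, verify the stored value still matches the
        	// last successful update for that revision (detecting data loss), then replace
        	// it with new random data.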
  9553  	go func() {
  9554  		defer wg.Done()
  9555  
  9556  		var lastData []byte
  9557  		var revision uint64
  9558  
  9559  		for {
  9560  			select {
  9561  			case <-ch:
  9562  				return
  9563  			default:
  9564  				k, err := kv.Get("foo")
  9565  				if err == nats.ErrKeyNotFound {
  9566  					errCh <- err
  9567  				} else if k != nil {
  9568  					if lastData != nil && k.Revision() == revision && !bytes.Equal(lastData, k.Value()) {
  9569  						errCh <- fmt.Errorf("data loss [%s][rev:%d] expected:[%q] is:[%q]\n", "foo", revision, lastData, k.Value())
  9570  					}
  9571  					newData := createData(160)
  9572  					if revision, err = kv.Update("foo", newData, k.Revision()); err == nil {
  9573  						lastData = newData
  9574  					}
  9575  				}
  9576  			}
  9577  		}
  9578  	}()
  9579  
  9580  	// Wait a short bit.
  9581  	time.Sleep(2 * time.Second)
  9582  	for _, s := range c.servers {
  9583  		s.Shutdown()
  9584  		// Need to leave servers down for a while to trigger the bug properly.
  9585  		time.Sleep(5 * time.Second)
  9586  		s = c.restartServer(s)
  9587  		c.waitOnServerHealthz(s)
  9588  	}
  9589  
  9590  	// Shut down the goroutine above.
  9591  	close(ch)
  9592  	// Wait for it to finish.
  9593  	wg.Wait()
  9594  
  9595  	if len(errCh) != 0 {
  9596  		for len(errCh) > 0 {
  9597  			t.Logf("Received err %v during test", <-errCh)
  9598  		}
  9599  		t.Fatalf("Encountered errors")
  9600  	}
  9601  }
  9602  
  9603  func TestNoRaceMemStoreCompactPerformance(t *testing.T) {
  9604  	// Load the MemStore so that it is full.
  9605  	subj, msg := "foo", make([]byte, 1000)
  9606  	storedMsgSize := memStoreMsgSize(subj, nil, msg)
  9607  
  9608  	toStore := uint64(10_000)
  9609  	toStoreOnTop := uint64(1_000)
  9610  	setSeqNo := uint64(10_000_000_000)
  9611  
  9612  	expectedPurge := toStore - 1
  9613  	maxBytes := storedMsgSize * toStore
  9614  
  9615  	ms, err := newMemStore(&StreamConfig{Storage: MemoryStorage, MaxBytes: int64(maxBytes)})
  9616  	require_NoError(t, err)
  9617  	defer ms.Stop()
  9618  
  9619  	for i := uint64(0); i < toStore; i++ {
  9620  		ms.StoreMsg(subj, nil, msg)
  9621  	}
  9622  	state := ms.State()
  9623  	require_Equal(t, toStore, state.Msgs)
  9624  	require_Equal(t, state.Bytes, storedMsgSize*toStore)
  9625  
  9626  	// 1st run: load additional messages, then compact.
  9627  	for i := uint64(0); i < toStoreOnTop; i++ {
  9628  		ms.StoreMsg(subj, nil, msg)
  9629  	}
  9630  	startFirstRun := time.Now()
  9631  	purgedFirstRun, _ := ms.Compact(toStore + toStoreOnTop)
  9632  	elapsedFirstRun := time.Since(startFirstRun)
  9633  	require_Equal(t, expectedPurge, purgedFirstRun)
  9634  
  9635  	// Set the sequence number to a very high value by compacting with a sequence number past the end.
  9636  	purgedFull, _ := ms.Compact(setSeqNo)
  9637  	require_Equal(t, 1, purgedFull)
  9638  
  9639  	// 2nd run: compact again.
  9640  	for i := uint64(0); i < toStore; i++ {
  9641  		ms.StoreMsg(subj, nil, msg)
  9642  	}
  9643  	startSecondRun := time.Now()
  9644  	purgedSecondRun, _ := ms.Compact(setSeqNo + toStore - 1)
  9645  	elapsedSecondRun := time.Since(startSecondRun)
  9646  	require_Equal(t, expectedPurge, purgedSecondRun)
  9647  
  9648  	// Calculate the delta between runs and fail if it is too high.
  9649  	require_LessThan(t, elapsedSecondRun-elapsedFirstRun, time.Duration(1)*time.Second)
  9650  }
  9651  
  9652  func TestNoRaceJetStreamSnapshotsWithSlowAckDontSlowConsumer(t *testing.T) {
  9653  	s := RunBasicJetStreamServer(t)
  9654  	defer s.Shutdown()
  9655  
  9656  	ech := make(chan error)
  9657  	ecb := func(_ *nats.Conn, _ *nats.Subscription, err error) {
  9658  		if err != nil {
  9659  			ech <- err
  9660  		}
  9661  	}
  9662  	nc, js := jsClientConnect(t, s, nats.ErrorHandler(ecb))
  9663  	defer nc.Close()
  9664  
  9665  	_, err := js.AddStream(&nats.StreamConfig{
  9666  		Name:     "TEST",
  9667  		Subjects: []string{"foo"},
  9668  	})
  9669  	require_NoError(t, err)
  9670  
  9671  	// Put in over 64MB.
  9672  	msg, toSend := make([]byte, 1024*1024), 80
  9673  	crand.Read(msg)
  9674  
  9675  	for i := 0; i < toSend; i++ {
  9676  		_, err := js.Publish("foo", msg)
  9677  		require_NoError(t, err)
  9678  	}
  9679  
  9680  	sreq := &JSApiStreamSnapshotRequest{
  9681  		DeliverSubject: nats.NewInbox(),
  9682  		ChunkSize:      1024 * 1024,
  9683  	}
  9684  	req, _ := json.Marshal(sreq)
  9685  	rmsg, err := nc.Request(fmt.Sprintf(JSApiStreamSnapshotT, "TEST"), req, time.Second)
  9686  	require_NoError(t, err)
  9687  
  9688  	var resp JSApiStreamSnapshotResponse
  9689  	json.Unmarshal(rmsg.Data, &resp)
  9690  	require_True(t, resp.Error == nil)
  9691  
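        	// Subscribe to the snapshot delivery subject but never ack the flow control
        	// messages. The server should eventually give up with a 408 "No Flow Response"
        	// EOF instead of tripping slow consumer protection on this connection.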
  9692  	done := make(chan *nats.Msg)
  9693  	sub, _ := nc.Subscribe(sreq.DeliverSubject, func(m *nats.Msg) {
  9694  		// EOF
  9695  		if len(m.Data) == 0 {
  9696  			done <- m
  9697  			return
  9698  		}
  9699  	})
  9700  	defer sub.Unsubscribe()
  9701  
  9702  	// Check that we do not get disconnected due to slow consumer.
  9703  	select {
  9704  	case msg := <-done:
  9705  		require_Equal(t, msg.Header.Get("Status"), "408")
  9706  		require_Equal(t, msg.Header.Get("Description"), "No Flow Response")
  9707  	case err := <-ech:
  9708  		t.Fatalf("Got disconnected: %v", err)
  9709  	case <-time.After(5 * time.Second):
  9710  		t.Fatalf("Should have received EOF with error status")
  9711  	}
  9712  }