github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/norace_test.go

     1  // Copyright 2018-2024 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  //go:build !race && !skip_no_race_tests
    15  // +build !race,!skip_no_race_tests
    16  
    17  package server
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"compress/gzip"
    23  	"context"
    24  	"encoding/binary"
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"math"
    30  	"math/rand"
    31  	"net"
    32  	"net/http"
    33  	"net/url"
    34  	"os"
    35  	"path/filepath"
    36  	"reflect"
    37  	"runtime"
    38  	"runtime/debug"
    39  	"sort"
    40  	"strconv"
    41  	"strings"
    42  	"sync"
    43  	"sync/atomic"
    44  	"testing"
    45  	"time"
    46  
    47  	"crypto/hmac"
    48  	crand "crypto/rand"
    49  	"crypto/sha256"
    50  
    51  	"github.com/klauspost/compress/s2"
    52  	"github.com/nats-io/jwt/v2"
    53  	"github.com/nats-io/nats-server/v2/server/avl"
    54  	"github.com/nats-io/nats.go"
    55  	"github.com/nats-io/nkeys"
    56  	"github.com/nats-io/nuid"
    57  )
    58  
    59  // IMPORTANT: Tests in this file are not executed when running with the -race flag.
    60  //            The test name should be prefixed with TestNoRace so we can run only
    61  //            those tests: go test -run=TestNoRace ...
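         //            For example (illustrative invocations):
         //                go test -run=TestNoRace ./server/...
         //            Per the build tags above, these tests are excluded when running with
         //            -race, and can also be skipped explicitly with:
         //                go test -tags=skip_no_race_tests ./server/...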
    62  
    63  func TestNoRaceAvoidSlowConsumerBigMessages(t *testing.T) {
    64  	opts := DefaultOptions() // Use defaults to make sure they avoid pending slow consumer.
    65  	opts.NoSystemAccount = true
    66  	s := RunServer(opts)
    67  	defer s.Shutdown()
    68  
    69  	nc1, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
    70  	if err != nil {
    71  		t.Fatalf("Error on connect: %v", err)
    72  	}
    73  	defer nc1.Close()
    74  
    75  	nc2, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
    76  	if err != nil {
    77  		t.Fatalf("Error on connect: %v", err)
    78  	}
    79  	defer nc2.Close()
    80  
    81  	data := make([]byte, 1024*1024) // 1MB payload
    82  	crand.Read(data)
    83  
    84  	expected := int32(500)
    85  	received := int32(0)
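         	// In total we will push 500 x 1MB (~500MB) through the single subscriber below;
         	// with the default options this should not trip the slow consumer protection.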
    86  
    87  	done := make(chan bool)
    88  
    89  	// Create Subscription.
    90  	nc1.Subscribe("slow.consumer", func(m *nats.Msg) {
    91  		// Just eat it so that we are not measuring
    92  		// code time, just delivery.
     93  		if atomic.AddInt32(&received, 1) >= expected {
    95  			done <- true
    96  		}
    97  	})
    98  
    99  	// Create Error handler
   100  	nc1.SetErrorHandler(func(c *nats.Conn, s *nats.Subscription, err error) {
   101  		t.Fatalf("Received an error on the subscription's connection: %v\n", err)
   102  	})
   103  
   104  	nc1.Flush()
   105  
   106  	for i := 0; i < int(expected); i++ {
   107  		nc2.Publish("slow.consumer", data)
   108  	}
   109  	nc2.Flush()
   110  
   111  	select {
   112  	case <-done:
   113  		return
   114  	case <-time.After(10 * time.Second):
   115  		r := atomic.LoadInt32(&received)
   116  		if s.NumSlowConsumers() > 0 {
   117  			t.Fatalf("Did not receive all large messages due to slow consumer status: %d of %d", r, expected)
   118  		}
   119  		t.Fatalf("Failed to receive all large messages: %d of %d\n", r, expected)
   120  	}
   121  }
   122  
   123  func TestNoRaceRoutedQueueAutoUnsubscribe(t *testing.T) {
   124  	optsA, err := ProcessConfigFile("./configs/seed.conf")
   125  	require_NoError(t, err)
   126  	optsA.NoSigs, optsA.NoLog = true, true
   127  	optsA.NoSystemAccount = true
   128  	srvA := RunServer(optsA)
   129  	defer srvA.Shutdown()
   130  
   131  	srvARouteURL := fmt.Sprintf("nats://%s:%d", optsA.Cluster.Host, srvA.ClusterAddr().Port)
   132  	optsB := nextServerOpts(optsA)
   133  	optsB.Routes = RoutesFromStr(srvARouteURL)
   134  
   135  	srvB := RunServer(optsB)
   136  	defer srvB.Shutdown()
   137  
   138  	// Wait for these 2 to connect to each other
   139  	checkClusterFormed(t, srvA, srvB)
   140  
   141  	// Have a client connection to each server
   142  	ncA, err := nats.Connect(fmt.Sprintf("nats://%s:%d", optsA.Host, optsA.Port))
   143  	if err != nil {
   144  		t.Fatalf("Error on connect: %v", err)
   145  	}
   146  	defer ncA.Close()
   147  
   148  	ncB, err := nats.Connect(fmt.Sprintf("nats://%s:%d", optsB.Host, optsB.Port))
   149  	if err != nil {
   150  		t.Fatalf("Error on connect: %v", err)
   151  	}
   152  	defer ncB.Close()
   153  
   154  	rbar := int32(0)
   155  	barCb := func(m *nats.Msg) {
   156  		atomic.AddInt32(&rbar, 1)
   157  	}
   158  	rbaz := int32(0)
   159  	bazCb := func(m *nats.Msg) {
   160  		atomic.AddInt32(&rbaz, 1)
   161  	}
   162  
    163  	// Create 100 queue subs with auto-unsubscribe on each server for
    164  	// group bar and group baz. So 200 total per queue group.
   165  	cons := []*nats.Conn{ncA, ncB}
   166  	for _, c := range cons {
   167  		for i := 0; i < 100; i++ {
   168  			qsub, err := c.QueueSubscribe("foo", "bar", barCb)
   169  			if err != nil {
   170  				t.Fatalf("Error on subscribe: %v", err)
   171  			}
   172  			if err := qsub.AutoUnsubscribe(1); err != nil {
   173  				t.Fatalf("Error on auto-unsubscribe: %v", err)
   174  			}
   175  			qsub, err = c.QueueSubscribe("foo", "baz", bazCb)
   176  			if err != nil {
   177  				t.Fatalf("Error on subscribe: %v", err)
   178  			}
   179  			if err := qsub.AutoUnsubscribe(1); err != nil {
   180  				t.Fatalf("Error on auto-unsubscribe: %v", err)
   181  			}
   182  		}
   183  		c.Subscribe("TEST.COMPLETE", func(m *nats.Msg) {})
   184  	}
   185  
    186  	// Routed queue subscriptions are coalesced, so each server will have all
    187  	// 200 local queue subs plus one sub from the remote side for each queue
    188  	// group. We also create one more subscription per server and will wait
    189  	// until each server has 204 subscriptions, which ensures everything is set up.
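         	// That is: 200 local queue subs + 1 local TEST.COMPLETE sub + 2 coalesced
         	// remote queue subs (one per group) + 1 remote TEST.COMPLETE sub = 204.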
   190  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
   191  		subsA := srvA.NumSubscriptions()
   192  		subsB := srvB.NumSubscriptions()
   193  		if subsA != 204 || subsB != 204 {
   194  			return fmt.Errorf("Not all subs processed yet: %d and %d", subsA, subsB)
   195  		}
   196  		return nil
   197  	})
   198  
   199  	expected := int32(200)
   200  	// Now send messages from each server
   201  	for i := int32(0); i < expected; i++ {
   202  		c := cons[i%2]
   203  		c.Publish("foo", []byte("Don't Drop Me!"))
   204  	}
   205  	for _, c := range cons {
   206  		c.Flush()
   207  	}
   208  
   209  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
   210  		nbar := atomic.LoadInt32(&rbar)
   211  		nbaz := atomic.LoadInt32(&rbaz)
   212  		if nbar == expected && nbaz == expected {
   213  			return nil
   214  		}
   215  		return fmt.Errorf("Did not receive all %d queue messages, received %d for 'bar' and %d for 'baz'",
   216  			expected, atomic.LoadInt32(&rbar), atomic.LoadInt32(&rbaz))
   217  	})
   218  }
   219  
   220  func TestNoRaceClosedSlowConsumerWriteDeadline(t *testing.T) {
   221  	opts := DefaultOptions()
   222  	opts.NoSystemAccount = true
   223  	opts.WriteDeadline = 10 * time.Millisecond // Make very small to trip.
   224  	opts.MaxPending = 500 * 1024 * 1024        // Set high so it will not trip here.
   225  	s := RunServer(opts)
   226  	defer s.Shutdown()
   227  
   228  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   229  	if err != nil {
   230  		t.Fatalf("Error on connect: %v", err)
   231  	}
   232  	defer c.Close()
   233  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   234  		t.Fatalf("Error sending protocols to server: %v", err)
   235  	}
   236  	// Reduce socket buffer to increase reliability of data backing up in the server destined
   237  	// for our subscribed client.
   238  	c.(*net.TCPConn).SetReadBuffer(128)
   239  
   240  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   241  	sender, err := nats.Connect(url)
   242  	if err != nil {
   243  		t.Fatalf("Error on connect: %v", err)
   244  	}
   245  	defer sender.Close()
   246  
   247  	payload := make([]byte, 1024*1024)
   248  	for i := 0; i < 100; i++ {
   249  		if err := sender.Publish("foo", payload); err != nil {
   250  			t.Fatalf("Error on publish: %v", err)
   251  		}
   252  	}
   253  
   254  	// Flush sender connection to ensure that all data has been sent.
   255  	if err := sender.Flush(); err != nil {
   256  		t.Fatalf("Error on flush: %v", err)
   257  	}
   258  
   259  	// At this point server should have closed connection c.
   260  	checkClosedConns(t, s, 1, 2*time.Second)
   261  	conns := s.closedClients()
   262  	if lc := len(conns); lc != 1 {
   263  		t.Fatalf("len(conns) expected to be %d, got %d\n", 1, lc)
   264  	}
   265  	checkReason(t, conns[0].Reason, SlowConsumerWriteDeadline)
   266  }
   267  
   268  func TestNoRaceClosedSlowConsumerPendingBytes(t *testing.T) {
   269  	opts := DefaultOptions()
   270  	opts.NoSystemAccount = true
   271  	opts.WriteDeadline = 30 * time.Second // Wait for long time so write deadline does not trigger slow consumer.
   272  	opts.MaxPending = 1 * 1024 * 1024     // Set to low value (1MB) to allow SC to trip.
   273  	s := RunServer(opts)
   274  	defer s.Shutdown()
   275  
   276  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   277  	if err != nil {
   278  		t.Fatalf("Error on connect: %v", err)
   279  	}
   280  	defer c.Close()
   281  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   282  		t.Fatalf("Error sending protocols to server: %v", err)
   283  	}
   284  	// Reduce socket buffer to increase reliability of data backing up in the server destined
   285  	// for our subscribed client.
   286  	c.(*net.TCPConn).SetReadBuffer(128)
   287  
   288  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   289  	sender, err := nats.Connect(url)
   290  	if err != nil {
   291  		t.Fatalf("Error on connect: %v", err)
   292  	}
   293  	defer sender.Close()
   294  
   295  	payload := make([]byte, 1024*1024)
   296  	for i := 0; i < 100; i++ {
   297  		if err := sender.Publish("foo", payload); err != nil {
   298  			t.Fatalf("Error on publish: %v", err)
   299  		}
   300  	}
   301  
   302  	// Flush sender connection to ensure that all data has been sent.
   303  	if err := sender.Flush(); err != nil {
   304  		t.Fatalf("Error on flush: %v", err)
   305  	}
   306  
   307  	// At this point server should have closed connection c.
   308  	checkClosedConns(t, s, 1, 2*time.Second)
   309  	conns := s.closedClients()
   310  	if lc := len(conns); lc != 1 {
   311  		t.Fatalf("len(conns) expected to be %d, got %d\n", 1, lc)
   312  	}
   313  	checkReason(t, conns[0].Reason, SlowConsumerPendingBytes)
   314  }
   315  
   316  func TestNoRaceSlowConsumerPendingBytes(t *testing.T) {
   317  	opts := DefaultOptions()
   318  	opts.NoSystemAccount = true
   319  	opts.WriteDeadline = 30 * time.Second // Wait for long time so write deadline does not trigger slow consumer.
   320  	opts.MaxPending = 1 * 1024 * 1024     // Set to low value (1MB) to allow SC to trip.
   321  	s := RunServer(opts)
   322  	defer s.Shutdown()
   323  
   324  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   325  	if err != nil {
   326  		t.Fatalf("Error on connect: %v", err)
   327  	}
   328  	defer c.Close()
   329  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   330  		t.Fatalf("Error sending protocols to server: %v", err)
   331  	}
   332  	// Reduce socket buffer to increase reliability of data backing up in the server destined
   333  	// for our subscribed client.
   334  	c.(*net.TCPConn).SetReadBuffer(128)
   335  
   336  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   337  	sender, err := nats.Connect(url)
   338  	if err != nil {
   339  		t.Fatalf("Error on connect: %v", err)
   340  	}
   341  	defer sender.Close()
   342  
   343  	payload := make([]byte, 1024*1024)
   344  	for i := 0; i < 100; i++ {
   345  		if err := sender.Publish("foo", payload); err != nil {
   346  			t.Fatalf("Error on publish: %v", err)
   347  		}
   348  	}
   349  
   350  	// Flush sender connection to ensure that all data has been sent.
   351  	if err := sender.Flush(); err != nil {
   352  		t.Fatalf("Error on flush: %v", err)
   353  	}
   354  
   355  	// At this point server should have closed connection c.
   356  
   357  	// On certain platforms, it may take more than one call before
   358  	// getting the error.
   359  	for i := 0; i < 100; i++ {
   360  		if _, err := c.Write([]byte("PUB bar 5\r\nhello\r\n")); err != nil {
   361  			// ok
   362  			return
   363  		}
   364  	}
   365  	t.Fatal("Connection should have been closed")
   366  }
   367  
   368  func TestNoRaceGatewayNoMissingReplies(t *testing.T) {
   369  	// This test will have following setup:
   370  	//
   371  	// responder1		         requestor
   372  	//    |                          |
   373  	//    v                          v
   374  	//   [A1]<-------gw------------[B1]
   375  	//    |  \                      |
   376  	//    |   \______gw__________   | route
   377  	//    |                     _\| |
   378  	//   [  ]--------gw----------->[  ]
   379  	//   [A2]<-------gw------------[B2]
   380  	//   [  ]                      [  ]
   381  	//    ^
   382  	//    |
   383  	// responder2
   384  	//
    385  	// There is a possible race: when the requestor creates
    386  	// a subscription on the reply subject, the subject interest
    387  	// is sent from the inbound gateway, and since B1 has none,
    388  	// the SUB first goes to B2 before being sent to A1 from
    389  	// B2's inbound GW. But the request can go from B1 to A1
    390  	// right away, and responder1, connected to A1, may send
    391  	// back the reply before the interest on the reply subject
    392  	// makes it to A1 (from B2).
   393  	// This test will also verify that if the responder is instead
   394  	// connected to A2, the reply is properly received by requestor
   395  	// on B1.
   396  
   397  	// For this test we want to be in interestOnly mode, so
   398  	// make it happen quickly
   399  	gatewayMaxRUnsubBeforeSwitch = 1
   400  	defer func() { gatewayMaxRUnsubBeforeSwitch = defaultGatewayMaxRUnsubBeforeSwitch }()
   401  
   402  	// Start with setting up A2 and B2.
   403  	ob2 := testDefaultOptionsForGateway("B")
   404  	sb2 := runGatewayServer(ob2)
   405  	defer sb2.Shutdown()
   406  
   407  	oa2 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2)
   408  	sa2 := runGatewayServer(oa2)
   409  	defer sa2.Shutdown()
   410  
   411  	waitForOutboundGateways(t, sa2, 1, time.Second)
   412  	waitForInboundGateways(t, sa2, 1, time.Second)
   413  	waitForOutboundGateways(t, sb2, 1, time.Second)
   414  	waitForInboundGateways(t, sb2, 1, time.Second)
   415  
   416  	// Now start A1 which will connect to B2
   417  	oa1 := testGatewayOptionsFromToWithServers(t, "A", "B", sb2)
   418  	oa1.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa2.Cluster.Host, oa2.Cluster.Port))
   419  	sa1 := runGatewayServer(oa1)
   420  	defer sa1.Shutdown()
   421  
   422  	waitForOutboundGateways(t, sa1, 1, time.Second)
   423  	waitForInboundGateways(t, sb2, 2, time.Second)
   424  
   425  	checkClusterFormed(t, sa1, sa2)
   426  
   427  	// Finally, start B1 that will connect to A1.
   428  	ob1 := testGatewayOptionsFromToWithServers(t, "B", "A", sa1)
   429  	ob1.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", ob2.Cluster.Host, ob2.Cluster.Port))
   430  	sb1 := runGatewayServer(ob1)
   431  	defer sb1.Shutdown()
   432  
   433  	// Check that we have the outbound gateway from B1 to A1
   434  	checkFor(t, 3*time.Second, 15*time.Millisecond, func() error {
   435  		c := sb1.getOutboundGatewayConnection("A")
   436  		if c == nil {
   437  			return fmt.Errorf("Outbound connection to A not created yet")
   438  		}
   439  		c.mu.Lock()
   440  		name := c.opts.Name
   441  		nc := c.nc
   442  		c.mu.Unlock()
   443  		if name != sa1.ID() {
   444  			// Force a disconnect
   445  			nc.Close()
   446  			return fmt.Errorf("Was unable to have B1 connect to A1")
   447  		}
   448  		return nil
   449  	})
   450  
   451  	waitForInboundGateways(t, sa1, 1, time.Second)
   452  	checkClusterFormed(t, sb1, sb2)
   453  
   454  	a1URL := fmt.Sprintf("nats://%s:%d", oa1.Host, oa1.Port)
   455  	a2URL := fmt.Sprintf("nats://%s:%d", oa2.Host, oa2.Port)
   456  	b1URL := fmt.Sprintf("nats://%s:%d", ob1.Host, ob1.Port)
   457  	b2URL := fmt.Sprintf("nats://%s:%d", ob2.Host, ob2.Port)
   458  
   459  	ncb1 := natsConnect(t, b1URL)
   460  	defer ncb1.Close()
   461  
   462  	ncb2 := natsConnect(t, b2URL)
   463  	defer ncb2.Close()
   464  
   465  	natsSubSync(t, ncb1, "just.a.sub")
   466  	natsSubSync(t, ncb2, "just.a.sub")
   467  	checkExpectedSubs(t, 2, sb1, sb2)
   468  
   469  	// For this test, we want A to be checking B's interest in order
   470  	// to send messages (which would cause replies to be dropped if
   471  	// there is no interest registered on A). So from A servers,
   472  	// send to various subjects and cause B's to switch to interestOnly
   473  	// mode.
   474  	nca1 := natsConnect(t, a1URL)
   475  	defer nca1.Close()
   476  	for i := 0; i < 10; i++ {
   477  		natsPub(t, nca1, fmt.Sprintf("reject.%d", i), []byte("hello"))
   478  	}
   479  	nca2 := natsConnect(t, a2URL)
   480  	defer nca2.Close()
   481  	for i := 0; i < 10; i++ {
   482  		natsPub(t, nca2, fmt.Sprintf("reject.%d", i), []byte("hello"))
   483  	}
   484  
   485  	checkSwitchedMode := func(t *testing.T, s *Server) {
   486  		t.Helper()
   487  		checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
   488  			var switchedMode bool
   489  			c := s.getOutboundGatewayConnection("B")
   490  			ei, _ := c.gw.outsim.Load(globalAccountName)
   491  			if ei != nil {
   492  				e := ei.(*outsie)
   493  				e.RLock()
   494  				switchedMode = e.ni == nil && e.mode == InterestOnly
   495  				e.RUnlock()
   496  			}
   497  			if !switchedMode {
   498  				return fmt.Errorf("Still not switched mode")
   499  			}
   500  			return nil
   501  		})
   502  	}
   503  	checkSwitchedMode(t, sa1)
   504  	checkSwitchedMode(t, sa2)
   505  
    506  	// Set up a subscriber on myreply.> on each of A's servers.
   507  	total := 1000
   508  	expected := int32(total)
   509  	rcvOnA := int32(0)
   510  	qrcvOnA := int32(0)
   511  	natsSub(t, nca1, "myreply.>", func(_ *nats.Msg) {
   512  		atomic.AddInt32(&rcvOnA, 1)
   513  	})
   514  	natsQueueSub(t, nca2, "myreply.>", "bar", func(_ *nats.Msg) {
   515  		atomic.AddInt32(&qrcvOnA, 1)
   516  	})
   517  	checkExpectedSubs(t, 2, sa1, sa2)
   518  
   519  	// Ok.. so now we will run the actual test where we
   520  	// create a responder on A1 and make sure that every
   521  	// single request from B1 gets the reply. Will repeat
   522  	// test with responder connected to A2.
   523  	sendReqs := func(t *testing.T, subConn *nats.Conn) {
   524  		t.Helper()
   525  		responder := natsSub(t, subConn, "foo", func(m *nats.Msg) {
   526  			m.Respond([]byte("reply"))
   527  		})
   528  		natsFlush(t, subConn)
   529  		checkExpectedSubs(t, 3, sa1, sa2)
   530  
    531  		// We are not going to use Request() because it sets
    532  		// a wildcard subscription on an INBOX and is less likely
    533  		// to produce the race. Instead we will explicitly set
   534  		// the subscription on the reply subject and create one
   535  		// per request.
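         		// (For reference, a plain request such as ncb1.Request("foo", []byte("request"), time.Second)
         		// would instead go through the client's wildcard inbox subscription.)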
   536  		for i := 0; i < total/2; i++ {
   537  			reply := fmt.Sprintf("myreply.%d", i)
   538  			replySub := natsQueueSubSync(t, ncb1, reply, "bar")
   539  			natsFlush(t, ncb1)
   540  
   541  			// Let's make sure we have interest on B2.
   542  			if r := sb2.globalAccount().sl.Match(reply); len(r.qsubs) == 0 {
   543  				checkFor(t, time.Second, time.Millisecond, func() error {
   544  					if r := sb2.globalAccount().sl.Match(reply); len(r.qsubs) == 0 {
   545  						return fmt.Errorf("B still not registered interest on %s", reply)
   546  					}
   547  					return nil
   548  				})
   549  			}
   550  			natsPubReq(t, ncb1, "foo", reply, []byte("request"))
   551  			if _, err := replySub.NextMsg(time.Second); err != nil {
   552  				t.Fatalf("Did not receive reply: %v", err)
   553  			}
   554  			natsUnsub(t, replySub)
   555  		}
   556  
   557  		responder.Unsubscribe()
   558  		natsFlush(t, subConn)
   559  		checkExpectedSubs(t, 2, sa1, sa2)
   560  	}
   561  	sendReqs(t, nca1)
   562  	sendReqs(t, nca2)
   563  
   564  	checkFor(t, time.Second, 15*time.Millisecond, func() error {
   565  		if n := atomic.LoadInt32(&rcvOnA); n != expected {
   566  			return fmt.Errorf("Subs on A expected to get %v replies, got %v", expected, n)
   567  		}
   568  		return nil
   569  	})
   570  
   571  	// We should not have received a single message on the queue sub
   572  	// on cluster A because messages will have been delivered to
   573  	// the member on cluster B.
   574  	if n := atomic.LoadInt32(&qrcvOnA); n != 0 {
   575  		t.Fatalf("Queue sub on A should not have received message, got %v", n)
   576  	}
   577  }
   578  
   579  func TestNoRaceRouteMemUsage(t *testing.T) {
   580  	oa := DefaultOptions()
   581  	sa := RunServer(oa)
   582  	defer sa.Shutdown()
   583  
   584  	ob := DefaultOptions()
   585  	ob.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa.Cluster.Host, oa.Cluster.Port))
   586  	sb := RunServer(ob)
   587  	defer sb.Shutdown()
   588  
   589  	checkClusterFormed(t, sa, sb)
   590  
   591  	responder := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port))
   592  	defer responder.Close()
   593  	for i := 0; i < 10; i++ {
   594  		natsSub(t, responder, "foo", func(m *nats.Msg) {
   595  			m.Respond(m.Data)
   596  		})
   597  	}
   598  	natsFlush(t, responder)
   599  
   600  	payload := make([]byte, 50*1024)
   601  
   602  	bURL := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)
   603  
   604  	// Capture mem usage
   605  	mem := runtime.MemStats{}
   606  	runtime.ReadMemStats(&mem)
   607  	inUseBefore := mem.HeapInuse
   608  
   609  	for i := 0; i < 100; i++ {
   610  		requestor := natsConnect(t, bURL)
    611  		// Don't use a defer here, otherwise it will make the memory check fail!
    612  		// We close the connection just after these few instructions, which
    613  		// do not call t.Fatal() anyway.
   614  		inbox := nats.NewInbox()
   615  		sub := natsSubSync(t, requestor, inbox)
   616  		natsPubReq(t, requestor, "foo", inbox, payload)
   617  		for j := 0; j < 10; j++ {
   618  			natsNexMsg(t, sub, time.Second)
   619  		}
   620  		requestor.Close()
   621  	}
   622  
   623  	runtime.GC()
   624  	debug.FreeOSMemory()
   625  	runtime.ReadMemStats(&mem)
   626  	inUseNow := mem.HeapInuse
   627  	if inUseNow > 3*inUseBefore {
   628  		t.Fatalf("Heap in-use before was %v, now %v: too high", inUseBefore, inUseNow)
   629  	}
   630  }
   631  
   632  func TestNoRaceRouteCache(t *testing.T) {
   633  	maxPerAccountCacheSize = 20
   634  	prunePerAccountCacheSize = 5
   635  	closedSubsCheckInterval = 250 * time.Millisecond
   636  
   637  	defer func() {
   638  		maxPerAccountCacheSize = defaultMaxPerAccountCacheSize
   639  		prunePerAccountCacheSize = defaultPrunePerAccountCacheSize
   640  		closedSubsCheckInterval = defaultClosedSubsCheckInterval
   641  	}()
   642  
   643  	for _, test := range []struct {
   644  		name     string
   645  		useQueue bool
   646  	}{
   647  		{"plain_sub", false},
   648  		{"queue_sub", true},
   649  	} {
   650  		t.Run(test.name, func(t *testing.T) {
   651  
   652  			oa := DefaultOptions()
   653  			oa.NoSystemAccount = true
   654  			oa.Cluster.PoolSize = -1
   655  			sa := RunServer(oa)
   656  			defer sa.Shutdown()
   657  
   658  			ob := DefaultOptions()
   659  			ob.NoSystemAccount = true
   660  			ob.Cluster.PoolSize = -1
   661  			ob.Routes = RoutesFromStr(fmt.Sprintf("nats://%s:%d", oa.Cluster.Host, oa.Cluster.Port))
   662  			sb := RunServer(ob)
   663  			defer sb.Shutdown()
   664  
   665  			checkClusterFormed(t, sa, sb)
   666  
   667  			responder := natsConnect(t, fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port))
   668  			defer responder.Close()
   669  			natsSub(t, responder, "foo", func(m *nats.Msg) {
   670  				m.Respond(m.Data)
   671  			})
   672  			natsFlush(t, responder)
   673  
   674  			checkExpectedSubs(t, 1, sa)
   675  			checkExpectedSubs(t, 1, sb)
   676  
   677  			bURL := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)
   678  			requestor := natsConnect(t, bURL)
   679  			defer requestor.Close()
   680  
   681  			ch := make(chan struct{}, 1)
   682  			cb := func(_ *nats.Msg) {
   683  				select {
   684  				case ch <- struct{}{}:
   685  				default:
   686  				}
   687  			}
   688  
   689  			sendReqs := func(t *testing.T, nc *nats.Conn, count int, unsub bool) {
   690  				t.Helper()
   691  				for i := 0; i < count; i++ {
   692  					inbox := nats.NewInbox()
   693  					var sub *nats.Subscription
   694  					if test.useQueue {
   695  						sub = natsQueueSub(t, nc, inbox, "queue", cb)
   696  					} else {
   697  						sub = natsSub(t, nc, inbox, cb)
   698  					}
   699  					natsPubReq(t, nc, "foo", inbox, []byte("hello"))
   700  					select {
   701  					case <-ch:
   702  					case <-time.After(time.Second):
   703  						t.Fatalf("Failed to get reply")
   704  					}
   705  					if unsub {
   706  						natsUnsub(t, sub)
   707  					}
   708  				}
   709  			}
   710  			sendReqs(t, requestor, maxPerAccountCacheSize+1, true)
   711  
   712  			var route *client
   713  			sb.mu.Lock()
   714  			route = getFirstRoute(sb)
   715  			sb.mu.Unlock()
   716  
   717  			checkExpected := func(t *testing.T, expected int) {
   718  				t.Helper()
   719  				checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
   720  					route.mu.Lock()
   721  					n := len(route.in.pacache)
   722  					route.mu.Unlock()
   723  					if n != expected {
   724  						return fmt.Errorf("Expected %v subs in the cache, got %v", expected, n)
   725  					}
   726  					return nil
   727  				})
   728  			}
   729  			checkExpected(t, (maxPerAccountCacheSize+1)-(prunePerAccountCacheSize+1))
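         			// With the values set at the top of this test, that is (20+1)-(5+1) = 15 entries.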
   730  
   731  			// Wait for more than the orphan check
   732  			time.Sleep(2 * closedSubsCheckInterval)
   733  
    734  			// Add new subs up to the point where a new prune would occur
   735  			sendReqs(t, requestor, prunePerAccountCacheSize+1, false)
   736  
   737  			// Now closed subs should have been removed, so expected
   738  			// subs in the cache should be the new ones.
   739  			checkExpected(t, prunePerAccountCacheSize+1)
   740  
    741  			// Now try with implicit unsubscribe (due to connection close)
   742  			sendReqs(t, requestor, maxPerAccountCacheSize+1, false)
   743  			requestor.Close()
   744  
   745  			checkExpected(t, maxPerAccountCacheSize-prunePerAccountCacheSize)
   746  
   747  			// Wait for more than the orphan check
   748  			time.Sleep(2 * closedSubsCheckInterval)
   749  
    750  			// Now create a new connection and send prunePerAccountCacheSize+1
    751  			// requests; that should cause all subs from the previous connection
    752  			// to be removed from the cache.
   753  			requestor = natsConnect(t, bURL)
   754  			defer requestor.Close()
   755  
   756  			sendReqs(t, requestor, prunePerAccountCacheSize+1, false)
   757  			checkExpected(t, prunePerAccountCacheSize+1)
   758  		})
   759  	}
   760  }
   761  
   762  func TestNoRaceFetchAccountDoesNotRegisterAccountTwice(t *testing.T) {
   763  	sa, oa, sb, ob, _ := runTrustedGateways(t)
   764  	defer sa.Shutdown()
   765  	defer sb.Shutdown()
   766  
   767  	// Let's create a user account.
   768  	okp, _ := nkeys.FromSeed(oSeed)
   769  	akp, _ := nkeys.CreateAccount()
   770  	pub, _ := akp.PublicKey()
   771  	nac := jwt.NewAccountClaims(pub)
   772  	jwt, _ := nac.Encode(okp)
   773  	userAcc := pub
   774  
   775  	// Replace B's account resolver with one that introduces
   776  	// delay during the Fetch()
   777  	sac := &slowAccResolver{AccountResolver: sb.AccountResolver()}
   778  	sb.SetAccountResolver(sac)
   779  
   780  	// Add the account in sa and sb
   781  	addAccountToMemResolver(sa, userAcc, jwt)
   782  	addAccountToMemResolver(sb, userAcc, jwt)
   783  
   784  	// Tell the slow account resolver which account to slow down
   785  	sac.Lock()
   786  	sac.acc = userAcc
   787  	sac.Unlock()
   788  
   789  	urlA := fmt.Sprintf("nats://%s:%d", oa.Host, oa.Port)
   790  	urlB := fmt.Sprintf("nats://%s:%d", ob.Host, ob.Port)
   791  
   792  	nca, err := nats.Connect(urlA, createUserCreds(t, sa, akp))
   793  	if err != nil {
   794  		t.Fatalf("Error connecting to A: %v", err)
   795  	}
   796  	defer nca.Close()
   797  
   798  	// Since there is an optimistic send, this message will go to B
   799  	// and on processing this message, B will lookup/fetch this
    800  	// account, which can produce a race with the fetch of this
    801  	// account from A's system account that sent a notification
    802  	// about this account, or with the client connecting just after
   803  	// that.
   804  	nca.Publish("foo", []byte("hello"))
   805  
   806  	// Now connect and create a subscription on B
   807  	ncb, err := nats.Connect(urlB, createUserCreds(t, sb, akp))
   808  	if err != nil {
    809  		t.Fatalf("Error connecting to B: %v", err)
   810  	}
   811  	defer ncb.Close()
   812  	sub, err := ncb.SubscribeSync("foo")
   813  	if err != nil {
   814  		t.Fatalf("Error on subscribe: %v", err)
   815  	}
   816  	ncb.Flush()
   817  
    818  	// Now send messages from A, and B should ultimately start to receive
    819  	// them (once the subscription has been correctly registered).
   820  	ok := false
   821  	for i := 0; i < 10; i++ {
   822  		nca.Publish("foo", []byte("hello"))
   823  		if _, err := sub.NextMsg(100 * time.Millisecond); err != nil {
   824  			continue
   825  		}
   826  		ok = true
   827  		break
   828  	}
   829  	if !ok {
   830  		t.Fatalf("B should be able to receive messages")
   831  	}
   832  
   833  	checkTmpAccounts := func(t *testing.T, s *Server) {
   834  		t.Helper()
   835  		empty := true
   836  		s.tmpAccounts.Range(func(_, _ any) bool {
   837  			empty = false
   838  			return false
   839  		})
   840  		if !empty {
   841  			t.Fatalf("tmpAccounts is not empty")
   842  		}
   843  	}
   844  	checkTmpAccounts(t, sa)
   845  	checkTmpAccounts(t, sb)
   846  }
   847  
   848  func TestNoRaceWriteDeadline(t *testing.T) {
   849  	opts := DefaultOptions()
   850  	opts.NoSystemAccount = true
   851  	opts.WriteDeadline = 30 * time.Millisecond
   852  	s := RunServer(opts)
   853  	defer s.Shutdown()
   854  
   855  	c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", opts.Host, opts.Port), 3*time.Second)
   856  	if err != nil {
   857  		t.Fatalf("Error on connect: %v", err)
   858  	}
   859  	defer c.Close()
   860  	if _, err := c.Write([]byte("CONNECT {}\r\nPING\r\nSUB foo 1\r\n")); err != nil {
   861  		t.Fatalf("Error sending protocols to server: %v", err)
   862  	}
   863  	// Reduce socket buffer to increase reliability of getting
   864  	// write deadline errors.
   865  	c.(*net.TCPConn).SetReadBuffer(4)
   866  
   867  	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
   868  	sender, err := nats.Connect(url)
   869  	if err != nil {
   870  		t.Fatalf("Error on connect: %v", err)
   871  	}
   872  	defer sender.Close()
   873  
   874  	payload := make([]byte, 1000000)
   875  	total := 1000
   876  	for i := 0; i < total; i++ {
   877  		if err := sender.Publish("foo", payload); err != nil {
   878  			t.Fatalf("Error on publish: %v", err)
   879  		}
   880  	}
   881  	// Flush sender connection to ensure that all data has been sent.
   882  	if err := sender.Flush(); err != nil {
   883  		t.Fatalf("Error on flush: %v", err)
   884  	}
   885  
   886  	// At this point server should have closed connection c.
   887  
   888  	// On certain platforms, it may take more than one call before
   889  	// getting the error.
   890  	for i := 0; i < 100; i++ {
   891  		if _, err := c.Write([]byte("PUB bar 5\r\nhello\r\n")); err != nil {
   892  			// ok
   893  			return
   894  		}
   895  	}
   896  	t.Fatal("Connection should have been closed")
   897  }
   898  
   899  func TestNoRaceLeafNodeClusterNameConflictDeadlock(t *testing.T) {
   900  	o := DefaultOptions()
   901  	o.LeafNode.Port = -1
   902  	s := RunServer(o)
   903  	defer s.Shutdown()
   904  
   905  	u, err := url.Parse(fmt.Sprintf("nats://127.0.0.1:%d", o.LeafNode.Port))
   906  	if err != nil {
   907  		t.Fatalf("Error parsing url: %v", err)
   908  	}
   909  
   910  	o1 := DefaultOptions()
   911  	o1.ServerName = "A1"
   912  	o1.Cluster.Name = "clusterA"
   913  	o1.LeafNode.Remotes = []*RemoteLeafOpts{{URLs: []*url.URL{u}}}
   914  	s1 := RunServer(o1)
   915  	defer s1.Shutdown()
   916  
   917  	checkLeafNodeConnected(t, s1)
   918  
   919  	o2 := DefaultOptions()
   920  	o2.ServerName = "A2"
   921  	o2.Cluster.Name = "clusterA"
   922  	o2.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
   923  	o2.LeafNode.Remotes = []*RemoteLeafOpts{{URLs: []*url.URL{u}}}
   924  	s2 := RunServer(o2)
   925  	defer s2.Shutdown()
   926  
   927  	checkLeafNodeConnected(t, s2)
   928  	checkClusterFormed(t, s1, s2)
   929  
   930  	o3 := DefaultOptions()
   931  	o3.ServerName = "A3"
   932  	o3.Cluster.Name = "" // intentionally not set
   933  	o3.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", o1.Cluster.Port))
   934  	o3.LeafNode.Remotes = []*RemoteLeafOpts{{URLs: []*url.URL{u}}}
   935  	s3 := RunServer(o3)
   936  	defer s3.Shutdown()
   937  
   938  	checkLeafNodeConnected(t, s3)
   939  	checkClusterFormed(t, s1, s2, s3)
   940  }
   941  
    942  // This test is the same as TestAccountAddServiceImportRace but, running
    943  // without the -race flag, it more easily captures the possible
    944  // duplicate sid, resulting in fewer than the expected number of subscriptions
    945  // in the account's internal subscriptions map.
   946  func TestNoRaceAccountAddServiceImportRace(t *testing.T) {
   947  	TestAccountAddServiceImportRace(t)
   948  }
   949  
   950  // Similar to the routed version. Make sure we receive all of the
   951  // messages with auto-unsubscribe enabled.
   952  func TestNoRaceQueueAutoUnsubscribe(t *testing.T) {
   953  	opts := DefaultOptions()
   954  	s := RunServer(opts)
   955  	defer s.Shutdown()
   956  
   957  	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port))
   958  	if err != nil {
   959  		t.Fatalf("Error on connect: %v", err)
   960  	}
   961  	defer nc.Close()
   962  
   963  	rbar := int32(0)
   964  	barCb := func(m *nats.Msg) {
   965  		atomic.AddInt32(&rbar, 1)
   966  	}
   967  	rbaz := int32(0)
   968  	bazCb := func(m *nats.Msg) {
   969  		atomic.AddInt32(&rbaz, 1)
   970  	}
   971  
   972  	// Create 1000 subscriptions with auto-unsubscribe of 1.
   973  	// Do two groups, one bar and one baz.
   974  	total := 1000
   975  	for i := 0; i < total; i++ {
   976  		qsub, err := nc.QueueSubscribe("foo", "bar", barCb)
   977  		if err != nil {
   978  			t.Fatalf("Error on subscribe: %v", err)
   979  		}
   980  		if err := qsub.AutoUnsubscribe(1); err != nil {
   981  			t.Fatalf("Error on auto-unsubscribe: %v", err)
   982  		}
   983  		qsub, err = nc.QueueSubscribe("foo", "baz", bazCb)
   984  		if err != nil {
   985  			t.Fatalf("Error on subscribe: %v", err)
   986  		}
   987  		if err := qsub.AutoUnsubscribe(1); err != nil {
   988  			t.Fatalf("Error on auto-unsubscribe: %v", err)
   989  		}
   990  	}
   991  	nc.Flush()
   992  
   993  	expected := int32(total)
   994  	for i := int32(0); i < expected; i++ {
   995  		nc.Publish("foo", []byte("Don't Drop Me!"))
   996  	}
   997  	nc.Flush()
   998  
   999  	checkFor(t, 5*time.Second, 10*time.Millisecond, func() error {
  1000  		nbar := atomic.LoadInt32(&rbar)
  1001  		nbaz := atomic.LoadInt32(&rbaz)
  1002  		if nbar == expected && nbaz == expected {
  1003  			return nil
  1004  		}
  1005  		return fmt.Errorf("Did not receive all %d queue messages, received %d for 'bar' and %d for 'baz'",
  1006  			expected, atomic.LoadInt32(&rbar), atomic.LoadInt32(&rbaz))
  1007  	})
  1008  }
  1009  
  1010  func TestNoRaceAcceptLoopsDoNotLeaveOpenedConn(t *testing.T) {
  1011  	for _, test := range []struct {
  1012  		name string
  1013  		url  func(o *Options) (string, int)
  1014  	}{
  1015  		{"client", func(o *Options) (string, int) { return o.Host, o.Port }},
  1016  		{"route", func(o *Options) (string, int) { return o.Cluster.Host, o.Cluster.Port }},
  1017  		{"gateway", func(o *Options) (string, int) { return o.Gateway.Host, o.Gateway.Port }},
  1018  		{"leafnode", func(o *Options) (string, int) { return o.LeafNode.Host, o.LeafNode.Port }},
  1019  		{"websocket", func(o *Options) (string, int) { return o.Websocket.Host, o.Websocket.Port }},
  1020  	} {
  1021  		t.Run(test.name, func(t *testing.T) {
  1022  			o := DefaultOptions()
  1023  			o.DisableShortFirstPing = true
  1024  			o.Accounts = []*Account{NewAccount("$SYS")}
  1025  			o.SystemAccount = "$SYS"
  1026  			o.Cluster.Name = "abc"
  1027  			o.Cluster.Host = "127.0.0.1"
  1028  			o.Cluster.Port = -1
  1029  			o.Gateway.Name = "abc"
  1030  			o.Gateway.Host = "127.0.0.1"
  1031  			o.Gateway.Port = -1
  1032  			o.LeafNode.Host = "127.0.0.1"
  1033  			o.LeafNode.Port = -1
  1034  			o.Websocket.Host = "127.0.0.1"
  1035  			o.Websocket.Port = -1
  1036  			o.Websocket.HandshakeTimeout = 1
  1037  			o.Websocket.NoTLS = true
  1038  			s := RunServer(o)
  1039  			defer s.Shutdown()
  1040  
  1041  			host, port := test.url(o)
  1042  			url := fmt.Sprintf("%s:%d", host, port)
  1043  			var conns []net.Conn
  1044  
  1045  			wg := sync.WaitGroup{}
  1046  			wg.Add(1)
  1047  			done := make(chan struct{}, 1)
  1048  			go func() {
  1049  				defer wg.Done()
  1050  				// Have an upper limit
  1051  				for i := 0; i < 200; i++ {
  1052  					c, err := net.Dial("tcp", url)
  1053  					if err != nil {
  1054  						return
  1055  					}
  1056  					conns = append(conns, c)
  1057  					select {
  1058  					case <-done:
  1059  						return
  1060  					default:
  1061  					}
  1062  				}
  1063  			}()
  1064  			time.Sleep(15 * time.Millisecond)
  1065  			s.Shutdown()
  1066  			close(done)
  1067  			wg.Wait()
  1068  			for _, c := range conns {
  1069  				c.SetReadDeadline(time.Now().Add(2 * time.Second))
  1070  				br := bufio.NewReader(c)
  1071  				// Read INFO for connections that were accepted
  1072  				_, _, err := br.ReadLine()
  1073  				if err == nil {
  1074  					// After that, the connection should be closed,
  1075  					// so we should get an error here.
  1076  					_, _, err = br.ReadLine()
  1077  				}
  1078  				// We expect an io.EOF or any other error indicating the use of a closed
  1079  				// connection, but we should not get the timeout error.
  1080  				if ne, ok := err.(net.Error); ok && ne.Timeout() {
  1081  					err = nil
  1082  				}
  1083  				if err == nil {
  1084  					var buf [10]byte
  1085  					c.SetDeadline(time.Now().Add(2 * time.Second))
  1086  					c.Write([]byte("C"))
  1087  					_, err = c.Read(buf[:])
  1088  					if ne, ok := err.(net.Error); ok && ne.Timeout() {
  1089  						err = nil
  1090  					}
  1091  				}
  1092  				if err == nil {
  1093  					t.Fatalf("Connection should have been closed")
  1094  				}
  1095  				c.Close()
  1096  			}
  1097  		})
  1098  	}
  1099  }
  1100  
  1101  func TestNoRaceJetStreamDeleteStreamManyConsumers(t *testing.T) {
  1102  	s := RunBasicJetStreamServer(t)
  1103  	defer s.Shutdown()
  1104  
  1105  	mname := "MYS"
  1106  	mset, err := s.GlobalAccount().addStream(&StreamConfig{Name: mname, Storage: FileStorage})
  1107  	if err != nil {
  1108  		t.Fatalf("Unexpected error adding stream: %v", err)
  1109  	}
  1110  
  1111  	// This number needs to be higher than the internal sendq size to trigger what this test is testing.
  1112  	for i := 0; i < 2000; i++ {
  1113  		_, err := mset.addConsumer(&ConsumerConfig{
  1114  			Durable:        fmt.Sprintf("D-%d", i),
  1115  			DeliverSubject: fmt.Sprintf("deliver.%d", i),
  1116  		})
  1117  		if err != nil {
  1118  			t.Fatalf("Error creating consumer: %v", err)
  1119  		}
  1120  	}
   1121  	// With the bug this would not return and would hang.
  1122  	mset.delete()
  1123  }
  1124  
   1125  // We used to swap accounts on an inbound message when processing service imports.
   1126  // Until JetStream this was mostly ok, but with JetStream we can have pull consumers
   1127  // trying to access the client's account in another goroutine, which causes issues.
   1128  // This is not limited to the case above, it is just the one that exposed it.
   1129  // This test shows that issue and that the fix works, meaning we no longer swap c.acc.
  1130  func TestNoRaceJetStreamServiceImportAccountSwapIssue(t *testing.T) {
  1131  	s := RunBasicJetStreamServer(t)
  1132  	defer s.Shutdown()
  1133  
  1134  	// Client based API
  1135  	nc, js := jsClientConnect(t, s)
  1136  	defer nc.Close()
  1137  
  1138  	_, err := js.AddStream(&nats.StreamConfig{
  1139  		Name:     "TEST",
  1140  		Subjects: []string{"foo", "bar"},
  1141  	})
  1142  	if err != nil {
  1143  		t.Fatalf("Unexpected error: %v", err)
  1144  	}
  1145  
  1146  	sub, err := js.PullSubscribe("foo", "dlc")
  1147  	if err != nil {
  1148  		t.Fatalf("Unexpected error: %v", err)
  1149  	}
  1150  
  1151  	beforeSubs := s.NumSubscriptions()
  1152  
  1153  	// How long we want both sides to run.
  1154  	timeout := time.Now().Add(3 * time.Second)
  1155  	errs := make(chan error, 1)
  1156  
   1157  	// Publishing side, which will signal the consumer that is waiting and which will access c.acc. If the publish
   1158  	// operation runs concurrently, we will catch c.acc being $SYS some of the time.
  1159  	go func() {
  1160  		time.Sleep(100 * time.Millisecond)
  1161  		for time.Now().Before(timeout) {
  1162  			// This will signal the delivery of the pull messages.
  1163  			js.Publish("foo", []byte("Hello"))
  1164  			// This will swap the account because of JetStream service import.
  1165  			// We can get an error here with the bug or not.
  1166  			if _, err := js.StreamInfo("TEST"); err != nil {
  1167  				errs <- err
  1168  				return
  1169  			}
  1170  		}
  1171  		errs <- nil
  1172  	}()
  1173  
  1174  	// Pull messages flow.
  1175  	var received int
  1176  	for time.Now().Before(timeout.Add(2 * time.Second)) {
  1177  		if msgs, err := sub.Fetch(1, nats.MaxWait(200*time.Millisecond)); err == nil {
  1178  			for _, m := range msgs {
  1179  				received++
  1180  				m.AckSync()
  1181  			}
  1182  		} else {
  1183  			break
  1184  		}
  1185  	}
  1186  	// Wait on publisher Go routine and check for errors.
  1187  	if err := <-errs; err != nil {
  1188  		t.Fatalf("Unexpected error: %v", err)
  1189  	}
  1190  	// Double check all received.
  1191  	si, err := js.StreamInfo("TEST")
  1192  	if err != nil {
  1193  		t.Fatalf("Unexpected error: %v", err)
  1194  	}
  1195  	if int(si.State.Msgs) != received {
  1196  		t.Fatalf("Expected to receive %d msgs, only got %d", si.State.Msgs, received)
  1197  	}
  1198  	// Now check for leaked subs from the fetch call above. That is what we first saw from the bug.
  1199  	if afterSubs := s.NumSubscriptions(); afterSubs != beforeSubs {
  1200  		t.Fatalf("Leaked subscriptions: %d before, %d after", beforeSubs, afterSubs)
  1201  	}
  1202  }
  1203  
  1204  func TestNoRaceJetStreamAPIStreamListPaging(t *testing.T) {
  1205  	s := RunBasicJetStreamServer(t)
  1206  	defer s.Shutdown()
  1207  
  1208  	// Create 2X limit
  1209  	streamsNum := 2 * JSApiNamesLimit
  1210  	for i := 1; i <= streamsNum; i++ {
  1211  		name := fmt.Sprintf("STREAM-%06d", i)
  1212  		cfg := StreamConfig{Name: name, Storage: MemoryStorage}
  1213  		_, err := s.GlobalAccount().addStream(&cfg)
  1214  		if err != nil {
  1215  			t.Fatalf("Unexpected error adding stream: %v", err)
  1216  		}
  1217  	}
  1218  
  1219  	// Client for API requests.
  1220  	nc := clientConnectToServer(t, s)
  1221  	defer nc.Close()
  1222  
  1223  	reqList := func(offset int) []byte {
  1224  		t.Helper()
  1225  		var req []byte
  1226  		if offset > 0 {
  1227  			req, _ = json.Marshal(&ApiPagedRequest{Offset: offset})
  1228  		}
  1229  		resp, err := nc.Request(JSApiStreams, req, time.Second)
  1230  		if err != nil {
  1231  			t.Fatalf("Unexpected error getting stream list: %v", err)
  1232  		}
  1233  		return resp.Data
  1234  	}
  1235  
  1236  	checkResp := func(resp []byte, expectedLen, expectedOffset int) {
  1237  		t.Helper()
  1238  		var listResponse JSApiStreamNamesResponse
  1239  		if err := json.Unmarshal(resp, &listResponse); err != nil {
  1240  			t.Fatalf("Unexpected error: %v", err)
  1241  		}
  1242  		if len(listResponse.Streams) != expectedLen {
  1243  			t.Fatalf("Expected only %d streams but got %d", expectedLen, len(listResponse.Streams))
  1244  		}
  1245  		if listResponse.Total != streamsNum {
  1246  			t.Fatalf("Expected total to be %d but got %d", streamsNum, listResponse.Total)
  1247  		}
  1248  		if listResponse.Offset != expectedOffset {
  1249  			t.Fatalf("Expected offset to be %d but got %d", expectedOffset, listResponse.Offset)
  1250  		}
  1251  		if expectedLen < 1 {
  1252  			return
  1253  		}
  1254  		// Make sure we get the right stream.
  1255  		sname := fmt.Sprintf("STREAM-%06d", expectedOffset+1)
  1256  		if listResponse.Streams[0] != sname {
  1257  			t.Fatalf("Expected stream %q to be first, got %q", sname, listResponse.Streams[0])
  1258  		}
  1259  	}
  1260  
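         	// With streamsNum = 2*JSApiNamesLimit: the first two pages are full, an offset at
         	// or beyond the total yields no names, and an offset inside the last page yields
         	// the remainder.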
  1261  	checkResp(reqList(0), JSApiNamesLimit, 0)
  1262  	checkResp(reqList(JSApiNamesLimit), JSApiNamesLimit, JSApiNamesLimit)
  1263  	checkResp(reqList(streamsNum), 0, streamsNum)
  1264  	checkResp(reqList(streamsNum-22), 22, streamsNum-22)
  1265  	checkResp(reqList(streamsNum+22), 0, streamsNum)
  1266  }
  1267  
  1268  func TestNoRaceJetStreamAPIConsumerListPaging(t *testing.T) {
  1269  	s := RunBasicJetStreamServer(t)
  1270  	defer s.Shutdown()
  1271  
  1272  	sname := "MYSTREAM"
  1273  	mset, err := s.GlobalAccount().addStream(&StreamConfig{Name: sname})
  1274  	if err != nil {
  1275  		t.Fatalf("Unexpected error adding stream: %v", err)
  1276  	}
  1277  
  1278  	// Client for API requests.
  1279  	nc := clientConnectToServer(t, s)
  1280  	defer nc.Close()
  1281  
  1282  	consumersNum := JSApiNamesLimit
  1283  	for i := 1; i <= consumersNum; i++ {
  1284  		dsubj := fmt.Sprintf("d.%d", i)
  1285  		sub, _ := nc.SubscribeSync(dsubj)
  1286  		defer sub.Unsubscribe()
  1287  		nc.Flush()
  1288  
  1289  		_, err := mset.addConsumer(&ConsumerConfig{DeliverSubject: dsubj})
  1290  		if err != nil {
  1291  			t.Fatalf("Unexpected error: %v", err)
  1292  		}
  1293  	}
  1294  
  1295  	reqListSubject := fmt.Sprintf(JSApiConsumersT, sname)
  1296  	reqList := func(offset int) []byte {
  1297  		t.Helper()
  1298  		var req []byte
  1299  		if offset > 0 {
  1300  			req, _ = json.Marshal(&JSApiConsumersRequest{ApiPagedRequest: ApiPagedRequest{Offset: offset}})
  1301  		}
  1302  		resp, err := nc.Request(reqListSubject, req, time.Second)
  1303  		if err != nil {
  1304  			t.Fatalf("Unexpected error getting stream list: %v", err)
  1305  		}
  1306  		return resp.Data
  1307  	}
  1308  
  1309  	checkResp := func(resp []byte, expectedLen, expectedOffset int) {
  1310  		t.Helper()
  1311  		var listResponse JSApiConsumerNamesResponse
  1312  		if err := json.Unmarshal(resp, &listResponse); err != nil {
  1313  			t.Fatalf("Unexpected error: %v", err)
  1314  		}
  1315  		if len(listResponse.Consumers) != expectedLen {
  1316  			t.Fatalf("Expected only %d streams but got %d", expectedLen, len(listResponse.Consumers))
  1317  		}
  1318  		if listResponse.Total != consumersNum {
  1319  			t.Fatalf("Expected total to be %d but got %d", consumersNum, listResponse.Total)
  1320  		}
  1321  		if listResponse.Offset != expectedOffset {
  1322  			t.Fatalf("Expected offset to be %d but got %d", expectedOffset, listResponse.Offset)
  1323  		}
  1324  	}
  1325  
  1326  	checkResp(reqList(0), JSApiNamesLimit, 0)
  1327  	checkResp(reqList(consumersNum-22), 22, consumersNum-22)
  1328  	checkResp(reqList(consumersNum+22), 0, consumersNum)
  1329  }
  1330  
  1331  func TestNoRaceJetStreamWorkQueueLoadBalance(t *testing.T) {
  1332  	s := RunBasicJetStreamServer(t)
  1333  	defer s.Shutdown()
  1334  
  1335  	mname := "MY_MSG_SET"
  1336  	mset, err := s.GlobalAccount().addStream(&StreamConfig{Name: mname, Subjects: []string{"foo", "bar"}})
  1337  	if err != nil {
  1338  		t.Fatalf("Unexpected error adding message set: %v", err)
  1339  	}
  1340  	defer mset.delete()
  1341  
  1342  	// Create basic work queue mode consumer.
  1343  	oname := "WQ"
  1344  	o, err := mset.addConsumer(&ConsumerConfig{Durable: oname, AckPolicy: AckExplicit})
  1345  	if err != nil {
  1346  		t.Fatalf("Expected no error with durable, got %v", err)
  1347  	}
  1348  	defer o.delete()
  1349  
  1350  	// To send messages.
  1351  	nc := clientConnectToServer(t, s)
  1352  	defer nc.Close()
  1353  
  1354  	// For normal work queue semantics, you send requests to the subject with stream and consumer name.
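         	// That subject has the form "$JS.API.CONSUMER.MSG.NEXT.<stream>.<consumer>",
         	// so here it is effectively "$JS.API.CONSUMER.MSG.NEXT.MY_MSG_SET.WQ".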
  1355  	reqMsgSubj := o.requestNextMsgSubject()
  1356  
  1357  	numWorkers := 25
  1358  	counts := make([]int32, numWorkers)
  1359  	var received int32
  1360  
  1361  	rwg := &sync.WaitGroup{}
  1362  	rwg.Add(numWorkers)
  1363  
  1364  	wg := &sync.WaitGroup{}
  1365  	wg.Add(numWorkers)
  1366  	ch := make(chan bool)
  1367  
  1368  	toSend := 1000
  1369  
  1370  	for i := 0; i < numWorkers; i++ {
  1371  		nc := clientConnectToServer(t, s)
  1372  		defer nc.Close()
  1373  
  1374  		go func(index int32) {
  1375  			rwg.Done()
  1376  			defer wg.Done()
  1377  			<-ch
  1378  
  1379  			for counter := &counts[index]; ; {
  1380  				m, err := nc.Request(reqMsgSubj, nil, 100*time.Millisecond)
  1381  				if err != nil {
  1382  					return
  1383  				}
  1384  				m.Respond(nil)
  1385  				atomic.AddInt32(counter, 1)
  1386  				if total := atomic.AddInt32(&received, 1); total >= int32(toSend) {
  1387  					return
  1388  				}
  1389  			}
  1390  		}(int32(i))
  1391  	}
  1392  
  1393  	// Wait for requestors to be ready
  1394  	rwg.Wait()
  1395  	close(ch)
  1396  
  1397  	sendSubj := "bar"
  1398  	for i := 0; i < toSend; i++ {
  1399  		sendStreamMsg(t, nc, sendSubj, "Hello World!")
  1400  	}
  1401  
  1402  	// Wait for test to complete.
  1403  	wg.Wait()
  1404  
  1405  	target := toSend / numWorkers
  1406  	delta := target/2 + 5
  1407  	low, high := int32(target-delta), int32(target+delta)
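         	// With toSend=1000 and numWorkers=25: target=40, delta=25, so each worker is
         	// expected to end up with between 15 and 65 messages.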
  1408  
  1409  	for i := 0; i < numWorkers; i++ {
  1410  		if msgs := atomic.LoadInt32(&counts[i]); msgs < low || msgs > high {
  1411  			t.Fatalf("Messages received for worker [%d] too far off from target of %d, got %d", i, target, msgs)
  1412  		}
  1413  	}
  1414  }
  1415  
  1416  func TestNoRaceJetStreamClusterLargeStreamInlineCatchup(t *testing.T) {
  1417  	c := createJetStreamClusterExplicit(t, "LSS", 3)
  1418  	defer c.shutdown()
  1419  
  1420  	// Client based API
  1421  	s := c.randomServer()
  1422  	nc, js := jsClientConnect(t, s)
  1423  	defer nc.Close()
  1424  
  1425  	_, err := js.AddStream(&nats.StreamConfig{
  1426  		Name:     "TEST",
  1427  		Subjects: []string{"foo"},
  1428  		Replicas: 3,
  1429  	})
  1430  	if err != nil {
  1431  		t.Fatalf("Unexpected error: %v", err)
  1432  	}
  1433  
  1434  	sr := c.randomNonStreamLeader("$G", "TEST")
  1435  	sr.Shutdown()
  1436  
   1437  	// In case sr was the meta leader.
  1438  	c.waitOnLeader()
  1439  
  1440  	msg, toSend := []byte("Hello JS Clustering"), 5000
  1441  
  1442  	// Now fill up stream.
  1443  	for i := 0; i < toSend; i++ {
  1444  		if _, err = js.Publish("foo", msg); err != nil {
  1445  			t.Fatalf("Unexpected publish error: %v", err)
  1446  		}
  1447  	}
  1448  	si, err := js.StreamInfo("TEST")
  1449  	if err != nil {
  1450  		t.Fatalf("Unexpected error: %v", err)
  1451  	}
  1452  	// Check active state as well, shows that the owner answered.
  1453  	if si.State.Msgs != uint64(toSend) {
  1454  		t.Fatalf("Expected %d msgs, got bad state: %+v", toSend, si.State)
  1455  	}
  1456  
  1457  	// Kill our current leader to make just 2.
  1458  	c.streamLeader("$G", "TEST").Shutdown()
  1459  
  1460  	// Now restart the shutdown peer and wait for it to be current.
  1461  	sr = c.restartServer(sr)
  1462  	c.waitOnStreamCurrent(sr, "$G", "TEST")
  1463  
   1464  	// Ask other servers to step down as leader so that sr becomes the leader.
  1465  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  1466  		c.waitOnStreamLeader("$G", "TEST")
  1467  		if sl := c.streamLeader("$G", "TEST"); sl != sr {
  1468  			sl.JetStreamStepdownStream("$G", "TEST")
  1469  			return fmt.Errorf("Server %s is not leader yet", sr)
  1470  		}
  1471  		return nil
  1472  	})
  1473  
   1474  	// Check that we have all of our messages stored.
   1475  	// Wait for a bit for upper layers to process.
   1476  	checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
   1477  		si, err = js.StreamInfo("TEST")
   1478  		if err != nil {
   1479  			return fmt.Errorf("Unexpected error: %v", err)
   1480  		}
   1481  		if si.State.Msgs != uint64(toSend) {
   1482  			return fmt.Errorf("Expected %d msgs, got %d", toSend, si.State.Msgs)
   1483  		}
   1484  		return nil
   1485  	})
  1486  }
  1487  
  1488  func TestNoRaceJetStreamClusterStreamCreateAndLostQuorum(t *testing.T) {
  1489  	c := createJetStreamClusterExplicit(t, "R5S", 3)
  1490  	defer c.shutdown()
  1491  
  1492  	// Client based API
  1493  	s := c.randomServer()
  1494  	nc, js := jsClientConnect(t, s)
  1495  	defer nc.Close()
  1496  
  1497  	sub, err := nc.SubscribeSync(JSAdvisoryStreamQuorumLostPre + ".*")
  1498  	if err != nil {
  1499  		t.Fatalf("Unexpected error: %v", err)
  1500  	}
  1501  
  1502  	if _, err := js.AddStream(&nats.StreamConfig{Name: "NO-LQ-START", Replicas: 3}); err != nil {
  1503  		t.Fatalf("Unexpected error: %v", err)
  1504  	}
  1505  	c.waitOnStreamLeader("$G", "NO-LQ-START")
  1506  	checkSubsPending(t, sub, 0)
  1507  
  1508  	c.stopAll()
  1509  	// Start up the one we were connected to first and wait for it to be connected.
  1510  	s = c.restartServer(s)
  1511  	nc, err = nats.Connect(s.ClientURL())
  1512  	if err != nil {
  1513  		t.Fatalf("Failed to create client: %v", err)
  1514  	}
  1515  	defer nc.Close()
  1516  
  1517  	sub, err = nc.SubscribeSync(JSAdvisoryStreamQuorumLostPre + ".*")
  1518  	if err != nil {
  1519  		t.Fatalf("Unexpected error: %v", err)
  1520  	}
  1521  	nc.Flush()
  1522  
  1523  	c.restartAll()
  1524  	c.waitOnStreamLeader("$G", "NO-LQ-START")
  1525  
  1526  	checkSubsPending(t, sub, 0)
  1527  }
  1528  
  1529  func TestNoRaceJetStreamSuperClusterMirrors(t *testing.T) {
  1530  	sc := createJetStreamSuperCluster(t, 3, 3)
  1531  	defer sc.shutdown()
  1532  
  1533  	// Client based API
  1534  	s := sc.clusterForName("C2").randomServer()
  1535  	nc, js := jsClientConnect(t, s)
  1536  	defer nc.Close()
  1537  
  1538  	// Create source stream.
  1539  	_, err := js.AddStream(&nats.StreamConfig{Name: "S1", Subjects: []string{"foo", "bar"}, Replicas: 3, Placement: &nats.Placement{Cluster: "C2"}})
  1540  	if err != nil {
  1541  		t.Fatalf("Unexpected error: %v", err)
  1542  	}
  1543  
  1544  	// Needed while the Go client does not have mirror support.
  1545  	createStream := func(cfg *nats.StreamConfig) {
  1546  		t.Helper()
  1547  		if _, err := js.AddStream(cfg); err != nil {
  1548  			t.Fatalf("Unexpected error: %+v", err)
  1549  		}
  1550  	}
  1551  
  1552  	// Send 100 messages.
  1553  	for i := 0; i < 100; i++ {
  1554  		if _, err := js.Publish("foo", []byte("MIRRORS!")); err != nil {
  1555  			t.Fatalf("Unexpected publish error: %v", err)
  1556  		}
  1557  	}
  1558  
  1559  	createStream(&nats.StreamConfig{
  1560  		Name:      "M1",
  1561  		Mirror:    &nats.StreamSource{Name: "S1"},
  1562  		Placement: &nats.Placement{Cluster: "C1"},
  1563  	})
  1564  
  1565  	checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
  1566  		si, err := js.StreamInfo("M1")
  1567  		if err != nil {
  1568  			t.Fatalf("Unexpected error: %v", err)
  1569  		}
  1570  		if si.State.Msgs != 100 {
  1571  			return fmt.Errorf("Expected 100 msgs, got state: %+v", si.State)
  1572  		}
  1573  		return nil
  1574  	})
  1575  
  1576  	// Purge the source stream.
  1577  	if err := js.PurgeStream("S1"); err != nil {
  1578  		t.Fatalf("Unexpected purge error: %v", err)
  1579  	}
  1580  	// Send 50 more msgs now.
  1581  	for i := 0; i < 50; i++ {
  1582  		if _, err := js.Publish("bar", []byte("OK")); err != nil {
  1583  			t.Fatalf("Unexpected publish error: %v", err)
  1584  		}
  1585  	}
  1586  
  1587  	createStream(&nats.StreamConfig{
  1588  		Name:      "M2",
  1589  		Mirror:    &nats.StreamSource{Name: "S1"},
  1590  		Replicas:  3,
  1591  		Placement: &nats.Placement{Cluster: "C3"},
  1592  	})
  1593  
  1594  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1595  		si, err := js.StreamInfo("M2")
  1596  		if err != nil {
  1597  			t.Fatalf("Unexpected error: %v", err)
  1598  		}
  1599  		if si.State.Msgs != 50 {
  1600  			return fmt.Errorf("Expected 50 msgs, got state: %+v", si.State)
  1601  		}
  1602  		if si.State.FirstSeq != 101 {
  1603  			return fmt.Errorf("Expected start seq of 101, got state: %+v", si.State)
  1604  		}
  1605  		return nil
  1606  	})
  1607  
  1608  	sl := sc.clusterForName("C3").streamLeader("$G", "M2")
  1609  	doneCh := make(chan bool)
  1610  
  1611  	// Now test that if the mirror gets interrupted it picks up where it left off.
  1612  	go func() {
  1613  		// Send 100 more messages.
  1614  		for i := 0; i < 100; i++ {
  1615  			if _, err := js.Publish("foo", []byte("MIRRORS!")); err != nil {
  1616  				t.Errorf("Unexpected publish on %d error: %v", i, err)
  1617  			}
  1618  			time.Sleep(2 * time.Millisecond)
  1619  		}
  1620  		doneCh <- true
  1621  	}()
  1622  
  1623  	time.Sleep(20 * time.Millisecond)
  1624  	sl.Shutdown()
  1625  
  1626  	<-doneCh
  1627  	sc.clusterForName("C3").waitOnStreamLeader("$G", "M2")
  1628  
  1629  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1630  		si, err := js.StreamInfo("M2")
  1631  		if err != nil {
  1632  			t.Fatalf("Unexpected error: %v", err)
  1633  		}
  1634  		if si.State.Msgs != 150 {
  1635  			return fmt.Errorf("Expected 150 msgs, got state: %+v", si.State)
  1636  		}
  1637  		if si.State.FirstSeq != 101 {
  1638  			return fmt.Errorf("Expected start seq of 101, got state: %+v", si.State)
  1639  		}
  1640  		return nil
  1641  	})
  1642  }
  1643  
  1644  func TestNoRaceJetStreamSuperClusterMixedModeMirrors(t *testing.T) {
  1645  	// Unlike the similar sources test, this test does not reliably reproduce the bug
  1646  	// that would cause mirrors to not have the expected message count.
  1647  	// Still, we keep it in case of a regression, in the hope of getting lucky and
  1648  	// catching the failure while running it.
  1649  
  1650  	tmpl := `
  1651  		listen: 127.0.0.1:-1
  1652  		server_name: %s
  1653  		jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  1654  		leaf: { listen: 127.0.0.1:-1 }
  1655  
  1656  		cluster {
  1657  			name: %s
  1658  			listen: 127.0.0.1:%d
  1659  			routes = [%s]
  1660  		}
  1661  
  1662  		accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  1663  	`
  1664  	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 7, 4,
  1665  		func(serverName, clusterName, storeDir, conf string) string {
  1666  			sname := serverName[strings.Index(serverName, "-")+1:]
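        			// Servers S5-S7 run without JetStream (keeping their leafnode listen),
        			// while the JetStream-enabled servers have their leafnode listen disabled.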
  1667  			switch sname {
  1668  			case "S5", "S6", "S7":
  1669  				conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ")
  1670  			default:
  1671  				conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ")
  1672  			}
  1673  			return conf
  1674  		}, nil)
  1675  	defer sc.shutdown()
  1676  
  1677  	// Connect our client to a non-JS server.
  1678  	c := sc.randomCluster()
  1679  	var s *Server
  1680  	for {
  1681  		if as := c.randomServer(); !as.JetStreamEnabled() {
  1682  			s = as
  1683  			break
  1684  		}
  1685  	}
  1686  	nc, js := jsClientConnect(t, s)
  1687  	defer nc.Close()
  1688  
  1689  	numStreams := 10
  1690  	toSend := 1000
  1691  	errCh := make(chan error, numStreams)
  1692  	wg := sync.WaitGroup{}
  1693  	wg.Add(numStreams)
  1694  	// Create 10 origin streams
  1695  	for i := 0; i < 10; i++ {
  1696  		go func(idx int) {
  1697  			defer wg.Done()
  1698  			name := fmt.Sprintf("S%d", idx+1)
  1699  			if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  1700  				errCh <- fmt.Errorf("unexpected error: %v", err)
  1701  				return
  1702  			}
  1703  			c.waitOnStreamLeader(globalAccountName, name)
  1704  			// Load them up with a bunch of messages.
  1705  			for n := 0; n < toSend; n++ {
  1706  				m := nats.NewMsg(name)
  1707  				m.Header.Set("stream", name)
  1708  				m.Header.Set("idx", strconv.FormatInt(int64(n+1), 10))
  1709  				if err := nc.PublishMsg(m); err != nil {
  1710  					errCh <- fmt.Errorf("unexpected publish error: %v", err)
  1711  				}
  1712  			}
  1713  		}(i)
  1714  	}
  1715  	wg.Wait()
  1716  	select {
  1717  	case err := <-errCh:
  1718  		t.Fatal(err)
  1719  	default:
  1720  	}
  1721  
  1722  	for i := 0; i < 3; i++ {
  1723  		// Now create our mirrors
  1724  		wg := sync.WaitGroup{}
  1725  		mirrorsCount := 10
  1726  		wg.Add(mirrorsCount)
  1727  		errCh := make(chan error, 1)
  1728  		for m := 0; m < mirrorsCount; m++ {
  1729  			sname := fmt.Sprintf("S%d", rand.Intn(10)+1)
  1730  			go func(sname string, mirrorIdx int) {
  1731  				defer wg.Done()
  1732  				if _, err := js.AddStream(&nats.StreamConfig{
  1733  					Name:     fmt.Sprintf("M%d", mirrorIdx),
  1734  					Mirror:   &nats.StreamSource{Name: sname},
  1735  					Replicas: 3,
  1736  				}); err != nil {
  1737  					select {
  1738  					case errCh <- err:
  1739  					default:
  1740  					}
  1741  				}
  1742  			}(sname, m+1)
  1743  		}
  1744  		wg.Wait()
  1745  		select {
  1746  		case err := <-errCh:
  1747  			t.Fatalf("Error creating mirrors: %v", err)
  1748  		default:
  1749  		}
  1750  		// Now check the mirrors have all expected messages
  1751  		for m := 0; m < mirrorsCount; m++ {
  1752  			name := fmt.Sprintf("M%d", m+1)
  1753  			checkFor(t, 15*time.Second, 500*time.Millisecond, func() error {
  1754  				si, err := js.StreamInfo(name)
  1755  				if err != nil {
  1756  					t.Fatalf("Could not retrieve stream info")
  1757  				}
  1758  				if si.State.Msgs != uint64(toSend) {
  1759  					return fmt.Errorf("Expected %d msgs, got state: %+v", toSend, si.State)
  1760  				}
  1761  				return nil
  1762  			})
  1763  			err := js.DeleteStream(name)
  1764  			require_NoError(t, err)
  1765  		}
  1766  	}
  1767  }
  1768  
  1769  func TestNoRaceJetStreamSuperClusterSources(t *testing.T) {
  1770  	sc := createJetStreamSuperCluster(t, 3, 3)
  1771  	defer sc.shutdown()
  1772  
  1773  	// Client based API
  1774  	s := sc.clusterForName("C1").randomServer()
  1775  	nc, js := jsClientConnect(t, s)
  1776  	defer nc.Close()
  1777  
  1778  	// Create our source streams.
  1779  	for _, sname := range []string{"foo", "bar", "baz"} {
  1780  		if _, err := js.AddStream(&nats.StreamConfig{Name: sname, Replicas: 1}); err != nil {
  1781  			t.Fatalf("Unexpected error: %v", err)
  1782  		}
  1783  	}
  1784  
  1785  	sendBatch := func(subject string, n int) {
  1786  		for i := 0; i < n; i++ {
  1787  			msg := fmt.Sprintf("MSG-%d", i+1)
  1788  			if _, err := js.Publish(subject, []byte(msg)); err != nil {
  1789  				t.Fatalf("Unexpected publish error: %v", err)
  1790  			}
  1791  		}
  1792  	}
  1793  	// Populate each one.
  1794  	sendBatch("foo", 10)
  1795  	sendBatch("bar", 15)
  1796  	sendBatch("baz", 25)
  1797  
  1798  	// Needed while the Go client does not have support for creating mirror or source streams.
  1799  	createStream := func(cfg *nats.StreamConfig) {
  1800  		t.Helper()
  1801  		if _, err := js.AddStream(cfg); err != nil {
  1802  			t.Fatalf("Unexpected error: %+v", err)
  1803  		}
  1804  	}
  1805  
  1806  	cfg := &nats.StreamConfig{
  1807  		Name: "MS",
  1808  		Sources: []*nats.StreamSource{
  1809  			{Name: "foo"},
  1810  			{Name: "bar"},
  1811  			{Name: "baz"},
  1812  		},
  1813  	}
  1814  
  1815  	createStream(cfg)
  1816  	time.Sleep(time.Second)
  1817  
  1818  	// Faster timeout since we loop below checking for condition.
  1819  	js2, err := nc.JetStream(nats.MaxWait(50 * time.Millisecond))
  1820  	if err != nil {
  1821  		t.Fatalf("Unexpected error: %v", err)
  1822  	}
  1823  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1824  		si, err := js2.StreamInfo("MS")
  1825  		if err != nil {
  1826  			return err
  1827  		}
  1828  		if si.State.Msgs != 50 {
  1829  			return fmt.Errorf("Expected 50 msgs, got state: %+v", si.State)
  1830  		}
  1831  		return nil
  1832  	})
  1833  
  1834  	// Purge the source streams.
  1835  	for _, sname := range []string{"foo", "bar", "baz"} {
  1836  		if err := js.PurgeStream(sname); err != nil {
  1837  			t.Fatalf("Unexpected purge error: %v", err)
  1838  		}
  1839  	}
  1840  
  1841  	if err := js.DeleteStream("MS"); err != nil {
  1842  		t.Fatalf("Unexpected delete error: %v", err)
  1843  	}
  1844  
  1845  	// Send more msgs now.
  1846  	sendBatch("foo", 10)
  1847  	sendBatch("bar", 15)
  1848  	sendBatch("baz", 25)
  1849  
  1850  	cfg = &nats.StreamConfig{
  1851  		Name: "MS2",
  1852  		Sources: []*nats.StreamSource{
  1853  			{Name: "foo"},
  1854  			{Name: "bar"},
  1855  			{Name: "baz"},
  1856  		},
  1857  		Replicas:  3,
  1858  		Placement: &nats.Placement{Cluster: "C3"},
  1859  	}
  1860  
  1861  	createStream(cfg)
  1862  
  1863  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  1864  		si, err := js2.StreamInfo("MS2")
  1865  		if err != nil {
  1866  			t.Fatalf("Unexpected error: %v", err)
  1867  		}
  1868  		if si.State.Msgs != 50 {
  1869  			return fmt.Errorf("Expected 50 msgs, got state: %+v", si.State)
  1870  		}
  1871  		if si.State.FirstSeq != 1 {
  1872  			return fmt.Errorf("Expected start seq of 1, got state: %+v", si.State)
  1873  		}
  1874  		return nil
  1875  	})
  1876  
  1877  	sl := sc.clusterForName("C3").streamLeader("$G", "MS2")
  1878  	doneCh := make(chan bool)
  1879  
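        	// If the MS2 stream leader is also the meta leader, have it step down from
        	// meta leadership first since we will shut it down below.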
  1880  	if sl == sc.leader() {
  1881  		nc.Request(JSApiLeaderStepDown, nil, time.Second)
  1882  		sc.waitOnLeader()
  1883  	}
  1884  
  1885  	// Now test that if the sourcing stream gets interrupted it picks up where it left off.
  1886  	go func() {
  1887  		// Send 50 more messages each.
  1888  		for i := 0; i < 50; i++ {
  1889  			msg := fmt.Sprintf("R-MSG-%d", i+1)
  1890  			for _, sname := range []string{"foo", "bar", "baz"} {
  1891  				m := nats.NewMsg(sname)
  1892  				m.Data = []byte(msg)
  1893  				if _, err := js.PublishMsg(m); err != nil {
  1894  					t.Errorf("Unexpected publish error: %v", err)
  1895  				}
  1896  			}
  1897  			time.Sleep(2 * time.Millisecond)
  1898  		}
  1899  		doneCh <- true
  1900  	}()
  1901  
  1902  	time.Sleep(20 * time.Millisecond)
  1903  	sl.Shutdown()
  1904  
  1905  	sc.clusterForName("C3").waitOnStreamLeader("$G", "MS2")
  1906  	<-doneCh
  1907  
  1908  	checkFor(t, 20*time.Second, time.Second, func() error {
  1909  		si, err := js2.StreamInfo("MS2")
  1910  		if err != nil {
  1911  			return err
  1912  		}
  1913  		if si.State.Msgs != 200 {
  1914  			return fmt.Errorf("Expected 200 msgs, got state: %+v", si.State)
  1915  		}
  1916  		return nil
  1917  	})
  1918  }
  1919  
  1920  func TestNoRaceJetStreamClusterSourcesMuxd(t *testing.T) {
  1921  	c := createJetStreamClusterExplicit(t, "SMUX", 3)
  1922  	defer c.shutdown()
  1923  
  1924  	// Client for API requests.
  1925  	nc, js := jsClientConnect(t, c.randomServer())
  1926  	defer nc.Close()
  1927  
  1928  	// Send in 10000 messages.
  1929  	msg, toSend := make([]byte, 1024), 10000
  1930  	crand.Read(msg)
  1931  
  1932  	var sources []*nats.StreamSource
  1933  	// Create 10 origin streams.
  1934  	for i := 1; i <= 10; i++ {
  1935  		name := fmt.Sprintf("O-%d", i)
  1936  		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  1937  			t.Fatalf("Unexpected error: %v", err)
  1938  		}
  1939  		// Make sure we have a leader before publishing; since we use a
  1940  		// non-JS publisher we would otherwise not know whether the messages
  1941  		// made it to those streams or not.
  1942  		c.waitOnStreamLeader(globalAccountName, name)
  1943  		// Load them up with a bunch of messages.
  1944  		for n := 0; n < toSend; n++ {
  1945  			if err := nc.Publish(name, msg); err != nil {
  1946  				t.Fatalf("Unexpected publish error: %v", err)
  1947  			}
  1948  		}
  1949  		sources = append(sources, &nats.StreamSource{Name: name})
  1950  	}
  1951  
  1952  	// Now create our downstream stream that sources from all of them.
  1953  	if _, err := js.AddStream(&nats.StreamConfig{Name: "S", Replicas: 2, Sources: sources}); err != nil {
  1954  		t.Fatalf("Unexpected error: %v", err)
  1955  	}
  1956  
  1957  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  1958  		si, err := js.StreamInfo("S")
  1959  		if err != nil {
  1960  			t.Fatalf("Could not retrieve stream info")
  1961  		}
  1962  		if si.State.Msgs != uint64(10*toSend) {
  1963  			return fmt.Errorf("Expected %d msgs, got state: %+v", toSend*10, si.State)
  1964  		}
  1965  		return nil
  1966  	})
  1967  
  1968  }
  1969  
  1970  func TestNoRaceJetStreamSuperClusterMixedModeSources(t *testing.T) {
  1971  	tmpl := `
  1972  		listen: 127.0.0.1:-1
  1973  		server_name: %s
  1974  		jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  1975  		leaf: { listen: 127.0.0.1:-1 }
  1976  
  1977  		cluster {
  1978  			name: %s
  1979  			listen: 127.0.0.1:%d
  1980  			routes = [%s]
  1981  		}
  1982  
  1983  		accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  1984  	`
  1985  	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 7, 2,
  1986  		func(serverName, clusterName, storeDir, conf string) string {
  1987  			sname := serverName[strings.Index(serverName, "-")+1:]
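        			// Servers S5-S7 run without JetStream (keeping their leafnode listen),
        			// while the JetStream-enabled servers have their leafnode listen disabled.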
  1988  			switch sname {
  1989  			case "S5", "S6", "S7":
  1990  				conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ")
  1991  			default:
  1992  				conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ")
  1993  			}
  1994  			return conf
  1995  		}, nil)
  1996  	defer sc.shutdown()
  1997  	// Connect our client to a non-JS server.
  1998  	c := sc.randomCluster()
  1999  	var s *Server
  2000  	for {
  2001  		if as := c.randomServer(); !as.JetStreamEnabled() {
  2002  			s = as
  2003  			break
  2004  		}
  2005  	}
  2006  	nc, js := jsClientConnect(t, s)
  2007  	defer nc.Close()
  2008  
  2009  	numStreams := 100
  2010  	toSend := 1000
  2011  	var sources []*nats.StreamSource
  2012  	errCh := make(chan error, numStreams)
  2013  	srcCh := make(chan *nats.StreamSource, numStreams)
  2014  	wg := sync.WaitGroup{}
  2015  	wg.Add(numStreams)
  2016  	// Create 100 origin streams.
  2017  	for i := 1; i <= numStreams; i++ {
  2018  		go func(idx int) {
  2019  			defer wg.Done()
  2020  
  2021  			name := fmt.Sprintf("O-%d", idx)
  2022  			if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  2023  				errCh <- fmt.Errorf("unexpected error: %v", err)
  2024  				return
  2025  			}
  2026  			c.waitOnStreamLeader(globalAccountName, name)
  2027  			// Load them up with a bunch of messages.
  2028  			for n := 0; n < toSend; n++ {
  2029  				m := nats.NewMsg(name)
  2030  				m.Header.Set("stream", name)
  2031  				m.Header.Set("idx", strconv.FormatInt(int64(n+1), 10))
  2032  				if err := nc.PublishMsg(m); err != nil {
  2033  					errCh <- fmt.Errorf("unexpected publish error: %v", err)
  2034  					return
  2035  				}
  2036  			}
  2037  			srcCh <- &nats.StreamSource{Name: name}
  2038  		}(i)
  2039  	}
  2040  	wg.Wait()
  2041  	select {
  2042  	case err := <-errCh:
  2043  		t.Fatal(err)
  2044  	default:
  2045  	}
  2046  	for i := 0; i < numStreams; i++ {
  2047  		sources = append(sources, <-srcCh)
  2048  	}
  2049  
  2050  	for i := 0; i < 3; i++ {
  2051  		// Now create our downstream stream that sources from all of them.
  2052  		if _, err := js.AddStream(&nats.StreamConfig{Name: "S", Replicas: 3, Sources: sources}); err != nil {
  2053  			t.Fatalf("Unexpected error: %v", err)
  2054  		}
  2055  
  2056  		checkFor(t, 15*time.Second, 1000*time.Millisecond, func() error {
  2057  			si, err := js.StreamInfo("S")
  2058  			if err != nil {
  2059  				t.Fatalf("Could not retrieve stream info")
  2060  			}
  2061  			if si.State.Msgs != uint64(numStreams*toSend) {
  2062  				return fmt.Errorf("Expected %d msgs, got state: %+v", numStreams*toSend, si.State)
  2063  			}
  2064  			return nil
  2065  		})
  2066  
  2067  		err := js.DeleteStream("S")
  2068  		require_NoError(t, err)
  2069  	}
  2070  }
  2071  
  2072  func TestNoRaceJetStreamClusterExtendedStreamPurgeStall(t *testing.T) {
  2073  	// Uncomment to run. Needs to be on a big machine. Do not want as part of Travis tests atm.
  2074  	skip(t)
  2075  
  2076  	cerr := func(t *testing.T, err error) {
  2077  		t.Helper()
  2078  		if err != nil {
  2079  			t.Fatalf("unexpected err: %s", err)
  2080  		}
  2081  	}
  2082  
  2083  	s := RunBasicJetStreamServer(t)
  2084  	defer s.Shutdown()
  2085  
  2086  	nc, js := jsClientConnect(t, s)
  2087  	defer nc.Close()
  2088  
  2089  	si, err := js.AddStream(&nats.StreamConfig{
  2090  		Name:     "KV",
  2091  		Subjects: []string{"kv.>"},
  2092  		Storage:  nats.FileStorage,
  2093  	})
  2094  	cerr(t, err)
  2095  
  2096  	// 100KB messages spread over 1000 different subjects.
  2097  	body := make([]byte, 100*1024)
  2098  	for i := 0; i < 50000; i++ {
  2099  		if _, err := js.PublishAsync(fmt.Sprintf("kv.%d", i%1000), body); err != nil {
  2100  			cerr(t, err)
  2101  		}
  2102  	}
  2103  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  2104  		if si, err = js.StreamInfo("KV"); err != nil {
  2105  			return err
  2106  		}
  2107  		if si.State.Msgs == 50000 {
  2108  			return nil
  2109  		}
  2110  		return fmt.Errorf("waiting for more")
  2111  	})
  2112  
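        	// Purge a single subject and make sure it completes quickly and without excessive memory usage.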
  2113  	jp, _ := json.Marshal(&JSApiStreamPurgeRequest{Subject: "kv.20"})
  2114  	start := time.Now()
  2115  	res, err := nc.Request(fmt.Sprintf(JSApiStreamPurgeT, "KV"), jp, time.Minute)
  2116  	elapsed := time.Since(start)
  2117  	cerr(t, err)
  2118  	pres := JSApiStreamPurgeResponse{}
  2119  	err = json.Unmarshal(res.Data, &pres)
  2120  	cerr(t, err)
  2121  	if !pres.Success {
  2122  		t.Fatalf("purge failed: %#v", pres)
  2123  	}
  2124  	if elapsed > time.Second {
  2125  		t.Fatalf("Purge took too long %s", elapsed)
  2126  	}
  2127  	v, _ := s.Varz(nil)
  2128  	if v.Mem > 100*1024*1024 { // 100MB limit; in practice it stays well under 100MB. It was ~7GB when this was failing.
  2129  		t.Fatalf("Used too much memory: %v", friendlyBytes(v.Mem))
  2130  	}
  2131  }
  2132  
  2133  func TestNoRaceJetStreamClusterMirrorExpirationAndMissingSequences(t *testing.T) {
  2134  	c := createJetStreamClusterExplicit(t, "MMS", 9)
  2135  	defer c.shutdown()
  2136  
  2137  	// Client for API requests.
  2138  	nc, js := jsClientConnect(t, c.randomServer())
  2139  	defer nc.Close()
  2140  
  2141  	sendBatch := func(n int) {
  2142  		t.Helper()
  2143  		// Send a batch to a given subject.
  2144  		for i := 0; i < n; i++ {
  2145  			if _, err := js.Publish("TEST", []byte("OK")); err != nil {
  2146  				t.Fatalf("Unexpected publish error: %v", err)
  2147  			}
  2148  		}
  2149  	}
  2150  
  2151  	checkStream := func(stream string, num uint64) {
  2152  		t.Helper()
  2153  		checkFor(t, 20*time.Second, 20*time.Millisecond, func() error {
  2154  			si, err := js.StreamInfo(stream)
  2155  			if err != nil {
  2156  				return err
  2157  			}
  2158  			if si.State.Msgs != num {
  2159  				return fmt.Errorf("Expected %d msgs, got %d", num, si.State.Msgs)
  2160  			}
  2161  			return nil
  2162  		})
  2163  	}
  2164  
  2165  	checkMirror := func(num uint64) { t.Helper(); checkStream("M", num) }
  2166  	checkTest := func(num uint64) { t.Helper(); checkStream("TEST", num) }
  2167  
  2168  	// Origin
  2169  	_, err := js.AddStream(&nats.StreamConfig{
  2170  		Name:   "TEST",
  2171  		MaxAge: 500 * time.Millisecond,
  2172  	})
  2173  	if err != nil {
  2174  		t.Fatalf("Unexpected error: %v", err)
  2175  	}
  2176  
  2177  	ts := c.streamLeader("$G", "TEST")
  2178  	ml := c.leader()
  2179  
  2180  	// Create the mirror now, retrying until its leader is neither the TEST stream leader nor the meta leader.
  2181  	for ms := ts; ms == ts || ms == ml; {
  2182  		_, err = js.AddStream(&nats.StreamConfig{
  2183  			Name:     "M",
  2184  			Mirror:   &nats.StreamSource{Name: "TEST"},
  2185  			Replicas: 2,
  2186  		})
  2187  		if err != nil {
  2188  			t.Fatalf("Unexpected error: %v", err)
  2189  		}
  2190  		ms = c.streamLeader("$G", "M")
  2191  		if ts == ms || ms == ml {
  2192  			// Delete and retry.
  2193  			js.DeleteStream("M")
  2194  		}
  2195  	}
  2196  
  2197  	sendBatch(10)
  2198  	checkMirror(10)
  2199  
  2200  	// Now shut down the server that is the mirror's leader.
  2201  	ms := c.streamLeader("$G", "M")
  2202  	ms.Shutdown()
  2203  	c.waitOnLeader()
  2204  
  2205  	// Send more messages but let them expire.
  2206  	sendBatch(10)
  2207  	checkTest(0)
  2208  
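        	// Restart the server that hosted the mirror; it has to handle the sequences that expired while it was down.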
  2209  	c.restartServer(ms)
  2210  	c.checkClusterFormed()
  2211  	c.waitOnStreamLeader("$G", "M")
  2212  
  2213  	sendBatch(10)
  2214  	checkMirror(20)
  2215  }
  2216  
  2217  func TestNoRaceJetStreamClusterLargeActiveOnReplica(t *testing.T) {
  2218  	// Uncomment to run.
  2219  	skip(t)
  2220  
  2221  	c := createJetStreamClusterExplicit(t, "LAG", 3)
  2222  	defer c.shutdown()
  2223  
  2224  	// Client for API requests.
  2225  	nc, js := jsClientConnect(t, c.randomServer())
  2226  	defer nc.Close()
  2227  
  2228  	timeout := time.Now().Add(60 * time.Second)
  2229  	for time.Now().Before(timeout) {
  2230  		si, err := js.AddStream(&nats.StreamConfig{
  2231  			Name:     "TEST",
  2232  			Subjects: []string{"foo", "bar"},
  2233  			Replicas: 3,
  2234  		})
  2235  		if err != nil {
  2236  			t.Fatalf("Unexpected error: %v", err)
  2237  		}
  2238  		for _, r := range si.Cluster.Replicas {
  2239  			if r.Active > 5*time.Second {
  2240  				t.Fatalf("Bad Active value: %+v", r)
  2241  			}
  2242  		}
  2243  		if err := js.DeleteStream("TEST"); err != nil {
  2244  			t.Fatalf("Unexpected delete error: %v", err)
  2245  		}
  2246  	}
  2247  }
  2248  
  2249  func TestNoRaceJetStreamSuperClusterRIPStress(t *testing.T) {
  2250  	// Uncomment to run. Needs to be on a big machine.
  2251  	skip(t)
  2252  
  2253  	sc := createJetStreamSuperCluster(t, 3, 3)
  2254  	defer sc.shutdown()
  2255  
  2256  	// Client based API
  2257  	s := sc.clusterForName("C2").randomServer()
  2258  	nc, js := jsClientConnect(t, s)
  2259  	defer nc.Close()
  2260  
  2261  	scm := make(map[string][]string)
  2262  
  2263  	// Create 50 streams per cluster.
  2264  	for _, cn := range []string{"C1", "C2", "C3"} {
  2265  		var streams []string
  2266  		for i := 0; i < 50; i++ {
  2267  			sn := fmt.Sprintf("%s-S%d", cn, i+1)
  2268  			streams = append(streams, sn)
  2269  			_, err := js.AddStream(&nats.StreamConfig{
  2270  				Name:      sn,
  2271  				Replicas:  3,
  2272  				Placement: &nats.Placement{Cluster: cn},
  2273  				MaxAge:    2 * time.Minute,
  2274  				MaxMsgs:   50_000,
  2275  			})
  2276  			if err != nil {
  2277  				t.Fatalf("Unexpected error: %v", err)
  2278  			}
  2279  		}
  2280  		scm[cn] = streams
  2281  	}
  2282  
  2283  	sourceForCluster := func(cn string) []*nats.StreamSource {
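        	// Helper that returns stream sources for the 50 streams owned by a different cluster.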
  2284  		var sns []string
  2285  		switch cn {
  2286  		case "C1":
  2287  			sns = scm["C2"]
  2288  		case "C2":
  2289  			sns = scm["C3"]
  2290  		case "C3":
  2291  			sns = scm["C1"]
  2292  		default:
  2293  			t.Fatalf("Unknown cluster %q", cn)
  2294  		}
  2295  		var ss []*nats.StreamSource
  2296  		for _, sn := range sns {
  2297  			ss = append(ss, &nats.StreamSource{Name: sn})
  2298  		}
  2299  		return ss
  2300  	}
  2301  
  2302  	// Mux all 50 streams from one cluster to a single stream across a GW connection to another cluster.
  2303  	_, err := js.AddStream(&nats.StreamConfig{
  2304  		Name:      "C1-S-MUX",
  2305  		Replicas:  2,
  2306  		Placement: &nats.Placement{Cluster: "C1"},
  2307  		Sources:   sourceForCluster("C2"),
  2308  		MaxAge:    time.Minute,
  2309  		MaxMsgs:   20_000,
  2310  	})
  2311  	if err != nil {
  2312  		t.Fatalf("Unexpected error: %v", err)
  2313  	}
  2314  
  2315  	_, err = js.AddStream(&nats.StreamConfig{
  2316  		Name:      "C2-S-MUX",
  2317  		Replicas:  2,
  2318  		Placement: &nats.Placement{Cluster: "C2"},
  2319  		Sources:   sourceForCluster("C3"),
  2320  		MaxAge:    time.Minute,
  2321  		MaxMsgs:   20_000,
  2322  	})
  2323  	if err != nil {
  2324  		t.Fatalf("Unexpected error: %v", err)
  2325  	}
  2326  
  2327  	_, err = js.AddStream(&nats.StreamConfig{
  2328  		Name:      "C3-S-MUX",
  2329  		Replicas:  2,
  2330  		Placement: &nats.Placement{Cluster: "C3"},
  2331  		Sources:   sourceForCluster("C1"),
  2332  		MaxAge:    time.Minute,
  2333  		MaxMsgs:   20_000,
  2334  	})
  2335  	if err != nil {
  2336  		t.Fatalf("Unexpected error: %v", err)
  2337  	}
  2338  
  2339  	// Now create mirrors for our mux'd streams.
  2340  	_, err = js.AddStream(&nats.StreamConfig{
  2341  		Name:      "C1-MIRROR",
  2342  		Replicas:  3,
  2343  		Placement: &nats.Placement{Cluster: "C1"},
  2344  		Mirror:    &nats.StreamSource{Name: "C3-S-MUX"},
  2345  		MaxAge:    5 * time.Minute,
  2346  		MaxMsgs:   10_000,
  2347  	})
  2348  	if err != nil {
  2349  		t.Fatalf("Unexpected error: %v", err)
  2350  	}
  2351  
  2352  	_, err = js.AddStream(&nats.StreamConfig{
  2353  		Name:      "C2-MIRROR",
  2354  		Replicas:  3,
  2355  		Placement: &nats.Placement{Cluster: "C2"},
  2356  		Mirror:    &nats.StreamSource{Name: "C2-S-MUX"},
  2357  		MaxAge:    5 * time.Minute,
  2358  		MaxMsgs:   10_000,
  2359  	})
  2360  	if err != nil {
  2361  		t.Fatalf("Unexpected error: %v", err)
  2362  	}
  2363  
  2364  	_, err = js.AddStream(&nats.StreamConfig{
  2365  		Name:      "C3-MIRROR",
  2366  		Replicas:  3,
  2367  		Placement: &nats.Placement{Cluster: "C3"},
  2368  		Mirror:    &nats.StreamSource{Name: "C1-S-MUX"},
  2369  		MaxAge:    5 * time.Minute,
  2370  		MaxMsgs:   10_000,
  2371  	})
  2372  	if err != nil {
  2373  		t.Fatalf("Unexpected error: %v", err)
  2374  	}
  2375  
  2376  	var jsc []nats.JetStream
  2377  
  2378  	// Create 64 clients.
  2379  	for i := 0; i < 64; i++ {
  2380  		s := sc.randomCluster().randomServer()
  2381  		nc, _ := jsClientConnect(t, s)
  2382  		defer nc.Close()
  2383  		js, err := nc.JetStream(nats.PublishAsyncMaxPending(8 * 1024))
  2384  		if err != nil {
  2385  			t.Fatalf("Unexpected error: %v", err)
  2386  		}
  2387  		jsc = append(jsc, js)
  2388  	}
  2389  
  2390  	msg := make([]byte, 1024)
  2391  	crand.Read(msg)
  2392  
  2393  	// Run for 8 minutes (480 seconds).
  2394  	expires := time.Now().Add(480 * time.Second)
  2395  	for time.Now().Before(expires) {
  2396  		for _, sns := range scm {
  2397  			rand.Shuffle(len(sns), func(i, j int) { sns[i], sns[j] = sns[j], sns[i] })
  2398  			for _, sn := range sns {
  2399  				js := jsc[rand.Intn(len(jsc))]
  2400  				if _, err = js.PublishAsync(sn, msg); err != nil {
  2401  					t.Fatalf("Unexpected publish error: %v", err)
  2402  				}
  2403  			}
  2404  		}
  2405  		time.Sleep(10 * time.Millisecond)
  2406  	}
  2407  }
  2408  
  2409  func TestNoRaceJetStreamSlowFilteredInititalPendingAndFirstMsg(t *testing.T) {
  2410  	s := RunBasicJetStreamServer(t)
  2411  	defer s.Shutdown()
  2412  
  2413  	// Create directly here to force multiple blocks, etc.
  2414  	a, err := s.LookupAccount("$G")
  2415  	if err != nil {
  2416  		t.Fatalf("Unexpected error: %v", err)
  2417  	}
  2418  	mset, err := a.addStreamWithStore(
  2419  		&StreamConfig{
  2420  			Name:     "S",
  2421  			Subjects: []string{"foo", "bar", "baz", "foo.bar.baz", "foo.*"},
  2422  		},
  2423  		&FileStoreConfig{
  2424  			BlockSize:  4 * 1024 * 1024,
  2425  			AsyncFlush: true,
  2426  		},
  2427  	)
  2428  	if err != nil {
  2429  		t.Fatalf("Unexpected error: %v", err)
  2430  	}
  2431  
  2432  	nc, js := jsClientConnect(t, s)
  2433  	defer nc.Close()
  2434  
  2435  	toSend := 100_000 // 500k total though.
  2436  
  2437  	// Messages will be 'foo' 'bar' 'baz' repeated 100k times.
  2438  	// Then 'foo.bar.baz' all contiguous for 100k.
  2439  	// Then foo.N for 1-100000
  2440  	for i := 0; i < toSend; i++ {
  2441  		js.PublishAsync("foo", []byte("HELLO"))
  2442  		js.PublishAsync("bar", []byte("WORLD"))
  2443  		js.PublishAsync("baz", []byte("AGAIN"))
  2444  	}
  2445  	// Make contiguous block of same subject.
  2446  	for i := 0; i < toSend; i++ {
  2447  		js.PublishAsync("foo.bar.baz", []byte("ALL-TOGETHER"))
  2448  	}
  2449  	// Now add some more at the end.
  2450  	for i := 0; i < toSend; i++ {
  2451  		js.PublishAsync(fmt.Sprintf("foo.%d", i+1), []byte("LATER"))
  2452  	}
  2453  
  2454  	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
  2455  		si, err := js.StreamInfo("S")
  2456  		if err != nil {
  2457  			return err
  2458  		}
  2459  		if si.State.Msgs != uint64(5*toSend) {
  2460  			return fmt.Errorf("Expected %d msgs, got %d", 5*toSend, si.State.Msgs)
  2461  		}
  2462  		return nil
  2463  	})
  2464  
  2465  	// Threshold for taking too long.
  2466  	const thresh = 150 * time.Millisecond
  2467  
  2468  	var dindex int
  2469  	testConsumerCreate := func(subj string, startSeq, expectedNumPending uint64) {
  2470  		t.Helper()
  2471  		dindex++
  2472  		dname := fmt.Sprintf("dur-%d", dindex)
  2473  		cfg := ConsumerConfig{FilterSubject: subj, Durable: dname, AckPolicy: AckExplicit}
  2474  		if startSeq > 1 {
  2475  			cfg.OptStartSeq, cfg.DeliverPolicy = startSeq, DeliverByStartSequence
  2476  		}
  2477  		start := time.Now()
  2478  		o, err := mset.addConsumer(&cfg)
  2479  		if err != nil {
  2480  			t.Fatalf("Unexpected error: %v", err)
  2481  		}
  2482  		if delta := time.Since(start); delta > thresh {
  2483  			t.Fatalf("Creating consumer for %q and start: %d took too long: %v", subj, startSeq, delta)
  2484  		}
  2485  		if ci := o.info(); ci.NumPending != expectedNumPending {
  2486  			t.Fatalf("Expected NumPending of %d, got %d", expectedNumPending, ci.NumPending)
  2487  		}
  2488  	}
  2489  
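        	// Sequence layout: 'foo', 'bar' and 'baz' interleave over sequences 1-300_000,
        	// 'foo.bar.baz' occupies 300_001-400_000 and the foo.N subjects occupy 400_001-500_000.
        	// NumPending counts messages from the start sequence inclusive.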
  2490  	testConsumerCreate("foo.100000", 1, 1)
  2491  	testConsumerCreate("foo.100000", 222_000, 1)
  2492  	testConsumerCreate("foo", 1, 100_000)
  2493  	testConsumerCreate("foo", 4, 100_000-1)
  2494  	testConsumerCreate("foo.bar.baz", 1, 100_000)
  2495  	testConsumerCreate("foo.bar.baz", 350_001, 50_000)
  2496  	testConsumerCreate("*", 1, 300_000)
  2497  	testConsumerCreate("*", 4, 300_000-3)
  2498  	testConsumerCreate(">", 1, 500_000)
  2499  	testConsumerCreate(">", 50_000, 500_000-50_000+1)
  2500  	testConsumerCreate("foo.10", 1, 1)
  2501  
  2502  	// Also test that we do not take long if the start sequence is later in the stream.
  2503  	sub, err := js.PullSubscribe("foo.100000", "dlc")
  2504  	if err != nil {
  2505  		t.Fatalf("Unexpected error: %v", err)
  2506  	}
  2507  	start := time.Now()
  2508  	fetchMsgs(t, sub, 1, time.Second)
  2509  	if delta := time.Since(start); delta > thresh {
  2510  		t.Fatalf("Took too long for pull subscriber to fetch the message: %v", delta)
  2511  	}
  2512  
  2513  	// Now do some deletes and make sure these are handled correctly.
  2514  	// Delete 3 foo messages.
  2515  	mset.removeMsg(1)
  2516  	mset.removeMsg(4)
  2517  	mset.removeMsg(7)
  2518  	testConsumerCreate("foo", 1, 100_000-3)
  2519  
  2520  	// Make sure wider scoped subjects do the right thing from a pending perspective.
  2521  	o, err := mset.addConsumer(&ConsumerConfig{FilterSubject: ">", Durable: "cat", AckPolicy: AckExplicit})
  2522  	if err != nil {
  2523  		t.Fatalf("Unexpected error: %v", err)
  2524  	}
  2525  	ci, expected := o.info(), uint64(500_000-3)
  2526  	if ci.NumPending != expected {
  2527  		t.Fatalf("Expected NumPending of %d, got %d", expected, ci.NumPending)
  2528  	}
  2529  	// Send another and make sure its captured by our wide scope consumer.
  2530  	js.Publish("foo", []byte("HELLO AGAIN"))
  2531  	if ci = o.info(); ci.NumPending != expected+1 {
  2532  		t.Fatalf("Expected the consumer to recognize the wide scoped consumer, wanted pending of %d, got %d", expected+1, ci.NumPending)
  2533  	}
  2534  
  2535  	// Stop the current server and test restart.
  2536  	sd := s.JetStreamConfig().StoreDir
  2537  	s.Shutdown()
  2538  	// Restart.
  2539  	s = RunJetStreamServerOnPort(-1, sd)
  2540  	defer s.Shutdown()
  2541  
  2542  	a, err = s.LookupAccount("$G")
  2543  	if err != nil {
  2544  		t.Fatalf("Unexpected error: %v", err)
  2545  	}
  2546  	mset, err = a.lookupStream("S")
  2547  	if err != nil {
  2548  		t.Fatalf("Unexpected error: %v", err)
  2549  	}
  2550  
  2551  	// Make sure we recovered our per subject state on restart.
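        	// Note that 'foo' had 3 messages removed and 1 published afterwards, hence 100_000-2.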
  2552  	testConsumerCreate("foo.100000", 1, 1)
  2553  	testConsumerCreate("foo", 1, 100_000-2)
  2554  }
  2555  
  2556  func TestNoRaceJetStreamFileStoreBufferReuse(t *testing.T) {
  2557  	// Uncomment to run. Needs to be on a big machine.
  2558  	skip(t)
  2559  
  2560  	s := RunBasicJetStreamServer(t)
  2561  	defer s.Shutdown()
  2562  
  2563  	cfg := &StreamConfig{Name: "TEST", Subjects: []string{"foo", "bar", "baz"}, Storage: FileStorage}
  2564  	if _, err := s.GlobalAccount().addStreamWithStore(cfg, nil); err != nil {
  2565  		t.Fatalf("Unexpected error adding stream: %v", err)
  2566  	}
  2567  
  2568  	// Client for API requests.
  2569  	nc, js := jsClientConnect(t, s)
  2570  	defer nc.Close()
  2571  
  2572  	toSend := 200_000
  2573  
  2574  	m := nats.NewMsg("foo")
  2575  	m.Data = make([]byte, 8*1024)
  2576  	crand.Read(m.Data)
  2577  
  2578  	start := time.Now()
  2579  	for i := 0; i < toSend; i++ {
  2580  		m.Reply = _EMPTY_
  2581  		switch i % 3 {
  2582  		case 0:
  2583  			m.Subject = "foo"
  2584  		case 1:
  2585  			m.Subject = "bar"
  2586  		case 2:
  2587  			m.Subject = "baz"
  2588  		}
  2589  		m.Header.Set("X-ID2", fmt.Sprintf("XXXXX-%d", i))
  2590  		if _, err := js.PublishMsgAsync(m); err != nil {
  2591  			t.Fatalf("Err on publish: %v", err)
  2592  		}
  2593  	}
  2594  	<-js.PublishAsyncComplete()
  2595  	fmt.Printf("TOOK %v to publish\n", time.Since(start))
  2596  
  2597  	v, err := s.Varz(nil)
  2598  	if err != nil {
  2599  		t.Fatalf("Unexpected error: %v", err)
  2600  	}
  2601  	fmt.Printf("MEM AFTER PUBLISH is %v\n", friendlyBytes(v.Mem))
  2602  
  2603  	si, _ := js.StreamInfo("TEST")
  2604  	fmt.Printf("si is %+v\n", si.State)
  2605  
  2606  	received := 0
  2607  	done := make(chan bool)
  2608  
  2609  	cb := func(m *nats.Msg) {
  2610  		received++
  2611  		if received >= toSend {
  2612  			done <- true
  2613  		}
  2614  	}
  2615  
  2616  	start = time.Now()
  2617  	sub, err := js.Subscribe("*", cb, nats.EnableFlowControl(), nats.IdleHeartbeat(time.Second), nats.AckNone())
  2618  	if err != nil {
  2619  		t.Fatalf("Unexpected error: %v", err)
  2620  	}
  2621  	defer sub.Unsubscribe()
  2622  	<-done
  2623  	fmt.Printf("TOOK %v to consume\n", time.Since(start))
  2624  
  2625  	v, err = s.Varz(nil)
  2626  	if err != nil {
  2627  		t.Fatalf("Unexpected error: %v", err)
  2628  	}
  2629  	fmt.Printf("MEM AFTER SUBSCRIBE is %v\n", friendlyBytes(v.Mem))
  2630  }
  2631  
  2632  // Report of slow restart for a server that has many messages that have expired while it was not running.
  2633  func TestNoRaceJetStreamSlowRestartWithManyExpiredMsgs(t *testing.T) {
  2634  	opts := DefaultTestOptions
  2635  	opts.Port = -1
  2636  	opts.JetStream = true
  2637  	s := RunServer(&opts)
  2638  	if config := s.JetStreamConfig(); config != nil {
  2639  		defer removeDir(t, config.StoreDir)
  2640  	}
  2641  	defer s.Shutdown()
  2642  
  2643  	// Client for API requests.
  2644  	nc, js := jsClientConnect(t, s)
  2645  	defer nc.Close()
  2646  
  2647  	ttl := 2 * time.Second
  2648  	_, err := js.AddStream(&nats.StreamConfig{
  2649  		Name:     "ORDERS",
  2650  		Subjects: []string{"orders.*"},
  2651  		MaxAge:   ttl,
  2652  	})
  2653  	if err != nil {
  2654  		t.Fatalf("Unexpected error: %v", err)
  2655  	}
  2656  
  2657  	// Attach a consumer that is filtering on a wildcard subject as well.
  2658  	// This does not affect the restart time as originally thought, but keep it here.
  2659  	_, err = js.AddConsumer("ORDERS", &nats.ConsumerConfig{
  2660  		Durable:       "c22",
  2661  		FilterSubject: "orders.*",
  2662  		AckPolicy:     nats.AckExplicitPolicy,
  2663  	})
  2664  	if err != nil {
  2665  		t.Fatalf("Unexpected error: %v", err)
  2666  	}
  2667  
  2668  	// Now fill up with messages.
  2669  	toSend := 100_000
  2670  	for i := 1; i <= toSend; i++ {
  2671  		js.PublishAsync(fmt.Sprintf("orders.%d", i), []byte("OK"))
  2672  	}
  2673  	<-js.PublishAsyncComplete()
  2674  
  2675  	sdir := strings.TrimSuffix(s.JetStreamConfig().StoreDir, JetStreamStoreDir)
  2676  	s.Shutdown()
  2677  
  2678  	// Let them expire while not running.
  2679  	time.Sleep(ttl + 500*time.Millisecond)
  2680  
  2681  	start := time.Now()
  2682  	opts.Port = -1
  2683  	opts.StoreDir = sdir
  2684  	s = RunServer(&opts)
  2685  	elapsed := time.Since(start)
  2686  	defer s.Shutdown()
  2687  
  2688  	if elapsed > 2*time.Second {
  2689  		t.Fatalf("Took %v for restart which is too long", elapsed)
  2690  	}
  2691  
  2692  	// Check everything is correct.
  2693  	nc, js = jsClientConnect(t, s)
  2694  	defer nc.Close()
  2695  
  2696  	si, err := js.StreamInfo("ORDERS")
  2697  	if err != nil {
  2698  		t.Fatalf("Unexpected error: %v", err)
  2699  	}
  2700  	if si.State.Msgs != 0 {
  2701  		t.Fatalf("Expected no msgs after restart, got %d", si.State.Msgs)
  2702  	}
  2703  }
  2704  
  2705  func TestNoRaceJetStreamStalledMirrorsAfterExpire(t *testing.T) {
  2706  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  2707  	defer c.shutdown()
  2708  
  2709  	nc, js := jsClientConnect(t, c.randomServer())
  2710  	defer nc.Close()
  2711  
  2712  	cfg := &nats.StreamConfig{
  2713  		Name:     "TEST",
  2714  		Subjects: []string{"foo.*"},
  2715  		Replicas: 1,
  2716  		MaxAge:   100 * time.Millisecond,
  2717  	}
  2718  
  2719  	if _, err := js.AddStream(cfg); err != nil {
  2720  		t.Fatalf("Error creating stream: %v", err)
  2721  	}
  2722  
  2723  	if _, err := js.AddStream(&nats.StreamConfig{
  2724  		Name:     "M",
  2725  		Replicas: 2,
  2726  		Mirror:   &nats.StreamSource{Name: "TEST"},
  2727  	}); err != nil {
  2728  		t.Fatalf("Unexpected error: %v", err)
  2729  	}
  2730  
  2731  	sendBatch := func(batch int) {
  2732  		t.Helper()
  2733  		for i := 0; i < batch; i++ {
  2734  			js.PublishAsync("foo.bar", []byte("Hello"))
  2735  		}
  2736  		select {
  2737  		case <-js.PublishAsyncComplete():
  2738  		case <-time.After(5 * time.Second):
  2739  			t.Fatalf("Did not receive completion signal")
  2740  		}
  2741  	}
  2742  
  2743  	numMsgs := 10_000
  2744  	sendBatch(numMsgs)
  2745  
  2746  	// Turn off expiration so we can verify that the mirror did not stall.
  2747  	cfg.MaxAge = 0
  2748  	if _, err := js.UpdateStream(cfg); err != nil {
  2749  		t.Fatalf("Unexpected error: %v", err)
  2750  	}
  2751  
  2752  	sendBatch(numMsgs)
  2753  
  2754  	// Wait for mirror to be caught up.
  2755  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  2756  		si, err := js.StreamInfo("M")
  2757  		if err != nil {
  2758  			t.Fatalf("Unexpected error: %v", err)
  2759  		}
  2760  		if si.State.LastSeq != uint64(2*numMsgs) {
  2761  			return fmt.Errorf("Expected %d as last sequence, got state: %+v", 2*numMsgs, si.State)
  2762  		}
  2763  		return nil
  2764  	})
  2765  }
  2766  
  2767  // We will use JetStream helpers to create the supercluster, but this test is about exposing the ability to access
  2768  // account-scoped connz with subject interest filtering.
  2769  func TestNoRaceJetStreamSuperClusterAccountConnz(t *testing.T) {
  2770  	// This has 4 different accounts: 3 general plus the system account.
  2771  	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 3, 3)
  2772  	defer sc.shutdown()
  2773  
  2774  	// Create 20 connections on accounts one and two.
  2775  	// Create JetStream assets for each as well to make sure by default we do not report them.
  2776  	num := 20
  2777  	for i := 0; i < num; i++ {
  2778  		nc, _ := jsClientConnect(t, sc.randomServer(), nats.UserInfo("one", "p"), nats.Name("one"))
  2779  		defer nc.Close()
  2780  
  2781  		if i%2 == 0 {
  2782  			nc.SubscribeSync("foo")
  2783  		} else {
  2784  			nc.SubscribeSync("bar")
  2785  		}
  2786  
  2787  		nc, js := jsClientConnect(t, sc.randomServer(), nats.UserInfo("two", "p"), nats.Name("two"))
  2788  		defer nc.Close()
  2789  		nc.SubscribeSync("baz")
  2790  		nc.SubscribeSync("foo.bar.*")
  2791  		nc.SubscribeSync(fmt.Sprintf("id.%d", i+1))
  2792  
  2793  		js.AddStream(&nats.StreamConfig{Name: fmt.Sprintf("TEST:%d", i+1)})
  2794  	}
  2795  
  2796  	type czapi struct {
  2797  		Server *ServerInfo
  2798  		Data   *Connz
  2799  		Error  *ApiError
  2800  	}
  2801  
  2802  	parseConnz := func(buf []byte) *Connz {
  2803  		t.Helper()
  2804  		var cz czapi
  2805  		if err := json.Unmarshal(buf, &cz); err != nil {
  2806  			t.Fatalf("Unexpected error: %v", err)
  2807  		}
  2808  		if cz.Error != nil {
  2809  			t.Fatalf("Unexpected error: %+v", cz.Error)
  2810  		}
  2811  		return cz.Data
  2812  	}
  2813  
  2814  	doRequest := func(reqSubj, acc, filter string, expected int) {
  2815  		t.Helper()
  2816  		nc, _ := jsClientConnect(t, sc.randomServer(), nats.UserInfo(acc, "p"), nats.Name(acc))
  2817  		defer nc.Close()
  2818  
  2819  		mch := make(chan *nats.Msg, 9)
  2820  		sub, _ := nc.ChanSubscribe(nats.NewInbox(), mch)
  2821  
  2822  		var req []byte
  2823  		if filter != _EMPTY_ {
  2824  			req, _ = json.Marshal(&ConnzOptions{FilterSubject: filter})
  2825  		}
  2826  
  2827  		if err := nc.PublishRequest(reqSubj, sub.Subject, req); err != nil {
  2828  			t.Fatalf("Unexpected error: %v", err)
  2829  		}
  2830  
  2831  		// So we can ignore our own connection.
  2832  		cid, _ := nc.GetClientID()
  2833  		sid := nc.ConnectedServerId()
  2834  
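        		// Gather responses from all responding servers, stopping after 200ms of silence.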
  2835  		wt := time.NewTimer(200 * time.Millisecond)
  2836  		var conns []*ConnInfo
  2837  	LOOP:
  2838  		for {
  2839  			select {
  2840  			case m := <-mch:
  2841  				if len(m.Data) == 0 {
  2842  					t.Fatalf("No responders")
  2843  				}
  2844  				cr := parseConnz(m.Data)
  2845  				// For account scoped, NumConns and Total should be the same (sans limits and offsets).
  2846  				// Total should not include other accounts since that would leak information about the system.
  2847  				if filter == _EMPTY_ && cr.NumConns != cr.Total {
  2848  					t.Fatalf("NumConns and Total should be same with account scoped connz, got %+v", cr)
  2849  				}
  2850  				for _, c := range cr.Conns {
  2851  					if c.Name != acc {
  2852  						t.Fatalf("Got wrong connection name: %q vs %q for %+v", acc, c.Name, c)
  2853  					}
  2854  					if !(c.Cid == cid && cr.ID == sid) {
  2855  						conns = append(conns, c)
  2856  					}
  2857  				}
  2858  				wt.Reset(200 * time.Millisecond)
  2859  			case <-wt.C:
  2860  				break LOOP
  2861  			}
  2862  		}
  2863  		if len(conns) != expected {
  2864  			t.Fatalf("Expected to see %d conns but got %d", expected, len(conns))
  2865  		}
  2866  	}
  2867  
  2868  	doSysRequest := func(acc string, expected int) {
  2869  		t.Helper()
  2870  		doRequest("$SYS.REQ.SERVER.PING.CONNZ", acc, _EMPTY_, expected)
  2871  	}
  2872  	doAccRequest := func(acc string, expected int) {
  2873  		t.Helper()
  2874  		doRequest("$SYS.REQ.ACCOUNT.PING.CONNZ", acc, _EMPTY_, expected)
  2875  	}
  2876  	doFiltered := func(acc, filter string, expected int) {
  2877  		t.Helper()
  2878  		doRequest("$SYS.REQ.SERVER.PING.CONNZ", acc, filter, expected)
  2879  	}
  2880  
  2881  	doSysRequest("one", 20)
  2882  	doAccRequest("one", 20)
  2883  
  2884  	doSysRequest("two", 20)
  2885  	doAccRequest("two", 20)
  2886  
  2887  	// Now check filtering.
  2888  	doFiltered("one", _EMPTY_, 20)
  2889  	doFiltered("one", ">", 20)
  2890  	doFiltered("one", "bar", 10)
  2891  	doFiltered("two", "bar", 0)
  2892  	doFiltered("two", "id.1", 1)
  2893  	doFiltered("two", "id.*", 20)
  2894  	doFiltered("two", "foo.bar.*", 20)
  2895  	doFiltered("two", "foo.>", 20)
  2896  }
  2897  
  2898  func TestNoRaceCompressedConnz(t *testing.T) {
  2899  	s := RunBasicJetStreamServer(t)
  2900  	defer s.Shutdown()
  2901  
  2902  	nc, _ := jsClientConnect(t, s)
  2903  	defer nc.Close()
  2904  
  2905  	doRequest := func(compress string) {
  2906  		t.Helper()
  2907  		m := nats.NewMsg("$SYS.REQ.ACCOUNT.PING.CONNZ")
  2908  		m.Header.Add("Accept-Encoding", compress)
  2909  		resp, err := nc.RequestMsg(m, time.Second)
  2910  		if err != nil {
  2911  			t.Fatalf("Unexpected error: %v", err)
  2912  		}
  2913  		buf := resp.Data
  2914  
  2915  		// Make sure we have an encoding header.
  2916  		ce := resp.Header.Get("Content-Encoding")
  2917  		switch strings.ToLower(ce) {
  2918  		case "gzip":
  2919  			zr, err := gzip.NewReader(bytes.NewReader(buf))
  2920  			if err != nil {
  2921  				t.Fatalf("Unexpected error: %v", err)
  2922  			}
  2923  			defer zr.Close()
  2924  			buf, err = io.ReadAll(zr)
  2925  			if err != nil && err != io.ErrUnexpectedEOF {
  2926  				t.Fatalf("Unexpected error: %v", err)
  2927  			}
  2928  		case "snappy", "s2":
  2929  			sr := s2.NewReader(bytes.NewReader(buf))
  2930  			buf, err = io.ReadAll(sr)
  2931  			if err != nil && err != io.ErrUnexpectedEOF {
  2932  				t.Fatalf("Unexpected error: %v", err)
  2933  			}
  2934  		default:
  2935  			t.Fatalf("Unknown content-encoding of %q", ce)
  2936  		}
  2937  
  2938  		var cz ServerAPIConnzResponse
  2939  		if err := json.Unmarshal(buf, &cz); err != nil {
  2940  			t.Fatalf("Unexpected error: %v", err)
  2941  		}
  2942  		if cz.Error != nil {
  2943  			t.Fatalf("Unexpected error: %+v", cz.Error)
  2944  		}
  2945  	}
  2946  
  2947  	doRequest("gzip")
  2948  	doRequest("snappy")
  2949  	doRequest("s2")
  2950  }
  2951  
  2952  func TestNoRaceJetStreamClusterExtendedStreamPurge(t *testing.T) {
  2953  	for _, st := range []StorageType{FileStorage, MemoryStorage} {
  2954  		t.Run(st.String(), func(t *testing.T) {
  2955  			c := createJetStreamClusterExplicit(t, "JSC", 3)
  2956  			defer c.shutdown()
  2957  
  2958  			nc, js := jsClientConnect(t, c.randomServer())
  2959  			defer nc.Close()
  2960  
  2961  			cfg := StreamConfig{
  2962  				Name:       "KV",
  2963  				Subjects:   []string{"kv.>"},
  2964  				Storage:    st,
  2965  				Replicas:   2,
  2966  				MaxMsgsPer: 100,
  2967  			}
  2968  			req, err := json.Marshal(cfg)
  2969  			if err != nil {
  2970  				t.Fatalf("Unexpected error: %v", err)
  2971  			}
  2972  			// Do manually for now.
  2973  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  2974  			c.waitOnStreamLeader("$G", "KV")
  2975  
  2976  			si, err := js.StreamInfo("KV")
  2977  			if err != nil {
  2978  				t.Fatalf("Unexpected error: %v", err)
  2979  			}
  2980  			if si == nil || si.Config.Name != "KV" {
  2981  				t.Fatalf("StreamInfo is not correct %+v", si)
  2982  			}
  2983  
  2984  			for i := 0; i < 1000; i++ {
  2985  				js.PublishAsync("kv.foo", []byte("OK")) // seq 3i+1
  2986  				js.PublishAsync("kv.bar", []byte("OK")) // seq 3i+2
  2987  				js.PublishAsync("kv.baz", []byte("OK")) // seq 3i+3
  2988  			}
  2989  			// First retained sequence is now 2701, last is 3000.
  2990  			for i := 0; i < 700; i++ {
  2991  				js.PublishAsync(fmt.Sprintf("kv.%d", i+1), []byte("OK"))
  2992  			}
  2993  			// Now first is 2701, last is 3700.
  2994  			select {
  2995  			case <-js.PublishAsyncComplete():
  2996  			case <-time.After(10 * time.Second):
  2997  				t.Fatalf("Did not receive completion signal")
  2998  			}
  2999  
  3000  			si, err = js.StreamInfo("KV")
  3001  			if err != nil {
  3002  				t.Fatalf("Unexpected error: %v", err)
  3003  			}
  3004  			if si.State.Msgs != 1000 {
  3005  				t.Fatalf("Expected %d msgs, got %d", 1000, si.State.Msgs)
  3006  			}
  3007  
  3008  			shouldFail := func(preq *JSApiStreamPurgeRequest) {
  3009  				req, _ := json.Marshal(preq)
  3010  				resp, err := nc.Request(fmt.Sprintf(JSApiStreamPurgeT, "KV"), req, time.Second)
  3011  				if err != nil {
  3012  					t.Fatalf("Unexpected error: %v", err)
  3013  				}
  3014  				var pResp JSApiStreamPurgeResponse
  3015  				if err = json.Unmarshal(resp.Data, &pResp); err != nil {
  3016  					t.Fatalf("Unexpected error: %v", err)
  3017  				}
  3018  				if pResp.Success || pResp.Error == nil {
  3019  					t.Fatalf("Expected an error response but got none")
  3020  				}
  3021  			}
  3022  
  3023  			// Sequence and Keep should be mutually exclusive.
  3024  			shouldFail(&JSApiStreamPurgeRequest{Sequence: 10, Keep: 10})
  3025  
  3026  			purge := func(preq *JSApiStreamPurgeRequest, newTotal uint64) {
  3027  				t.Helper()
  3028  				req, _ := json.Marshal(preq)
  3029  				resp, err := nc.Request(fmt.Sprintf(JSApiStreamPurgeT, "KV"), req, time.Second)
  3030  				if err != nil {
  3031  					t.Fatalf("Unexpected error: %v", err)
  3032  				}
  3033  				var pResp JSApiStreamPurgeResponse
  3034  				if err = json.Unmarshal(resp.Data, &pResp); err != nil {
  3035  					t.Fatalf("Unexpected error: %v", err)
  3036  				}
  3037  				if !pResp.Success || pResp.Error != nil {
  3038  					t.Fatalf("Got a bad response %+v", pResp)
  3039  				}
  3040  				si, err = js.StreamInfo("KV")
  3041  				if err != nil {
  3042  					t.Fatalf("Unexpected error: %v", err)
  3043  				}
  3044  				if si.State.Msgs != newTotal {
  3045  					t.Fatalf("Expected total after purge to be %d but got %d", newTotal, si.State.Msgs)
  3046  				}
  3047  			}
  3048  			expectLeft := func(subject string, expected uint64) {
  3049  				t.Helper()
  3050  				ci, err := js.AddConsumer("KV", &nats.ConsumerConfig{Durable: "dlc", FilterSubject: subject, AckPolicy: nats.AckExplicitPolicy})
  3051  				if err != nil {
  3052  					t.Fatalf("Unexpected error: %v", err)
  3053  				}
  3054  				defer js.DeleteConsumer("KV", "dlc")
  3055  				if ci.NumPending != expected {
  3056  					t.Fatalf("Expected %d remaining but got %d", expected, ci.NumPending)
  3057  				}
  3058  			}
  3059  
  3060  			purge(&JSApiStreamPurgeRequest{Subject: "kv.foo"}, 900)
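        			// With MaxMsgsPer of 100, kv.foo, kv.bar and kv.baz each retain 100 of the 1000 messages,
        			// plus the 700 unique kv.N subjects.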
  3061  			expectLeft("kv.foo", 0)
  3062  
  3063  			purge(&JSApiStreamPurgeRequest{Subject: "kv.bar", Keep: 1}, 801)
  3064  			expectLeft("kv.bar", 1)
  3065  
  3066  			purge(&JSApiStreamPurgeRequest{Subject: "kv.baz", Sequence: 2851}, 751)
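        			// kv.baz occupies every third sequence; of its 100 retained messages (seqs 2703-3000)
        			// the 50 below sequence 2851 are purged, leaving 50 and a total of 751.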
  3067  			expectLeft("kv.baz", 50)
  3068  
  3069  			purge(&JSApiStreamPurgeRequest{Subject: "kv.*"}, 0)
  3070  
  3071  			// RESET
  3072  			js.DeleteStream("KV")
  3073  			// Do manually for now.
  3074  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  3075  			c.waitOnStreamLeader("$G", "KV")
  3076  
  3077  			if _, err := js.StreamInfo("KV"); err != nil {
  3078  				t.Fatalf("Unexpected error: %v", err)
  3079  			}
  3080  			// Put in 100.
  3081  			for i := 0; i < 100; i++ {
  3082  				js.PublishAsync("kv.foo", []byte("OK"))
  3083  			}
  3084  			select {
  3085  			case <-js.PublishAsyncComplete():
  3086  			case <-time.After(time.Second):
  3087  				t.Fatalf("Did not receive completion signal")
  3088  			}
  3089  			purge(&JSApiStreamPurgeRequest{Subject: "kv.foo", Keep: 10}, 10)
  3090  			purge(&JSApiStreamPurgeRequest{Subject: "kv.foo", Keep: 10}, 10)
  3091  			expectLeft("kv.foo", 10)
  3092  
  3093  			// RESET AGAIN
  3094  			js.DeleteStream("KV")
  3095  			// Do manually for now.
  3096  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  3097  			c.waitOnStreamLeader("$G", "KV")
  3098  
  3099  			if _, err := js.StreamInfo("KV"); err != nil {
  3100  				t.Fatalf("Unexpected error: %v", err)
  3101  			}
  3102  			// Put in 100.
  3103  			for i := 0; i < 100; i++ {
  3104  				js.Publish("kv.foo", []byte("OK"))
  3105  			}
  3106  			purge(&JSApiStreamPurgeRequest{Keep: 10}, 10)
  3107  			expectLeft(">", 10)
  3108  
  3109  			// RESET AGAIN
  3110  			js.DeleteStream("KV")
  3111  			// Do manually for now.
  3112  			nc.Request(fmt.Sprintf(JSApiStreamCreateT, cfg.Name), req, time.Second)
  3113  			if _, err := js.StreamInfo("KV"); err != nil {
  3114  				t.Fatalf("Unexpected error: %v", err)
  3115  			}
  3116  			// Put in 100.
  3117  			for i := 0; i < 100; i++ {
  3118  				js.Publish("kv.foo", []byte("OK"))
  3119  			}
  3120  			purge(&JSApiStreamPurgeRequest{Sequence: 90}, 11) // Purges up to but not including seq 90, so 90-100 remain, hence the 11.
  3121  			expectLeft(">", 11)
  3122  		})
  3123  	}
  3124  }
  3125  
  3126  func TestNoRaceJetStreamFileStoreCompaction(t *testing.T) {
  3127  	s := RunBasicJetStreamServer(t)
  3128  	defer s.Shutdown()
  3129  
  3130  	nc, js := jsClientConnect(t, s)
  3131  	defer nc.Close()
  3132  
  3133  	cfg := &nats.StreamConfig{
  3134  		Name:              "KV",
  3135  		Subjects:          []string{"KV.>"},
  3136  		MaxMsgsPerSubject: 1,
  3137  	}
  3138  	if _, err := js.AddStream(cfg); err != nil {
  3139  		t.Fatalf("Unexpected error: %v", err)
  3140  	}
  3141  
  3142  	toSend := 10_000
  3143  	data := make([]byte, 4*1024)
  3144  	crand.Read(data)
  3145  
  3146  	// First one.
  3147  	js.PublishAsync("KV.FM", data)
  3148  
  3149  	for i := 0; i < toSend; i++ {
  3150  		js.PublishAsync(fmt.Sprintf("KV.%d", i+1), data)
  3151  	}
  3152  	// Do again and overwrite the previous batch.
  3153  	for i := 0; i < toSend; i++ {
  3154  		js.PublishAsync(fmt.Sprintf("KV.%d", i+1), data)
  3155  	}
  3156  	select {
  3157  	case <-js.PublishAsyncComplete():
  3158  	case <-time.After(10 * time.Second):
  3159  		t.Fatalf("Did not receive completion signal")
  3160  	}
  3161  
  3162  	// Now check by hand the utilization level.
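        	// With MaxMsgsPerSubject of 1 the second batch fully overwrote the first,
        	// so compaction should keep utilization at or above 80%.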
  3163  	mset, err := s.GlobalAccount().lookupStream("KV")
  3164  	if err != nil {
  3165  		t.Fatalf("Unexpected error: %v", err)
  3166  	}
  3167  	total, used, _ := mset.Store().Utilization()
  3168  	if pu := 100.0 * float32(used) / float32(total); pu < 80.0 {
  3169  		t.Fatalf("Utilization is less than 80%%, got %.2f", pu)
  3170  	}
  3171  }
  3172  
  3173  func TestNoRaceJetStreamEncryptionEnabledOnRestartWithExpire(t *testing.T) {
  3174  	conf := createConfFile(t, []byte(fmt.Sprintf(`
  3175  		listen: 127.0.0.1:-1
  3176  		jetstream {
  3177  			store_dir = %q
  3178  		}
  3179  	`, t.TempDir())))
  3180  
  3181  	s, _ := RunServerWithConfig(conf)
  3182  	defer s.Shutdown()
  3183  
  3184  	config := s.JetStreamConfig()
  3185  	if config == nil {
  3186  		t.Fatalf("Expected config but got none")
  3187  	}
  3188  	defer removeDir(t, config.StoreDir)
  3189  
  3190  	nc, js := jsClientConnect(t, s)
  3191  	defer nc.Close()
  3192  
  3193  	toSend := 10_000
  3194  
  3195  	cfg := &nats.StreamConfig{
  3196  		Name:     "TEST",
  3197  		Subjects: []string{"foo", "bar"},
  3198  		MaxMsgs:  int64(toSend),
  3199  	}
  3200  	if _, err := js.AddStream(cfg); err != nil {
  3201  		t.Fatalf("Unexpected error: %v", err)
  3202  	}
  3203  
  3204  	data := make([]byte, 4*1024) // 4K payload
  3205  	crand.Read(data)
  3206  
  3207  	for i := 0; i < toSend; i++ {
  3208  		js.PublishAsync("foo", data)
  3209  		js.PublishAsync("bar", data)
  3210  	}
  3211  	select {
  3212  	case <-js.PublishAsyncComplete():
  3213  	case <-time.After(5 * time.Second):
  3214  		t.Fatalf("Did not receive completion signal")
  3215  	}
  3216  
  3217  	_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "dlc", AckPolicy: nats.AckExplicitPolicy})
  3218  	if err != nil {
  3219  		t.Fatalf("Unexpected error: %v", err)
  3220  	}
  3221  
  3222  	// Restart
  3223  	nc.Close()
  3224  	s.Shutdown()
  3225  
  3226  	ncs := fmt.Sprintf("\nlisten: 127.0.0.1:-1\njetstream: {key: %q, store_dir: %q}\n", "s3cr3t!", config.StoreDir)
  3227  	conf = createConfFile(t, []byte(ncs))
  3228  
  3229  	// Try to drain entropy to see if it affects startup time.
  3230  	drain := make([]byte, 32*1024*1024) // Pull 32Mb of crypto rand.
  3231  	crand.Read(drain)
  3232  
  3233  	start := time.Now()
  3234  	s, _ = RunServerWithConfig(conf)
  3235  	defer s.Shutdown()
  3236  	dd := time.Since(start)
  3237  	if dd > 5*time.Second {
  3238  		t.Fatalf("Restart took longer than expected: %v", dd)
  3239  	}
  3240  }
  3241  
  3242  // This test was from Ivan K. and showed a bug in the filestore implementation.
  3243  // This is skipped by default since it takes >40s to run.
  3244  func TestNoRaceJetStreamOrderedConsumerMissingMsg(t *testing.T) {
  3245  	// Uncomment to run. Needs to be on a big machine. Do not want as part of Travis tests atm.
  3246  	skip(t)
  3247  
  3248  	s := RunBasicJetStreamServer(t)
  3249  	defer s.Shutdown()
  3250  
  3251  	nc, js := jsClientConnect(t, s)
  3252  	defer nc.Close()
  3253  
  3254  	if _, err := js.AddStream(&nats.StreamConfig{
  3255  		Name:     "benchstream",
  3256  		Subjects: []string{"testsubject"},
  3257  		Replicas: 1,
  3258  	}); err != nil {
  3259  		t.Fatalf("add stream failed: %s", err)
  3260  	}
  3261  
  3262  	total := 1_000_000
  3263  
  3264  	numSubs := 10
  3265  	ch := make(chan struct{}, numSubs)
  3266  	wg := sync.WaitGroup{}
  3267  	wg.Add(numSubs)
  3268  	errCh := make(chan error, 1)
  3269  	for i := 0; i < numSubs; i++ {
  3270  		nc, js := jsClientConnect(t, s)
  3271  		defer nc.Close()
  3272  		go func(nc *nats.Conn, js nats.JetStreamContext) {
  3273  			defer wg.Done()
  3274  			received := 0
  3275  			_, err := js.Subscribe("testsubject", func(m *nats.Msg) {
  3276  				meta, _ := m.Metadata()
  3277  				if meta.Sequence.Consumer != meta.Sequence.Stream {
  3278  					nc.Close()
  3279  					errCh <- fmt.Errorf("Bad meta: %+v", meta)
  3280  				}
  3281  				received++
  3282  				if received == total {
  3283  					ch <- struct{}{}
  3284  				}
  3285  			}, nats.OrderedConsumer())
  3286  			if err != nil {
  3287  				select {
  3288  				case errCh <- fmt.Errorf("Error creating sub: %v", err):
  3289  				default:
  3290  				}
  3291  
  3292  			}
  3293  		}(nc, js)
  3294  	}
  3295  	wg.Wait()
  3296  	select {
  3297  	case e := <-errCh:
  3298  		t.Fatal(e)
  3299  	default:
  3300  	}
  3301  
  3302  	payload := make([]byte, 500)
  3303  	for i := 1; i <= total; i++ {
  3304  		js.PublishAsync("testsubject", payload)
  3305  	}
  3306  	select {
  3307  	case <-js.PublishAsyncComplete():
  3308  	case <-time.After(10 * time.Second):
  3309  		t.Fatalf("Did not send all messages")
  3310  	}
  3311  
  3312  	// Now wait for consumers to be done:
  3313  	for i := 0; i < numSubs; i++ {
  3314  		select {
  3315  		case <-ch:
  3316  		case <-time.After(10 * time.Second):
  3317  			t.Fatal("Did not receive all messages for all consumers in time")
  3318  		}
  3319  	}
  3320  
  3321  }
  3322  
  3323  // Issue #2488 - Bad accounting; cannot reproduce the stalled consumers after the last several PRs.
  3324  // The issue did show a bug in the ack logic for no-ack and interest based retention.
  3325  func TestNoRaceJetStreamClusterInterestPolicyAckNone(t *testing.T) {
  3326  	for _, test := range []struct {
  3327  		name    string
  3328  		durable string
  3329  	}{
  3330  		{"durable", "dlc"},
  3331  		{"ephemeral", _EMPTY_},
  3332  	} {
  3333  		t.Run(test.name, func(t *testing.T) {
  3334  			c := createJetStreamClusterExplicit(t, "R3S", 3)
  3335  			defer c.shutdown()
  3336  
  3337  			// Client based API
  3338  			nc, js := jsClientConnect(t, c.randomServer())
  3339  			defer nc.Close()
  3340  
  3341  			_, err := js.AddStream(&nats.StreamConfig{
  3342  				Name:      "cluster",
  3343  				Subjects:  []string{"cluster.*"},
  3344  				Retention: nats.InterestPolicy,
  3345  				Discard:   nats.DiscardOld,
  3346  				Replicas:  3,
  3347  			})
  3348  			if err != nil {
  3349  				t.Fatalf("Unexpected error: %v", err)
  3350  			}
  3351  
  3352  			var received uint32
  3353  			mh := func(m *nats.Msg) {
  3354  				atomic.AddUint32(&received, 1)
  3355  			}
  3356  
  3357  			opts := []nats.SubOpt{nats.DeliverNew(), nats.AckNone()}
  3358  			if test.durable != _EMPTY_ {
  3359  				opts = append(opts, nats.Durable(test.durable))
  3360  			}
  3361  			_, err = js.Subscribe("cluster.created", mh, opts...)
  3362  			if err != nil {
  3363  				t.Fatalf("Unexpected error: %v", err)
  3364  			}
  3365  
  3366  			msg := []byte("ACK ME")
  3367  			const total = uint32(1_000)
  3368  			for i := 0; i < int(total); i++ {
  3369  				if _, err := js.Publish("cluster.created", msg); err != nil {
  3370  					t.Fatalf("Unexpected error: %v", err)
  3371  				}
  3372  				//time.Sleep(100 * time.Microsecond)
  3373  			}
  3374  
  3375  			// Wait for all messages to be received.
  3376  			checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
  3377  				r := atomic.LoadUint32(&received)
  3378  				if r == total {
  3379  					return nil
  3380  				}
  3381  				return fmt.Errorf("Received only %d out of %d", r, total)
  3382  			})
  3383  
  3384  			checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3385  				si, err := js.StreamInfo("cluster")
  3386  				if err != nil {
  3387  					t.Fatalf("Error getting stream info: %v", err)
  3388  				}
  3389  				if si.State.Msgs != 0 {
  3390  					return fmt.Errorf("Expected no messages, got %d", si.State.Msgs)
  3391  				}
  3392  				return nil
  3393  			})
  3394  		})
  3395  	}
  3396  }
  3397  
  3398  // There was a bug in the filestore compact code that would cause a store (publish)
  3399  // with JSExpectedLastSubjSeq set to fail with "wrong last sequence: 0".
  3400  func TestNoRaceJetStreamLastSubjSeqAndFilestoreCompact(t *testing.T) {
  3401  	s := RunBasicJetStreamServer(t)
  3402  	defer s.Shutdown()
  3403  
  3404  	// Client based API
  3405  	nc, js := jsClientConnect(t, s)
  3406  	defer nc.Close()
  3407  
  3408  	_, err := js.AddStream(&nats.StreamConfig{
  3409  		Name:              "MQTT_sess",
  3410  		Subjects:          []string{"MQTT.sess.>"},
  3411  		Storage:           nats.FileStorage,
  3412  		Retention:         nats.LimitsPolicy,
  3413  		Replicas:          1,
  3414  		MaxMsgsPerSubject: 1,
  3415  	})
  3416  	if err != nil {
  3417  		t.Fatalf("Unexpected error: %v", err)
  3418  	}
  3419  
  3420  	firstPayload := make([]byte, 40)
  3421  	secondPayload := make([]byte, 380)
  3422  	for iter := 0; iter < 2; iter++ {
  3423  		for i := 0; i < 4000; i++ {
  3424  			subj := "MQTT.sess." + getHash(fmt.Sprintf("client_%d", i))
  3425  			pa, err := js.Publish(subj, firstPayload)
  3426  			if err != nil {
  3427  				t.Fatalf("Error on publish: %v", err)
  3428  			}
  3429  			m := nats.NewMsg(subj)
  3430  			m.Data = secondPayload
  3431  			eseq := strconv.FormatInt(int64(pa.Sequence), 10)
  3432  			m.Header.Set(JSExpectedLastSubjSeq, eseq)
  3433  			if _, err := js.PublishMsg(m); err != nil {
  3434  				t.Fatalf("Error on publish (iter=%v seq=%v): %v", iter+1, pa.Sequence, err)
  3435  			}
  3436  		}
  3437  	}
  3438  }
  3439  
  3440  // Issue #2548
  3441  func TestNoRaceJetStreamClusterMemoryStreamConsumerRaftGrowth(t *testing.T) {
  3442  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3443  	defer c.shutdown()
  3444  
  3445  	nc, js := jsClientConnect(t, c.randomServer())
  3446  	defer nc.Close()
  3447  
  3448  	_, err := js.AddStream(&nats.StreamConfig{
  3449  		Name:      "memory-leak",
  3450  		Subjects:  []string{"memory-leak"},
  3451  		Retention: nats.LimitsPolicy,
  3452  		MaxMsgs:   1000,
  3453  		Discard:   nats.DiscardOld,
  3454  		MaxAge:    time.Minute,
  3455  		Storage:   nats.MemoryStorage,
  3456  		Replicas:  3,
  3457  	})
  3458  	if err != nil {
  3459  		t.Fatalf("Unexpected error: %v", err)
  3460  	}
  3461  
  3462  	_, err = js.QueueSubscribe("memory-leak", "q1", func(msg *nats.Msg) {
  3463  		time.Sleep(1 * time.Second)
  3464  		msg.AckSync()
  3465  	})
  3466  	if err != nil {
  3467  		t.Fatalf("Unexpected error: %v", err)
  3468  	}
  3469  
  3470  	// Send 10k (must be > 8192, which is compactNumMin from monitorConsumer).
  3471  	msg := []byte("NATS is a connective technology that powers modern distributed systems.")
  3472  	for i := 0; i < 10_000; i++ {
  3473  		if _, err := js.Publish("memory-leak", msg); err != nil {
  3474  			t.Fatalf("Unexpected error: %v", err)
  3475  		}
  3476  	}
  3477  
  3478  	// We will verify here that the underlying raft WAL for the leader does not grow beyond 8192 messages.
  3479  	cl := c.consumerLeader("$G", "memory-leak", "q1")
  3480  	mset, err := cl.GlobalAccount().lookupStream("memory-leak")
  3481  	if err != nil {
  3482  		t.Fatalf("Unexpected error: %v", err)
  3483  	}
  3484  	o := mset.lookupConsumer("q1")
  3485  	if o == nil {
  3486  		t.Fatalf("Error looking up consumer %q", "q1")
  3487  	}
  3488  	node := o.raftNode().(*raft)
  3489  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  3490  		if ms := node.wal.(*memStore); ms.State().Msgs > 8192 {
  3491  			return fmt.Errorf("Did not compact the raft memory WAL")
  3492  		}
  3493  		return nil
  3494  	})
  3495  }
  3496  
  3497  func TestNoRaceJetStreamClusterCorruptWAL(t *testing.T) {
  3498  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3499  	defer c.shutdown()
  3500  
  3501  	nc, js := jsClientConnect(t, c.randomServer())
  3502  	defer nc.Close()
  3503  
  3504  	if _, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Subjects: []string{"foo"}, Replicas: 3}); err != nil {
  3505  		t.Fatalf("Unexpected error: %v", err)
  3506  	}
  3507  
  3508  	sub, err := js.PullSubscribe("foo", "dlc")
  3509  	if err != nil {
  3510  		t.Fatalf("Unexpected error: %v", err)
  3511  	}
  3512  
  3513  	numMsgs := 1000
  3514  	for i := 0; i < numMsgs; i++ {
  3515  		js.PublishAsync("foo", []byte("WAL"))
  3516  	}
  3517  	select {
  3518  	case <-js.PublishAsyncComplete():
  3519  	case <-time.After(5 * time.Second):
  3520  		t.Fatalf("Did not receive completion signal")
  3521  	}
  3522  
  3523  	for i, m := range fetchMsgs(t, sub, 200, 5*time.Second) {
  3524  		// Ack the first 50, and then every other one after that.
  3525  		if i < 50 || i%2 == 1 {
  3526  			m.AckSync()
  3527  		}
  3528  	}
  3529  	// Make sure acks processed.
  3530  	time.Sleep(200 * time.Millisecond)
  3531  	nc.Close()
  3532  
  3533  	// Check consumer consistency.
  3534  	checkConsumerWith := func(delivered, ackFloor uint64, ackPending int) {
  3535  		t.Helper()
  3536  		nc, js := jsClientConnect(t, c.randomServer())
  3537  		defer nc.Close()
  3538  
  3539  		checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3540  			ci, err := js.ConsumerInfo("TEST", "dlc")
  3541  			if err != nil {
  3542  				return fmt.Errorf("Unexpected error: %v", err)
  3543  			}
  3544  			if ci.Delivered.Consumer != ci.Delivered.Stream || ci.Delivered.Consumer != delivered {
  3545  				return fmt.Errorf("Expected %d for delivered, got %+v", delivered, ci.Delivered)
  3546  			}
  3547  			if ci.AckFloor.Consumer != ci.AckFloor.Stream || ci.AckFloor.Consumer != ackFloor {
  3548  				return fmt.Errorf("Expected %d for ack floor, got %+v", ackFloor, ci.AckFloor)
  3549  			}
  3550  			nm := uint64(numMsgs)
  3551  			if ci.NumPending != nm-delivered {
  3552  				return fmt.Errorf("Expected num pending to be %d, got %d", nm-delivered, ci.NumPending)
  3553  			}
  3554  			if ci.NumAckPending != ackPending {
  3555  				return fmt.Errorf("Expected num ack pending to be %d, got %d", ackPending, ci.NumAckPending)
  3556  			}
  3557  			return nil
  3558  		})
  3559  	}
  3560  
  3561  	checkConsumer := func() {
  3562  		t.Helper()
  3563  		checkConsumerWith(200, 50, 75)
  3564  	}
  3565  
  3566  	checkConsumer()
  3567  
  3568  	// Grab the consumer leader.
  3569  	cl := c.consumerLeader("$G", "TEST", "dlc")
  3570  	mset, err := cl.GlobalAccount().lookupStream("TEST")
  3571  	if err != nil {
  3572  		t.Fatalf("Unexpected error: %v", err)
  3573  	}
  3574  	o := mset.lookupConsumer("dlc")
  3575  	if o == nil {
  3576  		t.Fatalf("Error looking up consumer %q", "dlc")
  3577  	}
  3578  	// Grab the underlying raft node and the WAL (filestore); we will attempt to "corrupt" it.
  3579  	node := o.raftNode().(*raft)
  3580  	// We are doing a stop here to prevent the internal consumer snapshot from happening on exit.
  3581  	node.Stop()
  3582  	fs := node.wal.(*fileStore)
  3583  	fcfg, cfg := fs.fcfg, fs.cfg.StreamConfig
  3584  	// Stop all the servers.
  3585  	c.stopAll()
  3586  
  3587  	// Manipulate directly with cluster down.
  3588  	fs, err = newFileStore(fcfg, cfg)
  3589  	if err != nil {
  3590  		t.Fatalf("Unexpected error: %v", err)
  3591  	}
  3592  	state := fs.State()
  3593  	sm, err := fs.LoadMsg(state.LastSeq, nil)
  3594  	if err != nil {
  3595  		t.Fatalf("Unexpected error: %v", err)
  3596  	}
  3597  	ae, err := node.decodeAppendEntry(sm.msg, nil, _EMPTY_)
  3598  	if err != nil {
  3599  		t.Fatalf("Unexpected error: %v", err)
  3600  	}
  3601  
  3602  	dentry := func(dseq, sseq, dc uint64, ts int64) []byte {
  3603  		b := make([]byte, 4*binary.MaxVarintLen64+1)
  3604  		b[0] = byte(updateDeliveredOp)
  3605  		n := 1
  3606  		n += binary.PutUvarint(b[n:], dseq)
  3607  		n += binary.PutUvarint(b[n:], sseq)
  3608  		n += binary.PutUvarint(b[n:], dc)
  3609  		n += binary.PutVarint(b[n:], ts)
  3610  		return b[:n]
  3611  	}
  3612  
  3613  	// Let's put a non-contiguous AppendEntry into the system.
  3614  	ae.pindex += 10
  3615  	// Add in a delivered record (its layout is decoded in the sketch after this test).
  3616  	ae.entries = []*Entry{{EntryNormal, dentry(1000, 1000, 1, time.Now().UnixNano())}}
  3617  	encoded, err := ae.encode(nil)
  3618  	if err != nil {
  3619  		t.Fatalf("Unexpected error: %v", err)
  3620  	}
  3621  	if _, _, err := fs.StoreMsg(_EMPTY_, nil, encoded); err != nil {
  3622  		t.Fatalf("Unexpected error: %v", err)
  3623  	}
  3624  	fs.Stop()
  3625  
  3626  	c.restartAllSamePorts()
  3627  	c.waitOnStreamLeader("$G", "TEST")
  3628  	c.waitOnConsumerLeader("$G", "TEST", "dlc")
  3629  
  3630  	checkConsumer()
  3631  
  3632  	// Now we will truncate the WAL out from underneath the leader.
  3633  	// Grab the consumer leader.
  3634  
  3635  	nc, js = jsClientConnect(t, c.randomServer())
  3636  	defer nc.Close()
  3637  
  3638  	cl = c.consumerLeader("$G", "TEST", "dlc")
  3639  	mset, err = cl.GlobalAccount().lookupStream("TEST")
  3640  	require_NoError(t, err)
  3641  	o = mset.lookupConsumer("dlc")
  3642  	require_True(t, o != nil)
  3643  
  3644  	// Grab underlying raft node and the WAL (filestore) and truncate it.
  3645  	// This will simulate the WAL losing state due to truncation, and we want to make sure it recovers.
  3646  
  3647  	fs = o.raftNode().(*raft).wal.(*fileStore)
  3648  	state = fs.State()
  3649  	err = fs.Truncate(state.FirstSeq)
  3650  	require_True(t, err == nil || err == ErrInvalidSequence)
  3651  	state = fs.State()
  3652  
  3653  	sub, err = js.PullSubscribe("foo", "dlc")
  3654  	require_NoError(t, err)
  3655  
  3656  	// This will cause us to step down and truncate our WAL.
  3657  	sub.Fetch(100)
  3658  	c.waitOnConsumerLeader("$G", "TEST", "dlc")
  3659  	// We can't trust the results beyond having a leader back in place and the ackFloor.
  3660  	ci, err := js.ConsumerInfo("TEST", "dlc")
  3661  	require_NoError(t, err)
  3662  	if ci.AckFloor.Consumer != ci.AckFloor.Stream || ci.AckFloor.Consumer != 50 {
  3663  		t.Fatalf("Expected %d for ack floor, got %+v", 50, ci.AckFloor)
  3664  	}
  3665  }
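
// The dentry helper above builds an updateDeliveredOp record: a one byte op code followed
// by uvarint encoded delivered sequence, stream sequence and delivery count, then a varint
// timestamp. For reference, here is a minimal decoding sketch that simply inverts dentry;
// it is a hypothetical helper and is not used by the test or the server.
func sketchDecodeDelivered(buf []byte) (dseq, sseq, dc uint64, ts int64) {
	n := 1 // Skip the leading op code byte (updateDeliveredOp).
	var m int
	dseq, m = binary.Uvarint(buf[n:])
	n += m
	sseq, m = binary.Uvarint(buf[n:])
	n += m
	dc, m = binary.Uvarint(buf[n:])
	n += m
	ts, _ = binary.Varint(buf[n:])
	return dseq, sseq, dc, ts
}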
  3666  
  3667  func TestNoRaceJetStreamClusterInterestRetentionDeadlock(t *testing.T) {
  3668  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3669  	defer c.shutdown()
  3670  
  3671  	// Client based API
  3672  	s := c.randomServer()
  3673  	nc, js := jsClientConnect(t, s)
  3674  	defer nc.Close()
  3675  
  3676  	// This can trigger a deadlock with the current architecture.
  3677  	// If the stream is !limitsRetention and the consumer is DIRECT with ack none, we will try to place the msg seq
  3678  	// onto a chan for the stream to consider removing. All of the conditions above must hold to trigger it.
  3679  
  3680  	// We will attempt to trigger it here with a stream mirror setup, which uses an R=1 DIRECT consumer to replicate msgs.
  3681  	_, err := js.AddStream(&nats.StreamConfig{Name: "S", Retention: nats.InterestPolicy, Storage: nats.MemoryStorage})
  3682  	if err != nil {
  3683  		t.Fatalf("Unexpected error: %v", err)
  3684  	}
  3685  
  3686  	// Create a mirror which will create the consumer profile to trigger.
  3687  	_, err = js.AddStream(&nats.StreamConfig{Name: "M", Mirror: &nats.StreamSource{Name: "S"}})
  3688  	if err != nil {
  3689  		t.Fatalf("Unexpected error: %v", err)
  3690  	}
  3691  
  3692  	// Queue up a lot of messages.
  3693  	numRequests := 20_000
  3694  	for i := 0; i < numRequests; i++ {
  3695  		js.PublishAsync("S", []byte("Q"))
  3696  	}
  3697  	select {
  3698  	case <-js.PublishAsyncComplete():
  3699  	case <-time.After(5 * time.Second):
  3700  		t.Fatalf("Did not receive completion signal")
  3701  	}
  3702  
  3703  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3704  		si, err := js.StreamInfo("S")
  3705  		if err != nil {
  3706  			t.Fatalf("Unexpected error: %v", err)
  3707  		}
  3708  		if si.State.Msgs != 0 {
  3709  			return fmt.Errorf("Expected 0 msgs, got state: %+v", si.State)
  3710  		}
  3711  		return nil
  3712  	})
  3713  }
  3714  
  3715  func TestNoRaceJetStreamClusterMaxConsumersAndDirect(t *testing.T) {
  3716  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3717  	defer c.shutdown()
  3718  
  3719  	// Client based API
  3720  	s := c.randomServer()
  3721  	nc, js := jsClientConnect(t, s)
  3722  	defer nc.Close()
  3723  
  3724  	// We want to make sure max consumer limits do not affect mirrors or sources etc.
  3725  	_, err := js.AddStream(&nats.StreamConfig{Name: "S", Storage: nats.MemoryStorage, MaxConsumers: 1})
  3726  	if err != nil {
  3727  		t.Fatalf("Unexpected error: %v", err)
  3728  	}
  3729  
  3730  	var mirrors []string
  3731  	for i := 0; i < 10; i++ {
  3732  		// Create a mirror.
  3733  		mname := fmt.Sprintf("M-%d", i+1)
  3734  		mirrors = append(mirrors, mname)
  3735  		_, err = js.AddStream(&nats.StreamConfig{Name: mname, Mirror: &nats.StreamSource{Name: "S"}})
  3736  		if err != nil {
  3737  			t.Fatalf("Unexpected error: %v", err)
  3738  		}
  3739  	}
  3740  
  3741  	// Queue up messages.
  3742  	numRequests := 20
  3743  	for i := 0; i < numRequests; i++ {
  3744  		js.Publish("S", []byte("Q"))
  3745  	}
  3746  
  3747  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3748  		for _, mname := range mirrors {
  3749  			si, err := js.StreamInfo(mname)
  3750  			if err != nil {
  3751  				t.Fatalf("Unexpected error: %v", err)
  3752  			}
  3753  			if si.State.Msgs != uint64(numRequests) {
  3754  				return fmt.Errorf("Expected %d msgs for %q, got state: %+v", numRequests, mname, si.State)
  3755  			}
  3756  		}
  3757  		return nil
  3758  	})
  3759  }
  3760  
  3761  // Make sure when we try to hard reset a stream state in a cluster that we also re-create the consumers.
  3762  func TestNoRaceJetStreamClusterStreamReset(t *testing.T) {
  3763  	// Speed up raft
  3764  	omin, omax, ohb := minElectionTimeout, maxElectionTimeout, hbInterval
  3765  	minElectionTimeout = 250 * time.Millisecond
  3766  	maxElectionTimeout = time.Second
  3767  	hbInterval = 50 * time.Millisecond
  3768  	defer func() {
  3769  		minElectionTimeout = omin
  3770  		maxElectionTimeout = omax
  3771  		hbInterval = ohb
  3772  	}()
  3773  
  3774  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3775  	defer c.shutdown()
  3776  
  3777  	// Client based API
  3778  	s := c.randomServer()
  3779  	nc, js := jsClientConnect(t, s)
  3780  	defer nc.Close()
  3781  
  3782  	_, err := js.AddStream(&nats.StreamConfig{
  3783  		Name:      "TEST",
  3784  		Subjects:  []string{"foo.*"},
  3785  		Replicas:  2,
  3786  		Retention: nats.WorkQueuePolicy,
  3787  	})
  3788  	if err != nil {
  3789  		t.Fatalf("Unexpected error: %v", err)
  3790  	}
  3791  
  3792  	numRequests := 20
  3793  	for i := 0; i < numRequests; i++ {
  3794  		js.Publish("foo.created", []byte("REQ"))
  3795  	}
  3796  
  3797  	// Durable.
  3798  	sub, err := js.SubscribeSync("foo.created", nats.Durable("d1"))
  3799  	if err != nil {
  3800  		t.Fatalf("Unexpected error: %v", err)
  3801  	}
  3802  	defer sub.Unsubscribe()
  3803  
  3804  	si, err := js.StreamInfo("TEST")
  3805  	require_NoError(t, err)
  3806  	require_True(t, si.State.Msgs == uint64(numRequests))
  3807  
  3808  	// Let things settle a bit for the goroutine checks.
  3809  	time.Sleep(500 * time.Millisecond)
  3810  
  3811  	// Grab the number of goroutines.
  3812  	base := runtime.NumGoroutine()
  3813  
  3814  	// Make the consumer busy here by asynchronously sending a bunch of messages.
  3815  	for i := 0; i < numRequests*10; i++ {
  3816  		js.PublishAsync("foo.created", []byte("REQ"))
  3817  	}
  3818  
  3819  	// Grab a server that is the consumer leader for the durable.
  3820  	cl := c.consumerLeader("$G", "TEST", "d1")
  3821  	mset, err := cl.GlobalAccount().lookupStream("TEST")
  3822  	if err != nil {
  3823  		t.Fatalf("Unexpected error: %v", err)
  3824  	}
  3825  	// Do a hard reset here by hand.
  3826  	mset.resetClusteredState(nil)
  3827  
  3828  	// Wait until we have the consumer leader re-elected.
  3829  	c.waitOnConsumerLeader("$G", "TEST", "d1")
  3830  
  3831  	// Make sure we can get the consumer info eventually.
  3832  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  3833  		_, err := js.ConsumerInfo("TEST", "d1", nats.MaxWait(250*time.Millisecond))
  3834  		return err
  3835  	})
  3836  
  3837  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  3838  		if after := runtime.NumGoroutine(); base > after {
  3839  			return fmt.Errorf("Expected %d go routines, got %d", base, after)
  3840  		}
  3841  		return nil
  3842  	})
  3843  
  3844  	// Simulate a low level write error on our consumer and make sure we can recover etc.
  3845  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  3846  		if cl = c.consumerLeader("$G", "TEST", "d1"); cl != nil {
  3847  			return nil
  3848  		}
  3849  		return errors.New("waiting on consumer leader")
  3850  	})
  3851  
  3852  	mset, err = cl.GlobalAccount().lookupStream("TEST")
  3853  	if err != nil {
  3854  		t.Fatalf("Unexpected error: %v", err)
  3855  	}
  3856  	o := mset.lookupConsumer("d1")
  3857  	if o == nil {
  3858  		t.Fatalf("Did not retrieve consumer")
  3859  	}
  3860  	node := o.raftNode().(*raft)
  3861  	if node == nil {
  3862  		t.Fatalf("could not retrieve the raft node for consumer")
  3863  	}
  3864  
  3865  	nc.Close()
  3866  	node.setWriteErr(io.ErrShortWrite)
  3867  
  3868  	c.stopAll()
  3869  	c.restartAll()
  3870  
  3871  	c.waitOnStreamLeader("$G", "TEST")
  3872  	c.waitOnConsumerLeader("$G", "TEST", "d1")
  3873  }
  3874  
  3875  // Reports of high CPU on compaction for a KV store.
  3876  func TestNoRaceJetStreamKeyValueCompaction(t *testing.T) {
  3877  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3878  	defer c.shutdown()
  3879  
  3880  	// Client based API
  3881  	nc, js := jsClientConnect(t, c.randomServer())
  3882  	defer nc.Close()
  3883  
  3884  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  3885  		Bucket:   "COMPACT",
  3886  		Replicas: 3,
  3887  	})
  3888  	if err != nil {
  3889  		t.Fatalf("Unexpected error: %v", err)
  3890  	}
  3891  
  3892  	value := strings.Repeat("A", 128*1024)
  3893  	for i := 0; i < 5_000; i++ {
  3894  		key := fmt.Sprintf("K-%d", rand.Intn(256)+1)
  3895  		if _, err := kv.PutString(key, value); err != nil {
  3896  			t.Fatalf("Unexpected error: %v", err)
  3897  		}
  3898  	}
  3899  }
  3900  
  3901  // Trying to recreate an issue rip saw with KV and server restarts: complaints about a sequence
  3902  // mismatch for a few minutes and growing memory.
  3903  func TestNoRaceJetStreamClusterStreamSeqMismatchIssue(t *testing.T) {
  3904  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3905  	defer c.shutdown()
  3906  
  3907  	// Client based API
  3908  	nc, js := jsClientConnect(t, c.randomServer())
  3909  	defer nc.Close()
  3910  
  3911  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  3912  		Bucket:   "MM",
  3913  		Replicas: 3,
  3914  		TTL:      500 * time.Millisecond,
  3915  	})
  3916  	require_NoError(t, err)
  3917  
  3918  	for i := 1; i <= 10; i++ {
  3919  		if _, err := kv.PutString("k", "1"); err != nil {
  3920  			t.Fatalf("Unexpected error: %v", err)
  3921  		}
  3922  	}
  3923  	// Close in case we are connected here. Will recreate.
  3924  	nc.Close()
  3925  
  3926  	// Shutdown a non-leader.
  3927  	s := c.randomNonStreamLeader("$G", "KV_MM")
  3928  	s.Shutdown()
  3929  
  3930  	nc, js = jsClientConnect(t, c.randomServer())
  3931  	defer nc.Close()
  3932  
  3933  	kv, err = js.KeyValue("MM")
  3934  	require_NoError(t, err)
  3935  
  3936  	// Now change the state of the stream such that we have to do a compact upon restart
  3937  	// of the downed server.
  3938  	for i := 1; i <= 10; i++ {
  3939  		if _, err := kv.PutString("k", "2"); err != nil {
  3940  			t.Fatalf("Unexpected error: %v", err)
  3941  		}
  3942  	}
  3943  
  3944  	// Raft could save us here, so we need to run a compact (snapshot) on the leader.
  3945  	snapshotLeader := func() {
  3946  		sl := c.streamLeader("$G", "KV_MM")
  3947  		if sl == nil {
  3948  			t.Fatalf("Did not get the leader")
  3949  		}
  3950  		mset, err := sl.GlobalAccount().lookupStream("KV_MM")
  3951  		require_NoError(t, err)
  3952  		node := mset.raftNode()
  3953  		if node == nil {
  3954  			t.Fatalf("Could not get stream group")
  3955  		}
  3956  		if err := node.InstallSnapshot(mset.stateSnapshot()); err != nil {
  3957  			t.Fatalf("Error installing snapshot: %v", err)
  3958  		}
  3959  	}
  3960  
  3961  	// Now wait for expiration
  3962  	time.Sleep(time.Second)
  3963  
  3964  	snapshotLeader()
  3965  
  3966  	s = c.restartServer(s)
  3967  	c.waitOnServerCurrent(s)
  3968  
  3969  	// We want to make sure we do not reset the raft state on a catchup that yields no requests.
  3970  	// The bug was that if we did not actually request any help from the snapshot we did not set mset.lseq properly,
  3971  	// so sending the next batch would cause a raft reset due to a cluster reset for our stream.
  3972  	mset, err := s.GlobalAccount().lookupStream("KV_MM")
  3973  	require_NoError(t, err)
  3974  
  3975  	for i := 1; i <= 10; i++ {
  3976  		if _, err := kv.PutString("k1", "X"); err != nil {
  3977  			t.Fatalf("Unexpected error: %v", err)
  3978  		}
  3979  	}
  3980  
  3981  	c.waitOnStreamCurrent(s, "$G", "KV_MM")
  3982  
  3983  	// Make sure we did not reset our stream.
  3984  	msetNew, err := s.GlobalAccount().lookupStream("KV_MM")
  3985  	require_NoError(t, err)
  3986  	if msetNew != mset {
  3987  		t.Fatalf("Stream was reset")
  3988  	}
  3989  }
  3990  
  3991  func TestNoRaceJetStreamClusterStreamDropCLFS(t *testing.T) {
  3992  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3993  	defer c.shutdown()
  3994  
  3995  	// Client based API
  3996  	nc, js := jsClientConnect(t, c.randomServer())
  3997  	defer nc.Close()
  3998  
  3999  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  4000  		Bucket:   "CLFS",
  4001  		Replicas: 3,
  4002  	})
  4003  	require_NoError(t, err)
  4004  
  4005  	// Will work
  4006  	_, err = kv.Create("k.1", []byte("X"))
  4007  	require_NoError(t, err)
  4008  	// Drive up CLFS state on leader.
  4009  	for i := 0; i < 10; i++ {
  4010  		_, err = kv.Create("k.1", []byte("X"))
  4011  		require_Error(t, err)
  4012  	}
  4013  	// Bookend with new key success.
  4014  	_, err = kv.Create("k.2", []byte("Z"))
  4015  	require_NoError(t, err)
  4016  
  4017  	// Close in case we are connected here. Will recreate.
  4018  	nc.Close()
  4019  
  4020  	// Shutdown, which will also clear clfs.
  4021  	s := c.randomNonStreamLeader("$G", "KV_CLFS")
  4022  	s.Shutdown()
  4023  
  4024  	nc, js = jsClientConnect(t, c.randomServer())
  4025  	defer nc.Close()
  4026  
  4027  	kv, err = js.KeyValue("CLFS")
  4028  	require_NoError(t, err)
  4029  
  4030  	// Drive up CLFS state on leader.
  4031  	for i := 0; i < 10; i++ {
  4032  		_, err = kv.Create("k.1", []byte("X"))
  4033  		require_Error(t, err)
  4034  	}
  4035  
  4036  	sl := c.streamLeader("$G", "KV_CLFS")
  4037  	if sl == nil {
  4038  		t.Fatalf("Did not get the leader")
  4039  	}
  4040  	mset, err := sl.GlobalAccount().lookupStream("KV_CLFS")
  4041  	require_NoError(t, err)
  4042  	node := mset.raftNode()
  4043  	if node == nil {
  4044  		t.Fatalf("Could not get stream group")
  4045  	}
  4046  	if err := node.InstallSnapshot(mset.stateSnapshot()); err != nil {
  4047  		t.Fatalf("Error installing snapshot: %v", err)
  4048  	}
  4049  
  4050  	_, err = kv.Create("k.3", []byte("ZZZ"))
  4051  	require_NoError(t, err)
  4052  
  4053  	s = c.restartServer(s)
  4054  	c.waitOnServerCurrent(s)
  4055  
  4056  	mset, err = s.GlobalAccount().lookupStream("KV_CLFS")
  4057  	require_NoError(t, err)
  4058  
  4059  	_, err = kv.Create("k.4", []byte("YYY"))
  4060  	require_NoError(t, err)
  4061  
  4062  	c.waitOnStreamCurrent(s, "$G", "KV_CLFS")
  4063  
  4064  	// Make sure we did not reset our stream.
  4065  	msetNew, err := s.GlobalAccount().lookupStream("KV_CLFS")
  4066  	require_NoError(t, err)
  4067  	if msetNew != mset {
  4068  		t.Fatalf("Stream was reset")
  4069  	}
  4070  }
  4071  
  4072  func TestNoRaceJetStreamMemstoreWithLargeInteriorDeletes(t *testing.T) {
  4073  	s := RunBasicJetStreamServer(t)
  4074  	defer s.Shutdown()
  4075  
  4076  	// Client for API requests.
  4077  	nc, js := jsClientConnect(t, s)
  4078  	defer nc.Close()
  4079  
  4080  	_, err := js.AddStream(&nats.StreamConfig{
  4081  		Name:              "TEST",
  4082  		Subjects:          []string{"foo", "bar"},
  4083  		MaxMsgsPerSubject: 1,
  4084  		Storage:           nats.MemoryStorage,
  4085  	})
  4086  	require_NoError(t, err)
  4087  
  4088  	acc, err := s.lookupAccount("$G")
  4089  	require_NoError(t, err)
  4090  	mset, err := acc.lookupStream("TEST")
  4091  	require_NoError(t, err)
  4092  
  4093  	msg := []byte("Hello World!")
  4094  	if _, err := js.PublishAsync("foo", msg); err != nil {
  4095  		t.Fatalf("Unexpected publish error: %v", err)
  4096  	}
  4097  	for i := 1; i <= 1_000_000; i++ {
  4098  		if _, err := js.PublishAsync("bar", msg); err != nil {
  4099  			t.Fatalf("Unexpected publish error: %v", err)
  4100  		}
  4101  	}
  4102  	select {
  4103  	case <-js.PublishAsyncComplete():
  4104  	case <-time.After(5 * time.Second):
  4105  		t.Fatalf("Did not receive completion signal")
  4106  	}
  4107  
  4108  	now := time.Now()
  4109  	ss := mset.stateWithDetail(true)
  4110  	// Before the fix the snapshot for this test would be > 200ms on my setup.
  4111  	if elapsed := time.Since(now); elapsed > 100*time.Millisecond {
  4112  		t.Fatalf("Took too long to snapshot: %v", elapsed)
  4113  	} else if elapsed > 50*time.Millisecond {
  4114  		t.Logf("WRN: Took longer than usual to snapshot: %v", elapsed)
  4115  	}
  4116  
  4117  	if ss.Msgs != 2 || ss.FirstSeq != 1 || ss.LastSeq != 1_000_001 || ss.NumDeleted != 999999 {
  4118  		// Clear this so we do not print the full deleted list on error.
  4119  		ss.Deleted = nil
  4120  		t.Fatalf("Bad State: %+v", ss)
  4121  	}
  4122  }
  4123  
  4124  // This is related to a reported issue where we were exhausting threads by trying to
  4125  // clean up too many consumers at the same time (a bounded-concurrency sketch follows this test).
  4126  // https://github.com/nats-io/nats-server/issues/2742
  4127  func TestNoRaceJetStreamConsumerFileStoreConcurrentDiskIO(t *testing.T) {
  4128  	storeDir := t.TempDir()
  4129  
  4130  	// Artificially adjust our environment for this test.
  4131  	gmp := runtime.GOMAXPROCS(32)
  4132  	defer runtime.GOMAXPROCS(gmp)
  4133  
  4134  	maxT := debug.SetMaxThreads(1050) // 1024 now
  4135  	defer debug.SetMaxThreads(maxT)
  4136  
  4137  	fs, err := newFileStore(FileStoreConfig{StoreDir: storeDir}, StreamConfig{Name: "MT", Storage: FileStorage})
  4138  	require_NoError(t, err)
  4139  	defer fs.Stop()
  4140  
  4141  	startCh := make(chan bool)
  4142  	var wg sync.WaitGroup
  4143  	var swg sync.WaitGroup
  4144  
  4145  	ts := time.Now().UnixNano()
  4146  
  4147  	// Create 1000 consumerStores
  4148  	n := 1000
  4149  	swg.Add(n)
  4150  
  4151  	for i := 1; i <= n; i++ {
  4152  		name := fmt.Sprintf("o%d", i)
  4153  		o, err := fs.ConsumerStore(name, &ConsumerConfig{AckPolicy: AckExplicit})
  4154  		require_NoError(t, err)
  4155  		wg.Add(1)
  4156  		swg.Done()
  4157  
  4158  		go func() {
  4159  			defer wg.Done()
  4160  			// Will make everyone run concurrently.
  4161  			<-startCh
  4162  			o.UpdateDelivered(22, 22, 1, ts)
  4163  			buf, _ := o.(*consumerFileStore).encodeState()
  4164  			o.(*consumerFileStore).writeState(buf)
  4165  			o.Delete()
  4166  		}()
  4167  	}
  4168  
  4169  	swg.Wait()
  4170  	close(startCh)
  4171  	wg.Wait()
  4172  }
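
// The issue above came from launching an unbounded number of goroutines that block in disk
// I/O, each of which can pin an OS thread. A common mitigation for that pattern, shown here
// only as a sketch of the general technique and not as what the server itself does, is to
// gate the goroutines with a buffered-channel semaphore.
func sketchRunBounded(n, maxConcurrent int, work func(i int)) {
	sem := make(chan struct{}, maxConcurrent)
	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		sem <- struct{}{} // Acquire a slot before starting.
		go func(i int) {
			defer wg.Done()
			defer func() { <-sem }() // Release the slot when done.
			work(i)
		}(i)
	}
	wg.Wait()
}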
  4173  
  4174  func TestNoRaceJetStreamClusterHealthz(t *testing.T) {
  4175  	c := createJetStreamCluster(t, jsClusterAccountsTempl, "HZ", _EMPTY_, 3, 23033, true)
  4176  	defer c.shutdown()
  4177  
  4178  	nc1, js1 := jsClientConnect(t, c.randomServer(), nats.UserInfo("one", "p"))
  4179  	defer nc1.Close()
  4180  
  4181  	nc2, js2 := jsClientConnect(t, c.randomServer(), nats.UserInfo("two", "p"))
  4182  	defer nc2.Close()
  4183  
  4184  	var err error
  4185  	for _, sname := range []string{"foo", "bar", "baz"} {
  4186  		_, err = js1.AddStream(&nats.StreamConfig{Name: sname, Replicas: 3})
  4187  		require_NoError(t, err)
  4188  		_, err = js2.AddStream(&nats.StreamConfig{Name: sname, Replicas: 3})
  4189  		require_NoError(t, err)
  4190  	}
  4191  	// R1
  4192  	_, err = js1.AddStream(&nats.StreamConfig{Name: "r1", Replicas: 1})
  4193  	require_NoError(t, err)
  4194  
  4195  	// Now shutdown then send a bunch of data.
  4196  	s := c.servers[0]
  4197  	s.Shutdown()
  4198  
  4199  	for i := 0; i < 5_000; i++ {
  4200  		_, err = js1.PublishAsync("foo", []byte("OK"))
  4201  		require_NoError(t, err)
  4202  		_, err = js2.PublishAsync("bar", []byte("OK"))
  4203  		require_NoError(t, err)
  4204  	}
  4205  	select {
  4206  	case <-js1.PublishAsyncComplete():
  4207  	case <-time.After(5 * time.Second):
  4208  		t.Fatalf("Did not receive completion signal")
  4209  	}
  4210  	select {
  4211  	case <-js2.PublishAsyncComplete():
  4212  	case <-time.After(5 * time.Second):
  4213  		t.Fatalf("Did not receive completion signal")
  4214  	}
  4215  
  4216  	s = c.restartServer(s)
  4217  	opts := s.getOpts()
  4218  	opts.HTTPHost = "127.0.0.1"
  4219  	opts.HTTPPort = 11222
  4220  	err = s.StartMonitoring()
  4221  	require_NoError(t, err)
  4222  	url := fmt.Sprintf("http://127.0.0.1:%d/healthz", opts.HTTPPort)
  4223  
  4224  	getHealth := func() (int, *HealthStatus) {
  4225  		resp, err := http.Get(url)
  4226  		require_NoError(t, err)
  4227  		defer resp.Body.Close()
  4228  		body, err := io.ReadAll(resp.Body)
  4229  		require_NoError(t, err)
  4230  		var hs HealthStatus
  4231  		err = json.Unmarshal(body, &hs)
  4232  		require_NoError(t, err)
  4233  		return resp.StatusCode, &hs
  4234  	}
  4235  
  4236  	errors := 0
  4237  	checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
  4238  		code, hs := getHealth()
  4239  		if code >= 200 && code < 300 {
  4240  			return nil
  4241  		}
  4242  		errors++
  4243  		return fmt.Errorf("Got %d status with %+v", code, hs)
  4244  	})
  4245  	if errors == 0 {
  4246  		t.Fatalf("Expected to have some errors until we became current, got none")
  4247  	}
  4248  }
  4249  
  4250  // Test that we can receive larger messages with stream subject details.
  4251  // Also test that we will fail at some point and the user can fall back to
  4252  // an ordered consumer, as we do with watch for the KV Keys() call.
  4253  func TestNoRaceJetStreamStreamInfoSubjectDetailsLimits(t *testing.T) {
  4254  	conf := createConfFile(t, []byte(fmt.Sprintf(`
  4255  		listen: 127.0.0.1:-1
  4256  		jetstream {
  4257  			store_dir = %q
  4258  		}
  4259  		accounts: {
  4260  		  default: {
  4261  			jetstream: true
  4262  			users: [ {user: me, password: pwd} ]
  4263  			limits { max_payload: 512 }
  4264  		  }
  4265  		}
  4266  	`, t.TempDir())))
  4267  
  4268  	s, _ := RunServerWithConfig(conf)
  4269  	if config := s.JetStreamConfig(); config != nil {
  4270  		defer removeDir(t, config.StoreDir)
  4271  	}
  4272  	defer s.Shutdown()
  4273  
  4274  	nc, js := jsClientConnect(t, s, nats.UserInfo("me", "pwd"))
  4275  	defer nc.Close()
  4276  
  4277  	// Make sure to flush so we process the 2nd INFO.
  4278  	nc.Flush()
  4279  
  4280  	// Make sure we cannot send larger than 512 bytes.
  4281  	// But we can receive larger.
  4282  	sub, err := nc.SubscribeSync("foo")
  4283  	require_NoError(t, err)
  4284  	err = nc.Publish("foo", []byte(strings.Repeat("A", 600)))
  4285  	require_Error(t, err, nats.ErrMaxPayload)
  4286  	sub.Unsubscribe()
  4287  
  4288  	_, err = js.AddStream(&nats.StreamConfig{
  4289  		Name:     "TEST",
  4290  		Subjects: []string{"*", "X.*"},
  4291  	})
  4292  	require_NoError(t, err)
  4293  
  4294  	n := JSMaxSubjectDetails
  4295  	for i := 0; i < n; i++ {
  4296  		_, err := js.PublishAsync(fmt.Sprintf("X.%d", i), []byte("OK"))
  4297  		require_NoError(t, err)
  4298  	}
  4299  	select {
  4300  	case <-js.PublishAsyncComplete():
  4301  	case <-time.After(5 * time.Second):
  4302  		t.Fatalf("Did not receive completion signal")
  4303  	}
  4304  
  4305  	// Need to grab StreamInfo by hand for now.
  4306  	req, err := json.Marshal(&JSApiStreamInfoRequest{SubjectsFilter: "X.*"})
  4307  	require_NoError(t, err)
  4308  	resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "TEST"), req, 5*time.Second)
  4309  	require_NoError(t, err)
  4310  	var si StreamInfo
  4311  	err = json.Unmarshal(resp.Data, &si)
  4312  	require_NoError(t, err)
  4313  	if len(si.State.Subjects) != n {
  4314  		t.Fatalf("Expected to get %d subject details, got %d", n, len(si.State.Subjects))
  4315  	}
  4316  
  4317  	// Now add one more message to check pagination (see the paging sketch after this test).
  4318  	_, err = js.Publish("foo", []byte("TOO MUCH"))
  4319  	require_NoError(t, err)
  4320  
  4321  	req, err = json.Marshal(&JSApiStreamInfoRequest{ApiPagedRequest: ApiPagedRequest{Offset: n}, SubjectsFilter: nats.AllKeys})
  4322  	require_NoError(t, err)
  4323  	resp, err = nc.Request(fmt.Sprintf(JSApiStreamInfoT, "TEST"), req, 5*time.Second)
  4324  	require_NoError(t, err)
  4325  	var sir JSApiStreamInfoResponse
  4326  	err = json.Unmarshal(resp.Data, &sir)
  4327  	require_NoError(t, err)
  4328  	if len(sir.State.Subjects) != 1 {
  4329  		t.Fatalf("Expected to get 1 extra subject detail, got %d", len(sir.State.Subjects))
  4330  	}
  4331  }
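
// The pagination check above relies on the server capping subject details per response and
// honoring the Offset in the paged request. Below is a minimal sketch of walking all pages
// and counting subject details; it is a hypothetical helper that assumes a page shorter
// than JSMaxSubjectDetails means we have seen everything.
func sketchCountSubjectDetails(t *testing.T, nc *nats.Conn, stream, filter string) int {
	t.Helper()
	total, offset := 0, 0
	for {
		req, err := json.Marshal(&JSApiStreamInfoRequest{
			ApiPagedRequest: ApiPagedRequest{Offset: offset},
			SubjectsFilter:  filter,
		})
		require_NoError(t, err)
		resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, stream), req, 5*time.Second)
		require_NoError(t, err)
		var sir JSApiStreamInfoResponse
		require_NoError(t, json.Unmarshal(resp.Data, &sir))
		got := len(sir.State.Subjects)
		total += got
		offset += got
		if got < JSMaxSubjectDetails {
			return total
		}
	}
}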
  4332  
  4333  func TestNoRaceJetStreamSparseConsumers(t *testing.T) {
  4334  	s := RunBasicJetStreamServer(t)
  4335  	defer s.Shutdown()
  4336  
  4337  	nc, js := jsClientConnect(t, s)
  4338  	defer nc.Close()
  4339  
  4340  	msg := []byte("ok")
  4341  
  4342  	cases := []struct {
  4343  		name    string
  4344  		mconfig *nats.StreamConfig
  4345  	}{
  4346  		{"MemoryStore", &nats.StreamConfig{Name: "TEST", Storage: nats.MemoryStorage, MaxMsgsPerSubject: 25_000_000,
  4347  			Subjects: []string{"*"}}},
  4348  		{"FileStore", &nats.StreamConfig{Name: "TEST", Storage: nats.FileStorage, MaxMsgsPerSubject: 25_000_000,
  4349  			Subjects: []string{"*"}}},
  4350  	}
  4351  	for _, c := range cases {
  4352  		t.Run(c.name, func(t *testing.T) {
  4353  			js.DeleteStream("TEST")
  4354  			_, err := js.AddStream(c.mconfig)
  4355  			require_NoError(t, err)
  4356  
  4357  			// We will purposely place foo msgs near the beginning, then in the middle, then at the end.
  4358  			for n := 0; n < 2; n++ {
  4359  				_, err = js.PublishAsync("foo", msg, nats.StallWait(800*time.Millisecond))
  4360  				require_NoError(t, err)
  4361  
  4362  				for i := 0; i < 1_000_000; i++ {
  4363  					_, err = js.PublishAsync("bar", msg, nats.StallWait(800*time.Millisecond))
  4364  					require_NoError(t, err)
  4365  				}
  4366  				_, err = js.PublishAsync("foo", msg, nats.StallWait(800*time.Millisecond))
  4367  				require_NoError(t, err)
  4368  			}
  4369  			select {
  4370  			case <-js.PublishAsyncComplete():
  4371  			case <-time.After(5 * time.Second):
  4372  				t.Fatalf("Did not receive completion signal")
  4373  			}
  4374  
  4375  			// Now create a consumer on foo.
  4376  			ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{DeliverSubject: "x.x", FilterSubject: "foo", AckPolicy: nats.AckNonePolicy})
  4377  			require_NoError(t, err)
  4378  
  4379  			done, received := make(chan bool), uint64(0)
  4380  
  4381  			cb := func(m *nats.Msg) {
  4382  				received++
  4383  				if received >= ci.NumPending {
  4384  					done <- true
  4385  				}
  4386  			}
  4387  
  4388  			sub, err := nc.Subscribe("x.x", cb)
  4389  			require_NoError(t, err)
  4390  			defer sub.Unsubscribe()
  4391  			start := time.Now()
  4392  			var elapsed time.Duration
  4393  
  4394  			select {
  4395  			case <-done:
  4396  				elapsed = time.Since(start)
  4397  			case <-time.After(10 * time.Second):
  4398  				t.Fatal("Did not receive all messages for all consumers in time")
  4399  			}
  4400  
  4401  			if elapsed > 500*time.Millisecond {
  4402  				t.Fatalf("Getting all messages took longer than expected: %v", elapsed)
  4403  			}
  4404  		})
  4405  	}
  4406  }
  4407  
  4408  func TestNoRaceJetStreamConsumerFilterPerfDegradation(t *testing.T) {
  4409  	s := RunBasicJetStreamServer(t)
  4410  	defer s.Shutdown()
  4411  
  4412  	nc, _ := jsClientConnect(t, s)
  4413  	defer nc.Close()
  4414  
  4415  	js, err := nc.JetStream(nats.PublishAsyncMaxPending(256))
  4416  	require_NoError(t, err)
  4417  
  4418  	_, err = js.AddStream(&nats.StreamConfig{
  4419  		Name:     "test",
  4420  		Subjects: []string{"test.*.subj"},
  4421  		Replicas: 1,
  4422  	})
  4423  	require_NoError(t, err)
  4424  
  4425  	toSend := 50_000
  4426  	count := 0
  4427  	ch := make(chan struct{}, 6)
  4428  	_, err = js.Subscribe("test.*.subj", func(m *nats.Msg) {
  4429  		m.Ack()
  4430  		if count++; count == toSend {
  4431  			ch <- struct{}{}
  4432  		}
  4433  	}, nats.DeliverNew(), nats.ManualAck())
  4434  	require_NoError(t, err)
  4435  
  4436  	msg := make([]byte, 1024)
  4437  	sent := int32(0)
  4438  	send := func() {
  4439  		defer func() { ch <- struct{}{} }()
  4440  		for i := 0; i < toSend/5; i++ {
  4441  			msgID := atomic.AddInt32(&sent, 1)
  4442  			_, err := js.Publish(fmt.Sprintf("test.%d.subj", msgID), msg)
  4443  			if err != nil {
  4444  				t.Error(err)
  4445  				return
  4446  			}
  4447  		}
  4448  	}
  4449  	for i := 0; i < 5; i++ {
  4450  		go send()
  4451  	}
  4452  	timeout := time.NewTimer(10 * time.Second)
  4453  	for i := 0; i < 6; i++ {
  4454  		select {
  4455  		case <-ch:
  4456  		case <-timeout.C:
  4457  			t.Fatal("Took too long")
  4458  		}
  4459  	}
  4460  }
  4461  
  4462  func TestNoRaceJetStreamFileStoreKeyFileCleanup(t *testing.T) {
  4463  	storeDir := t.TempDir()
  4464  
  4465  	prf := func(context []byte) ([]byte, error) {
  4466  		h := hmac.New(sha256.New, []byte("dlc22"))
  4467  		if _, err := h.Write(context); err != nil {
  4468  			return nil, err
  4469  		}
  4470  		return h.Sum(nil), nil
  4471  	}
  4472  
  4473  	fs, err := newFileStoreWithCreated(
  4474  		FileStoreConfig{StoreDir: storeDir, BlockSize: 1024 * 1024},
  4475  		StreamConfig{Name: "TEST", Storage: FileStorage},
  4476  		time.Now(),
  4477  		prf, nil)
  4478  	require_NoError(t, err)
  4479  	defer fs.Stop()
  4480  
  4481  	n, msg := 10_000, []byte(strings.Repeat("Z", 1024))
  4482  	for i := 0; i < n; i++ {
  4483  		_, _, err := fs.StoreMsg(fmt.Sprintf("X.%d", i), nil, msg)
  4484  		require_NoError(t, err)
  4485  	}
  4486  
  4487  	var seqs []uint64
  4488  	for i := 1; i <= n; i++ {
  4489  		seqs = append(seqs, uint64(i))
  4490  	}
  4491  	// Randomly delete msgs, and make sure we clean up as we empty the message blocks.
  4492  	rand.Shuffle(len(seqs), func(i, j int) { seqs[i], seqs[j] = seqs[j], seqs[i] })
  4493  
  4494  	for _, seq := range seqs {
  4495  		_, err := fs.RemoveMsg(seq)
  4496  		require_NoError(t, err)
  4497  	}
  4498  
  4499  	// We will have cleaned up the main .blk and .idx files except for the lmb, but we should not have any *.fss files.
  4500  	kms, err := filepath.Glob(filepath.Join(storeDir, msgDir, keyScanAll))
  4501  	require_NoError(t, err)
  4502  
  4503  	if len(kms) > 1 {
  4504  		t.Fatalf("Expected to find only 1 key file, found %d", len(kms))
  4505  	}
  4506  }
  4507  
  4508  func TestNoRaceJetStreamMsgIdPerfDuringCatchup(t *testing.T) {
  4509  	// Uncomment to run. Needs to be on a bigger machine. Do not want as part of Travis tests atm.
  4510  	skip(t)
  4511  
  4512  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  4513  	defer c.shutdown()
  4514  
  4515  	nc, js := jsClientConnect(t, c.serverByName("S-1"))
  4516  	defer nc.Close()
  4517  
  4518  	_, err := js.AddStream(&nats.StreamConfig{
  4519  		Name:     "TEST",
  4520  		Replicas: 3,
  4521  	})
  4522  	require_NoError(t, err)
  4523  
  4524  	// This will be the one we restart.
  4525  	sl := c.streamLeader("$G", "TEST")
  4526  	// Now move leader.
  4527  	_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  4528  	require_NoError(t, err)
  4529  	c.waitOnStreamLeader("$G", "TEST")
  4530  
  4531  	// Connect to new leader.
  4532  	nc, _ = jsClientConnect(t, c.streamLeader("$G", "TEST"))
  4533  	defer nc.Close()
  4534  
  4535  	js, err = nc.JetStream(nats.PublishAsyncMaxPending(1024))
  4536  	require_NoError(t, err)
  4537  
  4538  	n, ss, sr := 1_000_000, 250_000, 800_000
  4539  	m := nats.NewMsg("TEST")
  4540  	m.Data = []byte(strings.Repeat("Z", 2048))
  4541  
  4542  	// Target rate 10k msgs/sec
  4543  	start := time.Now()
  4544  
  4545  	for i := 0; i < n; i++ {
  4546  		m.Header.Set(JSMsgId, strconv.Itoa(i))
  4547  		_, err := js.PublishMsgAsync(m)
  4548  		require_NoError(t, err)
  4549  		//time.Sleep(42 * time.Microsecond)
  4550  		if i == ss {
  4551  			fmt.Printf("SD")
  4552  			sl.Shutdown()
  4553  		} else if i == sr {
  4554  			nc.Flush()
  4555  			select {
  4556  			case <-js.PublishAsyncComplete():
  4557  			case <-time.After(10 * time.Second):
  4558  			}
  4559  			fmt.Printf("RS")
  4560  			sl = c.restartServer(sl)
  4561  		}
  4562  		if i%10_000 == 0 {
  4563  			fmt.Print("#")
  4564  		}
  4565  	}
  4566  	fmt.Println()
  4567  
  4568  	// Wait to receive all messages.
  4569  	select {
  4570  	case <-js.PublishAsyncComplete():
  4571  	case <-time.After(20 * time.Second):
  4572  		t.Fatalf("Did not receive completion signal")
  4573  	}
  4574  
  4575  	tt := time.Since(start)
  4576  	si, err := js.StreamInfo("TEST")
  4577  	require_NoError(t, err)
  4578  
  4579  	fmt.Printf("Took %v to send %d msgs\n", tt, n)
  4580  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4581  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4582  
  4583  	c.waitOnStreamCurrent(sl, "$G", "TEST")
  4584  	for _, s := range c.servers {
  4585  		mset, _ := s.GlobalAccount().lookupStream("TEST")
  4586  		if state := mset.store.State(); state.Msgs != uint64(n) {
  4587  			t.Fatalf("Expected server %v to have correct number of msgs %d but got %d", s, n, state.Msgs)
  4588  		}
  4589  	}
  4590  }
  4591  
  4592  func TestNoRaceJetStreamRebuildDeDupeAndMemoryPerf(t *testing.T) {
  4593  	skip(t)
  4594  
  4595  	s := RunBasicJetStreamServer(t)
  4596  	defer s.Shutdown()
  4597  
  4598  	nc, js := jsClientConnect(t, s)
  4599  	defer nc.Close()
  4600  
  4601  	_, err := js.AddStream(&nats.StreamConfig{Name: "DD"})
  4602  	require_NoError(t, err)
  4603  
  4604  	m := nats.NewMsg("DD")
  4605  	m.Data = []byte(strings.Repeat("Z", 2048))
  4606  
  4607  	start := time.Now()
  4608  
  4609  	n := 1_000_000
  4610  	for i := 0; i < n; i++ {
  4611  		m.Header.Set(JSMsgId, strconv.Itoa(i))
  4612  		_, err := js.PublishMsgAsync(m)
  4613  		require_NoError(t, err)
  4614  	}
  4615  
  4616  	select {
  4617  	case <-js.PublishAsyncComplete():
  4618  	case <-time.After(20 * time.Second):
  4619  		t.Fatalf("Did not receive completion signal")
  4620  	}
  4621  
  4622  	tt := time.Since(start)
  4623  	si, err := js.StreamInfo("DD")
  4624  	require_NoError(t, err)
  4625  
  4626  	fmt.Printf("Took %v to send %d msgs\n", tt, n)
  4627  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4628  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4629  
  4630  	v, _ := s.Varz(nil)
  4631  	fmt.Printf("Memory AFTER SEND: %v\n", friendlyBytes(v.Mem))
  4632  
  4633  	mset, err := s.GlobalAccount().lookupStream("DD")
  4634  	require_NoError(t, err)
  4635  
  4636  	mset.mu.Lock()
  4637  	mset.ddloaded = false
  4638  	start = time.Now()
  4639  	mset.rebuildDedupe()
  4640  	fmt.Printf("TOOK %v to rebuild dd\n", time.Since(start))
  4641  	mset.mu.Unlock()
  4642  
  4643  	v, _ = s.Varz(nil)
  4644  	fmt.Printf("Memory: %v\n", friendlyBytes(v.Mem))
  4645  
  4646  	// Now do an ephemeral consumer and whip through every message, doing the same calculations.
  4647  	start = time.Now()
  4648  	received, done := 0, make(chan bool)
  4649  	sub, err := js.Subscribe("DD", func(m *nats.Msg) {
  4650  		received++
  4651  		if received >= n {
  4652  			done <- true
  4653  		}
  4654  	}, nats.OrderedConsumer())
  4655  	require_NoError(t, err)
  4656  
  4657  	select {
  4658  	case <-done:
  4659  	case <-time.After(10 * time.Second):
  4660  		if s.NumSlowConsumers() > 0 {
  4661  			t.Fatalf("Did not receive all large messages due to slow consumer status: %d of %d", received, n)
  4662  		}
  4663  		t.Fatalf("Failed to receive all large messages: %d of %d\n", received, n)
  4664  	}
  4665  
  4666  	fmt.Printf("TOOK %v to receive all %d msgs\n", time.Since(start), n)
  4667  	sub.Unsubscribe()
  4668  
  4669  	v, _ = s.Varz(nil)
  4670  	fmt.Printf("Memory: %v\n", friendlyBytes(v.Mem))
  4671  }
  4672  
  4673  func TestNoRaceJetStreamMemoryUsageOnLimitedStreamWithMirror(t *testing.T) {
  4674  	skip(t)
  4675  
  4676  	s := RunBasicJetStreamServer(t)
  4677  	defer s.Shutdown()
  4678  
  4679  	nc, js := jsClientConnect(t, s)
  4680  	defer nc.Close()
  4681  
  4682  	_, err := js.AddStream(&nats.StreamConfig{Name: "DD", Subjects: []string{"ORDERS.*"}, MaxMsgs: 10_000})
  4683  	require_NoError(t, err)
  4684  
  4685  	_, err = js.AddStream(&nats.StreamConfig{
  4686  		Name:    "M",
  4687  		Mirror:  &nats.StreamSource{Name: "DD"},
  4688  		MaxMsgs: 10_000,
  4689  	})
  4690  	require_NoError(t, err)
  4691  
  4692  	m := nats.NewMsg("ORDERS.0")
  4693  	m.Data = []byte(strings.Repeat("Z", 2048))
  4694  
  4695  	start := time.Now()
  4696  
  4697  	n := 1_000_000
  4698  	for i := 0; i < n; i++ {
  4699  		m.Subject = fmt.Sprintf("ORDERS.%d", i)
  4700  		m.Header.Set(JSMsgId, strconv.Itoa(i))
  4701  		_, err := js.PublishMsgAsync(m)
  4702  		require_NoError(t, err)
  4703  	}
  4704  
  4705  	select {
  4706  	case <-js.PublishAsyncComplete():
  4707  	case <-time.After(20 * time.Second):
  4708  		t.Fatalf("Did not receive completion signal")
  4709  	}
  4710  
  4711  	tt := time.Since(start)
  4712  	si, err := js.StreamInfo("DD")
  4713  	require_NoError(t, err)
  4714  
  4715  	fmt.Printf("Took %v to send %d msgs\n", tt, n)
  4716  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4717  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4718  
  4719  	v, _ := s.Varz(nil)
  4720  	fmt.Printf("Memory AFTER SEND: %v\n", friendlyBytes(v.Mem))
  4721  }
  4722  
  4723  func TestNoRaceJetStreamOrderedConsumerLongRTTPerformance(t *testing.T) {
  4724  	skip(t)
  4725  
  4726  	s := RunBasicJetStreamServer(t)
  4727  	defer s.Shutdown()
  4728  
  4729  	nc, _ := jsClientConnect(t, s)
  4730  	defer nc.Close()
  4731  
  4732  	js, err := nc.JetStream(nats.PublishAsyncMaxPending(1000))
  4733  	require_NoError(t, err)
  4734  
  4735  	_, err = js.AddStream(&nats.StreamConfig{Name: "OCP"})
  4736  	require_NoError(t, err)
  4737  
  4738  	n, msg := 100_000, []byte(strings.Repeat("D", 30_000))
  4739  
  4740  	for i := 0; i < n; i++ {
  4741  		_, err := js.PublishAsync("OCP", msg)
  4742  		require_NoError(t, err)
  4743  	}
  4744  	select {
  4745  	case <-js.PublishAsyncComplete():
  4746  	case <-time.After(5 * time.Second):
  4747  		t.Fatalf("Did not receive completion signal")
  4748  	}
  4749  
  4750  	// Approximately 3GB
  4751  	si, err := js.StreamInfo("OCP")
  4752  	require_NoError(t, err)
  4753  
  4754  	start := time.Now()
  4755  	received, done := 0, make(chan bool)
  4756  	sub, err := js.Subscribe("OCP", func(m *nats.Msg) {
  4757  		received++
  4758  		if received >= n {
  4759  			done <- true
  4760  		}
  4761  	}, nats.OrderedConsumer())
  4762  	require_NoError(t, err)
  4763  	defer sub.Unsubscribe()
  4764  
  4765  	// Wait to receive all messages.
  4766  	select {
  4767  	case <-done:
  4768  	case <-time.After(30 * time.Second):
  4769  		t.Fatalf("Did not receive all of our messages")
  4770  	}
  4771  
  4772  	tt := time.Since(start)
  4773  	fmt.Printf("Took %v to receive %d msgs\n", tt, n)
  4774  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4775  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4776  
  4777  	sub.Unsubscribe()
  4778  
  4779  	rtt := 10 * time.Millisecond
  4780  	bw := 10 * 1024 * 1024 * 1024
  4781  	proxy := newNetProxy(rtt, bw, bw, s.ClientURL())
  4782  	defer proxy.stop()
  4783  
  4784  	nc, err = nats.Connect(proxy.clientURL())
  4785  	require_NoError(t, err)
  4786  	defer nc.Close()
  4787  	js, err = nc.JetStream()
  4788  	require_NoError(t, err)
  4789  
  4790  	start, received = time.Now(), 0
  4791  	sub, err = js.Subscribe("OCP", func(m *nats.Msg) {
  4792  		received++
  4793  		if received >= n {
  4794  			done <- true
  4795  		}
  4796  	}, nats.OrderedConsumer())
  4797  	require_NoError(t, err)
  4798  	defer sub.Unsubscribe()
  4799  
  4800  	// Wait to receive all messages.
  4801  	select {
  4802  	case <-done:
  4803  	case <-time.After(60 * time.Second):
  4804  		t.Fatalf("Did not receive all of our messages")
  4805  	}
  4806  
  4807  	tt = time.Since(start)
  4808  	fmt.Printf("Proxy RTT: %v, UP: %d, DOWN: %d\n", rtt, bw, bw)
  4809  	fmt.Printf("Took %v to receive %d msgs\n", tt, n)
  4810  	fmt.Printf("%.0f msgs/s\n", float64(n)/tt.Seconds())
  4811  	fmt.Printf("%.0f mb/s\n\n", float64(si.State.Bytes/(1024*1024))/tt.Seconds())
  4812  }
  4813  
  4814  var jsClusterStallCatchupTempl = `
  4815  	listen: 127.0.0.1:-1
  4816  	server_name: %s
  4817  	jetstream: {max_mem_store: 256MB, max_file_store: 32GB, store_dir: '%s'}
  4818  
  4819  	leaf {
  4820  		listen: 127.0.0.1:-1
  4821  	}
  4822  
  4823  	cluster {
  4824  		name: %s
  4825  		listen: 127.0.0.1:%d
  4826  		routes = [%s]
  4827  	}
  4828  
  4829  	# For access to system account.
  4830  	accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  4831  `
  4832  
  4833  // Test our global stall gate for outstanding catchup bytes.
  4834  func TestNoRaceJetStreamClusterCatchupStallGate(t *testing.T) {
  4835  	skip(t)
  4836  
  4837  	c := createJetStreamClusterWithTemplate(t, jsClusterStallCatchupTempl, "GSG", 3)
  4838  	defer c.shutdown()
  4839  
  4840  	nc, js := jsClientConnect(t, c.randomServer())
  4841  	defer nc.Close()
  4842  
  4843  	// ~100k per message.
  4844  	msg := []byte(strings.Repeat("A", 99_960))
  4845  
  4846  	// Create 200 streams with ~10MB each (100 msgs of ~100k).
  4847  	// Each server ends up with ~2GB.
  4848  	var wg sync.WaitGroup
  4849  	for i := 0; i < 20; i++ {
  4850  		wg.Add(1)
  4851  		go func(x int) {
  4852  			defer wg.Done()
  4853  			for n := 1; n <= 10; n++ {
  4854  				sn := fmt.Sprintf("S-%d", n+x)
  4855  				_, err := js.AddStream(&nats.StreamConfig{
  4856  					Name:     sn,
  4857  					Replicas: 3,
  4858  				})
  4859  				require_NoError(t, err)
  4860  				for i := 0; i < 100; i++ {
  4861  					_, err := js.Publish(sn, msg)
  4862  					require_NoError(t, err)
  4863  				}
  4864  			}
  4865  		}(i * 20)
  4866  	}
  4867  	wg.Wait()
  4868  
  4869  	info, err := js.AccountInfo()
  4870  	require_NoError(t, err)
  4871  	require_True(t, info.Streams == 200)
  4872  
  4873  	runtime.GC()
  4874  	debug.FreeOSMemory()
  4875  
  4876  	// Now bring a server down and wipe its storage.
  4877  	s := c.servers[0]
  4878  	vz, err := s.Varz(nil)
  4879  	require_NoError(t, err)
  4880  	fmt.Printf("MEM BEFORE is %v\n", friendlyBytes(vz.Mem))
  4881  
  4882  	sd := s.JetStreamConfig().StoreDir
  4883  	s.Shutdown()
  4884  	removeDir(t, sd)
  4885  	s = c.restartServer(s)
  4886  
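        	// The restarted server has to catch up all 200 streams before reporting healthy,
        	// which is what drives catchup traffic through the global stall gate.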
  4887  	c.waitOnServerHealthz(s)
  4888  
  4889  	runtime.GC()
  4890  	debug.FreeOSMemory()
  4891  
  4892  	vz, err = s.Varz(nil)
  4893  	require_NoError(t, err)
  4894  	fmt.Printf("MEM AFTER is %v\n", friendlyBytes(vz.Mem))
  4895  }
  4896  
  4897  func TestNoRaceJetStreamClusterCatchupBailMidway(t *testing.T) {
  4898  	skip(t)
  4899  
  4900  	c := createJetStreamClusterWithTemplate(t, jsClusterStallCatchupTempl, "GSG", 3)
  4901  	defer c.shutdown()
  4902  
  4903  	ml := c.leader()
  4904  	nc, js := jsClientConnect(t, ml)
  4905  	defer nc.Close()
  4906  
  4907  	msg := []byte(strings.Repeat("A", 480))
  4908  
  4909  	for i := 0; i < maxConcurrentSyncRequests*2; i++ {
  4910  		sn := fmt.Sprintf("CUP-%d", i+1)
  4911  		_, err := js.AddStream(&nats.StreamConfig{
  4912  			Name:     sn,
  4913  			Replicas: 3,
  4914  		})
  4915  		require_NoError(t, err)
  4916  
  4917  		for i := 0; i < 10_000; i++ {
  4918  			_, err := js.PublishAsync(sn, msg)
  4919  			require_NoError(t, err)
  4920  		}
  4921  		select {
  4922  		case <-js.PublishAsyncComplete():
  4923  		case <-time.After(10 * time.Second):
  4924  			t.Fatalf("Did not receive completion signal")
  4925  		}
  4926  	}
  4927  
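        	// Record the total message count from the leader so we know what a fully
        	// caught up follower should report.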
  4928  	jsz, _ := ml.Jsz(nil)
  4929  	expectedMsgs := jsz.Messages
  4930  
  4931  	// Now select a server and shut it down, removing the storage directory.
  4932  	s := c.randomNonLeader()
  4933  	sd := s.JetStreamConfig().StoreDir
  4934  	s.Shutdown()
  4935  	removeDir(t, sd)
  4936  
  4937  	// Now restart the server.
  4938  	s = c.restartServer(s)
  4939  
  4940  	// We want to force the follower to bail out before the catchup driven by the
  4941  	// upper level catchup logic completes.
  4942  	checkFor(t, 5*time.Second, 10*time.Millisecond, func() error {
  4943  		jsz, _ := s.Jsz(nil)
  4944  		if jsz.Messages > expectedMsgs/2 {
  4945  			s.Shutdown()
  4946  			return nil
  4947  		}
  4948  		return fmt.Errorf("Not enough yet")
  4949  	})
  4950  
  4951  	// Now restart the server.
  4952  	s = c.restartServer(s)
  4953  
  4954  	checkFor(t, 5*time.Second, 500*time.Millisecond, func() error {
  4955  		jsz, _ := s.Jsz(nil)
  4956  		if jsz.Messages == expectedMsgs {
  4957  			return nil
  4958  		}
  4959  		return fmt.Errorf("Not enough yet")
  4960  	})
  4961  }
  4962  
  4963  func TestNoRaceJetStreamAccountLimitsAndRestart(t *testing.T) {
  4964  	c := createJetStreamClusterWithTemplate(t, jsClusterAccountLimitsTempl, "A3S", 3)
  4965  	defer c.shutdown()
  4966  
  4967  	nc, js := jsClientConnect(t, c.randomServer())
  4968  	defer nc.Close()
  4969  
  4970  	if _, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Replicas: 3}); err != nil {
  4971  		t.Fatalf("Unexpected error: %v", err)
  4972  	}
  4973  
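        	// Publish until publishes start failing due to the account limits, and shut down
        	// a non stream leader partway through so it has to catch up after the restart below.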
  4974  	for i := 0; i < 20_000; i++ {
  4975  		if _, err := js.Publish("TEST", []byte("A")); err != nil {
  4976  			break
  4977  		}
  4978  		if i == 5_000 {
  4979  			snl := c.randomNonStreamLeader("$JS", "TEST")
  4980  			snl.Shutdown()
  4981  		}
  4982  	}
  4983  
  4984  	c.stopAll()
  4985  	c.restartAll()
  4986  	c.waitOnLeader()
  4987  	c.waitOnStreamLeader("$JS", "TEST")
  4988  
  4989  	for _, cs := range c.servers {
  4990  		c.waitOnStreamCurrent(cs, "$JS", "TEST")
  4991  	}
  4992  }
  4993  
  4994  func TestNoRaceJetStreamPullConsumersAndInteriorDeletes(t *testing.T) {
  4995  	c := createJetStreamClusterExplicit(t, "ID", 3)
  4996  	defer c.shutdown()
  4997  
  4998  	nc, js := jsClientConnect(t, c.randomServer())
  4999  	defer nc.Close()
  5000  
  5001  	_, err := js.AddStream(&nats.StreamConfig{
  5002  		Name:      "foo",
  5003  		Replicas:  3,
  5004  		MaxMsgs:   50000,
  5005  		Retention: nats.InterestPolicy,
  5006  	})
  5007  	require_NoError(t, err)
  5008  
  5009  	c.waitOnStreamLeader(globalAccountName, "foo")
  5010  
  5011  	_, err = js.AddConsumer("foo", &nats.ConsumerConfig{
  5012  		Durable:       "foo",
  5013  		FilterSubject: "foo",
  5014  		MaxAckPending: 20000,
  5015  		AckWait:       time.Minute,
  5016  		AckPolicy:     nats.AckExplicitPolicy,
  5017  	})
  5018  	require_NoError(t, err)
  5019  
  5020  	c.waitOnConsumerLeader(globalAccountName, "foo", "foo")
  5021  
  5022  	rcv := int32(0)
  5023  	prods := 5
  5024  	cons := 5
  5025  	wg := sync.WaitGroup{}
  5026  	wg.Add(prods + cons)
  5027  	toSend := 100000
  5028  
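        	// Fire up concurrent pull consumers that fetch and ack in parallel. With interest
        	// retention the out of order acks produce interior deletes in the stream.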
  5029  	for i := 0; i < cons; i++ {
  5030  		go func() {
  5031  			defer wg.Done()
  5032  
  5033  			sub, err := js.PullSubscribe("foo", "foo")
  5034  			if err != nil {
  5035  				return
  5036  			}
  5037  			for {
  5038  				msgs, err := sub.Fetch(200, nats.MaxWait(250*time.Millisecond))
  5039  				if err != nil {
  5040  					if n := int(atomic.LoadInt32(&rcv)); n >= toSend {
  5041  						return
  5042  					}
  5043  					continue
  5044  				}
  5045  				for _, m := range msgs {
  5046  					m.Ack()
  5047  					atomic.AddInt32(&rcv, 1)
  5048  				}
  5049  			}
  5050  		}()
  5051  	}
  5052  
  5053  	for i := 0; i < prods; i++ {
  5054  		go func() {
  5055  			defer wg.Done()
  5056  
  5057  			for i := 0; i < toSend/prods; i++ {
  5058  				js.Publish("foo", []byte("hello"))
  5059  			}
  5060  		}()
  5061  	}
  5062  
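        	// Let traffic flow for a bit, then force a consumer leader stepdown while
        	// fetches and acks are still in flight.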
  5063  	time.Sleep(time.Second)
  5064  	resp, err := nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "foo", "foo"), nil, time.Second)
  5065  	if err != nil {
  5066  		t.Fatalf("Unexpected error: %v", err)
  5067  	}
  5068  	var cdResp JSApiConsumerLeaderStepDownResponse
  5069  	if err := json.Unmarshal(resp.Data, &cdResp); err != nil {
  5070  		t.Fatalf("Unexpected error: %v", err)
  5071  	}
  5072  	if cdResp.Error != nil {
  5073  		t.Fatalf("Unexpected error: %+v", cdResp.Error)
  5074  	}
  5075  	ch := make(chan struct{})
  5076  	go func() {
  5077  		wg.Wait()
  5078  		close(ch)
  5079  	}()
  5080  	select {
  5081  	case <-ch:
  5082  		// OK
  5083  	case <-time.After(30 * time.Second):
  5084  		t.Fatalf("Consumers took too long to consume all messages")
  5085  	}
  5086  }
  5087  
  5088  func TestNoRaceJetStreamClusterInterestPullConsumerStreamLimitBug(t *testing.T) {
  5089  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  5090  	defer c.shutdown()
  5091  
  5092  	nc, js := jsClientConnect(t, c.randomServer())
  5093  	defer nc.Close()
  5094  
  5095  	limit := uint64(1000)
  5096  
  5097  	_, err := js.AddStream(&nats.StreamConfig{
  5098  		Name:      "TEST",
  5099  		Subjects:  []string{"foo"},
  5100  		Retention: nats.InterestPolicy,
  5101  		MaxMsgs:   int64(limit),
  5102  		Replicas:  3,
  5103  	})
  5104  	require_NoError(t, err)
  5105  
  5106  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "dur", AckPolicy: nats.AckExplicitPolicy})
  5107  	require_NoError(t, err)
  5108  
  5109  	qch := make(chan bool)
  5110  	var wg sync.WaitGroup
  5111  
  5112  	// Publisher
  5113  	wg.Add(1)
  5114  	go func() {
  5115  		defer wg.Done()
  5116  		for {
  5117  			pt := time.NewTimer(time.Duration(rand.Intn(2)) * time.Millisecond)
  5118  			select {
  5119  			case <-pt.C:
  5120  				_, err := js.Publish("foo", []byte("BUG!"))
  5121  				require_NoError(t, err)
  5122  			case <-qch:
  5123  				pt.Stop()
  5124  				return
  5125  			}
  5126  		}
  5127  	}()
  5128  
  5129  	time.Sleep(time.Second)
  5130  
  5131  	// Pull Consumers
  5132  	wg.Add(100)
  5133  	for i := 0; i < 100; i++ {
  5134  		go func() {
  5135  			defer wg.Done()
  5136  			nc := natsConnect(t, c.randomServer().ClientURL())
  5137  			defer nc.Close()
  5138  
  5139  			js, err := nc.JetStream(nats.MaxWait(time.Second))
  5140  			require_NoError(t, err)
  5141  
  5142  			var sub *nats.Subscription
  5143  			for j := 0; j < 5; j++ {
  5144  				sub, err = js.PullSubscribe("foo", "dur")
  5145  				if err == nil {
  5146  					break
  5147  				}
  5148  			}
  5149  			require_NoError(t, err)
  5150  
  5151  			for {
  5152  				pt := time.NewTimer(time.Duration(rand.Intn(300)) * time.Millisecond)
  5153  				select {
  5154  				case <-pt.C:
  5155  					msgs, err := sub.Fetch(1)
  5156  					if err != nil {
  5157  						t.Logf("Got a Fetch error: %v", err)
  5158  						return
  5159  					}
  5160  					if len(msgs) > 0 {
  5161  						go func() {
  5162  							ackDelay := time.Duration(rand.Intn(375)+15) * time.Millisecond
  5163  							m := msgs[0]
  5164  							time.AfterFunc(ackDelay, func() { m.AckSync() })
  5165  						}()
  5166  					}
  5167  				case <-qch:
  5168  					return
  5169  				}
  5170  			}
  5171  		}()
  5172  	}
  5173  
  5174  	// Make sure we have hit the limit for the number of messages we expected.
  5175  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5176  		si, err := js.StreamInfo("TEST")
  5177  		require_NoError(t, err)
  5178  		if si.State.Msgs < limit {
  5179  			return fmt.Errorf("Not hit limit yet")
  5180  		}
  5181  		return nil
  5182  	})
  5183  
  5184  	close(qch)
  5185  	wg.Wait()
  5186  
  5187  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5188  		si, err := js.StreamInfo("TEST")
  5189  		require_NoError(t, err)
  5190  		ci, err := js.ConsumerInfo("TEST", "dur")
  5191  		require_NoError(t, err)
  5192  
  5193  		np := ci.NumPending + uint64(ci.NumAckPending)
  5194  		if np != si.State.Msgs {
  5195  			return fmt.Errorf("Expected NumPending to be %d got %d", si.State.Msgs-uint64(ci.NumAckPending), ci.NumPending)
  5196  		}
  5197  		return nil
  5198  	})
  5199  }
  5200  
  5201  // Test that all peers have the direct access subs that participate in a queue group,
  5202  // but only when they are current and ready. So we will start with R1, add in messages
  5203  // then scale up while also still adding messages.
  5204  func TestNoRaceJetStreamClusterDirectAccessAllPeersSubs(t *testing.T) {
  5205  	c := createJetStreamClusterExplicit(t, "JSC", 3)
  5206  	defer c.shutdown()
  5207  
  5208  	nc, js := jsClientConnect(t, c.randomServer())
  5209  	defer nc.Close()
  5210  
  5211  	// Start as R1
  5212  	cfg := &StreamConfig{
  5213  		Name:        "TEST",
  5214  		Subjects:    []string{"kv.>"},
  5215  		MaxMsgsPer:  10,
  5216  		AllowDirect: true,
  5217  		Replicas:    1,
  5218  		Storage:     FileStorage,
  5219  	}
  5220  	addStream(t, nc, cfg)
  5221  
  5222  	// Seed with enough messages to start, then we will scale up while still adding more messages.
  5223  	num, msg := 1000, bytes.Repeat([]byte("XYZ"), 64)
  5224  	for i := 0; i < num; i++ {
  5225  		js.PublishAsync(fmt.Sprintf("kv.%d", i), msg)
  5226  	}
  5227  	select {
  5228  	case <-js.PublishAsyncComplete():
  5229  	case <-time.After(5 * time.Second):
  5230  		t.Fatalf("Did not receive completion signal")
  5231  	}
  5232  
  5233  	getSubj := fmt.Sprintf(JSDirectMsgGetT, "TEST")
  5234  	getMsg := func(key string) *nats.Msg {
  5235  		t.Helper()
  5236  		req := []byte(fmt.Sprintf(`{"last_by_subj":%q}`, key))
  5237  		m, err := nc.Request(getSubj, req, time.Second)
  5238  		require_NoError(t, err)
  5239  		require_True(t, m.Header.Get(JSSubject) == key)
  5240  		return m
  5241  	}
  5242  
  5243  	// Just make sure we can succeed here.
  5244  	getMsg("kv.22")
  5245  
  5246  	// Now crank up a go routine to continue sending more messages.
  5247  	qch := make(chan bool)
  5248  	var wg sync.WaitGroup
  5249  
  5250  	for i := 0; i < 5; i++ {
  5251  		wg.Add(1)
  5252  		go func() {
  5253  			defer wg.Done()
  5254  			nc, js := jsClientConnect(t, c.randomServer())
  5255  			defer nc.Close()
  5256  			for {
  5257  				select {
  5258  				case <-qch:
  5259  					select {
  5260  					case <-js.PublishAsyncComplete():
  5261  					case <-time.After(10 * time.Second):
  5262  					}
  5263  					return
  5264  				default:
  5265  					// Send as fast as we can.
  5266  					js.Publish(fmt.Sprintf("kv.%d", rand.Intn(1000)), msg)
  5267  				}
  5268  			}
  5269  		}()
  5270  	}
  5271  
  5272  	time.Sleep(200 * time.Millisecond)
  5273  
  5274  	// Now let's scale up to an R3.
  5275  	cfg.Replicas = 3
  5276  	updateStream(t, nc, cfg)
  5277  
  5278  	// Wait for the stream to register the new replicas and have a leader.
  5279  	checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5280  		si, err := js.StreamInfo("TEST")
  5281  		if err != nil {
  5282  			return err
  5283  		}
  5284  		if si.Cluster == nil {
  5285  			return fmt.Errorf("No cluster yet")
  5286  		}
  5287  		if si.Cluster.Leader == _EMPTY_ || len(si.Cluster.Replicas) != 2 {
  5288  			return fmt.Errorf("Cluster not ready yet")
  5289  		}
  5290  		return nil
  5291  	})
  5292  
  5293  	close(qch)
  5294  	wg.Wait()
  5295  
  5296  	// Just make sure we can succeed here.
  5297  	getMsg("kv.22")
  5298  
  5299  	// For each non-leader check that the direct sub fires up.
  5300  	// We just test all, the leader will already have a directSub.
  5301  	for _, s := range c.servers {
  5302  		mset, err := s.GlobalAccount().lookupStream("TEST")
  5303  		require_NoError(t, err)
  5304  		checkFor(t, 20*time.Second, 500*time.Millisecond, func() error {
  5305  			mset.mu.RLock()
  5306  			ok := mset.directSub != nil
  5307  			mset.mu.RUnlock()
  5308  			if ok {
  5309  				return nil
  5310  			}
  5311  			return fmt.Errorf("No directSub yet")
  5312  		})
  5313  	}
  5314  
  5315  	si, err := js.StreamInfo("TEST")
  5316  	require_NoError(t, err)
  5317  
  5318  	if si.State.Msgs == uint64(num) {
  5319  		t.Fatalf("Expected to see messages increase, got %d", si.State.Msgs)
  5320  	}
  5321  
  5322  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  5323  		// Make sure they are all the same from a state perspective.
  5324  		// Leader will have the expected state.
  5325  		lmset, err := c.streamLeader("$G", "TEST").GlobalAccount().lookupStream("TEST")
  5326  		require_NoError(t, err)
  5327  		expected := lmset.state()
  5328  
  5329  		for _, s := range c.servers {
  5330  			mset, err := s.GlobalAccount().lookupStream("TEST")
  5331  			require_NoError(t, err)
  5332  			if state := mset.state(); !reflect.DeepEqual(expected, state) {
  5333  				return fmt.Errorf("Expected %+v, got %+v", expected, state)
  5334  			}
  5335  		}
  5336  		return nil
  5337  	})
  5338  
  5339  }
  5340  
  5341  func TestNoRaceJetStreamClusterStreamNamesAndInfosMoreThanAPILimit(t *testing.T) {
  5342  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5343  	defer c.shutdown()
  5344  
  5345  	s := c.randomServer()
  5346  	nc, js := jsClientConnect(t, s)
  5347  	defer nc.Close()
  5348  
  5349  	createStream := func(name string) {
  5350  		t.Helper()
  5351  		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
  5352  			t.Fatalf("Unexpected error: %v", err)
  5353  		}
  5354  	}
  5355  
  5356  	max := JSApiListLimit
  5357  	if JSApiNamesLimit > max {
  5358  		max = JSApiNamesLimit
  5359  	}
  5360  	max += 10
  5361  
  5362  	for i := 0; i < max; i++ {
  5363  		name := fmt.Sprintf("foo_%d", i)
  5364  		createStream(name)
  5365  	}
  5366  
  5367  	// Not using the JS API here because we want to make sure that the
  5368  	// server returns the proper Total count, but also that it does not
  5369  	// send more than the API limit in one go.
  5370  	check := func(subj string, limit int) {
  5371  		t.Helper()
  5372  
  5373  		nreq := JSApiStreamNamesRequest{}
  5374  		b, _ := json.Marshal(nreq)
  5375  		msg, err := nc.Request(subj, b, 2*time.Second)
  5376  		require_NoError(t, err)
  5377  
  5378  		nresp := JSApiStreamNamesResponse{}
  5379  		json.Unmarshal(msg.Data, &nresp)
  5380  		if n := nresp.ApiPaged.Total; n != max {
  5381  			t.Fatalf("Expected total to be %v, got %v", max, n)
  5382  		}
  5383  		if n := nresp.ApiPaged.Limit; n != limit {
  5384  			t.Fatalf("Expected limit to be %v, got %v", limit, n)
  5385  		}
  5386  		if n := len(nresp.Streams); n != limit {
  5387  			t.Fatalf("Expected number of streams to be %v, got %v", limit, n)
  5388  		}
  5389  	}
  5390  
  5391  	check(JSApiStreams, JSApiNamesLimit)
  5392  	check(JSApiStreamList, JSApiListLimit)
  5393  }
  5394  
  5395  func TestNoRaceJetStreamClusterConsumerListPaging(t *testing.T) {
  5396  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5397  	defer c.shutdown()
  5398  
  5399  	s := c.randomNonLeader()
  5400  	nc, js := jsClientConnect(t, s)
  5401  	defer nc.Close()
  5402  
  5403  	_, err := js.AddStream(&nats.StreamConfig{
  5404  		Name:     "TEST",
  5405  		Subjects: []string{"foo"},
  5406  		Replicas: 3,
  5407  	})
  5408  	require_NoError(t, err)
  5409  	c.waitOnStreamLeader(globalAccountName, "TEST")
  5410  
  5411  	cfg := &nats.ConsumerConfig{
  5412  		Replicas:      1,
  5413  		MemoryStorage: true,
  5414  		AckPolicy:     nats.AckExplicitPolicy,
  5415  	}
  5416  
  5417  	// Create 3000 consumers.
  5418  	numConsumers := 3000
  5419  	for i := 1; i <= numConsumers; i++ {
  5420  		cfg.Durable = fmt.Sprintf("d-%.4d", i)
  5421  		_, err := js.AddConsumer("TEST", cfg)
  5422  		require_NoError(t, err)
  5423  	}
  5424  
  5425  	// Test both names and list operations.
  5426  
  5427  	// Names
  5428  	reqSubj := fmt.Sprintf(JSApiConsumersT, "TEST")
  5429  	grabConsumerNames := func(offset int) []string {
  5430  		req := fmt.Sprintf(`{"offset":%d}`, offset)
  5431  		respMsg, err := nc.Request(reqSubj, []byte(req), time.Second)
  5432  		require_NoError(t, err)
  5433  		var resp JSApiConsumerNamesResponse
  5434  		err = json.Unmarshal(respMsg.Data, &resp)
  5435  		require_NoError(t, err)
  5436  		// Sanity check that we are actually paging properly around limits.
  5437  		if resp.Limit < len(resp.Consumers) {
  5438  			t.Fatalf("Expected total limited to %d but got %d", resp.Limit, len(resp.Consumers))
  5439  		}
  5440  		if resp.Total != numConsumers {
  5441  			t.Fatalf("Invalid total response: expected %d got %d", numConsumers, resp.Total)
  5442  		}
  5443  		return resp.Consumers
  5444  	}
  5445  
  5446  	results := make(map[string]bool)
  5447  
  5448  	for offset := 0; len(results) < numConsumers; {
  5449  		consumers := grabConsumerNames(offset)
  5450  		offset += len(consumers)
  5451  		for _, name := range consumers {
  5452  			if results[name] {
  5453  				t.Fatalf("Found duplicate %q", name)
  5454  			}
  5455  			results[name] = true
  5456  		}
  5457  	}
  5458  
  5459  	// List
  5460  	reqSubj = fmt.Sprintf(JSApiConsumerListT, "TEST")
  5461  	grabConsumerList := func(offset int) []*ConsumerInfo {
  5462  		req := fmt.Sprintf(`{"offset":%d}`, offset)
  5463  		respMsg, err := nc.Request(reqSubj, []byte(req), time.Second)
  5464  		require_NoError(t, err)
  5465  		var resp JSApiConsumerListResponse
  5466  		err = json.Unmarshal(respMsg.Data, &resp)
  5467  		require_NoError(t, err)
  5468  		// Sanity check that we are actually paging properly around limits.
  5469  		if resp.Limit < len(resp.Consumers) {
  5470  			t.Fatalf("Expected total limited to %d but got %d", resp.Limit, len(resp.Consumers))
  5471  		}
  5472  		if resp.Total != numConsumers {
  5473  			t.Fatalf("Invalid total response: expected %d got %d", numConsumers, resp.Total)
  5474  		}
  5475  		return resp.Consumers
  5476  	}
  5477  
  5478  	results = make(map[string]bool)
  5479  
  5480  	for offset := 0; len(results) < numConsumers; {
  5481  		consumers := grabConsumerList(offset)
  5482  		offset += len(consumers)
  5483  		for _, ci := range consumers {
  5484  			name := ci.Config.Durable
  5485  			if results[name] {
  5486  				t.Fatalf("Found duplicate %q", name)
  5487  			}
  5488  			results[name] = true
  5489  		}
  5490  	}
  5491  
  5492  	if len(results) != numConsumers {
  5493  		t.Fatalf("Received %d / %d consumers", len(results), numConsumers)
  5494  	}
  5495  }
  5496  
  5497  func TestNoRaceJetStreamFileStoreLargeKVAccessTiming(t *testing.T) {
  5498  	storeDir := t.TempDir()
  5499  
  5500  	blkSize := uint64(4 * 1024)
  5501  	// Compensate for slower IO on MacOSX
  5502  	if runtime.GOOS == "darwin" {
  5503  		blkSize *= 4
  5504  	}
  5505  
  5506  	fs, err := newFileStore(
  5507  		FileStoreConfig{StoreDir: storeDir, BlockSize: blkSize, CacheExpire: 30 * time.Second},
  5508  		StreamConfig{Name: "zzz", Subjects: []string{"KV.STREAM_NAME.*"}, Storage: FileStorage, MaxMsgsPer: 1},
  5509  	)
  5510  	require_NoError(t, err)
  5511  	defer fs.Stop()
  5512  
  5513  	tmpl := "KV.STREAM_NAME.%d"
  5514  	nkeys, val := 100_000, bytes.Repeat([]byte("Z"), 1024)
  5515  
  5516  	for i := 1; i <= nkeys; i++ {
  5517  		subj := fmt.Sprintf(tmpl, i)
  5518  		_, _, err := fs.StoreMsg(subj, nil, val)
  5519  		require_NoError(t, err)
  5520  	}
  5521  
  5522  	first := fmt.Sprintf(tmpl, 1)
  5523  	last := fmt.Sprintf(tmpl, nkeys)
  5524  
  5525  	start := time.Now()
  5526  	sm, err := fs.LoadLastMsg(last, nil)
  5527  	require_NoError(t, err)
  5528  	base := time.Since(start)
  5529  
  5530  	if !bytes.Equal(sm.msg, val) {
  5531  		t.Fatalf("Retrieved value did not match")
  5532  	}
  5533  
  5534  	start = time.Now()
  5535  	_, err = fs.LoadLastMsg(first, nil)
  5536  	require_NoError(t, err)
  5537  	slow := time.Since(start)
  5538  
  5539  	if base > 100*time.Microsecond || slow > 200*time.Microsecond {
  5540  		t.Fatalf("Took too long to look up keys, last vs first: %v vs %v", base, slow)
  5541  	}
  5542  
  5543  	// Time the first seq lookup for both as well.
  5544  	// Base will be first in this case.
  5545  	fs.mu.RLock()
  5546  	start = time.Now()
  5547  	fs.firstSeqForSubj(first)
  5548  	base = time.Since(start)
  5549  	start = time.Now()
  5550  	fs.firstSeqForSubj(last)
  5551  	slow = time.Since(start)
  5552  	fs.mu.RUnlock()
  5553  
  5554  	if base > 100*time.Microsecond || slow > 200*time.Microsecond {
  5555  		t.Fatalf("Took too long to look up first seq by subject, first vs last: %v vs %v", base, slow)
  5556  	}
  5557  }
  5558  
  5559  func TestNoRaceJetStreamKVLock(t *testing.T) {
  5560  	s := RunBasicJetStreamServer(t)
  5561  	defer s.Shutdown()
  5562  
  5563  	nc, js := jsClientConnect(t, s)
  5564  	defer nc.Close()
  5565  
  5566  	_, err := js.CreateKeyValue(&nats.KeyValueConfig{Bucket: "LOCKS"})
  5567  	require_NoError(t, err)
  5568  
  5569  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  5570  	defer cancel()
  5571  
  5572  	var wg sync.WaitGroup
  5573  	start := make(chan bool)
  5574  
  5575  	var tracker int64
  5576  
  5577  	for i := 0; i < 100; i++ {
  5578  		wg.Add(1)
  5579  		go func() {
  5580  			defer wg.Done()
  5581  
  5582  			nc, js := jsClientConnect(t, s)
  5583  			defer nc.Close()
  5584  			kv, err := js.KeyValue("LOCKS")
  5585  			require_NoError(t, err)
  5586  
  5587  			<-start
  5588  
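        			// Attempt to take the lock. Create only succeeds if the key does not
        			// already exist, so only one goroutine can hold the lock at a time.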
  5589  			for {
  5590  				last, err := kv.Create("MY_LOCK", []byte("Z"))
  5591  				if err != nil {
  5592  					select {
  5593  					case <-time.After(10 * time.Millisecond):
  5594  						continue
  5595  					case <-ctx.Done():
  5596  						return
  5597  					}
  5598  				}
  5599  
  5600  				if v := atomic.AddInt64(&tracker, 1); v != 1 {
  5601  					t.Logf("TRACKER NOT 1 -> %d\n", v)
  5602  					cancel()
  5603  				}
  5604  
  5605  				time.Sleep(10 * time.Millisecond)
  5606  				if v := atomic.AddInt64(&tracker, -1); v != 0 {
  5607  					t.Logf("TRACKER NOT 0 AFTER RELEASE -> %d\n", v)
  5608  					cancel()
  5609  				}
  5610  
  5611  				err = kv.Delete("MY_LOCK", nats.LastRevision(last))
  5612  				if err != nil {
  5613  					t.Logf("Could not unlock for last %d: %v", last, err)
  5614  				}
  5615  
  5616  				if ctx.Err() != nil {
  5617  					return
  5618  				}
  5619  			}
  5620  		}()
  5621  	}
  5622  
  5623  	close(start)
  5624  	wg.Wait()
  5625  }
  5626  
  5627  func TestNoRaceJetStreamSuperClusterStreamMoveLongRTT(t *testing.T) {
  5628  	// Make C2 far away.
  5629  	gwm := gwProxyMap{
  5630  		"C2": &gwProxy{
  5631  			rtt:  20 * time.Millisecond,
  5632  			up:   1 * 1024 * 1024 * 1024, // 1gbit
  5633  			down: 1 * 1024 * 1024 * 1024, // 1gbit
  5634  		},
  5635  	}
  5636  	sc := createJetStreamTaggedSuperClusterWithGWProxy(t, gwm)
  5637  	defer sc.shutdown()
  5638  
  5639  	nc, js := jsClientConnect(t, sc.randomServer())
  5640  	defer nc.Close()
  5641  
  5642  	cfg := &nats.StreamConfig{
  5643  		Name:      "TEST",
  5644  		Subjects:  []string{"chunk.*"},
  5645  		Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}},
  5646  		Replicas:  3,
  5647  	}
  5648  
  5649  	// Place a stream in C1.
  5650  	_, err := js.AddStream(cfg, nats.MaxWait(10*time.Second))
  5651  	require_NoError(t, err)
  5652  
  5653  	chunk := bytes.Repeat([]byte("Z"), 1000*1024) // ~1MB
  5654  	// 256 MB
  5655  	for i := 0; i < 256; i++ {
  5656  		subj := fmt.Sprintf("chunk.%d", i)
  5657  		js.PublishAsync(subj, chunk)
  5658  	}
  5659  	select {
  5660  	case <-js.PublishAsyncComplete():
  5661  	case <-time.After(10 * time.Second):
  5662  		t.Fatalf("Did not receive completion signal")
  5663  	}
  5664  
  5665  	// C2, slow RTT.
  5666  	cfg.Placement = &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}}
  5667  	_, err = js.UpdateStream(cfg)
  5668  	require_NoError(t, err)
  5669  
  5670  	checkFor(t, 20*time.Second, time.Second, func() error {
  5671  		si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second))
  5672  		if err != nil {
  5673  			return err
  5674  		}
  5675  		if si.Cluster.Name != "C2" {
  5676  			return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name)
  5677  		}
  5678  		if si.Cluster.Leader == _EMPTY_ {
  5679  			return fmt.Errorf("No leader yet")
  5680  		} else if !strings.HasPrefix(si.Cluster.Leader, "C2-") {
  5681  			return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader)
  5682  		}
  5683  		// Now we want to see that we shrink back to original.
  5684  		if len(si.Cluster.Replicas) != cfg.Replicas-1 {
  5685  			return fmt.Errorf("Expected %d replicas, got %d", cfg.Replicas-1, len(si.Cluster.Replicas))
  5686  		}
  5687  		return nil
  5688  	})
  5689  }
  5690  
  5691  // https://github.com/nats-io/nats-server/issues/3455
  5692  func TestNoRaceJetStreamConcurrentPullConsumerBatch(t *testing.T) {
  5693  	s := RunBasicJetStreamServer(t)
  5694  	defer s.Shutdown()
  5695  
  5696  	nc, js := jsClientConnect(t, s)
  5697  	defer nc.Close()
  5698  
  5699  	_, err := js.AddStream(&nats.StreamConfig{
  5700  		Name:      "TEST",
  5701  		Subjects:  []string{"ORDERS.*"},
  5702  		Storage:   nats.MemoryStorage,
  5703  		Retention: nats.WorkQueuePolicy,
  5704  	})
  5705  	require_NoError(t, err)
  5706  
  5707  	toSend := int32(100_000)
  5708  
  5709  	for i := 0; i < 100_000; i++ {
  5710  		subj := fmt.Sprintf("ORDERS.%d", i+1)
  5711  		js.PublishAsync(subj, []byte("BUY"))
  5712  	}
  5713  	select {
  5714  	case <-js.PublishAsyncComplete():
  5715  	case <-time.After(5 * time.Second):
  5716  		t.Fatalf("Did not receive completion signal")
  5717  	}
  5718  
  5719  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5720  		Durable:       "PROCESSOR",
  5721  		AckPolicy:     nats.AckExplicitPolicy,
  5722  		MaxAckPending: 5000,
  5723  	})
  5724  	require_NoError(t, err)
  5725  
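        	// Bind two pull subscribers, on separate connections, to the same durable so
        	// their fetches compete for the same pending messages.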
  5726  	nc, js = jsClientConnect(t, s)
  5727  	defer nc.Close()
  5728  
  5729  	sub1, err := js.PullSubscribe(_EMPTY_, _EMPTY_, nats.Bind("TEST", "PROCESSOR"))
  5730  	require_NoError(t, err)
  5731  
  5732  	nc, js = jsClientConnect(t, s)
  5733  	defer nc.Close()
  5734  
  5735  	sub2, err := js.PullSubscribe(_EMPTY_, _EMPTY_, nats.Bind("TEST", "PROCESSOR"))
  5736  	require_NoError(t, err)
  5737  
  5738  	startCh := make(chan bool)
  5739  
  5740  	var received int32
  5741  
  5742  	wg := sync.WaitGroup{}
  5743  
  5744  	fetchSize := 1000
  5745  	fetch := func(sub *nats.Subscription) {
  5746  		<-startCh
  5747  		defer wg.Done()
  5748  
  5749  		for {
  5750  			msgs, err := sub.Fetch(fetchSize, nats.MaxWait(time.Second))
  5751  			if atomic.AddInt32(&received, int32(len(msgs))) >= toSend {
  5752  				break
  5753  			}
  5754  			// We should always receive a full batch here unless this is the last competing fetch.
  5755  			if err != nil || len(msgs) != fetchSize {
  5756  				break
  5757  			}
  5758  			for _, m := range msgs {
  5759  				m.Ack()
  5760  			}
  5761  		}
  5762  	}
  5763  
  5764  	wg.Add(2)
  5765  
  5766  	go fetch(sub1)
  5767  	go fetch(sub2)
  5768  
  5769  	close(startCh)
  5770  
  5771  	wg.Wait()
  5772  	require_True(t, received == toSend)
  5773  }
  5774  
  5775  func TestNoRaceJetStreamManyPullConsumersNeedAckOptimization(t *testing.T) {
  5776  	// Uncomment to run. Do not want as part of Travis tests atm.
  5777  	// Run with cpu and memory profiling to make sure we have improved.
  5778  	skip(t)
  5779  
  5780  	s := RunBasicJetStreamServer(t)
  5781  	defer s.Shutdown()
  5782  
  5783  	nc, js := jsClientConnect(t, s)
  5784  	defer nc.Close()
  5785  
  5786  	_, err := js.AddStream(&nats.StreamConfig{
  5787  		Name:      "ORDERS",
  5788  		Subjects:  []string{"ORDERS.*"},
  5789  		Storage:   nats.MemoryStorage,
  5790  		Retention: nats.InterestPolicy,
  5791  	})
  5792  	require_NoError(t, err)
  5793  
  5794  	toSend := 100_000
  5795  	numConsumers := 500
  5796  
  5797  	// Create 500 consumers
  5798  	for i := 1; i <= numConsumers; i++ {
  5799  		_, err := js.AddConsumer("ORDERS", &nats.ConsumerConfig{
  5800  			Durable:       fmt.Sprintf("ORDERS_%d", i),
  5801  			FilterSubject: fmt.Sprintf("ORDERS.%d", i),
  5802  			AckPolicy:     nats.AckAllPolicy,
  5803  		})
  5804  		require_NoError(t, err)
  5805  	}
  5806  
  5807  	for i := 1; i <= toSend; i++ {
  5808  		subj := fmt.Sprintf("ORDERS.%d", i%numConsumers+1)
  5809  		js.PublishAsync(subj, []byte("HELLO"))
  5810  	}
  5811  	select {
  5812  	case <-js.PublishAsyncComplete():
  5813  	case <-time.After(5 * time.Second):
  5814  		t.Fatalf("Did not receive completion signal")
  5815  	}
  5816  
  5817  	sub, err := js.PullSubscribe("ORDERS.500", "ORDERS_500")
  5818  	require_NoError(t, err)
  5819  
  5820  	fetchSize := toSend / numConsumers
  5821  	msgs, err := sub.Fetch(fetchSize, nats.MaxWait(time.Second))
  5822  	require_NoError(t, err)
  5823  
  5824  	last := msgs[len(msgs)-1]
  5825  	last.AckSync()
  5826  }
  5827  
  5828  // https://github.com/nats-io/nats-server/issues/3499
  5829  func TestNoRaceJetStreamDeleteConsumerWithInterestStreamAndHighSeqs(t *testing.T) {
  5830  	s := RunBasicJetStreamServer(t)
  5831  	defer s.Shutdown()
  5832  
  5833  	// Client for API requests.
  5834  	nc, js := jsClientConnect(t, s)
  5835  	defer nc.Close()
  5836  
  5837  	_, err := js.AddStream(&nats.StreamConfig{
  5838  		Name:      "TEST",
  5839  		Subjects:  []string{"log.>"},
  5840  		Retention: nats.InterestPolicy,
  5841  	})
  5842  	require_NoError(t, err)
  5843  
  5844  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5845  		Durable:   "c",
  5846  		AckPolicy: nats.AckExplicitPolicy,
  5847  	})
  5848  	require_NoError(t, err)
  5849  
  5850  	// Set baseline for time to delete so we can see linear increase as sequence numbers increase.
  5851  	start := time.Now()
  5852  	err = js.DeleteConsumer("TEST", "c")
  5853  	require_NoError(t, err)
  5854  	elapsed := time.Since(start)
  5855  
  5856  	// Crank up sequence numbers.
  5857  	msg := []byte(strings.Repeat("ZZZ", 128))
  5858  	for i := 0; i < 5_000_000; i++ {
  5859  		nc.Publish("log.Z", msg)
  5860  	}
  5861  	nc.Flush()
  5862  
  5863  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5864  		Durable:   "c",
  5865  		AckPolicy: nats.AckExplicitPolicy,
  5866  	})
  5867  	require_NoError(t, err)
  5868  
  5869  	// We have a bug that spins unnecessarily through all the sequences between this consumer's
  5870  	// ack floor (0) and the last sequence for the stream. We detect it by checking whether the time
  5871  	// to delete is 100x more. It should be about the same since both times no messages exist in the stream.
  5872  	start = time.Now()
  5873  	err = js.DeleteConsumer("TEST", "c")
  5874  	require_NoError(t, err)
  5875  
  5876  	if e := time.Since(start); e > 100*elapsed {
  5877  		t.Fatalf("Consumer delete took too long: %v vs baseline %v", e, elapsed)
  5878  	}
  5879  }
  5880  
  5881  // Bug where we encode a timestamp that, upon decode, causes an error which makes the server panic.
  5882  // This can happen on consumer redelivery since the adjusted timestamps can be in the future, and result
  5883  // in a negative encoding. If that encoding was exactly -1 seconds, it would cause decodeConsumerState to fail
  5884  // and the server to panic.
  5885  func TestNoRaceEncodeConsumerStateBug(t *testing.T) {
  5886  	for i := 0; i < 200_000; i++ {
  5887  		// Pretend we redelivered and updated the timestamp to reflect the new start time for expiration.
  5888  		// The bug will trip when time.Now() rounded to seconds in encode is 1 second below the truncated version
  5889  		// of pending.
  5890  		pending := Pending{Sequence: 1, Timestamp: time.Now().Add(time.Second).UnixNano()}
  5891  		state := ConsumerState{
  5892  			Delivered: SequencePair{Consumer: 1, Stream: 1},
  5893  			Pending:   map[uint64]*Pending{1: &pending},
  5894  		}
  5895  		buf := encodeConsumerState(&state)
  5896  		_, err := decodeConsumerState(buf)
  5897  		require_NoError(t, err)
  5898  	}
  5899  }
  5900  
  5901  // Performance impact on stream ingress with large number of consumers.
  5902  func TestNoRaceJetStreamLargeNumConsumersPerfImpact(t *testing.T) {
  5903  	skip(t)
  5904  
  5905  	s := RunBasicJetStreamServer(t)
  5906  	defer s.Shutdown()
  5907  
  5908  	// Client for API requests.
  5909  	nc, js := jsClientConnect(t, s)
  5910  	defer nc.Close()
  5911  
  5912  	_, err := js.AddStream(&nats.StreamConfig{
  5913  		Name:     "TEST",
  5914  		Subjects: []string{"foo"},
  5915  	})
  5916  	require_NoError(t, err)
  5917  
  5918  	// Baseline with no consumers.
  5919  	toSend := 1_000_000
  5920  	start := time.Now()
  5921  	for i := 0; i < toSend; i++ {
  5922  		js.PublishAsync("foo", []byte("OK"))
  5923  	}
  5924  	<-js.PublishAsyncComplete()
  5925  	tt := time.Since(start)
  5926  	fmt.Printf("Base time is %v\n", tt)
  5927  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5928  
  5929  	err = js.PurgeStream("TEST")
  5930  	require_NoError(t, err)
  5931  
  5932  	// Now add in 10 idle consumers.
  5933  	for i := 1; i <= 10; i++ {
  5934  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5935  			Durable:   fmt.Sprintf("d-%d", i),
  5936  			AckPolicy: nats.AckExplicitPolicy,
  5937  		})
  5938  		require_NoError(t, err)
  5939  	}
  5940  
  5941  	start = time.Now()
  5942  	for i := 0; i < toSend; i++ {
  5943  		js.PublishAsync("foo", []byte("OK"))
  5944  	}
  5945  	<-js.PublishAsyncComplete()
  5946  	tt = time.Since(start)
  5947  	fmt.Printf("\n10 consumers time is %v\n", tt)
  5948  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5949  
  5950  	err = js.PurgeStream("TEST")
  5951  	require_NoError(t, err)
  5952  
  5953  	// Now add in 90 more idle consumers.
  5954  	for i := 11; i <= 100; i++ {
  5955  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5956  			Durable:   fmt.Sprintf("d-%d", i),
  5957  			AckPolicy: nats.AckExplicitPolicy,
  5958  		})
  5959  		require_NoError(t, err)
  5960  	}
  5961  
  5962  	start = time.Now()
  5963  	for i := 0; i < toSend; i++ {
  5964  		js.PublishAsync("foo", []byte("OK"))
  5965  	}
  5966  	<-js.PublishAsyncComplete()
  5967  	tt = time.Since(start)
  5968  	fmt.Printf("\n100 consumers time is %v\n", tt)
  5969  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5970  
  5971  	err = js.PurgeStream("TEST")
  5972  	require_NoError(t, err)
  5973  
  5974  	// Now add in 900 more
  5975  	for i := 101; i <= 1000; i++ {
  5976  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  5977  			Durable:   fmt.Sprintf("d-%d", i),
  5978  			AckPolicy: nats.AckExplicitPolicy,
  5979  		})
  5980  		require_NoError(t, err)
  5981  	}
  5982  
  5983  	start = time.Now()
  5984  	for i := 0; i < toSend; i++ {
  5985  		js.PublishAsync("foo", []byte("OK"))
  5986  	}
  5987  	<-js.PublishAsyncComplete()
  5988  	tt = time.Since(start)
  5989  	fmt.Printf("\n1000 consumers time is %v\n", tt)
  5990  	fmt.Printf("%.0f msgs/sec\n", float64(toSend)/tt.Seconds())
  5991  }
  5992  
  5993  // Performance impact on large number of consumers but sparse delivery.
  5994  func TestNoRaceJetStreamLargeNumConsumersSparseDelivery(t *testing.T) {
  5995  	skip(t)
  5996  
  5997  	s := RunBasicJetStreamServer(t)
  5998  	defer s.Shutdown()
  5999  
  6000  	// Client for API requests.
  6001  	nc, js := jsClientConnect(t, s)
  6002  	defer nc.Close()
  6003  
  6004  	_, err := js.AddStream(&nats.StreamConfig{
  6005  		Name:     "TEST",
  6006  		Subjects: []string{"ID.*"},
  6007  	})
  6008  	require_NoError(t, err)
  6009  
  6010  	// Now add in ~10k consumers on different subjects.
  6011  	for i := 3; i <= 10_000; i++ {
  6012  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  6013  			Durable:       fmt.Sprintf("d-%d", i),
  6014  			FilterSubject: fmt.Sprintf("ID.%d", i),
  6015  			AckPolicy:     nats.AckNonePolicy,
  6016  		})
  6017  		require_NoError(t, err)
  6018  	}
  6019  
  6020  	toSend := 100_000
  6021  
  6022  	// Bind a consumer to ID.2.
  6023  	var received int
  6024  	done := make(chan bool)
  6025  
  6026  	nc, js = jsClientConnect(t, s)
  6027  	defer nc.Close()
  6028  
  6029  	mh := func(m *nats.Msg) {
  6030  		received++
  6031  		if received >= toSend {
  6032  			close(done)
  6033  		}
  6034  	}
  6035  	_, err = js.Subscribe("ID.2", mh)
  6036  	require_NoError(t, err)
  6037  
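        	// Also watch ID.1, which gets a single message published last, to measure the
        	// latency of signal processing across all the consumers.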
  6038  	last := make(chan bool)
  6039  	_, err = js.Subscribe("ID.1", func(_ *nats.Msg) { close(last) })
  6040  	require_NoError(t, err)
  6041  
  6042  	nc, _ = jsClientConnect(t, s)
  6043  	defer nc.Close()
  6044  	js, err = nc.JetStream(nats.PublishAsyncMaxPending(8 * 1024))
  6045  	require_NoError(t, err)
  6046  
  6047  	start := time.Now()
  6048  	for i := 0; i < toSend; i++ {
  6049  		js.PublishAsync("ID.2", []byte("ok"))
  6050  	}
  6051  	// Check latency for this one message.
  6052  	// This will show the issue better than throughput which can bypass signal processing.
  6053  	js.PublishAsync("ID.1", []byte("ok"))
  6054  
  6055  	select {
  6056  	case <-done:
  6057  		break
  6058  	case <-time.After(10 * time.Second):
  6059  		t.Fatalf("Failed to receive all messages: %d of %d\n", received, toSend)
  6060  	}
  6061  
  6062  	tt := time.Since(start)
  6063  	fmt.Printf("Took %v to receive %d msgs\n", tt, toSend)
  6064  	fmt.Printf("%.0f msgs/s\n", float64(toSend)/tt.Seconds())
  6065  
  6066  	select {
  6067  	case <-last:
  6068  		break
  6069  	case <-time.After(30 * time.Second):
  6070  		t.Fatalf("Failed to receive last message\n")
  6071  	}
  6072  	lt := time.Since(start)
  6073  
  6074  	fmt.Printf("Took %v to receive last msg\n", lt)
  6075  }
  6076  
  6077  func TestNoRaceJetStreamEndToEndLatency(t *testing.T) {
  6078  	s := RunBasicJetStreamServer(t)
  6079  	defer s.Shutdown()
  6080  
  6081  	// Client for API requests.
  6082  	nc, js := jsClientConnect(t, s)
  6083  	defer nc.Close()
  6084  
  6085  	_, err := js.AddStream(&nats.StreamConfig{
  6086  		Name:     "TEST",
  6087  		Subjects: []string{"foo"},
  6088  	})
  6089  	require_NoError(t, err)
  6090  
  6091  	nc, js = jsClientConnect(t, s)
  6092  	defer nc.Close()
  6093  
  6094  	var sent time.Time
  6095  	var max time.Duration
  6096  	next := make(chan struct{})
  6097  
  6098  	mh := func(m *nats.Msg) {
  6099  		received := time.Now()
  6100  		tt := received.Sub(sent)
  6101  		if max == 0 || tt > max {
  6102  			max = tt
  6103  		}
  6104  		next <- struct{}{}
  6105  	}
  6106  	sub, err := js.Subscribe("foo", mh)
  6107  	require_NoError(t, err)
  6108  
  6109  	nc, js = jsClientConnect(t, s)
  6110  	defer nc.Close()
  6111  
  6112  	toSend := 50_000
  6113  	for i := 0; i < toSend; i++ {
  6114  		sent = time.Now()
  6115  		js.Publish("foo", []byte("ok"))
  6116  		<-next
  6117  	}
  6118  	sub.Unsubscribe()
  6119  
  6120  	if max > 250*time.Millisecond {
  6121  		t.Fatalf("Expected max latency to be < 250ms, got %v", max)
  6122  	}
  6123  }
  6124  
  6125  func TestNoRaceJetStreamClusterEnsureWALCompact(t *testing.T) {
  6126  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6127  	defer c.shutdown()
  6128  
  6129  	nc, js := jsClientConnect(t, c.randomServer())
  6130  	defer nc.Close()
  6131  
  6132  	_, err := js.AddStream(&nats.StreamConfig{
  6133  		Name:     "TEST",
  6134  		Subjects: []string{"foo"},
  6135  		Replicas: 3,
  6136  	})
  6137  	require_NoError(t, err)
  6138  
  6139  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  6140  		Durable:        "dlc",
  6141  		DeliverSubject: "zz",
  6142  		Replicas:       3,
  6143  	})
  6144  	require_NoError(t, err)
  6145  
  6146  	// Force snapshot on stream leader.
  6147  	sl := c.streamLeader(globalAccountName, "TEST")
  6148  	mset, err := sl.GlobalAccount().lookupStream("TEST")
  6149  	require_NoError(t, err)
  6150  	node := mset.raftNode()
  6151  	require_True(t, node != nil)
  6152  
  6153  	err = node.InstallSnapshot(mset.stateSnapshot())
  6154  	require_NoError(t, err)
  6155  
  6156  	// Now publish more than should be needed to cause an additional snapshot.
  6157  	ns := 75_000
  6158  	for i := 0; i <= ns; i++ {
  6159  		_, err := js.Publish("foo", []byte("bar"))
  6160  		require_NoError(t, err)
  6161  	}
  6162  
  6163  	// Grab progress and use that to look into WAL entries.
  6164  	_, _, applied := node.Progress()
  6165  	// If ne == ns that means snapshots and compacts were not happening when
  6166  	// they should have been.
  6167  	if ne, _ := node.Applied(applied); ne >= uint64(ns) {
  6168  		t.Fatalf("Did not snapshot and compact the raft WAL, entries == %d", ne)
  6169  	}
  6170  
  6171  	// Now check consumer.
  6172  	// Force snapshot on the consumer leader.
  6173  	cl := c.consumerLeader(globalAccountName, "TEST", "dlc")
  6174  	mset, err = cl.GlobalAccount().lookupStream("TEST")
  6175  	require_NoError(t, err)
  6176  	o := mset.lookupConsumer("dlc")
  6177  	require_True(t, o != nil)
  6178  
  6179  	node = o.raftNode()
  6180  	require_True(t, node != nil)
  6181  
  6182  	snap, err := o.store.EncodedState()
  6183  	require_NoError(t, err)
  6184  	err = node.InstallSnapshot(snap)
  6185  	require_NoError(t, err)
  6186  
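        	// Consume and ack everything on the push consumer so its applied state advances
        	// enough that its raft WAL should also snapshot and compact.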
  6187  	received, done := 0, make(chan bool, 1)
  6188  
  6189  	nc.Subscribe("zz", func(m *nats.Msg) {
  6190  		received++
  6191  		if received >= ns {
  6192  			select {
  6193  			case done <- true:
  6194  			default:
  6195  			}
  6196  		}
  6197  		m.Ack()
  6198  	})
  6199  
  6200  	select {
  6201  	case <-done:
  6202  		return
  6203  	case <-time.After(10 * time.Second):
  6204  		t.Fatalf("Did not receive all %d msgs, only %d", ns, received)
  6205  	}
  6206  
  6207  	// Do same trick and check that WAL was compacted.
  6208  	// Grab progress and use that to look into WAL entries.
  6209  	_, _, applied = node.Progress()
  6210  	// If ne == ns that means snapshots and compacts were not happening when
  6211  	// they should have been.
  6212  	if ne, _ := node.Applied(applied); ne >= uint64(ns) {
  6213  		t.Fatalf("Did not snapshot and compact the raft WAL, entries == %d", ne)
  6214  	}
  6215  }
  6216  
  6217  func TestNoRaceFileStoreStreamMaxAgePerformance(t *testing.T) {
  6218  	// Uncomment to run.
  6219  	skip(t)
  6220  
  6221  	storeDir := t.TempDir()
  6222  	maxAge := 5 * time.Second
  6223  
  6224  	fs, err := newFileStore(
  6225  		FileStoreConfig{StoreDir: storeDir},
  6226  		StreamConfig{Name: "MA",
  6227  			Subjects: []string{"foo.*"},
  6228  			MaxAge:   maxAge,
  6229  			Storage:  FileStorage},
  6230  	)
  6231  	require_NoError(t, err)
  6232  	defer fs.Stop()
  6233  
  6234  	// Simulate a callback similar to consumers decrementing.
  6235  	var mu sync.RWMutex
  6236  	var pending int64
  6237  
  6238  	fs.RegisterStorageUpdates(func(md, bd int64, seq uint64, subj string) {
  6239  		mu.Lock()
  6240  		defer mu.Unlock()
  6241  		pending += md
  6242  	})
  6243  
  6244  	start, num, subj := time.Now(), 0, "foo.foo"
  6245  
  6246  	timeout := start.Add(maxAge)
  6247  	for time.Now().Before(timeout) {
  6248  		// We will store in blocks of 100.
  6249  		for i := 0; i < 100; i++ {
  6250  			_, _, err := fs.StoreMsg(subj, nil, []byte("Hello World"))
  6251  			require_NoError(t, err)
  6252  			num++
  6253  		}
  6254  	}
  6255  	elapsed := time.Since(start)
  6256  	fmt.Printf("Took %v to store %d\n", elapsed, num)
  6257  	fmt.Printf("%.0f msgs/sec\n", float64(num)/elapsed.Seconds())
  6258  
  6259  	// Now keep running for 2x longer knowing we are expiring messages in the background.
  6260  	// We want to see the effect on performance.
  6261  
  6262  	start = time.Now()
  6263  	timeout = start.Add(maxAge * 2)
  6264  
  6265  	for time.Now().Before(timeout) {
  6266  		// We will store in blocks of 100.
  6267  		for i := 0; i < 100; i++ {
  6268  			_, _, err := fs.StoreMsg(subj, nil, []byte("Hello World"))
  6269  			require_NoError(t, err)
  6270  			num++
  6271  		}
  6272  	}
  6273  	elapsed = time.Since(start)
  6274  	fmt.Printf("Took %v to store %d\n", elapsed, num)
  6275  	fmt.Printf("%.0f msgs/sec\n", float64(num)/elapsed.Seconds())
  6276  }
  6277  
  6278  // SequenceSet memory tests vs dmaps.
  6279  func TestNoRaceSeqSetSizeComparison(t *testing.T) {
  6280  	// Create 5M random entries (dupes possible but ok for this test) out of 8M range.
  6281  	num := 5_000_000
  6282  	max := 7_000_000
  6283  
  6284  	seqs := make([]uint64, 0, num)
  6285  	for i := 0; i < num; i++ {
  6286  		n := uint64(rand.Int63n(int64(max + 1)))
  6287  		seqs = append(seqs, n)
  6288  	}
  6289  
  6290  	runtime.GC()
  6291  	// Disable to get stable results.
  6292  	gcp := debug.SetGCPercent(-1)
  6293  	defer debug.SetGCPercent(gcp)
  6294  
  6295  	mem := runtime.MemStats{}
  6296  	runtime.ReadMemStats(&mem)
  6297  	inUseBefore := mem.HeapInuse
  6298  
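        	// Baseline: heap growth for a plain map of the sequences.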
  6299  	dmap := make(map[uint64]struct{}, num)
  6300  	for _, n := range seqs {
  6301  		dmap[n] = struct{}{}
  6302  	}
  6303  	runtime.ReadMemStats(&mem)
  6304  	dmapUse := mem.HeapInuse - inUseBefore
  6305  	inUseBefore = mem.HeapInuse
  6306  
  6307  	// Now do SequenceSet on same dataset.
  6308  	var sset avl.SequenceSet
  6309  	for _, n := range seqs {
  6310  		sset.Insert(n)
  6311  	}
  6312  
  6313  	runtime.ReadMemStats(&mem)
  6314  	seqSetUse := mem.HeapInuse - inUseBefore
  6315  
  6316  	if seqSetUse > 2*1024*1024 {
  6317  		t.Fatalf("Expected SequenceSet size to be < 2M, got %v", friendlyBytes(int64(seqSetUse)))
  6318  	}
  6319  	if seqSetUse*50 > dmapUse {
  6320  		t.Fatalf("Expected SequenceSet to be at least 50x better than the dmap approach: %v vs %v",
  6321  			friendlyBytes(int64(seqSetUse)),
  6322  			friendlyBytes(int64(dmapUse)),
  6323  		)
  6324  	}
  6325  }
  6326  
  6327  // FilteredState for ">" with large interior deletes was very slow.
  6328  func TestNoRaceFileStoreFilteredStateWithLargeDeletes(t *testing.T) {
  6329  	storeDir := t.TempDir()
  6330  
  6331  	fs, err := newFileStore(
  6332  		FileStoreConfig{StoreDir: storeDir, BlockSize: 4096},
  6333  		StreamConfig{Name: "zzz", Subjects: []string{"foo"}, Storage: FileStorage},
  6334  	)
  6335  	require_NoError(t, err)
  6336  	defer fs.Stop()
  6337  
  6338  	subj, msg := "foo", []byte("Hello World")
  6339  
  6340  	toStore := 500_000
  6341  	for i := 0; i < toStore; i++ {
  6342  		_, _, err := fs.StoreMsg(subj, nil, msg)
  6343  		require_NoError(t, err)
  6344  	}
  6345  
  6346  	// Now delete every other one.
  6347  	for seq := 2; seq <= toStore; seq += 2 {
  6348  		_, err := fs.RemoveMsg(uint64(seq))
  6349  		require_NoError(t, err)
  6350  	}
  6351  
  6352  	runtime.GC()
  6353  	// Disable to get stable results.
  6354  	gcp := debug.SetGCPercent(-1)
  6355  	defer debug.SetGCPercent(gcp)
  6356  
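        	// Time FilteredState across everything; with half the messages deleted this
        	// used to be very slow.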
  6357  	start := time.Now()
  6358  	fss := fs.FilteredState(1, _EMPTY_)
  6359  	elapsed := time.Since(start)
  6360  
  6361  	require_True(t, fss.Msgs == uint64(toStore/2))
  6362  	require_True(t, elapsed < 500*time.Microsecond)
  6363  }
  6364  
  6365  // ConsumerInfo seems to be called quite a bit more than we had anticipated.
  6366  // Under certain circumstances, since we reset num pending, this can be very costly.
  6367  // We will use the fast path to alleviate that performance bottleneck but also make
  6368  // sure we are still being accurate.
  6369  func TestNoRaceJetStreamClusterConsumerInfoSpeed(t *testing.T) {
  6370  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6371  	defer c.shutdown()
  6372  
  6373  	c.waitOnLeader()
  6374  	server := c.randomNonLeader()
  6375  
  6376  	nc, js := jsClientConnect(t, server)
  6377  	defer nc.Close()
  6378  
  6379  	_, err := js.AddStream(&nats.StreamConfig{
  6380  		Name:     "TEST",
  6381  		Subjects: []string{"events.>"},
  6382  		Replicas: 3,
  6383  	})
  6384  	require_NoError(t, err)
  6385  
  6386  	// The issue is compounded when we have lots of different subjects captured
  6387  	// by a terminal fwc. The consumer will have a terminal pwc.
  6388  	// Here make all subjects unique.
  6389  
  6390  	sub, err := js.PullSubscribe("events.*", "DLC")
  6391  	require_NoError(t, err)
  6392  
  6393  	toSend := 250_000
  6394  	for i := 0; i < toSend; i++ {
  6395  		subj := fmt.Sprintf("events.%d", i+1)
  6396  		js.PublishAsync(subj, []byte("ok"))
  6397  	}
  6398  	select {
  6399  	case <-js.PublishAsyncComplete():
  6400  	case <-time.After(5 * time.Second):
  6401  		t.Fatalf("Did not receive completion signal")
  6402  	}
  6403  
  6404  	checkNumPending := func(expected int) {
  6405  		t.Helper()
  6406  		start := time.Now()
  6407  		ci, err := js.ConsumerInfo("TEST", "DLC")
  6408  		require_NoError(t, err)
  6409  		// Make sure these are fast now.
  6410  		if elapsed := time.Since(start); elapsed > 5*time.Millisecond {
  6411  			t.Fatalf("ConsumerInfo took too long: %v", elapsed)
  6412  		}
  6413  		// Make sure pending == expected.
  6414  		if ci.NumPending != uint64(expected) {
  6415  			t.Fatalf("Expected %d NumPending, got %d", expected, ci.NumPending)
  6416  		}
  6417  	}
  6418  	// Make sure in simple case it is correct.
  6419  	checkNumPending(toSend)
  6420  
  6421  	// Do a few acks.
  6422  	toAck := 25
  6423  	for _, m := range fetchMsgs(t, sub, 25, time.Second) {
  6424  		err = m.AckSync()
  6425  		require_NoError(t, err)
  6426  	}
  6427  	checkNumPending(toSend - toAck)
  6428  
  6429  	// Now do a purge such that we only keep so many.
  6430  	// We want to make sure we do the right thing here and have correct calculations.
  6431  	toKeep := 100_000
  6432  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Keep: uint64(toKeep)})
  6433  	require_NoError(t, err)
  6434  
  6435  	checkNumPending(toKeep)
  6436  }
  6437  
  6438  func TestNoRaceJetStreamKVAccountWithServerRestarts(t *testing.T) {
  6439  	// Uncomment to run. Needs a fast machine to not time out on the KeyValue lookup.
  6440  	skip(t)
  6441  
  6442  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6443  	defer c.shutdown()
  6444  
  6445  	nc, js := jsClientConnect(t, c.randomServer())
  6446  	defer nc.Close()
  6447  
  6448  	_, err := js.CreateKeyValue(&nats.KeyValueConfig{
  6449  		Bucket:   "TEST",
  6450  		Replicas: 3,
  6451  	})
  6452  	require_NoError(t, err)
  6453  
  6454  	npubs := 10_000
  6455  	par := 8
  6456  	iter := 2
  6457  	nsubjs := 250
  6458  
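        	// Writers put random keys as fast as they can, reconnecting to another server
        	// if a put fails while servers are being restarted underneath them.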
  6459  	wg := sync.WaitGroup{}
  6460  	putKeys := func() {
  6461  		wg.Add(1)
  6462  		go func() {
  6463  			defer wg.Done()
  6464  			nc, js := jsClientConnect(t, c.randomServer())
  6465  			defer nc.Close()
  6466  			kv, err := js.KeyValue("TEST")
  6467  			require_NoError(t, err)
  6468  
  6469  			for i := 0; i < npubs; i++ {
  6470  				subj := fmt.Sprintf("KEY-%d", rand.Intn(nsubjs))
  6471  				if _, err := kv.PutString(subj, "hello"); err != nil {
  6472  					nc, js := jsClientConnect(t, c.randomServer())
  6473  					defer nc.Close()
  6474  					kv, err = js.KeyValue("TEST")
  6475  					require_NoError(t, err)
  6476  				}
  6477  			}
  6478  		}()
  6479  	}
  6480  
  6481  	restartServers := func() {
  6482  		time.Sleep(2 * time.Second)
  6483  		// Rotate through and restart the servers.
  6484  		for _, server := range c.servers {
  6485  			server.Shutdown()
  6486  			restarted := c.restartServer(server)
  6487  			checkFor(t, time.Second, 200*time.Millisecond, func() error {
  6488  				hs := restarted.healthz(&HealthzOptions{
  6489  					JSEnabled:    true,
  6490  					JSServerOnly: true,
  6491  				})
  6492  				if hs.Error != _EMPTY_ {
  6493  					return errors.New(hs.Error)
  6494  				}
  6495  				return nil
  6496  			})
  6497  		}
  6498  		c.waitOnLeader()
  6499  		c.waitOnStreamLeader(globalAccountName, "KV_TEST")
  6500  	}
  6501  
  6502  	for n := 0; n < iter; n++ {
  6503  		for i := 0; i < par; i++ {
  6504  			putKeys()
  6505  		}
  6506  		restartServers()
  6507  	}
  6508  	wg.Wait()
  6509  
  6510  	nc, js = jsClientConnect(t, c.randomServer())
  6511  	defer nc.Close()
  6512  
  6513  	si, err := js.StreamInfo("KV_TEST")
  6514  	require_NoError(t, err)
  6515  	require_True(t, si.State.NumSubjects == uint64(nsubjs))
  6516  }
  6517  
  6518  // Test for consumer create time when the subject cardinality is high and the
  6519  // consumer is filtered with a wildcard that forces linear scans.
  6520  // We have an optimization that uses in-memory structures in the filestore to speed this up,
  6521  // but only when asking to scan all (DeliverAll).
  6522  func TestNoRaceJetStreamConsumerCreateTimeNumPending(t *testing.T) {
  6523  	s := RunBasicJetStreamServer(t)
  6524  	defer s.Shutdown()
  6525  
  6526  	nc, js := jsClientConnect(t, s)
  6527  	defer nc.Close()
  6528  
  6529  	_, err := js.AddStream(&nats.StreamConfig{
  6530  		Name:     "TEST",
  6531  		Subjects: []string{"events.>"},
  6532  	})
  6533  	require_NoError(t, err)
  6534  
  6535  	n := 500_000
  6536  	msg := bytes.Repeat([]byte("X"), 8*1024)
  6537  
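        	// Load the stream with 500k messages spread across ~100k distinct subjects so
        	// consumer creation has to compute NumPending over a large subject space.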
  6538  	for i := 0; i < n; i++ {
  6539  		subj := fmt.Sprintf("events.%d", rand.Intn(100_000))
  6540  		js.PublishAsync(subj, msg)
  6541  	}
  6542  	select {
  6543  	case <-js.PublishAsyncComplete():
  6544  	case <-time.After(5 * time.Second):
  6545  	}
  6546  
  6547  	// Should stay under 5ms now, but for Travis variability say 50ms.
  6548  	threshold := 50 * time.Millisecond
  6549  
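        	// Time consumer creates for three filter shapes: a wildcard filter, a literal
        	// subject, and the full wildcard.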
  6550  	start := time.Now()
  6551  	_, err = js.PullSubscribe("events.*", "dlc")
  6552  	require_NoError(t, err)
  6553  	if elapsed := time.Since(start); elapsed > threshold {
  6554  		t.Fatalf("Consumer create took longer than expected, %v vs %v", elapsed, threshold)
  6555  	}
  6556  
  6557  	start = time.Now()
  6558  	_, err = js.PullSubscribe("events.99999", "xxx")
  6559  	require_NoError(t, err)
  6560  	if elapsed := time.Since(start); elapsed > threshold {
  6561  		t.Fatalf("Consumer create took longer than expected, %v vs %v", elapsed, threshold)
  6562  	}
  6563  
  6564  	start = time.Now()
  6565  	_, err = js.PullSubscribe(">", "zzz")
  6566  	require_NoError(t, err)
  6567  	if elapsed := time.Since(start); elapsed > threshold {
  6568  		t.Fatalf("Consumer create took longer than expected, %v vs %v", elapsed, threshold)
  6569  	}
  6570  }
  6571  
  6572  func TestNoRaceJetStreamClusterGhostConsumers(t *testing.T) {
  6573  	c := createJetStreamClusterExplicit(t, "GHOST", 3)
  6574  	defer c.shutdown()
  6575  
  6576  	nc, js := jsClientConnect(t, c.randomServer())
  6577  	defer nc.Close()
  6578  
  6579  	_, err := js.AddStream(&nats.StreamConfig{
  6580  		Name:     "TEST",
  6581  		Subjects: []string{"events.>"},
  6582  		Replicas: 3,
  6583  	})
  6584  	require_NoError(t, err)
  6585  
  6586  	for i := 0; i < 10; i++ {
  6587  		for j := 0; j < 10; j++ {
  6588  			require_NoError(t, nc.Publish(fmt.Sprintf("events.%d.%d", i, j), []byte(`test`)))
  6589  		}
  6590  	}
  6591  
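        	// Repeatedly create short-lived ephemeral R1 memory consumers and fetch with them
        	// while the servers are restarted below; the test then checks that no orphaned
        	// ("ghost") consumer entries are left behind.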
  6592  	fetch := func(id int) {
  6593  		subject := fmt.Sprintf("events.%d.*", id)
  6594  		subscription, err := js.PullSubscribe(subject,
  6595  			_EMPTY_, // ephemeral consumer
  6596  			nats.DeliverAll(),
  6597  			nats.ReplayInstant(),
  6598  			nats.BindStream("TEST"),
  6599  			nats.ConsumerReplicas(1),
  6600  			nats.ConsumerMemoryStorage(),
  6601  		)
  6602  		if err != nil {
  6603  			return
  6604  		}
  6605  		defer subscription.Unsubscribe()
  6606  
  6607  		info, err := subscription.ConsumerInfo()
  6608  		if err != nil {
  6609  			return
  6610  		}
  6611  
  6612  		subscription.Fetch(int(info.NumPending))
  6613  	}
  6614  
  6615  	replay := func(ctx context.Context, id int) {
  6616  		for {
  6617  			select {
  6618  			case <-ctx.Done():
  6619  				return
  6620  			default:
  6621  				fetch(id)
  6622  			}
  6623  		}
  6624  	}
  6625  
  6626  	ctx, cancel := context.WithCancel(context.Background())
  6627  
  6628  	go replay(ctx, 0)
  6629  	go replay(ctx, 1)
  6630  	go replay(ctx, 2)
  6631  	go replay(ctx, 3)
  6632  	go replay(ctx, 4)
  6633  	go replay(ctx, 5)
  6634  	go replay(ctx, 6)
  6635  	go replay(ctx, 7)
  6636  	go replay(ctx, 8)
  6637  	go replay(ctx, 9)
  6638  
  6639  	time.Sleep(5 * time.Second)
  6640  
  6641  	for _, server := range c.servers {
  6642  		server.Shutdown()
  6643  		restarted := c.restartServer(server)
  6644  		checkFor(t, time.Second, 200*time.Millisecond, func() error {
  6645  			hs := restarted.healthz(&HealthzOptions{
  6646  				JSEnabled:    true,
  6647  				JSServerOnly: true,
  6648  			})
  6649  			if hs.Error != _EMPTY_ {
  6650  				return errors.New(hs.Error)
  6651  			}
  6652  			return nil
  6653  		})
  6654  		c.waitOnStreamLeader(globalAccountName, "TEST")
  6655  		time.Sleep(time.Second * 2)
  6656  		go replay(ctx, 5)
  6657  		go replay(ctx, 6)
  6658  		go replay(ctx, 7)
  6659  		go replay(ctx, 8)
  6660  		go replay(ctx, 9)
  6661  	}
  6662  
  6663  	time.Sleep(5 * time.Second)
  6664  	cancel()
  6665  
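        	// The consumer list API response carries a Missing list; entries there are
        	// presumably consumers the metadata layer still knows about but that could not be
        	// found, i.e. the ghosts this test is checking for.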
  6666  	getMissing := func() []string {
  6667  		m, err := nc.Request("$JS.API.CONSUMER.LIST.TEST", nil, time.Second*10)
  6668  		require_NoError(t, err)
  6669  
  6670  		var resp JSApiConsumerListResponse
  6671  		err = json.Unmarshal(m.Data, &resp)
  6672  		require_NoError(t, err)
  6673  		return resp.Missing
  6674  	}
  6675  
  6676  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  6677  		missing := getMissing()
  6678  		if len(missing) == 0 {
  6679  			return nil
  6680  		}
  6681  		return fmt.Errorf("Still have missing: %+v", missing)
  6682  	})
  6683  }
  6684  
  6685  // This is to test a publish slowdown and general instability experienced in a setup similar to this.
  6686  // We have feeder streams that are all sourced to an aggregate stream. All streams are interest retention.
  6687  // We want to monitor the avg publish time for the sync publishers to the feeder streams, the ingest rate to
  6688  // the aggregate stream, and general health of the consumers on the aggregate stream.
  6689  // Target publish rate is ~2k/s with publish time being ~40-60ms but remaining stable.
  6690  // We can also simulate max redeliveries that create interior deletes in streams.
  6691  func TestNoRaceJetStreamClusterF3Setup(t *testing.T) {
  6692  	// Uncomment to run. Needs to be on a pretty big machine. Do not want as part of Travis tests atm.
  6693  	skip(t)
  6694  
  6695  	// These and the settings below achieve ~60ms pub time on avg and ~2k msgs per sec inbound to the aggregate stream.
  6696  	// On my machine though.
  6697  	np := clusterProxy{
  6698  		rtt:  2 * time.Millisecond,
  6699  		up:   1 * 1024 * 1024 * 1024, // 1gbit
  6700  		down: 1 * 1024 * 1024 * 1024, // 1gbit
  6701  	}
  6702  
  6703  	// Test params.
  6704  	numSourceStreams := 20
  6705  	numConsumersPerSource := 1
  6706  	numPullersPerConsumer := 50
  6707  	numPublishers := 100
  6708  	setHighStartSequence := false
  6709  	simulateMaxRedeliveries := false
  6710  	maxBadPubTimes := uint32(20)
  6711  	badPubThresh := 500 * time.Millisecond
  6712  	testTime := 5 * time.Minute // make sure to do --timeout=65m
  6713  
  6714  	t.Logf("Starting Test: Total Test Time %v", testTime)
  6715  
  6716  	c := createJetStreamClusterWithNetProxy(t, "R3S", 3, &np)
  6717  	defer c.shutdown()
  6718  
  6719  	// Do some quick sanity checking for latency stuff.
  6720  	{
  6721  		nc, js := jsClientConnect(t, c.randomServer())
  6722  		defer nc.Close()
  6723  
  6724  		_, err := js.AddStream(&nats.StreamConfig{
  6725  			Name:      "TEST",
  6726  			Replicas:  3,
  6727  			Subjects:  []string{"foo"},
  6728  			Retention: nats.InterestPolicy,
  6729  		})
  6730  		require_NoError(t, err)
  6731  		defer js.DeleteStream("TEST")
  6732  
  6733  		sl := c.streamLeader(globalAccountName, "TEST")
  6734  		nc, js = jsClientConnect(t, sl)
  6735  		defer nc.Close()
  6736  		start := time.Now()
  6737  		_, err = js.Publish("foo", []byte("hello"))
  6738  		require_NoError(t, err)
  6739  		// This is the best case; with the client connection being close to free, this should take at least the rtt.
  6740  		if elapsed := time.Since(start); elapsed < np.rtt {
  6741  			t.Fatalf("Expected publish time to be > %v, got %v", np.rtt, elapsed)
  6742  		}
  6743  
  6744  		nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  6745  		nc, js = jsClientConnect(t, nl)
  6746  		defer nc.Close()
  6747  		start = time.Now()
  6748  		_, err = js.Publish("foo", []byte("hello"))
  6749  		require_NoError(t, err)
  6750  		// This is the worst case, meaning the message has to travel to the leader, then to the fastest replica, then back.
  6751  		// So it should take ~3x rtt, so check that it is at least > 2x rtt.
  6752  		if elapsed := time.Since(start); elapsed < 2*np.rtt {
  6753  			t.Fatalf("Expected publish time to be > %v, got %v", 2*np.rtt, elapsed)
  6754  		}
  6755  	}
  6756  
  6757  	// Setup source streams.
  6758  	nc, js := jsClientConnect(t, c.randomServer())
  6759  	defer nc.Close()
  6760  
  6761  	t.Logf("Creating %d Source Streams", numSourceStreams)
  6762  
  6763  	var sources []string
  6764  	wg := sync.WaitGroup{}
  6765  	for i := 0; i < numSourceStreams; i++ {
  6766  		sname := fmt.Sprintf("EVENT-%s", nuid.Next())
  6767  		sources = append(sources, sname)
  6768  		wg.Add(1)
  6769  		go func(stream string) {
  6770  			defer wg.Done()
  6771  			t.Logf("  %q", stream)
  6772  			subj := fmt.Sprintf("%s.>", stream)
  6773  			_, err := js.AddStream(&nats.StreamConfig{
  6774  				Name:      stream,
  6775  				Subjects:  []string{subj},
  6776  				Replicas:  3,
  6777  				Retention: nats.InterestPolicy,
  6778  			})
  6779  			require_NoError(t, err)
  6780  			for j := 0; j < numConsumersPerSource; j++ {
  6781  				consumer := fmt.Sprintf("C%d", j)
  6782  				_, err := js.Subscribe(_EMPTY_, func(msg *nats.Msg) {
  6783  					msg.Ack()
  6784  				}, nats.BindStream(stream), nats.Durable(consumer), nats.ManualAck())
  6785  				require_NoError(t, err)
  6786  			}
  6787  		}(sname)
  6788  	}
  6789  	wg.Wait()
  6790  
  6791  	var streamSources []*nats.StreamSource
  6792  	for _, src := range sources {
  6793  		streamSources = append(streamSources, &nats.StreamSource{Name: src})
  6794  
  6795  	}
  6796  
  6797  	t.Log("Creating Aggregate Stream")
  6798  
  6799  	// Now create the aggregate stream.
  6800  	_, err := js.AddStream(&nats.StreamConfig{
  6801  		Name:      "EVENTS",
  6802  		Replicas:  3,
  6803  		Retention: nats.InterestPolicy,
  6804  		Sources:   streamSources,
  6805  	})
  6806  	require_NoError(t, err)
  6807  
  6808  	// Set first sequence to a high number.
  6809  	if setHighStartSequence {
  6810  		require_NoError(t, js.PurgeStream("EVENTS", &nats.StreamPurgeRequest{Sequence: 32_000_001}))
  6811  	}
  6812  
  6813  	// Now create 2 pull consumers.
  6814  	_, err = js.PullSubscribe(_EMPTY_, "C1",
  6815  		nats.BindStream("EVENTS"),
  6816  		nats.MaxDeliver(1),
  6817  		nats.AckWait(10*time.Second),
  6818  		nats.ManualAck(),
  6819  	)
  6820  	require_NoError(t, err)
  6821  
  6822  	_, err = js.PullSubscribe(_EMPTY_, "C2",
  6823  		nats.BindStream("EVENTS"),
  6824  		nats.MaxDeliver(1),
  6825  		nats.AckWait(10*time.Second),
  6826  		nats.ManualAck(),
  6827  	)
  6828  	require_NoError(t, err)
  6829  
  6830  	t.Logf("Creating %d x 2 Pull Subscribers", numPullersPerConsumer)
  6831  
  6832  	// Now create the pullers.
  6833  	for _, subName := range []string{"C1", "C2"} {
  6834  		for i := 0; i < numPullersPerConsumer; i++ {
  6835  			go func(subName string) {
  6836  				nc, js := jsClientConnect(t, c.randomServer())
  6837  				defer nc.Close()
  6838  
  6839  				sub, err := js.PullSubscribe(_EMPTY_, subName,
  6840  					nats.BindStream("EVENTS"),
  6841  					nats.MaxDeliver(1),
  6842  					nats.AckWait(10*time.Second),
  6843  					nats.ManualAck(),
  6844  				)
  6845  				require_NoError(t, err)
  6846  
  6847  				for {
  6848  					msgs, err := sub.Fetch(25, nats.MaxWait(2*time.Second))
  6849  					if err != nil && err != nats.ErrTimeout {
  6850  						t.Logf("Exiting pull subscriber %q: %v", subName, err)
  6851  						return
  6852  					}
  6853  					// Shuffle
  6854  					rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  6855  
  6856  					// Wait for a random interval up to 100ms.
  6857  					time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
  6858  
  6859  					for _, m := range msgs {
  6860  						// If we want to simulate max redeliveries being hit, skipping a single ack
  6861  						// will cause it since the subscribers are set up with MaxDeliver(1).
  6862  						// 1 in 100_000 == 0.001%
  6863  						if simulateMaxRedeliveries && rand.Intn(100_000) == 0 {
  6864  							md, err := m.Metadata()
  6865  							require_NoError(t, err)
  6866  							t.Logf("** Skipping Ack: %d **", md.Sequence.Stream)
  6867  						} else {
  6868  							m.Ack()
  6869  						}
  6870  					}
  6871  				}
  6872  			}(subName)
  6873  		}
  6874  	}
  6875  
  6876  	// Now create feeder publishers.
  6877  	eventTypes := []string{"PAYMENT", "SUBMISSION", "CANCEL"}
  6878  
  6879  	msg := make([]byte, 2*1024) // 2k payload
  6880  	crand.Read(msg)
  6881  
  6882  	// For tracking pub times.
  6883  	var pubs int
  6884  	var totalPubTime time.Duration
  6885  	var pmu sync.Mutex
  6886  	last := time.Now()
  6887  
  6888  	updatePubStats := func(elapsed time.Duration) {
  6889  		pmu.Lock()
  6890  		defer pmu.Unlock()
  6891  		// Reset every 5s
  6892  		if time.Since(last) > 5*time.Second {
  6893  			pubs = 0
  6894  			totalPubTime = 0
  6895  			last = time.Now()
  6896  		}
  6897  		pubs++
  6898  		totalPubTime += elapsed
  6899  	}
  6900  	avgPubTime := func() time.Duration {
  6901  		pmu.Lock()
  6902  		np := pubs
  6903  		tpt := totalPubTime
  6904  		pmu.Unlock()
        		// Guard against a divide by zero before the first publish completes.
        		if np == 0 {
        			return 0
        		}
  6905  		return tpt / time.Duration(np)
  6906  	}
  6907  
  6908  	t.Logf("Creating %d Publishers", numPublishers)
  6909  
  6910  	var numLimitsExceeded atomic.Uint32
  6911  	errCh := make(chan error, 100)
  6912  
  6913  	for i := 0; i < numPublishers; i++ {
  6914  		go func() {
  6915  			nc, js := jsClientConnect(t, c.randomServer())
  6916  			defer nc.Close()
  6917  
  6918  			for {
  6919  				// Grab a random source stream
  6920  				stream := sources[rand.Intn(len(sources))]
  6921  				// Grab random event type.
  6922  				evt := eventTypes[rand.Intn(len(eventTypes))]
  6923  				subj := fmt.Sprintf("%s.%s", stream, evt)
  6924  				start := time.Now()
  6925  				_, err := js.Publish(subj, msg)
  6926  				if err != nil {
  6927  					t.Logf("Exiting publisher: %v", err)
  6928  					return
  6929  				}
  6930  				elapsed := time.Since(start)
  6931  				if elapsed > badPubThresh {
  6932  					t.Logf("Publish time took more than expected: %v", elapsed)
  6933  					numLimitsExceeded.Add(1)
  6934  					if ne := numLimitsExceeded.Load(); ne > maxBadPubTimes {
  6935  						errCh <- fmt.Errorf("Too many exceeded times on publish: %d", ne)
  6936  						return
  6937  					}
  6938  				}
  6939  				updatePubStats(elapsed)
  6940  			}
  6941  		}()
  6942  	}
  6943  
  6944  	t.Log("Creating Monitoring Routine - Data in ~10s")
  6945  
  6946  	// Create monitoring routine.
  6947  	go func() {
  6948  		nc, js := jsClientConnect(t, c.randomServer())
  6949  		defer nc.Close()
  6950  
  6951  		fseq, lseq := uint64(0), uint64(0)
  6952  		for {
  6953  			// Grab consumers
  6954  			var minAckFloor uint64 = math.MaxUint64
  6955  			for _, consumer := range []string{"C1", "C2"} {
  6956  				ci, err := js.ConsumerInfo("EVENTS", consumer)
  6957  				if err != nil {
  6958  					t.Logf("Exiting Monitor: %v", err)
  6959  					return
  6960  				}
  6961  				if lseq > 0 {
  6962  					t.Logf("%s:\n  Delivered:\t%d\n  AckFloor:\t%d\n  AckPending:\t%d\n  NumPending:\t%d",
  6963  						consumer, ci.Delivered.Stream, ci.AckFloor.Stream, ci.NumAckPending, ci.NumPending)
  6964  				}
  6965  				if ci.AckFloor.Stream < minAckFloor {
  6966  					minAckFloor = ci.AckFloor.Stream
  6967  				}
  6968  			}
  6969  			// Now grab aggregate stream state.
  6970  			si, err := js.StreamInfo("EVENTS")
  6971  			if err != nil {
  6972  				t.Logf("Exiting Monitor: %v", err)
  6973  				return
  6974  			}
  6975  			state := si.State
  6976  			if lseq != 0 {
  6977  				t.Logf("Stream:\n  Msgs: \t%d\n  First:\t%d\n  Last: \t%d\n  Deletes:\t%d\n",
  6978  					state.Msgs, state.FirstSeq, state.LastSeq, state.NumDeleted)
  6979  				t.Logf("Publish Stats:\n  Msgs/s:\t%0.2f\n  Avg Pub:\t%v\n\n", float64(si.State.LastSeq-lseq)/5.0, avgPubTime())
  6980  				if si.State.FirstSeq < minAckFloor && si.State.FirstSeq == fseq {
  6981  					t.Log("Stream first seq < minimum ack floor")
  6982  				}
  6983  			}
  6984  			fseq, lseq = si.State.FirstSeq, si.State.LastSeq
  6985  			time.Sleep(5 * time.Second)
  6986  		}
  6987  
  6988  	}()
  6989  
  6990  	select {
  6991  	case e := <-errCh:
  6992  		t.Fatal(e)
  6993  	case <-time.After(testTime):
  6994  		t.Fatalf("Did not receive completion signal")
  6995  	}
  6996  }
  6997  
  6998  // Unbalanced stretch cluster.
  6999  // S2 (stream leader) will have a slow path to S1 (via proxy) and S3 (consumer leader) will have a fast path.
  7000  //
  7001  //	 Route Ports
  7002  //		"S1": 14622
  7003  //		"S2": 15622
  7004  //		"S3": 16622
  7005  func createStretchUnbalancedCluster(t testing.TB) (c *cluster, np *netProxy) {
  7006  	t.Helper()
  7007  
  7008  	tmpl := `
  7009  	listen: 127.0.0.1:-1
  7010  	server_name: %s
  7011  	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  7012  
  7013  	cluster {
  7014  		name: "F3"
  7015  		listen: 127.0.0.1:%d
  7016  		routes = [%s]
  7017  	}
  7018  
  7019  	accounts {
  7020  		$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
  7021  	}
  7022  	`
  7023  	// Do these in order, S1, S2 (proxy) then S3.
  7024  	c = &cluster{t: t, servers: make([]*Server, 3), opts: make([]*Options, 3), name: "F3"}
  7025  
  7026  	// S1
  7027  	conf := fmt.Sprintf(tmpl, "S1", t.TempDir(), 14622, "route://127.0.0.1:15622, route://127.0.0.1:16622")
  7028  	c.servers[0], c.opts[0] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  7029  
  7030  	// S2
  7031  	// Create the proxy first. Connect this to S1. Make it slow by adding rtt (1ms here) via the proxy.
  7032  	np = createNetProxy(1*time.Millisecond, 1024*1024*1024, 1024*1024*1024, "route://127.0.0.1:14622", true)
  7033  	routes := fmt.Sprintf("%s, route://127.0.0.1:16622", np.routeURL())
  7034  	conf = fmt.Sprintf(tmpl, "S2", t.TempDir(), 15622, routes)
  7035  	c.servers[1], c.opts[1] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  7036  
  7037  	// S3
  7038  	conf = fmt.Sprintf(tmpl, "S3", t.TempDir(), 16622, "route://127.0.0.1:14622, route://127.0.0.1:15622")
  7039  	c.servers[2], c.opts[2] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  7040  
  7041  	c.checkClusterFormed()
  7042  	c.waitOnClusterReady()
  7043  
  7044  	return c, np
  7045  }
  7046  
  7047  // We test an interest based stream that has a cluster with a node with asymmetric paths from
  7048  // the stream leader and the consumer leader such that the consumer leader path is fast and
  7049  // replicated acks arrive sooner than the actual message. This path was considered, but also
  7050  // categorized as very rare and was expensive as it tried to forward a new stream msg delete
  7051  // proposal to the original stream leader. It now will deal with the issue locally and not
  7052  // slow down the ingest rate to the stream's publishers.
  7053  func TestNoRaceJetStreamClusterDifferentRTTInterestBasedStreamSetup(t *testing.T) {
  7054  	// Uncomment to run. Do not want as part of Travis tests atm.
  7055  	skip(t)
  7056  
  7057  	c, np := createStretchUnbalancedCluster(t)
  7058  	defer c.shutdown()
  7059  	defer np.stop()
  7060  
  7061  	nc, js := jsClientConnect(t, c.randomServer())
  7062  	defer nc.Close()
  7063  
  7064  	// Now create the stream.
  7065  	_, err := js.AddStream(&nats.StreamConfig{
  7066  		Name:      "EVENTS",
  7067  		Subjects:  []string{"EV.>"},
  7068  		Replicas:  3,
  7069  		Retention: nats.InterestPolicy,
  7070  	})
  7071  	require_NoError(t, err)
  7072  
  7073  	// Make sure its leader is on S2.
  7074  	sl := c.servers[1]
  7075  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7076  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  7077  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  7078  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  7079  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  7080  		}
  7081  		return nil
  7082  	})
  7083  
  7084  	// Now create the consumer.
  7085  	_, err = js.PullSubscribe(_EMPTY_, "C", nats.BindStream("EVENTS"), nats.ManualAck())
  7086  	require_NoError(t, err)
  7087  
  7088  	// Make sure the consumer leader is on S3.
  7089  	cl := c.servers[2]
  7090  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7091  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  7092  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  7093  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  7094  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7095  		}
  7096  		return nil
  7097  	})
  7098  
  7099  	go func(js nats.JetStream) {
  7100  		sub, err := js.PullSubscribe(_EMPTY_, "C", nats.BindStream("EVENTS"), nats.ManualAck())
  7101  		require_NoError(t, err)
  7102  
  7103  		for {
  7104  			msgs, err := sub.Fetch(100, nats.MaxWait(2*time.Second))
  7105  			if err != nil && err != nats.ErrTimeout {
  7106  				return
  7107  			}
  7108  			// Shuffle
  7109  			rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  7110  			for _, m := range msgs {
  7111  				m.Ack()
  7112  			}
  7113  		}
  7114  	}(js)
  7115  
  7116  	numPublishers := 25
  7117  	pubThresh := 2 * time.Second
  7118  	var maxExceeded atomic.Int64
  7119  	errCh := make(chan error, numPublishers)
  7120  	wg := sync.WaitGroup{}
  7121  
  7122  	msg := make([]byte, 2*1024) // 2k payload
  7123  	crand.Read(msg)
  7124  
  7125  	// Publishers.
  7126  	for i := 0; i < numPublishers; i++ {
  7127  		wg.Add(1)
  7128  		go func(iter int) {
  7129  			defer wg.Done()
  7130  
  7131  			// Connect to random, the slow ones will be connected to the slow node.
  7132  			// But if you connect them all there it will pass.
  7133  			s := c.randomServer()
  7134  			nc, js := jsClientConnect(t, s)
  7135  			defer nc.Close()
  7136  
  7137  			for i := 0; i < 1_000; i++ {
  7138  				start := time.Now()
  7139  				_, err := js.Publish("EV.PAID", msg)
  7140  				if err != nil {
  7141  					errCh <- fmt.Errorf("Publish error: %v", err)
  7142  					return
  7143  				}
  7144  				if elapsed := time.Since(start); elapsed > pubThresh {
  7145  					errCh <- fmt.Errorf("Publish time exceeded")
  7146  					if int64(elapsed) > maxExceeded.Load() {
  7147  						maxExceeded.Store(int64(elapsed))
  7148  					}
  7149  					return
  7150  				}
  7151  			}
  7152  		}(i)
  7153  	}
  7154  
  7155  	wg.Wait()
  7156  
  7157  	select {
  7158  	case e := <-errCh:
  7159  		t.Fatalf("%v: threshold is %v, maximum seen: %v", e, pubThresh, time.Duration(maxExceeded.Load()))
  7160  	default:
  7161  	}
  7162  }
  7163  
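        // Interest stream with many consumers that ack everything. After all acks have been
        // processed, every server should show zero messages and a first sequence just past
        // the last published sequence.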
  7164  func TestNoRaceJetStreamInterestStreamCheckInterestRaceBug(t *testing.T) {
  7165  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  7166  	defer c.shutdown()
  7167  
  7168  	nc, js := jsClientConnect(t, c.randomServer())
  7169  	defer nc.Close()
  7170  
  7171  	_, err := js.AddStream(&nats.StreamConfig{
  7172  		Name:      "TEST",
  7173  		Subjects:  []string{"foo"},
  7174  		Replicas:  3,
  7175  		Retention: nats.InterestPolicy,
  7176  	})
  7177  	require_NoError(t, err)
  7178  
  7179  	numConsumers := 10
  7180  	for i := 0; i < numConsumers; i++ {
  7181  		nc, js := jsClientConnect(t, c.randomServer())
  7182  		defer nc.Close()
  7183  
  7184  		_, err = js.Subscribe("foo", func(m *nats.Msg) {
  7185  			m.Ack()
  7186  		}, nats.Durable(fmt.Sprintf("C%d", i)), nats.ManualAck())
  7187  		require_NoError(t, err)
  7188  	}
  7189  
  7190  	numToSend := 10_000
  7191  	for i := 0; i < numToSend; i++ {
  7192  		_, err := js.PublishAsync("foo", nil, nats.StallWait(800*time.Millisecond))
  7193  		require_NoError(t, err)
  7194  	}
  7195  	select {
  7196  	case <-js.PublishAsyncComplete():
  7197  	case <-time.After(20 * time.Second):
  7198  		t.Fatalf("Did not receive completion signal")
  7199  	}
  7200  
  7201  	// Wait until the ack floor is correct for all consumers.
  7202  	checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
  7203  		for _, s := range c.servers {
  7204  			mset, err := s.GlobalAccount().lookupStream("TEST")
  7205  			require_NoError(t, err)
  7206  
  7207  			mset.mu.RLock()
  7208  			defer mset.mu.RUnlock()
  7209  
  7210  			require_True(t, len(mset.consumers) == numConsumers)
  7211  
  7212  			for _, o := range mset.consumers {
  7213  				state, err := o.store.State()
  7214  				require_NoError(t, err)
  7215  				if state.AckFloor.Stream != uint64(numToSend) {
  7216  					return fmt.Errorf("Ackfloor not correct yet")
  7217  				}
  7218  			}
  7219  		}
  7220  		return nil
  7221  	})
  7222  
  7223  	for _, s := range c.servers {
  7224  		mset, err := s.GlobalAccount().lookupStream("TEST")
  7225  		require_NoError(t, err)
  7226  
  7227  		mset.mu.RLock()
  7228  		defer mset.mu.RUnlock()
  7229  
  7230  		state := mset.state()
  7231  		require_True(t, state.Msgs == 0)
  7232  		require_True(t, state.FirstSeq == uint64(numToSend+1))
  7233  	}
  7234  }
  7235  
  7236  func TestNoRaceJetStreamClusterInterestStreamConsistencyAfterRollingRestart(t *testing.T) {
  7237  	// Uncomment to run. Needs to be on a big machine. Do not want as part of Travis tests atm.
  7238  	skip(t)
  7239  
  7240  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  7241  	defer c.shutdown()
  7242  
  7243  	numStreams := 200
  7244  	numConsumersPer := 5
  7245  	numPublishers := 10
  7246  
  7247  	nc, js := jsClientConnect(t, c.randomServer())
  7248  	defer nc.Close()
  7249  
  7250  	qch := make(chan bool)
  7251  
  7252  	var mm sync.Mutex
  7253  	ackMap := make(map[string]map[uint64][]string)
  7254  
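        	// Track which consumer acked each stream sequence so we can spot missing or
        	// duplicate acks per stream after the rolling restart.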
  7255  	addAckTracking := func(seq uint64, stream, consumer string) {
  7256  		mm.Lock()
  7257  		defer mm.Unlock()
  7258  		sam := ackMap[stream]
  7259  		if sam == nil {
  7260  			sam = make(map[uint64][]string)
  7261  			ackMap[stream] = sam
  7262  		}
  7263  		sam[seq] = append(sam[seq], consumer)
  7264  	}
  7265  
  7266  	doPullSubscriber := func(stream, consumer, filter string) {
  7267  		nc, js := jsClientConnect(t, c.randomServer())
  7268  		defer nc.Close()
  7269  
  7270  		var err error
  7271  		var sub *nats.Subscription
  7272  		timeout := time.Now().Add(5 * time.Second)
  7273  		for time.Now().Before(timeout) {
  7274  			sub, err = js.PullSubscribe(filter, consumer, nats.BindStream(stream), nats.ManualAck())
  7275  			if err == nil {
  7276  				break
  7277  			}
  7278  		}
  7279  		if err != nil {
  7280  			t.Logf("Error on pull subscriber: %v", err)
  7281  			return
  7282  		}
  7283  
  7284  		for {
  7285  			select {
  7286  			case <-time.After(500 * time.Millisecond):
  7287  				msgs, err := sub.Fetch(100, nats.MaxWait(time.Second))
  7288  				if err != nil {
  7289  					continue
  7290  				}
  7291  				// Shuffle
  7292  				rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  7293  				for _, m := range msgs {
  7294  					meta, err := m.Metadata()
  7295  					require_NoError(t, err)
  7296  					m.Ack()
  7297  					addAckTracking(meta.Sequence.Stream, stream, consumer)
  7298  					if meta.NumDelivered > 1 {
  7299  						t.Logf("Got a msg redelivered %d for sequence %d on %q %q\n", meta.NumDelivered, meta.Sequence.Stream, stream, consumer)
  7300  					}
  7301  				}
  7302  			case <-qch:
  7303  				nc.Flush()
  7304  				return
  7305  			}
  7306  		}
  7307  	}
  7308  
  7309  	// Setup
  7310  	wg := sync.WaitGroup{}
  7311  	for i := 0; i < numStreams; i++ {
  7312  		wg.Add(1)
  7313  		go func(stream string) {
  7314  			defer wg.Done()
  7315  			subj := fmt.Sprintf("%s.>", stream)
  7316  			_, err := js.AddStream(&nats.StreamConfig{
  7317  				Name:      stream,
  7318  				Subjects:  []string{subj},
  7319  				Replicas:  3,
  7320  				Retention: nats.InterestPolicy,
  7321  			})
  7322  			require_NoError(t, err)
  7323  			for i := 0; i < numConsumersPer; i++ {
  7324  				consumer := fmt.Sprintf("C%d", i)
  7325  				filter := fmt.Sprintf("%s.%d", stream, i)
  7326  				_, err = js.AddConsumer(stream, &nats.ConsumerConfig{
  7327  					Durable:       consumer,
  7328  					FilterSubject: filter,
  7329  					AckPolicy:     nats.AckExplicitPolicy,
  7330  					AckWait:       2 * time.Second,
  7331  				})
  7332  				require_NoError(t, err)
  7333  				c.waitOnConsumerLeader(globalAccountName, stream, consumer)
  7334  				go doPullSubscriber(stream, consumer, filter)
  7335  			}
  7336  		}(fmt.Sprintf("A-%d", i))
  7337  	}
  7338  	wg.Wait()
  7339  
  7340  	msg := make([]byte, 2*1024) // 2k payload
  7341  	crand.Read(msg)
  7342  
  7343  	// Controls if publishing is on or off.
  7344  	var pubActive atomic.Bool
  7345  
  7346  	doPublish := func() {
  7347  		nc, js := jsClientConnect(t, c.randomServer())
  7348  		defer nc.Close()
  7349  
  7350  		for {
  7351  			select {
  7352  			case <-time.After(100 * time.Millisecond):
  7353  				if pubActive.Load() {
  7354  					for i := 0; i < numStreams; i++ {
  7355  						for j := 0; j < numConsumersPer; j++ {
  7356  							subj := fmt.Sprintf("A-%d.%d", i, j)
  7357  							// Don't care about errors here for this test.
  7358  							js.Publish(subj, msg)
  7359  						}
  7360  					}
  7361  				}
  7362  			case <-qch:
  7363  				return
  7364  			}
  7365  		}
  7366  	}
  7367  
  7368  	pubActive.Store(true)
  7369  
  7370  	for i := 0; i < numPublishers; i++ {
  7371  		go doPublish()
  7372  	}
  7373  
  7374  	// Let run for a bit.
  7375  	time.Sleep(20 * time.Second)
  7376  
  7377  	// Do a rolling restart.
  7378  	for _, s := range c.servers {
  7379  		t.Logf("Shutdown %v\n", s)
  7380  		s.Shutdown()
  7381  		s.WaitForShutdown()
  7382  		time.Sleep(20 * time.Second)
  7383  		t.Logf("Restarting %v\n", s)
  7384  		s = c.restartServer(s)
  7385  		c.waitOnServerHealthz(s)
  7386  	}
  7387  
  7388  	// Let run for a bit longer.
  7389  	time.Sleep(10 * time.Second)
  7390  
  7391  	// Stop pubs.
  7392  	pubActive.Store(false)
  7393  
  7394  	// Let settle.
  7395  	time.Sleep(10 * time.Second)
  7396  	close(qch)
  7397  	time.Sleep(20 * time.Second)
  7398  
  7399  	nc, js = jsClientConnect(t, c.randomServer())
  7400  	defer nc.Close()
  7401  
  7402  	minAckFloor := func(stream string) (uint64, string) {
  7403  		var maf uint64 = math.MaxUint64
  7404  		var consumer string
  7405  		for i := 0; i < numConsumersPer; i++ {
  7406  			cname := fmt.Sprintf("C%d", i)
  7407  			ci, err := js.ConsumerInfo(stream, cname)
  7408  			require_NoError(t, err)
  7409  			if ci.AckFloor.Stream < maf {
  7410  				maf = ci.AckFloor.Stream
  7411  				consumer = cname
  7412  			}
  7413  		}
  7414  		return maf, consumer
  7415  	}
  7416  
  7417  	checkStreamAcks := func(stream string) {
  7418  		mm.Lock()
  7419  		defer mm.Unlock()
  7420  		if sam := ackMap[stream]; sam != nil {
  7421  			for seq := 1; ; seq++ {
  7422  				acks := sam[uint64(seq)]
  7423  				if acks == nil {
  7424  					if sam[uint64(seq+1)] != nil {
  7425  						t.Logf("Missing an ack on stream %q for sequence %d\n", stream, seq)
  7426  					} else {
  7427  						break
  7428  					}
  7429  				}
  7430  				if len(acks) > 1 {
  7431  					t.Logf("Multiple acks for %d which is not expected: %+v", seq, acks)
  7432  				}
  7433  			}
  7434  		}
  7435  	}
  7436  
  7437  	// Now check all streams to make sure their first sequence is not behind the minimum ack floor across their consumers.
  7438  	for i := 0; i < numStreams; i++ {
  7439  		stream := fmt.Sprintf("A-%d", i)
  7440  		si, err := js.StreamInfo(stream)
  7441  		require_NoError(t, err)
  7442  
  7443  		if maf, consumer := minAckFloor(stream); maf > si.State.FirstSeq {
  7444  			t.Logf("\nBAD STATE DETECTED FOR %q, CHECKING OTHER SERVERS! ACK %d vs %+v LEADER %v, CL FOR %q %v\n",
  7445  				stream, maf, si.State, c.streamLeader(globalAccountName, stream), consumer, c.consumerLeader(globalAccountName, stream, consumer))
  7446  
  7447  			t.Logf("TEST ACKS %+v\n", ackMap)
  7448  
  7449  			checkStreamAcks(stream)
  7450  
  7451  			for _, s := range c.servers {
  7452  				mset, err := s.GlobalAccount().lookupStream(stream)
  7453  				require_NoError(t, err)
  7454  				state := mset.state()
  7455  				t.Logf("Server %v Stream STATE %+v\n", s, state)
  7456  
  7457  				var smv StoreMsg
  7458  				if sm, err := mset.store.LoadMsg(state.FirstSeq, &smv); err == nil {
  7459  					t.Logf("Subject for msg %d is %q", state.FirstSeq, sm.subj)
  7460  				} else {
  7461  					t.Logf("Could not retrieve msg for %d: %v", state.FirstSeq, err)
  7462  				}
  7463  
  7464  				if len(mset.preAcks) > 0 {
  7465  					t.Logf("%v preAcks %+v\n", s, mset.preAcks)
  7466  				}
  7467  
  7468  				for _, o := range mset.consumers {
  7469  					ostate, err := o.store.State()
  7470  					require_NoError(t, err)
  7471  					t.Logf("Consumer STATE for %q is %+v\n", o.name, ostate)
  7472  				}
  7473  			}
  7474  			t.Fatalf("BAD STATE: ACKFLOOR > FIRST %d vs %d\n", maf, si.State.FirstSeq)
  7475  		}
  7476  	}
  7477  }
  7478  
  7479  func TestNoRaceFileStoreNumPending(t *testing.T) {
  7480  	// No need for all permutations here.
  7481  	storeDir := t.TempDir()
  7482  	fcfg := FileStoreConfig{
  7483  		StoreDir:  storeDir,
  7484  		BlockSize: 2 * 1024, // Create many blocks on purpose.
  7485  	}
  7486  	fs, err := newFileStore(fcfg, StreamConfig{Name: "zzz", Subjects: []string{"*.*.*.*"}, Storage: FileStorage})
  7487  	require_NoError(t, err)
  7488  	defer fs.Stop()
  7489  
  7490  	tokens := []string{"foo", "bar", "baz"}
  7491  	genSubj := func() string {
  7492  		return fmt.Sprintf("%s.%s.%s.%s",
  7493  			tokens[rand.Intn(len(tokens))],
  7494  			tokens[rand.Intn(len(tokens))],
  7495  			tokens[rand.Intn(len(tokens))],
  7496  			tokens[rand.Intn(len(tokens))],
  7497  		)
  7498  	}
  7499  
  7500  	for i := 0; i < 50_000; i++ {
  7501  		subj := genSubj()
  7502  		_, _, err := fs.StoreMsg(subj, nil, []byte("Hello World"))
  7503  		require_NoError(t, err)
  7504  	}
  7505  
  7506  	state := fs.State()
  7507  
  7508  	// Scan one by one for sanity check against other calculations.
  7509  	sanityCheck := func(sseq uint64, filter string) SimpleState {
  7510  		t.Helper()
  7511  		var ss SimpleState
  7512  		var smv StoreMsg
  7513  		// Here we know 0 is invalid, so set it to 1.
  7514  		if sseq == 0 {
  7515  			sseq = 1
  7516  		}
  7517  		for seq := sseq; seq <= state.LastSeq; seq++ {
  7518  			sm, err := fs.LoadMsg(seq, &smv)
  7519  			if err != nil {
  7520  				t.Logf("Encountered error %v loading sequence: %d", err, seq)
  7521  				continue
  7522  			}
  7523  			if subjectIsSubsetMatch(sm.subj, filter) {
  7524  				ss.Msgs++
  7525  				ss.Last = seq
  7526  				if ss.First == 0 || seq < ss.First {
  7527  					ss.First = seq
  7528  				}
  7529  			}
  7530  		}
  7531  		return ss
  7532  	}
  7533  
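        	// Compare NumPending and FilteredState against the brute force scan above.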
  7534  	check := func(sseq uint64, filter string) {
  7535  		t.Helper()
  7536  		np, lvs := fs.NumPending(sseq, filter, false)
  7537  		ss := fs.FilteredState(sseq, filter)
  7538  		sss := sanityCheck(sseq, filter)
  7539  		if lvs != state.LastSeq {
  7540  			t.Fatalf("Expected NumPending to return valid through last of %d but got %d", state.LastSeq, lvs)
  7541  		}
  7542  		if ss.Msgs != np {
  7543  			t.Fatalf("NumPending of %d did not match ss.Msgs of %d", np, ss.Msgs)
  7544  		}
  7545  		if ss != sss {
  7546  			t.Fatalf("Failed sanity check, expected %+v got %+v", sss, ss)
  7547  		}
  7548  	}
  7549  
  7550  	sanityCheckLastOnly := func(sseq uint64, filter string) SimpleState {
  7551  		t.Helper()
  7552  		var ss SimpleState
  7553  		var smv StoreMsg
  7554  		// Here we know 0 is invalid, so set it to 1.
  7555  		if sseq == 0 {
  7556  			sseq = 1
  7557  		}
  7558  		seen := make(map[string]bool)
  7559  		for seq := state.LastSeq; seq >= sseq; seq-- {
  7560  			sm, err := fs.LoadMsg(seq, &smv)
  7561  			if err != nil {
  7562  				t.Logf("Encountered error %v loading sequence: %d", err, seq)
  7563  				continue
  7564  			}
  7565  			if !seen[sm.subj] && subjectIsSubsetMatch(sm.subj, filter) {
  7566  				ss.Msgs++
  7567  				if ss.Last == 0 {
  7568  					ss.Last = seq
  7569  				}
  7570  				if ss.First == 0 || seq < ss.First {
  7571  					ss.First = seq
  7572  				}
  7573  				seen[sm.subj] = true
  7574  			}
  7575  		}
  7576  		return ss
  7577  	}
  7578  
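        	// Same comparison for the last-message-per-subject variant of NumPending.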
  7579  	checkLastOnly := func(sseq uint64, filter string) {
  7580  		t.Helper()
  7581  		np, lvs := fs.NumPending(sseq, filter, true)
  7582  		ss := sanityCheckLastOnly(sseq, filter)
  7583  		if lvs != state.LastSeq {
  7584  			t.Fatalf("Expected NumPending to return valid through last of %d but got %d", state.LastSeq, lvs)
  7585  		}
  7586  		if ss.Msgs != np {
  7587  			t.Fatalf("NumPending of %d did not match ss.Msgs of %d", np, ss.Msgs)
  7588  		}
  7589  	}
  7590  
  7591  	startSeqs := []uint64{0, 1, 2, 200, 444, 555, 2222, 8888, 12_345, 28_222, 33_456, 44_400, 49_999}
  7592  	checkSubs := []string{"foo.>", "*.bar.>", "foo.bar.*.baz", "*.bar.>", "*.foo.bar.*", "foo.foo.bar.baz"}
  7593  
  7594  	for _, filter := range checkSubs {
  7595  		for _, start := range startSeqs {
  7596  			check(start, filter)
  7597  			checkLastOnly(start, filter)
  7598  		}
  7599  	}
  7600  }
  7601  
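        // Interest stream on the unbalanced stretch cluster with one fast ack push consumer
        // and one pull consumer whose leader sits on the slow node. Acks from the fast
        // consumer can reach the slow replica before the messages themselves, so verify that
        // messages are not removed prematurely and that no preAcks are left once everything
        // has been acked.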
  7602  func TestNoRaceJetStreamClusterUnbalancedInterestMultipleConsumers(t *testing.T) {
  7603  	c, np := createStretchUnbalancedCluster(t)
  7604  	defer c.shutdown()
  7605  	defer np.stop()
  7606  
  7607  	nc, js := jsClientConnect(t, c.randomServer())
  7608  	defer nc.Close()
  7609  
  7610  	// Now create the stream.
  7611  	_, err := js.AddStream(&nats.StreamConfig{
  7612  		Name:      "EVENTS",
  7613  		Subjects:  []string{"EV.>"},
  7614  		Replicas:  3,
  7615  		Retention: nats.InterestPolicy,
  7616  	})
  7617  	require_NoError(t, err)
  7618  
  7619  	// Make sure its leader is on S2.
  7620  	sl := c.servers[1]
  7621  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7622  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  7623  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  7624  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  7625  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  7626  		}
  7627  		return nil
  7628  	})
  7629  
  7630  	// Create a fast ack consumer.
  7631  	_, err = js.Subscribe("EV.NEW", func(m *nats.Msg) {
  7632  		m.Ack()
  7633  	}, nats.Durable("C"), nats.ManualAck())
  7634  	require_NoError(t, err)
  7635  
  7636  	// Make sure the consumer leader is on S3.
  7637  	cl := c.servers[2]
  7638  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7639  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  7640  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  7641  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  7642  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7643  		}
  7644  		return nil
  7645  	})
  7646  
  7647  	// Connect a client directly to the stream leader.
  7648  	nc, js = jsClientConnect(t, sl)
  7649  	defer nc.Close()
  7650  
  7651  	// Now create a pull subscriber.
  7652  	sub, err := js.PullSubscribe("EV.NEW", "D", nats.ManualAck())
  7653  	require_NoError(t, err)
  7654  
  7655  	// Make sure this consumer leader is on S1.
  7656  	cl = c.servers[0]
  7657  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7658  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "D")
  7659  		if s := c.consumerLeader(globalAccountName, "EVENTS", "D"); s != cl {
  7660  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "D")
  7661  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7662  		}
  7663  		return nil
  7664  	})
  7665  
  7666  	numToSend := 1000
  7667  	for i := 0; i < numToSend; i++ {
  7668  		_, err := js.PublishAsync("EV.NEW", nil)
  7669  		require_NoError(t, err)
  7670  	}
  7671  	select {
  7672  	case <-js.PublishAsyncComplete():
  7673  	case <-time.After(20 * time.Second):
  7674  		t.Fatalf("Did not receive completion signal")
  7675  	}
  7676  
  7677  	// Now make sure we can pull messages since we have not acked.
  7678  	// The bug is that the acks arrive on S1 faster then the messages but we want to
  7679  	// The bug is that the acks arrive on S1 faster than the messages but we want to
  7680  	msgs, err := sub.Fetch(100, nats.MaxWait(time.Second))
  7681  	require_NoError(t, err)
  7682  	require_True(t, len(msgs) == 100)
  7683  	for _, m := range msgs {
  7684  		m.AckSync()
  7685  	}
  7686  
  7687  	ci, err := js.ConsumerInfo("EVENTS", "D")
  7688  	require_NoError(t, err)
  7689  	require_True(t, ci.NumPending == uint64(numToSend-100))
  7690  	require_True(t, ci.NumAckPending == 0)
  7691  	require_True(t, ci.Delivered.Stream == 100)
  7692  	require_True(t, ci.AckFloor.Stream == 100)
  7693  
  7694  	// Check stream state on all servers.
  7695  	for _, s := range c.servers {
  7696  		mset, err := s.GlobalAccount().lookupStream("EVENTS")
  7697  		require_NoError(t, err)
  7698  		state := mset.state()
  7699  		require_True(t, state.Msgs == 900)
  7700  		require_True(t, state.FirstSeq == 101)
  7701  		require_True(t, state.LastSeq == 1000)
  7702  		require_True(t, state.Consumers == 2)
  7703  	}
  7704  
  7705  	msgs, err = sub.Fetch(900, nats.MaxWait(time.Second))
  7706  	require_NoError(t, err)
  7707  	require_True(t, len(msgs) == 900)
  7708  	for _, m := range msgs {
  7709  		m.AckSync()
  7710  	}
  7711  
  7712  	// Let acks propagate.
  7713  	time.Sleep(250 * time.Millisecond)
  7714  
  7715  	// Check final stream state on all servers.
  7716  	for _, s := range c.servers {
  7717  		mset, err := s.GlobalAccount().lookupStream("EVENTS")
  7718  		require_NoError(t, err)
  7719  		state := mset.state()
  7720  		require_True(t, state.Msgs == 0)
  7721  		require_True(t, state.FirstSeq == 1001)
  7722  		require_True(t, state.LastSeq == 1000)
  7723  		require_True(t, state.Consumers == 2)
  7724  		// Now check preAcks
  7725  		mset.mu.RLock()
  7726  		numPreAcks := len(mset.preAcks)
  7727  		mset.mu.RUnlock()
  7728  		require_True(t, numPreAcks == 0)
  7729  	}
  7730  }
  7731  
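        // Same unbalanced stretch cluster, but with two filtered fast ack consumers on
        // disjoint subjects. Each consumer only sees half of the published messages, so the
        // ack floor bookkeeping has to account for the sequences its filter skips.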
  7732  func TestNoRaceJetStreamClusterUnbalancedInterestMultipleFilteredConsumers(t *testing.T) {
  7733  	c, np := createStretchUnbalancedCluster(t)
  7734  	defer c.shutdown()
  7735  	defer np.stop()
  7736  
  7737  	nc, js := jsClientConnect(t, c.randomServer())
  7738  	defer nc.Close()
  7739  
  7740  	// Now create the stream.
  7741  	_, err := js.AddStream(&nats.StreamConfig{
  7742  		Name:      "EVENTS",
  7743  		Subjects:  []string{"EV.>"},
  7744  		Replicas:  3,
  7745  		Retention: nats.InterestPolicy,
  7746  	})
  7747  	require_NoError(t, err)
  7748  
  7749  	// Make sure its leader is on S2.
  7750  	sl := c.servers[1]
  7751  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7752  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  7753  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  7754  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  7755  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  7756  		}
  7757  		return nil
  7758  	})
  7759  
  7760  	// Create a fast ack consumer.
  7761  	_, err = js.Subscribe("EV.NEW", func(m *nats.Msg) {
  7762  		m.Ack()
  7763  	}, nats.Durable("C"), nats.ManualAck())
  7764  	require_NoError(t, err)
  7765  
  7766  	// Make sure the consumer leader is on S3.
  7767  	cl := c.servers[2]
  7768  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7769  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  7770  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  7771  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  7772  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7773  		}
  7774  		return nil
  7775  	})
  7776  
  7777  	// Connect a client directly to the stream leader.
  7778  	nc, js = jsClientConnect(t, sl)
  7779  	defer nc.Close()
  7780  
  7781  	// Now create another fast ack consumer.
  7782  	_, err = js.Subscribe("EV.UPDATED", func(m *nats.Msg) {
  7783  		m.Ack()
  7784  	}, nats.Durable("D"), nats.ManualAck())
  7785  	require_NoError(t, err)
  7786  
  7787  	// Make sure this consumer leader is on S1.
  7788  	cl = c.servers[0]
  7789  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  7790  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "D")
  7791  		if s := c.consumerLeader(globalAccountName, "EVENTS", "D"); s != cl {
  7792  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "D")
  7793  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  7794  		}
  7795  		return nil
  7796  	})
  7797  
  7798  	numToSend := 500
  7799  	for i := 0; i < numToSend; i++ {
  7800  		_, err := js.PublishAsync("EV.NEW", nil)
  7801  		require_NoError(t, err)
  7802  		_, err = js.PublishAsync("EV.UPDATED", nil)
  7803  		require_NoError(t, err)
  7804  	}
  7805  	select {
  7806  	case <-js.PublishAsyncComplete():
  7807  	case <-time.After(20 * time.Second):
  7808  		t.Fatalf("Did not receive completion signal")
  7809  	}
  7810  
  7811  	// Let acks propagate.
  7812  	time.Sleep(250 * time.Millisecond)
  7813  
  7814  	ci, err := js.ConsumerInfo("EVENTS", "D")
  7815  	require_NoError(t, err)
  7816  	require_True(t, ci.NumPending == 0)
  7817  	require_True(t, ci.NumAckPending == 0)
  7818  	require_True(t, ci.Delivered.Consumer == 500)
  7819  	require_True(t, ci.Delivered.Stream == 1000)
  7820  	require_True(t, ci.AckFloor.Consumer == 500)
  7821  	require_True(t, ci.AckFloor.Stream == 1000)
  7822  
  7823  	// Check final stream state on all servers.
  7824  	for _, s := range c.servers {
  7825  		mset, err := s.GlobalAccount().lookupStream("EVENTS")
  7826  		require_NoError(t, err)
  7827  		state := mset.state()
  7828  		require_True(t, state.Msgs == 0)
  7829  		require_True(t, state.FirstSeq == 1001)
  7830  		require_True(t, state.LastSeq == 1000)
  7831  		require_True(t, state.Consumers == 2)
  7832  		// Now check preAcks
  7833  		mset.mu.RLock()
  7834  		numPreAcks := len(mset.preAcks)
  7835  		mset.mu.RUnlock()
  7836  		require_True(t, numPreAcks == 0)
  7837  	}
  7838  }
  7839  
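        // Exercise many concurrent stream and consumer creates directly against the account to
        // verify that only a single underlying asset is created and that the inflight
        // bookkeeping is cleaned up afterwards.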
  7840  func TestNoRaceParallelStreamAndConsumerCreation(t *testing.T) {
  7841  	s := RunBasicJetStreamServer(t)
  7842  	defer s.Shutdown()
  7843  
  7844  	// stream config.
  7845  	scfg := &StreamConfig{
  7846  		Name:     "TEST",
  7847  		Subjects: []string{"foo", "bar"},
  7848  		MaxMsgs:  10,
  7849  		Storage:  FileStorage,
  7850  		Replicas: 1,
  7851  	}
  7852  
  7853  	// Will do these directly against the low level API to really make
  7854  	// sure parallel creation is ok.
  7855  	np := 1000
  7856  	startCh := make(chan bool)
  7857  	errCh := make(chan error, np)
  7858  	wg := sync.WaitGroup{}
  7859  	wg.Add(np)
  7860  
  7861  	var streams sync.Map
  7862  
  7863  	for i := 0; i < np; i++ {
  7864  		go func() {
  7865  			defer wg.Done()
  7866  
  7867  			// Make them all fire at once.
  7868  			<-startCh
  7869  
  7870  			if mset, err := s.GlobalAccount().addStream(scfg); err != nil {
  7871  				t.Logf("Stream create got an error: %v", err)
  7872  				errCh <- err
  7873  			} else {
  7874  				streams.Store(mset, true)
  7875  			}
  7876  		}()
  7877  	}
  7878  	time.Sleep(100 * time.Millisecond)
  7879  	close(startCh)
  7880  	wg.Wait()
  7881  
  7882  	// Check for no errors.
  7883  	if len(errCh) > 0 {
  7884  		t.Fatalf("Expected no errors, got %d", len(errCh))
  7885  	}
  7886  
  7887  	// Now make sure we really only created one stream.
  7888  	var numStreams int
  7889  	streams.Range(func(k, v any) bool {
  7890  		numStreams++
  7891  		return true
  7892  	})
  7893  	if numStreams > 1 {
  7894  		t.Fatalf("Expected only one stream to be really created, got %d out of %d attempts", numStreams, np)
  7895  	}
  7896  
  7897  	// Also make sure we cleanup the inflight entries for streams.
  7898  	gacc := s.GlobalAccount()
  7899  	_, jsa, err := gacc.checkForJetStream()
  7900  	require_NoError(t, err)
  7901  	var numEntries int
  7902  	jsa.inflight.Range(func(k, v any) bool {
  7903  		numEntries++
  7904  		return true
  7905  	})
  7906  	if numEntries > 0 {
  7907  		t.Fatalf("Expected no inflight entries to be left over, got %d", numEntries)
  7908  	}
  7909  
  7910  	// Now do consumers.
  7911  	mset, err := gacc.lookupStream("TEST")
  7912  	require_NoError(t, err)
  7913  
  7914  	cfg := &ConsumerConfig{
  7915  		DeliverSubject: "to",
  7916  		Name:           "DLC",
  7917  		AckPolicy:      AckExplicit,
  7918  	}
  7919  
  7920  	startCh = make(chan bool)
  7921  	errCh = make(chan error, np)
  7922  	wg.Add(np)
  7923  
  7924  	var consumers sync.Map
  7925  
  7926  	for i := 0; i < np; i++ {
  7927  		go func() {
  7928  			defer wg.Done()
  7929  
  7930  			// Make them all fire at once.
  7931  			<-startCh
  7932  
  7933  			if o, err := mset.addConsumer(cfg); err != nil {
  7934  				t.Logf("Consumer create got an error: %v", err)
  7935  				errCh <- err
  7936  			} else {
  7937  				consumers.Store(o, true)
  7938  			}
  7939  		}()
  7940  	}
  7941  	time.Sleep(100 * time.Millisecond)
  7942  	close(startCh)
  7943  	wg.Wait()
  7944  
  7945  	// Check for no errors.
  7946  	if len(errCh) > 0 {
  7947  		t.Fatalf("Expected no errors, got %d", len(errCh))
  7948  	}
  7949  
  7950  	// Now make sure we really only created one consumer.
  7951  	var numConsumers int
  7952  	consumers.Range(func(k, v any) bool {
  7953  		numConsumers++
  7954  		return true
  7955  	})
  7956  	if numConsumers > 1 {
  7957  		t.Fatalf("Expected only one consumer to be really created, got %d out of %d attempts", numConsumers, np)
  7958  	}
  7959  }
  7960  
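        // Rough throughput comparison of clustered fan-out across five accounts using the two
        // pool_size settings below ("no pooling" vs "pooling"); the results are only logged.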
  7961  func TestNoRaceRoutePool(t *testing.T) {
  7962  	var dur1 time.Duration
  7963  	var dur2 time.Duration
  7964  
  7965  	total := 1_000_000
  7966  
  7967  	for _, test := range []struct {
  7968  		name     string
  7969  		poolSize int
  7970  	}{
  7971  		{"no pooling", 0},
  7972  		{"pooling", 5},
  7973  	} {
  7974  		t.Run(test.name, func(t *testing.T) {
  7975  			tmpl := `
  7976  			port: -1
  7977  			accounts {
  7978  				A { users: [{user: "A", password: "A"}] }
  7979  				B { users: [{user: "B", password: "B"}] }
  7980  				C { users: [{user: "C", password: "C"}] }
  7981  				D { users: [{user: "D", password: "D"}] }
  7982  				E { users: [{user: "E", password: "E"}] }
  7983  			}
  7984  			cluster {
  7985  				port: -1
  7986  				name: "local"
  7987  				%s
  7988  				pool_size: %d
  7989  			}
  7990  		`
  7991  			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_, test.poolSize)))
  7992  			s1, o1 := RunServerWithConfig(conf1)
  7993  			defer s1.Shutdown()
  7994  
  7995  			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl,
  7996  				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
  7997  				test.poolSize)))
  7998  			s2, _ := RunServerWithConfig(conf2)
  7999  			defer s2.Shutdown()
  8000  
  8001  			checkClusterFormed(t, s1, s2)
  8002  
  8003  			wg := sync.WaitGroup{}
  8004  			wg.Add(5)
  8005  
  8006  			sendAndRecv := func(acc string) (*nats.Conn, *nats.Conn) {
  8007  				t.Helper()
  8008  
  8009  				s2nc := natsConnect(t, s2.ClientURL(), nats.UserInfo(acc, acc))
  8010  				count := 0
  8011  				natsSub(t, s2nc, "foo", func(_ *nats.Msg) {
  8012  					if count++; count == total {
  8013  						wg.Done()
  8014  					}
  8015  				})
  8016  				natsFlush(t, s2nc)
  8017  
  8018  				s1nc := natsConnect(t, s1.ClientURL(), nats.UserInfo(acc, acc))
  8019  
  8020  				checkSubInterest(t, s1, acc, "foo", time.Second)
  8021  				return s2nc, s1nc
  8022  			}
  8023  
  8024  			var rcv = [5]*nats.Conn{}
  8025  			var snd = [5]*nats.Conn{}
  8026  			accs := []string{"A", "B", "C", "D", "E"}
  8027  
  8028  			for i := 0; i < 5; i++ {
  8029  				rcv[i], snd[i] = sendAndRecv(accs[i])
  8030  				defer rcv[i].Close()
  8031  				defer snd[i].Close()
  8032  			}
  8033  
  8034  			payload := []byte("some message")
  8035  			start := time.Now()
  8036  			for i := 0; i < 5; i++ {
  8037  				go func(idx int) {
  8038  					for i := 0; i < total; i++ {
  8039  						snd[idx].Publish("foo", payload)
  8040  					}
  8041  				}(i)
  8042  			}
  8043  
  8044  			wg.Wait()
  8045  			dur := time.Since(start)
  8046  			if test.poolSize == 0 {
  8047  				dur1 = dur
  8048  			} else {
  8049  				dur2 = dur
  8050  			}
  8051  		})
  8052  	}
  8053  	perf1 := float64(total*5) / dur1.Seconds()
  8054  	t.Logf("No pooling: %.0f msgs/sec", perf1)
  8055  	perf2 := float64(total*5) / dur2.Seconds()
  8056  	t.Logf("Pooling   : %.0f msgs/sec", perf2)
  8057  	t.Logf("Gain      : %.2fx", perf2/perf1)
  8058  }
  8059  
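        // Shared body for the two route-per-account tests below: the same five-account
        // fan-out benchmark, comparing a shared route for all accounts against dedicated
        // per-account routes, optionally with the subscriber on a wildcard subject.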
  8060  func testNoRaceRoutePerAccount(t *testing.T, useWildCard bool) {
  8061  	var dur1 time.Duration
  8062  	var dur2 time.Duration
  8063  
  8064  	accounts := make([]string, 5)
  8065  	for i := 0; i < 5; i++ {
  8066  		akp, _ := nkeys.CreateAccount()
  8067  		pub, _ := akp.PublicKey()
  8068  		accounts[i] = pub
  8069  	}
  8070  	routeAccs := fmt.Sprintf("accounts: [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"]",
  8071  		accounts[0], accounts[1], accounts[2], accounts[3], accounts[4])
  8072  
  8073  	total := 1_000_000
  8074  
  8075  	for _, test := range []struct {
  8076  		name      string
  8077  		dedicated bool
  8078  	}{
  8079  		{"route for all accounts", false},
  8080  		{"route per account", true},
  8081  	} {
  8082  		t.Run(test.name, func(t *testing.T) {
  8083  			tmpl := `
  8084  			server_name: "%s"
  8085  			port: -1
  8086  			accounts {
  8087  				%s { users: [{user: "0", password: "0"}] }
  8088  				%s { users: [{user: "1", password: "1"}] }
  8089  				%s { users: [{user: "2", password: "2"}] }
  8090  				%s { users: [{user: "3", password: "3"}] }
  8091  				%s { users: [{user: "4", password: "4"}] }
  8092  			}
  8093  			cluster {
  8094  				port: -1
  8095  				name: "local"
  8096  				%s
  8097  				%s
  8098  			}
  8099  		`
  8100  			var racc string
  8101  			if test.dedicated {
  8102  				racc = routeAccs
  8103  			} else {
  8104  				racc = _EMPTY_
  8105  			}
  8106  			conf1 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "A",
  8107  				accounts[0], accounts[1], accounts[2], accounts[3],
  8108  				accounts[4], _EMPTY_, racc)))
  8109  			s1, o1 := RunServerWithConfig(conf1)
  8110  			defer s1.Shutdown()
  8111  
  8112  			conf2 := createConfFile(t, []byte(fmt.Sprintf(tmpl, "B",
  8113  				accounts[0], accounts[1], accounts[2], accounts[3], accounts[4],
  8114  				fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", o1.Cluster.Port),
  8115  				racc)))
  8116  			s2, _ := RunServerWithConfig(conf2)
  8117  			defer s2.Shutdown()
  8118  
  8119  			checkClusterFormed(t, s1, s2)
  8120  
  8121  			wg := sync.WaitGroup{}
  8122  			wg.Add(5)
  8123  
  8124  			sendAndRecv := func(acc string, user string) (*nats.Conn, *nats.Conn) {
  8125  				t.Helper()
  8126  
  8127  				s2nc := natsConnect(t, s2.ClientURL(), nats.UserInfo(user, user))
  8128  				count := 0
  8129  				var subj string
  8130  				var checkSubj string
  8131  				if useWildCard {
  8132  					subj, checkSubj = "foo.*", "foo.0"
  8133  				} else {
  8134  					subj, checkSubj = "foo", "foo"
  8135  				}
  8136  				natsSub(t, s2nc, subj, func(_ *nats.Msg) {
  8137  					if count++; count == total {
  8138  						wg.Done()
  8139  					}
  8140  				})
  8141  				natsFlush(t, s2nc)
  8142  
  8143  				s1nc := natsConnect(t, s1.ClientURL(), nats.UserInfo(user, user))
  8144  
  8145  				checkSubInterest(t, s1, acc, checkSubj, time.Second)
  8146  				return s2nc, s1nc
  8147  			}
  8148  
  8149  			var rcv = [5]*nats.Conn{}
  8150  			var snd = [5]*nats.Conn{}
  8151  			users := []string{"0", "1", "2", "3", "4"}
  8152  
  8153  			for i := 0; i < 5; i++ {
  8154  				rcv[i], snd[i] = sendAndRecv(accounts[i], users[i])
  8155  				defer rcv[i].Close()
  8156  				defer snd[i].Close()
  8157  			}
  8158  
  8159  			payload := []byte("some message")
  8160  			start := time.Now()
  8161  			for i := 0; i < 5; i++ {
  8162  				go func(idx int) {
  8163  					for i := 0; i < total; i++ {
  8164  						var subj string
  8165  						if useWildCard {
  8166  							subj = fmt.Sprintf("foo.%d", i)
  8167  						} else {
  8168  							subj = "foo"
  8169  						}
  8170  						snd[idx].Publish(subj, payload)
  8171  					}
  8172  				}(i)
  8173  			}
  8174  
  8175  			wg.Wait()
  8176  			dur := time.Since(start)
  8177  			if !test.dedicated {
  8178  				dur1 = dur
  8179  			} else {
  8180  				dur2 = dur
  8181  			}
  8182  		})
  8183  	}
  8184  	perf1 := float64(total*5) / dur1.Seconds()
  8185  	t.Logf("Route for all accounts: %.0f msgs/sec", perf1)
  8186  	perf2 := float64(total*5) / dur2.Seconds()
  8187  	t.Logf("Route per account     : %.0f msgs/sec", perf2)
  8188  	t.Logf("Gain                  : %.2fx", perf2/perf1)
  8189  }
  8190  
  8191  func TestNoRaceRoutePerAccount(t *testing.T) {
  8192  	testNoRaceRoutePerAccount(t, false)
  8193  }
  8194  
  8195  func TestNoRaceRoutePerAccountSubWithWildcard(t *testing.T) {
  8196  	testNoRaceRoutePerAccount(t, true)
  8197  }
  8198  
  8199  // This test, which checks that messages are not duplicated when pooling or
  8200  // per-account routes are reloaded, would cause a DATA RACE that is not
  8201  // specific to the changes for pooling/per_account. For this reason, this
  8202  // test is located in the norace_test.go file.
  8203  func TestNoRaceRoutePoolAndPerAccountConfigReload(t *testing.T) {
  8204  	for _, test := range []struct {
  8205  		name           string
  8206  		poolSizeBefore string
  8207  		poolSizeAfter  string
  8208  		accountsBefore string
  8209  		accountsAfter  string
  8210  	}{
  8211  		{"from no pool to pool", _EMPTY_, "pool_size: 2", _EMPTY_, _EMPTY_},
  8212  		{"increase pool size", "pool_size: 2", "pool_size: 5", _EMPTY_, _EMPTY_},
  8213  		{"decrease pool size", "pool_size: 5", "pool_size: 2", _EMPTY_, _EMPTY_},
  8214  		{"from pool to no pool", "pool_size: 5", _EMPTY_, _EMPTY_, _EMPTY_},
  8215  		{"from no account to account", _EMPTY_, _EMPTY_, _EMPTY_, "accounts: [\"A\"]"},
  8216  		{"add account", _EMPTY_, _EMPTY_, "accounts: [\"B\"]", "accounts: [\"A\",\"B\"]"},
  8217  		{"remove account", _EMPTY_, _EMPTY_, "accounts: [\"A\",\"B\"]", "accounts: [\"B\"]"},
  8218  		{"from account to no account", _EMPTY_, _EMPTY_, "accounts: [\"A\"]", _EMPTY_},
  8219  		{"increase pool size and add account", "pool_size: 2", "pool_size: 3", "accounts: [\"B\"]", "accounts: [\"B\",\"A\"]"},
  8220  		{"decrease pool size and remove account", "pool_size: 3", "pool_size: 2", "accounts: [\"A\",\"B\"]", "accounts: [\"B\"]"},
  8221  	} {
  8222  		t.Run(test.name, func(t *testing.T) {
  8223  			tmplA := `
  8224  				port: -1
  8225  				server_name: "A"
  8226  				accounts {
  8227  					A { users: [{user: a, password: pwd}] }
  8228  					B { users: [{user: b, password: pwd}] }
  8229  				}
  8230  				cluster: {
  8231  					port: -1
  8232  					name: "local"
  8233  					%s
  8234  					%s
  8235  				}
  8236  			`
  8237  			confA := createConfFile(t, []byte(fmt.Sprintf(tmplA, test.poolSizeBefore, test.accountsBefore)))
  8238  			srva, optsA := RunServerWithConfig(confA)
  8239  			defer srva.Shutdown()
  8240  
  8241  			tmplB := `
  8242  				port: -1
  8243  				server_name: "B"
  8244  				accounts {
  8245  					A { users: [{user: a, password: pwd}] }
  8246  					B { users: [{user: b, password: pwd}] }
  8247  				}
  8248  				cluster: {
  8249  					port: -1
  8250  					name: "local"
  8251  					%s
  8252  					%s
  8253  					routes: ["nats://127.0.0.1:%d"]
  8254  				}
  8255  			`
  8256  			confB := createConfFile(t, []byte(fmt.Sprintf(tmplB, test.poolSizeBefore, test.accountsBefore, optsA.Cluster.Port)))
  8257  			srvb, _ := RunServerWithConfig(confB)
  8258  			defer srvb.Shutdown()
  8259  
  8260  			checkClusterFormed(t, srva, srvb)
  8261  
  8262  			ncA := natsConnect(t, srva.ClientURL(), nats.UserInfo("a", "pwd"))
  8263  			defer ncA.Close()
  8264  
  8265  			sub := natsSubSync(t, ncA, "foo")
  8266  			sub.SetPendingLimits(-1, -1)
  8267  			checkSubInterest(t, srvb, "A", "foo", time.Second)
  8268  
  8269  			ncB := natsConnect(t, srvb.ClientURL(), nats.UserInfo("a", "pwd"))
  8270  			defer ncB.Close()
  8271  
  8272  			wg := sync.WaitGroup{}
  8273  			wg.Add(1)
  8274  			ch := make(chan struct{})
  8275  			go func() {
  8276  				defer wg.Done()
  8277  
  8278  				for i := 0; ; i++ {
  8279  					ncB.Publish("foo", []byte(fmt.Sprintf("%d", i)))
  8280  					select {
  8281  					case <-ch:
  8282  						return
  8283  					default:
  8284  					}
  8285  					if i%300 == 0 {
  8286  						time.Sleep(time.Duration(rand.Intn(5)) * time.Millisecond)
  8287  					}
  8288  				}
  8289  			}()
  8290  
  8291  			var l *captureErrorLogger
  8292  			if test.accountsBefore != _EMPTY_ && test.accountsAfter == _EMPTY_ {
  8293  				l = &captureErrorLogger{errCh: make(chan string, 100)}
  8294  				srva.SetLogger(l, false, false)
  8295  			}
  8296  
  8297  			time.Sleep(250 * time.Millisecond)
  8298  			reloadUpdateConfig(t, srva, confA, fmt.Sprintf(tmplA, test.poolSizeAfter, test.accountsAfter))
  8299  			time.Sleep(125 * time.Millisecond)
  8300  			reloadUpdateConfig(t, srvb, confB, fmt.Sprintf(tmplB, test.poolSizeAfter, test.accountsAfter, optsA.Cluster.Port))
  8301  
  8302  			checkClusterFormed(t, srva, srvb)
  8303  			checkSubInterest(t, srvb, "A", "foo", time.Second)
  8304  
  8305  			if l != nil {
  8306  				// Errors regarding "No route for account" should eventually stop.
  8307  				var ok bool
  8308  				for numErrs := 0; !ok && numErrs < 10; {
  8309  					select {
  8310  					case e := <-l.errCh:
  8311  						if strings.Contains(e, "No route for account") {
  8312  							numErrs++
  8313  						}
  8314  					case <-time.After(DEFAULT_ROUTE_RECONNECT + 250*time.Millisecond):
  8315  						ok = true
  8316  					}
  8317  				}
  8318  				if !ok {
  8319  					t.Fatalf("Still reporting no route for account")
  8320  				}
  8321  			}
  8322  
  8323  			close(ch)
  8324  			wg.Wait()
  8325  
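        			// Drain whatever the publisher sent and verify the embedded counters are
        			// strictly increasing: a duplicated or re-ordered delivery during the
        			// reload would show up as a value <= the previous one.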
  8326  			for prev := -1; ; {
  8327  				msg, err := sub.NextMsg(50 * time.Millisecond)
  8328  				if err != nil {
  8329  					break
  8330  				}
  8331  				cur, _ := strconv.Atoi(string(msg.Data))
  8332  				if cur <= prev {
  8333  					t.Fatalf("Previous was %d, got %d", prev, cur)
  8334  				}
  8335  				prev = cur
  8336  			}
  8337  		})
  8338  	}
  8339  }
  8340  
  8341  // This test ensures that outbound queues don't cause a run on
  8342  // memory when sending something to lots of clients.
  8343  func TestNoRaceClientOutboundQueueMemory(t *testing.T) {
  8344  	opts := DefaultOptions()
  8345  	s := RunServer(opts)
  8346  	defer s.Shutdown()
  8347  
  8348  	var before runtime.MemStats
  8349  	var after runtime.MemStats
  8350  
  8351  	var err error
  8352  	clients := make([]*nats.Conn, 50000)
  8353  	wait := &sync.WaitGroup{}
  8354  	wait.Add(len(clients))
  8355  
  8356  	for i := 0; i < len(clients); i++ {
  8357  		clients[i], err = nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port), nats.InProcessServer(s))
  8358  		if err != nil {
  8359  			t.Fatalf("Error on connect: %v", err)
  8360  		}
  8361  		defer clients[i].Close()
  8362  
  8363  		clients[i].Subscribe("test", func(m *nats.Msg) {
  8364  			wait.Done()
  8365  		})
  8366  	}
  8367  
  8368  	runtime.GC()
  8369  	runtime.ReadMemStats(&before)
  8370  
  8371  	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port), nats.InProcessServer(s))
  8372  	if err != nil {
  8373  		t.Fatalf("Error on connect: %v", err)
  8374  	}
  8375  	defer nc.Close()
  8376  
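        	// A single ~48KB payload fanned out to 50k subscribers would be roughly
        	// 2.4GB if every outbound queue retained its own copy, so heap growth
        	// after delivery must stay within the 10% bound checked below.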
  8377  	var m [48000]byte
  8378  	if err = nc.Publish("test", m[:]); err != nil {
  8379  		t.Fatal(err)
  8380  	}
  8381  
  8382  	wait.Wait()
  8383  
  8384  	runtime.GC()
  8385  	runtime.ReadMemStats(&after)
  8386  
  8387  	hb, ha := float64(before.HeapAlloc), float64(after.HeapAlloc)
  8388  	ms := float64(len(m))
  8389  	diff := ha - hb
  8390  	inc := (diff / hb) * 100
  8391  
  8392  	if inc > 10 {
  8393  		t.Logf("Message size:       %.1fKB\n", ms/1024)
  8394  		t.Logf("Subscribed clients: %d\n", len(clients))
  8395  		t.Logf("Heap allocs before: %.1fMB\n", hb/1024/1024)
  8396  		t.Logf("Heap allocs after:  %.1fMB\n", ha/1024/1024)
  8397  		t.Logf("Heap allocs delta:  %.1f%%\n", inc)
  8398  
  8399  		t.Fatalf("memory increase was %.1f%% (should be <= 10%%)", inc)
  8400  	}
  8401  }
  8402  
  8403  func TestNoRaceJetStreamClusterLeafnodeConnectPerf(t *testing.T) {
  8404  	// Uncomment to run. Needs to be on a big machine; we do not want it as part of the Travis tests at the moment.
  8405  	skip(t)
  8406  
  8407  	tmpl := strings.Replace(jsClusterAccountsTempl, "store_dir:", "domain: cloud, store_dir:", 1)
  8408  	c := createJetStreamCluster(t, tmpl, "CLOUD", _EMPTY_, 3, 18033, true)
  8409  	defer c.shutdown()
  8410  
  8411  	nc, js := jsClientConnect(t, c.randomServer())
  8412  	defer nc.Close()
  8413  
  8414  	_, err := js.AddStream(&nats.StreamConfig{
  8415  		Name:     "STATE",
  8416  		Subjects: []string{"STATE.GLOBAL.CELL1.*.>"},
  8417  		Replicas: 3,
  8418  	})
  8419  	require_NoError(t, err)
  8420  
  8421  	tmpl = strings.Replace(jsClusterTemplWithSingleFleetLeafNode, "store_dir:", "domain: vehicle, store_dir:", 1)
  8422  
  8423  	var vinSerial int
  8424  	genVIN := func() string {
  8425  		vinSerial++
  8426  		return fmt.Sprintf("7PDSGAALXNN%06d", vinSerial)
  8427  	}
  8428  
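        	// Each simulated vehicle connects as a websocket leafnode and creates a
        	// local R1 stream that sources only its own VIN-filtered subset of the
        	// cloud STATE stream through the cloud domain's JS API prefix.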
  8429  	numVehicles := 500
  8430  	for i := 0; i < numVehicles; i++ {
  8431  		start := time.Now()
  8432  		vin := genVIN()
  8433  		ln := c.createLeafNodeWithTemplateNoSystemWithProto(vin, tmpl, "ws")
  8434  		nc, js := jsClientConnect(t, ln)
  8435  		_, err := js.AddStream(&nats.StreamConfig{
  8436  			Name:     "VEHICLE",
  8437  			Subjects: []string{"STATE.GLOBAL.LOCAL.>"},
  8438  			Sources: []*nats.StreamSource{{
  8439  				Name:          "STATE",
  8440  				FilterSubject: fmt.Sprintf("STATE.GLOBAL.CELL1.%s.>", vin),
  8441  				External: &nats.ExternalStream{
  8442  					APIPrefix:     "$JS.cloud.API",
  8443  					DeliverPrefix: fmt.Sprintf("DELIVER.STATE.GLOBAL.CELL1.%s", vin),
  8444  				},
  8445  			}},
  8446  		})
  8447  		require_NoError(t, err)
  8448  		// The sourced stream was created above; wait for the leafnode connection to be up.
  8449  		checkLeafNodeConnectedCount(t, ln, 1)
  8450  		if elapsed := time.Since(start); elapsed > 2*time.Second {
  8451  			t.Fatalf("Took too long to create leafnode %d connection: %v", i+1, elapsed)
  8452  		}
  8453  		nc.Close()
  8454  	}
  8455  }
  8456  
  8457  func TestNoRaceJetStreamClusterDifferentRTTInterestBasedStreamPreAck(t *testing.T) {
  8458  	tmpl := `
  8459  	listen: 127.0.0.1:-1
  8460  	server_name: %s
  8461  	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  8462  
  8463  	cluster {
  8464  		name: "F3"
  8465  		listen: 127.0.0.1:%d
  8466  		routes = [%s]
  8467  	}
  8468  
  8469  	accounts {
  8470  		$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
  8471  	}
  8472  	`
  8473  
  8474  	//  Route Ports
  8475  	//	"S1": 14622,
  8476  	//	"S2": 15622,
  8477  	//	"S3": 16622,
  8478  
  8479  	// S2 (stream leader) will have a slow path to S1 (via proxy) and S3 (consumer leader) will have a fast path.
  8480  
  8481  	// Do these in order, S1, S2 (proxy) then S3.
  8482  	c := &cluster{t: t, servers: make([]*Server, 3), opts: make([]*Options, 3), name: "F3"}
  8483  
  8484  	// S1
  8485  	conf := fmt.Sprintf(tmpl, "S1", t.TempDir(), 14622, "route://127.0.0.1:15622, route://127.0.0.1:16622")
  8486  	c.servers[0], c.opts[0] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  8487  
  8488  	// S2
  8489  	// Create the proxy first. Connect this to S1. Make it slow by adding latency on the route (1ms each way).
  8490  	np := createNetProxy(1*time.Millisecond, 1024*1024*1024, 1024*1024*1024, "route://127.0.0.1:14622", true)
  8491  	routes := fmt.Sprintf("%s, route://127.0.0.1:16622", np.routeURL())
  8492  	conf = fmt.Sprintf(tmpl, "S2", t.TempDir(), 15622, routes)
  8493  	c.servers[1], c.opts[1] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  8494  
  8495  	// S3
  8496  	conf = fmt.Sprintf(tmpl, "S3", t.TempDir(), 16622, "route://127.0.0.1:14622, route://127.0.0.1:15622")
  8497  	c.servers[2], c.opts[2] = RunServerWithConfig(createConfFile(t, []byte(conf)))
  8498  
  8499  	c.checkClusterFormed()
  8500  	c.waitOnClusterReady()
  8501  	defer c.shutdown()
  8502  	defer np.stop()
  8503  
  8504  	nc, js := jsClientConnect(t, c.randomServer())
  8505  	defer nc.Close()
  8506  
  8507  	// Now create the stream.
  8508  	_, err := js.AddStream(&nats.StreamConfig{
  8509  		Name:      "EVENTS",
  8510  		Subjects:  []string{"EV.>"},
  8511  		Replicas:  3,
  8512  		Retention: nats.InterestPolicy,
  8513  	})
  8514  	require_NoError(t, err)
  8515  
  8516  	// Make sure its leader is on S2.
  8517  	sl := c.servers[1]
  8518  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  8519  		c.waitOnStreamLeader(globalAccountName, "EVENTS")
  8520  		if s := c.streamLeader(globalAccountName, "EVENTS"); s != sl {
  8521  			s.JetStreamStepdownStream(globalAccountName, "EVENTS")
  8522  			return fmt.Errorf("Server %s is not stream leader yet", sl)
  8523  		}
  8524  		return nil
  8525  	})
  8526  
  8527  	// Now create the consumer.
  8528  	_, err = js.AddConsumer("EVENTS", &nats.ConsumerConfig{
  8529  		Durable:        "C",
  8530  		AckPolicy:      nats.AckExplicitPolicy,
  8531  		DeliverSubject: "dx",
  8532  	})
  8533  	require_NoError(t, err)
  8534  
  8535  	// Make sure the consumer leader is on S3.
  8536  	cl := c.servers[2]
  8537  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  8538  		c.waitOnConsumerLeader(globalAccountName, "EVENTS", "C")
  8539  		if s := c.consumerLeader(globalAccountName, "EVENTS", "C"); s != cl {
  8540  			s.JetStreamStepdownConsumer(globalAccountName, "EVENTS", "C")
  8541  			return fmt.Errorf("Server %s is not consumer leader yet", cl)
  8542  		}
  8543  		return nil
  8544  	})
  8545  
  8546  	// Create the real consumer on the consumer leader to make it efficient.
  8547  	nc, js = jsClientConnect(t, cl)
  8548  	defer nc.Close()
  8549  
  8550  	_, err = js.Subscribe(_EMPTY_, func(msg *nats.Msg) {
  8551  		msg.Ack()
  8552  	}, nats.BindStream("EVENTS"), nats.Durable("C"), nats.ManualAck())
  8553  	require_NoError(t, err)
  8554  
  8555  	for i := 0; i < 1_000; i++ {
  8556  		_, err := js.PublishAsync("EVENTS.PAID", []byte("ok"))
  8557  		require_NoError(t, err)
  8558  	}
  8559  	select {
  8560  	case <-js.PublishAsyncComplete():
  8561  	case <-time.After(5 * time.Second):
  8562  		t.Fatalf("Did not receive completion signal")
  8563  	}
  8564  
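        	// With the consumer leader (S3) on a fast path, acks can reach the slow
        	// replica (S1) before the corresponding messages arrive over the proxied
        	// route; those early acks are what should populate preAcks on that replica.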
  8565  	slow := c.servers[0]
  8566  	mset, err := slow.GlobalAccount().lookupStream("EVENTS")
  8567  	require_NoError(t, err)
  8568  
  8569  	// Make sure preAck is non-nil, so we know the logic has kicked in.
  8570  	mset.mu.RLock()
  8571  	preAcks := mset.preAcks
  8572  	mset.mu.RUnlock()
  8573  	require_NotNil(t, preAcks)
  8574  
  8575  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  8576  		state := mset.state()
  8577  		if state.Msgs == 0 {
  8578  			mset.mu.RLock()
  8579  			lp := len(mset.preAcks)
  8580  			mset.mu.RUnlock()
  8581  			if lp == 0 {
  8582  				return nil
  8583  			} else {
  8584  				t.Fatalf("Expected no preAcks with no msgs, but got %d", lp)
  8585  			}
  8586  		}
  8587  		return fmt.Errorf("Still have %d msgs left", state.Msgs)
  8588  	})
  8589  
  8590  }
  8591  
  8592  func TestNoRaceCheckAckFloorWithVeryLargeFirstSeqAndNewConsumers(t *testing.T) {
  8593  	s := RunBasicJetStreamServer(t)
  8594  	defer s.Shutdown()
  8595  
  8596  	nc, _ := jsClientConnect(t, s)
  8597  	defer nc.Close()
  8598  
  8599  	// Make sure to time-bound the JS context for the AckSync call below.
  8600  	js, err := nc.JetStream(nats.MaxWait(200 * time.Millisecond))
  8601  	require_NoError(t, err)
  8602  
  8603  	_, err = js.AddStream(&nats.StreamConfig{
  8604  		Name:      "TEST",
  8605  		Subjects:  []string{"wq-req"},
  8606  		Retention: nats.WorkQueuePolicy,
  8607  	})
  8608  	require_NoError(t, err)
  8609  
  8610  	largeFirstSeq := uint64(1_200_000_000)
  8611  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: largeFirstSeq})
  8612  	require_NoError(t, err)
  8613  	si, err := js.StreamInfo("TEST")
  8614  	require_NoError(t, err)
  8615  	require_True(t, si.State.FirstSeq == largeFirstSeq)
  8616  
  8617  	// Add a simple request to the stream.
  8618  	sendStreamMsg(t, nc, "wq-req", "HELP")
  8619  
  8620  	sub, err := js.PullSubscribe("wq-req", "dlc")
  8621  	require_NoError(t, err)
  8622  
  8623  	msgs, err := sub.Fetch(1)
  8624  	require_NoError(t, err)
  8625  	require_True(t, len(msgs) == 1)
  8626  
  8627  	// The bug is that checkAckFloor walks every sequence from the current ack floor
  8628  	// up to the first sequence of the stream. We time-bound the max wait with the js context
  8629  	// to 200ms; since checkAckFloor spins and holds up processing of acks, this would fail.
  8630  	// The fix short-circuits that walk for new consumers.
  8631  	require_NoError(t, msgs[0].AckSync())
  8632  
  8633  	// Now do again so we move past the new consumer with no ack floor situation.
  8634  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: 2 * largeFirstSeq})
  8635  	require_NoError(t, err)
  8636  	si, err = js.StreamInfo("TEST")
  8637  	require_NoError(t, err)
  8638  	require_True(t, si.State.FirstSeq == 2*largeFirstSeq)
  8639  
  8640  	sendStreamMsg(t, nc, "wq-req", "MORE HELP")
  8641  
  8642  	// We check this one directly for this use case.
  8643  	mset, err := s.GlobalAccount().lookupStream("TEST")
  8644  	require_NoError(t, err)
  8645  	o := mset.lookupConsumer("dlc")
  8646  	require_True(t, o != nil)
  8647  
  8648  	// Purge will move the floor by default, so force the consumer's ack floor back to largeFirstSeq.
  8649  	// This will not trigger the new consumer logic, but will trigger a walk of the sequence space.
  8650  	// The fix is to walk the lesser of the two linear spaces.
  8651  	o.mu.Lock()
  8652  	o.asflr = largeFirstSeq
  8653  	o.mu.Unlock()
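        	// checkAckFloor now has ~1.2B sequences between the forced ack floor and the
        	// stream's first sequence; it must still return well within a second.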
  8654  
  8655  	done := make(chan bool)
  8656  	go func() {
  8657  		o.checkAckFloor()
  8658  		done <- true
  8659  	}()
  8660  
  8661  	select {
  8662  	case <-done:
  8663  		return
  8664  	case <-time.After(time.Second):
  8665  		t.Fatalf("Check ack floor taking too long!")
  8666  	}
  8667  }
  8668  
  8669  func TestNoRaceReplicatedMirrorWithLargeStartingSequenceOverLeafnode(t *testing.T) {
  8670  	// Cluster B
  8671  	tmpl := strings.Replace(jsClusterTempl, "store_dir:", "domain: B, store_dir:", 1)
  8672  	c := createJetStreamCluster(t, tmpl, "B", _EMPTY_, 3, 22020, true)
  8673  	defer c.shutdown()
  8674  
  8675  	// Cluster A
  8676  	// Domain is "A".
  8677  	lc := c.createLeafNodesWithStartPortAndDomain("A", 3, 22110, "A")
  8678  	defer lc.shutdown()
  8679  
  8680  	lc.waitOnClusterReady()
  8681  
  8682  	// Create a stream on B (HUB/CLOUD) and set its starting sequence very high.
  8683  	nc, js := jsClientConnect(t, c.randomServer())
  8684  	defer nc.Close()
  8685  
  8686  	_, err := js.AddStream(&nats.StreamConfig{
  8687  		Name:     "TEST",
  8688  		Subjects: []string{"foo"},
  8689  		Replicas: 3,
  8690  	})
  8691  	require_NoError(t, err)
  8692  
  8693  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: 1_000_000_000})
  8694  	require_NoError(t, err)
  8695  
  8696  	// Send in a small number of messages.
  8697  	for i := 0; i < 1000; i++ {
  8698  		sendStreamMsg(t, nc, "foo", "Hello")
  8699  	}
  8700  
  8701  	si, err := js.StreamInfo("TEST")
  8702  	require_NoError(t, err)
  8703  	require_True(t, si.State.FirstSeq == 1_000_000_000)
  8704  
  8705  	// Now try to create a replicated mirror on the leaf cluster.
  8706  	lnc, ljs := jsClientConnect(t, lc.randomServer())
  8707  	defer lnc.Close()
  8708  
  8709  	_, err = ljs.AddStream(&nats.StreamConfig{
  8710  		Name: "TEST",
  8711  		Mirror: &nats.StreamSource{
  8712  			Name:   "TEST",
  8713  			Domain: "B",
  8714  		},
  8715  	})
  8716  	require_NoError(t, err)
  8717  
  8718  	// Make sure we sync quickly.
  8719  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  8720  		si, err = ljs.StreamInfo("TEST")
  8721  		require_NoError(t, err)
  8722  		if si.State.Msgs == 1000 && si.State.FirstSeq == 1_000_000_000 {
  8723  			return nil
  8724  		}
  8725  		return fmt.Errorf("Mirror state not correct: %+v", si.State)
  8726  	})
  8727  }
  8728  
  8729  func TestNoRaceBinaryStreamSnapshotEncodingBasic(t *testing.T) {
  8730  	s := RunBasicJetStreamServer(t)
  8731  	defer s.Shutdown()
  8732  
  8733  	nc, js := jsClientConnect(t, s)
  8734  	defer nc.Close()
  8735  
  8736  	_, err := js.AddStream(&nats.StreamConfig{
  8737  		Name:              "TEST",
  8738  		Subjects:          []string{"*"},
  8739  		MaxMsgsPerSubject: 1,
  8740  	})
  8741  	require_NoError(t, err)
  8742  
  8743  	// Set first key
  8744  	sendStreamMsg(t, nc, "key:1", "hello")
  8745  
  8746  	// Set the second key but keep updating it, causing a laggard pattern.
  8747  	value := bytes.Repeat([]byte("Z"), 8*1024)
  8748  
  8749  	for i := 0; i <= 1000; i++ {
  8750  		_, err := js.PublishAsync("key:2", value)
  8751  		require_NoError(t, err)
  8752  	}
  8753  	select {
  8754  	case <-js.PublishAsyncComplete():
  8755  	case <-time.After(5 * time.Second):
  8756  		t.Fatalf("Did not receive completion signal")
  8757  	}
  8758  
  8759  	// Now create more of a swiss cheese pattern.
  8760  	for i := 3; i <= 1000; i++ {
  8761  		key := fmt.Sprintf("key:%d", i)
  8762  		_, err := js.PublishAsync(key, value)
  8763  		require_NoError(t, err)
  8764  		// Send it twice to create a hole right behind it, like swiss cheese.
  8765  		_, err = js.PublishAsync(key, value)
  8766  		require_NoError(t, err)
  8767  	}
  8768  	select {
  8769  	case <-js.PublishAsyncComplete():
  8770  	case <-time.After(5 * time.Second):
  8771  		t.Fatalf("Did not receive completion signal")
  8772  	}
  8773  
  8774  	// Publish two more so the stream state ends up with round numbers.
  8775  	sendStreamMsg(t, nc, "key:2", "hello")
  8776  	sendStreamMsg(t, nc, "key:2", "world")
  8777  
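        	// Totals: 1 publish to key:1, 1001 to key:2, 2x998 for keys 3..1000, plus the
        	// 2 above = 3000 sequences. With MaxMsgsPerSubject: 1 only the latest message
        	// per subject survives, so 1000 msgs remain and 2000 interior deletes are left.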
  8778  	si, err := js.StreamInfo("TEST")
  8779  	require_NoError(t, err)
  8780  	require_True(t, si.State.FirstSeq == 1)
  8781  	require_True(t, si.State.LastSeq == 3000)
  8782  	require_True(t, si.State.Msgs == 1000)
  8783  	require_True(t, si.State.NumDeleted == 2000)
  8784  
  8785  	mset, err := s.GlobalAccount().lookupStream("TEST")
  8786  	require_NoError(t, err)
  8787  
  8788  	snap, err := mset.store.EncodedStreamState(0)
  8789  	require_NoError(t, err)
  8790  
  8791  	// Now decode the snapshot.
  8792  	ss, err := DecodeStreamState(snap)
  8793  	require_NoError(t, err)
  8794  
  8795  	require_Equal(t, ss.FirstSeq, 1)
  8796  	require_Equal(t, ss.LastSeq, 3000)
  8797  	require_Equal(t, ss.Msgs, 1000)
  8798  	require_Equal(t, ss.Deleted.NumDeleted(), 2000)
  8799  }
  8800  
  8801  func TestNoRaceFilestoreBinaryStreamSnapshotEncodingLargeGaps(t *testing.T) {
  8802  	storeDir := t.TempDir()
  8803  	fcfg := FileStoreConfig{
  8804  		StoreDir:  storeDir,
  8805  		BlockSize: 512, // Small on purpose to create a lot of blocks.
  8806  	}
  8807  	fs, err := newFileStore(fcfg, StreamConfig{Name: "zzz", Subjects: []string{"zzz"}, Storage: FileStorage})
  8808  	require_NoError(t, err)
  8809  	defer fs.Stop()
  8810  
  8811  	subj, msg := "zzz", bytes.Repeat([]byte("X"), 128)
  8812  	numMsgs := 20_000
  8813  
  8814  	fs.StoreMsg(subj, nil, msg)
  8815  	for i := 2; i < numMsgs; i++ {
  8816  		seq, _, err := fs.StoreMsg(subj, nil, nil)
  8817  		require_NoError(t, err)
  8818  		fs.RemoveMsg(seq)
  8819  	}
  8820  	fs.StoreMsg(subj, nil, msg)
  8821  
  8822  	snap, err := fs.EncodedStreamState(0)
  8823  	require_NoError(t, err)
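        	// Sequences 2 through 19,999 were removed as one contiguous run, so the
        	// encoded delete state should collapse to a range (or two) rather than
        	// per-sequence entries, keeping the snapshot well under a single block.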
  8824  	require_True(t, len(snap) < 512)
  8825  
  8826  	// Now decode the snapshot.
  8827  	ss, err := DecodeStreamState(snap)
  8828  	require_NoError(t, err)
  8829  
  8830  	require_True(t, ss.FirstSeq == 1)
  8831  	require_True(t, ss.LastSeq == 20_000)
  8832  	require_True(t, ss.Msgs == 2)
  8833  	require_True(t, len(ss.Deleted) <= 2)
  8834  	require_True(t, ss.Deleted.NumDeleted() == 19_998)
  8835  }
  8836  
  8837  func TestNoRaceJetStreamClusterStreamSnapshotCatchup(t *testing.T) {
  8838  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  8839  	defer c.shutdown()
  8840  
  8841  	// Client based API
  8842  	nc, js := jsClientConnect(t, c.randomServer())
  8843  	defer nc.Close()
  8844  
  8845  	_, err := js.AddStream(&nats.StreamConfig{
  8846  		Name:              "TEST",
  8847  		Subjects:          []string{"*"},
  8848  		MaxMsgsPerSubject: 1,
  8849  		Replicas:          3,
  8850  	})
  8851  	require_NoError(t, err)
  8852  
  8853  	msg := []byte("Hello World")
  8854  	_, err = js.Publish("foo", msg)
  8855  	require_NoError(t, err)
  8856  
  8857  	for i := 1; i < 1000; i++ {
  8858  		_, err := js.PublishAsync("bar", msg)
  8859  		require_NoError(t, err)
  8860  	}
  8861  	select {
  8862  	case <-js.PublishAsyncComplete():
  8863  	case <-time.After(5 * time.Second):
  8864  		t.Fatalf("Did not receive completion signal")
  8865  	}
  8866  
  8867  	sr := c.randomNonStreamLeader(globalAccountName, "TEST")
  8868  	sr.Shutdown()
  8869  
  8870  	// In case we were connected to sr.
  8871  	nc, js = jsClientConnect(t, c.randomServer())
  8872  	defer nc.Close()
  8873  
  8874  	// Now create a large gap.
  8875  	for i := 0; i < 50_000; i++ {
  8876  		_, err := js.PublishAsync("bar", msg)
  8877  		require_NoError(t, err)
  8878  	}
  8879  	select {
  8880  	case <-js.PublishAsyncComplete():
  8881  	case <-time.After(10 * time.Second):
  8882  		t.Fatalf("Did not receive completion signal")
  8883  	}
  8884  
  8885  	sl := c.streamLeader(globalAccountName, "TEST")
  8886  	sl.JetStreamSnapshotStream(globalAccountName, "TEST")
  8887  
  8888  	sr = c.restartServer(sr)
  8889  	c.checkClusterFormed()
  8890  	c.waitOnServerCurrent(sr)
  8891  	c.waitOnStreamCurrent(sr, globalAccountName, "TEST")
  8892  
  8893  	mset, err := sr.GlobalAccount().lookupStream("TEST")
  8894  	require_NoError(t, err)
  8895  
  8896  	// Make sure it's caught up
  8897  	var state StreamState
  8898  	mset.store.FastState(&state)
  8899  	require_Equal(t, state.Msgs, 2)
  8900  	require_Equal(t, state.FirstSeq, 1)
  8901  	require_Equal(t, state.LastSeq, 51_000)
  8902  	require_Equal(t, state.NumDeleted, 51_000-2)
  8903  
  8904  	sr.Shutdown()
  8905  
  8906  	_, err = js.Publish("baz", msg)
  8907  	require_NoError(t, err)
  8908  
  8909  	sl.JetStreamSnapshotStream(globalAccountName, "TEST")
  8910  
  8911  	sr = c.restartServer(sr)
  8912  	c.checkClusterFormed()
  8913  	c.waitOnServerCurrent(sr)
  8914  	c.waitOnStreamCurrent(sr, globalAccountName, "TEST")
  8915  
  8916  	mset, err = sr.GlobalAccount().lookupStream("TEST")
  8917  	require_NoError(t, err)
  8918  	mset.store.FastState(&state)
  8919  
  8920  	require_Equal(t, state.Msgs, 3)
  8921  	require_Equal(t, state.FirstSeq, 1)
  8922  	require_Equal(t, state.LastSeq, 51_001)
  8923  	require_Equal(t, state.NumDeleted, 51_001-3)
  8924  }
  8925  
  8926  func TestNoRaceStoreStreamEncoderDecoder(t *testing.T) {
  8927  	cfg := &StreamConfig{
  8928  		Name:       "zzz",
  8929  		Subjects:   []string{"*"},
  8930  		MaxMsgsPer: 1,
  8931  		Storage:    MemoryStorage,
  8932  	}
  8933  	ms, err := newMemStore(cfg)
  8934  	require_NoError(t, err)
  8935  
  8936  	fs, err := newFileStore(
  8937  		FileStoreConfig{StoreDir: t.TempDir()},
  8938  		StreamConfig{Name: "zzz", Subjects: []string{"*"}, MaxMsgsPer: 1, Storage: FileStorage},
  8939  	)
  8940  	require_NoError(t, err)
  8941  	defer fs.Stop()
  8942  
  8943  	const seed = 2222222
  8944  	msg := bytes.Repeat([]byte("ABC"), 33) // ~100bytes
  8945  
  8946  	maxEncodeTime := 2 * time.Second
  8947  	maxEncodeSize := 700 * 1024
  8948  
  8949  	test := func(t *testing.T, gs StreamStore) {
  8950  		t.Parallel()
  8951  		prand := rand.New(rand.NewSource(seed))
  8952  		tick := time.NewTicker(time.Second)
  8953  		defer tick.Stop()
  8954  		done := time.NewTimer(10 * time.Second)
  8955  
  8956  		for running := true; running; {
  8957  			select {
  8958  			case <-tick.C:
  8959  				var state StreamState
  8960  				gs.FastState(&state)
  8961  				if state.NumDeleted == 0 {
  8962  					continue
  8963  				}
  8964  				start := time.Now()
  8965  				snap, err := gs.EncodedStreamState(0)
  8966  				require_NoError(t, err)
  8967  				elapsed := time.Since(start)
  8968  				// Should take <1ms without race but if CI/CD is slow we will give it a bit of room.
  8969  				if elapsed > maxEncodeTime {
  8970  					t.Logf("Encode took longer than expected: %v", elapsed)
  8971  				}
  8972  				if len(snap) > maxEncodeSize {
  8973  					t.Fatalf("Expected snapshot size < %v got %v", friendlyBytes(maxEncodeSize), friendlyBytes(len(snap)))
  8974  				}
  8975  				ss, err := DecodeStreamState(snap)
  8976  				require_NoError(t, err)
  8977  				require_True(t, len(ss.Deleted) > 0)
  8978  			case <-done.C:
  8979  				running = false
  8980  			default:
  8981  				key := strconv.Itoa(prand.Intn(256_000))
  8982  				gs.StoreMsg(key, nil, msg)
  8983  			}
  8984  		}
  8985  	}
  8986  
  8987  	for _, gs := range []StreamStore{ms, fs} {
  8988  		switch gs.(type) {
  8989  		case *memStore:
  8990  			t.Run("MemStore", func(t *testing.T) {
  8991  				test(t, gs)
  8992  			})
  8993  		case *fileStore:
  8994  			t.Run("FileStore", func(t *testing.T) {
  8995  				test(t, gs)
  8996  			})
  8997  		}
  8998  	}
  8999  }
  9000  
  9001  func TestNoRaceJetStreamClusterKVWithServerKill(t *testing.T) {
  9002  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9003  	defer c.shutdown()
  9004  
  9005  	// Setup the KV bucket and use for making assertions.
  9006  	nc, js := jsClientConnect(t, c.randomServer())
  9007  	defer nc.Close()
  9008  	_, err := js.CreateKeyValue(&nats.KeyValueConfig{
  9009  		Bucket:   "TEST",
  9010  		Replicas: 3,
  9011  		History:  10,
  9012  	})
  9013  	require_NoError(t, err)
  9014  
  9015  	// Total number of keys to range over.
  9016  	numKeys := 50
  9017  
  9018  	// ID is the server id to explicitly connect to.
  9019  	work := func(ctx context.Context, wg *sync.WaitGroup, id int) {
  9020  		defer wg.Done()
  9021  
  9022  		nc, js := jsClientConnectEx(t, c.servers[id], []nats.JSOpt{nats.Context(ctx)})
  9023  		defer nc.Close()
  9024  
  9025  		kv, err := js.KeyValue("TEST")
  9026  		require_NoError(t, err)
  9027  
  9028  		// 100 operations a second for each client.
  9029  		tk := time.NewTicker(10 * time.Millisecond)
  9030  		defer tk.Stop()
  9031  
  9032  		for {
  9033  			select {
  9034  			case <-ctx.Done():
  9035  				return
  9036  
  9037  			case <-tk.C:
  9038  				// Pick a random key within the range.
  9039  				k := fmt.Sprintf("key.%d", rand.Intn(numKeys))
  9040  				// Attempt to get a key.
  9041  				e, err := kv.Get(k)
  9042  				// If found, attempt to update or delete.
  9043  				if err == nil {
  9044  					if rand.Intn(10) < 3 {
  9045  						kv.Delete(k, nats.LastRevision(e.Revision()))
  9046  					} else {
  9047  						kv.Update(k, nil, e.Revision())
  9048  					}
  9049  				} else if errors.Is(err, nats.ErrKeyNotFound) {
  9050  					kv.Create(k, nil)
  9051  				}
  9052  			}
  9053  		}
  9054  	}
  9055  
  9056  	ctx, cancel := context.WithCancel(context.Background())
  9057  	defer cancel()
  9058  
  9059  	var wg sync.WaitGroup
  9060  	wg.Add(3)
  9061  
  9062  	go work(ctx, &wg, 0)
  9063  	go work(ctx, &wg, 1)
  9064  	go work(ctx, &wg, 2)
  9065  
  9066  	time.Sleep(time.Second)
  9067  
  9068  	// Simulate server stop and restart.
  9069  	for i := 0; i < 7; i++ {
  9070  		s := c.randomServer()
  9071  		s.Shutdown()
  9072  		c.waitOnLeader()
  9073  		c.waitOnStreamLeader(globalAccountName, "KV_TEST")
  9074  
  9075  		// Wait for a bit and then start the server again.
  9076  		time.Sleep(time.Duration(rand.Intn(1250)) * time.Millisecond)
  9077  		s = c.restartServer(s)
  9078  		c.waitOnServerCurrent(s)
  9079  		c.waitOnLeader()
  9080  		c.waitOnStreamLeader(globalAccountName, "KV_TEST")
  9081  		c.waitOnPeerCount(3)
  9082  	}
  9083  
  9084  	// Stop the workload.
  9085  	cancel()
  9086  	wg.Wait()
  9087  
  9088  	type fullState struct {
  9089  		state StreamState
  9090  		lseq  uint64
  9091  		clfs  uint64
  9092  	}
  9093  
  9094  	grabState := func(mset *stream) *fullState {
  9095  		mset.mu.RLock()
  9096  		defer mset.mu.RUnlock()
  9097  		var state StreamState
  9098  		mset.store.FastState(&state)
  9099  		return &fullState{state, mset.lseq, mset.clfs}
  9100  	}
  9101  
  9102  	grabStore := func(mset *stream) map[string][]uint64 {
  9103  		mset.mu.RLock()
  9104  		store := mset.store
  9105  		mset.mu.RUnlock()
  9106  		var state StreamState
  9107  		store.FastState(&state)
  9108  		storeMap := make(map[string][]uint64)
  9109  		for seq := state.FirstSeq; seq <= state.LastSeq; seq++ {
  9110  			if sm, err := store.LoadMsg(seq, nil); err == nil {
  9111  				storeMap[sm.subj] = append(storeMap[sm.subj], sm.seq)
  9112  			}
  9113  		}
  9114  		return storeMap
  9115  	}
  9116  
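        	// After the chaos every replica must converge: the same fast state
        	// (including lseq and clfs) and the same subject->sequence contents
        	// loaded back out of the store.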
  9117  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  9118  		// Current stream leader.
  9119  		sl := c.streamLeader(globalAccountName, "KV_TEST")
  9120  		mset, err := sl.GlobalAccount().lookupStream("KV_TEST")
  9121  		require_NoError(t, err)
  9122  		lstate := grabState(mset)
  9123  		golden := grabStore(mset)
  9124  
  9125  		// Compare each follower's state and store against the leader's.
  9126  		for _, s := range c.servers {
  9127  			if s == sl {
  9128  				continue
  9129  			}
  9130  			mset, err := s.GlobalAccount().lookupStream("KV_TEST")
  9131  			require_NoError(t, err)
  9132  			state := grabState(mset)
  9133  			if !reflect.DeepEqual(state, lstate) {
  9134  				return fmt.Errorf("Expected follower state\n%+v\nto match leader's\n %+v", state, lstate)
  9135  			}
  9136  			sm := grabStore(mset)
  9137  			if !reflect.DeepEqual(sm, golden) {
  9138  				t.Fatalf("Expected follower store for %v\n%+v\nto match leader's %v\n %+v", s, sm, sl, golden)
  9139  			}
  9140  		}
  9141  		return nil
  9142  	})
  9143  }
  9144  
  9145  func TestNoRaceFileStoreLargeMsgsAndFirstMatching(t *testing.T) {
  9146  	sd := t.TempDir()
  9147  	fs, err := newFileStore(
  9148  		FileStoreConfig{StoreDir: sd, BlockSize: 8 * 1024 * 1024},
  9149  		StreamConfig{Name: "zzz", Subjects: []string{">"}, Storage: FileStorage})
  9150  	require_NoError(t, err)
  9151  	defer fs.Stop()
  9152  
  9153  	for i := 0; i < 150_000; i++ {
  9154  		fs.StoreMsg(fmt.Sprintf("foo.bar.%d", i), nil, nil)
  9155  	}
  9156  	for i := 0; i < 150_000; i++ {
  9157  		fs.StoreMsg(fmt.Sprintf("foo.baz.%d", i), nil, nil)
  9158  	}
  9159  	require_Equal(t, fs.numMsgBlocks(), 2)
  9160  	fs.mu.RLock()
  9161  	mb := fs.blks[1]
  9162  	fs.mu.RUnlock()
  9163  	fseq := atomic.LoadUint64(&mb.first.seq)
  9164  	// The -40 leaves enough mb.fss entries to kick in the linear scan.
  9165  	for seq := fseq; seq < 300_000-40; seq++ {
  9166  		fs.RemoveMsg(uint64(seq))
  9167  	}
  9168  	start := time.Now()
  9169  	fs.LoadNextMsg("*.baz.*", true, fseq, nil)
  9170  	require_True(t, time.Since(start) < 200*time.Microsecond)
  9171  	// Now remove more to kick into non-linear logic.
  9172  	for seq := 300_000 - 40; seq < 300_000; seq++ {
  9173  		fs.RemoveMsg(uint64(seq))
  9174  	}
  9175  	start = time.Now()
  9176  	fs.LoadNextMsg("*.baz.*", true, fseq, nil)
  9177  	require_True(t, time.Since(start) < 200*time.Microsecond)
  9178  }
  9179  
  9180  func TestNoRaceWSNoCorruptionWithFrameSizeLimit(t *testing.T) {
  9181  	testWSNoCorruptionWithFrameSizeLimit(t, 50000)
  9182  }
  9183  
  9184  func TestNoRaceJetStreamAPIDispatchQueuePending(t *testing.T) {
  9185  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9186  	defer c.shutdown()
  9187  
  9188  	// Setup the stream and use it for making assertions.
  9189  	nc, js := jsClientConnect(t, c.randomServer())
  9190  	defer nc.Close()
  9191  
  9192  	_, err := js.AddStream(&nats.StreamConfig{
  9193  		Name:     "TEST",
  9194  		Subjects: []string{"foo.*.*"},
  9195  	})
  9196  	require_NoError(t, err)
  9197  
  9198  	// Queue up 500k messages, all with different subjects.
  9199  	// We want calculating num pending for a consumer to be expensive, so a large
  9200  	// subject space combined with wildcard filters does the trick.
  9201  	toks := []string{"foo", "bar", "baz"} // for second token.
  9202  	for i := 1; i <= 500_000; i++ {
  9203  		subj := fmt.Sprintf("foo.%s.%d", toks[rand.Intn(len(toks))], i)
  9204  		_, err := js.PublishAsync(subj, nil, nats.StallWait(time.Second))
  9205  		require_NoError(t, err)
  9206  	}
  9207  	select {
  9208  	case <-js.PublishAsyncComplete():
  9209  	case <-time.After(20 * time.Second):
  9210  		t.Fatalf("Did not receive completion signal")
  9211  	}
  9212  
  9213  	// To back up our pending queue we will create lots of filtered (wildcard) R1 consumers
  9214  	// from a different server than the one hosting the stream.
  9215  	// It is ok to share this connection here.
  9216  	sldr := c.streamLeader(globalAccountName, "TEST")
  9217  	for _, s := range c.servers {
  9218  		if s != sldr {
  9219  			nc, js = jsClientConnect(t, s)
  9220  			defer nc.Close()
  9221  			break
  9222  		}
  9223  	}
  9224  
  9225  	ngr, ncons := 100, 10
  9226  	startCh, errCh := make(chan bool), make(chan error, ngr)
  9227  	var wg, swg sync.WaitGroup
  9228  	wg.Add(ngr)
  9229  	swg.Add(ngr)
  9230  
  9231  	// The wildcard in the filter subject is the key.
  9232  	cfg := &nats.ConsumerConfig{FilterSubject: "foo.*.22"}
  9233  	var tt atomic.Int64
  9234  
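        	// Each goroutine checks in via swg and then blocks on startCh, so all
        	// creators fire their AddConsumer storms at the same instant.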
  9235  	for i := 0; i < ngr; i++ {
  9236  		go func() {
  9237  			defer wg.Done()
  9238  			swg.Done()
  9239  			// Make them all fire at once.
  9240  			<-startCh
  9241  
  9242  			for i := 0; i < ncons; i++ {
  9243  				start := time.Now()
  9244  				if _, err := js.AddConsumer("TEST", cfg); err != nil {
  9245  					errCh <- err
  9246  					t.Logf("Got err creating consumer: %v", err)
  9247  				}
  9248  				elapsed := time.Since(start)
  9249  				tt.Add(int64(elapsed))
  9250  			}
  9251  		}()
  9252  	}
  9253  	swg.Wait()
  9254  	close(startCh)
  9255  	time.Sleep(time.Millisecond)
  9256  	jsz, _ := sldr.Jsz(nil)
  9257  	// This could legitimately be 0, so just log; don't fail.
  9258  	if jsz.JetStreamStats.API.Inflight == 0 {
  9259  		t.Log("Expected a non-zero inflight")
  9260  	}
  9261  	wg.Wait()
  9262  
  9263  	if len(errCh) > 0 {
  9264  		t.Fatalf("Expected no errors, got %d", len(errCh))
  9265  	}
  9266  }
  9267  
  9268  func TestNoRaceJetStreamMirrorAndSourceConsumerFailBackoff(t *testing.T) {
  9269  	// Check calculations first.
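        	// Expected: the backoff grows linearly by 10s per failed attempt and is
        	// capped at retryMaximum from the 12th attempt on.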
  9270  	for i := 1; i <= 20; i++ {
  9271  		backoff := calculateRetryBackoff(i)
  9272  		if i < 12 {
  9273  			require_Equal(t, backoff, time.Duration(i)*10*time.Second)
  9274  		} else {
  9275  			require_Equal(t, backoff, retryMaximum)
  9276  		}
  9277  	}
  9278  
  9279  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9280  	defer c.shutdown()
  9281  
  9282  	nc, js := jsClientConnect(t, c.randomServer())
  9283  	defer nc.Close()
  9284  
  9285  	_, err := js.AddStream(&nats.StreamConfig{
  9286  		Name:     "TEST",
  9287  		Subjects: []string{"foo.*.*"},
  9288  	})
  9289  	require_NoError(t, err)
  9290  	sl := c.streamLeader(globalAccountName, "TEST")
  9291  
  9292  	// Create a mirror.
  9293  	ml := sl
  9294  	// Make sure it is not on the same server. That should not happen in general, but it is possible.
  9295  	for ml == sl {
  9296  		js.DeleteStream("MIRROR")
  9297  		_, err = js.AddStream(&nats.StreamConfig{
  9298  			Name:   "MIRROR",
  9299  			Mirror: &nats.StreamSource{Name: "TEST"},
  9300  		})
  9301  		require_NoError(t, err)
  9302  		ml = c.streamLeader(globalAccountName, "MIRROR")
  9303  	}
  9304  	// Create a source.
  9305  	srcl := sl
  9306  	for srcl == sl {
  9307  		js.DeleteStream("SOURCE")
  9308  		_, err = js.AddStream(&nats.StreamConfig{
  9309  			Name:    "SOURCE",
  9310  			Sources: []*nats.StreamSource{{Name: "TEST"}},
  9311  		})
  9312  		require_NoError(t, err)
  9313  		srcl = c.streamLeader(globalAccountName, "SOURCE")
  9314  	}
  9315  
  9316  	// Create sub to watch for the consumer create requests.
  9317  	nc, _ = jsClientConnect(t, ml)
  9318  	defer nc.Close()
  9319  	sub := natsSubSync(t, nc, "$JS.API.CONSUMER.CREATE.>")
  9320  
  9321  	// Kill the server where the origin stream (TEST) lives.
  9322  	sldr := c.streamLeader(globalAccountName, "TEST")
  9323  	sldr.Shutdown()
  9324  
  9325  	// Wait for just over 10s. We should only see one create request per asset during this time.
  9326  	time.Sleep(11 * time.Second)
  9327  	// There should have been 2 requests in total: one for the mirror, one for the source.
  9328  	n, _, _ := sub.Pending()
  9329  	require_Equal(t, n, 2)
  9330  	var mreq, sreq int
  9331  	for i := 0; i < 2; i++ {
  9332  		msg := natsNexMsg(t, sub, time.Second)
  9333  		if bytes.Contains(msg.Data, []byte("$JS.M.")) {
  9334  			mreq++
  9335  		} else if bytes.Contains(msg.Data, []byte("$JS.S.")) {
  9336  			sreq++
  9337  		}
  9338  	}
  9339  	if mreq != 1 || sreq != 1 {
  9340  		t.Fatalf("Consumer create captures invalid: mreq=%v sreq=%v", mreq, sreq)
  9341  	}
  9342  
  9343  	// Now make sure that the failure counts are set properly.
  9344  	mset, err := c.streamLeader(globalAccountName, "MIRROR").GlobalAccount().lookupStream("MIRROR")
  9345  	require_NoError(t, err)
  9346  	mset.mu.RLock()
  9347  	fails := mset.mirror.fails
  9348  	mset.mu.RUnlock()
  9349  	require_Equal(t, fails, 1)
  9350  
  9351  	mset, err = c.streamLeader(globalAccountName, "SOURCE").GlobalAccount().lookupStream("SOURCE")
  9352  	require_NoError(t, err)
  9353  	mset.mu.RLock()
  9354  	si := mset.sources["TEST > >"]
  9355  	mset.mu.RUnlock()
  9356  	require_True(t, si != nil)
  9357  	require_Equal(t, si.fails, 1)
  9358  }
  9359  
  9360  func TestNoRaceJetStreamClusterStreamCatchupLargeInteriorDeletes(t *testing.T) {
  9361  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9362  	defer c.shutdown()
  9363  
  9364  	nc, js := jsClientConnect(t, c.randomServer())
  9365  	defer nc.Close()
  9366  
  9367  	cfg := &nats.StreamConfig{
  9368  		Name:              "TEST",
  9369  		Subjects:          []string{"foo.*"},
  9370  		MaxMsgsPerSubject: 100,
  9371  		Replicas:          1,
  9372  	}
  9373  
  9374  	_, err := js.AddStream(cfg)
  9375  	require_NoError(t, err)
  9376  
  9377  	msg := bytes.Repeat([]byte("Z"), 2*1024)
  9378  	// We will create lots of interior deletes on our R1 then scale up.
  9379  	_, err = js.Publish("foo.0", msg)
  9380  	require_NoError(t, err)
  9381  
  9382  	// Create 50k messages randomly from 1-100
  9383  	for i := 0; i < 50_000; i++ {
  9384  		subj := fmt.Sprintf("foo.%d", rand.Intn(100)+1)
  9385  		js.PublishAsync(subj, msg)
  9386  	}
  9387  	select {
  9388  	case <-js.PublishAsyncComplete():
  9389  	case <-time.After(5 * time.Second):
  9390  		t.Fatalf("Did not receive completion signal")
  9391  	}
  9392  	// Now create a large gap.
  9393  	for i := 0; i < 100_000; i++ {
  9394  		js.PublishAsync("foo.2", msg)
  9395  	}
  9396  	select {
  9397  	case <-js.PublishAsyncComplete():
  9398  	case <-time.After(5 * time.Second):
  9399  		t.Fatalf("Did not receive completion signal")
  9400  	}
  9401  	// Do 50k random again at end.
  9402  	for i := 0; i < 50_000; i++ {
  9403  		subj := fmt.Sprintf("foo.%d", rand.Intn(100)+1)
  9404  		js.PublishAsync(subj, msg)
  9405  	}
  9406  	select {
  9407  	case <-js.PublishAsyncComplete():
  9408  	case <-time.After(5 * time.Second):
  9409  		t.Fatalf("Did not receive completion signal")
  9410  	}
  9411  
  9412  	si, err := js.StreamInfo("TEST")
  9413  	require_NoError(t, err)
  9414  
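        	// Scale up to R2: the new replica has to catch up a sequence space that is
        	// mostly interior deletes (MaxMsgsPerSubject caps each of the ~100 subjects
        	// at 100 messages) and must converge to the same message count.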
  9415  	cfg.Replicas = 2
  9416  	_, err = js.UpdateStream(cfg)
  9417  	require_NoError(t, err)
  9418  
  9419  	// Let catchup start.
  9420  	c.waitOnStreamLeader(globalAccountName, "TEST")
  9421  
  9422  	nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  9423  	require_True(t, nl != nil)
  9424  	mset, err := nl.GlobalAccount().lookupStream("TEST")
  9425  	require_NoError(t, err)
  9426  
  9427  	checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
  9428  		state := mset.state()
  9429  		if state.Msgs == si.State.Msgs {
  9430  			return nil
  9431  		}
  9432  		return fmt.Errorf("Msgs not equal %d vs %d", state.Msgs, si.State.Msgs)
  9433  	})
  9434  }
  9435  
  9436  func TestNoRaceJetStreamClusterBadRestartsWithHealthzPolling(t *testing.T) {
  9437  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9438  	defer c.shutdown()
  9439  
  9440  	nc, js := jsClientConnect(t, c.randomServer())
  9441  	defer nc.Close()
  9442  
  9443  	cfg := &nats.StreamConfig{
  9444  		Name:     "TEST",
  9445  		Subjects: []string{"foo.>"},
  9446  		Replicas: 3,
  9447  	}
  9448  	_, err := js.AddStream(cfg)
  9449  	require_NoError(t, err)
  9450  
  9451  	// We will poll healthz at a decent clip and make sure any restart logic works
  9452  	// correctly with assets coming and going.
  9453  	ch := make(chan struct{})
  9454  	defer close(ch)
  9455  
  9456  	go func() {
  9457  		for {
  9458  			select {
  9459  			case <-ch:
  9460  				return
  9461  			case <-time.After(50 * time.Millisecond):
  9462  				for _, s := range c.servers {
  9463  					s.healthz(nil)
  9464  				}
  9465  			}
  9466  		}
  9467  	}()
  9468  
  9469  	numConsumers := 500
  9470  	consumers := make([]string, 0, numConsumers)
  9471  
  9472  	var wg sync.WaitGroup
  9473  
  9474  	for i := 0; i < numConsumers; i++ {
  9475  		cname := fmt.Sprintf("CONS-%d", i+1)
  9476  		consumers = append(consumers, cname)
  9477  		wg.Add(1)
  9478  		go func() {
  9479  			defer wg.Done()
  9480  			_, err := js.PullSubscribe("foo.>", cname, nats.BindStream("TEST"))
  9481  			require_NoError(t, err)
  9482  		}()
  9483  	}
  9484  	wg.Wait()
  9485  
  9486  	// Make sure all are reported.
  9487  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9488  		for _, s := range c.servers {
  9489  			jsz, _ := s.Jsz(nil)
  9490  			if jsz.Consumers != numConsumers {
  9491  				return fmt.Errorf("%v wrong number of consumers: %d vs %d", s, jsz.Consumers, numConsumers)
  9492  			}
  9493  		}
  9494  		return nil
  9495  	})
  9496  
  9497  	// Now do same for streams.
  9498  	numStreams := 200
  9499  	streams := make([]string, 0, numStreams)
  9500  
  9501  	for i := 0; i < numStreams; i++ {
  9502  		sname := fmt.Sprintf("TEST-%d", i+1)
  9503  		streams = append(streams, sname)
  9504  		wg.Add(1)
  9505  		go func() {
  9506  			defer wg.Done()
  9507  			_, err := js.AddStream(&nats.StreamConfig{Name: sname, Replicas: 3})
  9508  			require_NoError(t, err)
  9509  		}()
  9510  	}
  9511  	wg.Wait()
  9512  
  9513  	// Make sure all are reported.
  9514  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9515  		for _, s := range c.servers {
  9516  			jsz, _ := s.Jsz(nil)
  9517  			if jsz.Streams != numStreams+1 {
  9518  				return fmt.Errorf("%v wrong number of streams: %d vs %d", s, jsz.Streams, numStreams+1)
  9519  			}
  9520  		}
  9521  		return nil
  9522  	})
  9523  
  9524  	// Delete consumers.
  9525  	for _, cname := range consumers {
  9526  		err := js.DeleteConsumer("TEST", cname)
  9527  		require_NoError(t, err)
  9528  	}
  9529  	// Make sure reporting goes to zero.
  9530  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9531  		for _, s := range c.servers {
  9532  			jsz, _ := s.Jsz(nil)
  9533  			if jsz.Consumers != 0 {
  9534  				return fmt.Errorf("%v still has %d consumers", s, jsz.Consumers)
  9535  			}
  9536  		}
  9537  		return nil
  9538  	})
  9539  
  9540  	// Delete streams
  9541  	for _, sname := range streams {
  9542  		err := js.DeleteStream(sname)
  9543  		require_NoError(t, err)
  9544  	}
  9545  	err = js.DeleteStream("TEST")
  9546  	require_NoError(t, err)
  9547  
  9548  	// Make sure reporting goes to zero.
  9549  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  9550  		for _, s := range c.servers {
  9551  			jsz, _ := s.Jsz(nil)
  9552  			if jsz.Streams != 0 {
  9553  				return fmt.Errorf("%v still has %d streams", s, jsz.Streams)
  9554  			}
  9555  		}
  9556  		return nil
  9557  	})
  9558  }
  9559  
  9560  func TestNoRaceJetStreamKVReplaceWithServerRestart(t *testing.T) {
  9561  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9562  	defer c.shutdown()
  9563  
  9564  	nc, _ := jsClientConnect(t, c.randomServer())
  9565  	defer nc.Close()
  9566  	// Shorten wait time for disconnects.
  9567  	js, err := nc.JetStream(nats.MaxWait(time.Second))
  9568  	require_NoError(t, err)
  9569  
  9570  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  9571  		Bucket:   "TEST",
  9572  		Replicas: 3,
  9573  	})
  9574  	require_NoError(t, err)
  9575  
  9576  	createData := func(n int) []byte {
  9577  		const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
  9578  		b := make([]byte, n)
  9579  		for i := range b {
  9580  			b[i] = letterBytes[rand.Intn(len(letterBytes))]
  9581  		}
  9582  		return b
  9583  	}
  9584  
  9585  	_, err = kv.Create("foo", createData(160))
  9586  	require_NoError(t, err)
  9587  
  9588  	ch := make(chan struct{})
  9589  	wg := sync.WaitGroup{}
  9590  
  9591  	// For counting errors that should not happen.
  9592  	errCh := make(chan error, 1024)
  9593  
  9594  	wg.Add(1)
  9595  	go func() {
  9596  		defer wg.Done()
  9597  
  9598  		var lastData []byte
  9599  		var revision uint64
  9600  
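        		// Optimistic read-modify-write loop: only treat it as data loss if a read
        		// at the same revision returns different bytes than the last successful
        		// update we made.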
  9601  		for {
  9602  			select {
  9603  			case <-ch:
  9604  				return
  9605  			default:
  9606  				k, err := kv.Get("foo")
  9607  				if err == nats.ErrKeyNotFound {
  9608  					errCh <- err
  9609  				} else if k != nil {
  9610  					if lastData != nil && k.Revision() == revision && !bytes.Equal(lastData, k.Value()) {
  9611  						errCh <- fmt.Errorf("data loss [%s][rev:%d] expected:[%q] is:[%q]\n", "foo", revision, lastData, k.Value())
  9612  					}
  9613  					newData := createData(160)
  9614  					if revision, err = kv.Update("foo", newData, k.Revision()); err == nil {
  9615  						lastData = newData
  9616  					}
  9617  				}
  9618  			}
  9619  		}
  9620  	}()
  9621  
  9622  	// Wait a short bit.
  9623  	time.Sleep(2 * time.Second)
  9624  	for _, s := range c.servers {
  9625  		s.Shutdown()
  9626  		// Need to leave servers down for a while to trigger the bug properly.
  9627  		time.Sleep(5 * time.Second)
  9628  		s = c.restartServer(s)
  9629  		c.waitOnServerHealthz(s)
  9630  	}
  9631  
  9632  	// Shut down the goroutine above.
  9633  	close(ch)
  9634  	// Wait for it to finish.
  9635  	wg.Wait()
  9636  
  9637  	if n := len(errCh); n != 0 {
  9638  		for i := 0; i < n; i++ {
  9639  			t.Logf("Received err %v during test", <-errCh)
  9640  		}
  9641  		t.Fatalf("Encountered errors")
  9642  	}
  9643  }
  9644  
  9645  func TestNoRaceMemStoreCompactPerformance(t *testing.T) {
  9646  	// Load the MemStore so that it is full.
  9647  	subj, msg := "foo", make([]byte, 1000)
  9648  	storedMsgSize := memStoreMsgSize(subj, nil, msg)
  9649  
  9650  	toStore := uint64(10_000)
  9651  	toStoreOnTop := uint64(1_000)
  9652  	setSeqNo := uint64(10_000_000_000)
  9653  
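        	// MaxBytes keeps the store at toStore messages, and Compact(seq) purges
        	// everything below seq, so compacting at toStore+toStoreOnTop should leave
        	// exactly one message behind (purging toStore-1).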
  9654  	expectedPurge := toStore - 1
  9655  	maxBytes := storedMsgSize * toStore
  9656  
  9657  	ms, err := newMemStore(&StreamConfig{Storage: MemoryStorage, MaxBytes: int64(maxBytes)})
  9658  	require_NoError(t, err)
  9659  	defer ms.Stop()
  9660  
  9661  	for i := uint64(0); i < toStore; i++ {
  9662  		ms.StoreMsg(subj, nil, msg)
  9663  	}
  9664  	state := ms.State()
  9665  	require_Equal(t, toStore, state.Msgs)
  9666  	require_Equal(t, state.Bytes, storedMsgSize*toStore)
  9667  
  9668  	// 1st run: load additional messages, then compact.
  9669  	for i := uint64(0); i < toStoreOnTop; i++ {
  9670  		ms.StoreMsg(subj, nil, msg)
  9671  	}
  9672  	startFirstRun := time.Now()
  9673  	purgedFirstRun, _ := ms.Compact(toStore + toStoreOnTop)
  9674  	elapsedFirstRun := time.Since(startFirstRun)
  9675  	require_Equal(t, expectedPurge, purgedFirstRun)
  9676  
  9677  	// Set the sequence number to a very high value by compacting with an overly high sequence.
  9678  	purgedFull, _ := ms.Compact(setSeqNo)
  9679  	require_Equal(t, 1, purgedFull)
  9680  
  9681  	// 2nd run: compact again.
  9682  	for i := uint64(0); i < toStore; i++ {
  9683  		ms.StoreMsg(subj, nil, msg)
  9684  	}
  9685  	startSecondRun := time.Now()
  9686  	purgedSecondRun, _ := ms.Compact(setSeqNo + toStore - 1)
  9687  	elapsedSecondRun := time.Since(startSecondRun)
  9688  	require_Equal(t, expectedPurge, purgedSecondRun)
  9689  
  9690  	// Calculate the delta between runs and fail if it is too high.
  9691  	require_LessThan(t, elapsedSecondRun-elapsedFirstRun, time.Duration(1)*time.Second)
  9692  }
  9693  
  9694  func TestNoRaceJetStreamSnapshotsWithSlowAckDontSlowConsumer(t *testing.T) {
  9695  	s := RunBasicJetStreamServer(t)
  9696  	defer s.Shutdown()
  9697  
  9698  	ech := make(chan error)
  9699  	ecb := func(_ *nats.Conn, _ *nats.Subscription, err error) {
  9700  		if err != nil {
  9701  			ech <- err
  9702  		}
  9703  	}
  9704  	nc, js := jsClientConnect(t, s, nats.ErrorHandler(ecb))
  9705  	defer nc.Close()
  9706  
  9707  	_, err := js.AddStream(&nats.StreamConfig{
  9708  		Name:     "TEST",
  9709  		Subjects: []string{"foo"},
  9710  	})
  9711  	require_NoError(t, err)
  9712  
  9713  	// Put in over 64MB.
  9714  	msg, toSend := make([]byte, 1024*1024), 80
  9715  	crand.Read(msg)
  9716  
  9717  	for i := 0; i < toSend; i++ {
  9718  		_, err := js.Publish("foo", msg)
  9719  		require_NoError(t, err)
  9720  	}
  9721  
  9722  	sreq := &JSApiStreamSnapshotRequest{
  9723  		DeliverSubject: nats.NewInbox(),
  9724  		ChunkSize:      1024 * 1024,
  9725  	}
  9726  	req, _ := json.Marshal(sreq)
  9727  	rmsg, err := nc.Request(fmt.Sprintf(JSApiStreamSnapshotT, "TEST"), req, time.Second)
  9728  	require_NoError(t, err)
  9729  
  9730  	var resp JSApiStreamSnapshotResponse
  9731  	json.Unmarshal(rmsg.Data, &resp)
  9732  	require_True(t, resp.Error == nil)
  9733  
  9734  	done := make(chan *nats.Msg)
  9735  	sub, _ := nc.Subscribe(sreq.DeliverSubject, func(m *nats.Msg) {
  9736  		// EOF
  9737  		if len(m.Data) == 0 {
  9738  			done <- m
  9739  			return
  9740  		}
  9741  	})
  9742  	defer sub.Unsubscribe()
  9743  
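        	// The subscriber never replies to the flow-control requests, so the server
        	// should eventually abort the snapshot with a 408 "No Flow Response" EOF
        	// instead of overwhelming (and disconnecting) the client connection.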
  9744  	// Check that we do not get disconnected due to slow consumer.
  9745  	select {
  9746  	case msg := <-done:
  9747  		require_Equal(t, msg.Header.Get("Status"), "408")
  9748  		require_Equal(t, msg.Header.Get("Description"), "No Flow Response")
  9749  	case err := <-ech:
  9750  		t.Fatalf("Got disconnected: %v", err)
  9751  	case <-time.After(5 * time.Second):
  9752  		t.Fatalf("Should have received EOF with error status")
  9753  	}
  9754  }
  9755  
  9756  func TestNoRaceJetStreamWQSkippedMsgsOnScaleUp(t *testing.T) {
  9757  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  9758  	defer c.shutdown()
  9759  
  9760  	nc, js := jsClientConnect(t, c.randomServer())
  9761  	defer nc.Close()
  9762  
  9763  	const pre = "CORE_ENT_DR_OTP_22."
  9764  	wcSubj := pre + ">"
  9765  
  9766  	_, err := js.AddStream(&nats.StreamConfig{
  9767  		Name:        "TEST",
  9768  		Subjects:    []string{wcSubj},
  9769  		Retention:   nats.WorkQueuePolicy,
  9770  		AllowDirect: true,
  9771  		Replicas:    3,
  9772  	})
  9773  	require_NoError(t, err)
  9774  
  9775  	cfg := &nats.ConsumerConfig{
  9776  		Durable:           "dlc",
  9777  		FilterSubject:     wcSubj,
  9778  		DeliverPolicy:     nats.DeliverAllPolicy,
  9779  		AckPolicy:         nats.AckExplicitPolicy,
  9780  		MaxAckPending:     10_000,
  9781  		AckWait:           500 * time.Millisecond,
  9782  		MaxWaiting:        100,
  9783  		MaxRequestExpires: 1050 * time.Millisecond,
  9784  	}
  9785  	_, err = js.AddConsumer("TEST", cfg)
  9786  	require_NoError(t, err)
  9787  
  9788  	pdone := make(chan bool)
  9789  	cdone := make(chan bool)
  9790  
  9791  	// We will have 51 consumer apps and a producer app. Make sure to wait for
  9792  	// all goroutines to end at the end of the test.
  9793  	wg := sync.WaitGroup{}
  9794  	wg.Add(52)
  9795  
  9796  	// Publish routine
  9797  	go func() {
  9798  		defer wg.Done()
  9799  
  9800  		publishSubjects := []string{
  9801  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.918886682066",
  9802  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.918886682067",
  9803  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543211",
  9804  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543212",
  9805  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543213",
  9806  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543214",
  9807  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543215",
  9808  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543216",
  9809  			"CORE_ENT_DR_OTP_22.P.H.TC.10011.1010.916596543217",
  9810  		}
  9811  		// ~1.7kb
  9812  		msg := bytes.Repeat([]byte("Z"), 1750)
  9813  
  9814  		// 200 msgs/s
  9815  		st := time.NewTicker(5 * time.Millisecond)
  9816  		defer st.Stop()
  9817  
  9818  		nc, js := jsClientConnect(t, c.randomServer())
  9819  		defer nc.Close()
  9820  
  9821  		for {
  9822  			select {
  9823  			case <-st.C:
  9824  				subj := publishSubjects[rand.Intn(len(publishSubjects))]
  9825  				_, err = js.Publish(subj, msg)
  9826  				require_NoError(t, err)
  9827  			case <-pdone:
  9828  				return
  9829  			}
  9830  		}
  9831  	}()
  9832  
  9833  	consumerApp := func() {
  9834  		defer wg.Done()
  9835  
  9836  		nc, js := jsClientConnect(t, c.randomServer())
  9837  		defer nc.Close()
  9838  
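        		// Mimic an app that on startup looks up the shared durable and
        		// re-applies its configuration before starting to pull.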
  9839  		_, err := js.ConsumerInfo("TEST", "dlc")
  9840  		require_NoError(t, err)
  9841  		_, err = js.UpdateConsumer("TEST", cfg)
  9842  		require_NoError(t, err)
  9843  
  9844  		sub, err := js.PullSubscribe(wcSubj, "dlc")
  9845  		require_NoError(t, err)
  9846  
  9847  		st := time.NewTicker(100 * time.Millisecond)
  9848  		defer st.Stop()
  9849  
  9850  		for {
  9851  			select {
  9852  			case <-st.C:
  9853  				msgs, err := sub.Fetch(1, nats.MaxWait(100*time.Millisecond))
  9854  				if err != nil {
  9855  					continue
  9856  				}
  9857  				require_Equal(t, len(msgs), 1)
  9858  				m := msgs[0]
  9859  				if rand.Intn(10) == 1 {
  9860  					m.Nak()
  9861  				} else {
  9862  					// Wait up to 20ms to ack.
  9863  					time.Sleep(time.Duration(rand.Intn(20)) * time.Millisecond)
  9864  					// This could fail and that is ok, system should recover due to low ack wait.
  9865  					m.Ack()
  9866  				}
  9867  			case <-cdone:
  9868  				return
  9869  			}
  9870  		}
  9871  	}
  9872  
  9873  	// Now start a single consumer app.
  9874  	go consumerApp()
  9875  
  9876  	// Wait for 2s
  9877  	time.Sleep(2 * time.Second)
  9878  
  9879  	// Now spin up 50 more.
  9880  	for i := 1; i <= 50; i++ {
  9881  		if i%5 == 0 {
  9882  			time.Sleep(200 * time.Millisecond)
  9883  		}
  9884  		go consumerApp()
  9885  	}
  9886  
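        	// For ~8 seconds keep stepping down the consumer leader to force leadership
        	// changes while messages are flowing.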
  9887  	timeout := time.Now().Add(8 * time.Second)
  9888  	for time.Now().Before(timeout) {
  9889  		time.Sleep(750 * time.Millisecond)
  9890  		if s := c.consumerLeader(globalAccountName, "TEST", "dlc"); s != nil {
  9891  			s.JetStreamStepdownConsumer(globalAccountName, "TEST", "dlc")
  9892  		}
  9893  	}
  9894  
  9895  	// Close the publisher and defer closing the consumers.
  9896  	close(pdone)
  9897  	defer func() {
  9898  		close(cdone)
  9899  		wg.Wait()
  9900  	}()
  9901  
  9902  	checkFor(t, 30*time.Second, 50*time.Millisecond, func() error {
  9903  		si, err := js.StreamInfo("TEST")
  9904  		require_NoError(t, err)
  9905  		if si.State.NumDeleted > 0 || si.State.Msgs > 0 {
  9906  			return fmt.Errorf("State not correct: %+v", si.State)
  9907  		}
  9908  		return nil
  9909  	})
  9910  }
  9911  
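        // Verifies that client, route, gateway, leafnode, MQTT and websocket connection
        // objects can be garbage collected (their finalizers run) once the underlying
        // connections are closed, i.e. the servers do not retain references to them.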
  9912  func TestNoRaceConnectionObjectReleased(t *testing.T) {
  9913  	ob1Conf := createConfFile(t, []byte(`
  9914  		listen: "127.0.0.1:-1"
  9915  		server_name: "B1"
  9916  		accounts {
  9917  			A { users: [{user: a, password: pwd}] }
  9918  			SYS { users: [{user: sys, password: pwd}] }
  9919  		}
  9920  		cluster {
  9921  			name: "B"
  9922  			listen: "127.0.0.1:-1"
  9923  		}
  9924  		gateway {
  9925  			name: "B"
  9926  			listen: "127.0.0.1:-1"
  9927  		}
  9928  		leaf {
  9929  			listen: "127.0.0.1:-1"
  9930  		}
  9931  		system_account: "SYS"
  9932  	`))
  9933  	sb1, ob1 := RunServerWithConfig(ob1Conf)
  9934  	defer sb1.Shutdown()
  9935  
  9936  	oaConf := createConfFile(t, []byte(fmt.Sprintf(`
  9937  		listen: "127.0.0.1:-1"
  9938  		server_name: "A"
  9939  		accounts {
  9940  			A { users: [{user: a, password: pwd}] }
  9941  			SYS { users: [{user: sys, password: pwd}] }
  9942  		}
  9943  		gateway {
  9944  			name: "A"
  9945  			listen: "127.0.0.1:-1"
  9946  			gateways [
  9947  				{
  9948  					name: "B"
  9949  					url: "nats://a:pwd@127.0.0.1:%d"
  9950  				}
  9951  			]
  9952  		}
  9953  		websocket {
  9954  			listen: "127.0.0.1:-1"
  9955  			no_tls: true
  9956  		}
  9957  		system_account: "SYS"
  9958  	`, ob1.Gateway.Port)))
  9959  	sa, oa := RunServerWithConfig(oaConf)
  9960  	defer sa.Shutdown()
  9961  
  9962  	waitForOutboundGateways(t, sa, 1, 2*time.Second)
  9963  	waitForOutboundGateways(t, sb1, 1, 2*time.Second)
  9964  
  9965  	ob2Conf := createConfFile(t, []byte(fmt.Sprintf(`
  9966  		listen: "127.0.0.1:-1"
  9967  		server_name: "B2"
  9968  		accounts {
  9969  			A { users: [{user: a, password: pwd}] }
  9970  			SYS { users: [{user: sys, password: pwd}] }
  9971  		}
  9972  		cluster {
  9973  			name: "B"
  9974  			listen: "127.0.0.1:-1"
  9975  			routes: ["nats://127.0.0.1:%d"]
  9976  		}
  9977  		gateway {
  9978  			name: "B"
  9979  			listen: "127.0.0.1:-1"
  9980  		}
  9981  		system_account: "SYS"
  9982  	`, ob1.Cluster.Port)))
  9983  	sb2, _ := RunServerWithConfig(ob2Conf)
  9984  	defer sb2.Shutdown()
  9985  
  9986  	checkClusterFormed(t, sb1, sb2)
  9987  	waitForOutboundGateways(t, sb2, 1, 2*time.Second)
  9988  	waitForInboundGateways(t, sa, 2, 2*time.Second)
  9989  
  9990  	leafConf := createConfFile(t, []byte(fmt.Sprintf(`
  9991  		listen: "127.0.0.1:-1"
  9992  		server_name: "C"
  9993  		accounts {
  9994  			A { users: [{user: a, password: pwd}] }
  9995  			SYS { users: [{user: sys, password: pwd}] }
  9996  		}
  9997  		leafnodes {
  9998  			remotes [
  9999  				{ url: "nats://a:pwd@127.0.0.1:%d" }
 10000  			]
 10001  		}
 10002  		system_account: "SYS"
 10003  	`, ob1.LeafNode.Port)))
 10004  	leaf, _ := RunServerWithConfig(leafConf)
 10005  	defer leaf.Shutdown()
 10006  
 10007  	checkLeafNodeConnected(t, leaf)
 10008  
 10009  	// Start an independent MQTT server to check MQTT client connection.
 10010  	mo := testMQTTDefaultOptions()
 10011  	sm := testMQTTRunServer(t, mo)
 10012  	defer testMQTTShutdownServer(sm)
 10013  
 10014  	mc, mr := testMQTTConnect(t, &mqttConnInfo{cleanSess: true}, mo.MQTT.Host, mo.MQTT.Port)
 10015  	defer mc.Close()
 10016  	testMQTTCheckConnAck(t, mr, mqttConnAckRCConnectionAccepted, false)
 10017  
 10018  	nc := natsConnect(t, sb1.ClientURL(), nats.UserInfo("a", "pwd"))
 10019  	defer nc.Close()
 10020  	cid, err := nc.GetClientID()
 10021  	require_NoError(t, err)
 10022  	natsSubSync(t, nc, "foo")
 10023  
 10024  	ncWS := natsConnect(t, fmt.Sprintf("ws://a:pwd@127.0.0.1:%d", oa.Websocket.Port))
 10025  	defer ncWS.Close()
 10026  	cidWS, err := ncWS.GetClientID()
 10027  	require_NoError(t, err)
 10028  
 10029  	var conns []net.Conn
 10030  	var total int
 10031  	var ch chan string
 10032  
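        	// track records the underlying net.Conn and sets a finalizer on the client
        	// object so we are notified when it gets garbage collected.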
 10033  	track := func(c *client) {
 10034  		total++
 10035  		c.mu.Lock()
 10036  		conns = append(conns, c.nc)
 10037  		c.mu.Unlock()
 10038  		runtime.SetFinalizer(c, func(c *client) {
 10039  			ch <- fmt.Sprintf("Server=%s - Kind=%s - Conn=%v", c.srv, c.kindString(), c)
 10040  		})
 10041  	}
 10042  	// Track the connection for the MQTT client
 10043  	sm.mu.RLock()
 10044  	for _, c := range sm.clients {
 10045  		track(c)
 10046  	}
 10047  	sm.mu.RUnlock()
 10048  
 10049  	// Track the connection from the NATS client
 10050  	track(sb1.getClient(cid))
 10051  	// The outbound connection to GW "A"
 10052  	track(sb1.getOutboundGatewayConnection("A"))
 10053  	// The inbound connection from GW "A"
 10054  	var inGW []*client
 10055  	sb1.getInboundGatewayConnections(&inGW)
 10056  	track(inGW[0])
 10057  	// The routes from sb2
 10058  	sb1.forEachRoute(func(r *client) {
 10059  		track(r)
 10060  	})
  10061  	// The leafnode connection from server "C"
 10062  	sb1.mu.RLock()
 10063  	for _, l := range sb1.leafs {
 10064  		track(l)
 10065  	}
 10066  	sb1.mu.RUnlock()
 10067  
 10068  	// Now from sb2, the routes to sb1
 10069  	sb2.forEachRoute(func(r *client) {
 10070  		track(r)
 10071  	})
 10072  	// The outbound connection to GW "A"
 10073  	track(sb2.getOutboundGatewayConnection("A"))
 10074  
 10075  	// From server "A", track the outbound GW
 10076  	track(sa.getOutboundGatewayConnection("B"))
 10077  	inGW = inGW[:0]
 10078  	// Track the inbound GW connections
 10079  	sa.getInboundGatewayConnections(&inGW)
 10080  	for _, ig := range inGW {
 10081  		track(ig)
 10082  	}
 10083  	// Track the websocket client
 10084  	track(sa.getClient(cidWS))
 10085  
 10086  	// From the LEAF server, the connection to sb1
 10087  	leaf.mu.RLock()
 10088  	for _, l := range leaf.leafs {
 10089  		track(l)
 10090  	}
 10091  	leaf.mu.RUnlock()
 10092  
  10093  	// Now close all connections and wait to see if the finalizers set on
  10094  	// all tracked connections are invoked.
 10095  	ch = make(chan string, total)
 10096  	// Close the clients and then all other connections to create a disconnect.
 10097  	nc.Close()
 10098  	mc.Close()
 10099  	ncWS.Close()
 10100  	for _, conn := range conns {
 10101  		conn.Close()
 10102  	}
 10103  	// Wait and see if we get them all.
 10104  	tm := time.NewTimer(10 * time.Second)
 10105  	defer tm.Stop()
 10106  	tk := time.NewTicker(10 * time.Millisecond)
 10107  	for clients := make([]string, 0, total); len(clients) < total; {
 10108  		select {
 10109  		case <-tk.C:
 10110  			runtime.GC()
 10111  		case cs := <-ch:
 10112  			clients = append(clients, cs)
 10113  		case <-tm.C:
 10114  			// Don't fail the test since there is no guarantee that
 10115  			// finalizers are invoked.
 10116  			t.Logf("Got %v out of %v finalizers", len(clients), total)
 10117  			sort.Strings(clients)
 10118  			for _, cs := range clients {
 10119  				t.Logf("  => %s", cs)
 10120  			}
 10121  			return
 10122  		}
 10123  	}
 10124  }
 10125  
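        // Compares LoadNextMsgMulti (sublist-based multi-filter lookups) against plain
        // LoadNextMsg with no filter and with a wildcard filter, requiring each variant
        // to stay within 2x of the unfiltered baseline.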
 10126  func TestNoRaceFileStoreMsgLoadNextMsgMultiPerf(t *testing.T) {
 10127  	fs, err := newFileStore(
 10128  		FileStoreConfig{StoreDir: t.TempDir()},
 10129  		StreamConfig{Name: "zzz", Subjects: []string{"foo.*"}, Storage: FileStorage})
 10130  	require_NoError(t, err)
 10131  	defer fs.Stop()
 10132  
 10133  	// Put 1k msgs in
 10134  	for i := 0; i < 1000; i++ {
 10135  		subj := fmt.Sprintf("foo.%d", i)
 10136  		fs.StoreMsg(subj, nil, []byte("ZZZ"))
 10137  	}
 10138  
 10139  	var smv StoreMsg
 10140  
 10141  	// Now do normal load next with no filter.
 10142  	// This is baseline.
 10143  	start := time.Now()
 10144  	for i, seq := 0, uint64(1); i < 1000; i++ {
 10145  		sm, nseq, err := fs.LoadNextMsg(_EMPTY_, false, seq, &smv)
 10146  		require_NoError(t, err)
 10147  		require_True(t, sm.subj == fmt.Sprintf("foo.%d", i))
 10148  		require_Equal(t, nseq, seq)
 10149  		seq++
 10150  	}
 10151  	baseline := time.Since(start)
 10152  	t.Logf("Single - No filter %v", baseline)
 10153  
 10154  	// Now do normal load next with wc filter.
 10155  	start = time.Now()
 10156  	for i, seq := 0, uint64(1); i < 1000; i++ {
 10157  		sm, nseq, err := fs.LoadNextMsg("foo.>", true, seq, &smv)
 10158  		require_NoError(t, err)
 10159  		require_True(t, sm.subj == fmt.Sprintf("foo.%d", i))
 10160  		require_Equal(t, nseq, seq)
 10161  		seq++
 10162  	}
 10163  	elapsed := time.Since(start)
 10164  	require_True(t, elapsed < 2*baseline)
 10165  	t.Logf("Single - WC filter %v", elapsed)
 10166  
 10167  	// Now do multi load next with 1 wc entry.
 10168  	sl := NewSublistWithCache()
 10169  	sl.Insert(&subscription{subject: []byte("foo.>")})
 10170  	start = time.Now()
 10171  	for i, seq := 0, uint64(1); i < 1000; i++ {
 10172  		sm, nseq, err := fs.LoadNextMsgMulti(sl, seq, &smv)
 10173  		require_NoError(t, err)
 10174  		require_True(t, sm.subj == fmt.Sprintf("foo.%d", i))
 10175  		require_Equal(t, nseq, seq)
 10176  		seq++
 10177  	}
 10178  	elapsed = time.Since(start)
 10179  	require_True(t, elapsed < 2*baseline)
 10180  	t.Logf("Multi - Single WC filter %v", elapsed)
 10181  
 10182  	// Now do multi load next with 1000 literal subjects.
 10183  	sl = NewSublistWithCache()
 10184  	for i := 0; i < 1000; i++ {
 10185  		subj := fmt.Sprintf("foo.%d", i)
 10186  		sl.Insert(&subscription{subject: []byte(subj)})
 10187  	}
 10188  	start = time.Now()
 10189  	for i, seq := 0, uint64(1); i < 1000; i++ {
 10190  		sm, nseq, err := fs.LoadNextMsgMulti(sl, seq, &smv)
 10191  		require_NoError(t, err)
 10192  		require_True(t, sm.subj == fmt.Sprintf("foo.%d", i))
 10193  		require_Equal(t, nseq, seq)
 10194  		seq++
 10195  	}
 10196  	elapsed = time.Since(start)
 10197  	require_True(t, elapsed < 2*baseline)
 10198  	t.Logf("Multi - 1000 filters %v", elapsed)
 10199  }
 10200  
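        // Publishes to several subjects of a work-queue stream while a single consumer
        // with multiple filter subjects drains it, then verifies that no messages are
        // orphaned and that there are no gaps in the delivered stream sequences.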
 10201  func TestNoRaceWQAndMultiSubjectFilters(t *testing.T) {
 10202  	c := createJetStreamClusterExplicit(t, "R3S", 3)
 10203  	defer c.shutdown()
 10204  
 10205  	nc, js := jsClientConnect(t, c.randomServer())
 10206  	defer nc.Close()
 10207  
 10208  	_, err := js.AddStream(&nats.StreamConfig{
 10209  		Name:      "TEST",
 10210  		Subjects:  []string{"Z.>"},
 10211  		Retention: nats.WorkQueuePolicy,
 10212  	})
 10213  	require_NoError(t, err)
 10214  
 10215  	stopPubs := make(chan bool)
 10216  
 10217  	publish := func(subject string) {
 10218  		nc, js := jsClientConnect(t, c.randomServer())
 10219  		defer nc.Close()
 10220  
 10221  		for {
 10222  			select {
 10223  			case <-stopPubs:
 10224  				return
 10225  			default:
 10226  				_, _ = js.Publish(subject, []byte("hello"))
 10227  			}
 10228  		}
 10229  	}
 10230  
 10231  	go publish("Z.foo")
 10232  	go publish("Z.bar")
 10233  	go publish("Z.baz")
 10234  
 10235  	// Cancel pubs after 10s.
 10236  	time.AfterFunc(10*time.Second, func() { close(stopPubs) })
 10237  
 10238  	// Create a consumer
 10239  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
 10240  		Durable:        "zzz",
 10241  		AckPolicy:      nats.AckExplicitPolicy,
 10242  		AckWait:        5 * time.Second,
 10243  		FilterSubjects: []string{"Z.foo", "Z.bar", "Z.baz"},
 10244  	})
 10245  	require_NoError(t, err)
 10246  
 10247  	sub, err := js.PullSubscribe(_EMPTY_, "zzz", nats.Bind("TEST", "zzz"))
 10248  	require_NoError(t, err)
 10249  
 10250  	received := make([]uint64, 0, 256_000)
 10251  	batchSize := 10
 10252  
 10253  	for running := true; running; {
 10254  		msgs, err := sub.Fetch(batchSize, nats.MaxWait(2*time.Second))
 10255  		if err == nats.ErrTimeout {
 10256  			running = false
 10257  		}
 10258  		for _, m := range msgs {
 10259  			meta, err := m.Metadata()
 10260  			require_NoError(t, err)
 10261  			received = append(received, meta.Sequence.Stream)
 10262  			m.Ack()
 10263  		}
 10264  	}
 10265  
 10266  	sort.Slice(received, func(i, j int) bool { return received[i] < received[j] })
 10267  
 10268  	var pseq, gaps uint64
 10269  	for _, seq := range received {
  10270  		if pseq != 0 && seq > pseq+1 {
  10271  			gaps += seq - pseq - 1 // Count how many stream sequences were skipped.
 10272  		}
 10273  		pseq = seq
 10274  	}
 10275  	si, err := js.StreamInfo("TEST")
 10276  	require_NoError(t, err)
 10277  
 10278  	if si.State.Msgs != 0 || gaps > 0 {
 10279  		t.Fatalf("Orphaned msgs %d with %d gaps detected", si.State.Msgs, gaps)
 10280  	}
 10281  }
 10282  
 10283  // https://github.com/nats-io/nats-server/issues/4957
 10284  func TestNoRaceWQAndMultiSubjectFiltersRace(t *testing.T) {
 10285  	c := createJetStreamClusterExplicit(t, "R3S", 3)
 10286  	defer c.shutdown()
 10287  
 10288  	nc, js := jsClientConnect(t, c.randomServer())
 10289  	defer nc.Close()
 10290  
 10291  	_, err := js.AddStream(&nats.StreamConfig{
 10292  		Name:      "TEST",
 10293  		Subjects:  []string{"Z.>"},
 10294  		Retention: nats.WorkQueuePolicy,
 10295  		Replicas:  1,
 10296  	})
 10297  	require_NoError(t, err)
 10298  
  10299  	// The bug would happen when the stream leader was on the same server as the
  10300  	// meta-leader, so arrange for that here. Since the stream can be R1 its leader
  10301  	// is fixed, so step down the meta-leader until it lands on that server.
 10302  	sl := c.streamLeader(globalAccountName, "TEST")
 10303  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
 10304  		if sl == c.leader() {
 10305  			return nil
 10306  		}
 10307  		// Move meta-leader since stream can be R1.
 10308  		nc.Request(JSApiLeaderStepDown, nil, time.Second)
  10309  		return fmt.Errorf("stream leader not yet on meta-leader")
 10310  	})
 10311  
 10312  	start := make(chan struct{})
 10313  	var done, ready sync.WaitGroup
 10314  
  10315  	// Create num goroutines that will all race to create a consumer with the same filter subject but a different name.
 10316  	num := 10
 10317  	ready.Add(num)
 10318  	done.Add(num)
 10319  
 10320  	for i := 0; i < num; i++ {
 10321  		go func(n int) {
 10322  			// Connect directly to the meta leader but with our own connection.
 10323  			s := c.leader()
 10324  			nc, js := jsClientConnect(t, s)
 10325  			defer nc.Close()
 10326  
 10327  			ready.Done()
 10328  			defer done.Done()
 10329  			<-start
 10330  
 10331  			js.AddConsumer("TEST", &nats.ConsumerConfig{
 10332  				Name:          fmt.Sprintf("C-%d", n),
 10333  				FilterSubject: "Z.foo",
 10334  				AckPolicy:     nats.AckExplicitPolicy,
 10335  			})
 10336  		}(i)
 10337  	}
 10338  
 10339  	// Wait for requestors to be ready
 10340  	ready.Wait()
 10341  	close(start)
 10342  	done.Wait()
 10343  
 10344  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
 10345  		si, err := js.StreamInfo("TEST")
 10346  		require_NoError(t, err)
 10347  		if si.State.Consumers != 1 {
 10348  			return fmt.Errorf("Consumer count not correct: %d vs 1", si.State.Consumers)
 10349  		}
 10350  		return nil
 10351  	})
 10352  }
 10353  
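        // Stores millions of messages on unique subjects while periodically writing the
        // full filestore state, checking that individual stores stay fast and that any
        // writeFullState error above the message threshold is errStateTooBig.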
 10354  func TestNoRaceFileStoreWriteFullStateUniqueSubjects(t *testing.T) {
 10355  	fcfg := FileStoreConfig{StoreDir: t.TempDir()}
 10356  	fs, err := newFileStore(fcfg,
 10357  		StreamConfig{Name: "zzz", Subjects: []string{"records.>"}, Storage: FileStorage, MaxMsgsPer: 1, MaxBytes: 15 * 1024 * 1024 * 1024})
 10358  	require_NoError(t, err)
 10359  	defer fs.Stop()
 10360  
 10361  	qch := make(chan struct{})
 10362  	defer close(qch)
 10363  
 10364  	go func() {
 10365  		const numThreshold = 1_000_000
  10366  		tick := time.NewTicker(1 * time.Second)
        		defer tick.Stop()
 10367  		for {
 10368  			select {
 10369  			case <-qch:
 10370  				return
 10371  			case <-tick.C:
 10372  				err := fs.writeFullState()
 10373  				var state StreamState
 10374  				fs.FastState(&state)
 10375  				if state.Msgs > numThreshold && err != nil {
 10376  					require_Error(t, err, errStateTooBig)
 10377  				}
 10378  			}
 10379  		}
 10380  	}()
 10381  
 10382  	labels := []string{"AAAAA", "BBBB", "CCCC", "DD", "EEEEE"}
 10383  	msg := []byte(strings.Repeat("Z", 128))
 10384  
 10385  	for i := 0; i < 100; i++ {
 10386  		partA := nuid.Next()
 10387  		for j := 0; j < 100; j++ {
 10388  			partB := nuid.Next()
 10389  			for k := 0; k < 500; k++ {
 10390  				partC := nuid.Next()
  10391  				partD := labels[rand.Intn(len(labels))]
 10392  				subject := fmt.Sprintf("records.%s.%s.%s.%s.%s", partA, partB, partC, partD, nuid.Next())
 10393  				start := time.Now()
 10394  				fs.StoreMsg(subject, nil, msg)
 10395  				elapsed := time.Since(start)
 10396  				if elapsed > 500*time.Millisecond {
 10397  					t.Fatalf("Slow store for %q: %v\n", subject, elapsed)
 10398  				}
 10399  			}
 10400  		}
 10401  	}
 10402  	// Make sure we do write the full state on stop.
 10403  	fs.Stop()
 10404  	fi, err := os.Stat(filepath.Join(fcfg.StoreDir, msgDir, streamStreamStateFile))
 10405  	require_NoError(t, err)
  10406  	// ~500MB; could change if we tweak encodings.
 10407  	require_True(t, fi.Size() > 500*1024*1024)
 10408  }