get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/server/jetstream_cluster_3_test.go

     1  // Copyright 2022-2024 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  //go:build !skip_js_tests && !skip_js_cluster_tests_3
    15  // +build !skip_js_tests,!skip_js_cluster_tests_3
    16  
    17  package server
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/json"
    23  	"errors"
    24  	"fmt"
    25  	"math/rand"
    26  	"net"
    27  	"os"
    28  	"path/filepath"
    29  	"reflect"
    30  	"strings"
    31  	"sync"
    32  	"sync/atomic"
    33  	"testing"
    34  	"time"
    35  
    36  	"github.com/nats-io/jwt/v2"
    37  	"github.com/nats-io/nats.go"
    38  )
    39  
    40  func TestJetStreamClusterRemovePeerByID(t *testing.T) {
    41  	c := createJetStreamClusterExplicit(t, "R3S", 3)
    42  	defer c.shutdown()
    43  
    44  	s := c.randomNonLeader()
    45  	nc, js := jsClientConnect(t, s)
    46  	defer nc.Close()
    47  
    48  	_, err := js.AddStream(&nats.StreamConfig{
    49  		Name:     "TEST",
    50  		Subjects: []string{"foo", "bar"},
    51  		Replicas: 3,
    52  	})
    53  	require_NoError(t, err)
    54  
    55  	// Wait for a leader
    56  	c.waitOnStreamLeader(globalAccountName, "TEST")
    57  
    58  	// Get the name of the one that is not restarted
    59  	srvName := c.opts[2].ServerName
    60  	// And its node ID
    61  	peerID := c.servers[2].Node()
    62  
    63  	nc.Close()
    64  	// Now stop the whole cluster
    65  	c.stopAll()
    66  	// Restart all but one
    67  	for i := 0; i < 2; i++ {
    68  		opts := c.opts[i]
    69  		s, o := RunServerWithConfig(opts.ConfigFile)
    70  		c.servers[i] = s
    71  		c.opts[i] = o
    72  	}
    73  
    74  	c.waitOnClusterReadyWithNumPeers(2)
    75  	c.waitOnStreamLeader(globalAccountName, "TEST")
    76  
     77  	// Now attempt to remove by name; this should fail because the cluster
    78  	// was restarted and names are not persisted.
    79  	ml := c.leader()
    80  	nc, err = nats.Connect(ml.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
    81  	require_NoError(t, err)
    82  	defer nc.Close()
    83  
    84  	req := &JSApiMetaServerRemoveRequest{Server: srvName}
    85  	jsreq, err := json.Marshal(req)
    86  	require_NoError(t, err)
    87  	rmsg, err := nc.Request(JSApiRemoveServer, jsreq, 2*time.Second)
    88  	require_NoError(t, err)
    89  
    90  	var resp JSApiMetaServerRemoveResponse
    91  	err = json.Unmarshal(rmsg.Data, &resp)
    92  	require_NoError(t, err)
    93  	require_True(t, resp.Error != nil)
    94  	require_True(t, IsNatsErr(resp.Error, JSClusterServerNotMemberErr))
    95  
    96  	// Now try by ID, but first with an ID that does not match any peerID
    97  	req.Peer = "some_bad_id"
    98  	jsreq, err = json.Marshal(req)
    99  	require_NoError(t, err)
   100  	rmsg, err = nc.Request(JSApiRemoveServer, jsreq, 2*time.Second)
   101  	require_NoError(t, err)
   102  
   103  	resp = JSApiMetaServerRemoveResponse{}
   104  	err = json.Unmarshal(rmsg.Data, &resp)
   105  	require_NoError(t, err)
   106  	require_True(t, resp.Error != nil)
   107  	require_True(t, IsNatsErr(resp.Error, JSClusterServerNotMemberErr))
   108  
   109  	// Now with the proper peer ID
   110  	req.Peer = peerID
   111  	jsreq, err = json.Marshal(req)
   112  	require_NoError(t, err)
   113  	rmsg, err = nc.Request(JSApiRemoveServer, jsreq, 2*time.Second)
   114  	require_NoError(t, err)
   115  
   116  	resp = JSApiMetaServerRemoveResponse{}
   117  	err = json.Unmarshal(rmsg.Data, &resp)
   118  	require_NoError(t, err)
   119  	require_True(t, resp.Error == nil)
   120  	require_True(t, resp.Success)
   121  }
   122  
   123  func TestJetStreamClusterDiscardNewAndMaxMsgsPerSubject(t *testing.T) {
   124  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   125  	defer c.shutdown()
   126  
   127  	// Client for API requests.
   128  	s := c.randomNonLeader()
   129  	nc, js := jsClientConnect(t, s)
   130  	defer nc.Close()
   131  
   132  	for _, test := range []struct {
   133  		name     string
   134  		storage  StorageType
   135  		replicas int
   136  	}{
   137  		{"MEM-R1", MemoryStorage, 1},
   138  		{"FILE-R1", FileStorage, 1},
   139  		{"MEM-R3", MemoryStorage, 3},
   140  		{"FILE-R3", FileStorage, 3},
   141  	} {
   142  		t.Run(test.name, func(t *testing.T) {
   143  			js.DeleteStream("KV")
    144  			// Make sure setting DiscardNewPer without the discard policy also being DiscardNew is an error.
   145  			cfg := &StreamConfig{
   146  				Name:          "KV",
   147  				Subjects:      []string{"KV.>"},
   148  				Storage:       test.storage,
   149  				AllowDirect:   true,
   150  				DiscardNewPer: true,
   151  				MaxMsgs:       10,
   152  				Replicas:      test.replicas,
   153  			}
   154  			if _, apiErr := addStreamWithError(t, nc, cfg); apiErr == nil {
   155  				t.Fatalf("Expected API error but got none")
   156  			} else if apiErr.ErrCode != 10052 || !strings.Contains(apiErr.Description, "discard new per subject requires discard new policy") {
   157  				t.Fatalf("Got wrong error: %+v", apiErr)
   158  			}
   159  
   160  			// Set broad discard new policy to engage DiscardNewPer
   161  			cfg.Discard = DiscardNew
    162  			// We should also error here since we have not set up max msgs per subject.
   163  			if _, apiErr := addStreamWithError(t, nc, cfg); apiErr == nil {
   164  				t.Fatalf("Expected API error but got none")
   165  			} else if apiErr.ErrCode != 10052 || !strings.Contains(apiErr.Description, "discard new per subject requires max msgs per subject > 0") {
   166  				t.Fatalf("Got wrong error: %+v", apiErr)
   167  			}
   168  
   169  			cfg.MaxMsgsPer = 1
   170  			addStream(t, nc, cfg)
   171  
   172  			// We want to test that we reject new messages on a per subject basis if the
   173  			// max msgs per subject limit has been hit, even if other limits have not.
   174  			_, err := js.Publish("KV.foo", nil)
   175  			require_NoError(t, err)
   176  
   177  			_, err = js.Publish("KV.foo", nil)
    178  			// The Go client does not have a const for this one.
   179  			require_Error(t, err, errors.New("nats: maximum messages per subject exceeded"))
   180  		})
   181  	}
   182  }
   183  
   184  func TestJetStreamClusterCreateConsumerWithReplicaOneGetsResponse(t *testing.T) {
   185  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   186  	defer c.shutdown()
   187  
   188  	s := c.randomNonLeader()
   189  	nc, js := jsClientConnect(t, s)
   190  	defer nc.Close()
   191  
   192  	_, err := js.AddStream(&nats.StreamConfig{
   193  		Name:     "TEST",
   194  		Subjects: []string{"foo"},
   195  		Replicas: 3,
   196  	})
   197  	require_NoError(t, err)
   198  
   199  	c.waitOnStreamLeader(globalAccountName, "TEST")
   200  
   201  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
   202  		Durable:   "C3",
   203  		AckPolicy: nats.AckExplicitPolicy,
   204  	})
   205  	require_NoError(t, err)
   206  
   207  	c.waitOnConsumerLeader(globalAccountName, "TEST", "C3")
   208  
    209  	// Update to scale down to R1; that should work (i.e., we should get a response).
   210  	_, err = js.UpdateConsumer("TEST", &nats.ConsumerConfig{
   211  		Durable:   "C3",
   212  		AckPolicy: nats.AckExplicitPolicy,
   213  		Replicas:  1,
   214  	})
   215  	require_NoError(t, err)
   216  
   217  	c.waitOnConsumerLeader(globalAccountName, "TEST", "C3")
   218  
   219  	ci, err := js.ConsumerInfo("TEST", "C3")
   220  	require_NoError(t, err)
   221  	require_True(t, ci.Config.Replicas == 1)
   222  	require_True(t, len(ci.Cluster.Replicas) == 0)
   223  }
   224  
   225  func TestJetStreamClusterMetaRecoveryLogic(t *testing.T) {
   226  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   227  	defer c.shutdown()
   228  
   229  	s := c.randomNonLeader()
   230  	nc, js := jsClientConnect(t, s)
   231  	defer nc.Close()
   232  
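         	// Exercise the meta layer with a mix of stream create, update and delete
         	// operations, then verify after a full cluster restart that the surviving
         	// stream comes back with its last known config.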
   233  	_, err := js.AddStream(&nats.StreamConfig{
   234  		Name:     "TEST",
   235  		Subjects: []string{"foo"},
   236  		Replicas: 3,
   237  	})
   238  	require_NoError(t, err)
   239  
   240  	_, err = js.UpdateStream(&nats.StreamConfig{
   241  		Name:     "TEST",
   242  		Subjects: []string{"foo", "bar"},
   243  		Replicas: 1,
   244  	})
   245  	require_NoError(t, err)
   246  
   247  	err = js.DeleteStream("TEST")
   248  	require_NoError(t, err)
   249  
   250  	_, err = js.AddStream(&nats.StreamConfig{
   251  		Name:     "TEST",
   252  		Subjects: []string{"foo"},
   253  		Replicas: 3,
   254  	})
   255  	require_NoError(t, err)
   256  
   257  	err = js.DeleteStream("TEST")
   258  	require_NoError(t, err)
   259  
   260  	_, err = js.AddStream(&nats.StreamConfig{
   261  		Name:     "TEST",
   262  		Subjects: []string{"baz"},
   263  		Replicas: 1,
   264  	})
   265  	require_NoError(t, err)
   266  
   267  	osi, err := js.StreamInfo("TEST")
   268  	require_NoError(t, err)
   269  
   270  	c.stopAll()
   271  	c.restartAll()
   272  	c.waitOnLeader()
   273  	c.waitOnStreamLeader("$G", "TEST")
   274  
   275  	s = c.randomNonLeader()
   276  	nc, js = jsClientConnect(t, s)
   277  	defer nc.Close()
   278  
   279  	si, err := js.StreamInfo("TEST")
   280  	require_NoError(t, err)
   281  
   282  	if !reflect.DeepEqual(si.Config, osi.Config) {
   283  		t.Fatalf("Expected %+v, but got %+v", osi.Config, si.Config)
   284  	}
   285  }
   286  
   287  func TestJetStreamClusterDeleteConsumerWhileServerDown(t *testing.T) {
   288  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   289  	defer c.shutdown()
   290  
   291  	nc, js := jsClientConnect(t, c.randomNonLeader())
   292  	defer nc.Close()
   293  
   294  	_, err := js.AddStream(&nats.StreamConfig{
   295  		Name:     "TEST",
   296  		Subjects: []string{"foo"},
   297  		Replicas: 3,
   298  	})
   299  	require_NoError(t, err)
   300  
   301  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
   302  		Durable:   "DC",
   303  		AckPolicy: nats.AckExplicitPolicy,
   304  		Replicas:  3,
   305  	})
   306  	require_NoError(t, err)
   307  
   308  	s := c.randomNonConsumerLeader("$G", "TEST", "DC")
   309  	s.Shutdown()
   310  
   311  	c.waitOnLeader()                                 // In case that was metaleader.
   312  	nc, js = jsClientConnect(t, c.randomNonLeader()) // In case we were connected there.
   313  	defer nc.Close()
   314  
   315  	err = js.DeleteConsumer("TEST", "DC")
   316  	require_NoError(t, err)
   317  
   318  	// Restart.
   319  	s = c.restartServer(s)
   320  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
   321  		hs := s.healthz(&HealthzOptions{
   322  			JSEnabledOnly: false,
   323  			JSServerOnly:  false,
   324  		})
   325  		if hs.Error != _EMPTY_ {
   326  			return errors.New(hs.Error)
   327  		}
   328  		return nil
   329  	})
   330  
   331  	// Make sure we can not see it on the server that was down at the time of delete.
   332  	mset, err := s.GlobalAccount().lookupStream("TEST")
   333  	require_NoError(t, err)
   334  
   335  	if o := mset.lookupConsumer("DC"); o != nil {
   336  		t.Fatalf("Expected to not find consumer, but did")
   337  	}
   338  
   339  	// Now repeat but force a meta snapshot.
   340  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
   341  		Durable:   "DC",
   342  		AckPolicy: nats.AckExplicitPolicy,
   343  		Replicas:  3,
   344  	})
   345  	require_NoError(t, err)
   346  
   347  	s = c.randomNonConsumerLeader("$G", "TEST", "DC")
   348  	s.Shutdown()
   349  
   350  	c.waitOnLeader()                                 // In case that was metaleader.
   351  	nc, js = jsClientConnect(t, c.randomNonLeader()) // In case we were connected there.
   352  	defer nc.Close()
   353  
   354  	err = js.DeleteConsumer("TEST", "DC")
   355  	require_NoError(t, err)
   356  
   357  	err = c.leader().JetStreamSnapshotMeta()
   358  	require_NoError(t, err)
   359  
   360  	// Restart.
   361  	s = c.restartServer(s)
   362  	checkFor(t, time.Second*2, 200*time.Millisecond, func() error {
   363  		hs := s.healthz(&HealthzOptions{
   364  			JSEnabledOnly: false,
   365  			JSServerOnly:  false,
   366  		})
   367  		if hs.Error != _EMPTY_ {
   368  			return errors.New(hs.Error)
   369  		}
   370  		return nil
   371  	})
   372  
   373  	// Make sure we can not see it on the server that was down at the time of delete.
   374  	mset, err = s.GlobalAccount().lookupStream("TEST")
   375  	require_NoError(t, err)
   376  
   377  	if o := mset.lookupConsumer("DC"); o != nil {
   378  		t.Fatalf("Expected to not find consumer, but did")
   379  	}
   380  }
   381  
   382  func TestJetStreamClusterNegativeReplicas(t *testing.T) {
   383  	s := RunBasicJetStreamServer(t)
   384  	defer s.Shutdown()
   385  
   386  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   387  	defer c.shutdown()
   388  
   389  	testBadReplicas := func(t *testing.T, s *Server, name string) {
   390  		nc, js := jsClientConnect(t, s)
   391  		defer nc.Close()
   392  
   393  		_, err := js.AddStream(&nats.StreamConfig{
   394  			Name:     name,
   395  			Replicas: -1,
   396  		})
   397  		require_Error(t, err, NewJSReplicasCountCannotBeNegativeError())
   398  
   399  		_, err = js.AddStream(&nats.StreamConfig{
   400  			Name:     name,
   401  			Replicas: 1,
   402  		})
   403  		require_NoError(t, err)
   404  
   405  		// Check update now.
   406  		_, err = js.UpdateStream(&nats.StreamConfig{
   407  			Name:     name,
   408  			Replicas: -11,
   409  		})
   410  		require_Error(t, err, NewJSReplicasCountCannotBeNegativeError())
   411  
   412  		// Now same for consumers
   413  		durName := fmt.Sprintf("%s_dur", name)
   414  		_, err = js.AddConsumer(name, &nats.ConsumerConfig{
   415  			Durable:  durName,
   416  			Replicas: -1,
   417  		})
   418  		require_Error(t, err, NewJSReplicasCountCannotBeNegativeError())
   419  
   420  		_, err = js.AddConsumer(name, &nats.ConsumerConfig{
   421  			Durable:  durName,
   422  			Replicas: 1,
   423  		})
   424  		require_NoError(t, err)
   425  
   426  		// Check update now
   427  		_, err = js.UpdateConsumer(name, &nats.ConsumerConfig{
   428  			Durable:  durName,
   429  			Replicas: -11,
   430  		})
   431  		require_Error(t, err, NewJSReplicasCountCannotBeNegativeError())
   432  	}
   433  
   434  	t.Run("Standalone", func(t *testing.T) { testBadReplicas(t, s, "TEST1") })
   435  	t.Run("Clustered", func(t *testing.T) { testBadReplicas(t, c.randomServer(), "TEST2") })
   436  }
   437  
   438  func TestJetStreamClusterUserGivenConsName(t *testing.T) {
   439  	s := RunBasicJetStreamServer(t)
   440  	defer s.Shutdown()
   441  
   442  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   443  	defer c.shutdown()
   444  
   445  	test := func(t *testing.T, s *Server, stream string, replicas int, cons string) {
   446  		nc, js := jsClientConnect(t, s)
   447  		defer nc.Close()
   448  
   449  		_, err := js.AddStream(&nats.StreamConfig{
   450  			Name:     stream,
   451  			Replicas: replicas,
   452  		})
   453  		require_NoError(t, err)
   454  
   455  		cc := &CreateConsumerRequest{
   456  			Stream: stream,
   457  			Config: ConsumerConfig{
   458  				Name:              cons,
   459  				FilterSubject:     stream,
   460  				InactiveThreshold: 10 * time.Second,
   461  			},
   462  		}
   463  		subj := fmt.Sprintf(JSApiConsumerCreateExT, stream, cons, stream)
   464  		req, err := json.Marshal(cc)
   465  		require_NoError(t, err)
   466  
   467  		reply, err := nc.Request(subj, req, 2*time.Second)
   468  		require_NoError(t, err)
   469  
   470  		var cresp JSApiConsumerCreateResponse
   471  		json.Unmarshal(reply.Data, &cresp)
   472  		if cresp.Error != nil {
   473  			t.Fatalf("Unexpected error: %v", cresp.Error)
   474  		}
   475  		require_Equal(t, cresp.Name, cons)
   476  		require_Equal(t, cresp.Config.Name, cons)
   477  
    478  		// Resend the add request, but first change something that the server
    479  		// should reject since the consumer already exists and we don't support
    480  		// updating the consumer that way.
   481  		cc.Config.DeliverPolicy = DeliverNew
   482  		req, err = json.Marshal(cc)
   483  		require_NoError(t, err)
   484  		reply, err = nc.Request(subj, req, 2*time.Second)
   485  		require_NoError(t, err)
   486  
   487  		cresp = JSApiConsumerCreateResponse{}
   488  		json.Unmarshal(reply.Data, &cresp)
   489  		require_Error(t, cresp.Error, NewJSConsumerCreateError(errors.New("deliver policy can not be updated")))
   490  	}
   491  
   492  	t.Run("Standalone", func(t *testing.T) { test(t, s, "TEST", 1, "cons") })
   493  	t.Run("Clustered R1", func(t *testing.T) { test(t, c.randomServer(), "TEST2", 1, "cons2") })
   494  	t.Run("Clustered R3", func(t *testing.T) { test(t, c.randomServer(), "TEST3", 3, "cons3") })
   495  }
   496  
   497  func TestJetStreamClusterUserGivenConsNameWithLeaderChange(t *testing.T) {
   498  	c := createJetStreamClusterExplicit(t, "R5S", 5)
   499  	defer c.shutdown()
   500  
   501  	nc, js := jsClientConnect(t, c.randomServer())
   502  	defer nc.Close()
   503  
   504  	_, err := js.AddStream(&nats.StreamConfig{
   505  		Name:     "TEST",
   506  		Subjects: []string{"foo"},
   507  		Replicas: 3,
   508  	})
   509  	require_NoError(t, err)
   510  
   511  	c.waitOnStreamLeader(globalAccountName, "TEST")
   512  	for i := 0; i < 100; i++ {
   513  		sendStreamMsg(t, nc, "foo", "msg")
   514  	}
   515  
   516  	consName := "myephemeral"
   517  	cc := &CreateConsumerRequest{
   518  		Stream: "TEST",
   519  		Config: ConsumerConfig{
   520  			Name:              consName,
   521  			FilterSubject:     "foo",
   522  			InactiveThreshold: time.Hour,
   523  			Replicas:          3,
   524  		},
   525  	}
   526  	subj := fmt.Sprintf(JSApiConsumerCreateExT, "TEST", consName, "foo")
   527  	req, err := json.Marshal(cc)
   528  	require_NoError(t, err)
   529  
   530  	reply, err := nc.Request(subj, req, 2*time.Second)
   531  	require_NoError(t, err)
   532  
   533  	var cresp JSApiConsumerCreateResponse
   534  	json.Unmarshal(reply.Data, &cresp)
   535  	if cresp.Error != nil {
   536  		t.Fatalf("Unexpected error: %v", cresp.Error)
   537  	}
   538  	require_Equal(t, cresp.Name, consName)
   539  	require_Equal(t, cresp.Config.Name, consName)
   540  
   541  	// Consumer leader name
   542  	clname := cresp.ConsumerInfo.Cluster.Leader
   543  
   544  	nreq := &JSApiConsumerGetNextRequest{Batch: 1, Expires: time.Second}
   545  	req, err = json.Marshal(nreq)
   546  	require_NoError(t, err)
   547  
   548  	sub := natsSubSync(t, nc, "xxx")
   549  	rsubj := fmt.Sprintf(JSApiRequestNextT, "TEST", consName)
   550  	err = nc.PublishRequest(rsubj, "xxx", req)
   551  	require_NoError(t, err)
   552  
   553  	msg := natsNexMsg(t, sub, time.Second)
   554  	require_Equal(t, string(msg.Data), "msg")
   555  
   556  	// Shutdown the consumer leader
   557  	cl := c.serverByName(clname)
   558  	cl.Shutdown()
   559  
   560  	// Wait for a bit to be sure that we lost leadership
   561  	time.Sleep(250 * time.Millisecond)
   562  
   563  	// Wait for new leader
   564  	c.waitOnStreamLeader(globalAccountName, "TEST")
   565  	c.waitOnConsumerLeader(globalAccountName, "TEST", consName)
   566  
   567  	// Make sure we can still consume.
   568  	for i := 0; i < 2; i++ {
   569  		err = nc.PublishRequest(rsubj, "xxx", req)
   570  		require_NoError(t, err)
   571  
   572  		msg = natsNexMsg(t, sub, time.Second)
   573  		if len(msg.Data) == 0 {
   574  			continue
   575  		}
   576  		require_Equal(t, string(msg.Data), "msg")
   577  		return
   578  	}
   579  	t.Fatal("Did not receive message")
   580  }
   581  
   582  func TestJetStreamClusterMirrorCrossDomainOnLeadnodeNoSystemShare(t *testing.T) {
   583  	tmpl := strings.Replace(jsClusterAccountsTempl, "store_dir:", "domain: HUB, store_dir:", 1)
   584  	c := createJetStreamCluster(t, tmpl, "CORE", _EMPTY_, 3, 18033, true)
   585  	defer c.shutdown()
   586  
   587  	tmpl = strings.Replace(jsClusterTemplWithSingleLeafNode, "store_dir:", "domain: SPOKE, store_dir:", 1)
   588  	ln := c.createLeafNodeWithTemplateNoSystem("LN-SPOKE", tmpl)
   589  	defer ln.Shutdown()
   590  
   591  	checkLeafNodeConnectedCount(t, ln, 1)
   592  
   593  	// Create origin stream in hub.
   594  	nc, js := jsClientConnect(t, c.randomServer())
   595  	defer nc.Close()
   596  
   597  	_, err := js.AddStream(&nats.StreamConfig{
   598  		Name:              "TEST",
   599  		Subjects:          []string{"foo"},
   600  		MaxMsgsPerSubject: 10,
   601  		AllowDirect:       true,
   602  	})
   603  	require_NoError(t, err)
   604  
   605  	// Now create the mirror on the leafnode.
   606  	lnc, ljs := jsClientConnect(t, ln)
   607  	defer lnc.Close()
   608  
   609  	_, err = ljs.AddStream(&nats.StreamConfig{
   610  		Name:              "M",
   611  		MaxMsgsPerSubject: 10,
   612  		AllowDirect:       true,
   613  		MirrorDirect:      true,
   614  		Mirror: &nats.StreamSource{
   615  			Name: "TEST",
   616  			External: &nats.ExternalStream{
   617  				APIPrefix: "$JS.HUB.API",
   618  			},
   619  		},
   620  	})
   621  	require_NoError(t, err)
   622  
   623  	// Publish to the hub stream and make sure the mirror gets those messages.
   624  	for i := 0; i < 20; i++ {
   625  		js.Publish("foo", nil)
   626  	}
   627  
   628  	si, err := js.StreamInfo("TEST")
   629  	require_NoError(t, err)
   630  	require_True(t, si.State.Msgs == 10)
   631  
   632  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
   633  		si, err := ljs.StreamInfo("M")
   634  		require_NoError(t, err)
   635  		if si.State.Msgs == 10 {
   636  			return nil
   637  		}
   638  		return fmt.Errorf("State not current: %+v", si.State)
   639  	})
   640  }
   641  
   642  func TestJetStreamClusterFirstSeqMismatch(t *testing.T) {
   643  	c := createJetStreamClusterWithTemplateAndModHook(t, jsClusterTempl, "C", 3,
   644  		func(serverName, clusterName, storeDir, conf string) string {
   645  			tf := createTempFile(t, "")
   646  			logName := tf.Name()
   647  			tf.Close()
   648  			return fmt.Sprintf("%s\nlogfile: '%s'", conf, logName)
   649  		})
   650  	defer c.shutdown()
   651  
   652  	rs := c.randomServer()
   653  	nc, js := jsClientConnect(t, rs)
   654  	defer nc.Close()
   655  
   656  	_, err := js.AddStream(&nats.StreamConfig{
   657  		Name:     "TEST",
   658  		Subjects: []string{"foo"},
   659  		Replicas: 3,
   660  		MaxAge:   2 * time.Second,
   661  	})
   662  	require_NoError(t, err)
   663  
   664  	c.waitOnStreamLeader(globalAccountName, "TEST")
   665  
   666  	mset, err := c.streamLeader(globalAccountName, "TEST").GlobalAccount().lookupStream("TEST")
   667  	require_NoError(t, err)
   668  	node := mset.raftNode()
   669  
   670  	nl := c.randomNonStreamLeader(globalAccountName, "TEST")
   671  	if rs == nl {
   672  		nc.Close()
   673  		for _, s := range c.servers {
   674  			if s != nl {
   675  				nc, _ = jsClientConnect(t, s)
   676  				defer nc.Close()
   677  				break
   678  			}
   679  		}
   680  	}
   681  
   682  	wg := sync.WaitGroup{}
   683  	wg.Add(1)
   684  	ch := make(chan struct{})
   685  	go func() {
   686  		defer wg.Done()
   687  		for i := 0; ; i++ {
   688  			sendStreamMsg(t, nc, "foo", "msg")
   689  			select {
   690  			case <-ch:
   691  				return
   692  			default:
   693  			}
   694  		}
   695  	}()
   696  
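         	// Publishing continues in the background while messages age out (MaxAge is 2s).
         	// Shut down a non-leader, install a snapshot on the leader, then restart the
         	// follower and verify it catches up without logging a first sequence mismatch.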
   697  	time.Sleep(2500 * time.Millisecond)
   698  	nl.Shutdown()
   699  
   700  	time.Sleep(500 * time.Millisecond)
   701  	node.InstallSnapshot(mset.stateSnapshot())
   702  	time.Sleep(3500 * time.Millisecond)
   703  
   704  	c.restartServer(nl)
   705  	c.waitOnAllCurrent()
   706  
   707  	close(ch)
   708  	wg.Wait()
   709  
   710  	log := nl.getOpts().LogFile
   711  	nl.Shutdown()
   712  
   713  	content, err := os.ReadFile(log)
   714  	require_NoError(t, err)
   715  	if bytes.Contains(content, []byte(errFirstSequenceMismatch.Error())) {
   716  		t.Fatalf("First sequence mismatch occurred!")
   717  	}
   718  }
   719  
   720  func TestJetStreamClusterConsumerInactiveThreshold(t *testing.T) {
   721  	// Create a standalone, a cluster, and a super cluster
   722  
   723  	s := RunBasicJetStreamServer(t)
   724  	defer s.Shutdown()
   725  
   726  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   727  	defer c.shutdown()
   728  
   729  	sc := createJetStreamSuperCluster(t, 3, 2)
   730  	defer sc.shutdown()
   731  
   732  	test := func(t *testing.T, c *cluster, s *Server, replicas int) {
   733  		if c != nil {
   734  			s = c.randomServer()
   735  		}
   736  		nc, js := jsClientConnect(t, s)
   737  		defer nc.Close()
   738  
   739  		sname := fmt.Sprintf("TEST%d", replicas)
   740  		_, err := js.AddStream(&nats.StreamConfig{
   741  			Name:     sname,
   742  			Subjects: []string{sname},
   743  			Replicas: replicas,
   744  		})
   745  		require_NoError(t, err)
   746  
   747  		if c != nil {
   748  			c.waitOnStreamLeader(globalAccountName, sname)
   749  		}
   750  
   751  		for i := 0; i < 10; i++ {
   752  			js.PublishAsync(sname, []byte("ok"))
   753  		}
   754  		select {
   755  		case <-js.PublishAsyncComplete():
   756  		case <-time.After(5 * time.Second):
   757  			t.Fatalf("Did not receive completion signal")
   758  		}
   759  
   760  		waitOnCleanup := func(ci *nats.ConsumerInfo) {
   761  			t.Helper()
   762  			checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
   763  				_, err := js.ConsumerInfo(ci.Stream, ci.Name)
   764  				if err == nil {
   765  					return fmt.Errorf("Consumer still present")
   766  				}
   767  				return nil
   768  			})
   769  		}
   770  
   771  		// Test to make sure inactive threshold is enforced for all types.
   772  		// Ephemeral and Durable, both push and pull.
   773  
   774  		// Ephemeral Push (no bind to deliver subject)
   775  		ci, err := js.AddConsumer(sname, &nats.ConsumerConfig{
   776  			DeliverSubject:    "_no_bind_",
   777  			InactiveThreshold: 50 * time.Millisecond,
   778  		})
   779  		require_NoError(t, err)
   780  		waitOnCleanup(ci)
   781  
   782  		// Ephemeral Pull
   783  		ci, err = js.AddConsumer(sname, &nats.ConsumerConfig{
   784  			AckPolicy:         nats.AckExplicitPolicy,
   785  			InactiveThreshold: 50 * time.Millisecond,
   786  		})
   787  		require_NoError(t, err)
   788  		waitOnCleanup(ci)
   789  
   790  		// Support InactiveThresholds for Durables as well.
   791  
   792  		// Durable Push (no bind to deliver subject)
   793  		ci, err = js.AddConsumer(sname, &nats.ConsumerConfig{
   794  			Durable:           "d1",
   795  			DeliverSubject:    "_no_bind_",
   796  			InactiveThreshold: 50 * time.Millisecond,
   797  		})
   798  		require_NoError(t, err)
   799  		waitOnCleanup(ci)
   800  
    801  		// Durable Push (no bind to deliver subject) with an inactivity
   802  		// threshold set after creation
   803  		ci, err = js.AddConsumer(sname, &nats.ConsumerConfig{
   804  			Durable:        "d2",
   805  			DeliverSubject: "_no_bind_",
   806  		})
   807  		require_NoError(t, err)
   808  		if c != nil {
   809  			c.waitOnConsumerLeader(globalAccountName, sname, "d2")
   810  		}
   811  		_, err = js.UpdateConsumer(sname, &nats.ConsumerConfig{
   812  			Durable:           "d2",
   813  			DeliverSubject:    "_no_bind_",
   814  			InactiveThreshold: 50 * time.Millisecond,
   815  		})
   816  		require_NoError(t, err)
   817  		waitOnCleanup(ci)
   818  
   819  		// Durable Pull
   820  		ci, err = js.AddConsumer(sname, &nats.ConsumerConfig{
   821  			Durable:           "d3",
   822  			AckPolicy:         nats.AckExplicitPolicy,
   823  			InactiveThreshold: 50 * time.Millisecond,
   824  		})
   825  		require_NoError(t, err)
   826  		waitOnCleanup(ci)
   827  
   828  		// Durable Pull with an inactivity threshold set after creation
   829  		ci, err = js.AddConsumer(sname, &nats.ConsumerConfig{
   830  			Durable:   "d4",
   831  			AckPolicy: nats.AckExplicitPolicy,
   832  		})
   833  		require_NoError(t, err)
   834  		if c != nil {
   835  			c.waitOnConsumerLeader(globalAccountName, sname, "d4")
   836  		}
   837  		_, err = js.UpdateConsumer(sname, &nats.ConsumerConfig{
   838  			Durable:           "d4",
   839  			AckPolicy:         nats.AckExplicitPolicy,
   840  			InactiveThreshold: 50 * time.Millisecond,
   841  		})
   842  		require_NoError(t, err)
   843  		waitOnCleanup(ci)
   844  	}
   845  
   846  	t.Run("standalone", func(t *testing.T) { test(t, nil, s, 1) })
   847  	t.Run("cluster-r1", func(t *testing.T) { test(t, c, nil, 1) })
   848  	t.Run("cluster-r3", func(t *testing.T) { test(t, c, nil, 3) })
   849  	t.Run("super-cluster-r1", func(t *testing.T) { test(t, sc.randomCluster(), nil, 1) })
   850  	t.Run("super-cluster-r3", func(t *testing.T) { test(t, sc.randomCluster(), nil, 3) })
   851  }
   852  
   853  // To capture our false warnings for clustered stream lag.
   854  type testStreamLagWarnLogger struct {
   855  	DummyLogger
   856  	ch chan string
   857  }
   858  
   859  func (l *testStreamLagWarnLogger) Warnf(format string, v ...interface{}) {
   860  	msg := fmt.Sprintf(format, v...)
   861  	if strings.Contains(msg, "has high message lag") {
   862  		select {
   863  		case l.ch <- msg:
   864  		default:
   865  		}
   866  	}
   867  }
   868  
    869  // False triggering of warnings on stream lag occurred because we were not offsetting by failures.
   870  func TestJetStreamClusterStreamLagWarning(t *testing.T) {
   871  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   872  	defer c.shutdown()
   873  
   874  	nc, js := jsClientConnect(t, c.randomServer())
   875  	defer nc.Close()
   876  
   877  	_, err := js.AddStream(&nats.StreamConfig{
   878  		Name:     "TEST",
   879  		Subjects: []string{"foo"},
   880  		Replicas: 3,
   881  	})
   882  	require_NoError(t, err)
   883  
   884  	sl := c.streamLeader("$G", "TEST")
   885  
   886  	l := &testStreamLagWarnLogger{ch: make(chan string, 10)}
   887  	sl.SetLogger(l, false, false)
   888  
   889  	// We only need to trigger post RAFT propose failures that increment mset.clfs.
   890  	// Dedupe with msgIDs is one, so we will use that.
   891  	m := nats.NewMsg("foo")
   892  	m.Data = []byte("OK")
   893  	m.Header.Set(JSMsgId, "zz")
   894  
   895  	// Make sure we know we will trip the warning threshold.
   896  	for i := 0; i < 2*streamLagWarnThreshold; i++ {
   897  		js.PublishMsgAsync(m)
   898  	}
   899  	select {
   900  	case <-js.PublishAsyncComplete():
   901  	case <-time.After(5 * time.Second):
   902  		t.Fatalf("Did not receive completion signal")
   903  	}
   904  
   905  	select {
   906  	case msg := <-l.ch:
   907  		t.Fatalf("Unexpected msg lag warning seen: %s", msg)
   908  	case <-time.After(100 * time.Millisecond):
   909  		// OK
   910  	}
   911  }
   912  
   913  // https://github.com/nats-io/nats-server/issues/3603
   914  func TestJetStreamClusterSignalPullConsumersOnDelete(t *testing.T) {
   915  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   916  	defer c.shutdown()
   917  
   918  	nc, js := jsClientConnect(t, c.randomServer())
   919  	defer nc.Close()
   920  
   921  	_, err := js.AddStream(&nats.StreamConfig{
   922  		Name:     "TEST",
   923  		Subjects: []string{"foo"},
   924  		Replicas: 3,
   925  	})
   926  	require_NoError(t, err)
   927  
   928  	// Create 2 pull consumers.
   929  	sub1, err := js.PullSubscribe("foo", "d1")
   930  	require_NoError(t, err)
   931  
   932  	sub2, err := js.PullSubscribe("foo", "d2")
   933  	require_NoError(t, err)
   934  
   935  	// We want to make sure we get kicked out prior to the timeout
   936  	// when consumers are being deleted or the parent stream is being deleted.
    937  	// Note this should be lower case; the Go client needs to be updated.
   938  	expectedErr := errors.New("nats: consumer deleted")
   939  
   940  	// Queue up the delete for sub1
   941  	time.AfterFunc(250*time.Millisecond, func() { js.DeleteConsumer("TEST", "d1") })
   942  	start := time.Now()
   943  	_, err = sub1.Fetch(1, nats.MaxWait(10*time.Second))
   944  	require_Error(t, err, expectedErr)
   945  
   946  	// Check that we bailed early.
   947  	if time.Since(start) > time.Second {
    948  		t.Fatalf("Took too long to bail out on consumer delete")
   949  	}
   950  
   951  	time.AfterFunc(250*time.Millisecond, func() { js.DeleteStream("TEST") })
   952  	start = time.Now()
   953  	_, err = sub2.Fetch(1, nats.MaxWait(10*time.Second))
   954  	require_Error(t, err, expectedErr)
   955  	if time.Since(start) > time.Second {
    956  		t.Fatalf("Took too long to bail out on stream delete")
   957  	}
   958  }
   959  
   960  // https://github.com/nats-io/nats-server/issues/3559
   961  func TestJetStreamClusterSourceWithOptStartTime(t *testing.T) {
   962  	s := RunBasicJetStreamServer(t)
   963  	defer s.Shutdown()
   964  
   965  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   966  	defer c.shutdown()
   967  
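         	// A purged source/mirror configured with OptStartTime should stay empty after a
         	// restart instead of re-ingesting old messages from the origin stream.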
   968  	test := func(t *testing.T, c *cluster, s *Server) {
   969  
   970  		replicas := 1
   971  		if c != nil {
   972  			s = c.randomServer()
   973  			replicas = 3
   974  		}
   975  		nc, js := jsClientConnect(t, s)
   976  		defer nc.Close()
   977  
   978  		_, err := js.AddStream(&nats.StreamConfig{
   979  			Name:     "TEST",
   980  			Subjects: []string{"foo"},
   981  			Replicas: replicas,
   982  		})
   983  		require_NoError(t, err)
   984  
   985  		yesterday := time.Now().Add(-24 * time.Hour)
   986  
   987  		_, err = js.AddStream(&nats.StreamConfig{
   988  			Name:     "SOURCE",
   989  			Replicas: replicas,
   990  			Sources: []*nats.StreamSource{{
   991  				Name:         "TEST",
   992  				OptStartTime: &yesterday,
   993  			}},
   994  		})
   995  		require_NoError(t, err)
   996  
   997  		_, err = js.AddStream(&nats.StreamConfig{
   998  			Name:     "MIRROR",
   999  			Replicas: replicas,
  1000  			Mirror: &nats.StreamSource{
  1001  				Name:         "TEST",
  1002  				OptStartTime: &yesterday,
  1003  			},
  1004  		})
  1005  		require_NoError(t, err)
  1006  
  1007  		total := 10
  1008  		for i := 0; i < total; i++ {
  1009  			sendStreamMsg(t, nc, "foo", "hello")
  1010  		}
  1011  
  1012  		checkCount := func(sname string, expected int) {
  1013  			t.Helper()
  1014  			checkFor(t, 10*time.Second, 50*time.Millisecond, func() error {
  1015  				si, err := js.StreamInfo(sname)
  1016  				if err != nil {
  1017  					return err
  1018  				}
  1019  				if n := si.State.Msgs; n != uint64(expected) {
  1020  					return fmt.Errorf("Expected stream %q to have %v messages, got %v", sname, expected, n)
  1021  				}
  1022  				return nil
  1023  			})
  1024  		}
  1025  
  1026  		checkCount("TEST", 10)
  1027  		checkCount("SOURCE", 10)
  1028  		checkCount("MIRROR", 10)
  1029  
  1030  		err = js.PurgeStream("SOURCE")
  1031  		require_NoError(t, err)
  1032  		err = js.PurgeStream("MIRROR")
  1033  		require_NoError(t, err)
  1034  
  1035  		checkCount("TEST", 10)
  1036  		checkCount("SOURCE", 0)
  1037  		checkCount("MIRROR", 0)
  1038  
  1039  		nc.Close()
  1040  		if c != nil {
  1041  			c.stopAll()
  1042  			c.restartAll()
  1043  
  1044  			c.waitOnStreamLeader(globalAccountName, "TEST")
  1045  			c.waitOnStreamLeader(globalAccountName, "SOURCE")
  1046  			c.waitOnStreamLeader(globalAccountName, "MIRROR")
  1047  
  1048  			s = c.randomServer()
  1049  		} else {
  1050  			sd := s.JetStreamConfig().StoreDir
  1051  			s.Shutdown()
  1052  			s = RunJetStreamServerOnPort(-1, sd)
  1053  			defer s.Shutdown()
  1054  		}
  1055  
   1056  		// Wait a bit before checking because syncing (even with the defect)
   1057  		// would not happen right away. I tried with 1 sec and the test would pass,
   1058  		// so it needs to be at least that much.
  1059  		time.Sleep(2 * time.Second)
  1060  
  1061  		nc, js = jsClientConnect(t, s)
  1062  		defer nc.Close()
  1063  		checkCount("TEST", 10)
  1064  		checkCount("SOURCE", 0)
  1065  		checkCount("MIRROR", 0)
  1066  	}
  1067  
  1068  	t.Run("standalone", func(t *testing.T) { test(t, nil, s) })
  1069  	t.Run("cluster", func(t *testing.T) { test(t, c, nil) })
  1070  }
  1071  
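         // networkCableUnplugged wraps a route connection to simulate a network partition:
         // while unplugged, writes are buffered instead of sent and reads block until the
         // cable is plugged back in.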
  1072  type networkCableUnplugged struct {
  1073  	net.Conn
  1074  	sync.Mutex
  1075  	unplugged bool
  1076  	wb        bytes.Buffer
  1077  	wg        sync.WaitGroup
  1078  }
  1079  
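         // Write buffers data while unplugged; on the first write after being plugged back
         // in it flushes anything buffered before resuming normal writes.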
  1080  func (c *networkCableUnplugged) Write(b []byte) (int, error) {
  1081  	c.Lock()
  1082  	if c.unplugged {
  1083  		c.wb.Write(b)
  1084  		c.Unlock()
  1085  		return len(b), nil
  1086  	} else if c.wb.Len() > 0 {
  1087  		c.wb.Write(b)
  1088  		buf := c.wb.Bytes()
  1089  		c.wb.Reset()
  1090  		c.Unlock()
  1091  		if _, err := c.Conn.Write(buf); err != nil {
  1092  			return 0, err
  1093  		}
  1094  		return len(b), nil
  1095  	}
  1096  	c.Unlock()
  1097  	return c.Conn.Write(b)
  1098  }
  1099  
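         // Read blocks while unplugged and resumes once the WaitGroup is released by the
         // test plugging the cable back in.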
  1100  func (c *networkCableUnplugged) Read(b []byte) (int, error) {
  1101  	c.Lock()
  1102  	wait := c.unplugged
  1103  	c.Unlock()
  1104  	if wait {
  1105  		c.wg.Wait()
  1106  	}
  1107  	return c.Conn.Read(b)
  1108  }
  1109  
  1110  func TestJetStreamClusterScaleDownWhileNoQuorum(t *testing.T) {
  1111  	c := createJetStreamClusterExplicit(t, "R5S", 5)
  1112  	defer c.shutdown()
  1113  
  1114  	s := c.randomServer()
  1115  	nc, js := jsClientConnect(t, s)
  1116  	defer nc.Close()
  1117  
  1118  	si, err := js.AddStream(&nats.StreamConfig{
  1119  		Name:     "TEST",
  1120  		Subjects: []string{"foo"},
  1121  		Replicas: 2,
  1122  	})
  1123  	require_NoError(t, err)
  1124  
  1125  	for i := 0; i < 1000; i++ {
  1126  		sendStreamMsg(t, nc, "foo", "msg")
  1127  	}
  1128  
   1129  	// Let's have a server from this R2 stream be network partitioned.
   1130  	// We will take the leader, but it doesn't have to be.
  1131  	// To simulate partition, we will replace all its routes with a
  1132  	// special connection that drops messages.
  1133  	sl := c.serverByName(si.Cluster.Leader)
  1134  	if s == sl {
  1135  		nc.Close()
  1136  		for s = c.randomServer(); s != sl; s = c.randomServer() {
  1137  		}
  1138  		nc, js = jsClientConnect(t, s)
  1139  		defer nc.Close()
  1140  	}
  1141  
  1142  	sl.mu.Lock()
  1143  	sl.forEachRoute(func(r *client) {
  1144  		r.mu.Lock()
  1145  		ncu := &networkCableUnplugged{Conn: r.nc, unplugged: true}
  1146  		ncu.wg.Add(1)
  1147  		r.nc = ncu
  1148  		r.mu.Unlock()
  1149  	})
  1150  	sl.mu.Unlock()
  1151  
   1152  	// Wait for the stream to report that it no longer has a leader.
  1153  	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
  1154  		si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second))
  1155  		if err != nil {
  1156  			return err
  1157  		}
  1158  		if si.Cluster.Leader == _EMPTY_ {
  1159  			return nil
  1160  		}
  1161  		return fmt.Errorf("stream still has a leader")
  1162  	})
  1163  
   1164  	// If the meta leader was on the same server as the stream leader, make sure
   1165  	// a new meta leader is elected to receive the update request.
  1166  	c.waitOnLeader()
  1167  
   1168  	// Now try to edit the stream by making it an R1. In some cases we get
   1169  	// a context deadline error, in others no error, so don't check the returned error.
  1170  	js.UpdateStream(&nats.StreamConfig{
  1171  		Name:     "TEST",
  1172  		Subjects: []string{"foo"},
  1173  		Replicas: 1,
  1174  	}, nats.MaxWait(5*time.Second))
  1175  
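         	// Plug the routes back in: unblock pending reads and let buffered data flush on
         	// the next write.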
  1176  	sl.mu.Lock()
  1177  	sl.forEachRoute(func(r *client) {
  1178  		r.mu.Lock()
  1179  		ncu := r.nc.(*networkCableUnplugged)
  1180  		ncu.Lock()
  1181  		ncu.unplugged = false
  1182  		ncu.wg.Done()
  1183  		ncu.Unlock()
  1184  		r.mu.Unlock()
  1185  	})
  1186  	sl.mu.Unlock()
  1187  
  1188  	checkClusterFormed(t, c.servers...)
  1189  	c.waitOnStreamLeader(globalAccountName, "TEST")
  1190  }
  1191  
   1192  // We noticed that ha_assets enforcement seemed not to be upheld when assets were created in rapid fashion.
  1193  func TestJetStreamClusterHAssetsEnforcement(t *testing.T) {
  1194  	tmpl := strings.Replace(jsClusterTempl, "store_dir:", "limits: {max_ha_assets: 2}, store_dir:", 1)
  1195  	c := createJetStreamClusterWithTemplateAndModHook(t, tmpl, "R3S", 3, nil)
  1196  	defer c.shutdown()
  1197  
  1198  	nc, js := jsClientConnect(t, c.randomServer())
  1199  	defer nc.Close()
  1200  
  1201  	_, err := js.AddStream(&nats.StreamConfig{
  1202  		Name:     "TEST-1",
  1203  		Subjects: []string{"foo"},
  1204  		Replicas: 3,
  1205  	})
  1206  	require_NoError(t, err)
  1207  
  1208  	_, err = js.AddStream(&nats.StreamConfig{
  1209  		Name:     "TEST-2",
  1210  		Subjects: []string{"bar"},
  1211  		Replicas: 3,
  1212  	})
  1213  	require_NoError(t, err)
  1214  
  1215  	exceededErrs := []error{errors.New("system limit reached"), errors.New("no suitable peers")}
  1216  
  1217  	// Should fail.
  1218  	_, err = js.AddStream(&nats.StreamConfig{
  1219  		Name:     "TEST-3",
  1220  		Subjects: []string{"baz"},
  1221  		Replicas: 3,
  1222  	})
  1223  	require_Error(t, err, exceededErrs...)
  1224  }
  1225  
  1226  func TestJetStreamClusterInterestStreamConsumer(t *testing.T) {
  1227  	c := createJetStreamClusterExplicit(t, "R5S", 5)
  1228  	defer c.shutdown()
  1229  
  1230  	nc, js := jsClientConnect(t, c.randomServer())
  1231  	defer nc.Close()
  1232  
  1233  	_, err := js.AddStream(&nats.StreamConfig{
  1234  		Name:      "TEST",
  1235  		Subjects:  []string{"foo"},
  1236  		Retention: nats.InterestPolicy,
  1237  		Replicas:  3,
  1238  	})
  1239  	require_NoError(t, err)
  1240  
  1241  	var subs []*nats.Subscription
  1242  	ns := 5
  1243  
  1244  	for i := 0; i < ns; i++ {
  1245  		dn := fmt.Sprintf("d%d", i)
  1246  		sub, err := js.PullSubscribe("foo", dn)
  1247  		require_NoError(t, err)
  1248  		subs = append(subs, sub)
  1249  	}
  1250  
  1251  	// Send 10 msgs
  1252  	n := 10
  1253  	for i := 0; i < n; i++ {
  1254  		sendStreamMsg(t, nc, "foo", "msg")
  1255  	}
  1256  
  1257  	// Collect all the messages.
  1258  	var msgs []*nats.Msg
  1259  	for _, sub := range subs {
  1260  		lmsgs := fetchMsgs(t, sub, n, time.Second)
  1261  		if len(lmsgs) != n {
  1262  			t.Fatalf("Did not receive all msgs: %d vs %d", len(lmsgs), n)
  1263  		}
  1264  		msgs = append(msgs, lmsgs...)
  1265  	}
  1266  
  1267  	// Shuffle
  1268  	rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  1269  	for _, m := range msgs {
  1270  		m.AckSync()
  1271  	}
  1272  	// Make sure replicated acks are processed.
  1273  	time.Sleep(250 * time.Millisecond)
  1274  
  1275  	si, err := js.StreamInfo("TEST")
  1276  	require_NoError(t, err)
  1277  
  1278  	if si.State.Msgs != 0 {
  1279  		t.Fatalf("Should not have any messages left: %d of %d", si.State.Msgs, n)
  1280  	}
  1281  }
  1282  
  1283  func TestJetStreamClusterNoPanicOnStreamInfoWhenNoLeaderYet(t *testing.T) {
  1284  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1285  	defer c.shutdown()
  1286  
  1287  	nc := natsConnect(t, c.randomServer().ClientURL())
  1288  	defer nc.Close()
  1289  
  1290  	js, _ := nc.JetStream(nats.MaxWait(500 * time.Millisecond))
  1291  
  1292  	wg := sync.WaitGroup{}
  1293  	wg.Add(1)
  1294  	ch := make(chan struct{})
  1295  	go func() {
  1296  		defer wg.Done()
  1297  
  1298  		for {
  1299  			js.StreamInfo("TEST")
  1300  			select {
  1301  			case <-ch:
  1302  				return
  1303  			case <-time.After(15 * time.Millisecond):
  1304  			}
  1305  		}
  1306  	}()
  1307  
  1308  	time.Sleep(250 * time.Millisecond)
  1309  
  1310  	// Don't care if this succeeds or not (could get a context deadline
  1311  	// due to the low MaxWait() when creating the context).
  1312  	js.AddStream(&nats.StreamConfig{
  1313  		Name:     "TEST",
  1314  		Subjects: []string{"foo"},
  1315  		Replicas: 3,
  1316  	})
  1317  
  1318  	close(ch)
  1319  	wg.Wait()
  1320  }
  1321  
  1322  // Issue https://github.com/nats-io/nats-server/issues/3630
  1323  func TestJetStreamClusterPullConsumerAcksExtendInactivityThreshold(t *testing.T) {
  1324  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1325  	defer c.shutdown()
  1326  
  1327  	nc, js := jsClientConnect(t, c.randomServer())
  1328  	defer nc.Close()
  1329  
  1330  	js.AddStream(&nats.StreamConfig{
  1331  		Name:     "TEST",
  1332  		Subjects: []string{"foo"},
  1333  		Replicas: 3,
  1334  	})
  1335  
  1336  	n := 10
  1337  	for i := 0; i < n; i++ {
  1338  		sendStreamMsg(t, nc, "foo", "msg")
  1339  	}
  1340  
  1341  	// Pull Consumer
  1342  	sub, err := js.PullSubscribe("foo", "d", nats.InactiveThreshold(time.Second))
  1343  	require_NoError(t, err)
  1344  
  1345  	fetchMsgs(t, sub, n/2, time.Second)
  1346  	// Will wait for .5s.
  1347  	time.Sleep(500 * time.Millisecond)
  1348  	msgs := fetchMsgs(t, sub, n/2, time.Second)
  1349  	if len(msgs) != n/2 {
  1350  		t.Fatalf("Did not receive msgs: %d vs %d", len(msgs), n/2)
  1351  	}
  1352  
  1353  	// Wait for .5s.
  1354  	time.Sleep(500 * time.Millisecond)
  1355  	msgs[0].Ack() // Ack
  1356  	// Wait another .5s.
  1357  	time.Sleep(500 * time.Millisecond)
  1358  	msgs[1].Nak() // Nak
  1359  	// Wait another .5s.
  1360  	time.Sleep(500 * time.Millisecond)
  1361  	msgs[2].Term() // Term
  1362  	time.Sleep(500 * time.Millisecond)
  1363  	msgs[3].InProgress() // WIP
  1364  
  1365  	// The above should have kept the consumer alive.
  1366  	_, err = js.ConsumerInfo("TEST", "d")
  1367  	require_NoError(t, err)
  1368  
  1369  	// Make sure it gets cleaned up.
  1370  	time.Sleep(2 * time.Second)
  1371  	_, err = js.ConsumerInfo("TEST", "d")
  1372  	require_Error(t, err, nats.ErrConsumerNotFound)
  1373  }
  1374  
  1375  // https://github.com/nats-io/nats-server/issues/3677
  1376  func TestJetStreamClusterParallelStreamCreation(t *testing.T) {
  1377  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1378  	defer c.shutdown()
  1379  
  1380  	np := 100
  1381  
  1382  	startCh := make(chan bool)
  1383  	errCh := make(chan error, np)
  1384  
  1385  	wg := sync.WaitGroup{}
  1386  	wg.Add(np)
  1387  
  1388  	start := sync.WaitGroup{}
  1389  	start.Add(np)
  1390  
  1391  	for i := 0; i < np; i++ {
  1392  		go func() {
  1393  			defer wg.Done()
  1394  
  1395  			// Individual connection
  1396  			nc, js := jsClientConnect(t, c.randomServer())
  1397  			defer nc.Close()
  1398  			// Signal we are ready
  1399  			start.Done()
  1400  			// Make them all fire at once.
  1401  			<-startCh
  1402  
  1403  			if _, err := js.AddStream(&nats.StreamConfig{
  1404  				Name:     "TEST",
  1405  				Subjects: []string{"common.*.*"},
  1406  				Replicas: 3,
  1407  			}); err != nil {
  1408  				errCh <- err
  1409  			}
  1410  		}()
  1411  	}
  1412  
  1413  	start.Wait()
  1414  	close(startCh)
  1415  	wg.Wait()
  1416  
  1417  	if len(errCh) > 0 {
  1418  		t.Fatalf("Expected no errors, got %d", len(errCh))
  1419  	}
  1420  }
  1421  
   1422  // In addition to the test above, if streams were created in parallel it could be
   1423  // that multiple raft groups would be created for the same asset.
  1424  func TestJetStreamClusterParallelStreamCreationDupeRaftGroups(t *testing.T) {
  1425  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1426  	defer c.shutdown()
  1427  
  1428  	np := 20
  1429  
  1430  	startCh := make(chan bool)
  1431  	wg := sync.WaitGroup{}
  1432  	wg.Add(np)
  1433  	for i := 0; i < np; i++ {
  1434  		go func() {
  1435  			defer wg.Done()
  1436  
  1437  			// Individual connection
  1438  			nc, _ := jsClientConnect(t, c.randomServer())
  1439  			js, _ := nc.JetStream(nats.MaxWait(time.Second))
  1440  			defer nc.Close()
  1441  
  1442  			// Make them all fire at once.
  1443  			<-startCh
  1444  
  1445  			// Ignore errors in this test, care about raft group and metastate.
  1446  			js.AddStream(&nats.StreamConfig{
  1447  				Name:     "TEST",
  1448  				Subjects: []string{"common.*.*"},
  1449  				Replicas: 3,
  1450  			})
  1451  		}()
  1452  	}
  1453  
  1454  	close(startCh)
  1455  	wg.Wait()
  1456  
  1457  	// Restart a server too.
  1458  	s := c.randomServer()
  1459  	s.Shutdown()
  1460  	s = c.restartServer(s)
  1461  	c.waitOnLeader()
  1462  	c.waitOnStreamLeader(globalAccountName, "TEST")
  1463  	// Check that this server has only two active raft nodes after restart.
  1464  	if nrn := s.numRaftNodes(); nrn != 2 {
  1465  		t.Fatalf("Expected only two active raft nodes, got %d", nrn)
  1466  	}
  1467  
  1468  	// Make sure we only have 2 unique raft groups for all servers.
  1469  	// One for meta, one for stream.
  1470  	expected := 2
  1471  	rg := make(map[string]struct{})
  1472  	for _, s := range c.servers {
  1473  		s.rnMu.RLock()
  1474  		for _, ni := range s.raftNodes {
  1475  			n := ni.(*raft)
  1476  			rg[n.Group()] = struct{}{}
  1477  		}
  1478  		s.rnMu.RUnlock()
  1479  	}
  1480  	if len(rg) != expected {
   1481  		t.Fatalf("Expected only %d distinct raft groups for all servers, got %d", expected, len(rg))
  1482  	}
  1483  }
  1484  
  1485  func TestJetStreamClusterParallelConsumerCreation(t *testing.T) {
  1486  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1487  	defer c.shutdown()
  1488  
  1489  	nc, js := jsClientConnect(t, c.randomServer())
  1490  	defer nc.Close()
  1491  
  1492  	_, err := js.AddStream(&nats.StreamConfig{
  1493  		Name:     "TEST",
  1494  		Subjects: []string{"common.*.*"},
  1495  		Replicas: 3,
  1496  	})
  1497  	require_NoError(t, err)
  1498  	c.waitOnStreamLeader(globalAccountName, "TEST")
  1499  
  1500  	np := 50
  1501  
  1502  	startCh := make(chan bool)
  1503  	errCh := make(chan error, np)
  1504  
  1505  	cfg := &nats.ConsumerConfig{
  1506  		Durable:  "dlc",
  1507  		Replicas: 3,
  1508  	}
  1509  
  1510  	wg := sync.WaitGroup{}
  1511  	swg := sync.WaitGroup{}
  1512  	wg.Add(np)
  1513  	swg.Add(np)
  1514  
  1515  	for i := 0; i < np; i++ {
  1516  		go func() {
  1517  			defer wg.Done()
  1518  
  1519  			// Individual connection
  1520  			nc, js := jsClientConnect(t, c.randomServer())
  1521  			defer nc.Close()
  1522  
  1523  			swg.Done()
  1524  
  1525  			// Make them all fire at once.
  1526  			<-startCh
  1527  
  1528  			if _, err := js.AddConsumer("TEST", cfg); err != nil {
  1529  				errCh <- err
  1530  			}
  1531  		}()
  1532  	}
  1533  
  1534  	swg.Wait()
  1535  	close(startCh)
  1536  
  1537  	wg.Wait()
  1538  
  1539  	if len(errCh) > 0 {
  1540  		t.Fatalf("Expected no errors, got %d", len(errCh))
  1541  	}
  1542  
  1543  	// Make sure we only have 3 unique raft groups for all servers.
  1544  	// One for meta, one for stream, one for consumer.
  1545  	expected := 3
  1546  	rg := make(map[string]struct{})
  1547  	for _, s := range c.servers {
  1548  		s.rnMu.RLock()
  1549  		for _, ni := range s.raftNodes {
  1550  			n := ni.(*raft)
  1551  			rg[n.Group()] = struct{}{}
  1552  		}
  1553  		s.rnMu.RUnlock()
  1554  	}
  1555  	if len(rg) != expected {
   1556  		t.Fatalf("Expected only %d distinct raft groups for all servers, got %d", expected, len(rg))
  1557  	}
  1558  }
  1559  
  1560  func TestJetStreamClusterGhostEphemeralsAfterRestart(t *testing.T) {
  1561  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1562  	defer c.shutdown()
  1563  
  1564  	nc, js := jsClientConnect(t, c.randomServer())
  1565  	defer nc.Close()
  1566  
  1567  	_, err := js.AddStream(&nats.StreamConfig{
  1568  		Name:     "TEST",
  1569  		Subjects: []string{"foo"},
  1570  		Replicas: 3,
  1571  	})
  1572  	require_NoError(t, err)
  1573  
  1574  	// Add in 100 memory based ephemerals.
  1575  	for i := 0; i < 100; i++ {
  1576  		_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  1577  			Replicas:          1,
  1578  			InactiveThreshold: time.Second,
  1579  			MemoryStorage:     true,
  1580  		})
  1581  		require_NoError(t, err)
  1582  	}
  1583  
  1584  	// Grab random server.
  1585  	rs := c.randomServer()
  1586  	// Now shutdown cluster.
  1587  	c.stopAll()
  1588  
  1589  	// Let the consumers all expire.
  1590  	time.Sleep(2 * time.Second)
  1591  
  1592  	// Restart first and wait so that we know it will try cleanup without a metaleader.
  1593  	c.restartServer(rs)
  1594  	time.Sleep(time.Second)
  1595  
  1596  	c.restartAll()
  1597  	c.waitOnLeader()
  1598  	c.waitOnStreamLeader(globalAccountName, "TEST")
  1599  
  1600  	nc, _ = jsClientConnect(t, c.randomServer())
  1601  	defer nc.Close()
  1602  
  1603  	subj := fmt.Sprintf(JSApiConsumerListT, "TEST")
  1604  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  1605  		m, err := nc.Request(subj, nil, time.Second)
  1606  		if err != nil {
  1607  			return err
  1608  		}
  1609  		var resp JSApiConsumerListResponse
  1610  		err = json.Unmarshal(m.Data, &resp)
  1611  		require_NoError(t, err)
  1612  		if len(resp.Consumers) != 0 {
  1613  			return fmt.Errorf("Still have %d consumers", len(resp.Consumers))
  1614  		}
  1615  		if len(resp.Missing) != 0 {
  1616  			return fmt.Errorf("Still have %d missing consumers", len(resp.Missing))
  1617  		}
  1618  
  1619  		return nil
  1620  	})
  1621  }
  1622  
  1623  func TestJetStreamClusterReplacementPolicyAfterPeerRemove(t *testing.T) {
   1624  	// R3 scenario where there is a redundant node in each unique cloud, so removing a peer should result in
   1625  	// an immediate replacement that also preserves cloud uniqueness.
  1626  
  1627  	sc := createJetStreamClusterExplicit(t, "PR9", 9)
  1628  	sc.waitOnPeerCount(9)
  1629  
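         	// reset nudges the server to resend its statsz/server info so that the tag
         	// changes applied below are picked up by the other servers.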
  1630  	reset := func(s *Server) {
  1631  		s.mu.Lock()
  1632  		rch := s.sys.resetCh
  1633  		s.mu.Unlock()
  1634  		if rch != nil {
  1635  			rch <- struct{}{}
  1636  		}
  1637  		s.sendStatszUpdate()
  1638  	}
  1639  
  1640  	tags := []string{"cloud:aws", "cloud:aws", "cloud:aws", "cloud:gcp", "cloud:gcp", "cloud:gcp", "cloud:az", "cloud:az", "cloud:az"}
  1641  
  1642  	var serverUTags = make(map[string]string)
  1643  
  1644  	for i, s := range sc.servers {
  1645  		s.optsMu.Lock()
  1646  		serverUTags[s.Name()] = tags[i]
  1647  		s.opts.Tags.Add(tags[i])
  1648  		s.opts.JetStreamUniqueTag = "cloud"
  1649  		s.optsMu.Unlock()
  1650  		reset(s)
  1651  	}
  1652  
  1653  	ml := sc.leader()
  1654  	js := ml.getJetStream()
  1655  	require_True(t, js != nil)
  1656  	js.mu.RLock()
  1657  	cc := js.cluster
  1658  	require_True(t, cc != nil)
  1659  
  1660  	// Walk and make sure all tags are registered.
  1661  	expires := time.Now().Add(10 * time.Second)
  1662  	for time.Now().Before(expires) {
  1663  		allOK := true
  1664  		for _, p := range cc.meta.Peers() {
  1665  			si, ok := ml.nodeToInfo.Load(p.ID)
  1666  			require_True(t, ok)
  1667  			ni := si.(nodeInfo)
  1668  			if len(ni.tags) == 0 {
  1669  				allOK = false
  1670  				reset(sc.serverByName(ni.name))
  1671  			}
  1672  		}
  1673  		if allOK {
  1674  			break
  1675  		}
  1676  	}
  1677  	js.mu.RUnlock()
  1678  	defer sc.shutdown()
  1679  
  1680  	sc.waitOnClusterReadyWithNumPeers(9)
  1681  
  1682  	s := sc.leader()
  1683  	nc, jsc := jsClientConnect(t, s)
  1684  	defer nc.Close()
  1685  
  1686  	_, err := jsc.AddStream(&nats.StreamConfig{
  1687  		Name:     "TEST",
  1688  		Subjects: []string{"foo"},
  1689  		Replicas: 3,
  1690  	})
  1691  	require_NoError(t, err)
  1692  
  1693  	sc.waitOnStreamLeader(globalAccountName, "TEST")
  1694  
  1695  	osi, err := jsc.StreamInfo("TEST")
  1696  	require_NoError(t, err)
  1697  
  1698  	// Double check original placement honors unique_tag
  1699  	var uTags = make(map[string]struct{})
  1700  
  1701  	uTags[serverUTags[osi.Cluster.Leader]] = struct{}{}
  1702  	for _, replica := range osi.Cluster.Replicas {
  1703  		evalTag := serverUTags[replica.Name]
  1704  		if _, exists := uTags[evalTag]; !exists {
  1705  			uTags[evalTag] = struct{}{}
  1706  			continue
  1707  		} else {
  1708  			t.Fatalf("expected initial placement to honor unique_tag")
  1709  		}
  1710  	}
  1711  
  1712  	// Remove a peer and select a replacement 5 times to avoid a false positive.
  1713  	for i := 0; i < 5; i++ {
  1714  		// Remove 1 peer replica (this will be a random cloud region since the initial placement ordering was randomized).
  1715  		// After each successful iteration, osi will reflect the current RG peers
  1716  		toRemove := osi.Cluster.Replicas[0].Name
  1717  		resp, err := nc.Request(fmt.Sprintf(JSApiStreamRemovePeerT, "TEST"), []byte(`{"peer":"`+toRemove+`"}`), time.Second)
  1718  		require_NoError(t, err)
  1719  		var rpResp JSApiStreamRemovePeerResponse
  1720  		err = json.Unmarshal(resp.Data, &rpResp)
  1721  		require_NoError(t, err)
  1722  		require_True(t, rpResp.Success)
  1723  
  1724  		sc.waitOnStreamLeader(globalAccountName, "TEST")
  1725  
  1726  		checkFor(t, time.Second, 200*time.Millisecond, func() error {
  1727  			osi, err = jsc.StreamInfo("TEST")
  1728  			require_NoError(t, err)
  1729  			if len(osi.Cluster.Replicas) != 2 {
  1730  				return fmt.Errorf("expected R3, got R%d", len(osi.Cluster.Replicas)+1)
  1731  			}
  1732  		// STREAM.PEER.REMOVE is an asynchronous command; make sure the remove has occurred by
  1733  			// checking that the toRemove peer is gone.
  1734  			for _, replica := range osi.Cluster.Replicas {
  1735  				if replica.Name == toRemove {
  1736  					return fmt.Errorf("expected replaced replica, old replica still present")
  1737  				}
  1738  			}
  1739  			return nil
  1740  		})
  1741  
  1742  		// Validate that the replacement with the new peer still honors unique_tag.
  1743  		uTags = make(map[string]struct{}) //reset
  1744  
  1745  		uTags[serverUTags[osi.Cluster.Leader]] = struct{}{}
  1746  		for _, replica := range osi.Cluster.Replicas {
  1747  			evalTag := serverUTags[replica.Name]
  1748  			if _, exists := uTags[evalTag]; !exists {
  1749  				uTags[evalTag] = struct{}{}
  1750  				continue
  1751  			} else {
  1752  				t.Fatalf("expected new peer and revised placement to honor unique_tag")
  1753  			}
  1754  		}
  1755  	}
  1756  }
  1757  
  1758  func TestJetStreamClusterReplacementPolicyAfterPeerRemoveNoPlace(t *testing.T) {
  1759  	// R3 scenario where there are exactly three unique cloud nodes, so removing a peer should NOT
  1760  	// result in a new peer
  1761  
  1762  	sc := createJetStreamClusterExplicit(t, "threeup", 3)
  1763  	sc.waitOnPeerCount(3)
  1764  
  1765  	reset := func(s *Server) {
  1766  		s.mu.Lock()
  1767  		rch := s.sys.resetCh
  1768  		s.mu.Unlock()
  1769  		if rch != nil {
  1770  			rch <- struct{}{}
  1771  		}
  1772  		s.sendStatszUpdate()
  1773  	}
  1774  
  1775  	tags := []string{"cloud:aws", "cloud:gcp", "cloud:az"}
  1776  
  1777  	var serverUTags = make(map[string]string)
  1778  
  1779  	for i, s := range sc.servers {
  1780  		s.optsMu.Lock()
  1781  		serverUTags[s.Name()] = tags[i]
  1782  		s.opts.Tags.Add(tags[i])
  1783  		s.opts.JetStreamUniqueTag = "cloud"
  1784  		s.optsMu.Unlock()
  1785  		reset(s)
  1786  	}
  1787  
  1788  	ml := sc.leader()
  1789  	js := ml.getJetStream()
  1790  	require_True(t, js != nil)
  1791  	js.mu.RLock()
  1792  	cc := js.cluster
  1793  	require_True(t, cc != nil)
  1794  
  1795  	// Walk and make sure all tags are registered.
  1796  	expires := time.Now().Add(10 * time.Second)
  1797  	for time.Now().Before(expires) {
  1798  		allOK := true
  1799  		for _, p := range cc.meta.Peers() {
  1800  			si, ok := ml.nodeToInfo.Load(p.ID)
  1801  			require_True(t, ok)
  1802  			ni := si.(nodeInfo)
  1803  			if len(ni.tags) == 0 {
  1804  				allOK = false
  1805  				reset(sc.serverByName(ni.name))
  1806  			}
  1807  		}
  1808  		if allOK {
  1809  			break
  1810  		}
  1811  	}
  1812  	js.mu.RUnlock()
  1813  	defer sc.shutdown()
  1814  
  1815  	sc.waitOnClusterReadyWithNumPeers(3)
  1816  
  1817  	s := sc.leader()
  1818  	nc, jsc := jsClientConnect(t, s)
  1819  	defer nc.Close()
  1820  
  1821  	_, err := jsc.AddStream(&nats.StreamConfig{
  1822  		Name:     "TEST",
  1823  		Subjects: []string{"foo"},
  1824  		Replicas: 3,
  1825  	})
  1826  	require_NoError(t, err)
  1827  
  1828  	sc.waitOnStreamLeader(globalAccountName, "TEST")
  1829  
  1830  	osi, err := jsc.StreamInfo("TEST")
  1831  	require_NoError(t, err)
  1832  
  1833  	// Double check original placement honors unique_tag
  1834  	var uTags = make(map[string]struct{})
  1835  
  1836  	uTags[serverUTags[osi.Cluster.Leader]] = struct{}{}
  1837  	for _, replica := range osi.Cluster.Replicas {
  1838  		evalTag := serverUTags[replica.Name]
  1839  		if _, exists := uTags[evalTag]; !exists {
  1840  			uTags[evalTag] = struct{}{}
  1841  			continue
  1842  		} else {
  1843  			t.Fatalf("expected initial placement to honor unique_tag")
  1844  		}
  1845  	}
  1846  
  1847  	// Remove 1 peer replica (this will be a random cloud region since the initial placement ordering was randomized).
  1848  	_, err = nc.Request("$JS.API.STREAM.PEER.REMOVE.TEST", []byte(`{"peer":"`+osi.Cluster.Replicas[0].Name+`"}`), time.Second*10)
  1849  	require_NoError(t, err)
  1850  
  1851  	sc.waitOnStreamLeader(globalAccountName, "TEST")
  1852  
  1853  	// Verify R2 since no eligible peer can replace the removed peer without breaking the unique constraint.
  1854  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  1855  		osi, err = jsc.StreamInfo("TEST")
  1856  		require_NoError(t, err)
  1857  		if len(osi.Cluster.Replicas) != 1 {
  1858  			return fmt.Errorf("expected R2, got R%d", len(osi.Cluster.Replicas)+1)
  1859  		}
  1860  		return nil
  1861  	})
  1862  
  1863  	// Validate that remaining members still honor unique tags
  1864  	uTags = make(map[string]struct{}) //reset
  1865  
  1866  	uTags[serverUTags[osi.Cluster.Leader]] = struct{}{}
  1867  	for _, replica := range osi.Cluster.Replicas {
  1868  		evalTag := serverUTags[replica.Name]
  1869  		if _, exists := uTags[evalTag]; !exists {
  1870  			uTags[evalTag] = struct{}{}
  1871  			continue
  1872  		} else {
  1873  			t.Fatalf("expected revised placement to honor unique_tag")
  1874  		}
  1875  	}
  1876  }
  1877  
  1878  // https://github.com/nats-io/nats-server/issues/3191
  1879  func TestJetStreamClusterLeafnodeDuplicateConsumerMessages(t *testing.T) {
  1880  	// Cluster B
  1881  	c := createJetStreamCluster(t, jsClusterTempl, "B", _EMPTY_, 2, 22020, false)
  1882  	defer c.shutdown()
  1883  
  1884  	// Cluster A
  1885  	// Domain is "A"
  1886  	lc := c.createLeafNodesWithStartPortAndDomain("A", 2, 22110, "A")
  1887  	defer lc.shutdown()
  1888  
  1889  	lc.waitOnClusterReady()
  1890  
  1891  	// We want A-S-1 connected to B-S-1 and A-S-2 connected to B-S-2
  1892  	// So adjust if needed.
  1893  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  1894  		for i, ls := range lc.servers {
  1895  			ls.mu.RLock()
  1896  			var remoteServer string
  1897  			for _, rc := range ls.leafs {
  1898  				rc.mu.Lock()
  1899  				remoteServer = rc.leaf.remoteServer
  1900  				rc.mu.Unlock()
  1901  				break
  1902  			}
  1903  			ls.mu.RUnlock()
  1904  
  1905  			wantedRemote := fmt.Sprintf("S-%d", i+1)
  1906  			if remoteServer != wantedRemote {
  1907  				ls.Shutdown()
  1908  				lc.restartServer(ls)
  1909  				return fmt.Errorf("Leafnode server %d not connected to %q", i+1, wantedRemote)
  1910  			}
  1911  		}
  1912  		return nil
  1913  	})
  1914  
  1915  	// Wait on ready again.
  1916  	lc.waitOnClusterReady()
  1917  
  1918  	// Create a stream and a durable pull consumer on cluster A.
  1919  	lnc, ljs := jsClientConnect(t, lc.randomServer())
  1920  	defer lnc.Close()
  1921  
  1922  	_, err := ljs.AddStream(&nats.StreamConfig{
  1923  		Name:     "TEST",
  1924  		Subjects: []string{"foo"},
  1925  		Replicas: 2,
  1926  	})
  1927  	require_NoError(t, err)
  1928  
  1929  	// Make sure stream leader is on S-1
  1930  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  1931  		si, err := ljs.StreamInfo("TEST")
  1932  		require_NoError(t, err)
  1933  		if si.Cluster.Leader == "A-S-1" {
  1934  			return nil
  1935  		}
  1936  		_, err = lnc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  1937  		require_NoError(t, err)
  1938  		return fmt.Errorf("Stream leader not placed on A-S-1")
  1939  	})
  1940  
  1941  	_, err = ljs.StreamInfo("TEST")
  1942  	require_NoError(t, err)
  1943  
  1944  	_, err = ljs.AddConsumer("TEST", &nats.ConsumerConfig{
  1945  		Durable:    "dlc",
  1946  		Replicas:   2,
  1947  		MaxDeliver: 1,
  1948  		AckPolicy:  nats.AckNonePolicy,
  1949  	})
  1950  	require_NoError(t, err)
  1951  
  1952  	// Make sure consumer leader is on S-2
  1953  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  1954  		ci, err := ljs.ConsumerInfo("TEST", "dlc")
  1955  		require_NoError(t, err)
  1956  		if ci.Cluster.Leader == "A-S-2" {
  1957  			return nil
  1958  		}
  1959  		_, err = lnc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "dlc"), nil, time.Second)
  1960  		require_NoError(t, err)
  1961  		return fmt.Errorf("Consumer leader not placed on A-S-2")
  1962  	})
  1963  
  1964  	_, err = ljs.ConsumerInfo("TEST", "dlc")
  1965  	require_NoError(t, err)
  1966  
  1967  	// Send 2 messages.
  1968  	sendStreamMsg(t, lnc, "foo", "M-1")
  1969  	sendStreamMsg(t, lnc, "foo", "M-2")
  1970  
  1971  	// Now bind apps to cluster B servers and bind to pull consumer.
  1972  	nc1, _ := jsClientConnect(t, c.servers[0])
  1973  	defer nc1.Close()
  1974  	js1, err := nc1.JetStream(nats.Domain("A"))
  1975  	require_NoError(t, err)
  1976  
  1977  	sub1, err := js1.PullSubscribe("foo", "dlc", nats.BindStream("TEST"))
  1978  	require_NoError(t, err)
  1979  	defer sub1.Unsubscribe()
  1980  
  1981  	nc2, _ := jsClientConnect(t, c.servers[1])
  1982  	defer nc2.Close()
  1983  	js2, err := nc2.JetStream(nats.Domain("A"))
  1984  	require_NoError(t, err)
  1985  
  1986  	sub2, err := js2.PullSubscribe("foo", "dlc", nats.BindStream("TEST"))
  1987  	require_NoError(t, err)
  1988  	defer sub2.Unsubscribe()
  1989  
  1990  	// Make sure we can properly get messages.
  1991  	msgs, err := sub1.Fetch(1)
  1992  	require_NoError(t, err)
  1993  	require_True(t, len(msgs) == 1)
  1994  	require_True(t, string(msgs[0].Data) == "M-1")
  1995  
  1996  	msgs, err = sub2.Fetch(1)
  1997  	require_NoError(t, err)
  1998  	require_True(t, len(msgs) == 1)
  1999  	require_True(t, string(msgs[0].Data) == "M-2")
  2000  
  2001  	// Make sure the delivered state makes it to the other server so we do not accidentally send M-2 again
  2002  	// and fail the test below.
  2003  	time.Sleep(250 * time.Millisecond)
  2004  
  2005  	// Now let's introduce an event where A-S-2 will reconnect to B-S-1 after a restart.
  2006  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  2007  		ls := lc.servers[1]
  2008  		wantedRemote := "S-1"
  2009  		var remoteServer string
  2010  
  2011  		ls.mu.RLock()
  2012  		for _, rc := range ls.leafs {
  2013  			rc.mu.Lock()
  2014  			remoteServer = rc.leaf.remoteServer
  2015  			rc.mu.Unlock()
  2016  			break
  2017  		}
  2018  		ls.mu.RUnlock()
  2019  
  2020  		if remoteServer != wantedRemote {
  2021  			ls.Shutdown()
  2022  			lc.restartServer(ls)
  2023  			return fmt.Errorf("Leafnode server not connected to %q", wantedRemote)
  2024  		}
  2025  		return nil
  2026  	})
  2027  
  2028  	// Wait on ready again.
  2029  	lc.waitOnClusterReady()
  2030  	lc.waitOnStreamLeader(globalAccountName, "TEST")
  2031  	lc.waitOnConsumerLeader(globalAccountName, "TEST", "dlc")
  2032  
  2033  	// Send 2 more messages.
  2034  	sendStreamMsg(t, lnc, "foo", "M-3")
  2035  	sendStreamMsg(t, lnc, "foo", "M-4")
  2036  
  2037  	msgs, err = sub1.Fetch(2)
  2038  	require_NoError(t, err)
  2039  	require_True(t, len(msgs) == 2)
  2040  	require_True(t, string(msgs[0].Data) == "M-3")
  2041  	require_True(t, string(msgs[1].Data) == "M-4")
  2042  
  2043  	// Send 2 more messages.
  2044  	sendStreamMsg(t, lnc, "foo", "M-5")
  2045  	sendStreamMsg(t, lnc, "foo", "M-6")
  2046  
  2047  	msgs, err = sub2.Fetch(2)
  2048  	require_NoError(t, err)
  2049  	require_True(t, len(msgs) == 2)
  2050  	require_True(t, string(msgs[0].Data) == "M-5")
  2051  	require_True(t, string(msgs[1].Data) == "M-6")
  2052  }
  2053  
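        // snapRGSet returns the set of server names in the stream's RG (leader plus replicas),
        // optionally printing them under the given banner. It returns nil if no stream info was supplied.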
  2054  func snapRGSet(pFlag bool, banner string, osi *nats.StreamInfo) *map[string]struct{} {
  2055  	var snapSet = make(map[string]struct{})
  2056  	if pFlag {
  2057  		fmt.Println(banner)
  2058  	}
  2059  	if osi == nil {
  2060  		if pFlag {
  2061  			fmt.Printf("bonkers!\n")
  2062  		}
  2063  		return nil
  2064  	}
  2065  
  2066  	snapSet[osi.Cluster.Leader] = struct{}{}
  2067  	if pFlag {
  2068  		fmt.Printf("Leader: %s\n", osi.Cluster.Leader)
  2069  	}
  2070  	for _, replica := range osi.Cluster.Replicas {
  2071  		snapSet[replica.Name] = struct{}{}
  2072  		if pFlag {
  2073  			fmt.Printf("Replica: %s\n", replica.Name)
  2074  		}
  2075  	}
  2076  
  2077  	return &snapSet
  2078  }
  2079  
  2080  func TestJetStreamClusterAfterPeerRemoveZeroState(t *testing.T) {
  2081  	// R3 scenario (with messages) in a 4-node cluster. Remove a peer from the RG and add it back to the same RG later.
  2082  	// Validate that the original peer carried over no state or issues from its previous RG membership, specifically
  2083  	// that the restored peer has the correct filestore usage bytes for the asset.
  2084  	var err error
  2085  
  2086  	sc := createJetStreamClusterExplicit(t, "cl4", 4)
  2087  	defer sc.shutdown()
  2088  
  2089  	sc.waitOnClusterReadyWithNumPeers(4)
  2090  
  2091  	s := sc.leader()
  2092  	nc, jsc := jsClientConnect(t, s)
  2093  	defer nc.Close()
  2094  
  2095  	_, err = jsc.AddStream(&nats.StreamConfig{
  2096  		Name:     "foo",
  2097  		Subjects: []string{"foo.*"},
  2098  		Replicas: 3,
  2099  	})
  2100  	require_NoError(t, err)
  2101  
  2102  	sc.waitOnStreamLeader(globalAccountName, "foo")
  2103  
  2104  	osi, err := jsc.StreamInfo("foo")
  2105  	require_NoError(t, err)
  2106  
  2107  	// make sure 0 msgs
  2108  	require_True(t, osi.State.Msgs == 0)
  2109  
  2110  	// load up messages
  2111  	toSend := 10000
  2112  	// storage bytes with JS message overhead
  2113  	assetStoreBytesExpected := uint64(460000)
  2114  
  2115  	for i := 1; i <= toSend; i++ {
  2116  		msg := []byte("Hello World")
  2117  		if _, err = jsc.Publish("foo.a", msg); err != nil {
  2118  			t.Fatalf("unexpected publish error: %v", err)
  2119  		}
  2120  	}
  2121  
  2122  	osi, err = jsc.StreamInfo("foo")
  2123  	require_NoError(t, err)
  2124  
  2125  	// make sure 10000 msgs
  2126  	require_True(t, osi.State.Msgs == uint64(toSend))
  2127  
  2128  	origSet := *snapRGSet(false, "== Orig RG Set ==", osi)
  2129  
  2130  	// remove 1 peer replica (1 of 2 non-leaders)
  2131  	origPeer := osi.Cluster.Replicas[0].Name
  2132  	resp, err := nc.Request(fmt.Sprintf(JSApiStreamRemovePeerT, "foo"), []byte(`{"peer":"`+origPeer+`"}`), time.Second)
  2133  	require_NoError(t, err)
  2134  	var rpResp JSApiStreamRemovePeerResponse
  2135  	err = json.Unmarshal(resp.Data, &rpResp)
  2136  	require_NoError(t, err)
  2137  	require_True(t, rpResp.Success)
  2138  
  2139  	// validate the origPeer is removed with a replacement newPeer
  2140  	sc.waitOnStreamLeader(globalAccountName, "foo")
  2141  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  2142  		osi, err = jsc.StreamInfo("foo")
  2143  		require_NoError(t, err)
  2144  		if len(osi.Cluster.Replicas) != 2 {
  2145  			return fmt.Errorf("expected R3, got R%d", len(osi.Cluster.Replicas)+1)
  2146  		}
  2147  		// STREAM.PEER.REMOVE is an asynchronous command; make sure the remove has occurred
  2148  		for _, replica := range osi.Cluster.Replicas {
  2149  			if replica.Name == origPeer {
  2150  				return fmt.Errorf("expected replaced replica, old replica still present")
  2151  			}
  2152  		}
  2153  		return nil
  2154  	})
  2155  
  2156  	// identify the new peer
  2157  	var newPeer string
  2158  	osi, err = jsc.StreamInfo("foo")
  2159  	require_NoError(t, err)
  2160  	newSet := *snapRGSet(false, "== New RG Set ==", osi)
  2161  	for peer := range newSet {
  2162  		_, ok := origSet[peer]
  2163  		if !ok {
  2164  			newPeer = peer
  2165  			break
  2166  		}
  2167  	}
  2168  	require_True(t, newPeer != "")
  2169  
  2170  	// kick out newPeer which will cause origPeer to be assigned to the RG again
  2171  	resp, err = nc.Request(fmt.Sprintf(JSApiStreamRemovePeerT, "foo"), []byte(`{"peer":"`+newPeer+`"}`), time.Second)
  2172  	require_NoError(t, err)
  2173  	err = json.Unmarshal(resp.Data, &rpResp)
  2174  	require_NoError(t, err)
  2175  	require_True(t, rpResp.Success)
  2176  
  2177  	// validate the newPeer is removed and R3 has reformed (with origPeer)
  2178  	sc.waitOnStreamLeader(globalAccountName, "foo")
  2179  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  2180  		osi, err = jsc.StreamInfo("foo")
  2181  		require_NoError(t, err)
  2182  		if len(osi.Cluster.Replicas) != 2 {
  2183  			return fmt.Errorf("expected R3, got R%d", len(osi.Cluster.Replicas)+1)
  2184  		}
  2185  		// STREAM.PEER.REMOVE is an asynchronous command; make sure the remove has occurred
  2186  		for _, replica := range osi.Cluster.Replicas {
  2187  			if replica.Name == newPeer {
  2188  				return fmt.Errorf("expected replaced replica, old replica still present")
  2189  			}
  2190  		}
  2191  		return nil
  2192  	})
  2193  
  2194  	osi, err = jsc.StreamInfo("foo")
  2195  	require_NoError(t, err)
  2196  
  2197  	// make sure all msgs reported in stream at this point with original leader
  2198  	require_True(t, osi.State.Msgs == uint64(toSend))
  2199  
  2200  	snapRGSet(false, "== RG Set w/origPeer Back ==", osi)
  2201  
  2202  	// get a handle to original peer server
  2203  	origServer := sc.serverByName(origPeer)
  2204  	if origServer == nil {
  2205  		t.Fatalf("expected to get a handle to original peer server by name")
  2206  	}
  2207  
  2208  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  2209  		jszResult, err := origServer.Jsz(nil)
  2210  		require_NoError(t, err)
  2211  		if jszResult.Store != assetStoreBytesExpected {
  2212  			return fmt.Errorf("expected %d storage on orig peer, got %d", assetStoreBytesExpected, jszResult.Store)
  2213  		}
  2214  		return nil
  2215  	})
  2216  }
  2217  
  2218  func TestJetStreamClusterMemLeaderRestart(t *testing.T) {
  2219  	// Test that if the leader server of an R3 clustered mem store asset is restarted, the asset remains stable with the final quorum.
  2220  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2221  	defer c.shutdown()
  2222  
  2223  	ml := c.leader()
  2224  	nc, jsc := jsClientConnect(t, ml)
  2225  	defer nc.Close()
  2226  
  2227  	_, err := jsc.AddStream(&nats.StreamConfig{
  2228  		Name:     "foo",
  2229  		Storage:  nats.MemoryStorage,
  2230  		Subjects: []string{"foo.*"},
  2231  		Replicas: 3,
  2232  	})
  2233  	require_NoError(t, err)
  2234  
  2235  	// load up messages
  2236  	toSend := 10000
  2237  	for i := 1; i <= toSend; i++ {
  2238  		msg := []byte("Hello World")
  2239  		if _, err = jsc.Publish("foo.a", msg); err != nil {
  2240  			t.Fatalf("unexpected publish error: %v", err)
  2241  		}
  2242  	}
  2243  
  2244  	osi, err := jsc.StreamInfo("foo")
  2245  	require_NoError(t, err)
  2246  	// make sure 10000 msgs
  2247  	require_True(t, osi.State.Msgs == uint64(toSend))
  2248  
  2249  	// Shutdown the stream leader server
  2250  	rs := c.serverByName(osi.Cluster.Leader)
  2251  	rs.Shutdown()
  2252  
  2253  	// Make sure that we have a META leader (there can always be a re-election)
  2254  	c.waitOnLeader()
  2255  	c.waitOnStreamLeader(globalAccountName, "foo")
  2256  
  2257  	// Should still have quorum and a new leader
  2258  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  2259  		osi, err = jsc.StreamInfo("foo")
  2260  		if err != nil {
  2261  			return fmt.Errorf("expected healthy stream asset, got %s", err.Error())
  2262  		}
  2263  		if osi.Cluster.Leader == _EMPTY_ {
  2264  			return fmt.Errorf("expected healthy stream asset with new leader")
  2265  		}
  2266  		if osi.State.Msgs != uint64(toSend) {
  2267  			return fmt.Errorf("expected healthy stream asset %d messages, got %d messages", toSend, osi.State.Msgs)
  2268  		}
  2269  		return nil
  2270  	})
  2271  
  2272  	// Now restart the old leader peer (old stream state)
  2273  	oldrs := rs
  2274  	rs, _ = RunServerWithConfig(rs.getOpts().ConfigFile)
  2275  	defer rs.Shutdown()
  2276  
  2277  	// Replace the old server with the new one in the cluster's server list.
  2278  	for i := 0; i < len(c.servers); i++ {
  2279  		if c.servers[i] == oldrs {
  2280  			c.servers[i] = rs
  2281  		}
  2282  	}
  2283  
  2284  	// Wait for cluster to be formed
  2285  	checkClusterFormed(t, c.servers...)
  2286  
  2287  	// Make sure that we have a leader (there can always be a re-election)
  2288  	c.waitOnLeader()
  2289  
  2290  	// Can we get stream info after the old leader has returned?
  2291  	osi, err = jsc.StreamInfo("foo")
  2292  	if err != nil {
  2293  		t.Fatalf("expected stream asset info return, got %s", err.Error())
  2294  	}
  2295  
  2296  	// When the asset leader came back, did we re-form with quorum?
  2297  	if osi.Cluster.Leader == "" {
  2298  		t.Fatalf("expected a current leader after old leader restarted")
  2299  	}
  2300  }
  2301  
  2302  // Customer reported R1 consumers that seemed to be ghosted after server restart.
  2303  func TestJetStreamClusterLostConsumers(t *testing.T) {
  2304  	c := createJetStreamClusterExplicit(t, "GHOST", 3)
  2305  	defer c.shutdown()
  2306  
  2307  	nc, js := jsClientConnect(t, c.randomServer())
  2308  	defer nc.Close()
  2309  
  2310  	_, err := js.AddStream(&nats.StreamConfig{
  2311  		Name:     "TEST",
  2312  		Subjects: []string{"events.>"},
  2313  		Replicas: 3,
  2314  	})
  2315  	require_NoError(t, err)
  2316  
  2317  	for i := 0; i < 10; i++ {
  2318  		for j := 0; j < 10; j++ {
  2319  			_, err := js.Publish(fmt.Sprintf("events.%d.%d", i, j), []byte("test"))
  2320  			require_NoError(t, err)
  2321  		}
  2322  	}
  2323  
  2324  	s := c.randomServer()
  2325  	s.Shutdown()
  2326  
  2327  	c.waitOnLeader()
  2328  	c.waitOnStreamLeader(globalAccountName, "TEST")
  2329  
  2330  	nc, _ = jsClientConnect(t, c.randomServer())
  2331  	defer nc.Close()
  2332  
  2333  	cc := CreateConsumerRequest{
  2334  		Stream: "TEST",
  2335  		Config: ConsumerConfig{
  2336  			AckPolicy: AckExplicit,
  2337  		},
  2338  	}
  2339  	req, err := json.Marshal(cc)
  2340  	require_NoError(t, err)
  2341  
  2342  	reqSubj := fmt.Sprintf(JSApiConsumerCreateT, "TEST")
  2343  
  2344  	// Now create 50 consumers. We do not wait for the answer.
  2345  	for i := 0; i < 50; i++ {
  2346  		nc.Publish(reqSubj, req)
  2347  	}
  2348  	nc.Flush()
  2349  
  2350  	// Grab the meta leader.
  2351  	ml := c.leader()
  2352  	require_NoError(t, ml.JetStreamSnapshotMeta())
  2353  
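        	// numConsumerAssignments returns the number of consumer assignments found on the
        	// first stream assignment in the server's cluster state.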
  2354  	numConsumerAssignments := func(s *Server) int {
  2355  		t.Helper()
  2356  		js := s.getJetStream()
  2357  		js.mu.RLock()
  2358  		defer js.mu.RUnlock()
  2359  		cc := js.cluster
  2360  		for _, asa := range cc.streams {
  2361  			for _, sa := range asa {
  2362  				return len(sa.consumers)
  2363  			}
  2364  		}
  2365  		return 0
  2366  	}
  2367  
  2368  	checkFor(t, time.Second, 100*time.Millisecond, func() error {
  2369  		num := numConsumerAssignments(ml)
  2370  		if num == 50 {
  2371  			return nil
  2372  		}
  2373  		return fmt.Errorf("Consumers is only %d", num)
  2374  	})
  2375  
  2376  	// Restart the server we shut down. We snapshotted, so the snapshot
  2377  	// has to fill in the new consumers.
  2378  	// The bug was that they would not be added to the meta state since the stream
  2379  	// already existed.
  2380  	s = c.restartServer(s)
  2381  
  2382  	checkFor(t, time.Second, 100*time.Millisecond, func() error {
  2383  		num := numConsumerAssignments(s)
  2384  		if num == 50 {
  2385  			return nil
  2386  		}
  2387  		return fmt.Errorf("Consumers is only %d", num)
  2388  	})
  2389  }
  2390  
  2391  // https://github.com/nats-io/nats-server/issues/3636
  2392  func TestJetStreamClusterScaleDownDuringServerOffline(t *testing.T) {
  2393  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2394  	defer c.shutdown()
  2395  
  2396  	nc, js := jsClientConnect(t, c.randomServer())
  2397  	defer nc.Close()
  2398  
  2399  	_, err := js.AddStream(&nats.StreamConfig{
  2400  		Name:     "TEST",
  2401  		Subjects: []string{"foo"},
  2402  		Replicas: 3,
  2403  	})
  2404  	require_NoError(t, err)
  2405  
  2406  	for i := 0; i < 100; i++ {
  2407  		sendStreamMsg(t, nc, "foo", "hello")
  2408  	}
  2409  
  2410  	s := c.randomNonStreamLeader(globalAccountName, "TEST")
  2411  	s.Shutdown()
  2412  
  2413  	c.waitOnLeader()
  2414  
  2415  	nc, js = jsClientConnect(t, c.randomServer())
  2416  	defer nc.Close()
  2417  
  2418  	_, err = js.UpdateStream(&nats.StreamConfig{
  2419  		Name:     "TEST",
  2420  		Subjects: []string{"foo"},
  2421  		Replicas: 1,
  2422  	})
  2423  	require_NoError(t, err)
  2424  
  2425  	s = c.restartServer(s)
  2426  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  2427  		hs := s.healthz(nil)
  2428  		if hs.Error != _EMPTY_ {
  2429  			return errors.New(hs.Error)
  2430  		}
  2431  		return nil
  2432  	})
  2433  }
  2434  
  2435  // Reported by a customer manually upgrading their streams to support direct gets.
  2436  // It worked with a single replica but not in clustered mode.
  2437  func TestJetStreamClusterDirectGetStreamUpgrade(t *testing.T) {
  2438  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2439  	defer c.shutdown()
  2440  
  2441  	nc, js := jsClientConnect(t, c.randomServer())
  2442  	defer nc.Close()
  2443  
  2444  	_, err := js.AddStream(&nats.StreamConfig{
  2445  		Name:              "KV_TEST",
  2446  		Subjects:          []string{"$KV.TEST.>"},
  2447  		Discard:           nats.DiscardNew,
  2448  		MaxMsgsPerSubject: 1,
  2449  		DenyDelete:        true,
  2450  		Replicas:          3,
  2451  	})
  2452  	require_NoError(t, err)
  2453  
  2454  	kv, err := js.KeyValue("TEST")
  2455  	require_NoError(t, err)
  2456  
  2457  	_, err = kv.PutString("name", "derek")
  2458  	require_NoError(t, err)
  2459  
  2460  	entry, err := kv.Get("name")
  2461  	require_NoError(t, err)
  2462  	require_True(t, string(entry.Value()) == "derek")
  2463  
  2464  	// Now simulate an update to the stream to support direct gets.
  2465  	_, err = js.UpdateStream(&nats.StreamConfig{
  2466  		Name:              "KV_TEST",
  2467  		Subjects:          []string{"$KV.TEST.>"},
  2468  		Discard:           nats.DiscardNew,
  2469  		MaxMsgsPerSubject: 1,
  2470  		DenyDelete:        true,
  2471  		AllowDirect:       true,
  2472  		Replicas:          3,
  2473  	})
  2474  	require_NoError(t, err)
  2475  
  2476  	// Rebind to the KV to make sure we use the DIRECT version of Get().
  2477  	kv, err = js.KeyValue("TEST")
  2478  	require_NoError(t, err)
  2479  
  2480  	// Make sure direct get works.
  2481  	entry, err = kv.Get("name")
  2482  	require_NoError(t, err)
  2483  	require_True(t, string(entry.Value()) == "derek")
  2484  }
  2485  
  2486  // For interest (or workqueue) based streams it's important to match the replication factor.
  2487  // This was the case, but now that more control over consumer creation is allowed it's possible
  2488  // to create a consumer where the replication factor does not match. This could cause
  2489  // instability in the state between servers and cause problems on leader switches.
  2490  func TestJetStreamClusterInterestPolicyStreamForConsumersToMatchRFactor(t *testing.T) {
  2491  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2492  	defer c.shutdown()
  2493  
  2494  	nc, js := jsClientConnect(t, c.randomServer())
  2495  	defer nc.Close()
  2496  
  2497  	_, err := js.AddStream(&nats.StreamConfig{
  2498  		Name:      "TEST",
  2499  		Subjects:  []string{"foo"},
  2500  		Retention: nats.InterestPolicy,
  2501  		Replicas:  3,
  2502  	})
  2503  	require_NoError(t, err)
  2504  
  2505  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  2506  		Durable:   "XX",
  2507  		AckPolicy: nats.AckExplicitPolicy,
  2508  		Replicas:  1,
  2509  	})
  2510  
  2511  	require_Error(t, err, NewJSConsumerReplicasShouldMatchStreamError())
  2512  }
  2513  
  2514  // https://github.com/nats-io/nats-server/issues/3791
  2515  func TestJetStreamClusterKVWatchersWithServerDown(t *testing.T) {
  2516  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2517  	defer c.shutdown()
  2518  
  2519  	nc, js := jsClientConnect(t, c.randomServer())
  2520  	defer nc.Close()
  2521  
  2522  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  2523  		Bucket:   "TEST",
  2524  		Replicas: 3,
  2525  	})
  2526  	require_NoError(t, err)
  2527  
  2528  	kv.PutString("foo", "bar")
  2529  	kv.PutString("foo", "baz")
  2530  
  2531  	// Shutdown a follower.
  2532  	s := c.randomNonStreamLeader(globalAccountName, "KV_TEST")
  2533  	s.Shutdown()
  2534  	c.waitOnLeader()
  2535  
  2536  	nc, _ = jsClientConnect(t, c.randomServer())
  2537  	defer nc.Close()
  2538  
  2539  	js, err = nc.JetStream(nats.MaxWait(2 * time.Second))
  2540  	require_NoError(t, err)
  2541  
  2542  	kv, err = js.KeyValue("TEST")
  2543  	require_NoError(t, err)
  2544  
  2545  	for i := 0; i < 100; i++ {
  2546  		w, err := kv.Watch("foo")
  2547  		require_NoError(t, err)
  2548  		w.Stop()
  2549  	}
  2550  }
  2551  
  2552  // TestJetStreamClusterCurrentVsHealth is designed to show the
  2553  // difference between "current" and "healthy" when async publishes
  2554  // outpace the rate at which they can be applied.
  2555  func TestJetStreamClusterCurrentVsHealth(t *testing.T) {
  2556  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2557  	defer c.shutdown()
  2558  
  2559  	c.waitOnLeader()
  2560  	server := c.randomNonLeader()
  2561  
  2562  	nc, js := jsClientConnect(t, server)
  2563  	defer nc.Close()
  2564  
  2565  	_, err := js.AddStream(&nats.StreamConfig{
  2566  		Name:     "TEST",
  2567  		Subjects: []string{"foo"},
  2568  		Replicas: 3,
  2569  	})
  2570  	require_NoError(t, err)
  2571  
  2572  	server = c.randomNonStreamLeader(globalAccountName, "TEST")
  2573  	stream, err := server.GlobalAccount().lookupStream("TEST")
  2574  	require_NoError(t, err)
  2575  
  2576  	raft, ok := stream.raftGroup().node.(*raft)
  2577  	require_True(t, ok)
  2578  
  2579  	for i := 0; i < 1000; i++ {
  2580  		_, err := js.PublishAsync("foo", []byte("bar"))
  2581  		require_NoError(t, err)
  2582  
  2583  		raft.RLock()
  2584  		commit := raft.commit
  2585  		applied := raft.applied
  2586  		raft.RUnlock()
  2587  
  2588  		current := raft.Current()
  2589  		healthy := raft.Healthy()
  2590  
  2591  		if !current || !healthy || commit != applied {
  2592  			t.Logf(
  2593  				"%d | Current %v, healthy %v, commit %d, applied %d, pending %d",
  2594  				i, current, healthy, commit, applied, commit-applied,
  2595  			)
  2596  		}
  2597  	}
  2598  }
  2599  
  2600  // Several users and customers use this setup, though often across leafnodes.
  2601  // This should be allowed in the same account since we are really protecting against
  2602  // multiple pub acks with cycle detection.
  2603  func TestJetStreamClusterActiveActiveSourcedStreams(t *testing.T) {
  2604  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2605  	defer c.shutdown()
  2606  
  2607  	nc, js := jsClientConnect(t, c.randomServer())
  2608  	defer nc.Close()
  2609  
  2610  	_, err := js.AddStream(&nats.StreamConfig{
  2611  		Name:     "A",
  2612  		Subjects: []string{"A.>"},
  2613  	})
  2614  	require_NoError(t, err)
  2615  
  2616  	_, err = js.AddStream(&nats.StreamConfig{
  2617  		Name:     "B",
  2618  		Subjects: []string{"B.>"},
  2619  	})
  2620  	require_NoError(t, err)
  2621  
  2622  	_, err = js.UpdateStream(&nats.StreamConfig{
  2623  		Name:     "A",
  2624  		Subjects: []string{"A.>"},
  2625  		Sources: []*nats.StreamSource{{
  2626  			Name:          "B",
  2627  			FilterSubject: "B.>",
  2628  		}},
  2629  	})
  2630  	require_NoError(t, err)
  2631  
  2632  	// Previously this would fail.
  2633  	_, err = js.UpdateStream(&nats.StreamConfig{
  2634  		Name:     "B",
  2635  		Subjects: []string{"B.>"},
  2636  		Sources: []*nats.StreamSource{{
  2637  			Name:          "A",
  2638  			FilterSubject: "A.>",
  2639  		}},
  2640  	})
  2641  	require_NoError(t, err)
  2642  }
  2643  
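        // Verify that updating a consumer's deliver subject while one of its followers is down
        // does not cause the consumer to be deleted once that follower restarts.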
  2644  func TestJetStreamClusterUpdateConsumerShouldNotForceDeleteOnRestart(t *testing.T) {
  2645  	c := createJetStreamClusterExplicit(t, "R7S", 7)
  2646  	defer c.shutdown()
  2647  
  2648  	nc, js := jsClientConnect(t, c.randomServer())
  2649  	defer nc.Close()
  2650  
  2651  	_, err := js.AddStream(&nats.StreamConfig{
  2652  		Name:     "TEST",
  2653  		Subjects: []string{"foo", "bar"},
  2654  		Replicas: 3,
  2655  	})
  2656  	require_NoError(t, err)
  2657  
  2658  	ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  2659  		Durable:        "D",
  2660  		DeliverSubject: "_no_bind_",
  2661  	})
  2662  	require_NoError(t, err)
  2663  
  2664  	// Shutdown a consumer follower.
  2665  	nc.Close()
  2666  	s := c.serverByName(ci.Cluster.Replicas[0].Name)
  2667  	s.Shutdown()
  2668  
  2669  	c.waitOnLeader()
  2670  
  2671  	nc, js = jsClientConnect(t, c.randomServer())
  2672  	defer nc.Close()
  2673  
  2674  	// Change delivery subject.
  2675  	_, err = js.UpdateConsumer("TEST", &nats.ConsumerConfig{
  2676  		Durable:        "D",
  2677  		DeliverSubject: "_d_",
  2678  	})
  2679  	require_NoError(t, err)
  2680  
  2681  	// Create interest in new and old deliver subject.
  2682  	_, err = nc.SubscribeSync("_d_")
  2683  	require_NoError(t, err)
  2684  	_, err = nc.SubscribeSync("_no_bind_")
  2685  	require_NoError(t, err)
  2686  	nc.Flush()
  2687  
  2688  	c.restartServer(s)
  2689  	c.waitOnAllCurrent()
  2690  
  2691  	// Wait to see if the bad error that would clean up the consumer occurs.
  2692  	time.Sleep(time.Second)
  2693  
  2694  	_, err = js.ConsumerInfo("TEST", "D")
  2695  	require_NoError(t, err)
  2696  }
  2697  
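        // Verify that consumers with an inactive threshold are not cleaned up while they are
        // actively consuming, and are removed only after interest is gone and the threshold expires.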
  2698  func TestJetStreamClusterInterestPolicyEphemeral(t *testing.T) {
  2699  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2700  	defer c.shutdown()
  2701  
  2702  	for _, test := range []struct {
  2703  		testName string
  2704  		stream   string
  2705  		subject  string
  2706  		durable  string
  2707  		name     string
  2708  	}{
  2709  		{testName: "InterestWithDurable", durable: "eph", subject: "intdur", stream: "INT_DUR"},
  2710  		{testName: "InterestWithName", name: "eph", subject: "inteph", stream: "INT_EPH"},
  2711  	} {
  2712  		t.Run(test.testName, func(t *testing.T) {
  2713  			var err error
  2714  
  2715  			nc, js := jsClientConnect(t, c.randomServer())
  2716  			defer nc.Close()
  2717  
  2718  			_, err = js.AddStream(&nats.StreamConfig{
  2719  				Name:      test.stream,
  2720  				Subjects:  []string{test.subject},
  2721  				Retention: nats.LimitsPolicy,
  2722  				Replicas:  3,
  2723  			})
  2724  			require_NoError(t, err)
  2725  
  2726  			const inactiveThreshold = time.Second
  2727  
  2728  			_, err = js.AddConsumer(test.stream, &nats.ConsumerConfig{
  2729  				DeliverSubject:    nats.NewInbox(),
  2730  				AckPolicy:         nats.AckExplicitPolicy,
  2731  				InactiveThreshold: inactiveThreshold,
  2732  				Durable:           test.durable,
  2733  				Name:              test.name,
  2734  			})
  2735  			require_NoError(t, err)
  2736  
  2737  			name := test.durable
  2738  			if test.durable == _EMPTY_ {
  2739  				name = test.name
  2740  			}
  2741  
  2742  			const msgs = 5_000
  2743  			done, count := make(chan bool, 1), 0
  2744  
  2745  			sub, err := js.Subscribe(_EMPTY_, func(msg *nats.Msg) {
  2746  				require_NoError(t, msg.Ack())
  2747  				count++
  2748  				if count >= msgs {
  2749  					select {
  2750  					case done <- true:
  2751  					default:
  2752  					}
  2753  				}
  2754  			}, nats.Bind(test.stream, name), nats.ManualAck())
  2755  			require_NoError(t, err)
  2756  
  2757  			// This happens only if we start publishing messages after the consumer was created.
  2758  			pubDone := make(chan struct{})
  2759  			go func(subject string) {
  2760  				for i := 0; i < msgs; i++ {
  2761  					js.Publish(subject, []byte("DATA"))
  2762  				}
  2763  				close(pubDone)
  2764  			}(test.subject)
  2765  
  2766  			// Wait for the inactive threshold to expire and for all messages to be published and received.
  2767  			// The bug was that we cleaned up active consumers when we should not.
  2768  			time.Sleep(3 * inactiveThreshold / 2)
  2769  
  2770  			select {
  2771  			case <-pubDone:
  2772  			case <-time.After(10 * time.Second):
  2773  				t.Fatalf("Did not receive completion signal")
  2774  			}
  2775  
  2776  			info, err := js.ConsumerInfo(test.stream, name)
  2777  			if err != nil {
  2778  				t.Fatalf("Expected to be able to retrieve consumer: %v", err)
  2779  			}
  2780  			require_True(t, info.Delivered.Stream == msgs)
  2781  
  2782  			// Stop the subscription and remove the interest.
  2783  			err = sub.Unsubscribe()
  2784  			require_NoError(t, err)
  2785  
  2786  			// Now wait for interest inactivity threshold to kick in.
  2787  			time.Sleep(3 * inactiveThreshold / 2)
  2788  
  2789  			// Check if the consumer has been removed.
  2790  			_, err = js.ConsumerInfo(test.stream, name)
  2791  			require_Error(t, err, nats.ErrConsumerNotFound)
  2792  		})
  2793  	}
  2794  }
  2795  
  2796  // TestJetStreamClusterWALBuildupOnNoOpPull tests whether or not the consumer
  2797  // RAFT log is being compacted when the stream is idle but we are performing
  2798  // lots of fetches. Otherwise the disk usage just spirals out of control if
  2799  // there are no other state changes to trigger a compaction.
  2800  func TestJetStreamClusterWALBuildupOnNoOpPull(t *testing.T) {
  2801  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2802  	defer c.shutdown()
  2803  
  2804  	nc, js := jsClientConnect(t, c.randomServer())
  2805  	defer nc.Close()
  2806  
  2807  	_, err := js.AddStream(&nats.StreamConfig{
  2808  		Name:     "TEST",
  2809  		Subjects: []string{"foo"},
  2810  		Replicas: 3,
  2811  	})
  2812  	require_NoError(t, err)
  2813  
  2814  	sub, err := js.PullSubscribe(
  2815  		"foo",
  2816  		"durable",
  2817  		nats.ConsumerReplicas(3),
  2818  	)
  2819  	require_NoError(t, err)
  2820  
  2821  	for i := 0; i < 10000; i++ {
  2822  		_, _ = sub.Fetch(1, nats.MaxWait(time.Microsecond))
  2823  	}
  2824  
  2825  	// Needs to be at least 10 seconds, otherwise we won't hit the
  2826  	// minSnapDelta that prevents us from snapshotting too often
  2827  	time.Sleep(time.Second * 11)
  2828  
  2829  	for i := 0; i < 1024; i++ {
  2830  		_, _ = sub.Fetch(1, nats.MaxWait(time.Microsecond))
  2831  	}
  2832  
  2833  	time.Sleep(time.Second)
  2834  
  2835  	server := c.randomNonConsumerLeader(globalAccountName, "TEST", "durable")
  2836  
  2837  	stream, err := server.globalAccount().lookupStream("TEST")
  2838  	require_NoError(t, err)
  2839  
  2840  	consumer := stream.lookupConsumer("durable")
  2841  	require_NotNil(t, consumer)
  2842  
  2843  	entries, bytes := consumer.raftNode().Size()
  2844  	t.Log("new entries:", entries)
  2845  	t.Log("new bytes:", bytes)
  2846  
  2847  	if max := uint64(1024); entries > max {
  2848  		t.Fatalf("got %d entries, expected less than %d entries", entries, max)
  2849  	}
  2850  }
  2851  
  2852  // Found in https://github.com/nats-io/nats-server/issues/3848
  2853  // When Max Age was specified and stream was scaled up, new replicas
  2854  // were expiring messages much later than the leader.
  2855  func TestJetStreamClusterStreamMaxAgeScaleUp(t *testing.T) {
  2856  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2857  	defer c.shutdown()
  2858  
  2859  	nc, js := jsClientConnect(t, c.randomServer())
  2860  	defer nc.Close()
  2861  
  2862  	for _, test := range []struct {
  2863  		name    string
  2864  		storage nats.StorageType
  2865  		stream  string
  2866  		purge   bool
  2867  	}{
  2868  		{name: "file", storage: nats.FileStorage, stream: "A", purge: false},
  2869  		{name: "memory", storage: nats.MemoryStorage, stream: "B", purge: false},
  2870  		{name: "file with purge", storage: nats.FileStorage, stream: "C", purge: true},
  2871  		{name: "memory with purge", storage: nats.MemoryStorage, stream: "D", purge: true},
  2872  	} {
  2873  
  2874  		t.Run(test.name, func(t *testing.T) {
  2875  			ttl := time.Second * 5
  2876  			// Add stream with one replica and short MaxAge.
  2877  			_, err := js.AddStream(&nats.StreamConfig{
  2878  				Name:     test.stream,
  2879  				Replicas: 1,
  2880  				Subjects: []string{test.stream},
  2881  				MaxAge:   ttl,
  2882  				Storage:  test.storage,
  2883  			})
  2884  			require_NoError(t, err)
  2885  
  2886  			// Add some messages.
  2887  			for i := 0; i < 10; i++ {
  2888  				sendStreamMsg(t, nc, test.stream, "HELLO")
  2889  			}
  2890  			// We also need to test that we properly set expiry
  2891  			// if the first sequence is not 1.
  2892  			if test.purge {
  2893  				err = js.PurgeStream(test.stream)
  2894  				require_NoError(t, err)
  2895  				// Add some messages.
  2896  				for i := 0; i < 10; i++ {
  2897  					sendStreamMsg(t, nc, test.stream, "HELLO")
  2898  				}
  2899  			}
  2900  			// Mark the time when all messages were published.
  2901  			start := time.Now()
  2902  
  2903  			// Sleep for half of the MaxAge time.
  2904  			time.Sleep(ttl / 2)
  2905  
  2906  			// Scale up the Stream to 3 replicas.
  2907  			_, err = js.UpdateStream(&nats.StreamConfig{
  2908  				Name:     test.stream,
  2909  				Replicas: 3,
  2910  				Subjects: []string{test.stream},
  2911  				MaxAge:   ttl,
  2912  				Storage:  test.storage,
  2913  			})
  2914  			require_NoError(t, err)
  2915  
  2916  			// All messages should still be there.
  2917  			info, err := js.StreamInfo(test.stream)
  2918  			require_NoError(t, err)
  2919  			require_True(t, info.State.Msgs == 10)
  2920  
  2921  			// Wait until MaxAge is reached.
  2922  			time.Sleep(ttl - time.Since(start) + (1 * time.Second))
  2923  
  2924  			// Check if all messages are expired.
  2925  			info, err = js.StreamInfo(test.stream)
  2926  			require_NoError(t, err)
  2927  			require_True(t, info.State.Msgs == 0)
  2928  
  2929  			// Now switch the leader to one of the replicas.
  2930  			_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, test.stream), nil, time.Second)
  2931  			require_NoError(t, err)
  2932  			c.waitOnStreamLeader("$G", test.stream)
  2933  
  2934  			// and make sure that it also expired all messages
  2935  			info, err = js.StreamInfo(test.stream)
  2936  			require_NoError(t, err)
  2937  			require_True(t, info.State.Msgs == 0)
  2938  		})
  2939  	}
  2940  }
  2941  
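        // Verify that an ephemeral consumer on a work-queue stream is replicated (has a raft node)
        // after the stream is scaled up to R3.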
  2942  func TestJetStreamClusterWorkQueueConsumerReplicatedAfterScaleUp(t *testing.T) {
  2943  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2944  	defer c.shutdown()
  2945  
  2946  	nc, js := jsClientConnect(t, c.randomServer())
  2947  	defer nc.Close()
  2948  
  2949  	_, err := js.AddStream(&nats.StreamConfig{
  2950  		Name:      "TEST",
  2951  		Replicas:  1,
  2952  		Subjects:  []string{"WQ"},
  2953  		Retention: nats.WorkQueuePolicy,
  2954  	})
  2955  	require_NoError(t, err)
  2956  
  2957  	// Create an ephemeral consumer.
  2958  	sub, err := js.SubscribeSync("WQ")
  2959  	require_NoError(t, err)
  2960  
  2961  	// Scale up to R3.
  2962  	_, err = js.UpdateStream(&nats.StreamConfig{
  2963  		Name:      "TEST",
  2964  		Replicas:  3,
  2965  		Subjects:  []string{"WQ"},
  2966  		Retention: nats.WorkQueuePolicy,
  2967  	})
  2968  	require_NoError(t, err)
  2969  	c.waitOnStreamLeader(globalAccountName, "TEST")
  2970  
  2971  	ci, err := sub.ConsumerInfo()
  2972  	require_NoError(t, err)
  2973  
  2974  	require_True(t, ci.Config.Replicas == 0 || ci.Config.Replicas == 3)
  2975  
  2976  	c.waitOnConsumerLeader(globalAccountName, "TEST", ci.Name)
  2977  	s := c.consumerLeader(globalAccountName, "TEST", ci.Name)
  2978  	require_NotNil(t, s)
  2979  
  2980  	mset, err := s.GlobalAccount().lookupStream("TEST")
  2981  	require_NoError(t, err)
  2982  
  2983  	o := mset.lookupConsumer(ci.Name)
  2984  	require_NotNil(t, o)
  2985  	require_NotNil(t, o.raftNode())
  2986  }
  2987  
  2988  // https://github.com/nats-io/nats-server/issues/3953
  2989  func TestJetStreamClusterWorkQueueAfterScaleUp(t *testing.T) {
  2990  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  2991  	defer c.shutdown()
  2992  
  2993  	nc, js := jsClientConnect(t, c.randomServer())
  2994  	defer nc.Close()
  2995  
  2996  	_, err := js.AddStream(&nats.StreamConfig{
  2997  		Name:      "TEST",
  2998  		Replicas:  1,
  2999  		Subjects:  []string{"WQ"},
  3000  		Retention: nats.WorkQueuePolicy,
  3001  	})
  3002  	require_NoError(t, err)
  3003  
  3004  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  3005  		Durable:        "d1",
  3006  		DeliverSubject: "d1",
  3007  		AckPolicy:      nats.AckExplicitPolicy,
  3008  	})
  3009  	require_NoError(t, err)
  3010  
  3011  	wch := make(chan bool, 1)
  3012  	_, err = nc.Subscribe("d1", func(msg *nats.Msg) {
  3013  		msg.AckSync()
  3014  		wch <- true
  3015  	})
  3016  	require_NoError(t, err)
  3017  
  3018  	_, err = js.UpdateStream(&nats.StreamConfig{
  3019  		Name:      "TEST",
  3020  		Replicas:  3,
  3021  		Subjects:  []string{"WQ"},
  3022  		Retention: nats.WorkQueuePolicy,
  3023  	})
  3024  	require_NoError(t, err)
  3025  	c.waitOnStreamLeader(globalAccountName, "TEST")
  3026  
  3027  	sendStreamMsg(t, nc, "WQ", "SOME WORK")
  3028  	<-wch
  3029  
  3030  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  3031  		si, err := js.StreamInfo("TEST")
  3032  		require_NoError(t, err)
  3033  		if si.State.Msgs == 0 {
  3034  			return nil
  3035  		}
  3036  		return fmt.Errorf("Still have %d msgs left", si.State.Msgs)
  3037  	})
  3038  }
  3039  
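        // Verify that an interest-based stream stays consistent when acks are processed while a
        // server is down and a consumer snapshot is installed before that server restarts and
        // becomes the stream leader.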
  3040  func TestJetStreamClusterInterestBasedStreamAndConsumerSnapshots(t *testing.T) {
  3041  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3042  	defer c.shutdown()
  3043  
  3044  	nc, js := jsClientConnect(t, c.randomServer())
  3045  	defer nc.Close()
  3046  
  3047  	_, err := js.AddStream(&nats.StreamConfig{
  3048  		Name:      "TEST",
  3049  		Replicas:  3,
  3050  		Subjects:  []string{"foo"},
  3051  		Retention: nats.InterestPolicy,
  3052  	})
  3053  	require_NoError(t, err)
  3054  
  3055  	sub, err := js.SubscribeSync("foo", nats.Durable("d22"))
  3056  	require_NoError(t, err)
  3057  
  3058  	num := 200
  3059  	for i := 0; i < num; i++ {
  3060  		js.PublishAsync("foo", []byte("ok"))
  3061  	}
  3062  	select {
  3063  	case <-js.PublishAsyncComplete():
  3064  	case <-time.After(5 * time.Second):
  3065  		t.Fatalf("Did not receive completion signal")
  3066  	}
  3067  
  3068  	checkSubsPending(t, sub, num)
  3069  
  3070  	// Shutdown one server.
  3071  	s := c.randomServer()
  3072  	s.Shutdown()
  3073  
  3074  	c.waitOnStreamLeader(globalAccountName, "TEST")
  3075  
  3076  	nc, js = jsClientConnect(t, c.randomServer())
  3077  	defer nc.Close()
  3078  
  3079  	// Now ack all messages while the other server is down.
  3080  	for i := 0; i < num; i++ {
  3081  		m, err := sub.NextMsg(time.Second)
  3082  		require_NoError(t, err)
  3083  		m.AckSync()
  3084  	}
  3085  
  3086  	// Wait for all message acks to be processed and all messages to be removed.
  3087  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  3088  		si, err := js.StreamInfo("TEST")
  3089  		require_NoError(t, err)
  3090  		if si.State.Msgs == 0 {
  3091  			return nil
  3092  		}
  3093  		return fmt.Errorf("Still have %d msgs left", si.State.Msgs)
  3094  	})
  3095  
  3096  	// Force a snapshot on the consumer leader before restarting the downed server.
  3097  	cl := c.consumerLeader(globalAccountName, "TEST", "d22")
  3098  	require_NotNil(t, cl)
  3099  
  3100  	mset, err := cl.GlobalAccount().lookupStream("TEST")
  3101  	require_NoError(t, err)
  3102  
  3103  	o := mset.lookupConsumer("d22")
  3104  	require_NotNil(t, o)
  3105  
  3106  	snap, err := o.store.EncodedState()
  3107  	require_NoError(t, err)
  3108  
  3109  	n := o.raftNode()
  3110  	require_NotNil(t, n)
  3111  	require_NoError(t, n.InstallSnapshot(snap))
  3112  
  3113  	// Now restart the downed server.
  3114  	s = c.restartServer(s)
  3115  
  3116  	// Make the restarted server the eventual leader.
  3117  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  3118  		c.waitOnStreamLeader(globalAccountName, "TEST")
  3119  		if sl := c.streamLeader(globalAccountName, "TEST"); sl != s {
  3120  			sl.JetStreamStepdownStream(globalAccountName, "TEST")
  3121  			return fmt.Errorf("Server %s is not leader yet", s)
  3122  		}
  3123  		return nil
  3124  	})
  3125  
  3126  	si, err := js.StreamInfo("TEST")
  3127  	require_NoError(t, err)
  3128  	require_True(t, si.State.Msgs == 0)
  3129  }
  3130  
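        // Verify that consumer followers store the same delivered and ack floor state as the leader,
        // including after leader transfers and full cluster restarts.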
  3131  func TestJetStreamClusterConsumerFollowerStoreStateAckFloorBug(t *testing.T) {
  3132  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3133  	defer c.shutdown()
  3134  
  3135  	nc, js := jsClientConnect(t, c.randomServer())
  3136  	defer nc.Close()
  3137  
  3138  	_, err := js.AddStream(&nats.StreamConfig{
  3139  		Name:     "TEST",
  3140  		Replicas: 3,
  3141  		Subjects: []string{"foo"},
  3142  	})
  3143  	require_NoError(t, err)
  3144  
  3145  	sub, err := js.PullSubscribe(_EMPTY_, "C", nats.BindStream("TEST"), nats.ManualAck())
  3146  	require_NoError(t, err)
  3147  
  3148  	num := 100
  3149  	for i := 0; i < num; i++ {
  3150  		sendStreamMsg(t, nc, "foo", "data")
  3151  	}
  3152  
  3153  	// This one prevents the state for pending from reaching 0 and resetting, which would not show the bug.
  3154  	sendStreamMsg(t, nc, "foo", "data")
  3155  
  3156  	// Ack all but one, out of order, and make sure all consumers have the same stored state.
  3157  	msgs, err := sub.Fetch(num, nats.MaxWait(time.Second))
  3158  	require_NoError(t, err)
  3159  	require_True(t, len(msgs) == num)
  3160  
  3161  	_, err = sub.Fetch(1, nats.MaxWait(time.Second))
  3162  	require_NoError(t, err)
  3163  
  3164  	rand.Shuffle(len(msgs), func(i, j int) { msgs[i], msgs[j] = msgs[j], msgs[i] })
  3165  	for _, m := range msgs {
  3166  		if err := m.AckSync(); err != nil {
  3167  			t.Fatalf("Ack failed :%+v", err)
  3168  		}
  3169  	}
  3170  
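        	// checkConsumerState verifies the delivered and ack floor sequences and that exactly one
        	// message remains ack pending.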
  3171  	checkConsumerState := func(delivered, ackFloor nats.SequenceInfo, numAckPending int) error {
  3172  		expectedDelivered := uint64(num) + 1
  3173  		if delivered.Stream != expectedDelivered || delivered.Consumer != expectedDelivered {
  3174  			return fmt.Errorf("Wrong delivered, expected %d got %+v", expectedDelivered, delivered)
  3175  		}
  3176  		expectedAck := uint64(num)
  3177  		if ackFloor.Stream != expectedAck || ackFloor.Consumer != expectedAck {
  3178  			return fmt.Errorf("Wrong ackFloor, expected %d got %+v", expectedAck, ackFloor)
  3179  		}
  3180  		if numAckPending != 1 {
  3181  			return errors.New("Expected num ack pending to be 1")
  3182  		}
  3183  		return nil
  3184  	}
  3185  
  3186  	ci, err := js.ConsumerInfo("TEST", "C")
  3187  	require_NoError(t, err)
  3188  	require_NoError(t, checkConsumerState(ci.Delivered, ci.AckFloor, ci.NumAckPending))
  3189  
  3190  	// Check each consumer on each server for its store state and make sure it matches as well.
  3191  	checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  3192  		for _, s := range c.servers {
  3193  			mset, err := s.GlobalAccount().lookupStream("TEST")
  3194  			if err != nil {
  3195  				return err
  3196  			}
  3197  			if mset == nil {
  3198  				return errors.New("Mset should not be nil")
  3199  			}
  3200  			o := mset.lookupConsumer("C")
  3201  			if o == nil {
  3202  				return errors.New("Consumer should not be nil")
  3203  			}
  3204  
  3205  			state, err := o.store.State()
  3206  			if err != nil {
  3207  				return err
  3208  			}
  3209  			delivered := nats.SequenceInfo{Stream: state.Delivered.Stream, Consumer: state.Delivered.Consumer}
  3210  			ackFloor := nats.SequenceInfo{Stream: state.AckFloor.Stream, Consumer: state.AckFloor.Consumer}
  3211  			if err := checkConsumerState(delivered, ackFloor, len(state.Pending)); err != nil {
  3212  				return err
  3213  			}
  3214  		}
  3215  		return nil
  3216  	})
  3217  
  3218  	// Now step down the consumer and move its leader around, checking the state after each transition
  3219  	// until every server has been the consumer leader.
  3220  	seen := make(map[*Server]bool)
  3221  	cl := c.consumerLeader(globalAccountName, "TEST", "C")
  3222  	require_NotNil(t, cl)
  3223  	seen[cl] = true
  3224  
  3225  	allSeen := func() bool {
  3226  		for _, s := range c.servers {
  3227  			if !seen[s] {
  3228  				return false
  3229  			}
  3230  		}
  3231  		return true
  3232  	}
  3233  
  3234  	checkAllLeaders := func() {
  3235  		t.Helper()
  3236  		checkFor(t, 20*time.Second, 200*time.Millisecond, func() error {
  3237  			c.waitOnConsumerLeader(globalAccountName, "TEST", "C")
  3238  			if allSeen() {
  3239  				return nil
  3240  			}
  3241  			cl := c.consumerLeader(globalAccountName, "TEST", "C")
  3242  			seen[cl] = true
  3243  			ci, err := js.ConsumerInfo("TEST", "C")
  3244  			if err != nil {
  3245  				return err
  3246  			}
  3247  			if err := checkConsumerState(ci.Delivered, ci.AckFloor, ci.NumAckPending); err != nil {
  3248  				return err
  3249  			}
  3250  			cl.JetStreamStepdownConsumer(globalAccountName, "TEST", "C")
  3251  			return fmt.Errorf("Not all servers have been consumer leader yet")
  3252  		})
  3253  	}
  3254  
  3255  	checkAllLeaders()
  3256  
  3257  	// Now restart all servers and check again.
  3258  	c.stopAll()
  3259  	c.restartAll()
  3260  	c.waitOnLeader()
  3261  
  3262  	nc, js = jsClientConnect(t, c.randomServer())
  3263  	defer nc.Close()
  3264  
  3265  	seen = make(map[*Server]bool)
  3266  	checkAllLeaders()
  3267  }
  3268  
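        // Verify that disabling JetStream on a server does not leave dangling $JSC or $NRG
        // subscriptions behind in the system account.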
  3269  func TestJetStreamClusterInterestLeakOnDisableJetStream(t *testing.T) {
  3270  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3271  	defer c.shutdown()
  3272  
  3273  	nc, js := jsClientConnect(t, c.leader())
  3274  	defer nc.Close()
  3275  
  3276  	for i := 1; i <= 5; i++ {
  3277  		_, err := js.AddStream(&nats.StreamConfig{
  3278  			Name:     fmt.Sprintf("test_%d", i),
  3279  			Subjects: []string{fmt.Sprintf("test_%d", i)},
  3280  			Replicas: 3,
  3281  		})
  3282  		require_NoError(t, err)
  3283  	}
  3284  
  3285  	c.waitOnAllCurrent()
  3286  
  3287  	server := c.randomNonLeader()
  3288  	account := server.SystemAccount()
  3289  
  3290  	server.DisableJetStream()
  3291  
  3292  	var sublist []*subscription
  3293  	account.sl.localSubs(&sublist, false)
  3294  
  3295  	var danglingJSC, danglingRaft int
  3296  	for _, sub := range sublist {
  3297  		if strings.HasPrefix(string(sub.subject), "$JSC.") {
  3298  			danglingJSC++
  3299  		} else if strings.HasPrefix(string(sub.subject), "$NRG.") {
  3300  			danglingRaft++
  3301  		}
  3302  	}
  3303  	if danglingJSC > 0 || danglingRaft > 0 {
  3304  		t.Fatalf("unexpected dangling interests for JetStream assets after shutdown (%d $JSC, %d $NRG)", danglingJSC, danglingRaft)
  3305  	}
  3306  }
  3307  
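        // Verify that a server in lame duck mode transfers its raft leaders away and that no new
        // leaders are placed on it while it is shutting down.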
  3308  func TestJetStreamClusterNoLeadersDuringLameDuck(t *testing.T) {
  3309  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3310  	defer c.shutdown()
  3311  
  3312  	// Grab the first server and set lameduck option directly.
  3313  	s := c.servers[0]
  3314  	s.optsMu.Lock()
  3315  	s.opts.LameDuckDuration = 5 * time.Second
  3316  	s.opts.LameDuckGracePeriod = -5 * time.Second
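        	// The negative grace period is a test shortcut, presumably so lame duck handling starts without the usual pre-shutdown wait.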
  3317  	s.optsMu.Unlock()
  3318  
  3319  	// Connect to the third server.
  3320  	nc, js := jsClientConnect(t, c.servers[2])
  3321  	defer nc.Close()
  3322  
  3323  	allServersHaveLeaders := func() bool {
  3324  		haveLeader := make(map[*Server]bool)
  3325  		for _, s := range c.servers {
  3326  			s.rnMu.RLock()
  3327  			for _, n := range s.raftNodes {
  3328  				if n.Leader() {
  3329  					haveLeader[s] = true
  3330  					break
  3331  				}
  3332  			}
  3333  			s.rnMu.RUnlock()
  3334  		}
  3335  		return len(haveLeader) == len(c.servers)
  3336  	}
  3337  
  3338  	// Create streams until we have a leader on all the servers.
  3339  	var index int
  3340  	checkFor(t, 10*time.Second, time.Millisecond, func() error {
  3341  		if allServersHaveLeaders() {
  3342  			return nil
  3343  		}
  3344  		index++
  3345  		_, err := js.AddStream(&nats.StreamConfig{
  3346  			Name:     fmt.Sprintf("TEST_%d", index),
  3347  			Subjects: []string{fmt.Sprintf("foo.%d", index)},
  3348  			Replicas: 3,
  3349  		})
  3350  		require_NoError(t, err)
  3351  		return fmt.Errorf("Not all servers have at least one leader yet")
  3352  	})
  3353  
  3354  	// Put our server into lameduck mode.
  3355  	// Need a client connected to keep it alive while in LDM.
  3356  	dummy, _ := jsClientConnect(t, s)
  3357  	defer dummy.Close()
  3358  	go s.lameDuckMode()
  3359  
  3360  	// Wait for all leaders to move off.
  3361  	checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
  3362  		s.rnMu.RLock()
  3363  		defer s.rnMu.RUnlock()
  3364  		for _, n := range s.raftNodes {
  3365  			if n.Leader() {
  3366  				return fmt.Errorf("Server still has a leader")
  3367  			}
  3368  		}
  3369  		return nil
  3370  	})
  3371  
  3372  	// All leaders evacuated.
  3373  
  3374  	// Create a goroutine that will create streams constantly.
  3375  	qch := make(chan bool)
  3376  	go func() {
  3377  		var index int
  3378  		for {
  3379  			select {
  3380  			case <-time.After(time.Millisecond):
  3381  				index++
  3382  				_, err := js.AddStream(&nats.StreamConfig{
  3383  					Name:     fmt.Sprintf("NEW_TEST_%d", index),
  3384  					Subjects: []string{fmt.Sprintf("bar.%d", index)},
  3385  					Replicas: 3,
  3386  				})
  3387  				if err != nil {
  3388  					return
  3389  				}
  3390  			case <-qch:
  3391  				return
  3392  			}
  3393  		}
  3394  	}()
  3395  	defer close(qch)
  3396  
  3397  	// Make sure we do not have any leaders placed on the lameduck server.
  3398  	for s.isRunning() {
  3399  		var hasLeader bool
  3400  		s.rnMu.RLock()
  3401  		for _, n := range s.raftNodes {
  3402  			hasLeader = hasLeader || n.Leader()
  3403  		}
  3404  		s.rnMu.RUnlock()
  3405  		if hasLeader {
  3406  			t.Fatalf("Server had a leader when it should not due to lameduck mode")
  3407  		}
  3408  	}
  3409  }
  3410  
  3411  func TestJetStreamClusterNoR1AssetsDuringLameDuck(t *testing.T) {
  3412  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3413  	defer c.shutdown()
  3414  
  3415  	// Grab the first server and set lameduck option directly.
  3416  	s := c.servers[0]
  3417  	s.optsMu.Lock()
  3418  	s.opts.LameDuckDuration = 5 * time.Second
  3419  	s.opts.LameDuckGracePeriod = -5 * time.Second
  3420  	s.optsMu.Unlock()
  3421  
  3422  	// Connect to the server to keep it alive when we go into LDM.
  3423  	dummy, _ := jsClientConnect(t, s)
  3424  	defer dummy.Close()
  3425  
  3426  	// Connect to the third server.
  3427  	nc, js := jsClientConnect(t, c.servers[2])
  3428  	defer nc.Close()
  3429  
  3430  	// Now put the first server into lame duck mode.
  3431  	go s.lameDuckMode()
  3432  
  3433  	// Wait for news to arrive that the first server has gone into
  3434  	// lame duck mode and been marked offline.
  3435  	checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
  3436  		id := s.info.ID
  3437  		s := c.servers[2]
  3438  		s.mu.RLock()
  3439  		defer s.mu.RUnlock()
  3440  
  3441  		var isOffline bool
  3442  		s.nodeToInfo.Range(func(_, v any) bool {
  3443  			ni := v.(nodeInfo)
  3444  			if ni.id == id {
  3445  				isOffline = ni.offline
  3446  				return false
  3447  			}
  3448  			return true
  3449  		})
  3450  
  3451  		if !isOffline {
  3452  			return fmt.Errorf("first node is still online unexpectedly")
  3453  		}
  3454  		return nil
  3455  	})
  3456  
  3457  	// Create a goroutine that will create streams constantly.
  3458  	qch := make(chan bool)
  3459  	go func() {
  3460  		var index int
  3461  		for {
  3462  			select {
  3463  			case <-time.After(time.Millisecond * 25):
  3464  				index++
  3465  				_, err := js.AddStream(&nats.StreamConfig{
  3466  					Name:     fmt.Sprintf("NEW_TEST_%d", index),
  3467  					Subjects: []string{fmt.Sprintf("bar.%d", index)},
  3468  					Replicas: 1,
  3469  				})
  3470  				if err != nil {
  3471  					return
  3472  				}
  3473  			case <-qch:
  3474  				return
  3475  			}
  3476  		}
  3477  	}()
  3478  	defer close(qch)
  3479  
  3480  	gacc := s.GlobalAccount()
  3481  	if gacc == nil {
  3482  		t.Fatalf("No global account")
  3483  	}
  3484  	// Make sure we do not have any R1 assets placed on the lameduck server.
  3485  	for s.isRunning() {
  3486  		if len(gacc.streams()) > 0 {
  3487  			t.Fatalf("Server had an R1 asset when it should not due to lameduck mode")
  3488  		}
  3489  		time.Sleep(15 * time.Millisecond)
  3490  	}
  3491  	s.WaitForShutdown()
  3492  }
  3493  
  3494  // If a consumer has not been registered (possible in heavily loaded systems with lots of assets)
  3495  // it could miss the signal of a message going away. If that message was pending and expires, the
  3496  // ack floor could fall below the stream's first sequence. This test will force that condition and
  3497  // make sure the system resolves itself.
  3498  func TestJetStreamClusterConsumerAckFloorDrift(t *testing.T) {
  3499  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  3500  	defer c.shutdown()
  3501  
  3502  	nc, js := jsClientConnect(t, c.randomServer())
  3503  	defer nc.Close()
  3504  
  3505  	_, err := js.AddStream(&nats.StreamConfig{
  3506  		Name:     "TEST",
  3507  		Subjects: []string{"*"},
  3508  		Replicas: 3,
  3509  		MaxAge:   time.Second,
  3510  		MaxMsgs:  10,
  3511  	})
  3512  	require_NoError(t, err)
  3513  
  3514  	sub, err := js.PullSubscribe("foo", "C")
  3515  	require_NoError(t, err)
  3516  
  3517  	for i := 0; i < 10; i++ {
  3518  		sendStreamMsg(t, nc, "foo", "HELLO")
  3519  	}
  3520  
  3521  	// No-op but will surface as delivered.
  3522  	_, err = sub.Fetch(10)
  3523  	require_NoError(t, err)
  3524  
  3525  	// We will grab the state directly, with delivered being 10 and the ack floor being 0.
  3526  	cl := c.consumerLeader(globalAccountName, "TEST", "C")
  3527  	require_NotNil(t, cl)
  3528  
  3529  	mset, err := cl.GlobalAccount().lookupStream("TEST")
  3530  	require_NoError(t, err)
  3531  	o := mset.lookupConsumer("C")
  3532  	require_NotNil(t, o)
  3533  	o.mu.RLock()
  3534  	state, err := o.store.State()
  3535  	o.mu.RUnlock()
  3536  	require_NoError(t, err)
  3537  	require_NotNil(t, state)
  3538  
  3539  	// Now let messages expire.
  3540  	checkFor(t, 5*time.Second, time.Second, func() error {
  3541  		si, err := js.StreamInfo("TEST")
  3542  		require_NoError(t, err)
  3543  		if si.State.Msgs == 0 {
  3544  			return nil
  3545  		}
  3546  		return fmt.Errorf("stream still has msgs")
  3547  	})
  3548  
  3549  	// Set state to ackfloor of 5 and no pending.
  3550  	state.AckFloor.Consumer = 5
  3551  	state.AckFloor.Stream = 5
  3552  	state.Pending = nil
  3553  
  3554  	// Now put the state back underneath the consumers.
  3555  	for _, s := range c.servers {
  3556  		mset, err := s.GlobalAccount().lookupStream("TEST")
  3557  		require_NoError(t, err)
  3558  		o := mset.lookupConsumer("C")
  3559  		require_NotNil(t, o)
  3560  		o.mu.Lock()
  3561  		err = o.setStoreState(state)
  3562  		cfs := o.store.(*consumerFileStore)
  3563  		o.mu.Unlock()
  3564  		require_NoError(t, err)
  3565  		// The lower layer will ignore this, so set the state more directly.
  3566  		cfs.mu.Lock()
  3567  		cfs.state = *state
  3568  		cfs.mu.Unlock()
  3569  		// Also snapshot to remove any raft entries that could affect it.
  3570  		snap, err := o.store.EncodedState()
  3571  		require_NoError(t, err)
  3572  		require_NoError(t, o.raftNode().InstallSnapshot(snap))
  3573  	}
  3574  
  3575  	cl.JetStreamStepdownConsumer(globalAccountName, "TEST", "C")
  3576  	c.waitOnConsumerLeader(globalAccountName, "TEST", "C")
  3577  
  3578  	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
  3579  		ci, err := js.ConsumerInfo("TEST", "C")
  3580  		require_NoError(t, err)
  3581  		// Make sure we catch this and adjust.
  3582  		if ci.AckFloor.Stream == 10 && ci.AckFloor.Consumer == 10 {
  3583  			return nil
  3584  		}
  3585  		return fmt.Errorf("AckFloor not correct, expected 10, got %+v", ci.AckFloor)
  3586  	})
  3587  }
  3588  
  3589  func TestJetStreamClusterInterestStreamFilteredConsumersWithNoInterest(t *testing.T) {
  3590  	c := createJetStreamClusterExplicit(t, "R5S", 5)
  3591  	defer c.shutdown()
  3592  
  3593  	nc, js := jsClientConnect(t, c.randomServer())
  3594  	defer nc.Close()
  3595  
  3596  	_, err := js.AddStream(&nats.StreamConfig{
  3597  		Name:      "TEST",
  3598  		Subjects:  []string{"*"},
  3599  		Retention: nats.InterestPolicy,
  3600  		Replicas:  3,
  3601  	})
  3602  	require_NoError(t, err)
  3603  
  3604  	// Create three subscribers.
  3605  	ackCb := func(m *nats.Msg) { m.Ack() }
  3606  
  3607  	_, err = js.Subscribe("foo", ackCb, nats.BindStream("TEST"), nats.ManualAck())
  3608  	require_NoError(t, err)
  3609  
  3610  	_, err = js.Subscribe("bar", ackCb, nats.BindStream("TEST"), nats.ManualAck())
  3611  	require_NoError(t, err)
  3612  
  3613  	_, err = js.Subscribe("baz", ackCb, nats.BindStream("TEST"), nats.ManualAck())
  3614  	require_NoError(t, err)
  3615  
  3616  	// Now send 100 messages, randomly picking foo or bar, but never baz.
  3617  	for i := 0; i < 100; i++ {
  3618  		if rand.Intn(2) > 0 {
  3619  			sendStreamMsg(t, nc, "foo", "HELLO")
  3620  		} else {
  3621  			sendStreamMsg(t, nc, "bar", "WORLD")
  3622  		}
  3623  	}
  3624  
  3625  	// Messages are expected to go to 0.
  3626  	checkFor(t, time.Second, 100*time.Millisecond, func() error {
  3627  		si, err := js.StreamInfo("TEST")
  3628  		require_NoError(t, err)
  3629  		if si.State.Msgs == 0 {
  3630  			return nil
  3631  		}
  3632  		return fmt.Errorf("stream still has msgs")
  3633  	})
  3634  }
  3635  
  3636  func TestJetStreamClusterChangeClusterAfterStreamCreate(t *testing.T) {
  3637  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  3638  	defer c.shutdown()
  3639  
  3640  	nc, js := jsClientConnect(t, c.randomServer())
  3641  	defer nc.Close()
  3642  
  3643  	_, err := js.AddStream(&nats.StreamConfig{
  3644  		Name:     "TEST",
  3645  		Subjects: []string{"*"},
  3646  		Replicas: 3,
  3647  	})
  3648  	require_NoError(t, err)
  3649  
  3650  	for i := 0; i < 1000; i++ {
  3651  		sendStreamMsg(t, nc, "foo", "HELLO")
  3652  	}
  3653  
  3654  	_, err = js.UpdateStream(&nats.StreamConfig{
  3655  		Name:     "TEST",
  3656  		Subjects: []string{"*"},
  3657  		Replicas: 1,
  3658  	})
  3659  	require_NoError(t, err)
  3660  
  3661  	c.stopAll()
  3662  
  3663  	c.name = "FOO"
  3664  	for _, o := range c.opts {
  3665  		buf, err := os.ReadFile(o.ConfigFile)
  3666  		require_NoError(t, err)
  3667  		nbuf := bytes.Replace(buf, []byte("name: NATS"), []byte("name: FOO"), 1)
  3668  		err = os.WriteFile(o.ConfigFile, nbuf, 0640)
  3669  		require_NoError(t, err)
  3670  	}
  3671  
  3672  	c.restartAll()
  3673  	c.waitOnLeader()
  3674  	c.waitOnStreamLeader(globalAccountName, "TEST")
  3675  
  3676  	nc, js = jsClientConnect(t, c.randomServer())
  3677  	defer nc.Close()
  3678  
  3679  	_, err = js.UpdateStream(&nats.StreamConfig{
  3680  		Name:     "TEST",
  3681  		Subjects: []string{"*"},
  3682  		Replicas: 3,
  3683  	})
  3684  	// This should fail with no suitable peers, since the asset was created under the NATS cluster, which no longer has any peers.
  3685  	require_Error(t, err, errors.New("nats: no suitable peers for placement"))
  3686  
  3687  	// Make sure we can swap the cluster.
  3688  	_, err = js.UpdateStream(&nats.StreamConfig{
  3689  		Name:      "TEST",
  3690  		Subjects:  []string{"*"},
  3691  		Placement: &nats.Placement{Cluster: "FOO"},
  3692  	})
  3693  	require_NoError(t, err)
  3694  }
  3695  
  3696  // The consumer info() call does not take into account whether a consumer
  3697  // is a leader or not, so results could differ greatly between servers that
  3698  // house consumer followers and those that house leaders.
  3699  func TestJetStreamClusterConsumerInfoForJszForFollowers(t *testing.T) {
  3700  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  3701  	defer c.shutdown()
  3702  
  3703  	nc, js := jsClientConnect(t, c.randomServer())
  3704  	defer nc.Close()
  3705  
  3706  	_, err := js.AddStream(&nats.StreamConfig{
  3707  		Name:     "TEST",
  3708  		Subjects: []string{"*"},
  3709  		Replicas: 3,
  3710  	})
  3711  	require_NoError(t, err)
  3712  
  3713  	for i := 0; i < 1000; i++ {
  3714  		sendStreamMsg(t, nc, "foo", "HELLO")
  3715  	}
  3716  
  3717  	sub, err := js.PullSubscribe("foo", "d")
  3718  	require_NoError(t, err)
  3719  
  3720  	fetch, ack := 122, 22
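        	// Fetch 122 messages but ack only the first 22, so delivered and ack floor diverge.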
  3721  	msgs, err := sub.Fetch(fetch, nats.MaxWait(10*time.Second))
  3722  	require_NoError(t, err)
  3723  	require_True(t, len(msgs) == fetch)
  3724  	for _, m := range msgs[:ack] {
  3725  		m.AckSync()
  3726  	}
  3727  	// Let acks propagate.
  3728  	time.Sleep(100 * time.Millisecond)
  3729  
  3730  	for _, s := range c.servers {
  3731  		jsz, err := s.Jsz(&JSzOptions{Accounts: true, Consumer: true})
  3732  		require_NoError(t, err)
  3733  		require_True(t, len(jsz.AccountDetails) == 1)
  3734  		require_True(t, len(jsz.AccountDetails[0].Streams) == 1)
  3735  		require_True(t, len(jsz.AccountDetails[0].Streams[0].Consumer) == 1)
  3736  		consumer := jsz.AccountDetails[0].Streams[0].Consumer[0]
  3737  		if consumer.Delivered.Consumer != uint64(fetch) || consumer.Delivered.Stream != uint64(fetch) {
  3738  			t.Fatalf("Incorrect delivered for %v: %+v", s, consumer.Delivered)
  3739  		}
  3740  		if consumer.AckFloor.Consumer != uint64(ack) || consumer.AckFloor.Stream != uint64(ack) {
  3741  			t.Fatalf("Incorrect ackfloor for %v: %+v", s, consumer.AckFloor)
  3742  		}
  3743  	}
  3744  }
  3745  
  3746  // Under certain scenarios we have seen consumers become stopped and cause healthz to fail.
  3747  // The specific scenario is heavy load combined with stream resets on upgrades that could orphan consumers.
  3748  func TestJetStreamClusterHealthzCheckForStoppedAssets(t *testing.T) {
  3749  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  3750  	defer c.shutdown()
  3751  
  3752  	nc, js := jsClientConnect(t, c.randomServer())
  3753  	defer nc.Close()
  3754  
  3755  	_, err := js.AddStream(&nats.StreamConfig{
  3756  		Name:     "TEST",
  3757  		Subjects: []string{"*"},
  3758  		Replicas: 3,
  3759  	})
  3760  	require_NoError(t, err)
  3761  
  3762  	for i := 0; i < 1000; i++ {
  3763  		sendStreamMsg(t, nc, "foo", "HELLO")
  3764  	}
  3765  
  3766  	sub, err := js.PullSubscribe("foo", "d")
  3767  	require_NoError(t, err)
  3768  
  3769  	fetch, ack := 122, 22
  3770  	msgs, err := sub.Fetch(fetch, nats.MaxWait(10*time.Second))
  3771  	require_NoError(t, err)
  3772  	require_True(t, len(msgs) == fetch)
  3773  	for _, m := range msgs[:ack] {
  3774  		m.AckSync()
  3775  	}
  3776  	// Let acks propagate.
  3777  	time.Sleep(100 * time.Millisecond)
  3778  
  3779  	// We will now stop a stream on a given server.
  3780  	s := c.randomServer()
  3781  	mset, err := s.GlobalAccount().lookupStream("TEST")
  3782  	require_NoError(t, err)
  3783  	// Stop the stream
  3784  	mset.stop(false, false)
  3785  
  3786  	// Wait for exit.
  3787  	time.Sleep(100 * time.Millisecond)
  3788  
  3789  	checkFor(t, 15*time.Second, 500*time.Millisecond, func() error {
  3790  		hs := s.healthz(nil)
  3791  		if hs.Error != _EMPTY_ {
  3792  			return errors.New(hs.Error)
  3793  		}
  3794  		return nil
  3795  	})
  3796  
  3797  	// Now take out the consumer.
  3798  	mset, err = s.GlobalAccount().lookupStream("TEST")
  3799  	require_NoError(t, err)
  3800  
  3801  	o := mset.lookupConsumer("d")
  3802  	require_NotNil(t, o)
  3803  
  3804  	o.stop()
  3805  	// Wait for exit.
  3806  	time.Sleep(100 * time.Millisecond)
  3807  
  3808  	checkFor(t, 5*time.Second, 500*time.Millisecond, func() error {
  3809  		hs := s.healthz(nil)
  3810  		if hs.Error != _EMPTY_ {
  3811  			return errors.New(hs.Error)
  3812  		}
  3813  		return nil
  3814  	})
  3815  
  3816  	// Now just stop the raft node from underneath the consumer.
  3817  	o = mset.lookupConsumer("d")
  3818  	require_NotNil(t, o)
  3819  	node := o.raftNode()
  3820  	require_NotNil(t, node)
  3821  	node.Stop()
  3822  
  3823  	checkFor(t, 5*time.Second, 500*time.Millisecond, func() error {
  3824  		hs := s.healthz(nil)
  3825  		if hs.Error != _EMPTY_ {
  3826  			return errors.New(hs.Error)
  3827  		}
  3828  		return nil
  3829  	})
  3830  }
  3831  
  3832  // Make sure that stopping a stream shuts down its raft node.
  3833  func TestJetStreamClusterStreamNodeShutdownBugOnStop(t *testing.T) {
  3834  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  3835  	defer c.shutdown()
  3836  
  3837  	nc, js := jsClientConnect(t, c.randomServer())
  3838  	defer nc.Close()
  3839  
  3840  	_, err := js.AddStream(&nats.StreamConfig{
  3841  		Name:     "TEST",
  3842  		Subjects: []string{"*"},
  3843  		Replicas: 3,
  3844  	})
  3845  	require_NoError(t, err)
  3846  
  3847  	for i := 0; i < 100; i++ {
  3848  		sendStreamMsg(t, nc, "foo", "HELLO")
  3849  	}
  3850  
  3851  	s := c.randomServer()
  3852  	numNodesStart := s.numRaftNodes()
  3853  	mset, err := s.GlobalAccount().lookupStream("TEST")
  3854  	require_NoError(t, err)
  3855  	node := mset.raftNode()
  3856  	require_NotNil(t, node)
  3857  	node.InstallSnapshot(mset.stateSnapshot())
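        	// Install a snapshot so the raft node has persisted state before the stream (and its node) is stopped.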
  3858  	// Stop the stream
  3859  	mset.stop(false, false)
  3860  
  3861  	if numNodes := s.numRaftNodes(); numNodes != numNodesStart-1 {
  3862  		t.Fatalf("RAFT nodes after stream stop incorrect: %d vs %d", numNodesStart, numNodes)
  3863  	}
  3864  }
  3865  
  3866  func TestJetStreamClusterStreamAccountingOnStoreError(t *testing.T) {
  3867  	c := createJetStreamClusterWithTemplate(t, jsClusterMaxBytesAccountLimitTempl, "NATS", 3)
  3868  	defer c.shutdown()
  3869  
  3870  	nc, js := jsClientConnect(t, c.randomServer())
  3871  	defer nc.Close()
  3872  
  3873  	_, err := js.AddStream(&nats.StreamConfig{
  3874  		Name:     "TEST",
  3875  		Subjects: []string{"*"},
  3876  		MaxBytes: 1 * 1024 * 1024 * 1024,
  3877  		Replicas: 3,
  3878  	})
  3879  	require_NoError(t, err)
  3880  
  3881  	msg := strings.Repeat("Z", 32*1024)
  3882  	for i := 0; i < 10; i++ {
  3883  		sendStreamMsg(t, nc, "foo", msg)
  3884  	}
  3885  	s := c.randomServer()
  3886  	acc, err := s.LookupAccount("$U")
  3887  	require_NoError(t, err)
  3888  	mset, err := acc.lookupStream("TEST")
  3889  	require_NoError(t, err)
  3890  	mset.mu.Lock()
  3891  	mset.store.Stop()
  3892  	sjs := mset.js
  3893  	mset.mu.Unlock()
  3894  
  3895  	// Now delete the stream
  3896  	js.DeleteStream("TEST")
  3897  
  3898  	// Wait for this to propagate.
  3899  	// The bug will have us not release reserved resources properly.
  3900  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  3901  		info, err := js.AccountInfo()
  3902  		require_NoError(t, err)
  3903  		// Default tier
  3904  		if info.Store != 0 {
  3905  			return fmt.Errorf("Expected store to be 0 but got %v", friendlyBytes(info.Store))
  3906  		}
  3907  		return nil
  3908  	})
  3909  
  3910  	// Now check the server's JetStream state directly regarding reserved storage.
  3911  	sjs.mu.RLock()
  3912  	reserved := sjs.storeReserved
  3913  	sjs.mu.RUnlock()
  3914  	// Under the bug this would show 1GB.
  3915  	if reserved != 0 {
  3916  		t.Fatalf("Expected store reserved to be 0 after stream delete, got %v", friendlyBytes(reserved))
  3917  	}
  3918  }
  3919  
  3920  func TestJetStreamClusterStreamAccountingDriftFixups(t *testing.T) {
  3921  	c := createJetStreamClusterWithTemplate(t, jsClusterMaxBytesAccountLimitTempl, "NATS", 3)
  3922  	defer c.shutdown()
  3923  
  3924  	nc, js := jsClientConnect(t, c.randomServer())
  3925  	defer nc.Close()
  3926  
  3927  	_, err := js.AddStream(&nats.StreamConfig{
  3928  		Name:     "TEST",
  3929  		Subjects: []string{"*"},
  3930  		MaxBytes: 2 * 1024 * 1024,
  3931  		Replicas: 3,
  3932  	})
  3933  	require_NoError(t, err)
  3934  
  3935  	msg := strings.Repeat("Z", 32*1024)
  3936  	for i := 0; i < 100; i++ {
  3937  		sendStreamMsg(t, nc, "foo", msg)
  3938  	}
  3939  
  3940  	err = js.PurgeStream("TEST")
  3941  	require_NoError(t, err)
  3942  
  3943  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  3944  		info, err := js.AccountInfo()
  3945  		require_NoError(t, err)
  3946  		if info.Store != 0 {
  3947  			return fmt.Errorf("Store usage not 0: %d", info.Store)
  3948  		}
  3949  		return nil
  3950  	})
  3951  
  3952  	s := c.leader()
  3953  	jsz, err := s.Jsz(nil)
  3954  	require_NoError(t, err)
  3955  	require_True(t, jsz.JetStreamStats.Store == 0)
  3956  
  3957  	acc, err := s.LookupAccount("$U")
  3958  	require_NoError(t, err)
  3959  	mset, err := acc.lookupStream("TEST")
  3960  	require_NoError(t, err)
  3961  	mset.mu.RLock()
  3962  	jsa, tier, stype := mset.jsa, mset.tier, mset.stype
  3963  	mset.mu.RUnlock()
  3964  	// Drift the usage.
  3965  	jsa.updateUsage(tier, stype, -100)
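        	// The periodic usage sync is expected to detect and correct this artificially introduced drift.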
  3966  
  3967  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  3968  		info, err := js.AccountInfo()
  3969  		require_NoError(t, err)
  3970  		if info.Store != 0 {
  3971  			return fmt.Errorf("Store usage not 0: %d", info.Store)
  3972  		}
  3973  		return nil
  3974  	})
  3975  	jsz, err = s.Jsz(nil)
  3976  	require_NoError(t, err)
  3977  	require_True(t, jsz.JetStreamStats.Store == 0)
  3978  }
  3979  
  3980  // Some older streams seem to have been created or exist with no explicit cluster setting.
  3981  // For servers <= 2.9.16 you could not scale these streams up since we could not place them in another cluster.
  3982  func TestJetStreamClusterStreamScaleUpNoGroupCluster(t *testing.T) {
  3983  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  3984  	defer c.shutdown()
  3985  
  3986  	nc, js := jsClientConnect(t, c.randomServer())
  3987  	defer nc.Close()
  3988  
  3989  	_, err := js.AddStream(&nats.StreamConfig{
  3990  		Name:     "TEST",
  3991  		Subjects: []string{"*"},
  3992  	})
  3993  	require_NoError(t, err)
  3994  
  3995  	// Manually grab the stream assignment and update it to remove the group cluster.
  3996  	s := c.streamLeader(globalAccountName, "TEST")
  3997  	mset, err := s.GlobalAccount().lookupStream("TEST")
  3998  	require_NoError(t, err)
  3999  
  4000  	sa := mset.streamAssignment()
  4001  	require_NotNil(t, sa)
  4002  	// Make a copy so we do not change the stream's own assignment.
  4003  	sa = sa.copyGroup()
  4004  	// Remove cluster and preferred.
  4005  	sa.Group.Cluster = _EMPTY_
  4006  	sa.Group.Preferred = _EMPTY_
  4007  	// Insert into meta layer.
  4008  	if sjs := s.getJetStream(); sjs != nil {
  4009  		sjs.mu.RLock()
  4010  		meta := sjs.cluster.meta
  4011  		sjs.mu.RUnlock()
  4012  		if meta != nil {
  4013  			meta.ForwardProposal(encodeUpdateStreamAssignment(sa))
  4014  		}
  4015  	}
  4016  	// Make sure it got propagated.
  4017  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  4018  		sa := mset.streamAssignment().copyGroup()
  4019  		require_NotNil(t, sa)
  4020  		if sa.Group.Cluster != _EMPTY_ {
  4021  			return fmt.Errorf("Cluster still not cleared")
  4022  		}
  4023  		return nil
  4024  	})
  4025  	// Now we know it has been nil'd out. Make sure we can scale up.
  4026  	_, err = js.UpdateStream(&nats.StreamConfig{
  4027  		Name:     "TEST",
  4028  		Subjects: []string{"*"},
  4029  		Replicas: 3,
  4030  	})
  4031  	require_NoError(t, err)
  4032  }
  4033  
  4034  // https://github.com/nats-io/nats-server/issues/4162
  4035  func TestJetStreamClusterStaleDirectGetOnRestart(t *testing.T) {
  4036  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  4037  	defer c.shutdown()
  4038  
  4039  	nc, js := jsClientConnect(t, c.randomServer())
  4040  	defer nc.Close()
  4041  
  4042  	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
  4043  		Bucket:   "TEST",
  4044  		Replicas: 3,
  4045  	})
  4046  	require_NoError(t, err)
  4047  
  4048  	_, err = kv.PutString("foo", "bar")
  4049  	require_NoError(t, err)
  4050  
  4051  	// Close the client in case we were connected to the server shut down below.
  4052  	// We will recreate it.
  4053  	nc.Close()
  4054  
  4055  	// Shut down a non-leader.
  4056  	s := c.randomNonStreamLeader(globalAccountName, "KV_TEST")
  4057  	s.Shutdown()
  4058  
  4059  	nc, js = jsClientConnect(t, c.randomServer())
  4060  	defer nc.Close()
  4061  
  4062  	kv, err = js.KeyValue("TEST")
  4063  	require_NoError(t, err)
  4064  
  4065  	_, err = kv.PutString("foo", "baz")
  4066  	require_NoError(t, err)
  4067  
  4068  	errCh := make(chan error, 100)
  4069  	done := make(chan struct{})
  4070  
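        	// Continuously read the key while the stopped server restarts; any error or stale value is reported on errCh.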
  4071  	go func() {
  4072  		nc, js := jsClientConnect(t, c.randomServer())
  4073  		defer nc.Close()
  4074  
  4075  		kv, err := js.KeyValue("TEST")
  4076  		if err != nil {
  4077  			errCh <- err
  4078  			return
  4079  		}
  4080  
  4081  		for {
  4082  			select {
  4083  			case <-done:
  4084  				return
  4085  			default:
  4086  				entry, err := kv.Get("foo")
  4087  				if err != nil {
  4088  					errCh <- err
  4089  					return
  4090  				}
  4091  				if v := string(entry.Value()); v != "baz" {
  4092  					errCh <- fmt.Errorf("Got wrong value: %q", v)
  4093  				}
  4094  			}
  4095  		}
  4096  	}()
  4097  
  4098  	// Restart
  4099  	c.restartServer(s)
  4100  	// Wait for a bit to make sure that as this server participates in direct gets
  4101  	// it does not serve stale reads.
  4102  	time.Sleep(2 * time.Second)
  4103  	close(done)
  4104  
  4105  	if len(errCh) > 0 {
  4106  		t.Fatalf("Expected no errors but got %v", <-errCh)
  4107  	}
  4108  }
  4109  
  4110  // This test mimics a user's setup where there is a cloud cluster/domain, and clusters for eu and ap that are leafnoded into the
  4111  // cloud cluster, and one for cn that is leafnoded into the ap cluster.
  4112  // We broke basic connectivity in 2.9.17 when publishing in eu for delivery in cn on the same account, which is daisy chained through ap.
  4113  // We also test cross account delivery in this test.
  4114  func TestJetStreamClusterLeafnodePlusDaisyChainSetup(t *testing.T) {
  4115  	var cloudTmpl = `
  4116  		listen: 127.0.0.1:-1
  4117  		server_name: %s
  4118  		jetstream: {max_mem_store: 256MB, max_file_store: 2GB, domain: CLOUD, store_dir: '%s'}
  4119  
  4120  		leaf { listen: 127.0.0.1:-1 }
  4121  
  4122  		cluster {
  4123  			name: %s
  4124  			listen: 127.0.0.1:%d
  4125  			routes = [%s]
  4126  		}
  4127  
  4128  		accounts {
  4129  			F {
  4130  				jetstream: enabled
  4131  				users = [ { user: "F", pass: "pass" } ]
  4132  				exports [ { stream: "F.>" } ]
  4133  			}
  4134  			T {
  4135  				jetstream: enabled
  4136  				users = [ { user: "T", pass: "pass" } ]
  4137  				imports [ { stream: { account: F, subject: "F.>"} } ]
  4138  			}
  4139  			$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
  4140  		}`
  4141  
  4142  	// Now create the cloud and make sure we are connected.
  4143  	// Cloud
  4144  	c := createJetStreamCluster(t, cloudTmpl, "CLOUD", _EMPTY_, 3, 22020, false)
  4145  	defer c.shutdown()
  4146  
  4147  	var lnTmpl = `
  4148  		listen: 127.0.0.1:-1
  4149  		server_name: %s
  4150  		jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  4151  
  4152  		{{leaf}}
  4153  
  4154  		cluster {
  4155  			name: %s
  4156  			listen: 127.0.0.1:%d
  4157  			routes = [%s]
  4158  		}
  4159  
  4160  		accounts {
  4161  			F {
  4162  				jetstream: enabled
  4163  				users = [ { user: "F", pass: "pass" } ]
  4164  				exports [ { stream: "F.>" } ]
  4165  			}
  4166  			T {
  4167  				jetstream: enabled
  4168  				users = [ { user: "T", pass: "pass" } ]
  4169  				imports [ { stream: { account: F, subject: "F.>"} } ]
  4170  			}
  4171  			$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
  4172  		}`
  4173  
  4174  	var leafFrag = `
  4175  			leaf {
  4176  				listen: 127.0.0.1:-1
  4177  				remotes [ { urls: [ %s ], account: "T" }, { urls: [ %s ], account: "F" } ]
  4178  			}`
  4179  
  4180  	genLeafTmpl := func(tmpl string, c *cluster) string {
  4181  		t.Helper()
  4182  		// Create our leafnode cluster template first.
  4183  		var lnt, lnf []string
  4184  		for _, s := range c.servers {
  4185  			if s.ClusterName() != c.name {
  4186  				continue
  4187  			}
  4188  			ln := s.getOpts().LeafNode
  4189  			lnt = append(lnt, fmt.Sprintf("nats://T:pass@%s:%d", ln.Host, ln.Port))
  4190  			lnf = append(lnf, fmt.Sprintf("nats://F:pass@%s:%d", ln.Host, ln.Port))
  4191  		}
  4192  		lntc := strings.Join(lnt, ", ")
  4193  		lnfc := strings.Join(lnf, ", ")
  4194  		return strings.Replace(tmpl, "{{leaf}}", fmt.Sprintf(leafFrag, lntc, lnfc), 1)
  4195  	}
  4196  
  4197  	// Cluster EU
  4198  	// Domain is "EU".
  4199  	tmpl := strings.Replace(lnTmpl, "store_dir:", fmt.Sprintf(`domain: "%s", store_dir:`, "EU"), 1)
  4200  	tmpl = genLeafTmpl(tmpl, c)
  4201  	lceu := createJetStreamCluster(t, tmpl, "EU", "EU-", 3, 22110, false)
  4202  	lceu.waitOnClusterReady()
  4203  	defer lceu.shutdown()
  4204  
  4205  	for _, s := range lceu.servers {
  4206  		checkLeafNodeConnectedCount(t, s, 2)
  4207  	}
  4208  
  4209  	// Cluster AP
  4210  	// Domain is "AP".
  4211  	tmpl = strings.Replace(lnTmpl, "store_dir:", fmt.Sprintf(`domain: "%s", store_dir:`, "AP"), 1)
  4212  	tmpl = genLeafTmpl(tmpl, c)
  4213  	lcap := createJetStreamCluster(t, tmpl, "AP", "AP-", 3, 22180, false)
  4214  	lcap.waitOnClusterReady()
  4215  	defer lcap.shutdown()
  4216  
  4217  	for _, s := range lcap.servers {
  4218  		checkLeafNodeConnectedCount(t, s, 2)
  4219  	}
  4220  
  4221  	// Cluster CN
  4222  	// Domain is "CN".
  4223  	// This one connects to AP, not the cloud hub.
  4224  	tmpl = strings.Replace(lnTmpl, "store_dir:", fmt.Sprintf(`domain: "%s", store_dir:`, "CN"), 1)
  4225  	tmpl = genLeafTmpl(tmpl, lcap)
  4226  	lccn := createJetStreamCluster(t, tmpl, "CN", "CN-", 3, 22280, false)
  4227  	lccn.waitOnClusterReady()
  4228  	defer lccn.shutdown()
  4229  
  4230  	for _, s := range lccn.servers {
  4231  		checkLeafNodeConnectedCount(t, s, 2)
  4232  	}
  4233  
  4234  	// Now connect to CN on account F and subscribe to data.
  4235  	nc, _ := jsClientConnect(t, lccn.randomServer(), nats.UserInfo("F", "pass"))
  4236  	defer nc.Close()
  4237  	fsub, err := nc.SubscribeSync("F.EU.>")
  4238  	require_NoError(t, err)
  4239  
  4240  	// Same for account T where the import is.
  4241  	nc, _ = jsClientConnect(t, lccn.randomServer(), nats.UserInfo("T", "pass"))
  4242  	defer nc.Close()
  4243  	tsub, err := nc.SubscribeSync("F.EU.>")
  4244  	require_NoError(t, err)
  4245  
  4246  	// Let sub propagate.
  4247  	time.Sleep(500 * time.Millisecond)
  4248  
  4249  	// Now connect to EU on account F and generate data.
  4250  	nc, _ = jsClientConnect(t, lceu.randomServer(), nats.UserInfo("F", "pass"))
  4251  	defer nc.Close()
  4252  
  4253  	num := 10
  4254  	for i := 0; i < num; i++ {
  4255  		err := nc.Publish("F.EU.DATA", []byte(fmt.Sprintf("MSG-%d", i)))
  4256  		require_NoError(t, err)
  4257  	}
  4258  
  4259  	checkSubsPending(t, fsub, num)
  4260  	// Since we export and import in each cluster, we will receive 4x.
  4261  	// First hop from EU -> CLOUD is 1F and 1T
  4262  	// Second hop from CLOUD -> AP is 1F, 1T and another 1T
  4263  	// Third hop from AP -> CN is 1F, 1T, 1T and 1T
  4264  	// Each cluster hop that has the export/import mapping will add another T message copy.
  4265  	checkSubsPending(t, tsub, num*4)
  4266  
  4267  	// Create stream in cloud.
  4268  	nc, js := jsClientConnect(t, c.randomServer(), nats.UserInfo("F", "pass"))
  4269  	defer nc.Close()
  4270  
  4271  	_, err = js.AddStream(&nats.StreamConfig{
  4272  		Name:     "TEST",
  4273  		Subjects: []string{"TEST.>"},
  4274  		Replicas: 3,
  4275  	})
  4276  	require_NoError(t, err)
  4277  
  4278  	for i := 0; i < 100; i++ {
  4279  		sendStreamMsg(t, nc, fmt.Sprintf("TEST.%d", i), "OK")
  4280  	}
  4281  
  4282  	// Now connect to EU.
  4283  	nc, js = jsClientConnect(t, lceu.randomServer(), nats.UserInfo("F", "pass"))
  4284  	defer nc.Close()
  4285  
  4286  	// Create a mirror.
  4287  	_, err = js.AddStream(&nats.StreamConfig{
  4288  		Name: "M",
  4289  		Mirror: &nats.StreamSource{
  4290  			Name:   "TEST",
  4291  			Domain: "CLOUD",
  4292  		},
  4293  	})
  4294  	require_NoError(t, err)
  4295  
  4296  	checkFor(t, time.Second, 200*time.Millisecond, func() error {
  4297  		si, err := js.StreamInfo("M")
  4298  		require_NoError(t, err)
  4299  		if si.State.Msgs == 100 {
  4300  			return nil
  4301  		}
  4302  		return fmt.Errorf("State not current: %+v", si.State)
  4303  	})
  4304  }
  4305  
  4306  // https://github.com/nats-io/nats-server/pull/4197
  4307  func TestJetStreamClusterPurgeExReplayAfterRestart(t *testing.T) {
  4308  	c := createJetStreamClusterExplicit(t, "P3F", 3)
  4309  	defer c.shutdown()
  4310  
  4311  	// Client based API
  4312  	nc, js := jsClientConnect(t, c.randomServer())
  4313  	defer nc.Close()
  4314  
  4315  	_, err := js.AddStream(&nats.StreamConfig{
  4316  		Name:     "TEST",
  4317  		Subjects: []string{"TEST.>"},
  4318  		Replicas: 3,
  4319  	})
  4320  	require_NoError(t, err)
  4321  
  4322  	sendStreamMsg(t, nc, "TEST.0", "OK")
  4323  	sendStreamMsg(t, nc, "TEST.1", "OK")
  4324  	sendStreamMsg(t, nc, "TEST.2", "OK")
  4325  
  4326  	runTest := func(f func(js nats.JetStreamManager)) *nats.StreamInfo {
  4327  		nc, js := jsClientConnect(t, c.randomServer())
  4328  		defer nc.Close()
  4329  
  4330  		// Install a snapshot, then execute the interior func, ensuring the purge will be recovered later.
  4331  		fsl := c.streamLeader(globalAccountName, "TEST")
  4332  		fsl.JetStreamSnapshotStream(globalAccountName, "TEST")
  4333  
  4334  		f(js)
  4335  		time.Sleep(250 * time.Millisecond)
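        		// Give the interior operation time to be applied before shutting the leader down.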
  4336  
  4337  		fsl.Shutdown()
  4338  		fsl.WaitForShutdown()
  4339  		fsl = c.restartServer(fsl)
  4340  		c.waitOnServerCurrent(fsl)
  4341  
  4342  		nc, js = jsClientConnect(t, c.randomServer())
  4343  		defer nc.Close()
  4344  
  4345  		c.waitOnStreamLeader(globalAccountName, "TEST")
  4346  		sl := c.streamLeader(globalAccountName, "TEST")
  4347  
  4348  		// Keep stepping down until the stream leader matches the initial leader.
  4349  		// We need to check if it restored from the snapshot properly.
  4350  		for sl != fsl {
  4351  			_, err := nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  4352  			require_NoError(t, err)
  4353  			c.waitOnStreamLeader(globalAccountName, "TEST")
  4354  			sl = c.streamLeader(globalAccountName, "TEST")
  4355  		}
  4356  
  4357  		si, err := js.StreamInfo("TEST")
  4358  		require_NoError(t, err)
  4359  		return si
  4360  	}
  4361  	si := runTest(func(js nats.JetStreamManager) {
  4362  		err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Subject: "TEST.0"})
  4363  		require_NoError(t, err)
  4364  	})
  4365  	if si.State.Msgs != 2 {
  4366  		t.Fatalf("Expected 2 msgs after restart, got %d", si.State.Msgs)
  4367  	}
  4368  	if si.State.FirstSeq != 2 || si.State.LastSeq != 3 {
  4369  		t.Fatalf("Expected FirstSeq=2, LastSeq=3 after restart, got FirstSeq=%d, LastSeq=%d",
  4370  			si.State.FirstSeq, si.State.LastSeq)
  4371  	}
  4372  
  4373  	si = runTest(func(js nats.JetStreamManager) {
  4374  		err = js.PurgeStream("TEST")
  4375  		require_NoError(t, err)
  4376  		// Send 2 more messages.
  4377  		sendStreamMsg(t, nc, "TEST.1", "OK")
  4378  		sendStreamMsg(t, nc, "TEST.2", "OK")
  4379  	})
  4380  	if si.State.Msgs != 2 {
  4381  		t.Fatalf("Expected 2 msgs after restart, got %d", si.State.Msgs)
  4382  	}
  4383  	if si.State.FirstSeq != 4 || si.State.LastSeq != 5 {
  4384  		t.Fatalf("Expected FirstSeq=4, LastSeq=5 after restart, got FirstSeq=%d, LastSeq=%d",
  4385  			si.State.FirstSeq, si.State.LastSeq)
  4386  	}
  4387  
  4388  	// Now test a keep
  4389  	si = runTest(func(js nats.JetStreamManager) {
  4390  		err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Keep: 1})
  4391  		require_NoError(t, err)
  4392  		// Send 4 more messages.
  4393  		sendStreamMsg(t, nc, "TEST.1", "OK")
  4394  		sendStreamMsg(t, nc, "TEST.2", "OK")
  4395  		sendStreamMsg(t, nc, "TEST.3", "OK")
  4396  		sendStreamMsg(t, nc, "TEST.1", "OK")
  4397  	})
  4398  	if si.State.Msgs != 5 {
  4399  		t.Fatalf("Expected 5 msgs after restart, got %d", si.State.Msgs)
  4400  	}
  4401  	if si.State.FirstSeq != 5 || si.State.LastSeq != 9 {
  4402  		t.Fatalf("Expected FirstSeq=5, LastSeq=9 after restart, got FirstSeq=%d, LastSeq=%d",
  4403  			si.State.FirstSeq, si.State.LastSeq)
  4404  	}
  4405  
  4406  	// Now test a keep on a subject
  4407  	si = runTest(func(js nats.JetStreamManager) {
  4408  		err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Subject: "TEST.1", Keep: 1})
  4409  		require_NoError(t, err)
  4410  		// Send 3 more messages.
  4411  		sendStreamMsg(t, nc, "TEST.1", "OK")
  4412  		sendStreamMsg(t, nc, "TEST.2", "OK")
  4413  		sendStreamMsg(t, nc, "TEST.3", "OK")
  4414  	})
  4415  	if si.State.Msgs != 7 {
  4416  		t.Fatalf("Expected 7 msgs after restart, got %d", si.State.Msgs)
  4417  	}
  4418  	if si.State.FirstSeq != 5 || si.State.LastSeq != 12 {
  4419  		t.Fatalf("Expected FirstSeq=5, LastSeq=12 after restart, got FirstSeq=%d, LastSeq=%d",
  4420  			si.State.FirstSeq, si.State.LastSeq)
  4421  	}
  4422  }
  4423  
  4424  func TestJetStreamClusterConsumerCleanupWithSameName(t *testing.T) {
  4425  	c := createJetStreamClusterExplicit(t, "R3F", 3)
  4426  	defer c.shutdown()
  4427  
  4428  	// Client based API
  4429  	nc, js := jsClientConnect(t, c.randomServer())
  4430  	defer nc.Close()
  4431  
  4432  	_, err := js.AddStream(&nats.StreamConfig{
  4433  		Name:     "UPDATES",
  4434  		Subjects: []string{"DEVICE.*"},
  4435  		Replicas: 3,
  4436  	})
  4437  	require_NoError(t, err)
  4438  
  4439  	// Create an R1 consumer that we will auto-recreate using the same name.
  4440  	// We want to make sure that the system does not continually try to clean up the new one because of the old one.
  4441  
  4442  	// Track the sequence for restart etc.
  4443  	var seq atomic.Uint64
  4444  
  4445  	msgCB := func(msg *nats.Msg) {
  4446  		msg.AckSync()
  4447  		meta, err := msg.Metadata()
  4448  		require_NoError(t, err)
  4449  		seq.Store(meta.Sequence.Stream)
  4450  	}
  4451  
  4452  	waitOnSeqDelivered := func(expected uint64) {
  4453  		checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  4454  			received := seq.Load()
  4455  			if received == expected {
  4456  				return nil
  4457  			}
  4458  			return fmt.Errorf("Seq is %d, want %d", received, expected)
  4459  		})
  4460  	}
  4461  
  4462  	doSub := func() {
  4463  		_, err = js.Subscribe(
  4464  			"DEVICE.22",
  4465  			msgCB,
  4466  			nats.ConsumerName("dlc"),
  4467  			nats.SkipConsumerLookup(),
  4468  			nats.StartSequence(seq.Load()+1),
  4469  			nats.MaxAckPending(1), // One at a time.
  4470  			nats.ManualAck(),
  4471  			nats.ConsumerReplicas(1),
  4472  			nats.ConsumerMemoryStorage(),
  4473  			nats.MaxDeliver(1),
  4474  			nats.InactiveThreshold(time.Second),
  4475  			nats.IdleHeartbeat(250*time.Millisecond),
  4476  		)
  4477  		require_NoError(t, err)
  4478  	}
  4479  
  4480  	// Track any errors for consumer not active so we can recreate the consumer.
  4481  	errCh := make(chan error, 10)
  4482  	nc.SetErrorHandler(func(c *nats.Conn, s *nats.Subscription, err error) {
  4483  		if errors.Is(err, nats.ErrConsumerNotActive) {
  4484  			s.Unsubscribe()
  4485  			errCh <- err
  4486  			doSub()
  4487  		}
  4488  	})
  4489  
  4490  	doSub()
  4491  
  4492  	sendStreamMsg(t, nc, "DEVICE.22", "update-1")
  4493  	sendStreamMsg(t, nc, "DEVICE.22", "update-2")
  4494  	sendStreamMsg(t, nc, "DEVICE.22", "update-3")
  4495  	waitOnSeqDelivered(3)
  4496  
  4497  	// Shutdown the consumer's leader.
  4498  	s := c.consumerLeader(globalAccountName, "UPDATES", "dlc")
  4499  	s.Shutdown()
  4500  	c.waitOnStreamLeader(globalAccountName, "UPDATES")
  4501  
  4502  	// In case our client connection was to the same server.
  4503  	nc, _ = jsClientConnect(t, c.randomServer())
  4504  	defer nc.Close()
  4505  
  4506  	sendStreamMsg(t, nc, "DEVICE.22", "update-4")
  4507  	sendStreamMsg(t, nc, "DEVICE.22", "update-5")
  4508  	sendStreamMsg(t, nc, "DEVICE.22", "update-6")
  4509  
  4510  	// Wait for the consumer not active error.
  4511  	<-errCh
  4512  	// Now restart server with the old consumer.
  4513  	c.restartServer(s)
  4514  	// Wait on all messages delivered.
  4515  	waitOnSeqDelivered(6)
  4516  	// Make sure no other errors showed up
  4517  	require_True(t, len(errCh) == 0)
  4518  }

  4519  func TestJetStreamClusterConsumerActions(t *testing.T) {
  4520  	c := createJetStreamClusterExplicit(t, "R3F", 3)
  4521  	defer c.shutdown()
  4522  
  4523  	nc, js := jsClientConnect(t, c.randomServer())
  4524  	defer nc.Close()
  4525  
  4526  	var err error
  4527  	_, err = js.AddStream(&nats.StreamConfig{
  4528  		Name:     "TEST",
  4529  		Subjects: []string{"test"},
  4530  	})
  4531  	require_NoError(t, err)
  4532  
  4533  	ecSubj := fmt.Sprintf(JSApiConsumerCreateExT, "TEST", "CONSUMER", "test")
  4534  	crReq := CreateConsumerRequest{
  4535  		Stream: "TEST",
  4536  		Config: ConsumerConfig{
  4537  			DeliverPolicy: DeliverLast,
  4538  			FilterSubject: "test",
  4539  			AckPolicy:     AckExplicit,
  4540  		},
  4541  	}
  4542  
  4543  	// A new consumer. Should not be an error.
  4544  	crReq.Action = ActionCreate
  4545  	req, err := json.Marshal(crReq)
  4546  	require_NoError(t, err)
  4547  	resp, err := nc.Request(ecSubj, req, 500*time.Millisecond)
  4548  	require_NoError(t, err)
  4549  	var ccResp JSApiConsumerCreateResponse
  4550  	err = json.Unmarshal(resp.Data, &ccResp)
  4551  	require_NoError(t, err)
  4552  	if ccResp.Error != nil {
  4553  		t.Fatalf("Unexpected error: %v", ccResp.Error)
  4554  	}
  4555  	ccResp.Error = nil
  4556  
  4557  	// Consumer exists, but config is the same, so should be ok
  4558  	resp, err = nc.Request(ecSubj, req, 500*time.Millisecond)
  4559  	require_NoError(t, err)
  4560  	err = json.Unmarshal(resp.Data, &ccResp)
  4561  	require_NoError(t, err)
  4562  	if ccResp.Error != nil {
  4563  		t.Fatalf("Unexpected error response: %v", ccResp.Error)
  4564  	}
  4565  	ccResp.Error = nil
  4566  	// Consumer exists. Config is different, so should error
  4567  	crReq.Config.Description = "changed"
  4568  	req, err = json.Marshal(crReq)
  4569  	require_NoError(t, err)
  4570  	resp, err = nc.Request(ecSubj, req, 500*time.Millisecond)
  4571  	require_NoError(t, err)
  4572  	err = json.Unmarshal(resp.Data, &ccResp)
  4573  	require_NoError(t, err)
  4574  	if ccResp.Error == nil {
  4575  		t.Fatalf("Unexpected ok response")
  4576  	}
  4577  
  4578  	ccResp.Error = nil
  4579  	// Consumer update, so update should be ok
  4580  	crReq.Action = ActionUpdate
  4581  	crReq.Config.Description = "changed again"
  4582  	req, err = json.Marshal(crReq)
  4583  	require_NoError(t, err)
  4584  	resp, err = nc.Request(ecSubj, req, 500*time.Millisecond)
  4585  	require_NoError(t, err)
  4586  	err = json.Unmarshal(resp.Data, &ccResp)
  4587  	require_NoError(t, err)
  4588  	if ccResp.Error != nil {
  4589  		t.Fatalf("Unexpected error response: %v", ccResp.Error)
  4590  	}
  4591  
  4592  	ecSubj = fmt.Sprintf(JSApiConsumerCreateExT, "TEST", "NEW", "test")
  4593  	ccResp.Error = nil
  4594  	// Updating a consumer that does not exist yet, so it should error.
  4595  	crReq.Config.Name = "NEW"
  4596  	req, err = json.Marshal(crReq)
  4597  	require_NoError(t, err)
  4598  	resp, err = nc.Request(ecSubj, req, 500*time.Millisecond)
  4599  	require_NoError(t, err)
  4600  	err = json.Unmarshal(resp.Data, &ccResp)
  4601  	require_NoError(t, err)
  4602  	if ccResp.Error == nil {
  4603  		t.Fatalf("Unexpected ok response")
  4604  	}
  4605  }
  4606  
  4607  func TestJetStreamClusterSnapshotAndRestoreWithHealthz(t *testing.T) {
  4608  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  4609  	defer c.shutdown()
  4610  
  4611  	nc, js := jsClientConnect(t, c.randomServer())
  4612  	defer nc.Close()
  4613  
  4614  	_, err := js.AddStream(&nats.StreamConfig{
  4615  		Name:     "TEST",
  4616  		Subjects: []string{"foo"},
  4617  		Replicas: 3,
  4618  	})
  4619  	require_NoError(t, err)
  4620  
  4621  	toSend, msg := 1000, bytes.Repeat([]byte("Z"), 1024)
  4622  	for i := 0; i < toSend; i++ {
  4623  		_, err := js.PublishAsync("foo", msg)
  4624  		require_NoError(t, err)
  4625  	}
  4626  	select {
  4627  	case <-js.PublishAsyncComplete():
  4628  	case <-time.After(5 * time.Second):
  4629  		t.Fatalf("Did not receive completion signal")
  4630  	}
  4631  
  4632  	sreq := &JSApiStreamSnapshotRequest{
  4633  		DeliverSubject: nats.NewInbox(),
  4634  		ChunkSize:      512,
  4635  	}
  4636  	req, _ := json.Marshal(sreq)
  4637  	rmsg, err := nc.Request(fmt.Sprintf(JSApiStreamSnapshotT, "TEST"), req, time.Second)
  4638  	require_NoError(t, err)
  4639  
  4640  	var resp JSApiStreamSnapshotResponse
  4641  	json.Unmarshal(rmsg.Data, &resp)
  4642  	require_True(t, resp.Error == nil)
  4643  
  4644  	state := *resp.State
  4645  	cfg := *resp.Config
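        	// Keep the reported config and state so we can use them in the restore request below.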
  4646  
  4647  	var snapshot []byte
  4648  	done := make(chan bool)
  4649  
  4650  	sub, _ := nc.Subscribe(sreq.DeliverSubject, func(m *nats.Msg) {
  4651  		// EOF
  4652  		if len(m.Data) == 0 {
  4653  			done <- true
  4654  			return
  4655  		}
  4656  		// Could be writing to a file here too.
  4657  		snapshot = append(snapshot, m.Data...)
  4658  		// Flow ack
  4659  		m.Respond(nil)
  4660  	})
  4661  	defer sub.Unsubscribe()
  4662  
  4663  	// Wait to receive the snapshot.
  4664  	select {
  4665  	case <-done:
  4666  	case <-time.After(5 * time.Second):
  4667  		t.Fatalf("Did not receive our snapshot in time")
  4668  	}
  4669  
  4670  	// Delete before we try to restore.
  4671  	require_NoError(t, js.DeleteStream("TEST"))
  4672  
  4673  	checkHealth := func() {
  4674  		for _, s := range c.servers {
  4675  			s.healthz(nil)
  4676  		}
  4677  	}
  4678  
  4679  	var rresp JSApiStreamRestoreResponse
  4680  	rreq := &JSApiStreamRestoreRequest{
  4681  		Config: cfg,
  4682  		State:  state,
  4683  	}
  4684  	req, _ = json.Marshal(rreq)
  4685  
  4686  	rmsg, err = nc.Request(fmt.Sprintf(JSApiStreamRestoreT, "TEST"), req, 5*time.Second)
  4687  	require_NoError(t, err)
  4688  
  4689  	rresp.Error = nil
  4690  	json.Unmarshal(rmsg.Data, &rresp)
  4691  	require_True(t, rresp.Error == nil)
  4692  
  4693  	checkHealth()
  4694  
  4695  	// We will now send the snapshot back in chunks (and then the EOF).
  4696  	var chunk [1024]byte
  4697  	for i, r := 0, bytes.NewReader(snapshot); ; {
  4698  		n, err := r.Read(chunk[:])
  4699  		if err != nil {
  4700  			break
  4701  		}
  4702  		nc.Request(rresp.DeliverSubject, chunk[:n], time.Second)
  4703  		i++
  4704  		// We will call healthz for all servers periodically during the restore.
  4705  		if i%100 == 0 {
  4706  			checkHealth()
  4707  		}
  4708  	}
  4709  	rmsg, err = nc.Request(rresp.DeliverSubject, nil, time.Second)
  4710  	require_NoError(t, err)
  4711  	rresp.Error = nil
  4712  	json.Unmarshal(rmsg.Data, &rresp)
  4713  	require_True(t, rresp.Error == nil)
  4714  
  4715  	si, err := js.StreamInfo("TEST")
  4716  	require_NoError(t, err)
  4717  	require_True(t, si.State.Msgs == uint64(toSend))
  4718  
  4719  	// Make sure stepdown works; this would fail before the fix.
  4720  	_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, 5*time.Second)
  4721  	require_NoError(t, err)
  4722  
  4723  	si, err = js.StreamInfo("TEST")
  4724  	require_NoError(t, err)
  4725  	require_True(t, si.State.Msgs == uint64(toSend))
  4726  }
  4727  
  4728  func TestJetStreamClusterBinaryStreamSnapshotCapability(t *testing.T) {
  4729  	c := createJetStreamClusterExplicit(t, "NATS", 3)
  4730  	defer c.shutdown()
  4731  
  4732  	nc, js := jsClientConnect(t, c.randomServer())
  4733  	defer nc.Close()
  4734  
  4735  	_, err := js.AddStream(&nats.StreamConfig{
  4736  		Name:     "TEST",
  4737  		Subjects: []string{"foo"},
  4738  		Replicas: 3,
  4739  	})
  4740  	require_NoError(t, err)
  4741  
  4742  	mset, err := c.streamLeader(globalAccountName, "TEST").GlobalAccount().lookupStream("TEST")
  4743  	require_NoError(t, err)
  4744  
  4745  	if !mset.supportsBinarySnapshot() {
  4746  		t.Fatalf("Expected to signal that we could support binary stream snapshots")
  4747  	}
  4748  }
  4749  
  4750  func TestJetStreamClusterBadEncryptKey(t *testing.T) {
  4751  	c := createJetStreamClusterWithTemplate(t, jsClusterEncryptedTempl, "JSC", 3)
  4752  	defer c.shutdown()
  4753  
  4754  	nc, js := jsClientConnect(t, c.randomServer())
  4755  	defer nc.Close()
  4756  
  4757  	// Create 10 streams.
  4758  	for i := 0; i < 10; i++ {
  4759  		_, err := js.AddStream(&nats.StreamConfig{
  4760  			Name:     fmt.Sprintf("TEST-%d", i),
  4761  			Replicas: 3,
  4762  		})
  4763  		require_NoError(t, err)
  4764  	}
  4765  
  4766  	// Grab random server.
  4767  	s := c.randomServer()
  4768  	s.Shutdown()
  4769  	s.WaitForShutdown()
  4770  
  4771  	var opts *Options
  4772  	for i := 0; i < len(c.servers); i++ {
  4773  		if c.servers[i] == s {
  4774  			opts = c.opts[i]
  4775  			break
  4776  		}
  4777  	}
  4778  	require_NotNil(t, opts)
  4779  
  4780  	// Replace key with an empty key.
  4781  	buf, err := os.ReadFile(opts.ConfigFile)
  4782  	require_NoError(t, err)
  4783  	nbuf := bytes.Replace(buf, []byte("key: \"s3cr3t!\""), []byte("key: \"\""), 1)
  4784  	err = os.WriteFile(opts.ConfigFile, nbuf, 0640)
  4785  	require_NoError(t, err)
  4786  
  4787  	// Make sure trying to start the server now fails.
  4788  	s, err = NewServer(LoadConfig(opts.ConfigFile))
  4789  	require_NoError(t, err)
  4790  	require_NotNil(t, s)
  4791  	s.Start()
  4792  	if err := s.readyForConnections(1 * time.Second); err == nil {
  4793  		t.Fatalf("Expected server not to start")
  4794  	}
  4795  }
  4796  
  4797  func TestJetStreamClusterAccountUsageDrifts(t *testing.T) {
  4798  	tmpl := `
  4799  			listen: 127.0.0.1:-1
  4800  			server_name: %s
  4801  			jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  4802  			leaf {
  4803  				listen: 127.0.0.1:-1
  4804  			}
  4805  			cluster {
  4806  				name: %s
  4807  				listen: 127.0.0.1:%d
  4808  				routes = [%s]
  4809  			}
  4810  	`
  4811  	opFrag := `
  4812  			operator: %s
  4813  			system_account: %s
  4814  			resolver: { type: MEM }
  4815  			resolver_preload = {
  4816  				%s : %s
  4817  				%s : %s
  4818  			}
  4819  		`
  4820  
  4821  	_, syspub := createKey(t)
  4822  	sysJwt := encodeClaim(t, jwt.NewAccountClaims(syspub), syspub)
  4823  
  4824  	accKp, aExpPub := createKey(t)
  4825  	accClaim := jwt.NewAccountClaims(aExpPub)
  4826  	accClaim.Limits.JetStreamTieredLimits["R1"] = jwt.JetStreamLimits{
  4827  		DiskStorage: -1, Consumer: 1, Streams: 1}
  4828  	accClaim.Limits.JetStreamTieredLimits["R3"] = jwt.JetStreamLimits{
  4829  		DiskStorage: -1, Consumer: 1, Streams: 1}
  4830  	accJwt := encodeClaim(t, accClaim, aExpPub)
  4831  	accCreds := newUser(t, accKp)
  4832  
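        	// Assemble the operator-mode cluster config, preloading the system and test account JWTs into the memory resolver.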
  4833  	template := tmpl + fmt.Sprintf(opFrag, ojwt, syspub, syspub, sysJwt, aExpPub, accJwt)
  4834  	c := createJetStreamClusterWithTemplate(t, template, "R3S", 3)
  4835  	defer c.shutdown()
  4836  
  4837  	nc, js := jsClientConnect(t, c.randomServer(), nats.UserCredentials(accCreds))
  4838  	defer nc.Close()
  4839  
  4840  	_, err := js.AddStream(&nats.StreamConfig{
  4841  		Name:     "TEST1",
  4842  		Subjects: []string{"foo"},
  4843  		MaxBytes: 1 * 1024 * 1024 * 1024,
  4844  		MaxMsgs:  1000,
  4845  		Replicas: 3,
  4846  	})
  4847  	require_NoError(t, err)
  4848  
  4849  	_, err = js.AddStream(&nats.StreamConfig{
  4850  		Name:     "TEST2",
  4851  		Subjects: []string{"bar"},
  4852  	})
  4853  	require_NoError(t, err)
  4854  
  4855  	// These expected store values can come directly from stream info's state bytes.
  4856  	// We will *= 3 for R3
  4857  	checkAccount := func(r1u, r3u uint64) {
  4858  		t.Helper()
  4859  		r3u *= 3
  4860  
  4861  		// Remote usage updates can be delayed, so wait for a bit for values we want.
  4862  		checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
  4863  			info, err := js.AccountInfo()
  4864  			require_NoError(t, err)
  4865  			require_True(t, len(info.Tiers) >= 2)
  4866  			// These can move.
  4867  			if u := info.Tiers["R1"].Store; u != r1u {
  4868  				return fmt.Errorf("Expected R1 to be %v, got %v", friendlyBytes(r1u), friendlyBytes(u))
  4869  			}
  4870  			if u := info.Tiers["R3"].Store; u != r3u {
  4871  				return fmt.Errorf("Expected R3 to be %v, got %v", friendlyBytes(r3u), friendlyBytes(u))
  4872  			}
  4873  			return nil
  4874  		})
  4875  	}
  4876  
  4877  	checkAccount(0, 0)
  4878  
  4879  	// Now add in some R3 data.
  4880  	msg := bytes.Repeat([]byte("Z"), 32*1024)     // 32k
  4881  	smallMsg := bytes.Repeat([]byte("Z"), 4*1024) // 4k
  4882  
  4883  	for i := 0; i < 1000; i++ {
  4884  		js.Publish("foo", msg)
  4885  	}
  4886  	sir3, err := js.StreamInfo("TEST1")
  4887  	require_NoError(t, err)
  4888  
  4889  	checkAccount(0, sir3.State.Bytes)
  4890  
  4891  	// Now add in some R1 data.
  4892  	for i := 0; i < 100; i++ {
  4893  		js.Publish("bar", msg)
  4894  	}
  4895  
  4896  	sir1, err := js.StreamInfo("TEST2")
  4897  	require_NoError(t, err)
  4898  
  4899  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4900  
  4901  	// We will now test a bunch of scenarios to see that we are doing accounting correctly.
  4902  
  4903  	// Since our R3 has a limit of 1000 msgs, let's add in more msgs and drop older ones.
  4904  	for i := 0; i < 100; i++ {
  4905  		js.Publish("foo", smallMsg)
  4906  	}
  4907  	sir3, err = js.StreamInfo("TEST1")
  4908  	require_NoError(t, err)
  4909  
  4910  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4911  
  4912  	// Move our R3 stream leader and make sure accounting is correct.
  4913  	_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST1"), nil, time.Second)
  4914  	require_NoError(t, err)
  4915  
  4916  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4917  
  4918  	// Now scale down.
  4919  	_, err = js.UpdateStream(&nats.StreamConfig{
  4920  		Name:     "TEST1",
  4921  		Subjects: []string{"foo"},
  4922  		MaxBytes: 1 * 1024 * 1024 * 1024,
  4923  		MaxMsgs:  1000,
  4924  		Replicas: 1,
  4925  	})
  4926  	require_NoError(t, err)
  4927  
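        	// After scaling down to R1, the usage formerly counted against R3 should now be charged to the R1 tier.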
  4928  	checkAccount(sir1.State.Bytes+sir3.State.Bytes, 0)
  4929  
  4930  	// Add in more msgs which will replace the older and bigger ones.
  4931  	for i := 0; i < 100; i++ {
  4932  		js.Publish("foo", smallMsg)
  4933  	}
  4934  	sir3, err = js.StreamInfo("TEST1")
  4935  	require_NoError(t, err)
  4936  
  4937  	// Now scale back up.
  4938  	_, err = js.UpdateStream(&nats.StreamConfig{
  4939  		Name:     "TEST1",
  4940  		Subjects: []string{"foo"},
  4941  		MaxBytes: 1 * 1024 * 1024 * 1024,
  4942  		MaxMsgs:  1000,
  4943  		Replicas: 3,
  4944  	})
  4945  	require_NoError(t, err)
  4946  
  4947  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4948  
  4949  	// Test Purge.
  4950  	err = js.PurgeStream("TEST1")
  4951  	require_NoError(t, err)
  4952  
  4953  	checkAccount(sir1.State.Bytes, 0)
  4954  
  4955  	for i := 0; i < 1000; i++ {
  4956  		js.Publish("foo", smallMsg)
  4957  	}
  4958  	sir3, err = js.StreamInfo("TEST1")
  4959  	require_NoError(t, err)
  4960  
  4961  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4962  
  4963  	requestLeaderStepDown := func() {
  4964  		ml := c.leader()
  4965  		checkFor(t, 5*time.Second, 250*time.Millisecond, func() error {
  4966  			if cml := c.leader(); cml == ml {
  4967  				nc.Request(JSApiLeaderStepDown, nil, time.Second)
  4968  				return fmt.Errorf("Metaleader has not moved yet")
  4969  			}
  4970  			return nil
  4971  		})
  4972  	}
  4973  
  4974  	// Test meta leader stepdowns.
  4975  	for i := 0; i < len(c.servers); i++ {
  4976  		requestLeaderStepDown()
  4977  		checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4978  	}
  4979  
  4980  	// Now test cluster reset operations where we internally reset the NRG and optionally the stream too.
  4981  	// Only applicable to TEST1 stream which is R3.
  4982  	nl := c.randomNonStreamLeader(aExpPub, "TEST1")
  4983  	acc, err := nl.LookupAccount(aExpPub)
  4984  	require_NoError(t, err)
  4985  	mset, err := acc.lookupStream("TEST1")
  4986  	require_NoError(t, err)
  4987  	// NRG only
  4988  	mset.resetClusteredState(nil)
  4989  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4990  	// Need to re-lookup this stream since the reset above will recreate it.
  4991  	checkFor(t, 5*time.Second, 200*time.Millisecond, func() error {
  4992  		mset, err = acc.lookupStream("TEST1")
  4993  		return err
  4994  	})
  4995  	// Now NRG and Stream state itself.
  4996  	mset.resetClusteredState(errFirstSequenceMismatch)
  4997  	checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  4998  
  4999  	// Now test server restart
  5000  	for _, s := range c.servers {
  5001  		s.Shutdown()
  5002  		s.WaitForShutdown()
  5003  		s = c.restartServer(s)
  5004  
  5005  		// Wait on healthz and leader etc.
  5006  		checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  5007  			if hs := s.healthz(nil); hs.Error != _EMPTY_ {
  5008  				return errors.New(hs.Error)
  5009  			}
  5010  			return nil
  5011  		})
  5012  		c.waitOnLeader()
  5013  		c.waitOnStreamLeader(aExpPub, "TEST1")
  5014  		c.waitOnStreamLeader(aExpPub, "TEST2")
  5015  
  5016  		// Now check account again.
  5017  		checkAccount(sir1.State.Bytes, sir3.State.Bytes)
  5018  	}
  5019  }
  5020  
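        // TestJetStreamClusterStreamFailTracking checks that duplicate publishes (same Nats-Msg-Id)
        // remain deduplicated and strictly ordered across leader stepdowns, a raft reset on a follower,
        // a server restart, and a scale down to R1.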
  5021  func TestJetStreamClusterStreamFailTracking(t *testing.T) {
  5022  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5023  	defer c.shutdown()
  5024  
  5025  	nc, js := jsClientConnect(t, c.randomServer())
  5026  	defer nc.Close()
  5027  
  5028  	_, err := js.AddStream(&nats.StreamConfig{
  5029  		Name:     "TEST",
  5030  		Subjects: []string{"foo"},
  5031  		Replicas: 3,
  5032  	})
  5033  	require_NoError(t, err)
  5034  
  5035  	m := nats.NewMsg("foo")
  5036  	m.Data = []byte("OK")
  5037  
  5038  	b, bsz := 0, 5
  5039  	sendBatch := func() {
  5040  		for i := b * bsz; i < b*bsz+bsz; i++ {
  5041  			msgId := fmt.Sprintf("ID:%d", i)
  5042  			m.Header.Set(JSMsgId, msgId)
  5043  			// Send it twice on purpose.
  5044  			js.PublishMsg(m)
  5045  			js.PublishMsg(m)
  5046  		}
  5047  		b++
  5048  	}
  5049  
  5050  	sendBatch()
  5051  
  5052  	_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  5053  	require_NoError(t, err)
  5054  	c.waitOnStreamLeader(globalAccountName, "TEST")
  5055  
  5056  	sendBatch()
  5057  
  5058  	// Now stop one and restart.
  5059  	nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  5060  	mset, err := nl.GlobalAccount().lookupStream("TEST")
  5061  	require_NoError(t, err)
  5062  	// Reset raft
  5063  	mset.resetClusteredState(nil)
  5064  	time.Sleep(100 * time.Millisecond)
  5065  
  5066  	nl.Shutdown()
  5067  	nl.WaitForShutdown()
  5068  
  5069  	sendBatch()
  5070  
  5071  	nl = c.restartServer(nl)
  5072  
  5073  	sendBatch()
  5074  
  5075  	for {
  5076  		_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  5077  		require_NoError(t, err)
  5078  		c.waitOnStreamLeader(globalAccountName, "TEST")
  5079  		if nl == c.streamLeader(globalAccountName, "TEST") {
  5080  			break
  5081  		}
  5082  	}
  5083  
  5084  	sendBatch()
  5085  
  5086  	_, err = js.UpdateStream(&nats.StreamConfig{
  5087  		Name:     "TEST",
  5088  		Subjects: []string{"foo"},
  5089  		Replicas: 1,
  5090  	})
  5091  	require_NoError(t, err)
  5092  
  5093  	// Make sure all in order.
  5094  	errCh := make(chan error, 100)
  5095  	var wg sync.WaitGroup
  5096  	wg.Add(1)
  5097  
  5098  	expected, seen := b*bsz, 0
  5099  
  5100  	sub, err := js.Subscribe("foo", func(msg *nats.Msg) {
  5101  		expectedID := fmt.Sprintf("ID:%d", seen)
  5102  		if v := msg.Header.Get(JSMsgId); v != expectedID {
  5103  			errCh <- fmt.Errorf("expected msg ID %q, got %q", expectedID, v)
  5104  			wg.Done()
  5105  			msg.Sub.Unsubscribe()
  5106  			return
  5107  		}
  5108  		seen++
  5109  		if seen >= expected {
  5110  			wg.Done()
  5111  			msg.Sub.Unsubscribe()
  5112  		}
  5113  	})
  5114  	require_NoError(t, err)
  5115  	defer sub.Unsubscribe()
  5116  
  5117  	wg.Wait()
  5118  	if len(errCh) > 0 {
  5119  		t.Fatalf("Expected no errors, got %d", len(errCh))
  5120  	}
  5121  }
  5122  
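        // TestJetStreamClusterStreamFailTrackingSnapshots runs the same dedupe/ordering check, but with
        // the leader installing a snapshot while a follower is down, before that follower is restarted
        // and promoted to leader.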
  5123  func TestJetStreamClusterStreamFailTrackingSnapshots(t *testing.T) {
  5124  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5125  	defer c.shutdown()
  5126  
  5127  	nc, js := jsClientConnect(t, c.randomServer())
  5128  	defer nc.Close()
  5129  
  5130  	_, err := js.AddStream(&nats.StreamConfig{
  5131  		Name:     "TEST",
  5132  		Subjects: []string{"foo"},
  5133  		Replicas: 3,
  5134  	})
  5135  	require_NoError(t, err)
  5136  
  5137  	m := nats.NewMsg("foo")
  5138  	m.Data = []byte("OK")
  5139  
  5140  	// Send 1000 msgs, with a dupe for every msgID.
  5141  	for i := 0; i < 1000; i++ {
  5142  		msgId := fmt.Sprintf("ID:%d", i)
  5143  		m.Header.Set(JSMsgId, msgId)
  5144  		// Send it twice on purpose.
  5145  		js.PublishMsg(m)
  5146  		js.PublishMsg(m)
  5147  	}
  5148  
  5149  	// Now stop one.
  5150  	nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  5151  	nl.Shutdown()
  5152  	nl.WaitForShutdown()
  5153  
  5154  	// Now send more and make sure leader snapshots.
  5155  	for i := 1000; i < 2000; i++ {
  5156  		msgId := fmt.Sprintf("ID:%d", i)
  5157  		m.Header.Set(JSMsgId, msgId)
  5158  		// Send it twice on purpose.
  5159  		js.PublishMsg(m)
  5160  		js.PublishMsg(m)
  5161  	}
  5162  
  5163  	sl := c.streamLeader(globalAccountName, "TEST")
  5164  	mset, err := sl.GlobalAccount().lookupStream("TEST")
  5165  	require_NoError(t, err)
  5166  	node := mset.raftNode()
  5167  	require_NotNil(t, node)
  5168  	node.InstallSnapshot(mset.stateSnapshot())
  5169  
  5170  	// Now restart nl
  5171  	nl = c.restartServer(nl)
  5172  	c.waitOnServerCurrent(nl)
  5173  
  5174  	// Move leader to NL
  5175  	for {
  5176  		_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), nil, time.Second)
  5177  		require_NoError(t, err)
  5178  		c.waitOnStreamLeader(globalAccountName, "TEST")
  5179  		if nl == c.streamLeader(globalAccountName, "TEST") {
  5180  			break
  5181  		}
  5182  	}
  5183  
  5184  	_, err = js.UpdateStream(&nats.StreamConfig{
  5185  		Name:     "TEST",
  5186  		Subjects: []string{"foo"},
  5187  		Replicas: 1,
  5188  	})
  5189  	require_NoError(t, err)
  5190  
  5191  	// Make sure all in order.
  5192  	errCh := make(chan error, 100)
  5193  	var wg sync.WaitGroup
  5194  	wg.Add(1)
  5195  
  5196  	expected, seen := 2000, 0
  5197  
  5198  	sub, err := js.Subscribe("foo", func(msg *nats.Msg) {
  5199  		expectedID := fmt.Sprintf("ID:%d", seen)
  5200  		if v := msg.Header.Get(JSMsgId); v != expectedID {
  5201  			errCh <- fmt.Errorf("expected msg ID %q, got %q", expectedID, v)
  5202  			wg.Done()
  5203  			msg.Sub.Unsubscribe()
  5204  			return
  5205  		}
  5206  		seen++
  5207  		if seen >= expected {
  5208  			wg.Done()
  5209  			msg.Sub.Unsubscribe()
  5210  		}
  5211  	})
  5212  	require_NoError(t, err)
  5213  	defer sub.Unsubscribe()
  5214  
  5215  	wg.Wait()
  5216  	if len(errCh) > 0 {
  5217  		t.Fatalf("Expected no errors, got %d", len(errCh))
  5218  	}
  5219  }
  5220  
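        // TestJetStreamClusterOrphanConsumerSubjects checks that a consumer whose filter subject is
        // dropped from the stream still reports a cluster leader and replicas after the stream is
        // scaled down to R1 and back up to R3.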
  5221  func TestJetStreamClusterOrphanConsumerSubjects(t *testing.T) {
  5222  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5223  	defer c.shutdown()
  5224  
  5225  	nc, js := jsClientConnect(t, c.randomServer())
  5226  	defer nc.Close()
  5227  
  5228  	_, err := js.AddStream(&nats.StreamConfig{
  5229  		Name:     "TEST",
  5230  		Subjects: []string{"foo.>", "bar.>"},
  5231  		Replicas: 3,
  5232  	})
  5233  	require_NoError(t, err)
  5234  
  5235  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5236  		Name:          "consumer_foo",
  5237  		Durable:       "consumer_foo",
  5238  		FilterSubject: "foo.something",
  5239  	})
  5240  	require_NoError(t, err)
  5241  
  5242  	for _, replicas := range []int{3, 1, 3} {
  5243  		_, err = js.UpdateStream(&nats.StreamConfig{
  5244  			Name:     "TEST",
  5245  			Subjects: []string{"bar.>"},
  5246  			Replicas: replicas,
  5247  		})
  5248  		require_NoError(t, err)
  5249  		c.waitOnAllCurrent()
  5250  	}
  5251  
  5252  	c.waitOnStreamLeader("$G", "TEST")
  5253  	c.waitOnConsumerLeader("$G", "TEST", "consumer_foo")
  5254  
  5255  	info, err := js.ConsumerInfo("TEST", "consumer_foo")
  5256  	require_NoError(t, err)
  5257  	require_True(t, info.Cluster != nil)
  5258  	require_NotEqual(t, info.Cluster.Leader, "")
  5259  	require_Equal(t, len(info.Cluster.Replicas), 2)
  5260  }
  5261  
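        // TestJetStreamClusterDurableConsumerInactiveThresholdLeaderSwitch checks that activity on a
        // pull consumer with an inactive threshold keeps it alive across a consumer leader switch.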
  5262  func TestJetStreamClusterDurableConsumerInactiveThresholdLeaderSwitch(t *testing.T) {
  5263  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5264  	defer c.shutdown()
  5265  
  5266  	nc, js := jsClientConnect(t, c.randomServer())
  5267  	defer nc.Close()
  5268  
  5269  	_, err := js.AddStream(&nats.StreamConfig{
  5270  		Name:     "TEST",
  5271  		Subjects: []string{"*"},
  5272  		Replicas: 3,
  5273  	})
  5274  	require_NoError(t, err)
  5275  
  5276  	// Queue a msg.
  5277  	sendStreamMsg(t, nc, "foo", "ok")
  5278  
  5279  	thresh := 250 * time.Millisecond
  5280  
  5281  	// This will start the timer.
  5282  	sub, err := js.PullSubscribe("foo", "dlc", nats.InactiveThreshold(thresh))
  5283  	require_NoError(t, err)
  5284  
  5285  	// Switch over leader.
  5286  	cl := c.consumerLeader(globalAccountName, "TEST", "dlc")
  5287  	cl.JetStreamStepdownConsumer(globalAccountName, "TEST", "dlc")
  5288  	c.waitOnConsumerLeader(globalAccountName, "TEST", "dlc")
  5289  
  5290  	// Create activity on this consumer.
  5291  	msgs, err := sub.Fetch(1)
  5292  	require_NoError(t, err)
  5293  	require_True(t, len(msgs) == 1)
  5294  
  5295  	// This is considered activity as well, so we can now watch up to thresh to make sure the consumer is still active.
  5296  	msgs[0].AckSync()
  5297  
  5298  	// The consumer should not disappear for the next `thresh` interval unless the old leader deletes it.
  5299  	timeout := time.Now().Add(thresh)
  5300  	for time.Now().Before(timeout) {
  5301  		_, err := js.ConsumerInfo("TEST", "dlc")
  5302  		if err == nats.ErrConsumerNotFound {
  5303  			t.Fatalf("Consumer deleted when it should not have been")
  5304  		}
  5305  	}
  5306  }
  5307  
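        // TestJetStreamClusterConsumerMaxDeliveryNumAckPendingBug checks that consumer state with
        // MaxDeliver and MaxAckPending set stays identical across leader changes and restarts for
        // file-based, memory-based and R1 consumers.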
  5308  func TestJetStreamClusterConsumerMaxDeliveryNumAckPendingBug(t *testing.T) {
  5309  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5310  	defer c.shutdown()
  5311  
  5312  	nc, js := jsClientConnect(t, c.randomServer())
  5313  	defer nc.Close()
  5314  
  5315  	_, err := js.AddStream(&nats.StreamConfig{
  5316  		Name:     "TEST",
  5317  		Subjects: []string{"*"},
  5318  		Replicas: 3,
  5319  	})
  5320  	require_NoError(t, err)
  5321  
  5322  	// send 50 msgs
  5323  	for i := 0; i < 50; i++ {
  5324  		_, err := js.Publish("foo", []byte("ok"))
  5325  		require_NoError(t, err)
  5326  	}
  5327  
  5328  	// File based.
  5329  	_, err = js.Subscribe("foo",
  5330  		func(msg *nats.Msg) {},
  5331  		nats.Durable("file"),
  5332  		nats.ManualAck(),
  5333  		nats.MaxDeliver(1),
  5334  		nats.AckWait(time.Second),
  5335  		nats.MaxAckPending(10),
  5336  	)
  5337  	require_NoError(t, err)
  5338  
  5339  	// Let first batch retry and expire.
  5340  	time.Sleep(1200 * time.Millisecond)
  5341  
  5342  	cia, err := js.ConsumerInfo("TEST", "file")
  5343  	require_NoError(t, err)
  5344  
  5345  	// Make sure followers will have exact same state.
  5346  	_, err = nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "file"), nil, time.Second)
  5347  	require_NoError(t, err)
  5348  	c.waitOnConsumerLeader(globalAccountName, "TEST", "file")
  5349  
  5350  	cib, err := js.ConsumerInfo("TEST", "file")
  5351  	require_NoError(t, err)
  5352  
  5353  	// We want to compare sans cluster details, which we know will change due to the leader change.
  5354  	// Also, last activity for delivered can be slightly off, so nil that out as well.
  5355  	checkConsumerInfo := func(a, b *nats.ConsumerInfo) {
  5356  		t.Helper()
  5357  		a.Cluster, b.Cluster = nil, nil
  5358  		a.Delivered.Last, b.Delivered.Last = nil, nil
  5359  		if !reflect.DeepEqual(a, b) {
  5360  			t.Fatalf("ConsumerInfo do not match\n\t%+v\n\t%+v", a, b)
  5361  		}
  5362  	}
  5363  
  5364  	checkConsumerInfo(cia, cib)
  5365  
  5366  	// Memory based.
  5367  	_, err = js.Subscribe("foo",
  5368  		func(msg *nats.Msg) {},
  5369  		nats.Durable("mem"),
  5370  		nats.ManualAck(),
  5371  		nats.MaxDeliver(1),
  5372  		nats.AckWait(time.Second),
  5373  		nats.MaxAckPending(10),
  5374  		nats.ConsumerMemoryStorage(),
  5375  	)
  5376  	require_NoError(t, err)
  5377  
  5378  	// Let first batch retry and expire.
  5379  	time.Sleep(1200 * time.Millisecond)
  5380  
  5381  	cia, err = js.ConsumerInfo("TEST", "mem")
  5382  	require_NoError(t, err)
  5383  
  5384  	// Make sure followers will have exact same state.
  5385  	_, err = nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "mem"), nil, time.Second)
  5386  	require_NoError(t, err)
  5387  	c.waitOnConsumerLeader(globalAccountName, "TEST", "mem")
  5388  
  5389  	cib, err = js.ConsumerInfo("TEST", "mem")
  5390  	require_NoError(t, err)
  5391  
  5392  	checkConsumerInfo(cia, cib)
  5393  
  5394  	// Now file based but R1 and server restart.
  5395  	_, err = js.Subscribe("foo",
  5396  		func(msg *nats.Msg) {},
  5397  		nats.Durable("r1"),
  5398  		nats.ManualAck(),
  5399  		nats.MaxDeliver(1),
  5400  		nats.AckWait(time.Second),
  5401  		nats.MaxAckPending(10),
  5402  		nats.ConsumerReplicas(1),
  5403  	)
  5404  	require_NoError(t, err)
  5405  
  5406  	// Let first batch retry and expire.
  5407  	time.Sleep(1200 * time.Millisecond)
  5408  
  5409  	cia, err = js.ConsumerInfo("TEST", "r1")
  5410  	require_NoError(t, err)
  5411  
  5412  	cl := c.consumerLeader(globalAccountName, "TEST", "r1")
  5413  	cl.Shutdown()
  5414  	cl.WaitForShutdown()
  5415  	cl = c.restartServer(cl)
  5416  	c.waitOnServerCurrent(cl)
  5417  
  5418  	cib, err = js.ConsumerInfo("TEST", "r1")
  5419  	require_NoError(t, err)
  5420  
  5421  	// Created can skew a small bit due to the server restart; this is expected.
  5422  	now := time.Now()
  5423  	cia.Created, cib.Created = now, now
  5424  	// Clear any disagreement on push bound.
  5425  	cia.PushBound, cib.PushBound = false, false
  5426  	checkConsumerInfo(cia, cib)
  5427  }
  5428  
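        // TestJetStreamClusterConsumerDefaultsFromStream checks that stream-level consumer limits are
        // inherited as consumer defaults and enforced on consumer creates and stream updates.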
  5429  func TestJetStreamClusterConsumerDefaultsFromStream(t *testing.T) {
  5430  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5431  	defer c.shutdown()
  5432  
  5433  	nc, js := jsClientConnect(t, c.randomServer())
  5434  	defer nc.Close()
  5435  
  5436  	streamTmpl := &StreamConfig{
  5437  		Name:     "test",
  5438  		Subjects: []string{"test.*"},
  5439  		Storage:  MemoryStorage,
  5440  		ConsumerLimits: StreamConsumerLimits{
  5441  			MaxAckPending:     0,
  5442  			InactiveThreshold: 0,
  5443  		},
  5444  	}
  5445  
  5446  	// Since nats.go doesn't yet know about the consumer limits, craft
  5447  	// the stream configuration request by hand.
  5448  	streamCreate := func(maxAckPending int, inactiveThreshold time.Duration) (*StreamConfig, error) {
  5449  		cfg := streamTmpl
  5450  		cfg.ConsumerLimits = StreamConsumerLimits{
  5451  			MaxAckPending:     maxAckPending,
  5452  			InactiveThreshold: inactiveThreshold,
  5453  		}
  5454  		j, err := json.Marshal(cfg)
  5455  		if err != nil {
  5456  			return nil, err
  5457  		}
  5458  		msg, err := nc.Request(fmt.Sprintf(JSApiStreamCreateT, "test"), j, time.Second*3)
  5459  		if err != nil {
  5460  			return nil, err
  5461  		}
  5462  		var resp JSApiStreamCreateResponse
  5463  		if err := json.Unmarshal(msg.Data, &resp); err != nil {
  5464  			return nil, err
  5465  		}
  5466  		if resp.StreamInfo == nil {
  5467  			return nil, resp.ApiResponse.ToError()
  5468  		}
  5469  		return &resp.Config, resp.ApiResponse.ToError()
  5470  	}
  5471  	streamUpdate := func(maxAckPending int, inactiveThreshold time.Duration) (*StreamConfig, error) {
  5472  		cfg := streamTmpl
  5473  		cfg.ConsumerLimits = StreamConsumerLimits{
  5474  			MaxAckPending:     maxAckPending,
  5475  			InactiveThreshold: inactiveThreshold,
  5476  		}
  5477  		j, err := json.Marshal(cfg)
  5478  		if err != nil {
  5479  			return nil, err
  5480  		}
  5481  		msg, err := nc.Request(fmt.Sprintf(JSApiStreamUpdateT, "test"), j, time.Second*3)
  5482  		if err != nil {
  5483  			return nil, err
  5484  		}
  5485  		var resp JSApiStreamUpdateResponse
  5486  		if err := json.Unmarshal(msg.Data, &resp); err != nil {
  5487  			return nil, err
  5488  		}
  5489  		if resp.StreamInfo == nil {
  5490  			return nil, resp.ApiResponse.ToError()
  5491  		}
  5492  		return &resp.Config, resp.ApiResponse.ToError()
  5493  	}
  5494  
  5495  	if _, err := streamCreate(15, time.Second); err != nil {
  5496  		t.Fatalf("Failed to add stream: %v", err)
  5497  	}
  5498  
  5499  	t.Run("InheritDefaultsFromStream", func(t *testing.T) {
  5500  		ci, err := js.AddConsumer("test", &nats.ConsumerConfig{
  5501  			Name: "InheritDefaultsFromStream",
  5502  		})
  5503  		require_NoError(t, err)
  5504  
  5505  		switch {
  5506  		case ci.Config.InactiveThreshold != time.Second:
  5507  			t.Fatalf("InactiveThreshold should be 1s, got %s", ci.Config.InactiveThreshold)
  5508  		case ci.Config.MaxAckPending != 15:
  5509  			t.Fatalf("MaxAckPending should be 15, got %d", ci.Config.MaxAckPending)
  5510  		}
  5511  	})
  5512  
  5513  	t.Run("CreateConsumerErrorOnExceedMaxAckPending", func(t *testing.T) {
  5514  		_, err := js.AddConsumer("test", &nats.ConsumerConfig{
  5515  			Name:          "CreateConsumerErrorOnExceedMaxAckPending",
  5516  			MaxAckPending: 30,
  5517  		})
  5518  		switch e := err.(type) {
  5519  		case *nats.APIError:
  5520  			if ErrorIdentifier(e.ErrorCode) != JSConsumerMaxPendingAckExcessErrF {
  5521  				t.Fatalf("invalid error code, got %d, wanted %d", e.ErrorCode, JSConsumerMaxPendingAckExcessErrF)
  5522  			}
  5523  		default:
  5524  			t.Fatalf("should have returned API error, got %T", e)
  5525  		}
  5526  	})
  5527  
  5528  	t.Run("CreateConsumerErrorOnExceedInactiveThreshold", func(t *testing.T) {
  5529  		_, err := js.AddConsumer("test", &nats.ConsumerConfig{
  5530  			Name:              "CreateConsumerErrorOnExceedInactiveThreshold",
  5531  			InactiveThreshold: time.Second * 2,
  5532  		})
  5533  		switch e := err.(type) {
  5534  		case *nats.APIError:
  5535  			if ErrorIdentifier(e.ErrorCode) != JSConsumerInactiveThresholdExcess {
  5536  				t.Fatalf("invalid error code, got %d, wanted %d", e.ErrorCode, JSConsumerInactiveThresholdExcess)
  5537  			}
  5538  		default:
  5539  			t.Fatalf("should have returned API error, got %T", e)
  5540  		}
  5541  	})
  5542  
  5543  	t.Run("UpdateStreamErrorOnViolateConsumerMaxAckPending", func(t *testing.T) {
  5544  		_, err := js.AddConsumer("test", &nats.ConsumerConfig{
  5545  			Name:          "UpdateStreamErrorOnViolateConsumerMaxAckPending",
  5546  			MaxAckPending: 15,
  5547  		})
  5548  		require_NoError(t, err)
  5549  
  5550  		if _, err = streamUpdate(10, 0); err == nil {
  5551  			t.Fatalf("stream update should have errored but didn't")
  5552  		}
  5553  	})
  5554  
  5555  	t.Run("UpdateStreamErrorOnViolateConsumerInactiveThreshold", func(t *testing.T) {
  5556  		_, err := js.AddConsumer("test", &nats.ConsumerConfig{
  5557  			Name:              "UpdateStreamErrorOnViolateConsumerInactiveThreshold",
  5558  			InactiveThreshold: time.Second,
  5559  		})
  5560  		require_NoError(t, err)
  5561  
  5562  		if _, err = streamUpdate(0, time.Second/2); err == nil {
  5563  			t.Fatalf("stream update should have errored but didn't")
  5564  		}
  5565  	})
  5566  }
  5567  
  5568  // Discovered that we are not properly setting certain default filestore blkSizes.
  5569  func TestJetStreamClusterCheckFileStoreBlkSizes(t *testing.T) {
  5570  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5571  	defer c.shutdown()
  5572  
  5573  	nc, js := jsClientConnect(t, c.randomServer())
  5574  	defer nc.Close()
  5575  
  5576  	// Normal Stream
  5577  	_, err := js.AddStream(&nats.StreamConfig{
  5578  		Name:     "TEST",
  5579  		Subjects: []string{"*"},
  5580  		Replicas: 3,
  5581  	})
  5582  	require_NoError(t, err)
  5583  
  5584  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5585  		Durable:   "C3",
  5586  		AckPolicy: nats.AckExplicitPolicy,
  5587  	})
  5588  	require_NoError(t, err)
  5589  
  5590  	// KV
  5591  	_, err = js.CreateKeyValue(&nats.KeyValueConfig{
  5592  		Bucket:   "TEST",
  5593  		Replicas: 3,
  5594  	})
  5595  	require_NoError(t, err)
  5596  
  5597  	blkSize := func(fs *fileStore) uint64 {
  5598  		fs.mu.RLock()
  5599  		defer fs.mu.RUnlock()
  5600  		return fs.fcfg.BlockSize
  5601  	}
  5602  
  5603  	// We will now check the following filestores.
  5604  	//  meta
  5605  	//  TEST stream and NRG
  5606  	//  C3 NRG
  5607  	//  KV_TEST stream and NRG
  5608  	for _, s := range c.servers {
  5609  		js, cc := s.getJetStreamCluster()
  5610  		// META
  5611  		js.mu.RLock()
  5612  		meta := cc.meta
  5613  		js.mu.RUnlock()
  5614  		require_True(t, meta != nil)
  5615  		fs := meta.(*raft).wal.(*fileStore)
  5616  		require_True(t, blkSize(fs) == defaultMetaFSBlkSize)
  5617  
  5618  		// TEST STREAM
  5619  		mset, err := s.GlobalAccount().lookupStream("TEST")
  5620  		require_NoError(t, err)
  5621  		mset.mu.RLock()
  5622  		fs = mset.store.(*fileStore)
  5623  		mset.mu.RUnlock()
  5624  		require_True(t, blkSize(fs) == defaultLargeBlockSize)
  5625  
  5626  		// KV STREAM
  5627  		// Now the KV, which has a different default block size.
  5628  		kv, err := s.GlobalAccount().lookupStream("KV_TEST")
  5629  		require_NoError(t, err)
  5630  		kv.mu.RLock()
  5631  		fs = kv.store.(*fileStore)
  5632  		kv.mu.RUnlock()
  5633  		require_True(t, blkSize(fs) == defaultKVBlockSize)
  5634  
  5635  		// Now check NRGs
  5636  		// TEST Stream
  5637  		n := mset.raftNode()
  5638  		require_True(t, n != nil)
  5639  		fs = n.(*raft).wal.(*fileStore)
  5640  		require_True(t, blkSize(fs) == defaultMediumBlockSize)
  5641  		// KV TEST Stream
  5642  		n = kv.raftNode()
  5643  		require_True(t, n != nil)
  5644  		fs = n.(*raft).wal.(*fileStore)
  5645  		require_True(t, blkSize(fs) == defaultMediumBlockSize)
  5646  		// Consumer
  5647  		o := mset.lookupConsumer("C3")
  5648  		require_True(t, o != nil)
  5649  		n = o.raftNode()
  5650  		require_True(t, n != nil)
  5651  		fs = n.(*raft).wal.(*fileStore)
  5652  		require_True(t, blkSize(fs) == defaultMediumBlockSize)
  5653  	}
  5654  }
  5655  
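        // TestJetStreamClusterDetectOrphanNRGs forces orphaned raft groups by clearing the raft nodes
        // before deleting the stream, then checks that the orphan sweep removes them.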
  5656  func TestJetStreamClusterDetectOrphanNRGs(t *testing.T) {
  5657  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5658  	defer c.shutdown()
  5659  
  5660  	nc, js := jsClientConnect(t, c.randomServer())
  5661  	defer nc.Close()
  5662  
  5663  	// Normal Stream
  5664  	_, err := js.AddStream(&nats.StreamConfig{
  5665  		Name:     "TEST",
  5666  		Subjects: []string{"*"},
  5667  		Replicas: 3,
  5668  	})
  5669  	require_NoError(t, err)
  5670  
  5671  	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
  5672  		Durable:   "DC",
  5673  		AckPolicy: nats.AckExplicitPolicy,
  5674  	})
  5675  	require_NoError(t, err)
  5676  
  5677  	// We will force an orphan for a certain server.
  5678  	s := c.randomNonStreamLeader(globalAccountName, "TEST")
  5679  
  5680  	mset, err := s.GlobalAccount().lookupStream("TEST")
  5681  	require_NoError(t, err)
  5682  	sgn := mset.raftNode().Group()
  5683  	mset.clearRaftNode()
  5684  
  5685  	o := mset.lookupConsumer("DC")
  5686  	require_True(t, o != nil)
  5687  	ogn := o.raftNode().Group()
  5688  	o.clearRaftNode()
  5689  
  5690  	require_NoError(t, js.DeleteStream("TEST"))
  5691  
  5692  	// Check that we do in fact have orphans.
  5693  	require_True(t, s.numRaftNodes() > 1)
  5694  
  5695  	// This function will detect orphans and clean them up.
  5696  	s.checkForNRGOrphans()
  5697  
  5698  	// Should only be meta NRG left.
  5699  	require_True(t, s.numRaftNodes() == 1)
  5700  	require_True(t, s.lookupRaftNode(sgn) == nil)
  5701  	require_True(t, s.lookupRaftNode(ogn) == nil)
  5702  }
  5703  
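        // TestJetStreamClusterRestartThenScaleStreamReplicas lame-duck restarts the whole cluster under
        // publish/consume load, then scales the stream R3 -> R1 -> R3 while watching the debug logs for
        // proposals being sent to a closed raft group.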
  5704  func TestJetStreamClusterRestartThenScaleStreamReplicas(t *testing.T) {
  5705  	t.Skip("This test takes too long, need to make shorter")
  5706  
  5707  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5708  	defer c.shutdown()
  5709  
  5710  	s := c.randomNonLeader()
  5711  	nc, js := jsClientConnect(t, s)
  5712  	defer nc.Close()
  5713  
  5714  	nc2, producer := jsClientConnect(t, s)
  5715  	defer nc2.Close()
  5716  
  5717  	_, err := js.AddStream(&nats.StreamConfig{
  5718  		Name:     "TEST",
  5719  		Subjects: []string{"foo"},
  5720  		Replicas: 3,
  5721  	})
  5722  	require_NoError(t, err)
  5723  	c.waitOnStreamLeader(globalAccountName, "TEST")
  5724  
  5725  	ctx, cancel := context.WithCancel(context.Background())
  5726  	defer cancel()
  5727  
  5728  	end := time.Now().Add(2 * time.Second)
  5729  	for time.Now().Before(end) {
  5730  		producer.Publish("foo", []byte(strings.Repeat("A", 128)))
  5731  		time.Sleep(time.Millisecond)
  5732  	}
  5733  
  5734  	var wg sync.WaitGroup
  5735  	for i := 0; i < 5; i++ {
  5736  		sub, err := js.PullSubscribe("foo", fmt.Sprintf("C-%d", i))
  5737  		require_NoError(t, err)
  5738  
  5739  		wg.Add(1)
  5740  		go func() {
  5741  			defer wg.Done()
  5742  			for range time.NewTicker(10 * time.Millisecond).C {
  5743  				select {
  5744  				case <-ctx.Done():
  5745  					return
  5746  				default:
  5747  				}
  5748  
  5749  				msgs, err := sub.Fetch(1)
  5750  				if err != nil && !errors.Is(err, nats.ErrTimeout) && !errors.Is(err, nats.ErrConnectionClosed) {
  5751  					t.Logf("Pull Error: %v", err)
  5752  				}
  5753  				for _, msg := range msgs {
  5754  					msg.Ack()
  5755  				}
  5756  			}
  5757  		}()
  5758  	}
  5759  	c.lameDuckRestartAll()
  5760  	c.waitOnStreamLeader(globalAccountName, "TEST")
  5761  
  5762  	// Swap the logger to try to detect the condition after the restart.
  5763  	loggers := make([]*captureDebugLogger, 3)
  5764  	for i, srv := range c.servers {
  5765  		l := &captureDebugLogger{dbgCh: make(chan string, 10)}
  5766  		loggers[i] = l
  5767  		srv.SetLogger(l, true, false)
  5768  	}
  5769  	condition := `Direct proposal ignored, not leader (state: CLOSED)`
  5770  	errCh := make(chan error, 10)
  5771  
  5772  	wg.Add(1)
  5773  	go func() {
  5774  		defer wg.Done()
  5775  		for {
  5776  			select {
  5777  			case dl := <-loggers[0].dbgCh:
  5778  				if strings.Contains(dl, condition) {
  5779  					errCh <- fmt.Errorf(condition)
  5780  				}
  5781  			case dl := <-loggers[1].dbgCh:
  5782  				if strings.Contains(dl, condition) {
  5783  					errCh <- fmt.Errorf(condition)
  5784  				}
  5785  			case dl := <-loggers[2].dbgCh:
  5786  				if strings.Contains(dl, condition) {
  5787  					errCh <- fmt.Errorf(condition)
  5788  				}
  5789  			case <-ctx.Done():
  5790  				return
  5791  			}
  5792  		}
  5793  	}()
  5794  
  5795  	// Start publishing again for a while.
  5796  	end = time.Now().Add(2 * time.Second)
  5797  	for time.Now().Before(end) {
  5798  		producer.Publish("foo", []byte(strings.Repeat("A", 128)))
  5799  		time.Sleep(time.Millisecond)
  5800  	}
  5801  
  5802  	// Try to do a stream edit back to R=1 after doing the full rolling upgrade.
  5803  	info, _ := js.StreamInfo("TEST")
  5804  	sconfig := info.Config
  5805  	sconfig.Replicas = 1
  5806  	_, err = js.UpdateStream(&sconfig)
  5807  	require_NoError(t, err)
  5808  
  5809  	// Leave running for some time after the update.
  5810  	time.Sleep(2 * time.Second)
  5811  
  5812  	info, _ = js.StreamInfo("TEST")
  5813  	sconfig = info.Config
  5814  	sconfig.Replicas = 3
  5815  	_, err = js.UpdateStream(&sconfig)
  5816  	require_NoError(t, err)
  5817  
  5818  	select {
  5819  	case e := <-errCh:
  5820  		t.Fatalf("Bad condition on raft node: %v", e)
  5821  	case <-time.After(2 * time.Second):
  5822  		// Done
  5823  	}
  5824  
  5825  	// Stop goroutines and wait for them to exit.
  5826  	cancel()
  5827  	wg.Wait()
  5828  }
  5829  
  5830  // https://github.com/nats-io/nats-server/issues/4732
  5831  func TestJetStreamClusterStreamLimitsOnScaleUpAndMove(t *testing.T) {
  5832  	tmpl := `
  5833  			listen: 127.0.0.1:-1
  5834  			server_name: %s
  5835  			jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  5836  			cluster {
  5837  				name: %s
  5838  				listen: 127.0.0.1:%d
  5839  				routes = [%s]
  5840  			}
  5841  	`
  5842  	opFrag := `
  5843  			operator: %s
  5844  			system_account: %s
  5845  			resolver: { type: MEM }
  5846  			resolver_preload = {
  5847  				%s : %s
  5848  				%s : %s
  5849  			}
  5850  		`
  5851  
  5852  	_, syspub := createKey(t)
  5853  	sysJwt := encodeClaim(t, jwt.NewAccountClaims(syspub), syspub)
  5854  
  5855  	accKp, aExpPub := createKey(t)
  5856  	accClaim := jwt.NewAccountClaims(aExpPub)
  5857  	accClaim.Limits.JetStreamTieredLimits["R1"] = jwt.JetStreamLimits{
  5858  		DiskStorage: -1, Consumer: -1, Streams: 1}
  5859  	accClaim.Limits.JetStreamTieredLimits["R3"] = jwt.JetStreamLimits{
  5860  		DiskStorage: 0, Consumer: -1, Streams: 1}
  5861  	accJwt := encodeClaim(t, accClaim, aExpPub)
  5862  	accCreds := newUser(t, accKp)
  5863  
  5864  	template := tmpl + fmt.Sprintf(opFrag, ojwt, syspub, syspub, sysJwt, aExpPub, accJwt)
  5865  
  5866  	c := createJetStreamCluster(t, template, "CLOUD", _EMPTY_, 3, 22020, true)
  5867  	defer c.shutdown()
  5868  
  5869  	nc, js := jsClientConnect(t, c.randomServer(), nats.UserCredentials(accCreds))
  5870  	defer nc.Close()
  5871  
  5872  	_, err := js.AddStream(&nats.StreamConfig{
  5873  		Name:     "TEST",
  5874  		Subjects: []string{"foo"},
  5875  	})
  5876  	require_NoError(t, err)
  5877  
  5878  	toSend, msg := 100, bytes.Repeat([]byte("Z"), 1024)
  5879  	for i := 0; i < toSend; i++ {
  5880  		_, err := js.PublishAsync("foo", msg)
  5881  		require_NoError(t, err)
  5882  	}
  5883  	select {
  5884  	case <-js.PublishAsyncComplete():
  5885  	case <-time.After(5 * time.Second):
  5886  		t.Fatalf("Did not receive completion signal")
  5887  	}
  5888  
  5889  	// Scale up should fail here since no R3 storage.
  5890  	_, err = js.UpdateStream(&nats.StreamConfig{
  5891  		Name:     "TEST",
  5892  		Subjects: []string{"foo"},
  5893  		Replicas: 3,
  5894  	})
  5895  	require_Error(t, err, errors.New("insufficient storage resources"))
  5896  }
  5897  
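        // TestJetStreamClusterAPIAccessViaSystemAccount checks that the JetStream API rejects requests
        // made from the system account, both clustered and single server.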
  5898  func TestJetStreamClusterAPIAccessViaSystemAccount(t *testing.T) {
  5899  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5900  	defer c.shutdown()
  5901  
  5902  	// Connect to system account.
  5903  	nc, js := jsClientConnect(t, c.randomServer(), nats.UserInfo("admin", "s3cr3t!"))
  5904  	defer nc.Close()
  5905  
  5906  	_, err := js.AddStream(&nats.StreamConfig{Name: "TEST"})
  5907  	require_Error(t, err, NewJSNotEnabledForAccountError())
  5908  
  5909  	// Make sure we get the same behavior with a single server.
  5910  	tmpl := `
  5911  		listen: 127.0.0.1:-1
  5912  		jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
  5913  		accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
  5914  	`
  5915  	conf := createConfFile(t, []byte(fmt.Sprintf(tmpl, t.TempDir())))
  5916  	s, _ := RunServerWithConfig(conf)
  5917  	defer s.Shutdown()
  5918  
  5919  	nc, js = jsClientConnect(t, s, nats.UserInfo("admin", "s3cr3t!"))
  5920  	defer nc.Close()
  5921  
  5922  	_, err = js.AddStream(&nats.StreamConfig{Name: "TEST"})
  5923  	require_Error(t, err, NewJSNotEnabledForAccountError())
  5924  }
  5925  
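        // TestJetStreamClusterStreamResetPreacks checks that an interest stream replica whose message
        // store is removed while it is down catches back up after restart, ending with no messages and
        // the expected first sequence past the purge point.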
  5926  func TestJetStreamClusterStreamResetPreacks(t *testing.T) {
  5927  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  5928  	defer c.shutdown()
  5929  
  5930  	nc, js := jsClientConnect(t, c.randomServer())
  5931  	defer nc.Close()
  5932  
  5933  	_, err := js.AddStream(&nats.StreamConfig{
  5934  		Name:      "TEST",
  5935  		Subjects:  []string{"foo"},
  5936  		Retention: nats.InterestPolicy,
  5937  		Replicas:  3,
  5938  	})
  5939  	require_NoError(t, err)
  5940  
  5941  	err = js.PurgeStream("TEST", &nats.StreamPurgeRequest{Sequence: 100_000_000})
  5942  	require_NoError(t, err)
  5943  
  5944  	sub, err := js.PullSubscribe("foo", "dlc")
  5945  	require_NoError(t, err)
  5946  
  5947  	// Put 20 msgs in.
  5948  	for i := 0; i < 20; i++ {
  5949  		_, err := js.Publish("foo", nil)
  5950  		require_NoError(t, err)
  5951  	}
  5952  
  5953  	// Consume and ack 10.
  5954  	msgs, err := sub.Fetch(10, nats.MaxWait(time.Second))
  5955  	require_NoError(t, err)
  5956  	require_Equal(t, len(msgs), 10)
  5957  
  5958  	for _, msg := range msgs {
  5959  		msg.AckSync()
  5960  	}
  5961  	// Let sync propagate.
  5962  	time.Sleep(250 * time.Millisecond)
  5963  
  5964  	// Now grab a non-leader server.
  5965  	// We will shut it down and remove the stream data.
  5966  	nl := c.randomNonStreamLeader(globalAccountName, "TEST")
  5967  	mset, err := nl.GlobalAccount().lookupStream("TEST")
  5968  	require_NoError(t, err)
  5969  	fs := mset.store.(*fileStore)
  5970  	mdir := filepath.Join(fs.fcfg.StoreDir, msgDir)
  5971  	nl.Shutdown()
  5972  	// In case that was the consumer leader.
  5973  	c.waitOnConsumerLeader(globalAccountName, "TEST", "dlc")
  5974  
  5975  	// Now consume the remaining 10 and ack.
  5976  	msgs, err = sub.Fetch(10, nats.MaxWait(10*time.Second))
  5977  	require_NoError(t, err)
  5978  	require_Equal(t, len(msgs), 10)
  5979  
  5980  	for _, msg := range msgs {
  5981  		msg.AckSync()
  5982  	}
  5983  
  5984  	// Now remove the stream manually.
  5985  	require_NoError(t, os.RemoveAll(mdir))
  5986  	nl = c.restartServer(nl)
  5987  	c.waitOnAllCurrent()
  5988  
  5989  	mset, err = nl.GlobalAccount().lookupStream("TEST")
  5990  	require_NoError(t, err)
  5991  
  5992  	checkFor(t, 10*time.Second, 200*time.Millisecond, func() error {
  5993  		state := mset.state()
  5994  		if state.Msgs != 0 || state.FirstSeq != 100_000_020 {
  5995  			return fmt.Errorf("Not correct state yet: %+v", state)
  5996  		}
  5997  		return nil
  5998  	})
  5999  }
  6000  
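        // TestJetStreamClusterDomainAdvisory checks that a meta leader change in a domain-enabled
        // cluster publishes a domain leader elected advisory with the expected leader, domain,
        // cluster and replicas.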
  6001  func TestJetStreamClusterDomainAdvisory(t *testing.T) {
  6002  	tmpl := strings.Replace(jsClusterAccountsTempl, "store_dir:", "domain: NGS, store_dir:", 1)
  6003  	c := createJetStreamCluster(t, tmpl, "R3S", _EMPTY_, 3, 18033, true)
  6004  	defer c.shutdown()
  6005  
  6006  	// Connect to system account.
  6007  	nc, _ := jsClientConnect(t, c.randomServer(), nats.UserInfo("admin", "s3cr3t!"))
  6008  	defer nc.Close()
  6009  
  6010  	sub, err := nc.SubscribeSync(JSAdvisoryDomainLeaderElected)
  6011  	require_NoError(t, err)
  6012  
  6013  	// Ask meta leader to move and make sure we get an advisory.
  6014  	nc.Request(JSApiLeaderStepDown, nil, time.Second)
  6015  	c.waitOnLeader()
  6016  
  6017  	checkSubsPending(t, sub, 1)
  6018  
  6019  	m, err := sub.NextMsg(time.Second)
  6020  	require_NoError(t, err)
  6021  
  6022  	var adv JSDomainLeaderElectedAdvisory
  6023  	require_NoError(t, json.Unmarshal(m.Data, &adv))
  6024  
  6025  	ml := c.leader()
  6026  	js, cc := ml.getJetStreamCluster()
  6027  	js.mu.RLock()
  6028  	peer := cc.meta.ID()
  6029  	js.mu.RUnlock()
  6030  
  6031  	require_Equal(t, adv.Leader, peer)
  6032  	require_Equal(t, adv.Domain, "NGS")
  6033  	require_Equal(t, adv.Cluster, "R3S")
  6034  	require_Equal(t, len(adv.Replicas), 3)
  6035  }
  6036  
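        // TestJetStreamClusterLimitsBasedStreamFileStoreDesync publishes into a stream until the
        // account's per-stream storage limit (store_max_stream_bytes) is hit, then checks that replicas
        // do not drift and that consumers of the same stream report the same delivered sequence.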
  6037  func TestJetStreamClusterLimitsBasedStreamFileStoreDesync(t *testing.T) {
  6038  	conf := `
  6039  	listen: 127.0.0.1:-1
  6040  	server_name: %s
  6041  	jetstream: {
  6042  		store_dir: '%s',
  6043  	}
  6044  	cluster {
  6045  		name: %s
  6046  		listen: 127.0.0.1:%d
  6047  		routes = [%s]
  6048  	}
  6049          system_account: sys
  6050          no_auth_user: js
  6051  	accounts {
  6052  	  sys {
  6053  	    users = [
  6054  	      { user: sys, pass: sys }
  6055  	    ]
  6056  	  }
  6057  	  js {
  6058  	    jetstream = { store_max_stream_bytes = 3mb }
  6059  	    users = [
  6060  	      { user: js, pass: js }
  6061  	    ]
  6062  	  }
  6063  	}`
  6064  	c := createJetStreamClusterWithTemplate(t, conf, "limits", 3)
  6065  	defer c.shutdown()
  6066  
  6067  	nc, js := jsClientConnect(t, c.randomServer())
  6068  	defer nc.Close()
  6069  
  6070  	cnc, cjs := jsClientConnect(t, c.randomServer())
  6071  	defer cnc.Close()
  6072  
  6073  	_, err := js.AddStream(&nats.StreamConfig{
  6074  		Name:     "LTEST",
  6075  		Subjects: []string{"messages.*"},
  6076  		Replicas: 3,
  6077  		MaxAge:   10 * time.Minute,
  6078  		MaxMsgs:  100_000,
  6079  	})
  6080  	require_NoError(t, err)
  6081  
  6082  	ctx, cancel := context.WithCancel(context.Background())
  6083  	defer cancel()
  6084  
  6085  	psub, err := cjs.PullSubscribe("messages.*", "consumer")
  6086  	require_NoError(t, err)
  6087  
  6088  	var (
  6089  		wg          sync.WaitGroup
  6090  		received    uint64
  6091  		errCh       = make(chan error, 100_000)
  6092  		receivedMap = make(map[string]*nats.Msg)
  6093  	)
  6094  	wg.Add(1)
  6095  	go func() {
  6096  		tick := time.NewTicker(20 * time.Millisecond)
  6097  		for {
  6098  			select {
  6099  			case <-ctx.Done():
  6100  				wg.Done()
  6101  				return
  6102  			case <-tick.C:
  6103  				msgs, err := psub.Fetch(10, nats.MaxWait(200*time.Millisecond))
  6104  				if err != nil {
  6105  					continue
  6106  				}
  6107  				for _, msg := range msgs {
  6108  					received++
  6109  					receivedMap[msg.Subject] = msg
  6110  					if meta, _ := msg.Metadata(); meta.NumDelivered > 1 {
  6111  						t.Logf("GOT MSG: %s :: %+v :: %d", msg.Subject, meta, len(msg.Data))
  6112  					}
  6113  					msg.Ack()
  6114  				}
  6115  			}
  6116  		}
  6117  	}()
  6118  
  6119  	// Publish msgs at roughly 1 msg per msec (up to 100_000) until the context is canceled.
  6120  	shouldDrop := make(map[string]error)
  6121  	wg.Add(1)
  6122  	go func() {
  6123  		payload := []byte(strings.Repeat("A", 1024))
  6124  		tick := time.NewTicker(1 * time.Millisecond)
  6125  		for i := 1; i < 100_000; {
  6126  			select {
  6127  			case <-ctx.Done():
  6128  				wg.Done()
  6129  				return
  6130  			case <-tick.C:
  6131  				// This should run into the 3MB quota and get errors right away,
  6132  				// before the max msgs limit does.
  6133  				subject := fmt.Sprintf("messages.%d", i)
  6134  				_, err := js.Publish(subject, payload, nats.RetryAttempts(0))
  6135  				if err != nil {
  6136  					errCh <- err
  6137  				}
  6138  				i++
  6139  
  6140  				// Any message beyond this point should not be a success,
  6141  				// since the stream should be full due to the quota.
  6142  				// Here we capture the messages that failed to be confirmed.
  6143  				if err != nil && i > 1000 {
  6144  					shouldDrop[subject] = err
  6145  				}
  6146  			}
  6147  		}
  6148  	}()
  6149  
  6150  	// Collect enough errors to cause things to get out of sync.
  6151  	var errCount int
  6152  Setup:
  6153  	for {
  6154  		select {
  6155  		case err = <-errCh:
  6156  			errCount++
  6157  			if errCount >= 20_000 {
  6158  				// Stop both producing and consuming.
  6159  				cancel()
  6160  				break Setup
  6161  			}
  6162  		case <-time.After(5 * time.Second):
  6163  			t.Fatalf("Timed out waiting for limits error")
  6164  		}
  6165  	}
  6166  
  6167  	// Both goroutines should be exiting now.
  6168  	wg.Wait()
  6169  
  6170  	// Check messages that ought to have been dropped.
  6171  	for subject := range receivedMap {
  6172  		found, ok := shouldDrop[subject]
  6173  		if ok {
  6174  			t.Errorf("Should have dropped message published on %q since got error: %v", subject, found)
  6175  		}
  6176  	}
  6177  
  6178  	getStreamDetails := func(t *testing.T, srv *Server) *StreamDetail {
  6179  		t.Helper()
  6180  		jsz, err := srv.Jsz(&JSzOptions{Accounts: true, Streams: true, Consumer: true})
  6181  		require_NoError(t, err)
  6182  		if len(jsz.AccountDetails) > 0 && len(jsz.AccountDetails[0].Streams) > 0 {
  6183  			details := jsz.AccountDetails[0]
  6184  			stream := details.Streams[0]
  6185  			return &stream
  6186  		}
  6187  		t.Error("Could not find account details")
  6188  		return nil
  6189  	}
  6190  	checkState := func(t *testing.T) error {
  6191  		t.Helper()
  6192  
  6193  		leaderSrv := c.streamLeader("js", "LTEST")
  6194  		streamLeader := getStreamDetails(t, leaderSrv)
  6195  		// t.Logf("Stream Leader: %+v", streamLeader.State)
  6196  		errs := make([]error, 0)
  6197  		for _, srv := range c.servers {
  6198  			if srv == leaderSrv {
  6199  				// Skip self
  6200  				continue
  6201  			}
  6202  			stream := getStreamDetails(t, srv)
  6203  			if stream.State.Msgs != streamLeader.State.Msgs {
  6204  				err := fmt.Errorf("Leader %v has %d messages, Follower %v has %d messages",
  6205  					stream.Cluster.Leader, streamLeader.State.Msgs,
  6206  					srv.Name(), stream.State.Msgs,
  6207  				)
  6208  				errs = append(errs, err)
  6209  			}
  6210  		}
  6211  		if len(errs) > 0 {
  6212  			return errors.Join(errs...)
  6213  		}
  6214  		return nil
  6215  	}
  6216  
  6217  	// Confirm state of the leader.
  6218  	leaderSrv := c.streamLeader("js", "LTEST")
  6219  	streamLeader := getStreamDetails(t, leaderSrv)
  6220  	if streamLeader.State.Msgs != received {
  6221  		t.Errorf("Leader %v has %d messages stored but %d messages were received (delta: %d)",
  6222  			leaderSrv.Name(), streamLeader.State.Msgs, received, received-streamLeader.State.Msgs)
  6223  	}
  6224  	cinfo, err := psub.ConsumerInfo()
  6225  	require_NoError(t, err)
  6226  	if received != cinfo.Delivered.Consumer {
  6227  		t.Errorf("Unexpected consumer sequence. Got: %v, expected: %v",
  6228  			cinfo.Delivered.Consumer, received)
  6229  	}
  6230  
  6231  	// Check whether there was a drift among the leader and followers.
  6232  	var (
  6233  		lastErr  error
  6234  		attempts int
  6235  	)
  6236  Check:
  6237  	for range time.NewTicker(1 * time.Second).C {
  6238  		lastErr = checkState(t)
  6239  		if attempts > 5 {
  6240  			break Check
  6241  		}
  6242  		attempts++
  6243  	}
  6244  
  6245  	// Read the stream
  6246  	psub2, err := cjs.PullSubscribe("messages.*", "")
  6247  	require_NoError(t, err)
  6248  
  6249  Consume2:
  6250  	for {
  6251  		msgs, err := psub2.Fetch(100)
  6252  		if err != nil {
  6253  			continue
  6254  		}
  6255  		for _, msg := range msgs {
  6256  			msg.Ack()
  6257  
  6258  			meta, _ := msg.Metadata()
  6259  			if meta.NumPending == 0 {
  6260  				break Consume2
  6261  			}
  6262  		}
  6263  	}
  6264  
  6265  	cinfo2, err := psub2.ConsumerInfo()
  6266  	require_NoError(t, err)
  6267  
  6268  	a := cinfo.Delivered.Consumer
  6269  	b := cinfo2.Delivered.Consumer
  6270  	if a != b {
  6271  		t.Errorf("Consumers to same stream are at different sequences: %d vs %d", a, b)
  6272  	}
  6273  
  6274  	// The test is done and the replicas were in sync, so we can stop testing at this point.
  6275  	if lastErr == nil {
  6276  		return
  6277  	}
  6278  
  6279  	// Now we will cause a few step downs while out of sync to get different results.
  6280  	t.Errorf("Replicas are out of sync:\n%v", lastErr)
  6281  
  6282  	stepDown := func() {
  6283  		_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "LTEST"), nil, time.Second)
  6284  	}
  6285  	// Check StreamInfo in this state then trigger a few step downs.
  6286  	var prevLeaderMsgs uint64
  6287  	leaderSrv = c.streamLeader("js", "LTEST")
  6288  	sinfo, err := js.StreamInfo("LTEST")
  6289  	prevLeaderMsgs = sinfo.State.Msgs
  6290  	for i := 0; i < 10; i++ {
  6291  		stepDown()
  6292  		time.Sleep(2 * time.Second)
  6293  
  6294  		leaderSrv = c.streamLeader("js", "LTEST")
  6295  		sinfo, err = js.StreamInfo("LTEST")
  6296  		if err != nil {
  6297  			t.Logf("Error: %v", err)
  6298  			continue
  6299  		}
  6300  		if leaderSrv != nil && sinfo != nil {
  6301  			t.Logf("When leader is %v, Messages: %d", leaderSrv.Name(), sinfo.State.Msgs)
  6302  
  6303  			// Leave as leader the replica with fewer messages that was out of sync.
  6304  			if prevLeaderMsgs > sinfo.State.Msgs {
  6305  				break
  6306  			}
  6307  		}
  6308  	}
  6309  	t.Logf("Changed to use leader %v which has %d messages", leaderSrv.Name(), sinfo.State.Msgs)
  6310  
  6311  	// Read the stream again
  6312  	psub3, err := cjs.PullSubscribe("messages.*", "")
  6313  	require_NoError(t, err)
  6314  
  6315  Consume3:
  6316  	for {
  6317  		msgs, err := psub3.Fetch(100)
  6318  		if err != nil {
  6319  			continue
  6320  		}
  6321  		for _, msg := range msgs {
  6322  			msg.Ack()
  6323  
  6324  			meta, _ := msg.Metadata()
  6325  			if meta.NumPending == 0 {
  6326  				break Consume3
  6327  			}
  6328  		}
  6329  	}
  6330  
  6331  	cinfo3, err := psub3.ConsumerInfo()
  6332  	require_NoError(t, err)
  6333  
  6334  	// Compare the consumer that was created before the resource limits error
  6335  	// with one created before the step down.
  6336  	a = cinfo.Delivered.Consumer
  6337  	b = cinfo2.Delivered.Consumer
  6338  	if a != b {
  6339  		t.Errorf("Consumers to same stream are at different sequences: %d vs %d", a, b)
  6340  	}
  6341  
  6342  	// Compare the consumer that was created before the resource limits error
  6343  	// with one created AFTER the step down.
  6344  	a = cinfo.Delivered.Consumer
  6345  	b = cinfo3.Delivered.Consumer
  6346  	if a != b {
  6347  		t.Errorf("Consumers to same stream are at different sequences: %d vs %d", a, b)
  6348  	}
  6349  
  6350  	// Compare consumers created after the resource limits error.
  6351  	a = cinfo2.Delivered.Consumer
  6352  	b = cinfo3.Delivered.Consumer
  6353  	if a != b {
  6354  		t.Errorf("Consumers to same stream are at different sequences: %d vs %d", a, b)
  6355  	}
  6356  }
  6357  
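        // TestJetStreamClusterWorkQueueStreamDiscardNewDesync runs the discard-new work queue variant
        // of the desync check, for both max msgs and max bytes limits, with leader stepdowns while
        // publishes are failing.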
  6358  func TestJetStreamClusterWorkQueueStreamDiscardNewDesync(t *testing.T) {
  6359  	t.Run("max msgs", func(t *testing.T) {
  6360  		testJetStreamClusterWorkQueueStreamDiscardNewDesync(t, &nats.StreamConfig{
  6361  			Name:      "WQTEST_MM",
  6362  			Subjects:  []string{"messages.*"},
  6363  			Replicas:  3,
  6364  			MaxAge:    10 * time.Minute,
  6365  			MaxMsgs:   100,
  6366  			Retention: nats.WorkQueuePolicy,
  6367  			Discard:   nats.DiscardNew,
  6368  		})
  6369  	})
  6370  	t.Run("max bytes", func(t *testing.T) {
  6371  		testJetStreamClusterWorkQueueStreamDiscardNewDesync(t, &nats.StreamConfig{
  6372  			Name:      "WQTEST_MB",
  6373  			Subjects:  []string{"messages.*"},
  6374  			Replicas:  3,
  6375  			MaxAge:    10 * time.Minute,
  6376  			MaxBytes:  1 * 1024 * 1024,
  6377  			Retention: nats.WorkQueuePolicy,
  6378  			Discard:   nats.DiscardNew,
  6379  		})
  6380  	})
  6381  }
  6382  
  6383  func testJetStreamClusterWorkQueueStreamDiscardNewDesync(t *testing.T, sc *nats.StreamConfig) {
  6384  	conf := `
  6385  	listen: 127.0.0.1:-1
  6386  	server_name: %s
  6387  	jetstream: {
  6388  		store_dir: '%s',
  6389  	}
  6390  	cluster {
  6391  		name: %s
  6392  		listen: 127.0.0.1:%d
  6393  		routes = [%s]
  6394  	}
  6395          system_account: sys
  6396          no_auth_user: js
  6397  	accounts {
  6398  	  sys {
  6399  	    users = [
  6400  	      { user: sys, pass: sys }
  6401  	    ]
  6402  	  }
  6403  	  js {
  6404  	    jetstream = enabled
  6405  	    users = [
  6406  	      { user: js, pass: js }
  6407  	    ]
  6408  	  }
  6409  	}`
  6410  	c := createJetStreamClusterWithTemplate(t, conf, sc.Name, 3)
  6411  	defer c.shutdown()
  6412  
  6413  	nc, js := jsClientConnect(t, c.randomServer())
  6414  	defer nc.Close()
  6415  
  6416  	cnc, cjs := jsClientConnect(t, c.randomServer())
  6417  	defer cnc.Close()
  6418  
  6419  	_, err := js.AddStream(sc)
  6420  	require_NoError(t, err)
  6421  
  6422  	ctx, cancel := context.WithCancel(context.Background())
  6423  	defer cancel()
  6424  
  6425  	psub, err := cjs.PullSubscribe("messages.*", "consumer")
  6426  	require_NoError(t, err)
  6427  
  6428  	stepDown := func() {
  6429  		_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, sc.Name), nil, time.Second)
  6430  	}
  6431  
  6432  	// Messages will be produced and consumed in parallel; then, once there are
  6433  	// enough errors, a leader election will be triggered.
  6434  	var (
  6435  		wg          sync.WaitGroup
  6436  		received    uint64
  6437  		errCh       = make(chan error, 100_000)
  6438  		receivedMap = make(map[string]*nats.Msg)
  6439  	)
  6440  	wg.Add(1)
  6441  	go func() {
  6442  		tick := time.NewTicker(20 * time.Millisecond)
  6443  		for {
  6444  			select {
  6445  			case <-ctx.Done():
  6446  				wg.Done()
  6447  				return
  6448  			case <-tick.C:
  6449  				msgs, err := psub.Fetch(10, nats.MaxWait(200*time.Millisecond))
  6450  				if err != nil {
  6451  					// The consumer will eventually just continue to time out here.
  6452  					continue
  6453  				}
  6454  				for _, msg := range msgs {
  6455  					received++
  6456  					receivedMap[msg.Subject] = msg
  6457  					msg.Ack()
  6458  				}
  6459  			}
  6460  		}
  6461  	}()
  6462  
  6463  	shouldDrop := make(map[string]error)
  6464  	wg.Add(1)
  6465  	go func() {
  6466  		payload := []byte(strings.Repeat("A", 1024))
  6467  		tick := time.NewTicker(1 * time.Millisecond)
  6468  		for i := 1; ; i++ {
  6469  			select {
  6470  			case <-ctx.Done():
  6471  				wg.Done()
  6472  				return
  6473  			case <-tick.C:
  6474  				subject := fmt.Sprintf("messages.%d", i)
  6475  				_, err := js.Publish(subject, payload, nats.RetryAttempts(0))
  6476  				if err != nil {
  6477  					errCh <- err
  6478  				}
  6479  				// Capture the messages that have failed.
  6480  				if err != nil {
  6481  					shouldDrop[subject] = err
  6482  				}
  6483  			}
  6484  		}
  6485  	}()
  6486  
  6487  	// Collect enough errors to cause things to get out of sync.
  6488  	var errCount int
  6489  Setup:
  6490  	for {
  6491  		select {
  6492  		case err = <-errCh:
  6493  			errCount++
  6494  			if errCount%500 == 0 {
  6495  				stepDown()
  6496  			} else if errCount >= 2000 {
  6497  				// Stop both producing and consuming.
  6498  				cancel()
  6499  				break Setup
  6500  			}
  6501  		case <-time.After(5 * time.Second):
  6502  			// Unblock the test and continue.
  6503  			cancel()
  6504  			break Setup
  6505  		}
  6506  	}
  6507  
  6508  	// Both goroutines should be exiting now.
  6509  	wg.Wait()
  6510  
  6511  	// Let acks propagate for stream checks.
  6512  	time.Sleep(250 * time.Millisecond)
  6513  
  6514  	// Check messages that ought to have been dropped.
  6515  	for subject := range receivedMap {
  6516  		found, ok := shouldDrop[subject]
  6517  		if ok {
  6518  			t.Errorf("Should have dropped message published on %q since got error: %v", subject, found)
  6519  		}
  6520  	}
  6521  }
  6522  
  6523  // https://github.com/nats-io/nats-server/issues/5071
  6524  func TestJetStreamClusterStreamPlacementDistribution(t *testing.T) {
  6525  	c := createJetStreamClusterExplicit(t, "R3S", 5)
  6526  	defer c.shutdown()
  6527  
  6528  	s := c.randomNonLeader()
  6529  	nc, js := jsClientConnect(t, s)
  6530  	defer nc.Close()
  6531  
  6532  	for i := 1; i <= 10; i++ {
  6533  		_, err := js.AddStream(&nats.StreamConfig{
  6534  			Name:     fmt.Sprintf("TEST:%d", i),
  6535  			Subjects: []string{fmt.Sprintf("foo.%d.*", i)},
  6536  			Replicas: 3,
  6537  		})
  6538  		require_NoError(t, err)
  6539  	}
  6540  
  6541  	// 10 streams, 3 replicas div 5 servers.
  6542  	expectedStreams := 10 * 3 / 5
  6543  	for _, s := range c.servers {
  6544  		jsz, err := s.Jsz(nil)
  6545  		require_NoError(t, err)
  6546  		require_Equal(t, jsz.Streams, expectedStreams)
  6547  	}
  6548  }
  6549  
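        // TestJetStreamClusterConsumerPauseViaConfig checks that a pause deadline applied to a consumer
        // survives consumer leader stepdowns and that deliveries resume once the deadline passes.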
  6550  func TestJetStreamClusterConsumerPauseViaConfig(t *testing.T) {
  6551  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6552  	defer c.shutdown()
  6553  
  6554  	nc, js := jsClientConnect(t, c.randomServer())
  6555  	defer nc.Close()
  6556  
  6557  	_, err := js.AddStream(&nats.StreamConfig{
  6558  		Name:     "TEST",
  6559  		Subjects: []string{"foo"},
  6560  		Replicas: 3,
  6561  	})
  6562  	require_NoError(t, err)
  6563  
  6564  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
  6565  		Name:     "my_consumer",
  6566  		Replicas: 3,
  6567  	})
  6568  
  6569  	sub, err := js.PullSubscribe("foo", "", nats.Bind("TEST", "my_consumer"))
  6570  	require_NoError(t, err)
  6571  
  6572  	stepdown := func() {
  6573  		t.Helper()
  6574  		_, err := nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "my_consumer"), nil, time.Second)
  6575  		require_NoError(t, err)
  6576  		c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
  6577  	}
  6578  
  6579  	publish := func(wait time.Duration) {
  6580  		t.Helper()
  6581  		for i := 0; i < 5; i++ {
  6582  			_, err = js.Publish("foo", []byte("OK"))
  6583  			require_NoError(t, err)
  6584  		}
  6585  		msgs, err := sub.Fetch(5, nats.MaxWait(wait))
  6586  		require_NoError(t, err)
  6587  		require_Equal(t, len(msgs), 5)
  6588  	}
  6589  
  6590  	// This should be fast as there's no deadline.
  6591  	publish(time.Second)
  6592  
  6593  	// Now we're going to set the deadline.
  6594  	deadline := jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", time.Now().Add(time.Second*3))
  6595  	c.waitOnAllCurrent()
  6596  
  6597  	// It will now take longer than 3 seconds.
  6598  	publish(time.Second * 5)
  6599  	require_True(t, time.Now().After(deadline))
  6600  
  6601  	// The next set of publishes after the deadline should now be fast.
  6602  	publish(time.Second)
  6603  
  6604  	// We'll kick the leader, but since we're after the deadline, this
  6605  	// should still be fast.
  6606  	stepdown()
  6607  	publish(time.Second)
  6608  
  6609  	// Now we're going to do an update and then immediately kick the
  6610  	// leader. The pause should still be in effect afterwards.
  6611  	deadline = jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", time.Now().Add(time.Second*3))
  6612  	c.waitOnAllCurrent()
        	stepdown()
  6613  	publish(time.Second * 5)
  6614  	require_True(t, time.Now().After(deadline))
  6615  
  6616  	// The next set of publishes after the deadline should now be fast.
  6617  	publish(time.Second)
  6618  }
  6619  
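        // TestJetStreamClusterConsumerPauseViaEndpoint checks pausing pull and push consumers via the
        // pause API: fetches stall until the deadline passes, and a zero deadline clears the pause.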
  6620  func TestJetStreamClusterConsumerPauseViaEndpoint(t *testing.T) {
  6621  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6622  	defer c.shutdown()
  6623  
  6624  	nc, js := jsClientConnect(t, c.randomServer())
  6625  	defer nc.Close()
  6626  
  6627  	_, err := js.AddStream(&nats.StreamConfig{
  6628  		Name:     "TEST",
  6629  		Subjects: []string{"push", "pull"},
  6630  		Replicas: 3,
  6631  	})
  6632  	require_NoError(t, err)
  6633  
  6634  	t.Run("PullConsumer", func(t *testing.T) {
  6635  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
  6636  			Name: "pull_consumer",
  6637  		})
  6638  		require_NoError(t, err)
  6639  
  6640  		sub, err := js.PullSubscribe("pull", "", nats.Bind("TEST", "pull_consumer"))
  6641  		require_NoError(t, err)
  6642  
  6643  		// This should succeed as there's no pause, so it definitely
  6644  		// shouldn't take more than a second.
  6645  		for i := 0; i < 10; i++ {
  6646  			_, err = js.Publish("pull", []byte("OK"))
  6647  			require_NoError(t, err)
  6648  		}
  6649  		msgs, err := sub.Fetch(10, nats.MaxWait(time.Second))
  6650  		require_NoError(t, err)
  6651  		require_Equal(t, len(msgs), 10)
  6652  
  6653  		// Now we'll pause the consumer for 3 seconds.
  6654  		deadline := time.Now().Add(time.Second * 3)
  6655  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "pull_consumer", deadline).Equal(deadline))
  6656  		c.waitOnAllCurrent()
  6657  
  6658  		// This should fail as we'll only wait for half of the remaining time until the deadline.
  6659  		for i := 0; i < 10; i++ {
  6660  			_, err = js.Publish("pull", []byte("OK"))
  6661  			require_NoError(t, err)
  6662  		}
  6663  		_, err = sub.Fetch(10, nats.MaxWait(time.Until(deadline)/2))
  6664  		require_Error(t, err, nats.ErrTimeout)
  6665  
  6666  		// This should succeed after a short wait, and when we're done,
  6667  		// we should be after the deadline.
  6668  		msgs, err = sub.Fetch(10)
  6669  		require_NoError(t, err)
  6670  		require_Equal(t, len(msgs), 10)
  6671  		require_True(t, time.Now().After(deadline))
  6672  
  6673  		// This should succeed as there's no pause, so it definitely
  6674  		// shouldn't take more than a second.
  6675  		for i := 0; i < 10; i++ {
  6676  			_, err = js.Publish("pull", []byte("OK"))
  6677  			require_NoError(t, err)
  6678  		}
  6679  		msgs, err = sub.Fetch(10, nats.MaxWait(time.Second))
  6680  		require_NoError(t, err)
  6681  		require_Equal(t, len(msgs), 10)
  6682  
  6683  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "pull_consumer", time.Time{}).Equal(time.Time{}))
  6684  		c.waitOnAllCurrent()
  6685  
  6686  		// This should succeed as there's no pause, so it definitely
  6687  		// shouldn't take more than a second.
  6688  		for i := 0; i < 10; i++ {
  6689  			_, err = js.Publish("pull", []byte("OK"))
  6690  			require_NoError(t, err)
  6691  		}
  6692  		msgs, err = sub.Fetch(10, nats.MaxWait(time.Second))
  6693  		require_NoError(t, err)
  6694  		require_Equal(t, len(msgs), 10)
  6695  	})
  6696  
  6697  	t.Run("PushConsumer", func(t *testing.T) {
  6698  		ch := make(chan *nats.Msg, 100)
  6699  		_, err = js.ChanSubscribe("push", ch, nats.BindStream("TEST"), nats.ConsumerName("push_consumer"))
  6700  		require_NoError(t, err)
  6701  
  6702  		// This should succeed as there's no pause, so it definitely
  6703  		// shouldn't take more than a second.
  6704  		for i := 0; i < 10; i++ {
  6705  			_, err = js.Publish("push", []byte("OK"))
  6706  			require_NoError(t, err)
  6707  		}
  6708  		for i := 0; i < 10; i++ {
  6709  			msg := require_ChanRead(t, ch, time.Second)
  6710  			require_NotEqual(t, msg, nil)
  6711  		}
  6712  
  6713  		// Now we'll pause the consumer for 3 seconds.
  6714  		deadline := time.Now().Add(time.Second * 3)
  6715  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "push_consumer", deadline).Equal(deadline))
  6716  		c.waitOnAllCurrent()
  6717  
  6718  		// This should succeed after a short wait, and when we're done,
  6719  		// we should be after the deadline.
  6720  		for i := 0; i < 10; i++ {
  6721  			_, err = js.Publish("push", []byte("OK"))
  6722  			require_NoError(t, err)
  6723  		}
  6724  		for i := 0; i < 10; i++ {
  6725  			msg := require_ChanRead(t, ch, time.Second*5)
  6726  			require_NotEqual(t, msg, nil)
  6727  			require_True(t, time.Now().After(deadline))
  6728  		}
  6729  
  6730  		// This should succeed as there's no pause, so it definitely
  6731  		// shouldn't take more than a second.
  6732  		for i := 0; i < 10; i++ {
  6733  			_, err = js.Publish("push", []byte("OK"))
  6734  			require_NoError(t, err)
  6735  		}
  6736  		for i := 0; i < 10; i++ {
  6737  			msg := require_ChanRead(t, ch, time.Second)
  6738  			require_NotEqual(t, msg, nil)
  6739  		}
  6740  
  6741  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "push_consumer", time.Time{}).Equal(time.Time{}))
  6742  		c.waitOnAllCurrent()
  6743  
  6744  		// This should succeed as there's no pause, so it definitely
  6745  		// shouldn't take more than a second.
  6746  		for i := 0; i < 10; i++ {
  6747  			_, err = js.Publish("push", []byte("OK"))
  6748  			require_NoError(t, err)
  6749  		}
  6750  		for i := 0; i < 10; i++ {
  6751  			msg := require_ChanRead(t, ch, time.Second)
  6752  			require_NotEqual(t, msg, nil)
  6753  		}
  6754  	})
  6755  }
  6756  
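        // TestJetStreamClusterConsumerPauseTimerFollowsLeader verifies that only the current
        // consumer leader arms the unpause timer; after each stepdown the timer should exist
        // on the new leader and on no other replica.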
  6757  func TestJetStreamClusterConsumerPauseTimerFollowsLeader(t *testing.T) {
  6758  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6759  	defer c.shutdown()
  6760  
  6761  	nc, js := jsClientConnect(t, c.randomServer())
  6762  	defer nc.Close()
  6763  
  6764  	_, err := js.AddStream(&nats.StreamConfig{
  6765  		Name:     "TEST",
  6766  		Subjects: []string{"foo"},
  6767  		Replicas: 3,
  6768  	})
  6769  	require_NoError(t, err)
  6770  
  6771  	deadline := time.Now().Add(time.Hour)
  6772  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
  6773  		Name:       "my_consumer",
  6774  		PauseUntil: &deadline,
  6775  		Replicas:   3,
  6776  	})
  6777  
  6778  	for i := 0; i < 10; i++ {
  6779  		c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
  6780  		c.waitOnAllCurrent()
  6781  
  6782  		for _, s := range c.servers {
  6783  			stream, err := s.gacc.lookupStream("TEST")
  6784  			require_NoError(t, err)
  6785  
  6786  			consumer := stream.lookupConsumer("my_consumer")
  6787  			require_NotEqual(t, consumer, nil)
  6788  
  6789  			isLeader := s.JetStreamIsConsumerLeader(globalAccountName, "TEST", "my_consumer")
  6790  
  6791  			consumer.mu.RLock()
  6792  			hasTimer := consumer.uptmr != nil
  6793  			consumer.mu.RUnlock()
  6794  
  6795  			require_Equal(t, isLeader, hasTimer)
  6796  		}
  6797  
  6798  		_, err = nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "my_consumer"), nil, time.Second)
  6799  		require_NoError(t, err)
  6800  	}
  6801  }
  6802  
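        // TestJetStreamClusterConsumerPauseHeartbeats verifies that a paused push consumer
        // keeps delivering "100 Idle Heartbeat" status messages to its delivery subject.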
  6803  func TestJetStreamClusterConsumerPauseHeartbeats(t *testing.T) {
  6804  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6805  	defer c.shutdown()
  6806  
  6807  	nc, js := jsClientConnect(t, c.randomServer())
  6808  	defer nc.Close()
  6809  
  6810  	_, err := js.AddStream(&nats.StreamConfig{
  6811  		Name:     "TEST",
  6812  		Subjects: []string{"foo"},
  6813  		Replicas: 3,
  6814  	})
  6815  	require_NoError(t, err)
  6816  
  6817  	deadline := time.Now().Add(time.Hour)
  6818  	dsubj := "deliver_subj"
  6819  
  6820  	ci := jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
  6821  		Name:           "my_consumer",
  6822  		PauseUntil:     &deadline,
  6823  		Heartbeat:      time.Millisecond * 100,
  6824  		DeliverSubject: dsubj,
  6825  	})
  6826  	require_True(t, ci.Config.PauseUntil.Equal(deadline))
  6827  
  6828  	ch := make(chan *nats.Msg, 10)
  6829  	_, err = nc.ChanSubscribe(dsubj, ch)
  6830  	require_NoError(t, err)
  6831  
  6832  	for i := 0; i < 20; i++ {
  6833  		msg := require_ChanRead(t, ch, time.Millisecond*200)
  6834  		require_Equal(t, msg.Header.Get("Status"), "100")
  6835  		require_Equal(t, msg.Header.Get("Description"), "Idle Heartbeat")
  6836  	}
  6837  }
  6838  
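        // TestJetStreamClusterConsumerPauseAdvisories verifies that exactly one advisory is
        // published for each pause and unpause transition, and that a consumer leader
        // stepdown does not generate a spurious advisory.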
  6839  func TestJetStreamClusterConsumerPauseAdvisories(t *testing.T) {
  6840  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6841  	defer c.shutdown()
  6842  
  6843  	nc, js := jsClientConnect(t, c.randomServer())
  6844  	defer nc.Close()
  6845  
  6846  	checkAdvisory := func(msg *nats.Msg, shouldBePaused bool, deadline time.Time) {
  6847  		t.Helper()
  6848  		var advisory JSConsumerPauseAdvisory
  6849  		require_NoError(t, json.Unmarshal(msg.Data, &advisory))
  6850  		require_Equal(t, advisory.Stream, "TEST")
  6851  		require_Equal(t, advisory.Consumer, "my_consumer")
  6852  		require_Equal(t, advisory.Paused, shouldBePaused)
  6853  		require_True(t, advisory.PauseUntil.Equal(deadline))
  6854  	}
  6855  
  6856  	_, err := js.AddStream(&nats.StreamConfig{
  6857  		Name:     "TEST",
  6858  		Subjects: []string{"foo"},
  6859  		Replicas: 3,
  6860  	})
  6861  	require_NoError(t, err)
  6862  
  6863  	ch := make(chan *nats.Msg, 10)
  6864  	_, err = nc.ChanSubscribe(JSAdvisoryConsumerPausePre+".TEST.my_consumer", ch)
  6865  	require_NoError(t, err)
  6866  
  6867  	deadline := time.Now().Add(time.Second)
  6868  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
  6869  		Name:       "my_consumer",
  6870  		PauseUntil: &deadline,
  6871  		Replicas:   3,
  6872  	})
  6873  
  6874  	// First advisory should tell us that the consumer was paused
  6875  	// on creation.
  6876  	msg := require_ChanRead(t, ch, time.Second*2)
  6877  	checkAdvisory(msg, true, deadline)
  6878  	require_Len(t, len(ch), 0) // Should only receive one advisory.
  6879  
  6880  	// The second advisory should tell us about the unpause once the deadline passes.
  6881  	msg = require_ChanRead(t, ch, time.Second*2)
  6882  	checkAdvisory(msg, false, deadline)
  6883  	require_Len(t, len(ch), 0) // Should only receive one advisory.
  6884  
  6885  	// Now we'll pause the consumer for a second using the API.
  6886  	deadline = time.Now().Add(time.Second)
  6887  	require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", deadline).Equal(deadline))
  6888  
  6889  	// Third advisory should tell us about the pause via the API.
  6890  	msg = require_ChanRead(t, ch, time.Second*2)
  6891  	checkAdvisory(msg, true, deadline)
  6892  	require_Len(t, len(ch), 0) // Should only receive one advisory.
  6893  
  6894  	// Finally the consumer should unpause once the deadline passes.
  6895  	msg = require_ChanRead(t, ch, time.Second*2)
  6896  	checkAdvisory(msg, false, deadline)
  6897  	require_Len(t, len(ch), 0) // Should only receive one advisory.
  6898  
  6899  	// Now we're going to set the deadline far into the future so we can
  6900  	// see what happens when we kick the consumer leader.
  6901  	deadline = time.Now().Add(time.Hour)
  6902  	require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", deadline).Equal(deadline))
  6903  
  6904  	// Setting the deadline should have generated an advisory.
  6905  	msg = require_ChanRead(t, ch, time.Second)
  6906  	checkAdvisory(msg, true, deadline)
  6907  	require_Len(t, len(ch), 0) // Should only receive one advisory.
  6908  
  6909  	// Try to kick the consumer leader.
  6910  	srv := c.consumerLeader(globalAccountName, "TEST", "my_consumer")
  6911  	srv.JetStreamStepdownConsumer(globalAccountName, "TEST", "my_consumer")
  6912  	c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
  6913  
  6914  	// This shouldn't have generated an advisory.
  6915  	require_NoChanRead(t, ch, time.Second)
  6916  }
  6917  
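        // TestJetStreamClusterConsumerPauseSurvivesRestart verifies that a pause deadline
        // survives both a consumer leader restart and a full cluster restart, with the
        // unpause timer re-armed on the elected leader.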
  6918  func TestJetStreamClusterConsumerPauseSurvivesRestart(t *testing.T) {
  6919  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  6920  	defer c.shutdown()
  6921  
  6922  	nc, js := jsClientConnect(t, c.randomServer())
  6923  	defer nc.Close()
  6924  
  6925  	checkTimer := func(s *Server) {
  6926  		stream, err := s.gacc.lookupStream("TEST")
  6927  		require_NoError(t, err)
  6928  
  6929  		consumer := stream.lookupConsumer("my_consumer")
  6930  		require_NotEqual(t, consumer, nil)
  6931  
  6932  		consumer.mu.RLock()
  6933  		timer := consumer.uptmr
  6934  		consumer.mu.RUnlock()
  6935  		require_True(t, timer != nil)
  6936  	}
  6937  
  6938  	_, err := js.AddStream(&nats.StreamConfig{
  6939  		Name:     "TEST",
  6940  		Subjects: []string{"foo"},
  6941  		Replicas: 3,
  6942  	})
  6943  	require_NoError(t, err)
  6944  
  6945  	deadline := time.Now().Add(time.Hour)
  6946  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
  6947  		Name:       "my_consumer",
  6948  		PauseUntil: &deadline,
  6949  		Replicas:   3,
  6950  	})
  6951  
  6952  	// First try with just restarting the consumer leader.
  6953  	srv := c.consumerLeader(globalAccountName, "TEST", "my_consumer")
  6954  	srv.Shutdown()
  6955  	c.restartServer(srv)
  6956  	c.waitOnAllCurrent()
  6957  	c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
  6958  	leader := c.consumerLeader(globalAccountName, "TEST", "my_consumer")
  6959  	require_True(t, leader != nil)
  6960  	checkTimer(leader)
  6961  
  6962  	// Then try restarting the entire cluster.
  6963  	c.stopAll()
  6964  	c.restartAllSamePorts()
  6965  	c.waitOnAllCurrent()
  6966  	c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
  6967  	leader = c.consumerLeader(globalAccountName, "TEST", "my_consumer")
  6968  	require_True(t, leader != nil)
  6969  	checkTimer(leader)
  6970  }