github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/jetstream_cluster_4_test.go

     1  // Copyright 2022-2024 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  //go:build !skip_js_tests && !skip_js_cluster_tests_4
    15  // +build !skip_js_tests,!skip_js_cluster_tests_4
    16  
    17  package server
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"math/rand"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  	"sync"
    29  	"testing"
    30  	"time"
    31  
    32  	"github.com/nats-io/nats.go"
    33  	"github.com/nats-io/nuid"
    34  )
    35  
    36  func TestJetStreamClusterWorkQueueStreamDiscardNewDesync(t *testing.T) {
    37  	t.Run("max msgs", func(t *testing.T) {
    38  		testJetStreamClusterWorkQueueStreamDiscardNewDesync(t, &nats.StreamConfig{
    39  			Name:      "WQTEST_MM",
    40  			Subjects:  []string{"messages.*"},
    41  			Replicas:  3,
    42  			MaxAge:    10 * time.Minute,
    43  			MaxMsgs:   100,
    44  			Retention: nats.WorkQueuePolicy,
    45  			Discard:   nats.DiscardNew,
    46  		})
    47  	})
    48  	t.Run("max bytes", func(t *testing.T) {
    49  		testJetStreamClusterWorkQueueStreamDiscardNewDesync(t, &nats.StreamConfig{
    50  			Name:      "WQTEST_MB",
    51  			Subjects:  []string{"messages.*"},
    52  			Replicas:  3,
    53  			MaxAge:    10 * time.Minute,
    54  			MaxBytes:  1 * 1024 * 1024,
    55  			Retention: nats.WorkQueuePolicy,
    56  			Discard:   nats.DiscardNew,
    57  		})
    58  	})
    59  }
    60  
    61  func testJetStreamClusterWorkQueueStreamDiscardNewDesync(t *testing.T, sc *nats.StreamConfig) {
    62  	conf := `
    63  	listen: 127.0.0.1:-1
    64  	server_name: %s
    65  	jetstream: {
    66  		store_dir: '%s',
    67  	}
    68  	cluster {
    69  		name: %s
    70  		listen: 127.0.0.1:%d
    71  		routes = [%s]
    72  	}
    73  	system_account: sys
    74  	no_auth_user: js
    75  	accounts {
    76  	  sys {
    77  	    users = [
    78  	      { user: sys, pass: sys }
    79  	    ]
    80  	  }
    81  	  js {
    82  	    jetstream = enabled
    83  	    users = [
    84  	      { user: js, pass: js }
    85  	    ]
    86  	  }
    87  	}`
    88  	c := createJetStreamClusterWithTemplate(t, conf, sc.Name, 3)
    89  	defer c.shutdown()
    90  
    91  	nc, js := jsClientConnect(t, c.randomServer())
    92  	defer nc.Close()
    93  
    94  	cnc, cjs := jsClientConnect(t, c.randomServer())
    95  	defer cnc.Close()
    96  
    97  	_, err := js.AddStream(sc)
    98  	require_NoError(t, err)
    99  
   100  	ctx, cancel := context.WithCancel(context.Background())
   101  	defer cancel()
   102  
   103  	psub, err := cjs.PullSubscribe("messages.*", "consumer")
   104  	require_NoError(t, err)
   105  
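        	// Ask the current stream leader to step down. The response is not
        	// checked since the request may time out while the cluster is busy.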
   106  	stepDown := func() {
   107  		_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, sc.Name), nil, time.Second)
   108  	}
   109  
   110  	// Messages will be produced and consumed in parallel, then once there are
   111  	// enough errors a leader election will be triggered.
   112  	var (
   113  		wg          sync.WaitGroup
   114  		received    uint64
   115  		errCh       = make(chan error, 100_000)
   116  		receivedMap = make(map[string]*nats.Msg)
   117  	)
   118  	wg.Add(1)
   119  	go func() {
   120  		tick := time.NewTicker(20 * time.Millisecond)
   121  		for {
   122  			select {
   123  			case <-ctx.Done():
   124  				wg.Done()
   125  				return
   126  			case <-tick.C:
   127  				msgs, err := psub.Fetch(10, nats.MaxWait(200*time.Millisecond))
   128  				if err != nil {
   129  					// The consumer will eventually just keep timing out here.
   130  					continue
   131  				}
   132  				for _, msg := range msgs {
   133  					received++
   134  					receivedMap[msg.Subject] = msg
   135  					msg.Ack()
   136  				}
   137  			}
   138  		}
   139  	}()
   140  
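        	// Track the subjects whose publishes failed; those messages must not
        	// show up as delivered to the consumer later on.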
   141  	shouldDrop := make(map[string]error)
   142  	wg.Add(1)
   143  	go func() {
   144  		payload := []byte(strings.Repeat("A", 1024))
   145  		tick := time.NewTicker(1 * time.Millisecond)
   146  		for i := 1; ; i++ {
   147  			select {
   148  			case <-ctx.Done():
   149  				wg.Done()
   150  				return
   151  			case <-tick.C:
   152  				subject := fmt.Sprintf("messages.%d", i)
   153  				_, err := js.Publish(subject, payload, nats.RetryAttempts(0))
   154  				if err != nil {
   155  					errCh <- err
   156  				}
   157  				// Capture the subjects whose publish failed.
   158  				if err != nil {
   159  					shouldDrop[subject] = err
   160  				}
   161  			}
   162  		}
   163  	}()
   164  
   165  	// Collect enough errors to cause things to get out of sync.
   166  	var errCount int
   167  Setup:
   168  	for {
   169  		select {
   170  		case err = <-errCh:
   171  			errCount++
   172  			if errCount%500 == 0 {
   173  				stepDown()
   174  			} else if errCount >= 2000 {
   175  				// Stop both producing and consuming.
   176  				cancel()
   177  				break Setup
   178  			}
   179  		case <-time.After(5 * time.Second):
   180  			// Unblock the test and continue.
   181  			cancel()
   182  			break Setup
   183  		}
   184  	}
   185  
   186  	// Both goroutines should be exiting now.
   187  	wg.Wait()
   188  
   189  	// Let acks propagate for stream checks.
   190  	time.Sleep(250 * time.Millisecond)
   191  
   192  	// Check messages that ought to have been dropped.
   193  	for subject := range receivedMap {
   194  		found, ok := shouldDrop[subject]
   195  		if ok {
   196  			t.Errorf("Should have dropped message published on %q since its publish got an error: %v", subject, found)
   197  		}
   198  	}
   199  }
   200  
   201  // https://github.com/nats-io/nats-server/issues/5071
   202  func TestJetStreamClusterStreamPlacementDistribution(t *testing.T) {
   203  	c := createJetStreamClusterExplicit(t, "R3S", 5)
   204  	defer c.shutdown()
   205  
   206  	s := c.randomNonLeader()
   207  	nc, js := jsClientConnect(t, s)
   208  	defer nc.Close()
   209  
   210  	for i := 1; i <= 10; i++ {
   211  		_, err := js.AddStream(&nats.StreamConfig{
   212  			Name:     fmt.Sprintf("TEST:%d", i),
   213  			Subjects: []string{fmt.Sprintf("foo.%d.*", i)},
   214  			Replicas: 3,
   215  		})
   216  		require_NoError(t, err)
   217  	}
   218  
   219  	// 10 streams with 3 replicas each, spread evenly across 5 servers.
   220  	expectedStreams := 10 * 3 / 5
   221  	for _, s := range c.servers {
   222  		jsz, err := s.Jsz(nil)
   223  		require_NoError(t, err)
   224  		require_Equal(t, jsz.Streams, expectedStreams)
   225  	}
   226  }
   227  
   228  func TestJetStreamClusterSourceWorkingQueueWithLimit(t *testing.T) {
   229  	c := createJetStreamClusterExplicit(t, "WQ3", 3)
   230  	defer c.shutdown()
   231  
   232  	nc, js := jsClientConnect(t, c.randomServer())
   233  	defer nc.Close()
   234  
   235  	_, err := js.AddStream(&nats.StreamConfig{Name: "test", Subjects: []string{"test"}, Replicas: 3})
   236  	require_NoError(t, err)
   237  
   238  	_, err = js.AddStream(&nats.StreamConfig{Name: "wq", MaxMsgs: 100, Discard: nats.DiscardNew, Retention: nats.WorkQueuePolicy,
   239  		Sources: []*nats.StreamSource{{Name: "test"}}, Replicas: 3})
   240  	require_NoError(t, err)
   241  
   242  	sendBatch := func(subject string, n int) {
   243  		for i := 0; i < n; i++ {
   244  			_, err = js.Publish(subject, []byte("OK"))
   245  			require_NoError(t, err)
   246  		}
   247  	}
   248  	// Populate the origin stream.
   249  	sendBatch("test", 300)
   250  
   251  	checkFor(t, 3*time.Second, 250*time.Millisecond, func() error {
   252  		si, err := js.StreamInfo("wq")
   253  		require_NoError(t, err)
   254  		if si.State.Msgs != 100 {
   255  			return fmt.Errorf("Expected 100 msgs, got state: %+v", si.State)
   256  		}
   257  		return nil
   258  	})
   259  
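        	// Create a durable consumer on the work-queue stream; acking removes
        	// messages and frees capacity so the source can keep catching up.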
   260  	_, err = js.AddConsumer("wq", &nats.ConsumerConfig{Durable: "wqc", FilterSubject: "test", AckPolicy: nats.AckExplicitPolicy})
   261  	require_NoError(t, err)
   262  
   263  	ss, err := js.PullSubscribe("test", "wqc", nats.Bind("wq", "wqc"))
   264  	require_NoError(t, err)
   265  	// Each fetch must return at least one message (i.e. no timeout).
   266  	f := func(done chan bool) {
   267  		for i := 0; i < 300; i++ {
   268  			m, err := ss.Fetch(1, nats.MaxWait(3*time.Second))
   269  			require_NoError(t, err)
   270  			time.Sleep(11 * time.Millisecond)
   271  			err = m[0].Ack()
   272  			require_NoError(t, err)
   273  		}
   274  		done <- true
   275  	}
   276  
   277  	var doneChan = make(chan bool)
   278  	go f(doneChan)
   279  
   280  	checkFor(t, 6*time.Second, 100*time.Millisecond, func() error {
   281  		si, err := js.StreamInfo("wq")
   282  		require_NoError(t, err)
   283  		if si.State.Msgs > 0 && si.State.Msgs <= 100 {
   284  			return fmt.Errorf("Expected 0 msgs, got: %d", si.State.Msgs)
   285  		} else if si.State.Msgs > 100 {
   286  			t.Fatalf("Got more than our 100 message limit: %+v", si.State)
   287  		}
   288  		return nil
   289  	})
   290  
   291  	select {
   292  	case <-doneChan:
   293  		ss.Drain()
   294  	case <-time.After(5 * time.Second):
   295  		t.Fatalf("Did not receive completion signal")
   296  	}
   297  }
   298  
   299  func TestJetStreamClusterConsumerPauseViaConfig(t *testing.T) {
   300  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   301  	defer c.shutdown()
   302  
   303  	nc, js := jsClientConnect(t, c.randomServer())
   304  	defer nc.Close()
   305  
   306  	_, err := js.AddStream(&nats.StreamConfig{
   307  		Name:     "TEST",
   308  		Subjects: []string{"foo"},
   309  		Replicas: 3,
   310  	})
   311  	require_NoError(t, err)
   312  
   313  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
   314  		Name:     "my_consumer",
   315  		Replicas: 3,
   316  	})
   317  
   318  	sub, err := js.PullSubscribe("foo", "", nats.Bind("TEST", "my_consumer"))
   319  	require_NoError(t, err)
   320  
   321  	stepdown := func() {
   322  		t.Helper()
   323  		_, err := nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "my_consumer"), nil, time.Second)
   324  		require_NoError(t, err)
   325  		c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
   326  	}
   327  
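        	// publish sends a small batch and then fetches it back, asserting that
        	// delivery completes within the given wait.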
   328  	publish := func(wait time.Duration) {
   329  		t.Helper()
   330  		for i := 0; i < 5; i++ {
   331  			_, err = js.Publish("foo", []byte("OK"))
   332  			require_NoError(t, err)
   333  		}
   334  		msgs, err := sub.Fetch(5, nats.MaxWait(wait))
   335  		require_NoError(t, err)
   336  		require_Equal(t, len(msgs), 5)
   337  	}
   338  
   339  	// This should be fast as there's no deadline.
   340  	publish(time.Second)
   341  
   342  	// Now we're going to set the deadline.
   343  	deadline := jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", time.Now().Add(time.Second*3))
   344  	c.waitOnAllCurrent()
   345  
   346  	// It will now take longer than 3 seconds.
   347  	publish(time.Second * 5)
   348  	require_True(t, time.Now().After(deadline))
   349  
   350  	// The next set of publishes after the deadline should now be fast.
   351  	publish(time.Second)
   352  
   353  	// We'll kick the leader, but since we're after the deadline, this
   354  	// should still be fast.
   355  	stepdown()
   356  	publish(time.Second)
   357  
   358  	// Now we're going to do an update and then immediately kick the
   359  	// leader. The pause should still be in effect afterwards.
   360  	deadline = jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", time.Now().Add(time.Second*3))
   361  	c.waitOnAllCurrent()
   362  	publish(time.Second * 5)
   363  	require_True(t, time.Now().After(deadline))
   364  
   365  	// The next set of publishes after the deadline should now be fast.
   366  	publish(time.Second)
   367  }
   368  
   369  func TestJetStreamClusterConsumerPauseViaEndpoint(t *testing.T) {
   370  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   371  	defer c.shutdown()
   372  
   373  	nc, js := jsClientConnect(t, c.randomServer())
   374  	defer nc.Close()
   375  
   376  	_, err := js.AddStream(&nats.StreamConfig{
   377  		Name:     "TEST",
   378  		Subjects: []string{"push", "pull"},
   379  		Replicas: 3,
   380  	})
   381  	require_NoError(t, err)
   382  
   383  	t.Run("PullConsumer", func(t *testing.T) {
   384  		_, err := js.AddConsumer("TEST", &nats.ConsumerConfig{
   385  			Name: "pull_consumer",
   386  		})
   387  		require_NoError(t, err)
   388  
   389  		sub, err := js.PullSubscribe("pull", "", nats.Bind("TEST", "pull_consumer"))
   390  		require_NoError(t, err)
   391  
   392  		// This should succeed as there's no pause, so it definitely
   393  		// shouldn't take more than a second.
   394  		for i := 0; i < 10; i++ {
   395  			_, err = js.Publish("pull", []byte("OK"))
   396  			require_NoError(t, err)
   397  		}
   398  		msgs, err := sub.Fetch(10, nats.MaxWait(time.Second))
   399  		require_NoError(t, err)
   400  		require_Equal(t, len(msgs), 10)
   401  
   402  		// Now we'll pause the consumer for 3 seconds.
   403  		deadline := time.Now().Add(time.Second * 3)
   404  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "pull_consumer", deadline).Equal(deadline))
   405  		c.waitOnAllCurrent()
   406  
   407  		// This should fail as we'll only wait for half of the time remaining until the deadline.
   408  		for i := 0; i < 10; i++ {
   409  			_, err = js.Publish("pull", []byte("OK"))
   410  			require_NoError(t, err)
   411  		}
   412  		_, err = sub.Fetch(10, nats.MaxWait(time.Until(deadline)/2))
   413  		require_Error(t, err, nats.ErrTimeout)
   414  
   415  		// This should succeed after a short wait, and when we're done,
   416  		// we should be after the deadline.
   417  		msgs, err = sub.Fetch(10)
   418  		require_NoError(t, err)
   419  		require_Equal(t, len(msgs), 10)
   420  		require_True(t, time.Now().After(deadline))
   421  
   422  		// This should succeed as there's no pause, so it definitely
   423  		// shouldn't take more than a second.
   424  		for i := 0; i < 10; i++ {
   425  			_, err = js.Publish("pull", []byte("OK"))
   426  			require_NoError(t, err)
   427  		}
   428  		msgs, err = sub.Fetch(10, nats.MaxWait(time.Second))
   429  		require_NoError(t, err)
   430  		require_Equal(t, len(msgs), 10)
   431  
   432  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "pull_consumer", time.Time{}).Equal(time.Time{}))
   433  		c.waitOnAllCurrent()
   434  
   435  		// This should succeed as there's no pause, so it definitely
   436  		// shouldn't take more than a second.
   437  		for i := 0; i < 10; i++ {
   438  			_, err = js.Publish("pull", []byte("OK"))
   439  			require_NoError(t, err)
   440  		}
   441  		msgs, err = sub.Fetch(10, nats.MaxWait(time.Second))
   442  		require_NoError(t, err)
   443  		require_Equal(t, len(msgs), 10)
   444  	})
   445  
   446  	t.Run("PushConsumer", func(t *testing.T) {
   447  		ch := make(chan *nats.Msg, 100)
   448  		_, err = js.ChanSubscribe("push", ch, nats.BindStream("TEST"), nats.ConsumerName("push_consumer"))
   449  		require_NoError(t, err)
   450  
   451  		// This should succeed as there's no pause, so it definitely
   452  		// shouldn't take more than a second.
   453  		for i := 0; i < 10; i++ {
   454  			_, err = js.Publish("push", []byte("OK"))
   455  			require_NoError(t, err)
   456  		}
   457  		for i := 0; i < 10; i++ {
   458  			msg := require_ChanRead(t, ch, time.Second)
   459  			require_NotEqual(t, msg, nil)
   460  		}
   461  
   462  		// Now we'll pause the consumer for 3 seconds.
   463  		deadline := time.Now().Add(time.Second * 3)
   464  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "push_consumer", deadline).Equal(deadline))
   465  		c.waitOnAllCurrent()
   466  
   467  		// This should succeed after a short wait, and when we're done,
   468  		// we should be after the deadline.
   469  		for i := 0; i < 10; i++ {
   470  			_, err = js.Publish("push", []byte("OK"))
   471  			require_NoError(t, err)
   472  		}
   473  		for i := 0; i < 10; i++ {
   474  			msg := require_ChanRead(t, ch, time.Second*5)
   475  			require_NotEqual(t, msg, nil)
   476  			require_True(t, time.Now().After(deadline))
   477  		}
   478  
   479  		// This should succeed as there's no pause, so it definitely
   480  		// shouldn't take more than a second.
   481  		for i := 0; i < 10; i++ {
   482  			_, err = js.Publish("push", []byte("OK"))
   483  			require_NoError(t, err)
   484  		}
   485  		for i := 0; i < 10; i++ {
   486  			msg := require_ChanRead(t, ch, time.Second)
   487  			require_NotEqual(t, msg, nil)
   488  		}
   489  
   490  		require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "push_consumer", time.Time{}).Equal(time.Time{}))
   491  		c.waitOnAllCurrent()
   492  
   493  		// This should succeed as there's no pause, so it definitely
   494  		// shouldn't take more than a second.
   495  		for i := 0; i < 10; i++ {
   496  			_, err = js.Publish("push", []byte("OK"))
   497  			require_NoError(t, err)
   498  		}
   499  		for i := 0; i < 10; i++ {
   500  			msg := require_ChanRead(t, ch, time.Second)
   501  			require_NotEqual(t, msg, nil)
   502  		}
   503  	})
   504  }
   505  
   506  func TestJetStreamClusterConsumerPauseTimerFollowsLeader(t *testing.T) {
   507  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   508  	defer c.shutdown()
   509  
   510  	nc, js := jsClientConnect(t, c.randomServer())
   511  	defer nc.Close()
   512  
   513  	_, err := js.AddStream(&nats.StreamConfig{
   514  		Name:     "TEST",
   515  		Subjects: []string{"foo"},
   516  		Replicas: 3,
   517  	})
   518  	require_NoError(t, err)
   519  
   520  	deadline := time.Now().Add(time.Hour)
   521  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
   522  		Name:       "my_consumer",
   523  		PauseUntil: &deadline,
   524  		Replicas:   3,
   525  	})
   526  
   527  	for i := 0; i < 10; i++ {
   528  		c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
   529  		c.waitOnAllCurrent()
   530  
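        		// Only the current consumer leader should have the pause timer armed.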
   531  		for _, s := range c.servers {
   532  			stream, err := s.gacc.lookupStream("TEST")
   533  			require_NoError(t, err)
   534  
   535  			consumer := stream.lookupConsumer("my_consumer")
   536  			require_NotEqual(t, consumer, nil)
   537  
   538  			isLeader := s.JetStreamIsConsumerLeader(globalAccountName, "TEST", "my_consumer")
   539  
   540  			consumer.mu.RLock()
   541  			hasTimer := consumer.uptmr != nil
   542  			consumer.mu.RUnlock()
   543  
   544  			require_Equal(t, isLeader, hasTimer)
   545  		}
   546  
   547  		_, err = nc.Request(fmt.Sprintf(JSApiConsumerLeaderStepDownT, "TEST", "my_consumer"), nil, time.Second)
   548  		require_NoError(t, err)
   549  	}
   550  }
   551  
   552  func TestJetStreamClusterConsumerPauseHeartbeats(t *testing.T) {
   553  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   554  	defer c.shutdown()
   555  
   556  	nc, js := jsClientConnect(t, c.randomServer())
   557  	defer nc.Close()
   558  
   559  	_, err := js.AddStream(&nats.StreamConfig{
   560  		Name:     "TEST",
   561  		Subjects: []string{"foo"},
   562  		Replicas: 3,
   563  	})
   564  	require_NoError(t, err)
   565  
   566  	deadline := time.Now().Add(time.Hour)
   567  	dsubj := "deliver_subj"
   568  
   569  	ci := jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
   570  		Name:           "my_consumer",
   571  		PauseUntil:     &deadline,
   572  		Heartbeat:      time.Millisecond * 100,
   573  		DeliverSubject: dsubj,
   574  	})
   575  	require_True(t, ci.Config.PauseUntil.Equal(deadline))
   576  
   577  	ch := make(chan *nats.Msg, 10)
   578  	_, err = nc.ChanSubscribe(dsubj, ch)
   579  	require_NoError(t, err)
   580  
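        	// While paused, the consumer should still deliver idle heartbeats roughly every 100ms.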
   581  	for i := 0; i < 20; i++ {
   582  		msg := require_ChanRead(t, ch, time.Millisecond*200)
   583  		require_Equal(t, msg.Header.Get("Status"), "100")
   584  		require_Equal(t, msg.Header.Get("Description"), "Idle Heartbeat")
   585  	}
   586  }
   587  
   588  func TestJetStreamClusterConsumerPauseAdvisories(t *testing.T) {
   589  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   590  	defer c.shutdown()
   591  
   592  	nc, js := jsClientConnect(t, c.randomServer())
   593  	defer nc.Close()
   594  
   595  	checkAdvisory := func(msg *nats.Msg, shouldBePaused bool, deadline time.Time) {
   596  		t.Helper()
   597  		var advisory JSConsumerPauseAdvisory
   598  		require_NoError(t, json.Unmarshal(msg.Data, &advisory))
   599  		require_Equal(t, advisory.Stream, "TEST")
   600  		require_Equal(t, advisory.Consumer, "my_consumer")
   601  		require_Equal(t, advisory.Paused, shouldBePaused)
   602  		require_True(t, advisory.PauseUntil.Equal(deadline))
   603  	}
   604  
   605  	_, err := js.AddStream(&nats.StreamConfig{
   606  		Name:     "TEST",
   607  		Subjects: []string{"foo"},
   608  		Replicas: 3,
   609  	})
   610  	require_NoError(t, err)
   611  
   612  	ch := make(chan *nats.Msg, 10)
   613  	_, err = nc.ChanSubscribe(JSAdvisoryConsumerPausePre+".TEST.my_consumer", ch)
   614  	require_NoError(t, err)
   615  
   616  	deadline := time.Now().Add(time.Second)
   617  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
   618  		Name:       "my_consumer",
   619  		PauseUntil: &deadline,
   620  		Replicas:   3,
   621  	})
   622  
   623  	// First advisory should tell us that the consumer was paused
   624  	// on creation.
   625  	msg := require_ChanRead(t, ch, time.Second*2)
   626  	checkAdvisory(msg, true, deadline)
   627  	require_Len(t, len(ch), 0) // Should only receive one advisory.
   628  
   629  	// The second one for the unpause.
   630  	msg = require_ChanRead(t, ch, time.Second*2)
   631  	checkAdvisory(msg, false, deadline)
   632  	require_Len(t, len(ch), 0) // Should only receive one advisory.
   633  
   634  	// Now we'll pause the consumer for a second using the API.
   635  	deadline = time.Now().Add(time.Second)
   636  	require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", deadline).Equal(deadline))
   637  
   638  	// Third advisory should tell us about the pause via the API.
   639  	msg = require_ChanRead(t, ch, time.Second*2)
   640  	checkAdvisory(msg, true, deadline)
   641  	require_Len(t, len(ch), 0) // Should only receive one advisory.
   642  
   643  	// Finally it should unpause once the deadline passes.
   644  	msg = require_ChanRead(t, ch, time.Second*2)
   645  	checkAdvisory(msg, false, deadline)
   646  	require_Len(t, len(ch), 0) // Should only receive one advisory.
   647  
   648  	// Now we're going to set the deadline into the future so we can
   649  	// see what happens when we kick leaders or restart.
   650  	deadline = time.Now().Add(time.Hour)
   651  	require_True(t, jsTestPause_PauseConsumer(t, nc, "TEST", "my_consumer", deadline).Equal(deadline))
   652  
   653  	// Setting the deadline should have generated an advisory.
   654  	msg = require_ChanRead(t, ch, time.Second)
   655  	checkAdvisory(msg, true, deadline)
   656  	require_Len(t, len(ch), 0) // Should only receive one advisory.
   657  
   658  	// Try to kick the consumer leader.
   659  	srv := c.consumerLeader(globalAccountName, "TEST", "my_consumer")
   660  	srv.JetStreamStepdownConsumer(globalAccountName, "TEST", "my_consumer")
   661  	c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
   662  
   663  	// This shouldn't have generated an advisory.
   664  	require_NoChanRead(t, ch, time.Second)
   665  }
   666  
   667  func TestJetStreamClusterConsumerPauseSurvivesRestart(t *testing.T) {
   668  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   669  	defer c.shutdown()
   670  
   671  	nc, js := jsClientConnect(t, c.randomServer())
   672  	defer nc.Close()
   673  
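        	// checkTimer asserts that the pause timer is still armed on the given server.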
   674  	checkTimer := func(s *Server) {
   675  		stream, err := s.gacc.lookupStream("TEST")
   676  		require_NoError(t, err)
   677  
   678  		consumer := stream.lookupConsumer("my_consumer")
   679  		require_NotEqual(t, consumer, nil)
   680  
   681  		consumer.mu.RLock()
   682  		timer := consumer.uptmr
   683  		consumer.mu.RUnlock()
   684  		require_True(t, timer != nil)
   685  	}
   686  
   687  	_, err := js.AddStream(&nats.StreamConfig{
   688  		Name:     "TEST",
   689  		Subjects: []string{"foo"},
   690  		Replicas: 3,
   691  	})
   692  	require_NoError(t, err)
   693  
   694  	deadline := time.Now().Add(time.Hour)
   695  	jsTestPause_CreateOrUpdateConsumer(t, nc, ActionCreate, "TEST", ConsumerConfig{
   696  		Name:       "my_consumer",
   697  		PauseUntil: &deadline,
   698  		Replicas:   3,
   699  	})
   700  
   701  	// First try with just restarting the consumer leader.
   702  	srv := c.consumerLeader(globalAccountName, "TEST", "my_consumer")
   703  	srv.Shutdown()
   704  	c.restartServer(srv)
   705  	c.waitOnAllCurrent()
   706  	c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
   707  	leader := c.consumerLeader(globalAccountName, "TEST", "my_consumer")
   708  	require_True(t, leader != nil)
   709  	checkTimer(leader)
   710  
   711  	// Then try restarting the entire cluster.
   712  	c.stopAll()
   713  	c.restartAllSamePorts()
   714  	c.waitOnAllCurrent()
   715  	c.waitOnConsumerLeader(globalAccountName, "TEST", "my_consumer")
   716  	leader = c.consumerLeader(globalAccountName, "TEST", "my_consumer")
   717  	require_True(t, leader != nil)
   718  	checkTimer(leader)
   719  }
   720  
   721  func TestJetStreamClusterStreamOrphanMsgsAndReplicasDrifting(t *testing.T) {
   722  	type testParams struct {
   723  		restartAny       bool
   724  		restartLeader    bool
   725  		rolloutRestart   bool
   726  		ldmRestart       bool
   727  		restarts         int
   728  		checkHealthz     bool
   729  		reconnectRoutes  bool
   730  		reconnectClients bool
   731  	}
   732  	test := func(t *testing.T, params *testParams, sc *nats.StreamConfig) {
   733  		conf := `
   734  		listen: 127.0.0.1:-1
   735  		server_name: %s
   736  		jetstream: {
   737  			store_dir: '%s',
   738  		}
   739  		cluster {
   740  			name: %s
   741  			listen: 127.0.0.1:%d
   742  			routes = [%s]
   743  		}
   744  		server_tags: ["test"]
   745  		system_account: sys
   746  		no_auth_user: js
   747  		accounts {
   748  			sys { users = [ { user: sys, pass: sys } ] }
   749  			js {
   750  				jetstream = enabled
   751  				users = [ { user: js, pass: js } ]
   752  			}
   753  		}`
   754  		c := createJetStreamClusterWithTemplate(t, conf, sc.Name, 3)
   755  		defer c.shutdown()
   756  
   757  		// Update lame duck duration for all servers.
   758  		for _, s := range c.servers {
   759  			s.optsMu.Lock()
   760  			s.opts.LameDuckDuration = 5 * time.Second
   761  			s.opts.LameDuckGracePeriod = -5 * time.Second
   762  			s.optsMu.Unlock()
   763  		}
   764  
   765  		nc, js := jsClientConnect(t, c.randomServer())
   766  		defer nc.Close()
   767  
   768  		cnc, cjs := jsClientConnect(t, c.randomServer())
   769  		defer cnc.Close()
   770  
   771  		_, err := js.AddStream(sc)
   772  		require_NoError(t, err)
   773  
   774  		pctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   775  		defer cancel()
   776  
   777  		// Start producers
   778  		var wg sync.WaitGroup
   779  
   780  		// First call is just to create the pull subscribers.
   781  		mp := nats.MaxAckPending(10000)
   782  		mw := nats.PullMaxWaiting(1000)
   783  		aw := nats.AckWait(5 * time.Second)
   784  
   785  		for i := 0; i < 10; i++ {
   786  			for _, partition := range []string{"EEEEE"} {
   787  				subject := fmt.Sprintf("MSGS.%s.*.H.100XY.*.*.WQ.00000000000%d", partition, i)
   788  				consumer := fmt.Sprintf("consumer:%s:%d", partition, i)
   789  				_, err := cjs.PullSubscribe(subject, consumer, mp, mw, aw)
   790  				require_NoError(t, err)
   791  			}
   792  		}
   793  
   794  		// Create a single consumer that does no activity.
   795  		// Make sure we still calculate the low ack floor properly and clean up, etc.
   796  		_, err = cjs.PullSubscribe("MSGS.ZZ.>", "consumer:ZZ:0", mp, mw, aw)
   797  		require_NoError(t, err)
   798  
   799  		subjects := []string{
   800  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000000",
   801  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000001",
   802  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000002",
   803  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000003",
   804  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000004",
   805  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000005",
   806  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000006",
   807  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000007",
   808  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000008",
   809  			"MSGS.EEEEE.P.H.100XY.1.100Z.WQ.000000000009",
   810  		}
   811  		payload := []byte(strings.Repeat("A", 1024))
   812  
   813  		for i := 0; i < 50; i++ {
   814  			wg.Add(1)
   815  			go func() {
   816  				pnc, pjs := jsClientConnect(t, c.randomServer())
   817  				defer pnc.Close()
   818  
   819  				for i := 1; i < 200_000; i++ {
   820  					select {
   821  					case <-pctx.Done():
   822  						wg.Done()
   823  						return
   824  					default:
   825  					}
   826  					for _, subject := range subjects {
   827  						// Send each message a few times.
   828  						msgID := nats.MsgId(nuid.Next())
   829  						pjs.PublishAsync(subject, payload, msgID)
   830  						pjs.Publish(subject, payload, msgID, nats.AckWait(250*time.Millisecond))
   831  						pjs.Publish(subject, payload, msgID, nats.AckWait(250*time.Millisecond))
   832  					}
   833  				}
   834  			}()
   835  		}
   836  
   837  		// Rogue publisher that sends the same msg ID every time.
   838  		for i := 0; i < 10; i++ {
   839  			wg.Add(1)
   840  			go func() {
   841  				pnc, pjs := jsClientConnect(t, c.randomServer())
   842  				defer pnc.Close()
   843  
   844  				msgID := nats.MsgId("1234567890")
   845  				for i := 1; ; i++ {
   846  					select {
   847  					case <-pctx.Done():
   848  						wg.Done()
   849  						return
   850  					default:
   851  					}
   852  					for _, subject := range subjects {
   853  						// Send each message a few times.
   854  						pjs.PublishAsync(subject, payload, msgID, nats.RetryAttempts(0), nats.RetryWait(0))
   855  						pjs.Publish(subject, payload, msgID, nats.AckWait(1*time.Millisecond), nats.RetryAttempts(0), nats.RetryWait(0))
   856  						pjs.Publish(subject, payload, msgID, nats.AckWait(1*time.Millisecond), nats.RetryAttempts(0), nats.RetryWait(0))
   857  					}
   858  				}
   859  			}()
   860  		}
   861  
   862  		// Let enough messages into the stream, then start the consumers.
   863  		time.Sleep(15 * time.Second)
   864  
   865  		ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
   866  		defer cancel()
   867  
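        		// Start several competing fetchers per consumer; their connections are
        		// closed after 15 seconds to simulate clients going away mid-stream.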
   868  		for i := 0; i < 10; i++ {
   869  			subject := fmt.Sprintf("MSGS.EEEEE.*.H.100XY.*.*.WQ.00000000000%d", i)
   870  			consumer := fmt.Sprintf("consumer:EEEEE:%d", i)
   871  			for n := 0; n < 5; n++ {
   872  				cpnc, cpjs := jsClientConnect(t, c.randomServer())
   873  				defer cpnc.Close()
   874  
   875  				psub, err := cpjs.PullSubscribe(subject, consumer, mp, mw, aw)
   876  				require_NoError(t, err)
   877  
   878  				time.AfterFunc(15*time.Second, func() {
   879  					cpnc.Close()
   880  				})
   881  
   882  				wg.Add(1)
   883  				go func() {
   884  					tick := time.NewTicker(1 * time.Millisecond)
   885  					for {
   886  						if cpnc.IsClosed() {
   887  							wg.Done()
   888  							return
   889  						}
   890  						select {
   891  						case <-ctx.Done():
   892  							wg.Done()
   893  							return
   894  						case <-tick.C:
   895  							// Fetch 1 first, then if there are no errors fetch 100, then 1000.
   896  							msgs, err := psub.Fetch(1, nats.MaxWait(200*time.Millisecond))
   897  							if err != nil {
   898  								continue
   899  							}
   900  							for _, msg := range msgs {
   901  								msg.Ack()
   902  							}
   903  							msgs, err = psub.Fetch(100, nats.MaxWait(200*time.Millisecond))
   904  							if err != nil {
   905  								continue
   906  							}
   907  							for _, msg := range msgs {
   908  								msg.Ack()
   909  							}
   910  							msgs, err = psub.Fetch(1000, nats.MaxWait(200*time.Millisecond))
   911  							if err != nil {
   912  								continue
   913  							}
   914  							for _, msg := range msgs {
   915  								msg.Ack()
   916  							}
   917  						}
   918  					}
   919  				}()
   920  			}
   921  		}
   922  
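        		// Start a second wave of fetchers that keep pulling until the test context expires.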
   923  		for i := 0; i < 10; i++ {
   924  			subject := fmt.Sprintf("MSGS.EEEEE.*.H.100XY.*.*.WQ.00000000000%d", i)
   925  			consumer := fmt.Sprintf("consumer:EEEEE:%d", i)
   926  			for n := 0; n < 10; n++ {
   927  				cpnc, cpjs := jsClientConnect(t, c.randomServer())
   928  				defer cpnc.Close()
   929  
   930  				psub, err := cpjs.PullSubscribe(subject, consumer, mp, mw, aw)
   931  				if err != nil {
   932  					t.Logf("ERROR: %v", err)
   933  					continue
   934  				}
   935  
   936  				wg.Add(1)
   937  				go func() {
   938  					tick := time.NewTicker(1 * time.Millisecond)
   939  					for {
   940  						select {
   941  						case <-ctx.Done():
   942  							wg.Done()
   943  							return
   944  						case <-tick.C:
   945  							// Fetch 1 first, then if there are no errors fetch 100, then 1000.
   946  							msgs, err := psub.Fetch(1, nats.MaxWait(200*time.Millisecond))
   947  							if err != nil {
   948  								continue
   949  							}
   950  							for _, msg := range msgs {
   951  								msg.Ack()
   952  							}
   953  							msgs, err = psub.Fetch(100, nats.MaxWait(200*time.Millisecond))
   954  							if err != nil {
   955  								continue
   956  							}
   957  							for _, msg := range msgs {
   958  								msg.Ack()
   959  							}
   960  
   961  							msgs, err = psub.Fetch(1000, nats.MaxWait(200*time.Millisecond))
   962  							if err != nil {
   963  								continue
   964  							}
   965  							for _, msg := range msgs {
   966  								msg.Ack()
   967  							}
   968  						}
   969  					}
   970  				}()
   971  			}
   972  		}
   973  
   974  		// Periodically disconnect routes from one of the servers.
   975  		if params.reconnectRoutes {
   976  			wg.Add(1)
   977  			go func() {
   978  				for range time.NewTicker(10 * time.Second).C {
   979  					select {
   980  					case <-ctx.Done():
   981  						wg.Done()
   982  						return
   983  					default:
   984  					}
   985  
   986  					// Force disconnecting routes from one of the servers.
   987  					s := c.servers[rand.Intn(3)]
   988  					var routes []*client
   989  					t.Logf("Disconnecting routes from %v", s.Name())
   990  					s.mu.Lock()
   991  					for _, conns := range s.routes {
   992  						routes = append(routes, conns...)
   993  					}
   994  					s.mu.Unlock()
   995  					for _, r := range routes {
   996  						r.closeConnection(ClientClosed)
   997  					}
   998  				}
   999  			}()
  1000  		}
  1001  
  1002  		// Periodically reconnect clients.
  1003  		if params.reconnectClients {
  1004  			reconnectClients := func(s *Server) {
  1005  				for _, client := range s.clients {
  1006  					client.closeConnection(Kicked)
  1007  				}
  1008  			}
  1009  
  1010  			wg.Add(1)
  1011  			go func() {
  1012  				for range time.NewTicker(10 * time.Second).C {
  1013  					select {
  1014  					case <-ctx.Done():
  1015  						wg.Done()
  1016  						return
  1017  					default:
  1018  					}
  1019  					// Force reconnect clients from one of the servers.
  1020  					s := c.servers[rand.Intn(len(c.servers))]
  1021  					reconnectClients(s)
  1022  				}
  1023  			}()
  1024  		}
  1025  
  1026  		// Restarts: after 10 seconds, begin restarting servers according to the test params.
  1027  		time.AfterFunc(10*time.Second, func() {
  1028  			for i := 0; i < params.restarts; i++ {
  1029  				switch {
  1030  				case params.restartLeader:
  1031  					// Find the stream leader and restart it.
  1032  					s := c.streamLeader("js", sc.Name)
  1033  					if params.ldmRestart {
  1034  						s.lameDuckMode()
  1035  					} else {
  1036  						s.Shutdown()
  1037  					}
  1038  					s.WaitForShutdown()
  1039  					c.restartServer(s)
  1040  				case params.restartAny:
  1041  					s := c.servers[rand.Intn(len(c.servers))]
  1042  					if params.ldmRestart {
  1043  						s.lameDuckMode()
  1044  					} else {
  1045  						s.Shutdown()
  1046  					}
  1047  					s.WaitForShutdown()
  1048  					c.restartServer(s)
  1049  				case params.rolloutRestart:
  1050  					for _, s := range c.servers {
  1051  						if params.ldmRestart {
  1052  							s.lameDuckMode()
  1053  						} else {
  1054  							s.Shutdown()
  1055  						}
  1056  						s.WaitForShutdown()
  1057  						c.restartServer(s)
  1058  
  1059  						if params.checkHealthz {
  1060  							hctx, hcancel := context.WithTimeout(ctx, 15*time.Second)
  1061  							defer hcancel()
  1062  
  1063  							for range time.NewTicker(2 * time.Second).C {
  1064  								if hctx.Err() != nil {
  1065  									// Give up waiting for a healthy status once the timeout expires.
  1066  									break
  1067  								}
  1068  
  1069  								status := s.healthz(nil)
  1070  								if status.StatusCode == 200 {
  1071  									break
  1072  								}
  1073  							}
  1074  						}
  1075  					}
  1076  				}
  1077  				c.waitOnClusterReady()
  1078  			}
  1079  		})
  1080  
  1081  		// Wait until context is done then check state.
  1082  		<-ctx.Done()
  1083  
  1084  		var consumerPending int
  1085  		for i := 0; i < 10; i++ {
  1086  			ci, err := js.ConsumerInfo(sc.Name, fmt.Sprintf("consumer:EEEEE:%d", i))
  1087  			require_NoError(t, err)
  1088  			consumerPending += int(ci.NumPending)
  1089  		}
  1090  
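        		// getStreamDetails returns the stream details reported by Jsz on the given server.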
  1091  		getStreamDetails := func(t *testing.T, srv *Server) *StreamDetail {
  1092  			t.Helper()
  1093  			jsz, err := srv.Jsz(&JSzOptions{Accounts: true, Streams: true, Consumer: true})
  1094  			require_NoError(t, err)
  1095  			if len(jsz.AccountDetails) > 0 && len(jsz.AccountDetails[0].Streams) > 0 {
  1096  				stream := jsz.AccountDetails[0].Streams[0]
  1097  				return &stream
  1098  			}
  1099  			t.Error("Could not find account details")
  1100  			return nil
  1101  		}
  1102  
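        		// checkState compares message counts and sequence ranges between the
        		// stream leader and its followers.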
  1103  		checkState := func(t *testing.T) error {
  1104  			t.Helper()
  1105  
  1106  			leaderSrv := c.streamLeader("js", sc.Name)
  1107  			if leaderSrv == nil {
  1108  				return fmt.Errorf("no leader found for stream")
  1109  			}
  1110  			streamLeader := getStreamDetails(t, leaderSrv)
  1111  			var errs []error
  1112  			for _, srv := range c.servers {
  1113  				if srv == leaderSrv {
  1114  					// Skip self
  1115  					continue
  1116  				}
  1117  				stream := getStreamDetails(t, srv)
  1118  				if stream == nil {
  1119  					return fmt.Errorf("stream not found")
  1120  				}
  1121  
  1122  				if stream.State.Msgs != streamLeader.State.Msgs {
  1123  					err := fmt.Errorf("Leader %v has %d messages, Follower %v has %d messages",
  1124  						stream.Cluster.Leader, streamLeader.State.Msgs,
  1125  						srv, stream.State.Msgs,
  1126  					)
  1127  					errs = append(errs, err)
  1128  				}
  1129  				if stream.State.FirstSeq != streamLeader.State.FirstSeq {
  1130  					err := fmt.Errorf("Leader %v FirstSeq is %d, Follower %v is at %d",
  1131  						stream.Cluster.Leader, streamLeader.State.FirstSeq,
  1132  						srv, stream.State.FirstSeq,
  1133  					)
  1134  					errs = append(errs, err)
  1135  				}
  1136  				if stream.State.LastSeq != streamLeader.State.LastSeq {
  1137  					err := fmt.Errorf("Leader %v LastSeq is %d, Follower %v is at %d",
  1138  						stream.Cluster.Leader, streamLeader.State.LastSeq,
  1139  						srv, stream.State.LastSeq,
  1140  					)
  1141  					errs = append(errs, err)
  1142  				}
  1143  			}
  1144  			if len(errs) > 0 {
  1145  				return errors.Join(errs...)
  1146  			}
  1147  			return nil
  1148  		}
  1149  
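        		// checkMsgsEqual verifies that the stored headers (which carry the msg ID)
        		// match for every sequence across all replicas.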
  1150  		checkMsgsEqual := func(t *testing.T) {
  1151  			// These have already been checked to be the same across all replicas.
  1152  			state := getStreamDetails(t, c.streamLeader("js", sc.Name)).State
  1153  			// Gather all the streams.
  1154  			var msets []*stream
  1155  			for _, s := range c.servers {
  1156  				acc, err := s.LookupAccount("js")
  1157  				require_NoError(t, err)
  1158  				mset, err := acc.lookupStream(sc.Name)
  1159  				require_NoError(t, err)
  1160  				msets = append(msets, mset)
  1161  			}
  1162  			for seq := state.FirstSeq; seq <= state.LastSeq; seq++ {
  1163  				var msgId string
  1164  				var smv StoreMsg
  1165  				for _, mset := range msets {
  1166  					mset.mu.RLock()
  1167  					sm, err := mset.store.LoadMsg(seq, &smv)
  1168  					mset.mu.RUnlock()
  1169  					require_NoError(t, err)
  1170  					if msgId == _EMPTY_ {
  1171  						msgId = string(sm.hdr)
  1172  					} else if msgId != string(sm.hdr) {
  1173  						t.Fatalf("MsgIds do not match for seq %d: %q vs %q", seq, msgId, sm.hdr)
  1174  					}
  1175  				}
  1176  			}
  1177  		}
  1178  
  1179  		// Check state of streams and consumers.
  1180  		si, err := js.StreamInfo(sc.Name)
  1181  		require_NoError(t, err)
  1182  
  1183  		// Only check if there are any pending messages.
  1184  		if consumerPending > 0 {
  1185  			streamPending := int(si.State.Msgs)
  1186  			if streamPending != consumerPending {
  1187  				t.Errorf("Unexpected number of pending messages, stream=%d, consumers=%d", streamPending, consumerPending)
  1188  			}
  1189  		}
  1190  
  1191  		// If clustered, check whether leader and followers have drifted.
  1192  		if sc.Replicas > 1 {
  1193  			// If we have drifted, we don't have to wait too long; usually it's stuck for good.
  1194  			checkFor(t, time.Minute, time.Second, func() error {
  1195  				return checkState(t)
  1196  			})
  1197  			// If we succeeded, now let's check that all messages are also the same.
  1198  			// We may have no messages, but for tests that do, we make sure each msg is the same
  1199  			// across all replicas.
  1200  			checkMsgsEqual(t)
  1201  		}
  1202  
  1203  		wg.Wait()
  1204  	}
  1205  
  1206  	// Setting up test variations below:
  1207  	//
  1208  	// File based with single replica and discard old policy.
  1209  	t.Run("R1F", func(t *testing.T) {
  1210  		params := &testParams{
  1211  			restartAny:     true,
  1212  			ldmRestart:     false,
  1213  			rolloutRestart: false,
  1214  			restarts:       1,
  1215  		}
  1216  		test(t, params, &nats.StreamConfig{
  1217  			Name:        "OWQTEST_R1F",
  1218  			Subjects:    []string{"MSGS.>"},
  1219  			Replicas:    1,
  1220  			MaxAge:      30 * time.Minute,
  1221  			Duplicates:  5 * time.Minute,
  1222  			Retention:   nats.WorkQueuePolicy,
  1223  			Discard:     nats.DiscardOld,
  1224  			AllowRollup: true,
  1225  			Placement: &nats.Placement{
  1226  				Tags: []string{"test"},
  1227  			},
  1228  		})
  1229  	})
  1230  
  1231  	// Clustered memory based with discard new policy and max msgs limit.
  1232  	t.Run("R3M", func(t *testing.T) {
  1233  		params := &testParams{
  1234  			restartAny:     true,
  1235  			ldmRestart:     true,
  1236  			rolloutRestart: false,
  1237  			restarts:       1,
  1238  			checkHealthz:   false,
  1239  		}
  1240  		test(t, params, &nats.StreamConfig{
  1241  			Name:        "OWQTEST_R3M",
  1242  			Subjects:    []string{"MSGS.>"},
  1243  			Replicas:    3,
  1244  			MaxAge:      30 * time.Minute,
  1245  			MaxMsgs:     100_000,
  1246  			Duplicates:  5 * time.Minute,
  1247  			Retention:   nats.WorkQueuePolicy,
  1248  			Discard:     nats.DiscardNew,
  1249  			AllowRollup: true,
  1250  			Storage:     nats.MemoryStorage,
  1251  			Placement: &nats.Placement{
  1252  				Tags: []string{"test"},
  1253  			},
  1254  		})
  1255  	})
  1256  
  1257  	// Clustered file based with discard new policy and max msgs limit.
  1258  	t.Run("R3F_DN", func(t *testing.T) {
  1259  		params := &testParams{
  1260  			restartAny:     true,
  1261  			ldmRestart:     true,
  1262  			rolloutRestart: false,
  1263  			restarts:       1,
  1264  		}
  1265  		test(t, params, &nats.StreamConfig{
  1266  			Name:        "OWQTEST_R3F_DN",
  1267  			Subjects:    []string{"MSGS.>"},
  1268  			Replicas:    3,
  1269  			MaxAge:      30 * time.Minute,
  1270  			MaxMsgs:     100_000,
  1271  			Duplicates:  5 * time.Minute,
  1272  			Retention:   nats.WorkQueuePolicy,
  1273  			Discard:     nats.DiscardNew,
  1274  			AllowRollup: true,
  1275  			Placement: &nats.Placement{
  1276  				Tags: []string{"test"},
  1277  			},
  1278  		})
  1279  	})
  1280  
  1281  	// Clustered file based with discard old policy and max msgs limit.
  1282  	t.Run("R3F_DO", func(t *testing.T) {
  1283  		params := &testParams{
  1284  			restartAny:     true,
  1285  			ldmRestart:     true,
  1286  			rolloutRestart: false,
  1287  			restarts:       1,
  1288  		}
  1289  		test(t, params, &nats.StreamConfig{
  1290  			Name:        "OWQTEST_R3F_DO",
  1291  			Subjects:    []string{"MSGS.>"},
  1292  			Replicas:    3,
  1293  			MaxAge:      30 * time.Minute,
  1294  			MaxMsgs:     100_000,
  1295  			Duplicates:  5 * time.Minute,
  1296  			Retention:   nats.WorkQueuePolicy,
  1297  			Discard:     nats.DiscardOld,
  1298  			AllowRollup: true,
  1299  			Placement: &nats.Placement{
  1300  				Tags: []string{"test"},
  1301  			},
  1302  		})
  1303  	})
  1304  
  1305  	// Clustered file based with discard old policy and no limits.
  1306  	t.Run("R3F_DO_NOLIMIT", func(t *testing.T) {
  1307  		params := &testParams{
  1308  			restartAny:       false,
  1309  			ldmRestart:       true,
  1310  			rolloutRestart:   true,
  1311  			restarts:         3,
  1312  			checkHealthz:     true,
  1313  			reconnectRoutes:  true,
  1314  			reconnectClients: true,
  1315  		}
  1316  		test(t, params, &nats.StreamConfig{
  1317  			Name:       "OWQTEST_R3F_DO_NOLIMIT",
  1318  			Subjects:   []string{"MSGS.>"},
  1319  			Replicas:   3,
  1320  			Duplicates: 30 * time.Second,
  1321  			Discard:    nats.DiscardOld,
  1322  			Placement: &nats.Placement{
  1323  				Tags: []string{"test"},
  1324  			},
  1325  		})
  1326  	})
  1327  }
  1328  
  1329  func TestJetStreamClusterConsumerNRGCleanup(t *testing.T) {
  1330  	c := createJetStreamClusterExplicit(t, "R3S", 3)
  1331  	defer c.shutdown()
  1332  
  1333  	nc, js := jsClientConnect(t, c.randomServer())
  1334  	defer nc.Close()
  1335  
  1336  	_, err := js.AddStream(&nats.StreamConfig{
  1337  		Name:      "TEST",
  1338  		Subjects:  []string{"foo"},
  1339  		Storage:   nats.MemoryStorage,
  1340  		Retention: nats.WorkQueuePolicy,
  1341  		Replicas:  3,
  1342  	})
  1343  	require_NoError(t, err)
  1344  
  1345  	// First call is just to create the pull subscriber.
  1346  	_, err = js.PullSubscribe("foo", "dlc")
  1347  	require_NoError(t, err)
  1348  
  1349  	require_NoError(t, js.DeleteConsumer("TEST", "dlc"))
  1350  
  1351  	// Now delete the stream.
  1352  	require_NoError(t, js.DeleteStream("TEST"))
  1353  
  1354  	// Now make sure we cleaned up the NRG directories for the stream and consumer.
  1355  	var numConsumers, numStreams int
  1356  	for _, s := range c.servers {
  1357  		sd := s.JetStreamConfig().StoreDir
  1358  		nd := filepath.Join(sd, "$SYS", "_js_")
  1359  		f, err := os.Open(nd)
  1360  		require_NoError(t, err)
  1361  		dirs, err := f.ReadDir(-1)
  1362  		require_NoError(t, err)
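        		// Consumer raft group directories are prefixed with C- and stream groups with S-.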
  1363  		for _, fi := range dirs {
  1364  			if strings.HasPrefix(fi.Name(), "C-") {
  1365  				numConsumers++
  1366  			} else if strings.HasPrefix(fi.Name(), "S-") {
  1367  				numStreams++
  1368  			}
  1369  		}
  1370  	}
  1371  	require_Equal(t, numConsumers, 0)
  1372  	require_Equal(t, numStreams, 0)
  1373  }