github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/jetstream_benchmark_test.go

     1  // Copyright 2023 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  //go:build !skip_js_tests && !skip_js_cluster_tests && !skip_js_cluster_tests_2
    15  // +build !skip_js_tests,!skip_js_cluster_tests,!skip_js_cluster_tests_2
    16  
    17  package server
    18  
    19  import (
    20  	"fmt"
    21  	"math/rand"
    22  	"sync"
    23  	"sync/atomic"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/nats-io/nats-server/v2/internal/fastrand"
    28  	"github.com/nats-io/nats.go"
    29  )
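
        // This file contains end-to-end JetStream benchmarks: consume (push and pull,
        // ordered and durable), filtered consume, publish (sync and async), interest
        // streams with limits, KV, object store, and concurrent publishers.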
    30  
    31  func BenchmarkJetStreamConsume(b *testing.B) {
    32  
    33  	const (
    34  		verbose          = false
    35  		streamName       = "S"
    36  		subject          = "s"
    37  		seed             = 12345
    38  		publishTimeout   = 30 * time.Second
    39  		PublishBatchSize = 10000
    40  	)
    41  
    42  	runSyncPushConsumer := func(b *testing.B, js nats.JetStreamContext, streamName string) (int, int, int) {
    43  		const nextMsgTimeout = 3 * time.Second
    44  
    45  		subOpts := []nats.SubOpt{
    46  			nats.BindStream(streamName),
    47  		}
    48  		sub, err := js.SubscribeSync(_EMPTY_, subOpts...)
    49  		if err != nil {
    50  			b.Fatalf("Failed to subscribe: %v", err)
    51  		}
    52  		defer sub.Unsubscribe()
    53  
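        		// Track received stream sequences in a bitset: sequence s maps to bit s-1.
        		// Redeliveries count as duplicates rather than progress, so the loop below
        		// ends only after b.N distinct messages have been seen.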
    54  		bitset := NewBitset(uint64(b.N))
    55  		uniqueConsumed, duplicates, errors := 0, 0, 0
    56  
    57  		b.ResetTimer()
    58  
    59  		for uniqueConsumed < b.N {
    60  			msg, err := sub.NextMsg(nextMsgTimeout)
    61  			if err != nil {
    62  				b.Fatalf("No more messages (received: %d/%d)", uniqueConsumed, b.N)
    63  			}
    64  
    65  			metadata, mdErr := msg.Metadata()
    66  			if mdErr != nil {
    67  				errors++
    68  				continue
    69  			}
    70  
    71  			ackErr := msg.Ack()
    72  			if ackErr != nil {
    73  				errors++
    74  				continue
    75  			}
    76  
    77  			seq := metadata.Sequence.Stream
    78  
    79  			index := seq - 1
    80  			if bitset.get(index) {
    81  				duplicates++
    82  				continue
    83  			}
    84  
    85  			uniqueConsumed++
    86  			bitset.set(index, true)
    87  
    88  			if verbose && uniqueConsumed%1000 == 0 {
    89  				b.Logf("Consumed: %d/%d", bitset.count(), b.N)
    90  			}
    91  		}
    92  
    93  		b.StopTimer()
    94  
    95  		return uniqueConsumed, duplicates, errors
    96  	}
    97  
    98  	runAsyncPushConsumer := func(b *testing.B, js nats.JetStreamContext, streamName string, ordered, durable bool) (int, int, int) {
    99  		const timeout = 3 * time.Minute
   100  		bitset := NewBitset(uint64(b.N))
   101  		doneCh := make(chan bool, 1)
   102  		uniqueConsumed, duplicates, errors := 0, 0, 0
   103  
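        		// nats.go delivers messages to the handler sequentially per subscription,
        		// so the counters and bitset below need no additional locking.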
   104  		handleMsg := func(msg *nats.Msg) {
   105  			metadata, mdErr := msg.Metadata()
   106  			if mdErr != nil {
   107  				// fmt.Printf("Metadata error: %v\n", mdErr)
   108  				errors++
   109  				return
   110  			}
   111  
   112  			// Ordered consumers default to the AckNone policy, so don't try to ACK
   113  			if !ordered {
   114  				ackErr := msg.Ack()
   115  				if ackErr != nil {
   116  					// fmt.Printf("Ack error: %v\n", ackErr)
   117  					errors++
   118  					return
   119  				}
   120  			}
   121  
   122  			seq := metadata.Sequence.Stream
   123  
   124  			index := seq - 1
   125  			if bitset.get(index) {
   126  				duplicates++
   127  				return
   128  			}
   129  
   130  			uniqueConsumed++
   131  			bitset.set(index, true)
   132  
   133  			if uniqueConsumed == b.N {
   134  				msg.Sub.Unsubscribe()
   135  				doneCh <- true
   136  			}
   137  			if verbose && uniqueConsumed%1000 == 0 {
   138  				b.Logf("Consumed %d/%d", uniqueConsumed, b.N)
   139  			}
   140  		}
   141  
   142  		subOpts := []nats.SubOpt{
   143  			nats.BindStream(streamName),
   144  		}
   145  
   146  		if ordered {
   147  			subOpts = append(subOpts, nats.OrderedConsumer())
   148  		}
   149  
   150  		if durable {
   151  			subOpts = append(subOpts, nats.Durable("c"))
   152  		}
   153  
   154  		sub, err := js.Subscribe(_EMPTY_, handleMsg, subOpts...)
   155  		if err != nil {
   156  			b.Fatalf("Failed to subscribe: %v", err)
   157  		}
   158  		defer sub.Unsubscribe()
   159  
   160  		b.ResetTimer()
   161  
   162  		select {
   163  		case <-doneCh:
   164  			b.StopTimer()
   165  		case <-time.After(timeout):
   166  			b.Fatalf("Timeout, %d/%d received, %d errors", uniqueConsumed, b.N, errors)
   167  		}
   168  
   169  		return uniqueConsumed, duplicates, errors
   170  	}
   171  
   172  	runPullConsumer := func(b *testing.B, js nats.JetStreamContext, streamName string, durable bool) (int, int, int) {
   173  		const fetchMaxWait = nats.MaxWait(3 * time.Second)
   174  		const fetchMaxMessages = 1000
   175  
   176  		bitset := NewBitset(uint64(b.N))
   177  		uniqueConsumed, duplicates, errors := 0, 0, 0
   178  
   179  		subOpts := []nats.SubOpt{
   180  			nats.BindStream(streamName),
   181  		}
   182  
   183  		consumerName := _EMPTY_ // Default ephemeral
   184  		if durable {
   185  			consumerName = "c" // Durable
   186  		}
   187  
   188  		sub, err := js.PullSubscribe(_EMPTY_, consumerName, subOpts...)
   189  		if err != nil {
   190  			b.Fatalf("Failed to subscribe: %v", err)
   191  		}
   192  		defer sub.Unsubscribe()
   193  
   194  		b.ResetTimer()
   195  
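        		// Fetch returns up to fetchMaxMessages messages; if none arrive within
        		// fetchMaxWait it returns an error, which fails the benchmark below.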
   196  	fetchLoop:
   197  		for {
   198  			msgs, err := sub.Fetch(fetchMaxMessages, fetchMaxWait)
   199  			if err != nil {
   200  				b.Fatalf("Failed to fetch: %v", err)
   201  			}
   202  
   203  		processMsgsLoop:
   204  			for _, msg := range msgs {
   205  				metadata, mdErr := msg.Metadata()
   206  				if mdErr != nil {
   207  					errors++
   208  					continue processMsgsLoop
   209  				}
   210  
   211  				ackErr := msg.Ack()
   212  				if ackErr != nil {
   213  					errors++
   214  					continue processMsgsLoop
   215  				}
   216  
   217  				seq := metadata.Sequence.Stream
   218  
   219  				index := seq - 1
   220  				if bitset.get(index) {
   221  					duplicates++
   222  					continue processMsgsLoop
   223  				}
   224  
   225  				uniqueConsumed++
   226  				bitset.set(index, true)
   227  
   228  				if uniqueConsumed == b.N {
   229  					msg.Sub.Unsubscribe()
   230  					break fetchLoop
   231  				}
   232  
   233  				if verbose && uniqueConsumed%1000 == 0 {
   234  					b.Logf("Consumed %d/%d", uniqueConsumed, b.N)
   235  				}
   236  			}
   237  		}
   238  
   239  		b.StopTimer()
   240  
   241  		return uniqueConsumed, duplicates, errors
   242  	}
   243  
   244  	type ConsumerType string
   245  	const (
   246  		PushSync         ConsumerType = "PUSH[Sync,Ephemeral]"
   247  		PushAsync        ConsumerType = "PUSH[Async,Ephemeral]"
   248  		PushAsyncOrdered ConsumerType = "PUSH[Async,Ordered]"
   249  		PushAsyncDurable ConsumerType = "PUSH[Async,Durable]"
   250  		PullDurable      ConsumerType = "PULL[Durable]"
   251  		PullEphemeral    ConsumerType = "PULL[Ephemeral]"
   252  	)
   253  
   254  	benchmarksCases := []struct {
   255  		clusterSize int
   256  		replicas    int
   257  		messageSize int
   258  		minMessages int
   259  	}{
   260  		{1, 1, 10, 100_000}, // Single node, 10B messages, ~1MiB minimum
   261  		{1, 1, 1024, 1_000}, // Single node, 1KB messages, ~1MiB minimum
   262  		{3, 3, 10, 100_000}, // Cluster, R3, 10B messages, ~1MiB minimum
   263  		{3, 3, 1024, 1_000}, // Cluster, R3, 1KB messages, ~1MiB minimum
   264  	}
   265  
   266  	// Each of the cases above is run with each of the consumer types below
   267  	consumerTypes := []ConsumerType{
   268  		PushSync,
   269  		PushAsync,
   270  		PushAsyncOrdered,
   271  		PushAsyncDurable,
   272  		PullDurable,
   273  		PullEphemeral,
   274  	}
   275  
   276  	for _, bc := range benchmarksCases {
   277  
   278  		name := fmt.Sprintf(
   279  			"N=%d,R=%d,MsgSz=%db",
   280  			bc.clusterSize,
   281  			bc.replicas,
   282  			bc.messageSize,
   283  		)
   284  
   285  		b.Run(
   286  			name,
   287  			func(b *testing.B) {
   288  
   289  				for _, ct := range consumerTypes {
   290  					name := fmt.Sprintf(
   291  						"%v",
   292  						ct,
   293  					)
   294  					b.Run(
   295  						name,
   296  						func(b *testing.B) {
   297  							// Skip short runs; the benchmark gets re-executed with a larger N
   298  							if b.N < bc.minMessages {
   299  								b.ResetTimer()
   300  								return
   301  							}
   302  
   303  							if verbose {
   304  								b.Logf("Running %s with %d messages", name, b.N)
   305  							}
   306  
   307  							if verbose {
   308  								b.Logf("Setting up %d nodes", bc.clusterSize)
   309  							}
   310  
   311  							cl, _, shutdown, nc, js := startJSClusterAndConnect(b, bc.clusterSize)
   312  							defer shutdown()
   313  							defer nc.Close()
   314  
   315  							if verbose {
   316  								b.Logf("Creating stream with R=%d", bc.replicas)
   317  							}
   318  							streamConfig := &nats.StreamConfig{
   319  								Name:     streamName,
   320  								Subjects: []string{subject},
   321  								Replicas: bc.replicas,
   322  							}
   323  							if _, err := js.AddStream(streamConfig); err != nil {
   324  								b.Fatalf("Error creating stream: %v", err)
   325  							}
   326  
   327  							// If replicated resource, connect to stream leader for lower variability
   328  							if bc.replicas > 1 {
   329  								connectURL := cl.streamLeader("$G", streamName).ClientURL()
   330  								nc.Close()
   331  								nc, js = jsClientConnectURL(b, connectURL)
        								defer nc.Close()
   332  							}
   333  
   334  							message := make([]byte, bc.messageSize)
   335  							rand.New(rand.NewSource(int64(seed))).Read(message)
   336  
   337  							// Publish b.N messages to the stream (in batches)
   338  							for i := 1; i <= b.N; i++ {
   339  								fastRandomMutation(message, 10)
   340  								_, err := js.PublishAsync(subject, message)
   341  								if err != nil {
   342  									b.Fatalf("Failed to publish: %s", err)
   343  								}
   344  								// Limit outstanding published messages to PublishBatchSize
   345  								if i%PublishBatchSize == 0 || i == b.N {
   346  									select {
   347  									case <-js.PublishAsyncComplete():
   348  										if verbose {
   349  											b.Logf("Published %d/%d messages", i, b.N)
   350  										}
   351  									case <-time.After(publishTimeout):
   352  										b.Fatalf("Publish timed out")
   353  									}
   354  								}
   355  							}
   356  
   357  							// Set size of each operation, for throughput calculation
   358  							b.SetBytes(int64(bc.messageSize))
   359  
   360  							// Discard time spent during setup
   361  							// Consumer may reset again further in
   362  							b.ResetTimer()
   363  
   364  							var consumed, duplicates, errors int
   365  
   366  							const (
   367  								ordered   = true
   368  								unordered = false
   369  								durable   = true
   370  								ephemeral = false
   371  							)
   372  
   373  							switch ct {
   374  							case PushSync:
   375  								consumed, duplicates, errors = runSyncPushConsumer(b, js, streamName)
   376  							case PushAsync:
   377  								consumed, duplicates, errors = runAsyncPushConsumer(b, js, streamName, unordered, ephemeral)
   378  							case PushAsyncOrdered:
   379  								consumed, duplicates, errors = runAsyncPushConsumer(b, js, streamName, ordered, ephemeral)
   380  							case PushAsyncDurable:
   381  								consumed, duplicates, errors = runAsyncPushConsumer(b, js, streamName, unordered, durable)
   382  							case PullDurable:
   383  								consumed, duplicates, errors = runPullConsumer(b, js, streamName, durable)
   384  							case PullEphemeral:
   385  								consumed, duplicates, errors = runPullConsumer(b, js, streamName, ephemeral)
   386  							default:
   387  								b.Fatalf("Unknown consumer type: %v", ct)
   388  							}
   389  
   390  							// Benchmark ends here (the consumer may have stopped the timer earlier)
   391  							b.StopTimer()
   392  
   393  							if consumed != b.N {
   394  								b.Fatalf("Something doesn't add up: %d != %d", consumed, b.N)
   395  							}
   396  
   397  							b.ReportMetric(float64(duplicates)*100/float64(b.N), "%dupe")
   398  							b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
   399  						},
   400  					)
   401  				}
   402  			},
   403  		)
   404  	}
   405  }
   406  
   407  func BenchmarkJetStreamConsumeWithFilters(b *testing.B) {
   408  	const (
   409  		verbose          = false
   410  		streamName       = "S"
   411  		subjectPrefix    = "s"
   412  		seed             = 123456
   413  		messageSize      = 32
   414  		consumerReplicas = 1
   415  		domainNameLength = 36 // Length of domain portion of subject, must be an even number
   416  		publishBatchSize = 1000
   417  		publishTimeout   = 10 * time.Second
   418  	)
   419  
   420  	clusterSizeCases := []struct {
   421  		clusterSize int              // Single node or cluster
   422  		replicas    int              // Stream replicas
   423  		storage     nats.StorageType // Stream storage
   424  	}{
   425  		{1, 1, nats.MemoryStorage},
   426  		{3, 3, nats.MemoryStorage},
   427  	}
   428  
   429  	benchmarksCases := []struct {
   430  		domains             int // Number of distinct domains
   431  		subjectsPerDomain   int // Number of distinct subjects within each domain
   432  		filters             int // Number of filters (<prefix>.<domain>.>) per consumer
   433  		concurrentConsumers int // Number of consumers running concurrently
   435  	}{
   436  		{100, 10, 5, 12},
   437  		{1000, 10, 25, 12},
   438  		{10_000, 10, 50, 12},
   439  	}
   440  
   441  	for _, cs := range clusterSizeCases {
   442  		name := fmt.Sprintf(
   443  			"N=%d,R=%d,storage=%s",
   444  			cs.clusterSize,
   445  			cs.replicas,
   446  			cs.storage.String(),
   447  		)
   448  		b.Run(
   449  			name,
   450  			func(b *testing.B) {
   451  
   452  				for _, bc := range benchmarksCases {
   453  
   454  					name := fmt.Sprintf(
   455  						"D=%d,DS=%d,F=%d,C=%d",
   456  						bc.domains,
   457  						bc.subjectsPerDomain,
   458  						bc.filters,
   459  						bc.concurrentConsumers,
   460  					)
   461  
   462  					b.Run(
   463  						name,
   464  						func(b *testing.B) {
   465  
   466  							cl, s, shutdown, nc, js := startJSClusterAndConnect(b, cs.clusterSize)
   467  							defer shutdown()
   468  							defer nc.Close()
   469  
   470  							if verbose {
   471  								b.Logf("Creating stream with R=%d", cs.replicas)
   472  							}
   473  							streamConfig := &nats.StreamConfig{
   474  								Name:              streamName,
   475  								Subjects:          []string{subjectPrefix + ".>"},
   476  								Storage:           cs.storage,
   477  								Retention:         nats.LimitsPolicy,
   478  								MaxAge:            time.Hour,
   479  								Duplicates:        10 * time.Second,
   480  								Discard:           nats.DiscardOld,
   481  								NoAck:             false,
   482  								MaxMsgs:           -1,
   483  								MaxBytes:          -1,
   484  								MaxConsumers:      -1,
   485  								Replicas:          1,
   486  								MaxMsgsPerSubject: 1,
   487  							}
   488  							if _, err := js.AddStream(streamConfig); err != nil {
   489  								b.Fatalf("Error creating stream: %v", err)
   490  							}
   491  
   492  							// If replicated resource, connect to stream leader for lower variability
   493  							connectURL := s.ClientURL()
   494  							if cs.replicas > 1 {
   495  								connectURL = cl.streamLeader("$G", streamName).ClientURL()
   496  								nc.Close()
   497  								nc, js = jsClientConnectURL(b, connectURL)
        								defer nc.Close()
   498  							}
   499  
   500  							rng := rand.New(rand.NewSource(int64(seed)))
   501  							message := make([]byte, messageSize)
   502  							domain := make([]byte, domainNameLength/2)
   503  
   504  							domains := make([]string, 0, bc.domains*bc.subjectsPerDomain)
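        							// Note: each domain is appended once per subject in the loop below, so
        							// entries in domains repeat; the filter-selection loop later dedupes
        							// using a map.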
   505  
   506  							// Publish one message per subject for each domain
   507  							published := 0
   508  							totalMessages := bc.domains * bc.subjectsPerDomain
   509  							for d := 1; d <= bc.domains; d++ {
   510  								rng.Read(domain)
   511  								for s := 1; s <= bc.subjectsPerDomain; s++ {
   512  									rng.Read(message)
   513  									domainString := fmt.Sprintf("%X", domain)
   514  									domains = append(domains, domainString)
   515  									subject := fmt.Sprintf("%s.%s.%d", subjectPrefix, domainString, s)
   516  									_, err := js.PublishAsync(subject, message)
   517  									if err != nil {
   518  										b.Fatalf("failed to publish: %s", err)
   519  									}
   520  									published += 1
   521  
   522  									// Wait for all pending to be published before trying to publish the next batch
   523  									if published%publishBatchSize == 0 || published == totalMessages {
   524  										select {
   525  										case <-js.PublishAsyncComplete():
   526  											if verbose {
   527  												b.Logf("Published %d/%d messages", published, totalMessages)
   528  											}
   529  										case <-time.After(publishTimeout):
   530  											b.Fatalf("Publish timed out")
   531  										}
   532  									}
   533  
   534  								}
   535  							}
   536  
   537  							// Number of messages that each new consumer expects to consume
   538  							messagesPerIteration := bc.filters * bc.subjectsPerDomain
   539  
   540  							// Each call to 'subscribeConsumeUnsubscribe' is one benchmark operation,
   541  							// i.e. it is called a total of b.N times (split among C goroutines).
   542  							// Each operation consists of:
   543  							// - Create filter
   544  							// - Create consumer / Subscribe
   545  							// - Consume expected number of messages
   546  							// - Unsubscribe
   547  							subscribeConsumeUnsubscribe := func(js nats.JetStreamContext, rng *rand.Rand) {
   548  
   549  								// Select F unique domains to create F non-overlapping filters
   550  								filterDomains := make(map[string]bool, bc.filters)
   551  								filters := make([]string, 0, bc.filters)
   552  								for len(filterDomains) < bc.filters {
   553  									domain := domains[rng.Intn(len(domains))]
   554  									if _, found := filterDomains[domain]; found {
   555  										// Collision with existing filter, try again
   556  										continue
   557  									}
   558  									filterDomains[domain] = true
   559  									filters = append(filters, fmt.Sprintf("%s.%s.>", subjectPrefix, domain))
   560  								}
   561  
   562  								if verbose {
   563  									b.Logf("Subscribe with filters: %+v", filters)
   564  								}
   565  
   566  								// Consumer callback
   567  								received := 0
   568  								consumeWg := sync.WaitGroup{}
   569  								consumeWg.Add(1)
   570  								cb := func(msg *nats.Msg) {
   571  									received += 1
   572  									if received == messagesPerIteration {
   573  										consumeWg.Done()
   574  										if verbose {
   575  											b.Logf("Received %d/%d messages", received, messagesPerIteration)
   576  										}
   577  									}
   578  								}
   579  
   580  								// Create consumer
   581  								subOpts := []nats.SubOpt{
   582  									nats.BindStream(streamName),
   583  									nats.OrderedConsumer(),
   584  									nats.ConsumerReplicas(consumerReplicas),
   585  									nats.ConsumerFilterSubjects(filters...),
   586  									nats.ConsumerMemoryStorage(),
   587  								}
   588  
   591  								sub, err := js.Subscribe(_EMPTY_, cb, subOpts...)
   592  								if err != nil {
   593  									b.Fatalf("Failed to subscribe: %s", err)
   594  								}
   595  
   596  								defer func(sub *nats.Subscription) {
   597  									err := sub.Unsubscribe()
   598  									if err != nil {
   599  										b.Logf("Failed to unsubscribe: %s", err)
   600  									}
   601  								}(sub)
   602  
   603  								consumeWg.Wait()
   604  							}
   605  
   606  							// Wait for all consumer threads and main to be ready
   607  							wgReady := sync.WaitGroup{}
   608  							wgReady.Add(bc.concurrentConsumers + 1)
   609  							// Wait until all consumer threads have completed
   610  							wgCompleted := sync.WaitGroup{}
   611  							wgCompleted.Add(bc.concurrentConsumers)
   612  							// Operations left for consumer threads
   613  							opsCount := atomic.Int32{}
   614  							opsCount.Store(int32(b.N))
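        							// Workers claim one operation at a time by atomically decrementing
        							// opsCount; once the counter goes negative, all b.N operations have
        							// been claimed and the worker loop exits.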
   615  
   616  							// Start a pool of C goroutines, each one with a dedicated connection.
   617  							for i := 1; i <= bc.concurrentConsumers; i++ {
   618  								go func(consumerId int) {
   619  
   620  									// Connect
   621  									nc, js := jsClientConnectURL(b, connectURL)
   622  									defer nc.Close()
   623  
   624  									// Signal completion of work
   625  									defer wgCompleted.Done()
   626  
   627  									rng := rand.New(rand.NewSource(int64(seed + consumerId)))
   628  
   629  									// Ready, wait for everyone else
   630  									wgReady.Done()
   631  									wgReady.Wait()
   632  
   633  									completed := 0
   634  									for opsCount.Add(-1) >= 0 {
   635  										subscribeConsumeUnsubscribe(js, rng)
   636  										completed += 1
   637  									}
   638  									if verbose {
   639  										b.Logf("Consumer thread %d completed %d of %d operations", consumerId, completed, b.N)
   640  									}
   641  								}(i)
   642  							}
   643  
   644  							// Wait for all consumers to be ready
   645  							wgReady.Done()
   646  							wgReady.Wait()
   647  
   648  							// Start measuring time
   649  							b.ResetTimer()
   650  
   651  							// Wait for consumers to have chewed through b.N operations
   652  							wgCompleted.Wait()
   653  							b.StopTimer()
   654  
   655  							// Throughput is not very meaningful in this benchmark, since each
   656  							// operation includes subscribe/unsubscribe and retrieves just a few bytes.
   657  							// b.SetBytes(int64(messageSize * messagesPerIteration))
   658  						},
   659  					)
   660  				}
   661  			},
   662  		)
   663  	}
   664  }
   665  
   666  func BenchmarkJetStreamPublish(b *testing.B) {
   667  
   668  	const (
   669  		verbose    = false
   670  		seed       = 12345
   671  		streamName = "S"
   672  	)
   673  
   674  	runSyncPublisher := func(b *testing.B, js nats.JetStreamContext, messageSize int, subjects []string) (int, int) {
   675  		published, errors := 0, 0
   676  		message := make([]byte, messageSize)
   677  		rand.New(rand.NewSource(int64(seed))).Read(message)
   678  
   679  		b.ResetTimer()
   680  
   681  		for i := 1; i <= b.N; i++ {
   682  			fastRandomMutation(message, 10)
   683  			subject := subjects[fastrand.Uint32n(uint32(len(subjects)))]
   684  			_, pubErr := js.Publish(subject, message)
   685  			if pubErr != nil {
   686  				errors++
   687  			} else {
   688  				published++
   689  			}
   690  
   691  			if verbose && i%1000 == 0 {
   692  				b.Logf("Published %d/%d, %d errors", i, b.N, errors)
   693  			}
   694  		}
   695  
   696  		b.StopTimer()
   697  
   698  		return published, errors
   699  	}
   700  
   701  	runAsyncPublisher := func(b *testing.B, js nats.JetStreamContext, messageSize int, subjects []string, asyncWindow int) (int, int) {
   702  		const publishCompleteMaxWait = 30 * time.Second
   703  		rng := rand.New(rand.NewSource(int64(seed)))
   704  		message := make([]byte, messageSize)
   705  		rng.Read(message)
   706  
   707  		published, errors := 0, 0
   708  
   709  		b.ResetTimer()
   710  
   711  		for published < b.N {
   712  
   713  			// Normally publish a full batch (of size `asyncWindow`)
   714  			publishBatchSize := asyncWindow
   715  			// Unless fewer are left to complete the benchmark
   716  			if b.N-published < asyncWindow {
   717  				publishBatchSize = b.N - published
   718  			}
   719  
   720  			pending := make([]nats.PubAckFuture, 0, publishBatchSize)
   721  
   722  			for i := 0; i < publishBatchSize; i++ {
   723  				fastRandomMutation(message, 10)
   724  				subject := subjects[rng.Intn(len(subjects))]
   725  				pubAckFuture, err := js.PublishAsync(subject, message)
   726  				if err != nil {
   727  					errors++
   728  					continue
   729  				}
   730  				pending = append(pending, pubAckFuture)
   731  			}
   732  
   733  			// All messages in this batch submitted; wait for their completion
   734  			select {
   735  			case <-js.PublishAsyncComplete():
   736  			case <-time.After(publishCompleteMaxWait):
   737  				b.Fatalf("Publish timed out")
   738  			}
   739  
   740  			// Verify one by one if they were published successfully
   741  			for _, pubAckFuture := range pending {
   742  				select {
   743  				case <-pubAckFuture.Ok():
   744  					published++
   745  				case <-pubAckFuture.Err():
   746  					errors++
   747  				default:
   748  					b.Fatalf("PubAck is still pending after publish completed")
   749  				}
   750  			}
   751  
   752  			if verbose {
   753  				b.Logf("Published %d/%d", published, b.N)
   754  			}
   755  		}
   756  
   757  		b.StopTimer()
   758  
   759  		return published, errors
   760  	}
   761  
   762  	type PublishType string
   763  	const (
   764  		Sync  PublishType = "Sync"
   765  		Async PublishType = "Async"
   766  	)
   767  
   768  	benchmarksCases := []struct {
   769  		clusterSize int
   770  		replicas    int
   771  		messageSize int
   772  		numSubjects int
   773  		minMessages int
   774  	}{
   775  		{1, 1, 10, 1, 100_000}, // Single node, 10B messages, ~1MB minimum
   776  		{1, 1, 1024, 1, 1_000}, // Single node, 1KB messages, ~1MB minimum
   777  		{3, 3, 10, 1, 100_000}, // 3-node cluster, R=3, 10B messages, ~1MB minimum
   778  		{3, 3, 1024, 1, 1_000}, // 3-node cluster, R=3, 1KB messages, ~1MB minimum
   779  	}
   780  
   781  	// All the cases above are run with each of the publisher cases below
   782  	publisherCases := []struct {
   783  		pType       PublishType
   784  		asyncWindow int
   785  	}{
   786  		{Sync, -1},
   787  		{Async, 1000},
   788  		{Async, 4000},
   789  		{Async, 8000},
   790  	}
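
        	// For Async cases, asyncWindow is applied as nats.PublishAsyncMaxPending,
        	// bounding the number of publishes in flight awaiting acknowledgment.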
   791  
   792  	for _, bc := range benchmarksCases {
   793  		name := fmt.Sprintf(
   794  			"N=%d,R=%d,MsgSz=%db,Subjs=%d",
   795  			bc.clusterSize,
   796  			bc.replicas,
   797  			bc.messageSize,
   798  			bc.numSubjects,
   799  		)
   800  
   801  		b.Run(
   802  			name,
   803  			func(b *testing.B) {
   804  
   805  				for _, pc := range publisherCases {
   806  					name := fmt.Sprintf("%v", pc.pType)
   807  					if pc.pType == Async && pc.asyncWindow > 0 {
   808  						name = fmt.Sprintf("%s[W:%d]", name, pc.asyncWindow)
   809  					}
   810  
   811  					b.Run(
   812  						name,
   813  						func(b *testing.B) {
   814  
   815  							subjects := make([]string, bc.numSubjects)
   816  							for i := 0; i < bc.numSubjects; i++ {
   817  								subjects[i] = fmt.Sprintf("s-%d", i+1)
   818  							}
   819  
   820  							if verbose {
   821  								b.Logf("Running %s with %d ops", name, b.N)
   822  							}
   823  
   824  							if verbose {
   825  								b.Logf("Setting up %d nodes", bc.clusterSize)
   826  							}
   827  
   828  							cl, _, shutdown, nc, _ := startJSClusterAndConnect(b, bc.clusterSize)
   829  							defer shutdown()
   830  							defer nc.Close()
   831  
   832  							jsOpts := []nats.JSOpt{
   833  								nats.MaxWait(10 * time.Second),
   834  							}
   835  
   836  							if pc.asyncWindow > 0 && pc.pType == Async {
   837  								jsOpts = append(jsOpts, nats.PublishAsyncMaxPending(pc.asyncWindow))
   838  							}
   839  
   840  							js, err := nc.JetStream(jsOpts...)
   841  							if err != nil {
   842  								b.Fatalf("Unexpected error getting JetStream context: %v", err)
   843  							}
   844  
   845  							if verbose {
   846  								b.Logf("Creating stream with R=%d and %d input subjects", bc.replicas, bc.numSubjects)
   847  							}
   848  							streamConfig := &nats.StreamConfig{
   849  								Name:     streamName,
   850  								Subjects: subjects,
   851  								Replicas: bc.replicas,
   852  							}
   853  							if _, err := js.AddStream(streamConfig); err != nil {
   854  								b.Fatalf("Error creating stream: %v", err)
   855  							}
   856  
   857  							// If replicated resource, connect to stream leader for lower variability
   858  							if bc.replicas > 1 {
   859  								connectURL := cl.streamLeader("$G", streamName).ClientURL()
   860  								nc.Close()
   861  								nc, err = nats.Connect(connectURL)
   862  								if err != nil {
   863  									b.Fatalf("Failed to create client connection to stream leader: %v", err)
   864  								}
   865  								defer nc.Close()
   866  								js, err = nc.JetStream(jsOpts...)
   867  								if err != nil {
   868  									b.Fatalf("Unexpected error getting JetStream context for stream leader: %v", err)
   869  								}
   870  							}
   871  
   872  							if verbose {
   873  								b.Logf("Running %v publisher with message size: %dB", pc.pType, bc.messageSize)
   874  							}
   875  
   876  							b.SetBytes(int64(bc.messageSize))
   877  
   878  							// Benchmark starts here
   879  							b.ResetTimer()
   880  
   881  							var published, errors int
   882  							switch pc.pType {
   883  							case Sync:
   884  								published, errors = runSyncPublisher(b, js, bc.messageSize, subjects)
   885  							case Async:
   886  								published, errors = runAsyncPublisher(b, js, bc.messageSize, subjects, pc.asyncWindow)
   887  							}
   888  
   889  							// Benchmark ends here
   890  							b.StopTimer()
   891  
   892  							if published+errors != b.N {
   893  								b.Fatalf("Something doesn't add up: %d + %d != %d", published, errors, b.N)
   894  							}
   895  
   896  							b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
   897  						},
   898  					)
   899  				}
   900  			},
   901  		)
   902  	}
   903  }
   904  
   905  func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) {
   906  
   907  	const (
   908  		verbose          = true
   909  		seed             = 12345
   910  		publishBatchSize = 100
   911  		messageSize      = 256
   912  		numSubjects      = 2500
   913  		subjectPrefix    = "S"
   914  		numPublishers    = 4
   915  		randomData       = true
   916  		warmupMessages   = 1
   917  	)
   918  
   919  	if verbose {
   920  		b.Logf(
   921  			"BatchSize: %d, MsgSize: %d, Subjects: %d, Publishers: %d, Random Message: %v",
   922  			publishBatchSize,
   923  			messageSize,
   924  			numSubjects,
   925  			numPublishers,
   926  			randomData,
   927  		)
   928  	}
   929  
   930  	// Benchmark parameters: sub-benchmarks are executed for every combination of the following 3 groups,
   931  	// unless a more restrictive filter is specified, e.g.:
   932  	// BenchmarkJetStreamInterestStreamWithLimit/.*R=3.*/Storage=Memory/unlimited
   933  
   934  	// Parameter: Number of nodes and number of stream replicas
   935  	clusterAndReplicasCases := []struct {
   936  		clusterSize int
   937  		replicas    int
   938  	}{
   939  		{1, 1}, // Single node, R=1
   940  		{3, 3}, // 3-nodes cluster, R=3
   941  	}
   942  
   943  	// Parameter: Stream storage type
   944  	storageTypeCases := []nats.StorageType{
   945  		nats.MemoryStorage,
   946  		nats.FileStorage,
   947  	}
   948  
   949  	// Parameter: Stream limit configuration
   950  	limitConfigCases := map[string]func(*nats.StreamConfig){
   951  		"unlimited": func(config *nats.StreamConfig) {
   952  		},
   953  		"MaxMsg=1000": func(config *nats.StreamConfig) {
   954  			config.MaxMsgs = 1000
   955  		},
   956  		"MaxMsg=10": func(config *nats.StreamConfig) {
   957  			config.MaxMsgs = 10
   958  		},
   959  		"MaxPerSubject=10": func(config *nats.StreamConfig) {
   960  			config.MaxMsgsPerSubject = 10
   961  		},
   962  		"MaxAge=1s": func(config *nats.StreamConfig) {
   963  			config.MaxAge = 1 * time.Second
   964  		},
   965  		"MaxBytes=1MB": func(config *nats.StreamConfig) {
   966  			config.MaxBytes = 1024 * 1024
   967  		},
   968  	}
   969  
   970  	// Context shared by publisher routines
   971  	type PublishersContext = struct {
   972  		readyWg      sync.WaitGroup
   973  		completedWg  sync.WaitGroup
   974  		messagesLeft int
   975  		lock         sync.Mutex
   976  		errors       int
   977  	}
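
        	// lock plays two roles: it guards messagesLeft and errors, and the main
        	// goroutine holds it until all publishers are ready, releasing it to
        	// start the workload.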
   978  
   979  	// Helper: publishes synchronously; run as a goroutine
   980  	publish := func(publisherId int, ctx *PublishersContext, js nats.JetStreamContext) {
   981  		defer ctx.completedWg.Done()
   982  		errors := 0
   983  		messageBuf := make([]byte, messageSize)
   984  		rand.New(rand.NewSource(int64(seed + publisherId))).Read(messageBuf)
   985  
   986  		// Warm up: publish a few messages
   987  		for i := 0; i < warmupMessages; i++ {
   988  			subject := fmt.Sprintf("%s.%d", subjectPrefix, fastrand.Uint32n(numSubjects))
   989  			if randomData {
   990  				fastRandomMutation(messageBuf, 10)
   991  			}
   992  			_, err := js.Publish(subject, messageBuf)
   993  			if err != nil {
   994  				b.Logf("Warning: failed to publish warmup message: %s", err)
   995  			}
   996  		}
   997  
   998  		// Signal this publisher is ready
   999  		ctx.readyWg.Done()
  1000  
  1001  		for {
  1002  			// Obtain a batch of messages to publish
  1003  			batchSize := 0
  1004  			{
  1005  				ctx.lock.Lock()
  1006  				if ctx.messagesLeft >= publishBatchSize {
  1007  					batchSize = publishBatchSize
  1008  				} else {
  1009  					batchSize = ctx.messagesLeft
  1010  				}
  1011  				ctx.messagesLeft -= batchSize
  1012  				ctx.lock.Unlock()
  1013  			}
  1014  
  1015  			// Nothing left to publish, terminate
  1016  			if batchSize == 0 {
  1017  				ctx.lock.Lock()
  1018  				ctx.errors += errors
  1019  				ctx.lock.Unlock()
  1020  				return
  1021  			}
  1022  
  1023  			// Publish a batch of messages
  1024  			for i := 0; i < batchSize; i++ {
  1025  				subject := fmt.Sprintf("%s.%d", subjectPrefix, fastrand.Uint32n(numSubjects))
  1026  				if randomData {
  1027  					fastRandomMutation(messageBuf, 10)
  1028  				}
  1029  				_, err := js.Publish(subject, messageBuf)
  1030  				if err != nil {
  1031  					errors += 1
  1032  				}
  1033  			}
  1034  		}
  1035  	}
  1036  
  1037  	// Benchmark matrix: (cluster and replicas) * (storage type) * (stream limit)
  1038  	for _, benchmarkCase := range clusterAndReplicasCases {
  1039  		b.Run(
  1040  			fmt.Sprintf(
  1041  				"N=%d,R=%d",
  1042  				benchmarkCase.clusterSize,
  1043  				benchmarkCase.replicas,
  1044  			),
  1045  			func(b *testing.B) {
  1046  				for _, storageType := range storageTypeCases {
  1047  					b.Run(
  1048  						fmt.Sprintf("Storage=%v", storageType),
  1049  						func(b *testing.B) {
  1050  
  1051  							for limitDescription, limitConfigFunc := range limitConfigCases {
  1052  								b.Run(
  1053  									limitDescription,
  1054  									func(b *testing.B) {
  1055  
  1056  										// Print benchmark parameters
  1057  										if verbose {
  1058  											b.Logf(
  1059  												"Stream: %+v, Storage: [%v] Limit: [%s], Ops: %d",
  1060  												benchmarkCase,
  1061  												storageType,
  1062  												limitDescription,
  1063  												b.N,
  1064  											)
  1065  										}
  1066  
  1067  										// Setup server or cluster
  1068  										cl, ls, shutdown, nc, js := startJSClusterAndConnect(b, benchmarkCase.clusterSize)
  1069  										defer shutdown()
  1070  										defer nc.Close()
  1071  
  1072  										// Common stream configuration
  1073  										streamConfig := &nats.StreamConfig{
  1074  											Name:      "S",
  1075  											Subjects:  []string{fmt.Sprintf("%s.>", subjectPrefix)},
  1076  											Replicas:  benchmarkCase.replicas,
  1077  											Storage:   storageType,
  1078  											Discard:   nats.DiscardOld,
  1079  											Retention: nats.InterestPolicy,
  1080  										}
  1081  										// Configure stream limit
  1082  										limitConfigFunc(streamConfig)
  1083  
  1084  										// Create stream
  1085  										if _, err := js.AddStream(streamConfig); err != nil {
  1086  											b.Fatalf("Error creating stream: %v", err)
  1087  										}
  1088  
  1089  										// Set up publishers shared context
  1090  										var pubCtx PublishersContext
  1091  										pubCtx.readyWg.Add(numPublishers)
  1092  										pubCtx.completedWg.Add(numPublishers)
  1093  
  1094  										// Hold this lock until all publishers are ready
  1095  										pubCtx.lock.Lock()
  1096  										pubCtx.messagesLeft = b.N
  1097  
  1098  										connectURL := ls.ClientURL()
  1099  										// If replicated resource, connect to stream leader for lower variability
  1100  										if benchmarkCase.replicas > 1 {
  1101  											connectURL = cl.streamLeader("$G", "S").ClientURL()
  1102  										}
  1103  
  1104  										// Spawn publisher routines, each with its own connection and JS context
  1105  										for i := 0; i < numPublishers; i++ {
  1106  											nc, err := nats.Connect(connectURL)
  1107  											if err != nil {
  1108  												b.Fatal(err)
  1109  											}
  1110  											defer nc.Close()
  1111  											js, err := nc.JetStream()
  1112  											if err != nil {
  1113  												b.Fatal(err)
  1114  											}
  1115  											go publish(i, &pubCtx, js)
  1116  										}
  1117  
  1118  										// Wait for all publishers to be ready
  1119  										pubCtx.readyWg.Wait()
  1120  
  1121  										// Set size of each operation, for throughput calculation
  1122  										b.SetBytes(messageSize)
  1123  
  1124  										// Benchmark starts here
  1125  										b.ResetTimer()
  1126  
  1127  										// Unblock the publishers
  1128  										pubCtx.lock.Unlock()
  1129  
  1130  										// Wait for all publishers to complete
  1131  										pubCtx.completedWg.Wait()
  1132  
  1133  										// Benchmark ends here
  1134  										b.StopTimer()
  1135  
  1136  										// Sanity check: publishers may have died before completing
  1137  										if pubCtx.messagesLeft != 0 {
  1138  											b.Fatalf("Some messages left: %d", pubCtx.messagesLeft)
  1139  										}
  1140  
  1141  										b.ReportMetric(float64(pubCtx.errors)*100/float64(b.N), "%error")
  1142  									},
  1143  								)
  1144  							}
  1145  						},
  1146  					)
  1147  				}
  1148  			},
  1149  		)
  1150  	}
  1151  }
  1152  
  1153  func BenchmarkJetStreamKV(b *testing.B) {
  1154  
  1155  	const (
  1156  		verbose   = false
  1157  		kvName    = "BUCKET"
  1158  		keyPrefix = "K_"
  1159  		seed      = 12345
  1160  	)
  1161  
  1162  	runKVGet := func(b *testing.B, kv nats.KeyValue, keys []string) int {
  1163  		rng := rand.New(rand.NewSource(int64(seed)))
  1164  		errors := 0
  1165  
  1166  		b.ResetTimer()
  1167  
  1168  		for i := 1; i <= b.N; i++ {
  1169  			key := keys[rng.Intn(len(keys))]
  1170  			_, err := kv.Get(key)
  1171  			if err != nil {
  1172  				errors++
  1173  				continue
  1174  			}
  1175  
  1176  			if verbose && i%1000 == 0 {
  1177  				b.Logf("Completed %d/%d Get ops", i, b.N)
  1178  			}
  1179  		}
  1180  
  1181  		b.StopTimer()
  1182  		return errors
  1183  	}
  1184  
  1185  	runKVPut := func(b *testing.B, kv nats.KeyValue, keys []string, valueSize int) int {
  1186  
  1187  		value := make([]byte, valueSize)
  1188  		rand.New(rand.NewSource(int64(seed))).Read(value)
  1189  		errors := 0
  1190  
  1191  		b.ResetTimer()
  1192  
  1193  		for i := 1; i <= b.N; i++ {
  1194  			key := keys[fastrand.Uint32n(uint32(len(keys)))]
  1195  			fastRandomMutation(value, 10)
  1196  			_, err := kv.Put(key, value)
  1197  			if err != nil {
  1198  				errors++
  1199  				continue
  1200  			}
  1201  
  1202  			if verbose && i%1000 == 0 {
  1203  				b.Logf("Completed %d/%d Put ops", i, b.N)
  1204  			}
  1205  		}
  1206  
  1207  		b.StopTimer()
  1208  		return errors
  1209  	}
  1210  
  1211  	runKVUpdate := func(b *testing.B, kv nats.KeyValue, keys []string, valueSize int) int {
  1212  		value := make([]byte, valueSize)
  1213  		rand.New(rand.NewSource(int64(seed))).Read(value)
  1214  		errors := 0
  1215  
  1216  		b.ResetTimer()
  1217  
  1218  		for i := 1; i <= b.N; i++ {
  1219  			key := keys[fastrand.Uint32n(uint32(len(keys)))]
  1220  
  1221  			kve, getErr := kv.Get(key)
  1222  			if getErr != nil {
  1223  				errors++
  1224  				continue
  1225  			}
  1226  
  1227  			fastRandomMutation(value, 10)
  1228  			_, updateErr := kv.Update(key, value, kve.Revision())
  1229  			if updateErr != nil {
  1230  				errors++
  1231  				continue
  1232  			}
  1233  
  1234  			if verbose && i%1000 == 0 {
  1235  				b.Logf("Completed %d/%d Update ops", i, b.N)
  1236  			}
  1237  		}
  1238  
  1239  		b.StopTimer()
  1240  		return errors
  1241  	}
  1242  
  1243  	type WorkloadType string
  1244  	const (
  1245  		Get    WorkloadType = "GET"
  1246  		Put    WorkloadType = "PUT"
  1247  		Update WorkloadType = "CAS"
  1248  	)
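
        	// Update is labeled CAS because kv.Update performs a compare-and-swap on
        	// the key's revision: it fails if the key changed since the preceding Get.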
  1249  
  1250  	benchmarksCases := []struct {
  1251  		clusterSize int
  1252  		replicas    int
  1253  		numKeys     int
  1254  		valueSize   int
  1255  	}{
  1256  		{1, 1, 100, 100},   // 1 node with 100 keys, 100B values
  1257  		{1, 1, 1000, 100},  // 1 node with 1000 keys, 100B values
  1258  		{3, 3, 100, 100},   // 3 nodes with 100 keys, 100B values
  1259  		{3, 3, 1000, 100},  // 3 nodes with 1000 keys, 100B values
  1260  		{3, 3, 1000, 1024}, // 3 nodes with 1000 keys, 1KB values
  1261  	}
  1262  
  1263  	workloadCases := []WorkloadType{
  1264  		Get,
  1265  		Put,
  1266  		Update,
  1267  	}
  1268  
  1269  	for _, bc := range benchmarksCases {
  1270  
  1271  		bName := fmt.Sprintf(
  1272  			"N=%d,R=%d,B=1,K=%d,ValSz=%db",
  1273  			bc.clusterSize,
  1274  			bc.replicas,
  1275  			bc.numKeys,
  1276  			bc.valueSize,
  1277  		)
  1278  
  1279  		b.Run(
  1280  			bName,
  1281  			func(b *testing.B) {
  1282  				for _, wc := range workloadCases {
  1283  					wName := fmt.Sprintf("%v", wc)
  1284  					b.Run(
  1285  						wName,
  1286  						func(b *testing.B) {
  1287  
  1288  							if verbose {
  1289  								b.Logf("Running %s workload %s with %d messages", wName, bName, b.N)
  1290  							}
  1291  
  1292  							if verbose {
  1293  								b.Logf("Setting up %d nodes", bc.clusterSize)
  1294  							}
  1295  
  1296  							// Pre-generate all keys
  1297  							keys := make([]string, 0, bc.numKeys)
  1298  							for i := 1; i <= bc.numKeys; i++ {
  1299  								key := fmt.Sprintf("%s%d", keyPrefix, i)
  1300  								keys = append(keys, key)
  1301  							}
  1302  
  1303  							// Setup server or cluster
  1304  							cl, _, shutdown, nc, js := startJSClusterAndConnect(b, bc.clusterSize)
  1305  							defer shutdown()
  1306  							defer nc.Close()
  1307  
  1308  							// Create bucket
  1309  							if verbose {
  1310  								b.Logf("Creating KV %s with R=%d", kvName, bc.replicas)
  1311  							}
  1312  							kvConfig := &nats.KeyValueConfig{
  1313  								Bucket:   kvName,
  1314  								Replicas: bc.replicas,
  1315  							}
  1316  							kv, err := js.CreateKeyValue(kvConfig)
  1317  							if err != nil {
  1318  								b.Fatalf("Error creating KV: %v", err)
  1319  							}
  1320  
  1321  							// Initialize all keys
  1322  							rng := rand.New(rand.NewSource(int64(seed)))
  1323  							value := make([]byte, bc.valueSize)
  1324  							for _, key := range keys {
  1325  								rng.Read(value)
  1326  								_, err := kv.Create(key, value)
  1327  								if err != nil {
  1328  									b.Fatalf("Failed to initialize %s/%s: %v", kvName, key, err)
  1329  								}
  1330  							}
  1331  
  1332  							// If replicated resource, connect to stream leader for lower variability
  1333  							if bc.replicas > 1 {
  1334  								nc.Close()
  1335  								connectURL := cl.streamLeader("$G", fmt.Sprintf("KV_%s", kvName)).ClientURL()
  1336  								nc, js = jsClientConnectURL(b, connectURL)
  1337  								defer nc.Close()
  1338  							}
  1339  
  1340  							kv, err = js.KeyValue(kv.Bucket())
  1341  							if err != nil {
  1342  								b.Fatalf("Error binding to KV: %v", err)
  1343  							}
  1344  
  1345  							// Set size of each operation, for throughput calculation
  1346  							b.SetBytes(int64(bc.valueSize))
  1347  
  1348  							// Discard time spent during setup
  1349  							// May reset again further in
  1350  							b.ResetTimer()
  1351  
  1352  							var errors int
  1353  
  1354  							switch wc {
  1355  							case Get:
  1356  								errors = runKVGet(b, kv, keys)
  1357  							case Put:
  1358  								errors = runKVPut(b, kv, keys, bc.valueSize)
  1359  							case Update:
  1360  								errors = runKVUpdate(b, kv, keys, bc.valueSize)
  1361  							default:
  1362  								b.Fatalf("Unknown workload type: %v", wc)
  1363  							}
  1364  
  1365  							// Benchmark ends here (the workload may have stopped the timer earlier)
  1366  							b.StopTimer()
  1367  
  1368  							b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
  1369  						},
  1370  					)
  1371  				}
  1372  			},
  1373  		)
  1374  	}
  1375  }
  1376  
  1377  func BenchmarkJetStreamObjStore(b *testing.B) {
  1378  	const (
  1379  		verbose      = false
  1380  		objStoreName = "B"
  1381  		keyPrefix    = "K_"
  1382  		seed         = 12345
  1383  		initKeys     = true
  1384  
  1385  		// read/write ratios
  1386  		ReadOnly  = 1.0
  1387  		WriteOnly = 0.0
  1388  	)
  1389  
  1390  	// rwRatio to string
  1391  	rwRatioToString := func(rwRatio float64) string {
  1392  		switch rwRatio {
  1393  		case ReadOnly:
  1394  			return "readOnly"
  1395  		case WriteOnly:
  1396  			return "writeOnly"
  1397  		default:
  1398  			return fmt.Sprintf("%0.1f", rwRatio)
  1399  		}
  1400  	}
  1401  
  1402  	// Benchmarks the object store by performing read/write operations on objects of random size
  1403  	RunObjStoreBenchmark := func(b *testing.B, objStore nats.ObjectStore, minObjSz int, maxObjSz int, numKeys int, rwRatio float64) (int, int, int) {
  1404  		var (
  1405  			errors int
  1406  			reads  int
  1407  			writes int
  1408  		)
  1409  
  1410  		dataBuf := make([]byte, maxObjSz)
  1411  		rng := rand.New(rand.NewSource(int64(seed)))
  1412  		rng.Read(dataBuf)
  1413  
  1414  		// Each operation processes a random number of bytes within a size range,
  1415  		// either read from or written to an object store bucket. Here we
  1416  		// approximate the size of the processed data with the average of the range.
  1417  		b.SetBytes(int64((minObjSz + maxObjSz) / 2))
  1418  
  1419  		for i := 1; i <= b.N; i++ {
  1420  			key := fmt.Sprintf("%s_%d", keyPrefix, rng.Intn(numKeys))
  1421  			var err error
  1422  
  1423  			rwOp := rng.Float64()
  1424  			switch {
  1425  			case rwOp <= rwRatio:
  1426  				// Read Op
  1427  				_, err = objStore.GetBytes(key)
  1428  				reads++
  1429  			case rwOp > rwRatio:
  1430  				// Write Op
  1431  				// dataSz is a random value between min-max object size and cannot be less than 1 byte
  1432  				dataSz := rng.Intn(maxObjSz-minObjSz+1) + minObjSz
  1433  				data := dataBuf[:dataSz]
  1434  				fastRandomMutation(data, 10)
  1435  				_, err = objStore.PutBytes(key, data)
  1436  				writes++
  1437  			}
  1438  			if err != nil {
  1439  				errors++
  1440  			}
  1441  
  1442  			if verbose && i%1000 == 0 {
  1443  				b.Logf("Completed: %d reads, %d writes, %d errors. %d/%d total operations have been completed.", reads, writes, errors, i, b.N)
  1444  			}
  1445  		}
  1446  		return errors, reads, writes
  1447  	}
  1448  
  1449  	// benchmark cases table
  1450  	benchmarkCases := []struct {
  1451  		storage  nats.StorageType
  1452  		numKeys  int
  1453  		minObjSz int
  1454  		maxObjSz int
  1455  	}{
  1456  		{nats.MemoryStorage, 100, 1024, 102400},     // mem storage, 100 objects sized (1KB-100KB)
  1457  		{nats.MemoryStorage, 100, 102400, 1048576},  // mem storage, 100 objects sized (100KB-1MB)
  1458  		{nats.MemoryStorage, 1000, 10240, 102400},   // mem storage, 1k objects of various size (10KB - 100KB)
  1459  		{nats.FileStorage, 100, 1024, 102400},       // file storage, 100 objects sized (1KB-100KB)
  1460  		{nats.FileStorage, 1000, 10240, 1048576},    // file storage, 1k objects of various size (10KB - 1MB)
  1461  		{nats.FileStorage, 100, 102400, 1048576},    // file storage, 100 objects sized (100KB-1MB)
  1462  		{nats.FileStorage, 100, 1048576, 10485760},  // file storage, 100 objects sized (1MB-10MB)
  1463  		{nats.FileStorage, 10, 10485760, 104857600}, // file storage, 10 objects sized (10MB-100MB)
  1464  	}
  1465  
  1466  	var (
  1467  		clusterSizeCases = []int{1, 3}
  1468  		rwRatioCases     = []float64{ReadOnly, WriteOnly, 0.8}
  1469  	)
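
        	// rwRatio is the fraction of operations that are reads: 1.0 = read-only,
        	// 0.0 = write-only, 0.8 = roughly 80% reads and 20% writes.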
  1470  
  1471  	// Test with either single node or 3 node cluster
  1472  	for _, clusterSize := range clusterSizeCases {
  1473  		replicas := clusterSize
  1474  		cName := fmt.Sprintf("N=%d,R=%d", clusterSize, replicas)
  1475  		b.Run(
  1476  			cName,
  1477  			func(b *testing.B) {
  1478  				for _, rwRatio := range rwRatioCases {
  1479  					rName := fmt.Sprintf("workload=%s", rwRatioToString(rwRatio))
  1480  					b.Run(
  1481  						rName,
  1482  						func(b *testing.B) {
  1483  							// Run every benchmark case in the table above
  1484  							for _, bc := range benchmarkCases {
  1485  								bName := fmt.Sprintf("K=%d,storage=%s,minObjSz=%db,maxObjSz=%db", bc.numKeys, bc.storage, bc.minObjSz, bc.maxObjSz)
  1486  								b.Run(
  1487  									bName,
  1488  									func(b *testing.B) {
  1489  
  1490  										// Test setup
  1491  										rng := rand.New(rand.NewSource(int64(seed)))
  1492  
  1493  										if verbose {
  1494  											b.Logf("Setting up %d nodes", replicas)
  1495  										}
  1496  
  1497  										// Setup server or cluster
  1498  										cl, _, shutdown, nc, js := startJSClusterAndConnect(b, clusterSize)
  1499  										defer shutdown()
  1500  										defer nc.Close()
  1501  
  1502  										// Initialize object store
  1503  										if verbose {
  1504  											b.Logf("Creating ObjectStore %s with R=%d", objStoreName, replicas)
  1505  										}
  1506  										objStoreConfig := &nats.ObjectStoreConfig{
  1507  											Bucket:   objStoreName,
  1508  											Replicas: replicas,
  1509  											Storage:  bc.storage,
  1510  										}
  1511  										objStore, err := js.CreateObjectStore(objStoreConfig)
  1512  										if err != nil {
  1513  											b.Fatalf("Error creating ObjectStore: %v", err)
  1514  										}
  1515  
  1516  										// If replicated resource, connect to stream leader for lower variability
  1517  										if clusterSize > 1 {
  1518  											nc.Close()
  1519  											connectURL := cl.streamLeader("$G", fmt.Sprintf("OBJ_%s", objStoreName)).ClientURL()
  1520  											nc, js := jsClientConnectURL(b, connectURL)
  1521  											defer nc.Close()
  1522  											objStore, err = js.ObjectStore(objStoreName)
  1523  											if err != nil {
  1524  												b.Fatalf("Error binding to ObjectStore: %v", err)
  1525  											}
  1526  										}
  1527  
  1528  										// Initialize keys
  1529  										if initKeys {
  1530  											for n := 0; n < bc.numKeys; n++ {
  1531  												key := fmt.Sprintf("%s_%d", keyPrefix, n)
  1532  												dataSz := rng.Intn(bc.maxObjSz-bc.minObjSz+1) + bc.minObjSz
  1533  												value := make([]byte, dataSz)
  1534  												rng.Read(value)
  1535  												_, err := objStore.PutBytes(key, value)
  1536  												if err != nil {
  1537  													b.Fatalf("Failed to initialize %s/%s: %v", objStoreName, key, err)
  1538  												}
  1539  											}
  1540  										}
  1541  
  1542  										b.ResetTimer()
  1543  
  1544  										// Run benchmark
  1545  										errors, reads, writes := RunObjStoreBenchmark(b, objStore, bc.minObjSz, bc.maxObjSz, bc.numKeys, rwRatio)
  1546  
  1547  										// Report metrics
  1548  										b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
  1549  										b.ReportMetric(float64(reads), "reads")
  1550  										b.ReportMetric(float64(writes), "writes")
  1551  
  1552  									},
  1553  								)
  1554  							}
  1555  						},
  1556  					)
  1557  				}
  1558  			},
  1559  		)
  1560  	}
  1561  }
  1562  
  1563  func BenchmarkJetStreamPublishConcurrent(b *testing.B) {
  1564  	const (
  1565  		subject    = "test-subject"
  1566  		streamName = "test-stream"
  1567  	)
  1568  
  1569  	type BenchPublisher struct {
  1570  		// nats connection for this publisher
  1571  		conn *nats.Conn
  1572  		// jetstream context
  1573  		js nats.JetStreamContext
  1574  		// message buffer
  1575  		messageData []byte
  1576  		// number of publish calls
  1577  		publishCalls int
  1578  		// number of publish errors
  1579  		publishErrors int
  1580  	}
  1581  
  1582  	messageSizeCases := []int64{
  1583  		10,     // 10B
  1584  		1024,   // 1KiB
  1585  		102400, // 100KiB
  1586  	}
  1587  	numPubsCases := []int{
  1588  		12,
  1589  	}
  1590  
  1591  	replicasCases := []struct {
  1592  		clusterSize int
  1593  		replicas    int
  1594  	}{
  1595  		{1, 1},
  1596  		{3, 3},
  1597  	}
  1598  
  1599  	workload := func(b *testing.B, numPubs int, messageSize int64, clientUrl string) {
  1600  
  1601  		// create N publishers
  1602  		publishers := make([]BenchPublisher, numPubs)
  1603  		for i := range publishers {
  1604  			// create publisher connection and jetstream context
  1605  			ncPub, err := nats.Connect(clientUrl)
  1606  			if err != nil {
  1607  				b.Fatal(err)
  1608  			}
  1609  			defer ncPub.Close()
  1610  			jsPub, err := ncPub.JetStream()
  1611  			if err != nil {
  1612  				b.Fatal(err)
  1613  			}
  1614  
  1615  			// initialize publisher
  1616  			publishers[i] = BenchPublisher{
  1617  				conn:          ncPub,
  1618  				js:            jsPub,
  1619  				messageData:   make([]byte, messageSize),
  1620  				publishCalls:  0,
  1621  				publishErrors: 0,
  1622  			}
  1623  			rand.New(rand.NewSource(int64(i))).Read(publishers[i].messageData)
  1624  		}
  1625  
  1626  		// waits for all publisher goroutines and for the main thread to be ready
  1627  		var workloadReadyWg sync.WaitGroup
  1628  		workloadReadyWg.Add(1 + numPubs)
  1629  
  1630  		// wait group that blocks the main thread until the publish workload completes; it is decremented once the stream has received b.N messages
  1631  		var benchCompleteWg sync.WaitGroup
  1632  		benchCompleteWg.Add(1)
  1633  
  1634  		// wait group to ensure all publishers have been torn down
  1635  		var finishedPublishersWg sync.WaitGroup
  1636  		finishedPublishersWg.Add(numPubs)
  1637  
  1638  		// start goroutines for all publishers; wait until all are initialized before starting the publish workload
  1639  		for i := range publishers {
  1640  
  1641  			go func(pubId int) {
  1642  				// signal that this publisher has been torn down
  1643  				defer finishedPublishersWg.Done()
  1644  
  1645  				// publisher sub-routine is ready
  1646  				workloadReadyWg.Done()
  1647  
  1648  				// start workload when main thread and all other publishers are ready
  1649  				workloadReadyWg.Wait()
  1650  
  1651  				// publish until stream receives b.N messages
  1652  				for {
  1653  					// random bytes as payload
  1654  					fastRandomMutation(publishers[pubId].messageData, 10)
  1655  					// attempt to publish message
  1656  					pubAck, err := publishers[pubId].js.Publish(subject, publishers[pubId].messageData)
  1657  					publishers[pubId].publishCalls += 1
  1658  					if err != nil {
  1659  						publishers[pubId].publishErrors += 1
  1660  						continue
  1661  					}
  1662  					// the stream has received all b.N messages; only the publisher that
        					// observes sequence b.N exactly signals completion
  1663  					if pubAck.Sequence == uint64(b.N) {
  1664  						benchCompleteWg.Done()
  1665  					}
  1666  					// the stream already holds b.N or more messages; this publisher can stop
  1667  					if pubAck.Sequence >= uint64(b.N) {
  1668  						return
  1669  					}
  1670  				}
  1671  			}(i)
  1672  		}
  1673  
  1674  		// set bytes per operation
  1675  		b.SetBytes(messageSize)
  1676  
  1677  		// main thread is ready
  1678  		workloadReadyWg.Done()
  1679  		// start the clock
  1680  		b.ResetTimer()
  1681  
  1682  		// wait until the termination condition is reached
  1683  		benchCompleteWg.Wait()
  1684  		// stop the clock
  1685  		b.StopTimer()
  1686  
  1687  		// wait for all publishers to shutdown
  1688  		finishedPublishersWg.Wait()
  1689  
  1690  		// sum up publish calls and errors
  1691  		publishCalls := 0
  1692  		publishErrors := 0
  1693  		for _, pub := range publishers {
  1694  			publishCalls += pub.publishCalls
  1695  			publishErrors += pub.publishErrors
  1696  		}
  1697  
  1698  		// report error rate
  1699  		errorRate := 100 * float64(publishErrors) / float64(publishCalls)
  1700  		b.ReportMetric(errorRate, "%error")
  1701  	}
  1702  
  1703  	// benchmark case matrix
  1704  	for _, replicasCase := range replicasCases {
  1705  		b.Run(
  1706  			fmt.Sprintf("N=%d,R=%d", replicasCase.clusterSize, replicasCase.replicas),
  1707  			func(b *testing.B) {
  1708  				for _, messageSize := range messageSizeCases {
  1709  					b.Run(
  1710  						fmt.Sprintf("msgSz=%db", messageSize),
  1711  						func(b *testing.B) {
  1712  							for _, numPubs := range numPubsCases {
  1713  								b.Run(
  1714  									fmt.Sprintf("pubs=%d", numPubs),
  1715  									func(b *testing.B) {
  1716  
  1717  										// start jetstream cluster
  1718  										cl, ls, shutdown, nc, js := startJSClusterAndConnect(b, replicasCase.clusterSize)
  1719  										defer shutdown()
  1720  										defer nc.Close()
  1721  										clientUrl := ls.ClientURL()
  1722  
  1723  										// create stream
  1724  										_, err := js.AddStream(&nats.StreamConfig{
  1725  											Name:     streamName,
  1726  											Subjects: []string{subject},
  1727  											Replicas: replicasCase.replicas,
  1728  										})
  1729  										if err != nil {
  1730  											b.Fatal(err)
  1731  										}
  1732  										defer js.DeleteStream(streamName)
  1733  
  1734  										// If replicated resource, connect to stream leader for lower variability
  1735  										if replicasCase.replicas > 1 {
  1736  											nc.Close()
  1737  											clientUrl = cl.streamLeader("$G", streamName).ClientURL()
  1738  											nc, _ = jsClientConnectURL(b, clientUrl)
  1739  											defer nc.Close()
  1740  										}
  1741  
  1742  										// run workload
  1743  										workload(b, numPubs, messageSize, clientUrl)
  1744  									},
  1745  								)
  1746  							}
  1747  						})
  1748  				}
  1749  			})
  1750  	}
  1751  }
  1752  
  1753  // Helper function to stand up a JS-enabled single server or cluster
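        // Usage sketch:
        //
        //	cl, s, shutdown, nc, js := startJSClusterAndConnect(b, 3)
        //	defer shutdown()
        //	defer nc.Close()
        //
        // For clusterSize == 1 the returned cluster is nil and s is the single
        // server; otherwise s is the cluster leader.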
  1754  func startJSClusterAndConnect(b *testing.B, clusterSize int) (c *cluster, s *Server, shutdown func(), nc *nats.Conn, js nats.JetStreamContext) {
  1755  	b.Helper()
  1756  	var err error
  1757  
  1758  	if clusterSize == 1 {
  1759  		s = RunBasicJetStreamServer(b)
  1760  		shutdown = func() {
  1761  			s.Shutdown()
  1762  		}
  1763  	} else {
  1764  		c = createJetStreamClusterExplicit(b, "BENCH_PUB", clusterSize)
  1765  		c.waitOnClusterReadyWithNumPeers(clusterSize)
  1766  		c.waitOnLeader()
  1767  		s = c.leader()
  1768  		shutdown = func() {
  1769  			c.shutdown()
  1770  		}
  1771  	}
  1772  
  1773  	nc, err = nats.Connect(s.ClientURL())
  1774  	if err != nil {
  1775  		b.Fatalf("failed to connect: %s", err)
  1776  	}
  1777  
  1778  	js, err = nc.JetStream()
  1779  	if err != nil {
  1780  		b.Fatalf("failed to init jetstream: %s", err)
  1781  	}
  1782  
  1783  	return c, s, shutdown, nc, js
  1784  }