github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/core_benchmarks_test.go (about)

     1  // Copyright 2023 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package server
    15  
    16  import (
    17  	"crypto/tls"
    18  	"errors"
    19  	"fmt"
    20  	"math"
    21  	"math/rand"
    22  	"os"
    23  	"strconv"
    24  	"sync"
    25  	"testing"
    26  	"time"
    27  
    28  	"github.com/nats-io/nats-server/v2/internal/fastrand"
    29  	"github.com/nats-io/nats.go"
    30  )
    31  
    32  func BenchmarkCoreRequestReply(b *testing.B) {
    33  	const (
    34  		subject = "test-subject"
    35  	)
    36  
    37  	messageSizes := []int64{
    38  		1024,   // 1kb
    39  		4096,   // 4kb
    40  		40960,  // 40kb
    41  		409600, // 400kb
    42  	}
    43  
    44  	for _, messageSize := range messageSizes {
    45  		b.Run(fmt.Sprintf("msgSz=%db", messageSize), func(b *testing.B) {
    46  
    47  			// Start server
    48  			serverOpts := DefaultOptions()
    49  			server := RunServer(serverOpts)
    50  			defer server.Shutdown()
    51  
    52  			clientUrl := server.ClientURL()
    53  
    54  			// Create "echo" subscriber
    55  			ncSub, err := nats.Connect(clientUrl)
    56  			if err != nil {
    57  				b.Fatal(err)
    58  			}
    59  			defer ncSub.Close()
    60  			sub, err := ncSub.Subscribe(subject, func(msg *nats.Msg) {
    61  				// Responder echoes the request payload as-is
    62  				msg.Respond(msg.Data)
    63  			})
    64  			defer sub.Unsubscribe()
    65  			if err != nil {
    66  				b.Fatal(err)
    67  			}
    68  
    69  			// Create publisher
    70  			ncPub, err := nats.Connect(clientUrl)
    71  			if err != nil {
    72  				b.Fatal(err)
    73  			}
    74  			defer ncPub.Close()
    75  
    76  			var errors = 0
    77  
    78  			// Create message
    79  			messageData := make([]byte, messageSize)
    80  			rand.New(rand.NewSource(12345)).Read(messageData)
    81  
    82  			b.SetBytes(messageSize)
    83  
    84  			// Benchmark
    85  			b.ResetTimer()
    86  			for i := 0; i < b.N; i++ {
    87  				fastRandomMutation(messageData, 10)
    88  
    89  				_, err := ncPub.Request(subject, messageData, time.Second)
    90  				if err != nil {
    91  					errors++
    92  				}
    93  			}
    94  			b.StopTimer()
    95  
    96  			b.ReportMetric(float64(errors), "errors")
    97  		})
    98  	}
    99  }
   100  
   101  func BenchmarkCoreTLSFanOut(b *testing.B) {
   102  	const (
   103  		subject            = "test-subject"
   104  		configsBasePath    = "./configs/tls"
   105  		maxPendingMessages = 25
   106  		maxPendingBytes    = 15 * 1024 * 1024 // 15MiB
   107  	)
   108  
   109  	keyTypeCases := []string{
   110  		"none",
   111  		"ed25519",
   112  		"rsa-1024",
   113  		"rsa-2048",
   114  		"rsa-4096",
   115  	}
   116  	messageSizeCases := []int64{
   117  		512 * 1024, // 512Kib
   118  	}
   119  	numSubsCases := []int{
   120  		5,
   121  	}
   122  
   123  	// Custom error handler that ignores ErrSlowConsumer.
   124  	// Lots of them are expected in this benchmark which indiscriminately publishes at a rate higher
   125  	// than what the server can relay to subscribers.
   126  	ignoreSlowConsumerErrorHandler := func(conn *nats.Conn, s *nats.Subscription, err error) {
   127  		if errors.Is(err, nats.ErrSlowConsumer) {
   128  			// Swallow this error
   129  		} else {
   130  			_, _ = fmt.Fprintf(os.Stderr, "Warning: %s\n", err)
   131  		}
   132  	}
   133  
   134  	for _, keyType := range keyTypeCases {
   135  
   136  		b.Run(
   137  			fmt.Sprintf("keyType=%s", keyType),
   138  			func(b *testing.B) {
   139  
   140  				for _, messageSize := range messageSizeCases {
   141  					b.Run(
   142  						fmt.Sprintf("msgSz=%db", messageSize),
   143  						func(b *testing.B) {
   144  
   145  							for _, numSubs := range numSubsCases {
   146  								b.Run(
   147  									fmt.Sprintf("subs=%d", numSubs),
   148  									func(b *testing.B) {
   149  										// Start server
   150  										configPath := fmt.Sprintf("%s/tls-%s.conf", configsBasePath, keyType)
   151  										server, _ := RunServerWithConfig(configPath)
   152  										defer server.Shutdown()
   153  
   154  										opts := []nats.Option{
   155  											nats.MaxReconnects(-1),
   156  											nats.ReconnectWait(0),
   157  											nats.ErrorHandler(ignoreSlowConsumerErrorHandler),
   158  										}
   159  
   160  										if keyType != "none" {
   161  											opts = append(opts, nats.Secure(&tls.Config{
   162  												InsecureSkipVerify: true,
   163  											}))
   164  										}
   165  
   166  										clientUrl := server.ClientURL()
   167  
   168  										// Count of messages received for by each subscriber
   169  										counters := make([]int, numSubs)
   170  
   171  										// Wait group for subscribers to signal they received b.N messages
   172  										var wg sync.WaitGroup
   173  										wg.Add(numSubs)
   174  
   175  										// Create subscribers
   176  										for i := 0; i < numSubs; i++ {
   177  											subIndex := i
   178  											ncSub, err := nats.Connect(clientUrl, opts...)
   179  											if err != nil {
   180  												b.Fatal(err)
   181  											}
   182  											defer ncSub.Close()
   183  											sub, err := ncSub.Subscribe(subject, func(msg *nats.Msg) {
   184  												counters[subIndex] += 1
   185  												if counters[subIndex] == b.N {
   186  													wg.Done()
   187  												}
   188  											})
   189  											if err != nil {
   190  												b.Fatalf("failed to subscribe: %s", err)
   191  											}
   192  											err = sub.SetPendingLimits(maxPendingMessages, maxPendingBytes)
   193  											if err != nil {
   194  												b.Fatalf("failed to set pending limits: %s", err)
   195  											}
   196  											defer sub.Unsubscribe()
   197  											if err != nil {
   198  												b.Fatal(err)
   199  											}
   200  										}
   201  
   202  										// publisher
   203  										ncPub, err := nats.Connect(clientUrl, opts...)
   204  										if err != nil {
   205  											b.Fatal(err)
   206  										}
   207  										defer ncPub.Close()
   208  
   209  										var errorCount = 0
   210  
   211  										// random bytes as payload
   212  										messageData := make([]byte, messageSize)
   213  										rand.New(rand.NewSource(12345)).Read(messageData)
   214  
   215  										quitCh := make(chan bool, 1)
   216  
   217  										publish := func() {
   218  											for {
   219  												select {
   220  												case <-quitCh:
   221  													return
   222  												default:
   223  													// continue publishing
   224  												}
   225  
   226  												fastRandomMutation(messageData, 10)
   227  												err := ncPub.Publish(subject, messageData)
   228  												if err != nil {
   229  													errorCount += 1
   230  												}
   231  											}
   232  										}
   233  
   234  										// Set bytes per operation
   235  										b.SetBytes(messageSize)
   236  										// Start the clock
   237  										b.ResetTimer()
   238  										// Start publishing as fast as the server allows
   239  										go publish()
   240  										// Wait for all subscribers to have delivered b.N messages
   241  										wg.Wait()
   242  										// Stop the clock
   243  										b.StopTimer()
   244  
   245  										// Stop publisher
   246  										quitCh <- true
   247  
   248  										b.ReportMetric(float64(errorCount), "errors")
   249  									},
   250  								)
   251  							}
   252  						},
   253  					)
   254  				}
   255  			},
   256  		)
   257  	}
   258  }
   259  
   260  func BenchmarkCoreFanOut(b *testing.B) {
   261  	const (
   262  		subject            = "test-subject"
   263  		maxPendingMessages = 25
   264  		maxPendingBytes    = 15 * 1024 * 1024 // 15MiB
   265  	)
   266  
   267  	messageSizeCases := []int64{
   268  		100,        // 100B
   269  		1024,       // 1KiB
   270  		10240,      // 10KiB
   271  		512 * 1024, // 512KiB
   272  	}
   273  	numSubsCases := []int{
   274  		3,
   275  		5,
   276  		10,
   277  	}
   278  
   279  	// Custom error handler that ignores ErrSlowConsumer.
   280  	// Lots of them are expected in this benchmark which indiscriminately publishes at a rate higher
   281  	// than what the server can relay to subscribers.
   282  	ignoreSlowConsumerErrorHandler := func(_ *nats.Conn, _ *nats.Subscription, err error) {
   283  		if errors.Is(err, nats.ErrSlowConsumer) {
   284  			// Swallow this error
   285  		} else {
   286  			_, _ = fmt.Fprintf(os.Stderr, "Warning: %s\n", err)
   287  		}
   288  	}
   289  
   290  	for _, messageSize := range messageSizeCases {
   291  		b.Run(
   292  			fmt.Sprintf("msgSz=%db", messageSize),
   293  			func(b *testing.B) {
   294  				for _, numSubs := range numSubsCases {
   295  					b.Run(
   296  						fmt.Sprintf("subs=%d", numSubs),
   297  						func(b *testing.B) {
   298  							// Start server
   299  							defaultOpts := DefaultOptions()
   300  							server := RunServer(defaultOpts)
   301  							defer server.Shutdown()
   302  
   303  							opts := []nats.Option{
   304  								nats.MaxReconnects(-1),
   305  								nats.ReconnectWait(0),
   306  								nats.ErrorHandler(ignoreSlowConsumerErrorHandler),
   307  							}
   308  
   309  							clientUrl := server.ClientURL()
   310  
   311  							// Count of messages received for by each subscriber
   312  							counters := make([]int, numSubs)
   313  
   314  							// Wait group for subscribers to signal they received b.N messages
   315  							var wg sync.WaitGroup
   316  							wg.Add(numSubs)
   317  
   318  							// Create subscribers
   319  							for i := 0; i < numSubs; i++ {
   320  								subIndex := i
   321  								ncSub, err := nats.Connect(clientUrl, opts...)
   322  								if err != nil {
   323  									b.Fatal(err)
   324  								}
   325  								defer ncSub.Close()
   326  								sub, err := ncSub.Subscribe(subject, func(_ *nats.Msg) {
   327  									counters[subIndex] += 1
   328  									if counters[subIndex] == b.N {
   329  										wg.Done()
   330  									}
   331  								})
   332  								if err != nil {
   333  									b.Fatalf("failed to subscribe: %s", err)
   334  								}
   335  								err = sub.SetPendingLimits(maxPendingMessages, maxPendingBytes)
   336  								if err != nil {
   337  									b.Fatalf("failed to set pending limits: %s", err)
   338  								}
   339  								defer sub.Unsubscribe()
   340  							}
   341  
   342  							// publisher
   343  							ncPub, err := nats.Connect(clientUrl, opts...)
   344  							if err != nil {
   345  								b.Fatal(err)
   346  							}
   347  							defer ncPub.Close()
   348  
   349  							var errorCount = 0
   350  
   351  							// random bytes as payload
   352  							messageData := make([]byte, messageSize)
   353  							rand.New(rand.NewSource(123456)).Read(messageData)
   354  
   355  							quitCh := make(chan bool, 1)
   356  
   357  							publish := func() {
   358  								for {
   359  									select {
   360  									case <-quitCh:
   361  										return
   362  									default:
   363  										// continue publishing
   364  									}
   365  
   366  									fastRandomMutation(messageData, 10)
   367  									err := ncPub.Publish(subject, messageData)
   368  									if err != nil {
   369  										errorCount += 1
   370  									}
   371  								}
   372  							}
   373  
   374  							// Set bytes per operation
   375  							b.SetBytes(messageSize)
   376  							// Start the clock
   377  							b.ResetTimer()
   378  							// Start publishing as fast as the server allows
   379  							go publish()
   380  							// Wait for all subscribers to have delivered b.N messages
   381  							wg.Wait()
   382  							// Stop the clock
   383  							b.StopTimer()
   384  
   385  							// Stop publisher
   386  							quitCh <- true
   387  
   388  							b.ReportMetric(100*float64(errorCount)/float64(b.N), "%error")
   389  						},
   390  					)
   391  				}
   392  			},
   393  		)
   394  	}
   395  }
   396  
   397  func BenchmarkCoreFanIn(b *testing.B) {
   398  
   399  	type BenchPublisher struct {
   400  		// nats connection for this publisher
   401  		conn *nats.Conn
   402  		// number of publishing errors encountered
   403  		publishErrors int
   404  		// number of messages published
   405  		publishCounter int
   406  		// quit channel which will terminate publishing
   407  		quitCh chan bool
   408  		// message data buffer
   409  		messageData []byte
   410  	}
   411  
   412  	const subjectBaseName = "test-subject"
   413  
   414  	messageSizeCases := []int64{
   415  		100,        // 100B
   416  		1024,       // 1KiB
   417  		10240,      // 10KiB
   418  		512 * 1024, // 512KiB
   419  	}
   420  	numPubsCases := []int{
   421  		3,
   422  		5,
   423  		10,
   424  	}
   425  
   426  	// Custom error handler that ignores ErrSlowConsumer.
   427  	// Lots of them are expected in this benchmark which indiscriminately publishes at a rate higher
   428  	// than what the server can relay to subscribers.
   429  	ignoreSlowConsumerErrorHandler := func(_ *nats.Conn, _ *nats.Subscription, err error) {
   430  		if errors.Is(err, nats.ErrSlowConsumer) {
   431  			// Swallow this error
   432  		} else {
   433  			_, _ = fmt.Fprintf(os.Stderr, "Warning: %s\n", err)
   434  		}
   435  	}
   436  
   437  	workload := func(b *testing.B, clientUrl string, numPubs int, messageSize int64) {
   438  
   439  		// connection options
   440  		opts := []nats.Option{
   441  			nats.MaxReconnects(-1),
   442  			nats.ReconnectWait(0),
   443  			nats.ErrorHandler(ignoreSlowConsumerErrorHandler),
   444  		}
   445  
   446  		// waits for all publishers sub-routines and for main thread to be ready
   447  		var publishersReadyWg sync.WaitGroup
   448  		publishersReadyWg.Add(numPubs + 1)
   449  
   450  		// wait group to ensure all publishers have been torn down
   451  		var finishedPublishersWg sync.WaitGroup
   452  		finishedPublishersWg.Add(numPubs)
   453  
   454  		publishers := make([]BenchPublisher, numPubs)
   455  		// create N publishers
   456  		for i := range publishers {
   457  			// create publisher connection
   458  			ncPub, err := nats.Connect(clientUrl, opts...)
   459  			if err != nil {
   460  				b.Fatal(err)
   461  			}
   462  			defer ncPub.Close()
   463  
   464  			// create bench publisher object
   465  			publisher := BenchPublisher{
   466  				conn:           ncPub,
   467  				publishErrors:  0,
   468  				publishCounter: 0,
   469  				quitCh:         make(chan bool, 1),
   470  				messageData:    make([]byte, messageSize),
   471  			}
   472  			rand.New(rand.NewSource(int64(i))).Read(publisher.messageData)
   473  			publishers[i] = publisher
   474  		}
   475  
   476  		// total number of publishers that have published b.N to the subscriber successfully
   477  		completedPublishersCount := 0
   478  
   479  		// wait group blocks main thread until publish workload is completed, it is decremented after subscriber receives b.N messages from all publishers
   480  		var benchCompleteWg sync.WaitGroup
   481  		benchCompleteWg.Add(1)
   482  
   483  		// start subscriber
   484  		ncSub, err := nats.Connect(clientUrl, opts...)
   485  		if err != nil {
   486  			b.Fatal(err)
   487  		}
   488  		defer ncSub.Close()
   489  
   490  		// subscriber
   491  		ncSub.Subscribe(fmt.Sprintf("%s.*", subjectBaseName), func(msg *nats.Msg) {
   492  			// get the publisher id from subject
   493  			pubIdx, err := strconv.Atoi(msg.Subject[len(subjectBaseName)+1:])
   494  			if err != nil {
   495  				b.Fatal(err)
   496  			}
   497  
   498  			// message successfully received from publisher
   499  			publishers[pubIdx].publishCounter += 1
   500  
   501  			// subscriber has received a total of b.N messages from this publisher
   502  			if publishers[pubIdx].publishCounter == b.N {
   503  				completedPublishersCount++
   504  				// every publisher has successfully sent b.N messages to subscriber
   505  				if completedPublishersCount == numPubs {
   506  					benchCompleteWg.Done()
   507  				}
   508  			}
   509  		})
   510  
   511  		// start publisher sub-routines
   512  		for i := range publishers {
   513  			go func(pubId int) {
   514  
   515  				// publisher sub-routine initialized
   516  				publishersReadyWg.Done()
   517  
   518  				publisher := publishers[pubId]
   519  				subject := fmt.Sprintf("%s.%d", subjectBaseName, pubId)
   520  
   521  				// signal that this publisher has been torn down
   522  				defer finishedPublishersWg.Done()
   523  
   524  				// wait till all other publishers are ready to start workload
   525  				publishersReadyWg.Wait()
   526  
   527  				// publish until quitCh is closed
   528  				for {
   529  					select {
   530  					case <-publisher.quitCh:
   531  						return
   532  					default:
   533  						// continue publishing
   534  					}
   535  					fastRandomMutation(publisher.messageData, 10)
   536  					err := publisher.conn.Publish(subject, publisher.messageData)
   537  					if err != nil {
   538  						publisher.publishErrors += 1
   539  					}
   540  				}
   541  			}(i)
   542  		}
   543  
   544  		// set bytes per operation
   545  		b.SetBytes(messageSize)
   546  		// main thread is ready
   547  		publishersReadyWg.Done()
   548  		// wait till publishers are ready
   549  		publishersReadyWg.Wait()
   550  
   551  		// start the clock
   552  		b.ResetTimer()
   553  		// wait till termination cond reached
   554  		benchCompleteWg.Wait()
   555  		// stop the clock
   556  		b.StopTimer()
   557  
   558  		// send quit signal to all publishers
   559  		for i := range publishers {
   560  			publishers[i].quitCh <- true
   561  		}
   562  		// wait for all publishers to shutdown
   563  		finishedPublishersWg.Wait()
   564  
   565  		// sum errors from all publishers
   566  		totalErrors := 0
   567  		for _, publisher := range publishers {
   568  			totalErrors += publisher.publishErrors
   569  		}
   570  		// sum total messages sent from all publishers
   571  		totalMessages := 0
   572  		for _, publisher := range publishers {
   573  			totalMessages += publisher.publishCounter
   574  		}
   575  		errorRate := 100 * float64(totalErrors) / float64(totalMessages)
   576  
   577  		// report error rate
   578  		b.ReportMetric(errorRate, "%error")
   579  
   580  	}
   581  
   582  	// benchmark case matrix
   583  	for _, messageSize := range messageSizeCases {
   584  		b.Run(
   585  			fmt.Sprintf("msgSz=%db", messageSize),
   586  			func(b *testing.B) {
   587  				for _, numPubs := range numPubsCases {
   588  					b.Run(
   589  						fmt.Sprintf("pubs=%d", numPubs),
   590  						func(b *testing.B) {
   591  							// start server
   592  							defaultOpts := DefaultOptions()
   593  							server := RunServer(defaultOpts)
   594  							defer server.Shutdown()
   595  
   596  							// get connection string
   597  							clientUrl := server.ClientURL()
   598  
   599  							// run fan-in workload
   600  							workload(b, clientUrl, numPubs, messageSize)
   601  						})
   602  				}
   603  			})
   604  	}
   605  }
   606  
   607  // fastRandomMutation performs a minor in-place mutation to the given buffer.
   608  // This is useful in benchmark to avoid sending the same payload every time (which could result in some optimizations
   609  // we do not want to measure), while not slowing down the benchmark with a full payload generated for each operation.
   610  func fastRandomMutation(data []byte, mutations int) {
   611  	for i := 0; i < mutations; i++ {
   612  		data[fastrand.Uint32n(uint32(len(data)))] = byte(fastrand.Uint32() % math.MaxUint8)
   613  	}
   614  }