github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/jetstream_benchmark_test.go

// Copyright 2023 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !skip_js_tests && !skip_js_cluster_tests && !skip_js_cluster_tests_2
// +build !skip_js_tests,!skip_js_cluster_tests,!skip_js_cluster_tests_2

package server

import (
	"fmt"
	"math/rand"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/nats-io/nats-server/v2/internal/fastrand"
	"github.com/nats-io/nats.go"
)

func BenchmarkJetStreamConsume(b *testing.B) {

	const (
		verbose          = false
		streamName       = "S"
		subject          = "s"
		seed             = 12345
		publishTimeout   = 30 * time.Second
		PublishBatchSize = 10000
	)

	runSyncPushConsumer := func(b *testing.B, js nats.JetStreamContext, streamName string) (int, int, int) {
		const nextMsgTimeout = 3 * time.Second

		subOpts := []nats.SubOpt{
			nats.BindStream(streamName),
		}
		sub, err := js.SubscribeSync(_EMPTY_, subOpts...)
		if err != nil {
			b.Fatalf("Failed to subscribe: %v", err)
		}
		defer sub.Unsubscribe()

		bitset := NewBitset(uint64(b.N))
		uniqueConsumed, duplicates, errors := 0, 0, 0

		b.ResetTimer()

		for uniqueConsumed < b.N {
			msg, err := sub.NextMsg(nextMsgTimeout)
			if err != nil {
				b.Fatalf("No more messages (received: %d/%d)", uniqueConsumed, b.N)
			}

			metadata, mdErr := msg.Metadata()
			if mdErr != nil {
				errors++
				continue
			}

			ackErr := msg.Ack()
			if ackErr != nil {
				errors++
				continue
			}

			seq := metadata.Sequence.Stream

			index := seq - 1
			if bitset.get(index) {
				duplicates++
				continue
			}

			uniqueConsumed++
			bitset.set(index, true)

			if verbose && uniqueConsumed%1000 == 0 {
				b.Logf("Consumed: %d/%d", bitset.count(), b.N)
			}
		}

		b.StopTimer()

		return uniqueConsumed, duplicates, errors
	}

	runAsyncPushConsumer := func(b *testing.B, js nats.JetStreamContext, streamName string, ordered, durable bool) (int, int, int) {
		const timeout = 3 * time.Minute
		bitset := NewBitset(uint64(b.N))
		doneCh := make(chan bool, 1)
		uniqueConsumed, duplicates, errors := 0, 0, 0

		handleMsg := func(msg *nats.Msg) {
			metadata, mdErr := msg.Metadata()
			if mdErr != nil {
				// fmt.Printf("Metadata error: %v\n", mdErr)
				errors++
				return
			}

			// Ordered defaults to AckNone policy, don't try to ACK
			if !ordered {
				ackErr := msg.Ack()
				if ackErr != nil {
					// fmt.Printf("Ack error: %v\n", ackErr)
					errors++
					return
				}
			}

			seq := metadata.Sequence.Stream

			index := seq - 1
			if bitset.get(index) {
				duplicates++
				return
			}

			uniqueConsumed++
			bitset.set(index, true)

			if uniqueConsumed == b.N {
				msg.Sub.Unsubscribe()
				doneCh <- true
			}
			if verbose && uniqueConsumed%1000 == 0 {
				b.Logf("Consumed %d/%d", uniqueConsumed, b.N)
			}
		}
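
		// nats.BindStream binds the subscription to the stream by name,
		// avoiding a subject-based stream lookup on the server.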
		subOpts := []nats.SubOpt{
			nats.BindStream(streamName),
		}

		if ordered {
			subOpts = append(subOpts, nats.OrderedConsumer())
		}

		if durable {
			subOpts = append(subOpts, nats.Durable("c"))
		}

		sub, err := js.Subscribe(_EMPTY_, handleMsg, subOpts...)
		if err != nil {
			b.Fatalf("Failed to subscribe: %v", err)
		}
		defer sub.Unsubscribe()

		b.ResetTimer()

		select {
		case <-doneCh:
			b.StopTimer()
		case <-time.After(timeout):
			b.Fatalf("Timeout, %d/%d received, %d errors", uniqueConsumed, b.N, errors)
		}

		return uniqueConsumed, duplicates, errors
	}

	runPullConsumer := func(b *testing.B, js nats.JetStreamContext, streamName string, durable bool) (int, int, int) {
		const fetchMaxWait = nats.MaxWait(3 * time.Second)
		const fetchMaxMessages = 1000

		bitset := NewBitset(uint64(b.N))
		uniqueConsumed, duplicates, errors := 0, 0, 0

		subOpts := []nats.SubOpt{
			nats.BindStream(streamName),
		}

		consumerName := _EMPTY_ // Default ephemeral
		if durable {
			consumerName = "c" // Durable
		}

		sub, err := js.PullSubscribe("", consumerName, subOpts...)
		if err != nil {
			b.Fatalf("Failed to subscribe: %v", err)
		}
		defer sub.Unsubscribe()

		b.ResetTimer()

	fetchLoop:
		for {
			msgs, err := sub.Fetch(fetchMaxMessages, fetchMaxWait)
			if err != nil {
				b.Fatalf("Failed to fetch: %v", err)
			}

		processMsgsLoop:
			for _, msg := range msgs {
				metadata, mdErr := msg.Metadata()
				if mdErr != nil {
					errors++
					continue processMsgsLoop
				}

				ackErr := msg.Ack()
				if ackErr != nil {
					errors++
					continue processMsgsLoop
				}

				seq := metadata.Sequence.Stream

				index := seq - 1
				if bitset.get(index) {
					duplicates++
					continue processMsgsLoop
				}

				uniqueConsumed++
				bitset.set(index, true)

				if uniqueConsumed == b.N {
					msg.Sub.Unsubscribe()
					break fetchLoop
				}

				if verbose && uniqueConsumed%1000 == 0 {
					b.Logf("Consumed %d/%d", uniqueConsumed, b.N)
				}
			}
		}

		b.StopTimer()

		return uniqueConsumed, duplicates, errors
	}

	type ConsumerType string

	const (
		PushSync         ConsumerType = "PUSH[Sync,Ephemeral]"
		PushAsync        ConsumerType = "PUSH[Async,Ephemeral]"
		PushAsyncOrdered ConsumerType = "PUSH[Async,Ordered]"
		PushAsyncDurable ConsumerType = "PUSH[Async,Durable]"
		PullDurable      ConsumerType = "PULL[Durable]"
		PullEphemeral    ConsumerType = "PULL[Ephemeral]"
	)

	benchmarksCases := []struct {
		clusterSize int
		replicas    int
		messageSize int
		minMessages int
	}{
		{1, 1, 10, 100_000}, // Single node, 10B messages, ~1MiB minimum
		{1, 1, 1024, 1_000}, // Single node, 1KB messages, ~1MiB minimum
		{3, 3, 10, 100_000}, // Cluster, R3, 10B messages, ~1MiB minimum
		{3, 3, 1024, 1_000}, // Cluster, R3, 1KB messages, ~1MiB minimum
	}

	// Each of the cases above is run with each of the consumer types
	consumerTypes := []ConsumerType{
		PushSync,
		PushAsync,
		PushAsyncOrdered,
		PushAsyncDurable,
		PullDurable,
		PullEphemeral,
	}

	for _, bc := range benchmarksCases {

		name := fmt.Sprintf(
			"N=%d,R=%d,MsgSz=%db",
			bc.clusterSize,
			bc.replicas,
			bc.messageSize,
		)

		b.Run(
			name,
			func(b *testing.B) {

				for _, ct := range consumerTypes {
					name := fmt.Sprintf(
						"%v",
						ct,
					)
					b.Run(
						name,
						func(b *testing.B) {
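							// The Go benchmark harness invokes this function
							// repeatedly, growing b.N until the timed section is
							// long enough to measure reliably.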
							// Skip short runs, benchmark gets re-executed with a larger N
							if b.N < bc.minMessages {
								b.ResetTimer()
								return
							}

							if verbose {
								b.Logf("Running %s with %d messages", name, b.N)
							}

							if verbose {
								b.Logf("Setting up %d nodes", bc.clusterSize)
							}

							cl, _, shutdown, nc, js := startJSClusterAndConnect(b, bc.clusterSize)
							defer shutdown()
							defer nc.Close()

							if verbose {
								b.Logf("Creating stream with R=%d", bc.replicas)
							}
							streamConfig := &nats.StreamConfig{
								Name:     streamName,
								Subjects: []string{subject},
								Replicas: bc.replicas,
							}
							if _, err := js.AddStream(streamConfig); err != nil {
								b.Fatalf("Error creating stream: %v", err)
							}

							// If replicated resource, connect to stream leader for lower variability
							if bc.replicas > 1 {
								connectURL := cl.streamLeader("$G", streamName).ClientURL()
								nc.Close()
								_, js = jsClientConnectURL(b, connectURL)
							}

							message := make([]byte, bc.messageSize)
							rand.New(rand.NewSource(int64(seed))).Read(message)

							// Publish b.N messages to the stream (in batches)
							for i := 1; i <= b.N; i++ {
								fastRandomMutation(message, 10)
								_, err := js.PublishAsync(subject, message)
								if err != nil {
									b.Fatalf("Failed to publish: %s", err)
								}
								// Limit outstanding published messages to PublishBatchSize
								if i%PublishBatchSize == 0 || i == b.N {
									select {
									case <-js.PublishAsyncComplete():
										if verbose {
											b.Logf("Published %d/%d messages", i, b.N)
										}
									case <-time.After(publishTimeout):
										b.Fatalf("Publish timed out")
									}
								}
							}

							// Set size of each operation, for throughput calculation
							b.SetBytes(int64(bc.messageSize))

							// Discard time spent during setup
							// Consumer may reset again further in
							b.ResetTimer()

							var consumed, duplicates, errors int

							const (
								ordered   = true
								unordered = false
								durable   = true
								ephemeral = false
							)

							switch ct {
							case PushSync:
								consumed, duplicates, errors = runSyncPushConsumer(b, js, streamName)
							case PushAsync:
								consumed, duplicates, errors = runAsyncPushConsumer(b, js, streamName, unordered, ephemeral)
							case PushAsyncOrdered:
								consumed, duplicates, errors = runAsyncPushConsumer(b, js, streamName, ordered, ephemeral)
							case PushAsyncDurable:
								consumed, duplicates, errors = runAsyncPushConsumer(b, js, streamName, unordered, durable)
							case PullDurable:
								consumed, duplicates, errors = runPullConsumer(b, js, streamName, durable)
							case PullEphemeral:
								consumed, duplicates, errors = runPullConsumer(b, js, streamName, ephemeral)
							default:
								b.Fatalf("Unknown consumer type: %v", ct)
							}

							// Benchmark ends here, (consumer may have stopped earlier)
							b.StopTimer()

							if consumed != b.N {
								b.Fatalf("Something doesn't add up: %d != %d", consumed, b.N)
							}

							b.ReportMetric(float64(duplicates)*100/float64(b.N), "%dupe")
							b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
						},
					)
				}
			},
		)
	}
}

func BenchmarkJetStreamConsumeWithFilters(b *testing.B) {
	const (
		verbose          = false
		streamName       = "S"
		subjectPrefix    = "s"
		seed             = 123456
		messageSize      = 32
		consumerReplicas = 1
		domainNameLength = 36 // Length of domain portion of subject, must be an even number
		publishBatchSize = 1000
		publishTimeout   = 10 * time.Second
	)
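
	// The benchmark matrix is the cross product of the cluster topologies and
	// the filtered-consumer workloads defined below.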
	clusterSizeCases := []struct {
		clusterSize int              // Single node or cluster
		replicas    int              // Stream replicas
		storage     nats.StorageType // Stream storage
	}{
		{1, 1, nats.MemoryStorage},
		{3, 3, nats.MemoryStorage},
	}

	benchmarksCases := []struct {
		domains             int // Number of distinct domains
		subjectsPerDomain   int // Number of distinct subjects within each domain
		filters             int // Number of filters (<prefix>.<domain>.>) per consumer
		concurrentConsumers int // Number of consumers running concurrently
	}{
		{100, 10, 5, 12},
		{1000, 10, 25, 12},
		{10_000, 10, 50, 12},
	}

	for _, cs := range clusterSizeCases {
		name := fmt.Sprintf(
			"N=%d,R=%d,storage=%s",
			cs.clusterSize,
			cs.replicas,
			cs.storage.String(),
		)
		b.Run(
			name,
			func(b *testing.B) {

				for _, bc := range benchmarksCases {

					name := fmt.Sprintf(
						"D=%d,DS=%d,F=%d,C=%d",
						bc.domains,
						bc.subjectsPerDomain,
						bc.filters,
						bc.concurrentConsumers,
					)

					b.Run(
						name,
						func(b *testing.B) {

							cl, s, shutdown, nc, js := startJSClusterAndConnect(b, cs.clusterSize)
							defer shutdown()
							defer nc.Close()

							if verbose {
								b.Logf("Creating stream with R=%d", cs.replicas)
							}
							streamConfig := &nats.StreamConfig{
								Name:              streamName,
								Subjects:          []string{subjectPrefix + ".>"},
								Storage:           cs.storage,
								Retention:         nats.LimitsPolicy,
								MaxAge:            time.Hour,
								Duplicates:        10 * time.Second,
								Discard:           nats.DiscardOld,
								NoAck:             false,
								MaxMsgs:           -1,
								MaxBytes:          -1,
								MaxConsumers:      -1,
								Replicas:          cs.replicas,
								MaxMsgsPerSubject: 1,
							}
							if _, err := js.AddStream(streamConfig); err != nil {
								b.Fatalf("Error creating stream: %v", err)
							}

							// If replicated resource, connect to stream leader for lower variability
							connectURL := s.ClientURL()
							if cs.replicas > 1 {
								connectURL = cl.streamLeader("$G", streamName).ClientURL()
								nc.Close()
								_, js = jsClientConnectURL(b, connectURL)
							}

							rng := rand.New(rand.NewSource(int64(seed)))
							message := make([]byte, messageSize)
							domain := make([]byte, domainNameLength/2)

							domains := make([]string, 0, bc.domains*bc.subjectsPerDomain)

							// Publish one message per subject for each domain
							published := 0
							totalMessages := bc.domains * bc.subjectsPerDomain
							for d := 1; d <= bc.domains; d++ {
								rng.Read(domain)
								for s := 1; s <= bc.subjectsPerDomain; s++ {
									rng.Read(message)
									domainString := fmt.Sprintf("%X", domain)
									domains = append(domains, domainString)
									subject := fmt.Sprintf("%s.%s.%d", subjectPrefix, domainString, s)
									_, err := js.PublishAsync(subject, message)
									if err != nil {
										b.Fatalf("failed to publish: %s", err)
									}
									published += 1

									// Wait for all pending to be published before trying to publish the next batch
									if published%publishBatchSize == 0 || published == totalMessages {
										select {
										case <-js.PublishAsyncComplete():
											if verbose {
												b.Logf("Published %d/%d messages", published, totalMessages)
											}
										case <-time.After(publishTimeout):
											b.Fatalf("Publish timed out")
										}
									}
								}
							}
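
							// With MaxMsgsPerSubject=1 the stream retains exactly
							// one message per subject, so a filter on one domain
							// matches subjectsPerDomain messages.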
							// Number of messages that each new consumer expects to consume
							messagesPerIteration := bc.filters * bc.subjectsPerDomain

							// Each call to subscribeConsumeUnsubscribe is one benchmark operation,
							// i.e. it is invoked a total of b.N times (split among C threads).
							// Each operation consists of:
							//  - Create filters
							//  - Create consumer / Subscribe
							//  - Consume expected number of messages
							//  - Unsubscribe
							subscribeConsumeUnsubscribe := func(js nats.JetStreamContext, rng *rand.Rand) {

								// Select F unique domains to create F non-overlapping filters
								filterDomains := make(map[string]bool, bc.filters)
								filters := make([]string, 0, bc.filters)
								for len(filterDomains) < bc.filters {
									domain := domains[rng.Intn(len(domains))]
									if _, found := filterDomains[domain]; found {
										// Collision with existing filter, try again
										continue
									}
									filterDomains[domain] = true
									filters = append(filters, fmt.Sprintf("%s.%s.>", subjectPrefix, domain))
								}

								if verbose {
									b.Logf("Subscribe with filters: %+v", filters)
								}

								// Consumer callback
								received := 0
								consumeWg := sync.WaitGroup{}
								consumeWg.Add(1)
								cb := func(msg *nats.Msg) {
									received += 1
									if received == messagesPerIteration {
										consumeWg.Done()
										if verbose {
											b.Logf("Received %d/%d messages", received, messagesPerIteration)
										}
									}
								}

								// Create consumer
								subOpts := []nats.SubOpt{
									nats.BindStream(streamName),
									nats.OrderedConsumer(),
									nats.ConsumerReplicas(consumerReplicas),
									nats.ConsumerFilterSubjects(filters...),
									nats.ConsumerMemoryStorage(),
								}

								sub, err := js.Subscribe("", cb, subOpts...)
								if err != nil {
									b.Fatalf("Failed to subscribe: %s", err)
								}

								defer func(sub *nats.Subscription) {
									err := sub.Unsubscribe()
									if err != nil {
										b.Logf("Failed to unsubscribe: %s", err)
									}
								}(sub)

								consumeWg.Wait()
							}

							// Wait for all consumer threads and main to be ready
							wgReady := sync.WaitGroup{}
							wgReady.Add(bc.concurrentConsumers + 1)
							// Wait until all consumer threads have completed
							wgCompleted := sync.WaitGroup{}
							wgCompleted.Add(bc.concurrentConsumers)
							// Operations left for consumer threads
							opsCount := atomic.Int32{}
							opsCount.Store(int32(b.N))

							// Start a pool of C goroutines, each one with a dedicated connection.
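							// Each goroutine claims work by atomically decrementing
							// opsCount, so exactly b.N operations run across the pool.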
							for i := 1; i <= bc.concurrentConsumers; i++ {
								go func(consumerId int) {

									// Connect
									nc, js := jsClientConnectURL(b, connectURL)
									defer nc.Close()

									// Signal completion of work
									defer wgCompleted.Done()

									rng := rand.New(rand.NewSource(int64(seed + consumerId)))

									// Ready, wait for everyone else
									wgReady.Done()
									wgReady.Wait()

									completed := 0
									for opsCount.Add(-1) >= 0 {
										subscribeConsumeUnsubscribe(js, rng)
										completed += 1
									}
									if verbose {
										b.Logf("Consumer thread %d completed %d of %d operations", consumerId, completed, b.N)
									}
								}(i)
							}

							// Wait for all consumers to be ready
							wgReady.Done()
							wgReady.Wait()

							// Start measuring time
							b.ResetTimer()

							// Wait for consumers to have chewed through b.N operations
							wgCompleted.Wait()
							b.StopTimer()

							// Throughput is not very important in this benchmark since each
							// operation includes subscribe, unsubscribe and retrieves just a few bytes
							//b.SetBytes(int64(messageSize * messagesPerIteration))
						},
					)
				}
			},
		)
	}
}

func BenchmarkJetStreamPublish(b *testing.B) {

	const (
		verbose    = false
		seed       = 12345
		streamName = "S"
	)

	runSyncPublisher := func(b *testing.B, js nats.JetStreamContext, messageSize int, subjects []string) (int, int) {
		published, errors := 0, 0
		message := make([]byte, messageSize)
		rand.New(rand.NewSource(int64(seed))).Read(message)

		b.ResetTimer()

		for i := 1; i <= b.N; i++ {
			fastRandomMutation(message, 10)
			subject := subjects[fastrand.Uint32n(uint32(len(subjects)))]
			_, pubErr := js.Publish(subject, message)
			if pubErr != nil {
				errors++
			} else {
				published++
			}

			if verbose && i%1000 == 0 {
				b.Logf("Published %d/%d, %d errors", i, b.N, errors)
			}
		}

		b.StopTimer()

		return published, errors
	}

	runAsyncPublisher := func(b *testing.B, js nats.JetStreamContext, messageSize int, subjects []string, asyncWindow int) (int, int) {
		const publishCompleteMaxWait = 30 * time.Second
		rng := rand.New(rand.NewSource(int64(seed)))
		message := make([]byte, messageSize)
		rng.Read(message)

		published, errors := 0, 0

		b.ResetTimer()

		for published < b.N {

			// Normally publish a full batch (of size `asyncWindow`)
			publishBatchSize := asyncWindow
			// Unless fewer are left to complete the benchmark
			if b.N-published < asyncWindow {
				publishBatchSize = b.N - published
			}

			pending := make([]nats.PubAckFuture, 0, publishBatchSize)

			for i := 0; i < publishBatchSize; i++ {
				fastRandomMutation(message, 10)
				subject := subjects[rng.Intn(len(subjects))]
				pubAckFuture, err := js.PublishAsync(subject, message)
				if err != nil {
					errors++
					continue
				}
				pending = append(pending, pubAckFuture)
			}

			// Entire batch submitted, wait for all acks to complete
			select {
			case <-js.PublishAsyncComplete():
			case <-time.After(publishCompleteMaxWait):
				b.Fatalf("Publish timed out")
			}
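
			// PublishAsyncComplete only signals that no acks are outstanding;
			// each future still needs to be inspected to distinguish success
			// from error.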
			// Verify one by one if they were published successfully
			for _, pubAckFuture := range pending {
				select {
				case <-pubAckFuture.Ok():
					published++
				case <-pubAckFuture.Err():
					errors++
				default:
					b.Fatalf("PubAck is still pending after publish completed")
				}
			}

			if verbose {
				b.Logf("Published %d/%d", published, b.N)
			}
		}

		b.StopTimer()

		return published, errors
	}

	type PublishType string

	const (
		Sync  PublishType = "Sync"
		Async PublishType = "Async"
	)

	benchmarksCases := []struct {
		clusterSize int
		replicas    int
		messageSize int
		numSubjects int
		minMessages int
	}{
		{1, 1, 10, 1, 100_000}, // Single node, 10B messages, ~1MB minimum
		{1, 1, 1024, 1, 1_000}, // Single node, 1KB messages, ~1MB minimum
		{3, 3, 10, 1, 100_000}, // 3-node cluster, R=3, 10B messages, ~1MB minimum
		{3, 3, 1024, 1, 1_000}, // 3-node cluster, R=3, 1KB messages, ~1MB minimum
	}

	// All the cases above are run with each of the publisher cases below
	publisherCases := []struct {
		pType       PublishType
		asyncWindow int
	}{
		{Sync, -1},
		{Async, 1000},
		{Async, 4000},
		{Async, 8000},
	}

	for _, bc := range benchmarksCases {
		name := fmt.Sprintf(
			"N=%d,R=%d,MsgSz=%db,Subjs=%d",
			bc.clusterSize,
			bc.replicas,
			bc.messageSize,
			bc.numSubjects,
		)

		b.Run(
			name,
			func(b *testing.B) {

				for _, pc := range publisherCases {
					name := fmt.Sprintf("%v", pc.pType)
					if pc.pType == Async && pc.asyncWindow > 0 {
						name = fmt.Sprintf("%s[W:%d]", name, pc.asyncWindow)
					}

					b.Run(
						name,
						func(b *testing.B) {

							subjects := make([]string, bc.numSubjects)
							for i := 0; i < bc.numSubjects; i++ {
								subjects[i] = fmt.Sprintf("s-%d", i+1)
							}

							if verbose {
								b.Logf("Running %s with %d ops", name, b.N)
							}

							if verbose {
								b.Logf("Setting up %d nodes", bc.clusterSize)
							}

							cl, _, shutdown, nc, _ := startJSClusterAndConnect(b, bc.clusterSize)
							defer shutdown()
							defer nc.Close()

							jsOpts := []nats.JSOpt{
								nats.MaxWait(10 * time.Second),
							}

							if pc.asyncWindow > 0 && pc.pType == Async {
								jsOpts = append(jsOpts, nats.PublishAsyncMaxPending(pc.asyncWindow))
							}

							js, err := nc.JetStream(jsOpts...)
							if err != nil {
								b.Fatalf("Unexpected error getting JetStream context: %v", err)
							}

							if verbose {
								b.Logf("Creating stream with R=%d and %d input subjects", bc.replicas, bc.numSubjects)
							}
							streamConfig := &nats.StreamConfig{
								Name:     streamName,
								Subjects: subjects,
								Replicas: bc.replicas,
							}
							if _, err := js.AddStream(streamConfig); err != nil {
								b.Fatalf("Error creating stream: %v", err)
							}

							// If replicated resource, connect to stream leader for lower variability
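							// (Publishing through a follower adds an extra
							// server-to-server hop to the leader, which adds
							// variance to the measurements.)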
							if bc.replicas > 1 {
								connectURL := cl.streamLeader("$G", streamName).ClientURL()
								nc.Close()
								nc, err = nats.Connect(connectURL)
								if err != nil {
									b.Fatalf("Failed to create client connection to stream leader: %v", err)
								}
								defer nc.Close()
								js, err = nc.JetStream(jsOpts...)
								if err != nil {
									b.Fatalf("Unexpected error getting JetStream context for stream leader: %v", err)
								}
							}

							if verbose {
								b.Logf("Running %v publisher with message size: %dB", pc.pType, bc.messageSize)
							}

							b.SetBytes(int64(bc.messageSize))

							// Benchmark starts here
							b.ResetTimer()

							var published, errors int
							switch pc.pType {
							case Sync:
								published, errors = runSyncPublisher(b, js, bc.messageSize, subjects)
							case Async:
								published, errors = runAsyncPublisher(b, js, bc.messageSize, subjects, pc.asyncWindow)
							}

							// Benchmark ends here
							b.StopTimer()

							if published+errors != b.N {
								b.Fatalf("Something doesn't add up: %d + %d != %d", published, errors, b.N)
							}

							b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
						},
					)
				}
			},
		)
	}
}

func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) {

	const (
		verbose          = true
		seed             = 12345
		publishBatchSize = 100
		messageSize      = 256
		numSubjects      = 2500
		subjectPrefix    = "S"
		numPublishers    = 4
		randomData       = true
		warmupMessages   = 1
	)

	if verbose {
		b.Logf(
			"BatchSize: %d, MsgSize: %d, Subjects: %d, Publishers: %d, Random Message: %v",
			publishBatchSize,
			messageSize,
			numSubjects,
			numPublishers,
			randomData,
		)
	}

	// Benchmark parameters: sub-benchmarks are executed for every combination of
	// the following 3 groups, unless a more restrictive filter is specified, e.g.:
	// BenchmarkJetStreamInterestStreamWithLimit/.*R=3.*/Storage=Memory/unlimited

	// Parameter: Number of nodes and number of stream replicas
	clusterAndReplicasCases := []struct {
		clusterSize int
		replicas    int
	}{
		{1, 1}, // Single node, R=1
		{3, 3}, // 3-node cluster, R=3
	}

	// Parameter: Stream storage type
	storageTypeCases := []nats.StorageType{
		nats.MemoryStorage,
		nats.FileStorage,
	}

	// Parameter: Stream limit configuration
	limitConfigCases := map[string]func(*nats.StreamConfig){
		"unlimited": func(config *nats.StreamConfig) {
		},
		"MaxMsg=1000": func(config *nats.StreamConfig) {
			config.MaxMsgs = 1000
		},
		"MaxMsg=10": func(config *nats.StreamConfig) {
			config.MaxMsgs = 10
		},
		"MaxPerSubject=10": func(config *nats.StreamConfig) {
			config.MaxMsgsPerSubject = 10
		},
		"MaxAge=1s": func(config *nats.StreamConfig) {
			config.MaxAge = 1 * time.Second
		},
		"MaxBytes=1MB": func(config *nats.StreamConfig) {
			config.MaxBytes = 1024 * 1024
		},
	}

	// Context shared by publishers routines
	type PublishersContext = struct {
		readyWg      sync.WaitGroup
		completedWg  sync.WaitGroup
		messagesLeft int
		lock         sync.Mutex
		errors       int
	}

	// Helper: Publish synchronously as Goroutine
	publish := func(publisherId int, ctx *PublishersContext, js nats.JetStreamContext) {
		defer ctx.completedWg.Done()
		errors := 0
		messageBuf := make([]byte, messageSize)
		rand.New(rand.NewSource(int64(seed + publisherId))).Read(messageBuf)

		// Warm up: publish a few messages
		for i := 0; i < warmupMessages; i++ {
			subject := fmt.Sprintf("%s.%d", subjectPrefix, fastrand.Uint32n(numSubjects))
			if randomData {
				fastRandomMutation(messageBuf, 10)
			}
			_, err := js.Publish(subject, messageBuf)
			if err != nil {
				b.Logf("Warning: failed to publish warmup message: %s", err)
			}
		}
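
		// These warmup publishes happen before readyWg.Done(), so they are
		// never part of the timed section.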

		// Signal this publisher is ready
		ctx.readyWg.Done()

		for {
			// Obtain a batch of messages to publish
			batchSize := 0
			{
				ctx.lock.Lock()
				if ctx.messagesLeft >= publishBatchSize {
					batchSize = publishBatchSize
				} else {
					batchSize = ctx.messagesLeft
				}
				ctx.messagesLeft -= batchSize
				ctx.lock.Unlock()
			}

			// Nothing left to publish, terminate
			if batchSize == 0 {
				ctx.lock.Lock()
				ctx.errors += errors
				ctx.lock.Unlock()
				return
			}

			// Publish a batch of messages
			for i := 0; i < batchSize; i++ {
				subject := fmt.Sprintf("%s.%d", subjectPrefix, fastrand.Uint32n(numSubjects))
				if randomData {
					fastRandomMutation(messageBuf, 10)
				}
				_, err := js.Publish(subject, messageBuf)
				if err != nil {
					errors += 1
				}
			}
		}
	}

	// Benchmark matrix: (cluster and replicas) * (storage type) * (stream limit)
	for _, benchmarkCase := range clusterAndReplicasCases {
		b.Run(
			fmt.Sprintf(
				"N=%d,R=%d",
				benchmarkCase.clusterSize,
				benchmarkCase.replicas,
			),
			func(b *testing.B) {
				for _, storageType := range storageTypeCases {
					b.Run(
						fmt.Sprintf("Storage=%v", storageType),
						func(b *testing.B) {

							for limitDescription, limitConfigFunc := range limitConfigCases {
								b.Run(
									limitDescription,
									func(b *testing.B) {

										// Print benchmark parameters
										if verbose {
											b.Logf(
												"Stream: %+v, Storage: [%v] Limit: [%s], Ops: %d",
												benchmarkCase,
												storageType,
												limitDescription,
												b.N,
											)
										}

										// Setup server or cluster
										cl, ls, shutdown, nc, js := startJSClusterAndConnect(b, benchmarkCase.clusterSize)
										defer shutdown()
										defer nc.Close()

										// Common stream configuration
										streamConfig := &nats.StreamConfig{
											Name:      "S",
											Subjects:  []string{fmt.Sprintf("%s.>", subjectPrefix)},
											Replicas:  benchmarkCase.replicas,
											Storage:   storageType,
											Discard:   nats.DiscardOld,
											Retention: nats.InterestPolicy,
										}
										// Configure stream limit
										limitConfigFunc(streamConfig)

										// Create stream
										if _, err := js.AddStream(streamConfig); err != nil {
											b.Fatalf("Error creating stream: %v", err)
										}

										// Set up publishers shared context
										var pubCtx PublishersContext
										pubCtx.readyWg.Add(numPublishers)
										pubCtx.completedWg.Add(numPublishers)

										// Hold this lock until all publishers are ready
										pubCtx.lock.Lock()
										pubCtx.messagesLeft = b.N

										connectURL := ls.ClientURL()
										// If replicated resource, connect to stream leader for lower variability
										if benchmarkCase.replicas > 1 {
											connectURL = cl.streamLeader("$G", "S").ClientURL()
										}

										// Spawn publisher goroutines, each with its own connection and JS context
										for i := 0; i < numPublishers; i++ {
											nc, err := nats.Connect(connectURL)
											if err != nil {
												b.Fatal(err)
											}
											defer nc.Close()
											js, err := nc.JetStream()
											if err != nil {
												b.Fatal(err)
											}
											go publish(i, &pubCtx, js)
										}

										// Wait for all publishers to be ready
										pubCtx.readyWg.Wait()

										// Set size of each operation, for throughput calculation
										b.SetBytes(messageSize)

										// Benchmark starts here
										b.ResetTimer()
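
										// Publishers are blocked claiming their first
										// batch on pubCtx.lock; releasing it starts
										// them all at once.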
										// Unblock the publishers
										pubCtx.lock.Unlock()

										// Wait for all publishers to complete
										pubCtx.completedWg.Wait()

										// Benchmark ends here
										b.StopTimer()

										// Sanity check, publishers may have died before completing
										if pubCtx.messagesLeft != 0 {
											b.Fatalf("Some messages left: %d", pubCtx.messagesLeft)
										}

										b.ReportMetric(float64(pubCtx.errors)*100/float64(b.N), "%error")
									},
								)
							}
						},
					)
				}
			},
		)
	}
}

func BenchmarkJetStreamKV(b *testing.B) {

	const (
		verbose   = false
		kvName    = "BUCKET"
		keyPrefix = "K_"
		seed      = 12345
	)

	runKVGet := func(b *testing.B, kv nats.KeyValue, keys []string) int {
		rng := rand.New(rand.NewSource(int64(seed)))
		errors := 0

		b.ResetTimer()

		for i := 1; i <= b.N; i++ {
			key := keys[rng.Intn(len(keys))]
			_, err := kv.Get(key)
			if err != nil {
				errors++
				continue
			}

			if verbose && i%1000 == 0 {
				b.Logf("Completed %d/%d Get ops", i, b.N)
			}
		}

		b.StopTimer()
		return errors
	}

	runKVPut := func(b *testing.B, kv nats.KeyValue, keys []string, valueSize int) int {

		value := make([]byte, valueSize)
		rand.New(rand.NewSource(int64(seed))).Read(value)
		errors := 0

		b.ResetTimer()

		for i := 1; i <= b.N; i++ {
			key := keys[fastrand.Uint32n(uint32(len(keys)))]
			fastRandomMutation(value, 10)
			_, err := kv.Put(key, value)
			if err != nil {
				errors++
				continue
			}

			if verbose && i%1000 == 0 {
				b.Logf("Completed %d/%d Put ops", i, b.N)
			}
		}

		b.StopTimer()
		return errors
	}
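
	// runKVUpdate benchmarks a compare-and-swap workload: each operation reads
	// a key, then updates it conditioned on the revision just read.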
	runKVUpdate := func(b *testing.B, kv nats.KeyValue, keys []string, valueSize int) int {
		value := make([]byte, valueSize)
		rand.New(rand.NewSource(int64(seed))).Read(value)
		errors := 0

		b.ResetTimer()

		for i := 1; i <= b.N; i++ {
			key := keys[fastrand.Uint32n(uint32(len(keys)))]

			kve, getErr := kv.Get(key)
			if getErr != nil {
				errors++
				continue
			}

			fastRandomMutation(value, 10)
			_, updateErr := kv.Update(key, value, kve.Revision())
			if updateErr != nil {
				errors++
				continue
			}

			if verbose && i%1000 == 0 {
				b.Logf("Completed %d/%d Update ops", i, b.N)
			}
		}

		b.StopTimer()
		return errors
	}

	type WorkloadType string

	const (
		Get    WorkloadType = "GET"
		Put    WorkloadType = "PUT"
		Update WorkloadType = "CAS"
	)

	benchmarksCases := []struct {
		clusterSize int
		replicas    int
		numKeys     int
		valueSize   int
	}{
		{1, 1, 100, 100},   // 1 node with 100 keys, 100B values
		{1, 1, 1000, 100},  // 1 node with 1000 keys, 100B values
		{3, 3, 100, 100},   // 3 nodes with 100 keys, 100B values
		{3, 3, 1000, 100},  // 3 nodes with 1000 keys, 100B values
		{3, 3, 1000, 1024}, // 3 nodes with 1000 keys, 1KB values
	}

	workloadCases := []WorkloadType{
		Get,
		Put,
		Update,
	}

	for _, bc := range benchmarksCases {

		bName := fmt.Sprintf(
			"N=%d,R=%d,B=1,K=%d,ValSz=%db",
			bc.clusterSize,
			bc.replicas,
			bc.numKeys,
			bc.valueSize,
		)

		b.Run(
			bName,
			func(b *testing.B) {
				for _, wc := range workloadCases {
					wName := fmt.Sprintf("%v", wc)
					b.Run(
						wName,
						func(b *testing.B) {

							if verbose {
								b.Logf("Running %s workload %s with %d messages", wName, bName, b.N)
							}

							if verbose {
								b.Logf("Setting up %d nodes", bc.clusterSize)
							}

							// Pre-generate all keys
							keys := make([]string, 0, bc.numKeys)
							for i := 1; i <= bc.numKeys; i++ {
								key := fmt.Sprintf("%s%d", keyPrefix, i)
								keys = append(keys, key)
							}

							// Setup server or cluster
							cl, _, shutdown, nc, js := startJSClusterAndConnect(b, bc.clusterSize)
							defer shutdown()
							defer nc.Close()

							// Create bucket
							if verbose {
								b.Logf("Creating KV %s with R=%d", kvName, bc.replicas)
							}
							kvConfig := &nats.KeyValueConfig{
								Bucket:   kvName,
								Replicas: bc.replicas,
							}
							kv, err := js.CreateKeyValue(kvConfig)
							if err != nil {
								b.Fatalf("Error creating KV: %v", err)
							}

							// Initialize all keys
							rng := rand.New(rand.NewSource(int64(seed)))
							value := make([]byte, bc.valueSize)
							for _, key := range keys {
								rng.Read(value)
								_, err := kv.Create(key, value)
								if err != nil {
									b.Fatalf("Failed to initialize %s/%s: %v", kvName, key, err)
								}
							}

							// If replicated resource, connect to stream leader for lower variability
							if bc.replicas > 1 {
								nc.Close()
								connectURL := cl.streamLeader("$G", fmt.Sprintf("KV_%s", kvName)).ClientURL()
								nc, js = jsClientConnectURL(b, connectURL)
								defer nc.Close()
							}

							kv, err = js.KeyValue(kv.Bucket())
							if err != nil {
								b.Fatalf("Error binding to KV: %v", err)
							}

							// Set size of each operation, for throughput calculation
							b.SetBytes(int64(bc.valueSize))

							// Discard time spent during setup
							// May reset again further in
							b.ResetTimer()

							var errors int

							switch wc {
							case Get:
								errors = runKVGet(b, kv, keys)
							case Put:
								errors = runKVPut(b, kv, keys, bc.valueSize)
							case Update:
								errors = runKVUpdate(b, kv, keys, bc.valueSize)
							default:
								b.Fatalf("Unknown workload type: %v", wc)
							}

							// Benchmark ends here (timer may have stopped earlier)
							b.StopTimer()

							b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
						},
					)
				}
			},
		)
	}
}

func BenchmarkJetStreamObjStore(b *testing.B) {
	const (
		verbose      = false
		objStoreName = "B"
		keyPrefix    = "K_"
		seed         = 12345
		initKeys     = true

		// read/write ratios
		ReadOnly  = 1.0
		WriteOnly = 0.0
	)

	// rwRatio to string
	rwRatioToString := func(rwRatio float64) string {
		switch rwRatio {
		case ReadOnly:
			return "readOnly"
		case WriteOnly:
			return "writeOnly"
		default:
			return fmt.Sprintf("%0.1f", rwRatio)
		}
	}

	// Benchmark the object store by performing read/write operations on data of random size
	RunObjStoreBenchmark := func(b *testing.B, objStore nats.ObjectStore, minObjSz int, maxObjSz int, numKeys int, rwRatio float64) (int, int, int) {
		var (
			errors int
			reads  int
			writes int
		)

		dataBuf := make([]byte, maxObjSz)
		rng := rand.New(rand.NewSource(int64(seed)))
		rng.Read(dataBuf)

		// Each operation processes a random number of bytes within the given size
		// range, read from or written to the object store bucket. For throughput
		// reporting, approximate the operation size with the mean of the range.
		b.SetBytes(int64((minObjSz + maxObjSz) / 2))
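
		// rwRatio is the fraction of reads: each iteration draws a uniform
		// sample in [0,1) and performs a read when the sample is at or below
		// rwRatio.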
		for i := 1; i <= b.N; i++ {
			key := fmt.Sprintf("%s_%d", keyPrefix, rng.Intn(numKeys))
			var err error

			rwOp := rng.Float64()
			switch {
			case rwOp <= rwRatio:
				// Read Op
				_, err = objStore.GetBytes(key)
				reads++
			case rwOp > rwRatio:
				// Write Op
				// dataSz is a random value between min-max object size and cannot be less than 1 byte
				dataSz := rng.Intn(maxObjSz-minObjSz+1) + minObjSz
				data := dataBuf[:dataSz]
				fastRandomMutation(data, 10)
				_, err = objStore.PutBytes(key, data)
				writes++
			}
			if err != nil {
				errors++
			}

			if verbose && i%1000 == 0 {
				b.Logf("Completed: %d reads, %d writes, %d errors. %d/%d total operations have been completed.", reads, writes, errors, i, b.N)
			}
		}
		return errors, reads, writes
	}

	// benchmark cases table
	benchmarkCases := []struct {
		storage  nats.StorageType
		numKeys  int
		minObjSz int
		maxObjSz int
	}{
		{nats.MemoryStorage, 100, 1024, 102400},     // mem storage, 100 objects sized (1KB-100KB)
		{nats.MemoryStorage, 100, 102400, 1048576},  // mem storage, 100 objects sized (100KB-1MB)
		{nats.MemoryStorage, 1000, 10240, 102400},   // mem storage, 1k objects of various size (10KB - 100KB)
		{nats.FileStorage, 100, 1024, 102400},       // file storage, 100 objects sized (1KB-100KB)
		{nats.FileStorage, 1000, 10240, 1048576},    // file storage, 1k objects of various size (10KB - 1MB)
		{nats.FileStorage, 100, 102400, 1048576},    // file storage, 100 objects sized (100KB-1MB)
		{nats.FileStorage, 100, 1048576, 10485760},  // file storage, 100 objects sized (1MB-10MB)
		{nats.FileStorage, 10, 10485760, 104857600}, // file storage, 10 objects sized (10MB-100MB)
	}

	var (
		clusterSizeCases = []int{1, 3}
		rwRatioCases     = []float64{ReadOnly, WriteOnly, 0.8}
	)

	// Test with either single node or 3 node cluster
	for _, clusterSize := range clusterSizeCases {
		replicas := clusterSize
		cName := fmt.Sprintf("N=%d,R=%d", clusterSize, replicas)
		b.Run(
			cName,
			func(b *testing.B) {
				for _, rwRatio := range rwRatioCases {
					rName := fmt.Sprintf("workload=%s", rwRatioToString(rwRatio))
					b.Run(
						rName,
						func(b *testing.B) {
							// Test all tabled benchmark cases
							for _, bc := range benchmarkCases {
								bName := fmt.Sprintf("K=%d,storage=%s,minObjSz=%db,maxObjSz=%db", bc.numKeys, bc.storage, bc.minObjSz, bc.maxObjSz)
								b.Run(
									bName,
									func(b *testing.B) {

										// Test setup
										rng := rand.New(rand.NewSource(int64(seed)))

										if verbose {
											b.Logf("Setting up %d nodes", replicas)
										}

										// Setup server or cluster
										cl, _, shutdown, nc, js := startJSClusterAndConnect(b, clusterSize)
										defer shutdown()
										defer nc.Close()

										// Initialize object store
										if verbose {
											b.Logf("Creating ObjectStore %s with R=%d", objStoreName, replicas)
										}
										objStoreConfig := &nats.ObjectStoreConfig{
											Bucket:   objStoreName,
											Replicas: replicas,
											Storage:  bc.storage,
										}
										objStore, err := js.CreateObjectStore(objStoreConfig)
										if err != nil {
											b.Fatalf("Error creating ObjectStore: %v", err)
										}

										// If replicated resource, connect to stream leader for lower variability
										if clusterSize > 1 {
											nc.Close()
											connectURL := cl.streamLeader("$G", fmt.Sprintf("OBJ_%s", objStoreName)).ClientURL()
											nc, js := jsClientConnectURL(b, connectURL)
											defer nc.Close()
											objStore, err = js.ObjectStore(objStoreName)
											if err != nil {
												b.Fatalf("Error binding to ObjectStore: %v", err)
											}
										}
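
										// Pre-populating every key ensures reads in
										// the timed loop never target a missing object.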
										// Initialize keys
										if initKeys {
											for n := 0; n < bc.numKeys; n++ {
												key := fmt.Sprintf("%s_%d", keyPrefix, n)
												dataSz := rng.Intn(bc.maxObjSz-bc.minObjSz+1) + bc.minObjSz
												value := make([]byte, dataSz)
												rng.Read(value)
												_, err := objStore.PutBytes(key, value)
												if err != nil {
													b.Fatalf("Failed to initialize %s/%s: %v", objStoreName, key, err)
												}
											}
										}

										b.ResetTimer()

										// Run benchmark
										errors, reads, writes := RunObjStoreBenchmark(b, objStore, bc.minObjSz, bc.maxObjSz, bc.numKeys, rwRatio)

										// Report metrics
										b.ReportMetric(float64(errors)*100/float64(b.N), "%error")
										b.ReportMetric(float64(reads), "reads")
										b.ReportMetric(float64(writes), "writes")

									},
								)
							}
						},
					)
				}
			},
		)
	}
}

func BenchmarkJetStreamPublishConcurrent(b *testing.B) {
	const (
		subject    = "test-subject"
		streamName = "test-stream"
	)

	type BenchPublisher struct {
		// nats connection for this publisher
		conn *nats.Conn
		// jetstream context
		js nats.JetStreamContext
		// message buffer
		messageData []byte
		// number of publish calls
		publishCalls int
		// number of publish errors
		publishErrors int
	}

	messageSizeCases := []int64{
		10,     // 10B
		1024,   // 1KiB
		102400, // 100KiB
	}
	numPubsCases := []int{
		12,
	}

	replicasCases := []struct {
		clusterSize int
		replicas    int
	}{
		{1, 1},
		{3, 3},
	}

	workload := func(b *testing.B, numPubs int, messageSize int64, clientUrl string) {

		// create N publishers
		publishers := make([]BenchPublisher, numPubs)
		for i := range publishers {
			// create publisher connection and jetstream context
			ncPub, err := nats.Connect(clientUrl)
			if err != nil {
				b.Fatal(err)
			}
			defer ncPub.Close()
			jsPub, err := ncPub.JetStream()
			if err != nil {
				b.Fatal(err)
			}

			// initialize publisher
			publishers[i] = BenchPublisher{
				conn:          ncPub,
				js:            jsPub,
				messageData:   make([]byte, messageSize),
				publishCalls:  0,
				publishErrors: 0,
			}
			rand.New(rand.NewSource(int64(i))).Read(publishers[i].messageData)
		}

		// waits for all publisher goroutines and the main thread to be ready
		var workloadReadyWg sync.WaitGroup
		workloadReadyWg.Add(1 + numPubs)

		// blocks the main thread until the publish workload completes; decremented
		// once the stream has received b.N messages from the publishers
		var benchCompleteWg sync.WaitGroup
		benchCompleteWg.Add(1)

		// wait group to ensure all publishers have been torn down
		var finishedPublishersWg sync.WaitGroup
		finishedPublishersWg.Add(numPubs)

		// start goroutines for all publishers; wait until all are initialized
		// before starting the publish workload
		for i := range publishers {

			go func(pubId int) {
				// signal that this publisher has been torn down
				defer finishedPublishersWg.Done()

				// publisher goroutine is ready
				workloadReadyWg.Done()

				// start workload when main thread and all other publishers are ready
				workloadReadyWg.Wait()
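
				// Termination is driven by the stream sequence in the publish
				// ack: exactly one publisher observes sequence == b.N and
				// signals completion.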
				// publish until the stream has received b.N messages
				for {
					// mutate the payload so messages are not identical
					fastRandomMutation(publishers[pubId].messageData, 10)
					// attempt to publish message
					pubAck, err := publishers[pubId].js.Publish(subject, publishers[pubId].messageData)
					publishers[pubId].publishCalls += 1
					if err != nil {
						publishers[pubId].publishErrors += 1
						continue
					}
					// all messages have been published to the stream
					if pubAck.Sequence == uint64(b.N) {
						benchCompleteWg.Done()
					}
					// the stream has reached b.N messages, stop publishing
					if pubAck.Sequence >= uint64(b.N) {
						return
					}
				}
			}(i)
		}

		// set bytes per operation
		b.SetBytes(messageSize)

		// main thread is ready
		workloadReadyWg.Done()
		// start the clock
		b.ResetTimer()

		// wait until the termination condition is reached
		benchCompleteWg.Wait()
		// stop the clock
		b.StopTimer()

		// wait for all publishers to shutdown
		finishedPublishersWg.Wait()

		// sum up publish calls and errors
		publishCalls := 0
		publishErrors := 0
		for _, pub := range publishers {
			publishCalls += pub.publishCalls
			publishErrors += pub.publishErrors
		}

		// report error rate
		errorRate := 100 * float64(publishErrors) / float64(publishCalls)
		b.ReportMetric(errorRate, "%error")
	}

	// benchmark case matrix
	for _, replicasCase := range replicasCases {
		b.Run(
			fmt.Sprintf("N=%d,R=%d", replicasCase.clusterSize, replicasCase.replicas),
			func(b *testing.B) {
				for _, messageSize := range messageSizeCases {
					b.Run(
						fmt.Sprintf("msgSz=%db", messageSize),
						func(b *testing.B) {
							for _, numPubs := range numPubsCases {
								b.Run(
									fmt.Sprintf("pubs=%d", numPubs),
									func(b *testing.B) {

										// start jetstream cluster
										cl, ls, shutdown, nc, js := startJSClusterAndConnect(b, replicasCase.clusterSize)
										defer shutdown()
										defer nc.Close()
										clientUrl := ls.ClientURL()

										// create stream
										_, err := js.AddStream(&nats.StreamConfig{
											Name:     streamName,
											Subjects: []string{subject},
											Replicas: replicasCase.replicas,
										})
										if err != nil {
											b.Fatal(err)
										}
										defer js.DeleteStream(streamName)

										// If replicated resource, connect to stream leader for lower variability
										if replicasCase.replicas > 1 {
											nc.Close()
											clientUrl = cl.streamLeader("$G", streamName).ClientURL()
											nc, _ = jsClientConnectURL(b, clientUrl)
											defer nc.Close()
										}

										// run workload
										workload(b, numPubs, messageSize, clientUrl)
									},
								)
							}
						})
				}
			})
	}
}

// Helper function to stand up a JS-enabled single server or cluster
func startJSClusterAndConnect(b *testing.B, clusterSize int) (c *cluster, s *Server, shutdown func(), nc *nats.Conn, js nats.JetStreamContext) {
	b.Helper()
	var err error

	if clusterSize == 1 {
		s = RunBasicJetStreamServer(b)
		shutdown = func() {
			s.Shutdown()
		}
	} else {
		c = createJetStreamClusterExplicit(b, "BENCH_PUB", clusterSize)
		c.waitOnClusterReadyWithNumPeers(clusterSize)
		c.waitOnLeader()
		s = c.leader()
		shutdown = func() {
			c.shutdown()
		}
	}

	nc, err = nats.Connect(s.ClientURL())
	if err != nil {
		b.Fatalf("failed to connect: %s", err)
	}
	js, err = nc.JetStream()
	if err != nil {
		b.Fatalf("failed to init jetstream: %s", err)
	}

	return c, s, shutdown, nc, js
}