github.com/anycable/anycable-go@v1.5.1/broker/nats.go (about)

     1  package broker
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"log/slog"
     7  	"math"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/anycable/anycable-go/common"
    13  	natsconfig "github.com/anycable/anycable-go/nats"
    14  	"github.com/anycable/anycable-go/utils"
    15  	"github.com/joomcode/errorx"
    16  	nanoid "github.com/matoous/go-nanoid"
    17  	"github.com/nats-io/nats.go"
    18  	"github.com/nats-io/nats.go/jetstream"
    19  )
    20  
// NATS is a Broker implementation on top of NATS JetStream:
// stream history lives in JetStream streams, sessions and the epoch
// are stored in KV buckets, and a local in-memory broker keeps a hot
// copy of stream messages for fast history reads.
type NATS struct {
	broadcaster Broadcaster
	conf        *Config
	tracker     *StreamsTracker

	// NATS connection configuration and the active connection
	nconf *natsconfig.NATSConfig
	conn  *nats.Conn

	// JetStream handle plus KV buckets for sessions (kv) and the epoch (epochKV)
	js      jetstream.JetStream
	kv      jetstream.KeyValue
	epochKV jetstream.KeyValue

	// TTL-cached JetStream streams/consumers and per-stream history synchronization
	jstreams   *lru[string]
	jconsumers *lru[jetstream.Consumer]
	streamSync *streamsSynchronizer

	// Local broker is used to keep a copy of stream messages
	local LocalBroker

	// clientMu guards connection/JetStream setup; epochMu guards the epoch value
	clientMu sync.RWMutex
	epochMu  sync.RWMutex

	epoch string

	// shutdownCtx is canceled on Shutdown to stop background watchers
	shutdownCtx context.Context
	shutdownFn  func()

	// readyCtx is canceled (with a cause) once JetStream initialization finishes;
	// messages broadcast before that are parked in broadcastBacklog
	readyCtx         context.Context
	broadcastBacklog []*common.StreamMessage
	backlogMu        sync.Mutex

	log *slog.Logger
}
    54  
// Storage layout: KV bucket/key names, key prefixes, and how long broker
// operations wait for JetStream initialization before giving up.
const (
	kvBucket       = "_anycable_"
	epochBucket    = "_anycable_epoch_"
	epochKey       = "_epoch_"
	sessionsPrefix = ""
	streamPrefix   = "_ac_"

	jetstreamReadyTimeout = 1 * time.Second
)
    64  
// Compile-time check that NATS satisfies the Broker interface.
var _ Broker = (*NATS)(nil)

// NATSOption configures a NATS broker instance during construction.
type NATSOption func(*NATS)

// WithNATSLocalBroker sets the local broker used to keep a copy of stream messages.
func WithNATSLocalBroker(b LocalBroker) NATSOption {
	return func(n *NATS) {
		n.local = b
	}
}
    74  
// NewNATSBroker creates a new NATS broker instance (not yet connected; see Start).
// Unless overridden via options, the local message copy is kept in an in-memory broker.
func NewNATSBroker(broadcaster Broadcaster, c *Config, nc *natsconfig.NATSConfig, l *slog.Logger, opts ...NATSOption) *NATS {
	shutdownCtx, shutdownFn := context.WithCancel(context.Background())

	n := NATS{
		broadcaster:      broadcaster,
		conf:             c,
		nconf:            nc,
		shutdownCtx:      shutdownCtx,
		shutdownFn:       shutdownFn,
		tracker:          NewStreamsTracker(),
		broadcastBacklog: []*common.StreamMessage{},
		streamSync:       newStreamsSynchronizer(),
		// Stream/consumer caches expire together with the stream history
		jstreams:         newLRU[string](time.Duration(c.HistoryTTL * int64(time.Second))),
		jconsumers:       newLRU[jetstream.Consumer](time.Duration(c.HistoryTTL * int64(time.Second))),
		log:              l.With("context", "broker").With("provider", "nats"),
	}

	for _, opt := range opts {
		opt(&n)
	}

	// Fall back to an in-memory broker when no local broker was injected
	if n.local == nil {
		n.local = NewMemoryBroker(nil, c)
	}

	return &n
}
   102  
   103  // Write Broker implementtaion here
   104  func (n *NATS) Start(done chan (error)) error {
   105  	n.clientMu.Lock()
   106  	defer n.clientMu.Unlock()
   107  
   108  	connectOptions := []nats.Option{
   109  		nats.RetryOnFailedConnect(true),
   110  		nats.MaxReconnects(n.nconf.MaxReconnectAttempts),
   111  		nats.DisconnectErrHandler(func(nc *nats.Conn, err error) {
   112  			if err != nil {
   113  				n.log.Warn("connection failed", "error", err)
   114  			}
   115  		}),
   116  		nats.ReconnectHandler(func(nc *nats.Conn) {
   117  			n.log.Info("connection restored", "url", nc.ConnectedUrl())
   118  		}),
   119  	}
   120  
   121  	if n.nconf.DontRandomizeServers {
   122  		connectOptions = append(connectOptions, nats.DontRandomize())
   123  	}
   124  
   125  	nc, err := nats.Connect(n.nconf.Servers, connectOptions...)
   126  
   127  	if err != nil {
   128  		return err
   129  	}
   130  
   131  	n.conn = nc
   132  
   133  	readyCtx, readyFn := context.WithCancelCause(context.Background())
   134  
   135  	n.readyCtx = readyCtx
   136  
   137  	// Initialize JetStream asynchronously, because we may need to wait for JetStream cluster to be ready
   138  	go func() {
   139  		err := n.initJetStreamWithRetry()
   140  		readyFn(err)
   141  		if err != nil && done != nil {
   142  			done <- err
   143  		}
   144  
   145  		if err != nil {
   146  			n.backlogFlush()
   147  		}
   148  	}()
   149  
   150  	return nil
   151  }
   152  
   153  func (n *NATS) Ready(timeout ...time.Duration) error {
   154  	var err error
   155  
   156  	if len(timeout) == 0 {
   157  		<-n.readyCtx.Done()
   158  	} else {
   159  		timer := time.After(timeout[0])
   160  
   161  		select {
   162  		case <-n.readyCtx.Done():
   163  		case <-timer:
   164  			err = fmt.Errorf("timeout waiting for JetStream to be ready")
   165  		}
   166  	}
   167  
   168  	if err != nil {
   169  		return err
   170  	}
   171  
   172  	cause := context.Cause(n.readyCtx)
   173  
   174  	if cause == context.Canceled {
   175  		return nil
   176  	} else {
   177  		return cause
   178  	}
   179  }
   180  
   181  func (n *NATS) initJetStreamWithRetry() error {
   182  	attempt := 0
   183  
   184  	for {
   185  		err := n.initJetStream()
   186  
   187  		if err == nil {
   188  			return nil
   189  		}
   190  
   191  		// delay with exponentional backoff, min 1s, max 60s
   192  		delay := utils.NextRetry(attempt)
   193  		attempt++
   194  
   195  		if attempt > 5 {
   196  			return errorx.Decorate(err, "JetStream is unavailable")
   197  		}
   198  
   199  		n.log.Warn("JetStream initialization failed", "error", err)
   200  
   201  		n.log.Info(fmt.Sprintf("next JetStream initialization attempt in %s", delay))
   202  		time.Sleep(delay)
   203  
   204  		n.log.Info("re-initializing JetStream...")
   205  	}
   206  }
   207  
// initJetStream performs a single initialization pass: sets up the JetStream
// context, the sessions KV bucket, the cluster-wide epoch, the local broker,
// and the epoch watcher. Returns an error if any required step fails.
func (n *NATS) initJetStream() error {
	n.clientMu.Lock()
	defer n.clientMu.Unlock()

	nc := n.conn
	js, err := jetstream.New(nc)

	if err != nil {
		return errorx.Decorate(err, "failed to connect to JetStream")
	}

	n.js = js

	// Sessions bucket entries expire with the sessions TTL
	kv, err := n.fetchBucketWithTTL(kvBucket, time.Duration(n.conf.SessionsTTL*int64(time.Second)))

	if err != nil {
		return errorx.Decorate(err, "failed to connect to JetStream KV")
	}

	n.kv = kv

	// Read the cluster-wide epoch or create a new one if missing
	epoch, err := n.calculateEpoch()

	if err != nil {
		return errorx.Decorate(err, "failed to calculate epoch")
	}

	n.writeEpoch(epoch)
	err = n.local.Start(nil)

	if err != nil {
		return errorx.Decorate(err, "failed to start internal memory broker")
	}

	// Keep the in-process epoch in sync with the cluster-wide value;
	// a watcher failure is logged but not fatal
	err = n.watchEpoch(n.shutdownCtx)

	if err != nil {
		n.log.Warn("failed to set up epoch watcher", "error", err)
	}

	n.log.Info("NATS broker is ready", "epoch", epoch)
	return nil
}
   251  
   252  func (n *NATS) Shutdown(ctx context.Context) error {
   253  	n.clientMu.Lock()
   254  	defer n.clientMu.Unlock()
   255  
   256  	n.shutdownFn()
   257  
   258  	if n.conn != nil {
   259  		n.conn.Close()
   260  		n.conn = nil
   261  	}
   262  
   263  	if n.local != nil {
   264  		n.local.Shutdown(ctx) // nolint:errcheck
   265  	}
   266  
   267  	return nil
   268  }
   269  
   270  func (n *NATS) Announce() string {
   271  	brokerParams := fmt.Sprintf("(history limit: %d, history ttl: %ds, sessions ttl: %ds)", n.conf.HistoryLimit, n.conf.HistoryTTL, n.conf.SessionsTTL)
   272  
   273  	return fmt.Sprintf("Using NATS broker: %s %s", n.nconf.Servers, brokerParams)
   274  }
   275  
   276  func (n *NATS) Epoch() string {
   277  	n.epochMu.RLock()
   278  	defer n.epochMu.RUnlock()
   279  
   280  	return n.epoch
   281  }
   282  
   283  func (n *NATS) SetEpoch(epoch string) error {
   284  	n.clientMu.RLock()
   285  	defer n.clientMu.RUnlock()
   286  
   287  	bucket, err := n.js.KeyValue(context.Background(), epochBucket)
   288  
   289  	if err != nil {
   290  		return err
   291  	}
   292  
   293  	_, err = bucket.Put(context.Background(), epochKey, []byte(epoch))
   294  	if err != nil {
   295  		return err
   296  	}
   297  
   298  	n.writeEpoch(epoch)
   299  
   300  	return nil
   301  }
   302  
   303  func (n *NATS) writeEpoch(val string) {
   304  	n.epochMu.Lock()
   305  	defer n.epochMu.Unlock()
   306  
   307  	n.epoch = val
   308  	if n.local != nil {
   309  		n.local.SetEpoch(val)
   310  	}
   311  }
   312  
// HandleBroadcast persists the message in JetStream (stamping it with the
// current epoch and its stream offset) and hands it to the broadcaster.
// Transient messages skip persistence; messages arriving before JetStream
// is ready are parked in the backlog to be flushed later.
func (n *NATS) HandleBroadcast(msg *common.StreamMessage) {
	if msg.Meta != nil && msg.Meta.Transient {
		n.broadcaster.Broadcast(msg)
		return
	}

	err := n.Ready(jetstreamReadyTimeout)
	if err != nil {
		n.log.Debug("JetStream is not ready yet to publish messages, add to backlog")
		n.backlogAdd(msg)
		return
	}

	offset, err := n.add(msg.Stream, msg.Data)

	if err != nil {
		// The message is dropped: it could not be stored, so it is not broadcast either
		n.log.Error("failed to add message to JetStream Stream", "stream", msg.Stream, "error", err)
		return
	}

	msg.Epoch = n.Epoch()
	msg.Offset = offset

	n.broadcaster.Broadcast(msg)
}
   338  
// HandleCommand forwards remote commands to the underlying broadcaster as-is
// (commands are not persisted).
func (n *NATS) HandleCommand(msg *common.RemoteCommandMessage) {
	n.broadcaster.BroadcastCommand(msg)
}
   342  
   343  func (n *NATS) Subscribe(stream string) string {
   344  	isNew := n.tracker.Add(stream)
   345  
   346  	if isNew {
   347  		n.addStreamConsumer(stream)
   348  		n.broadcaster.Subscribe(stream)
   349  	}
   350  
   351  	return stream
   352  }
   353  
   354  func (n *NATS) Unsubscribe(stream string) string {
   355  	isLast := n.tracker.Remove(stream)
   356  
   357  	if isLast {
   358  		n.broadcaster.Unsubscribe(stream)
   359  	}
   360  
   361  	return stream
   362  }
   363  
   364  func (n *NATS) HistoryFrom(stream string, epoch string, offset uint64) ([]common.StreamMessage, error) {
   365  	err := n.Ready(jetstreamReadyTimeout)
   366  	if err != nil {
   367  		return nil, err
   368  	}
   369  
   370  	n.streamSync.sync(stream)
   371  	return n.local.HistoryFrom(stream, epoch, offset)
   372  }
   373  
   374  func (n *NATS) HistorySince(stream string, since int64) ([]common.StreamMessage, error) {
   375  	err := n.Ready(jetstreamReadyTimeout)
   376  	if err != nil {
   377  		return nil, err
   378  	}
   379  
   380  	n.streamSync.sync(stream)
   381  	return n.local.HistorySince(stream, since)
   382  }
   383  
   384  func (n *NATS) CommitSession(sid string, session Cacheable) error {
   385  	err := n.Ready(jetstreamReadyTimeout)
   386  	if err != nil {
   387  		return err
   388  	}
   389  
   390  	ctx := context.Background()
   391  	key := sessionsPrefix + sid
   392  	data, err := session.ToCacheEntry()
   393  
   394  	if err != nil {
   395  		return errorx.Decorate(err, "failed to serialize session")
   396  	}
   397  
   398  	_, err = n.kv.Put(ctx, key, data)
   399  
   400  	if err != nil {
   401  		return errorx.Decorate(err, "failed to save session to NATS")
   402  	}
   403  
   404  	return nil
   405  }
   406  
   407  func (n *NATS) RestoreSession(sid string) ([]byte, error) {
   408  	err := n.Ready(jetstreamReadyTimeout)
   409  	if err != nil {
   410  		return nil, err
   411  	}
   412  
   413  	key := sessionsPrefix + sid
   414  	ctx := context.Background()
   415  
   416  	entry, err := n.kv.Get(ctx, key)
   417  
   418  	if err == jetstream.ErrKeyNotFound {
   419  		return nil, nil
   420  	}
   421  
   422  	if err != nil {
   423  		return nil, errorx.Decorate(err, "failed to restore session from NATS")
   424  	}
   425  
   426  	return entry.Value(), nil
   427  }
   428  
   429  func (n *NATS) FinishSession(sid string) error {
   430  	err := n.Ready(jetstreamReadyTimeout)
   431  	if err != nil {
   432  		return err
   433  	}
   434  
   435  	ctx := context.Background()
   436  	key := sessionsPrefix + sid
   437  
   438  	entry, err := n.kv.Get(ctx, key)
   439  
   440  	if err != nil {
   441  		return errorx.Decorate(err, "failed to restore session from NATS")
   442  	}
   443  
   444  	_, err = n.kv.Put(ctx, key, entry.Value())
   445  
   446  	if err != nil {
   447  		return errorx.Decorate(err, "failed to touch session in NATS")
   448  	}
   449  
   450  	return nil
   451  }
   452  
// Reset wipes the broker state: deletes all session keys from the KV bucket
// and removes the matching JetStream streams.
func (n *NATS) Reset() error {
	err := n.Ready(jetstreamReadyTimeout)
	if err != nil {
		return err
	}

	n.clientMu.Lock()
	defer n.clientMu.Unlock()

	// Delete all sessions
	if n.kv != nil {
		keys, err := n.kv.Keys(context.Background())
		if err != nil {
			// An empty bucket is fine; any other error aborts the reset
			if err != jetstream.ErrNoKeysFound {
				return err
			}
		}

		for _, key := range keys {
			n.kv.Delete(context.Background(), key) // nolint:errcheck
		}
	}

	// NOTE(review): sessionsPrefix is currently empty, so the subject filter is
	// "*" and this matches all streams — confirm that's intentional.
	lister := n.js.ListStreams(context.Background(), jetstream.WithStreamListSubject(sessionsPrefix+"*"))
	for info := range lister.Info() {
		n.js.DeleteStream(context.Background(), info.Config.Name) // nolint:errcheck
	}

	return nil
}
   483  
   484  func (n *NATS) add(stream string, data string) (uint64, error) {
   485  	err := n.ensureStreamExists(stream)
   486  
   487  	if err != nil {
   488  		return 0, errorx.Decorate(err, "failed to create JetStream Stream")
   489  	}
   490  
   491  	ctx := context.Background()
   492  	key := streamPrefix + stream
   493  
   494  	// Touch on publish to make sure that the subsequent history fetch will return the latest messages
   495  	n.streamSync.touch(stream)
   496  	ack, err := n.js.Publish(ctx, key, []byte(data))
   497  
   498  	if err != nil {
   499  		return 0, errorx.Decorate(err, "failed to publish message to JetStream")
   500  	}
   501  
   502  	return ack.Sequence, nil
   503  }
   504  
// addStreamConsumer creates (or fetches from the TTL cache) a JetStream
// consumer for the stream, replays the recent history into the local broker,
// and keeps consuming new messages. Errors are logged rather than returned.
func (n *NATS) addStreamConsumer(stream string) {
	attempts := 5

	err := n.ensureStreamExists(stream)

	if err != nil {
		n.log.Error("failed to create JetStream stream", "stream", stream, "error", err)
		return
	}

createConsumer:
	prefixedStream := streamPrefix + stream

	_, cerr := n.jconsumers.fetch(stream, func() (jetstream.Consumer, error) { // nolint:errcheck
		// Ack-less consumer: messages are only mirrored into the local broker
		cons, err := n.js.CreateConsumer(context.Background(), prefixedStream, jetstream.ConsumerConfig{
			AckPolicy: jetstream.AckNonePolicy,
		})

		if err != nil {
			n.log.Error("failed to create JetStream stream consumer", "stream", stream, "error", err)
			return nil, err
		}

		n.log.Debug("created JetStream consumer", "consumer", cons.CachedInfo().Name, "stream", stream)

		// Mark the stream busy so history reads wait for the initial replay
		n.streamSync.touch(stream)

		batchSize := n.conf.HistoryLimit

		if batchSize == 0 {
			// TODO: what should we do if history is unlimited?
			batchSize = 100
		}

		// Replay whatever is already in the stream into the local broker...
		batch, err := cons.FetchNoWait(batchSize)
		if err != nil {
			n.log.Error("failed to fetch initial messages from JetStream", "error", err)
			return nil, err
		}

		for msg := range batch.Messages() {
			n.consumeMessage(stream, msg)
		}

		// ...then keep consuming new messages as they arrive
		_, err = cons.Consume(func(msg jetstream.Msg) {
			n.consumeMessage(stream, msg)
		})

		if err != nil {
			return nil, err
		}

		return cons, nil
	}, func(cons jetstream.Consumer) {
		// Cache eviction callback: tear the consumer down
		name := cons.CachedInfo().Name
		n.log.Debug("deleting JetStream consumer", "consumer", name, "stream", stream)
		n.streamSync.remove(stream)
		n.js.DeleteConsumer(context.Background(), prefixedStream, name) // nolint:errcheck
	})

	if cerr != nil {
		// Retry transient JetStream failures a few times before giving up
		if n.shouldRetryOnError(cerr, &attempts, 500*time.Millisecond) {
			goto createConsumer
		}
	}
}
   571  
   572  func (n *NATS) consumeMessage(stream string, msg jetstream.Msg) {
   573  	n.streamSync.touch(stream)
   574  
   575  	meta, err := msg.Metadata()
   576  	if err != nil {
   577  		n.log.Error("failed to get JetStream message metadata", "error", err)
   578  		return
   579  	}
   580  
   581  	seq := meta.Sequence.Stream
   582  	ts := meta.Timestamp
   583  
   584  	_, err = n.local.Store(stream, msg.Data(), seq, ts)
   585  	if err != nil {
   586  		n.log.Error("failed to store message in local broker", "error", err)
   587  		return
   588  	}
   589  }
   590  
// ensureStreamExists creates the JetStream stream backing the given logical
// stream (idempotent; the result is cached with the history TTL).
// Transient failures are retried a few times with a cooldown.
func (n *NATS) ensureStreamExists(stream string) error {
	prefixedStream := streamPrefix + stream
	attempts := 5

createStream:
	_, err := n.jstreams.fetch(stream, func() (string, error) {
		ctx := context.Background()

		_, err := n.js.CreateStream(ctx, jetstream.StreamConfig{
			Name:     prefixedStream,
			MaxMsgs:  int64(n.conf.HistoryLimit),
			MaxAge:   time.Duration(n.conf.HistoryTTL * int64(time.Second)),
			Replicas: 1,
		})

		if err != nil {
			// That means we updated the stream config (TTL, limit, etc.)
			if err != jetstream.ErrStreamNameAlreadyInUse {
				return "", err
			}
		}

		return stream, nil
	}, func(stream string) {})

	if err != nil {
		if n.shouldRetryOnError(err, &attempts, 500*time.Millisecond) {
			goto createStream
		}
	}

	return err
}
   624  
// calculateEpoch returns the cluster-wide epoch, atomically creating a new
// one if none is stored yet (the first node wins; everyone else reads the
// stored value).
func (n *NATS) calculateEpoch() (string, error) {
	attempts := 5
	maybeNewEpoch, _ := nanoid.Nanoid(4)

	// The epoch entry must outlive both history and sessions data (10x the longer TTL)
	ttl := time.Duration(10 * int64(math.Max(float64(n.conf.HistoryTTL), float64(n.conf.SessionsTTL))*float64(time.Second)))
	// We must use a separate bucket due to a different TTL
	bucketKey := epochBucket

fetchEpoch:
	kv, err := n.fetchBucketWithTTL(bucketKey, ttl)

	if err != nil {
		return "", errorx.Decorate(err, "failed to connect to JetStream KV")
	}

	n.epochKV = kv

	// Create is atomic: it fails with a "key exists" error when another node got there first
	_, err = kv.Create(context.Background(), epochKey, []byte(maybeNewEpoch))

	if err != nil && strings.Contains(err.Error(), "key exists") {
		entry, kerr := kv.Get(context.Background(), epochKey)

		if kerr != nil {
			return "", errorx.Decorate(kerr, "failed to retrieve key: %s", epochKey)
		}

		return string(entry.Value()), nil
	} else if err != nil {
		if n.shouldRetryOnError(err, &attempts, 1*time.Second) {
			goto fetchEpoch
		}

		return "", errorx.Decorate(err, "failed to create key: %s", epochKey)
	}

	return maybeNewEpoch, nil
}
   662  
// watchEpoch subscribes to epoch changes in the KV bucket and keeps the
// in-process epoch value up to date until ctx is canceled.
func (n *NATS) watchEpoch(ctx context.Context) error {
	// NOTE(review): the watcher itself is created with context.Background();
	// ctx only stops the consuming goroutine below — confirm that's intentional.
	watcher, err := n.epochKV.Watch(context.Background(), epochKey, jetstream.IgnoreDeletes())

	if err != nil {
		return err
	}

	go func() {
		for {
			select {
			case <-ctx.Done():
				watcher.Stop() // nolint:errcheck
				return
			case entry := <-watcher.Updates():
				// nil entries are skipped (the Updates channel may deliver them)
				if entry != nil {
					newEpoch := string(entry.Value())

					if n.Epoch() != newEpoch {
						n.log.Warn("epoch updated", "epoch", newEpoch)
						n.writeEpoch(newEpoch)
					}
				}
			}
		}
	}()

	return nil
}
   691  
// fetchBucketWithTTL returns the KV bucket with the given name, creating it
// with the requested TTL when missing. If the bucket already exists with a
// different TTL, it is deleted and recreated (its contents are lost).
func (n *NATS) fetchBucketWithTTL(key string, ttl time.Duration) (jetstream.KeyValue, error) {
	var bucket jetstream.KeyValue
	newBucket := true
	attempts := 5

bucketSetup:
	bucket, err := n.js.CreateKeyValue(context.Background(), jetstream.KeyValueConfig{
		Bucket:   key,
		TTL:      ttl,
		Replicas: 1,
	})

	if err != nil {
		// Timeouts are retried a few times before giving up
		if context.DeadlineExceeded == err {
			if attempts > 0 {
				attempts--
				n.log.Warn("failed to retrieve bucket, retrying in 500ms...", "bucket", key)
				time.Sleep(500 * time.Millisecond)
				goto bucketSetup
			}

			return nil, errorx.Decorate(err, "failed to create bucket: %s", key)
		}

		// That means that bucket has been already created
		if err == jetstream.ErrStreamNameAlreadyInUse {
			newBucket = false
			bucket, err = n.js.KeyValue(context.Background(), key)

			if err != nil {
				return nil, errorx.Decorate(err, "failed to retrieve bucket: %s", key)
			}
		}
	}

	if err != nil {
		return nil, errorx.Decorate(err, "failed to create bucket: %s", key)
	}

	// The bucket already existed — verify its TTL matches the requested one;
	// if it doesn't, drop the bucket and recreate it from scratch.
	if !newBucket {
		status, serr := bucket.Status(context.Background())

		if serr != nil {
			return nil, errorx.Decorate(serr, "failed to retrieve bucket status: %s", key)
		}

		if status.TTL() != ttl {
			n.log.Warn("bucket TTL has been changed, recreating the bucket", "bucket", key, "old_ttl", status.TTL().String(), "ttl", ttl.String())
			derr := n.js.DeleteKeyValue(context.Background(), key)
			if derr != nil {
				return nil, errorx.Decorate(derr, "failed to delete bucket: %s", key)
			}

			goto bucketSetup
		}
	}

	return bucket, nil
}
   753  
   754  type lru[T comparable] struct {
   755  	entries map[string]lruEntry[T]
   756  	ttl     time.Duration
   757  	mu      sync.RWMutex
   758  }
   759  
   760  type lruEntry[T comparable] struct {
   761  	value      T
   762  	lastActive time.Time
   763  	cleanup    func(T)
   764  }
   765  
   766  func newLRU[T comparable](ttl time.Duration) *lru[T] {
   767  	return &lru[T]{entries: make(map[string]lruEntry[T]), ttl: ttl}
   768  }
   769  
   770  func (m *lru[T]) fetch(key string, create func() (T, error), cleanup func(T)) (T, error) {
   771  	m.mu.Lock()
   772  	defer m.mu.Unlock()
   773  
   774  	if val, ok := m.read(key); ok {
   775  		return val, nil
   776  	}
   777  
   778  	val, err := create()
   779  
   780  	if err != nil {
   781  		var zero T
   782  		return zero, err
   783  	}
   784  
   785  	m.write(key, val, cleanup)
   786  
   787  	return val, nil
   788  }
   789  
   790  func (m *lru[T]) write(key string, value T, cleanup func(v T)) {
   791  	m.entries[key] = lruEntry[T]{value: value, lastActive: time.Now(), cleanup: cleanup}
   792  	// perform expiration on writes (which must happen rarely)
   793  	m.expire()
   794  }
   795  
   796  func (m *lru[T]) read(key string) (res T, found bool) {
   797  	if entry, ok := m.entries[key]; ok {
   798  		if entry.lastActive.Add(m.ttl).Before(time.Now()) {
   799  			return
   800  		}
   801  
   802  		// touch entry
   803  		entry.lastActive = time.Now()
   804  		res = entry.value
   805  		found = true
   806  	}
   807  
   808  	return
   809  }
   810  
   811  func (m *lru[T]) expire() {
   812  	for key, entry := range m.entries {
   813  		if entry.lastActive.Add(m.ttl).Before(time.Now()) {
   814  			delete(m.entries, key)
   815  			entry.cleanup(entry.value)
   816  		}
   817  	}
   818  }
   819  
   820  type streamsSynchronizer struct {
   821  	my       sync.RWMutex
   822  	enntries map[string]*streamSync
   823  }
   824  
   825  func newStreamsSynchronizer() *streamsSynchronizer {
   826  	return &streamsSynchronizer{
   827  		enntries: make(map[string]*streamSync),
   828  	}
   829  }
   830  
   831  func (s *streamsSynchronizer) sync(stream string) {
   832  	s.my.RLock()
   833  
   834  	syncer, ok := s.enntries[stream]
   835  
   836  	s.my.RUnlock()
   837  
   838  	if !ok {
   839  		return
   840  	}
   841  
   842  	syncer.sync()
   843  }
   844  
   845  func (s *streamsSynchronizer) touch(stream string) {
   846  	s.my.RLock()
   847  
   848  	syncer, ok := s.enntries[stream]
   849  
   850  	s.my.RUnlock()
   851  
   852  	if ok {
   853  		syncer.restart()
   854  		return
   855  	}
   856  
   857  	s.my.Lock()
   858  	defer s.my.Unlock()
   859  
   860  	s.enntries[stream] = newStreamSync()
   861  	s.enntries[stream].restart()
   862  }
   863  
   864  func (s *streamsSynchronizer) remove(stream string) {
   865  	s.my.Lock()
   866  	defer s.my.Unlock()
   867  
   868  	if syncer, ok := s.enntries[stream]; ok {
   869  		syncer.idle()
   870  		delete(s.enntries, stream)
   871  	}
   872  }
   873  
// streamSync guards history reads for a single stream: while messages are
// being consumed ("active"), readers block on cv until the stream goes idle.
type streamSync struct {
	mu          sync.Mutex
	active      bool
	activeSince time.Time

	// cv is closed to release all blocked readers
	cv    chan struct{}
	// timer flips the state back to idle after a quiet period
	timer *time.Timer
}

const (
	// streamHistorySyncTimeout caps how long a stream may stay active
	streamHistorySyncTimeout = 200 * time.Millisecond
	// streamHistorySyncPeriod is the quiet period after which the stream is considered idle
	streamHistorySyncPeriod  = 50 * time.Millisecond
)

// newStreamSync returns a streamSync in the idle state.
func newStreamSync() *streamSync {
	return &streamSync{}
}
   891  
// sync waits for the gap in currently consumed messages: it blocks the caller
// until the stream goes idle (cv is closed). Returns immediately when the
// stream is not active.
func (s *streamSync) sync() {
	s.mu.Lock()

	if !s.active {
		s.mu.Unlock()
		return
	}

	s.mu.Unlock()

	// NOTE(review): cv is read outside the lock; restart() only assigns cv when
	// transitioning from idle, so the channel observed here is the active one —
	// confirm this holds under concurrent restart/idle.
	<-s.cv
}
   905  
// restart is called every time a message is consumed to
// keep this stream locked from reading history.
// If the stream has been active longer than streamHistorySyncTimeout,
// it is forcibly idled so readers cannot starve.
func (s *streamSync) restart() {
	s.mu.Lock()

	if s.active {
		if s.activeSince.Add(streamHistorySyncTimeout).Before(time.Now()) {
			// Active for too long — release waiting readers
			s.mu.Unlock()
			s.idle()
			return
		}
		// Push the idle deadline further out
		s.timer.Reset(streamHistorySyncPeriod)
		s.mu.Unlock()
		return
	}

	defer s.mu.Unlock()

	// Transition idle -> active: arm the idle timer and a fresh broadcast channel
	s.active = true
	s.activeSince = time.Now()
	s.timer = time.AfterFunc(streamHistorySyncPeriod, s.idle)
	s.cv = make(chan struct{})
}
   929  
// idle transitions the stream back to the idle state and releases all
// blocked readers by closing cv. Safe to call when already idle.
func (s *streamSync) idle() {
	s.mu.Lock()
	defer s.mu.Unlock()

	if !s.active {
		return
	}

	s.active = false
	close(s.cv)
}
   941  
   942  func (n *NATS) shouldRetryOnError(err error, attempts *int, cooldown time.Duration) bool {
   943  	if context.DeadlineExceeded == err || jetstream.ErrNoStreamResponse == err {
   944  		if *attempts > 0 {
   945  			(*attempts)--
   946  			n.log.Warn(fmt.Sprintf("operation failed, retrying in %s...", cooldown.String()), "error", err)
   947  			time.Sleep(cooldown)
   948  			return true
   949  		}
   950  	}
   951  
   952  	return false
   953  }
   954  
   955  func (n *NATS) backlogAdd(msg *common.StreamMessage) {
   956  	n.backlogMu.Lock()
   957  	defer n.backlogMu.Unlock()
   958  
   959  	n.broadcastBacklog = append(n.broadcastBacklog, msg)
   960  }
   961  
   962  func (n *NATS) backlogFlush() {
   963  	n.backlogMu.Lock()
   964  	defer n.backlogMu.Unlock()
   965  
   966  	for _, msg := range n.broadcastBacklog {
   967  		n.HandleBroadcast(msg)
   968  	}
   969  
   970  	n.broadcastBacklog = []*common.StreamMessage{}
   971  }