github.com/wfusion/gofusion@v1.1.14/common/infra/watermill/pubsub/redis/subscriber.go

package redis

import (
	"context"
	"sync"
	"time"

	"github.com/Rican7/retry"
	"github.com/pkg/errors"
	"github.com/redis/go-redis/v9"

	"github.com/wfusion/gofusion/common/infra/watermill"
	"github.com/wfusion/gofusion/common/infra/watermill/message"
	"github.com/wfusion/gofusion/common/utils"
)

const (
	groupStartid   = ">"
	redisBusyGroup = "BUSYGROUP Consumer Group name already exists"
)

const (
	// NoSleep can be set to SubscriberConfig.NackResendSleep to disable the sleep between redeliveries.
	NoSleep time.Duration = -1

	DefaultBlockTime = time.Millisecond * 100

	DefaultClaimInterval = time.Second * 5

	DefaultClaimBatchSize = int64(100)

	DefaultMaxIdleTime = time.Second * 60

	DefaultCheckConsumersInterval = time.Second * 300
	DefaultConsumerTimeout        = time.Second * 600
)

type Subscriber struct {
	config        SubscriberConfig
	client        redis.UniversalClient
	logger        watermill.LoggerAdapter
	closing       chan struct{}
	subscribersWg sync.WaitGroup

	closed     bool
	closeMutex sync.Mutex
}

// NewSubscriber creates a new redis stream Subscriber.
func NewSubscriber(config SubscriberConfig, logger watermill.LoggerAdapter) (*Subscriber, error) {
	config.setDefaults()

	if err := config.Validate(); err != nil {
		return nil, err
	}

	if logger == nil {
		logger = &watermill.NopLogger{}
	}

	return &Subscriber{
		config:  config,
		client:  config.Client,
		logger:  logger,
		closing: make(chan struct{}),
	}, nil
}

type SubscriberConfig struct {
	Client redis.UniversalClient

	Unmarshaller Unmarshaller

	// Redis stream consumer id, paired with ConsumerGroup.
	Consumer string
	// When empty, fan-out mode will be used.
	ConsumerGroup string

	// How long to wait after a Nack before the message is redelivered.
	NackResendSleep time.Duration

	// How long to block while waiting for the next redis stream message.
	BlockTime time.Duration

	// Interval between attempts to claim idle pending messages.
	ClaimInterval time.Duration

	// Maximum number of pending messages claimed in each claim interval.
	ClaimBatchSize int64

	// How long a pending message may stay idle before it is treated as claimable.
	MaxIdleTime time.Duration

	// Interval between consumer status checks.
	CheckConsumersInterval time.Duration

	// After this timeout an idle consumer with no pending messages will be removed from the consumer group.
	ConsumerTimeout time.Duration

	// Start consumption from the specified message ID.
	// When using "0", the consumer group will consume from the very first message.
	// When using "$", the consumer group will consume from the latest message.
	OldestId string

	// If this is set, it will be called to decide whether a pending message that
	// has been idle for more than MaxIdleTime should actually be claimed.
	// If this is not set, then all pending messages that have been idle for more than MaxIdleTime will be claimed.
	// This can be useful e.g. for tasks whose processing time is highly variable -
	// so we can't just use a short MaxIdleTime; but at the same time dead
	// consumers should be spotted quickly - so we can't just use a long MaxIdleTime either.
	// In such cases, if we have another way of checking consumers' health, then we can
	// leverage that in this callback.
	ShouldClaimPendingMessage func(redis.XPendingExt) bool

	DisableRedisConnClose bool
}
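
// The sketch below is illustrative only and not part of this package's API:
// it shows one way a caller might fill in SubscriberConfig, including a
// ShouldClaimPendingMessage callback that consults an external liveness
// check. The isConsumerAlive helper and the address are hypothetical.
//
//	sub, err := NewSubscriber(SubscriberConfig{
//		Client: redis.NewUniversalClient(&redis.UniversalOptions{
//			Addrs: []string{"localhost:6379"},
//		}),
//		ConsumerGroup: "orders",
//		Consumer:      "worker-1",
//		ShouldClaimPendingMessage: func(xp redis.XPendingExt) bool {
//			// Claim only when the owning consumer is known to be dead,
//			// however long the message has been idle.
//			return !isConsumerAlive(xp.Consumer)
//		},
//	}, &watermill.NopLogger{})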

func (sc *SubscriberConfig) setDefaults() {
	if sc.Unmarshaller == nil {
		sc.Unmarshaller = DefaultMarshallerUnmarshaller{}
	}
	if sc.Consumer == "" {
		sc.Consumer = utils.ShortUUID()
	}
	if sc.NackResendSleep == 0 {
		sc.NackResendSleep = NoSleep
	}
	if sc.BlockTime == 0 {
		sc.BlockTime = DefaultBlockTime
	}
	if sc.ClaimInterval == 0 {
		sc.ClaimInterval = DefaultClaimInterval
	}
	if sc.ClaimBatchSize == 0 {
		sc.ClaimBatchSize = DefaultClaimBatchSize
	}
	if sc.MaxIdleTime == 0 {
		sc.MaxIdleTime = DefaultMaxIdleTime
	}
	if sc.CheckConsumersInterval == 0 {
		sc.CheckConsumersInterval = DefaultCheckConsumersInterval
	}
	if sc.ConsumerTimeout == 0 {
		sc.ConsumerTimeout = DefaultConsumerTimeout
	}
	// Consume from scratch by default
	if sc.OldestId == "" {
		sc.OldestId = "0"
	}
}

func (sc *SubscriberConfig) Validate() error {
	if sc.Client == nil {
		return errors.New("redis client is empty")
	}
	return nil
}

func (s *Subscriber) Subscribe(ctx context.Context, topic string) (<-chan *message.Message, error) {
	if s.closed {
		return nil, errors.New("subscriber closed")
	}

	s.subscribersWg.Add(1)

	logFields := watermill.LogFields{
		"provider":       "redis",
		"topic":          topic,
		"consumer_group": s.config.ConsumerGroup,
		"consumer_uuid":  s.config.Consumer,
	}
	s.logger.Info("[Common] watermill redis subscribing to redis stream topic", logFields)

	// the output channel is deliberately unbuffered, so we don't consume messages
	// from the redis stream while the consumer is not consuming
	output := make(chan *message.Message)

	consumeClosed, err := s.consumeMessages(ctx, topic, output, logFields)
	if err != nil {
		s.subscribersWg.Done()
		return nil, err
	}

	go func() {
		<-consumeClosed
		close(output)
		s.subscribersWg.Done()
	}()

	return output, nil
}
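
// A minimal consumption loop over the channel returned by Subscribe might
// look like the sketch below (the topic name is made up). Every message must
// be Acked or Nacked; until then, processMessage blocks and no further
// messages are delivered on this subscription:
//
//	messages, err := sub.Subscribe(context.Background(), "orders")
//	if err != nil {
//		return err
//	}
//	for msg := range messages {
//		// handle msg.Payload ...
//		msg.Ack() // or msg.Nack() to request redelivery
//	}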

func (s *Subscriber) consumeMessages(ctx context.Context, topic string,
	output chan *message.Message, logFields watermill.LogFields) (consumeMessageClosed chan struct{}, err error) {
	s.logger.Info("Starting consuming", logFields)

	ctx, cancel := context.WithCancel(ctx)
	go func() {
		select {
		case <-s.closing:
			s.logger.Debug("[Common] watermill redis closing subscriber, cancelling consumeMessages", logFields)
			cancel()
		case <-ctx.Done():
			// avoid goroutine leak
		}
	}()
	if s.config.ConsumerGroup != "" {
		// create consumer group
		if _, err := s.client.XGroupCreateMkStream(ctx, topic,
			s.config.ConsumerGroup, s.config.OldestId).Result(); err != nil && err.Error() != redisBusyGroup {
			return nil, err
		}
	}

	consumeMessageClosed, err = s.consumeStreams(ctx, topic, output, logFields)
	if err != nil {
		s.logger.Debug(
			"[Common] watermill redis starting consume failed, cancelling context",
			logFields.Add(watermill.LogFields{"err": err}),
		)
		cancel()
		return nil, err
	}

	return consumeMessageClosed, nil
}

func (s *Subscriber) consumeStreams(ctx context.Context, stream string,
	output chan *message.Message, logFields watermill.LogFields) (chan struct{}, error) {
	messageHandler := s.createMessageHandler(output)
	consumeMessageClosed := make(chan struct{})

	go func() {
		defer close(consumeMessageClosed)

		readChannel := make(chan *redis.XStream, 1)
		go s.read(ctx, stream, readChannel, logFields)

		for {
			select {
			case xs := <-readChannel:
				if xs == nil {
					s.logger.Debug(
						"[Common] watermill redis readStreamChannel is closed, stopping readStream", logFields)
					return
				}
				if err := messageHandler.processMessage(ctx, xs.Stream, &xs.Messages[0], logFields); err != nil {
					s.logger.Error("[Common] watermill redis processMessage fail", err, logFields)
					return
				}
			case <-s.closing:
				s.logger.Debug("[Common] watermill redis subscriber is closing, stopping readStream", logFields)
				return
			case <-ctx.Done():
				s.logger.Debug("[Common] watermill redis ctx was cancelled, stopping readStream", logFields)
				return
			}
		}
	}()

	return consumeMessageClosed, nil
}

func (s *Subscriber) read(ctx context.Context, stream string,
	readChannel chan<- *redis.XStream, logFields watermill.LogFields) {
	wg := &sync.WaitGroup{}
	subCtx, subCancel := context.WithCancel(ctx)
	defer func() {
		subCancel()
		wg.Wait()
		close(readChannel)
	}()
	var (
		streamsGroup = []string{stream, groupStartid}

		fanOutStartid               = "$"
		countFanOut   int64         = 0
		blockTime     time.Duration = 0

		xss []redis.XStream
		xs  *redis.XStream
		err error
	)

	if s.config.ConsumerGroup != "" {
		// 1. claim pending messages from idle consumers once, synchronously
		wg.Add(1)
		s.claim(subCtx, stream, readChannel, false, wg, logFields)

		// 2. keep claiming idle pending messages in the background
		wg.Add(1)
		go s.claim(subCtx, stream, readChannel, true, wg, logFields)

		// 3. check consumer status and remove idle consumers if possible
		wg.Add(1)
		go s.checkConsumers(subCtx, stream, wg, logFields)
	}

	for {
		select {
		case <-s.closing:
			return
		case <-ctx.Done():
			return
		default:
			if s.config.ConsumerGroup != "" {
				xss, err = s.client.XReadGroup(
					ctx,
					&redis.XReadGroupArgs{
						Group:    s.config.ConsumerGroup,
						Consumer: s.config.Consumer,
						Streams:  streamsGroup,
						Count:    1,
						Block:    blockTime,
					}).Result()
			} else {
				xss, err = s.client.XRead(
					ctx,
					&redis.XReadArgs{
						Streams: []string{stream, fanOutStartid},
						Count:   countFanOut,
						Block:   blockTime,
					}).Result()
			}
			if errors.Is(err, redis.Nil) {
				continue
			} else if err != nil {
				if _, ok := utils.IsChannelClosed(s.closing); !ok &&
					!errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) {
					s.logger.Error("[Common] watermill redis read fail", err, logFields)
				}
			}
			if len(xss) < 1 || len(xss[0].Messages) < 1 {
				continue
			}
			// update last delivered message
			xs = &xss[0]
			if s.config.ConsumerGroup == "" {
				fanOutStartid = xs.Messages[0].ID
				countFanOut = 1
			}

			blockTime = s.config.BlockTime

			select {
			case <-s.closing:
				return
			case <-ctx.Done():
				return
			case readChannel <- xs:
			}
		}
	}
}
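
// For reference, each iteration of the read loop above corresponds roughly to
//
//	XREADGROUP GROUP <group> <consumer> COUNT 1 BLOCK <ms> STREAMS <stream> >
//
// in consumer-group mode (">" delivers only entries never delivered to any
// consumer of the group), or to
//
//	XREAD COUNT <count> BLOCK <ms> STREAMS <stream> <last-id>
//
// in fan-out mode, where <last-id> starts at "$" (only entries added after the
// first read) and then advances to the ID of the last delivered entry.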

func (s *Subscriber) claim(ctx context.Context, stream string,
	readChannel chan<- *redis.XStream, keep bool, wg *sync.WaitGroup, logFields watermill.LogFields) {
	var (
		xps    []redis.XPendingExt
		err    error
		xp     redis.XPendingExt
		xm     []redis.XMessage
		tick   = time.NewTicker(s.config.ClaimInterval)
		initCh = make(chan byte, 1)
	)
	defer func() {
		tick.Stop()
		close(initCh)
		wg.Done()
	}()
	if !keep { // if not keep, run immediately
		initCh <- 1
	}

OUTER_LOOP:
	for {
		select {
		case <-s.closing:
			return
		case <-ctx.Done():
			return
		case <-tick.C:
		case <-initCh:
		}

		xps, err = s.client.XPendingExt(ctx, &redis.XPendingExtArgs{
			Stream:   stream,
			Group:    s.config.ConsumerGroup,
			Idle:     s.config.MaxIdleTime,
			Start:    "0",
			End:      "+",
			Count:    s.config.ClaimBatchSize,
			Consumer: "",
		}).Result()
		if err != nil {
			s.logger.Error(
				"[Common] watermill redis xpendingext fail",
				err,
				logFields,
			)
			continue
		}
		for _, xp = range xps {
			shouldClaim := xp.Idle >= s.config.MaxIdleTime
			if shouldClaim && s.config.ShouldClaimPendingMessage != nil {
				shouldClaim = s.config.ShouldClaimPendingMessage(xp)
			}

			if shouldClaim {
				// assign the ownership of a pending message to the current consumer
				xm, err = s.client.XClaim(ctx, &redis.XClaimArgs{
					Stream:   stream,
					Group:    s.config.ConsumerGroup,
					Consumer: s.config.Consumer,
					// this is important: it ensures that 2 concurrent subscribers
					// won't claim the same pending message at the same time
					MinIdle:  s.config.MaxIdleTime,
					Messages: []string{xp.ID},
				}).Result()
				if err != nil {
					s.logger.Error(
						"[Common] watermill redis xclaim fail",
						err,
						logFields.Add(watermill.LogFields{"xp": xp}),
					)
					continue OUTER_LOOP
				}
				if len(xm) > 0 {
					select {
					case <-s.closing:
						return
					case <-ctx.Done():
						return
					case readChannel <- &redis.XStream{Stream: stream, Messages: xm}:
					}
				}
			}
		}
		if len(xps) == 0 || int64(len(xps)) < s.config.ClaimBatchSize { // done
			if !keep {
				return
			}
			continue
		}
	}
}
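
// The claim pass above is roughly equivalent to
//
//	XPENDING <stream> <group> IDLE <max-idle-ms> 0 + <batch-size>
//
// followed, for each entry it decides to take over, by
//
//	XCLAIM <stream> <group> <consumer> <max-idle-ms> <id>
//
// The min-idle-time argument of XCLAIM is what makes the takeover race-free:
// if a concurrent subscriber claimed the entry first, its idle time was reset,
// so the XCLAIM here matches nothing and returns no messages.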

func (s *Subscriber) checkConsumers(ctx context.Context, stream string,
	wg *sync.WaitGroup, logFields watermill.LogFields) {
	tick := time.NewTicker(s.config.CheckConsumersInterval)
	defer func() {
		tick.Stop()
		wg.Done()
	}()

	for {
		select {
		case <-s.closing:
			return
		case <-ctx.Done():
			return
		case <-tick.C:
		}
		xics, err := s.client.XInfoConsumers(ctx, stream, s.config.ConsumerGroup).Result()
		if err != nil {
			s.logger.Error(
				"[Common] watermill redis xinfoconsumers failed",
				err,
				logFields,
			)
		}
		for _, xic := range xics {
			if xic.Idle < s.config.ConsumerTimeout {
				continue
			}
			if xic.Pending == 0 {
				if err = s.client.XGroupDelConsumer(ctx, stream, s.config.ConsumerGroup, xic.Name).Err(); err != nil {
					s.logger.Error(
						"[Common] watermill redis xgroupdelconsumer failed",
						err,
						logFields,
					)
				}
			}
		}
	}
}
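
// Each tick above boils down to
//
//	XINFO CONSUMERS <stream> <group>
//
// plus, for every consumer that has been idle longer than ConsumerTimeout and
// has no pending messages,
//
//	XGROUP DELCONSUMER <stream> <group> <consumer>
//
// Consumers that still hold pending entries are skipped, since deleting them
// would also delete their pending entries; those entries are instead picked
// up by the claim loop.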

func (s *Subscriber) createMessageHandler(output chan *message.Message) messageHandler {
	return messageHandler{
		outputChannel:   output,
		rc:              s.client,
		consumerGroup:   s.config.ConsumerGroup,
		unmarshaller:    s.config.Unmarshaller,
		nackResendSleep: s.config.NackResendSleep,
		logger:          s.logger,
		closing:         s.closing,
	}
}

func (s *Subscriber) Close() error {
	s.closeMutex.Lock()
	defer s.closeMutex.Unlock()

	if s.closed {
		return nil
	}

	s.closed = true
	close(s.closing)
	s.subscribersWg.Wait()

	if !s.config.DisableRedisConnClose {
		if err := s.client.Close(); err != nil {
			return err
		}
	}

	s.logger.Debug("[Common] watermill redis stream subscriber closed", nil)

	return nil
}
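
// Close blocks until all Subscribe goroutines have finished, so a typical
// shutdown is simply `defer sub.Close()`. Note that unless
// DisableRedisConnClose is set, Close also closes the underlying redis
// client, which matters when that client is shared with other components.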

type messageHandler struct {
	outputChannel chan<- *message.Message
	rc            redis.UniversalClient
	consumerGroup string
	unmarshaller  Unmarshaller

	nackResendSleep time.Duration

	logger  watermill.LoggerAdapter
	closing chan struct{}
}

func (h *messageHandler) processMessage(ctx context.Context, stream string,
	xm *redis.XMessage, messageLogFields watermill.LogFields) error {
	receivedMsgLogFields := messageLogFields.Add(watermill.LogFields{
		"xadd_id":        xm.ID,
		"stream":         stream,
		"message_raw_id": xm.ID,
	})

	h.logger.Trace("[Common] watermill received message from redis stream", receivedMsgLogFields)

	msg, err := h.unmarshaller.Unmarshal(xm.Values)
	if err != nil {
		return errors.Wrapf(err, "message unmarshal failed")
	}

	ctx = context.WithValue(ctx, watermill.ContextKeyMessageUUID, msg.UUID)
	ctx = context.WithValue(ctx, watermill.ContextKeyRawMessageID, xm.ID)
	ctx, cancelCtx := context.WithCancel(ctx)
	msg.SetContext(ctx)
	defer cancelCtx()

	receivedMsgLogFields = receivedMsgLogFields.Add(watermill.LogFields{
		"message_uuid": msg.UUID,
	})

ResendLoop:
	for {
		select {
		case h.outputChannel <- msg:
			h.logger.Trace("[Common] watermill redis message sent to consumer", receivedMsgLogFields)
		case <-h.closing:
			h.logger.Trace("[Common] watermill redis closing, message discarded", receivedMsgLogFields)
			return nil
		case <-ctx.Done():
			h.logger.Trace("[Common] watermill redis closing, ctx cancelled before sent to consumer",
				receivedMsgLogFields)
			return nil
		}

		select {
		case <-msg.Acked():
			if h.consumerGroup != "" {
				// retry the XACK indefinitely, sleeping 100ms between attempts,
				// until it succeeds or the subscriber is closing / ctx is done
				err := retry.Retry(func(attempt uint) error {
					err := h.rc.XAck(ctx, stream, h.consumerGroup, xm.ID).Err()
					return err
				}, func(attempt uint) bool {
					if attempt != 0 {
						time.Sleep(time.Millisecond * 100)
					}
					return true
				}, func(attempt uint) bool {
					select {
					case <-h.closing:
					case <-ctx.Done():
					default:
						return true
					}
					return false
				})
				if err != nil {
					h.logger.Error("[Common] watermill redis message acked fail", err, receivedMsgLogFields)
				}
			}
			h.logger.Trace("[Common] watermill redis message acked", receivedMsgLogFields)
			break ResendLoop
		case <-msg.Nacked():
			h.logger.Trace("[Common] watermill redis message nacked", receivedMsgLogFields)

			// reset acks, etc.
			msg = msg.Copy()
			msg.SetContext(ctx)
			if h.nackResendSleep != NoSleep {
				time.Sleep(h.nackResendSleep)
			}

			continue ResendLoop
		case <-h.closing:
			h.logger.Trace("[Common] watermill redis closing, message discarded before ack", receivedMsgLogFields)
			return nil
		case <-ctx.Done():
			h.logger.Trace("[Common] watermill redis closing, ctx cancelled before ack", receivedMsgLogFields)
			return nil
		}
	}

	return nil
}