github.com/nsqio/nsq@v1.3.0/nsqd/channel.go (about)

     1  package nsqd
     2  
     3  import (
     4  	"container/heap"
     5  	"errors"
     6  	"fmt"
     7  	"math"
     8  	"strings"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/nsqio/go-diskqueue"
    14  
    15  	"github.com/nsqio/nsq/internal/lg"
    16  	"github.com/nsqio/nsq/internal/pqueue"
    17  	"github.com/nsqio/nsq/internal/quantile"
    18  )
    19  
// Consumer is the set of operations a Channel invokes on each of the clients
// registered with it.
type Consumer interface {
	// UnPause is called on every registered client when the channel is
	// un-paused.
	UnPause()
	// Pause is called on every registered client when the channel is paused.
	Pause()
	// Close is called on every registered client when the channel is closed
	// or deleted.
	Close() error
	// TimedOutMessage is called on the owning client each time one of its
	// in-flight messages times out.
	TimedOutMessage()
	// Stats is not called from this file.
	// NOTE(review): presumably returns the client's statistics for the named
	// topic - confirm against the implementation.
	Stats(string) ClientStats
	// Empty is called on every registered client when the channel is emptied.
	Empty()
}
    28  
// Channel represents the concrete type for a NSQ channel (and also
// implements the Queue interface)
//
// There can be multiple channels per topic, each with their own unique set
// of subscribers (clients).
//
// Channels maintain all client and message metadata, orchestrating in-flight
// messages, timeouts, requeuing, etc.
type Channel struct {
	// 64bit atomic vars need to be first for proper alignment on 32bit platforms
	// requeueCount is incremented by RequeueMessage.
	requeueCount uint64
	// messageCount is incremented by PutMessage and PutMessageDeferred.
	messageCount uint64
	// timeoutCount is incremented for every in-flight message that times out.
	timeoutCount uint64

	// The embedded lock guards the clients map.
	sync.RWMutex

	// topicName and name identify the channel in log and error messages.
	topicName string
	name      string
	// nsqd is the owning daemon; it supplies options, logging, health
	// reporting and notifications.
	nsqd *NSQD

	// backend receives messages that the memory queue cannot accept.
	backend BackendQueue

	// memoryMsgChan is the in-memory queue; it stays nil when MemQueueSize is
	// 0 and the channel is not ephemeral.
	memoryMsgChan chan *Message
	// exitFlag is flipped from 0 to 1 (atomically) when exit begins.
	exitFlag int32
	// exitMutex is write-locked for the duration of exit and read-locked by
	// operations that must not overlap with it.
	exitMutex sync.RWMutex

	// state tracking
	// clients maps a client ID to its Consumer.
	clients map[int64]Consumer
	// paused is 1 while the channel is paused; accessed atomically.
	paused int32
	// ephemeral is true when the channel name ends in "#ephemeral".
	ephemeral bool
	// deleteCallback is invoked, at most once via deleter, after the last
	// client of an ephemeral channel is removed.
	deleteCallback func(*Channel)
	deleter        sync.Once

	// Stats tracking
	// e2eProcessingLatencyStream is nil unless E2EProcessingLatencyPercentiles
	// is configured.
	e2eProcessingLatencyStream *quantile.Quantile

	// TODO: these can be DRYd up
	// deferredMessages and deferredPQ are guarded by deferredMutex.
	deferredMessages map[MessageID]*pqueue.Item
	deferredPQ       pqueue.PriorityQueue
	deferredMutex    sync.Mutex
	// inFlightMessages and inFlightPQ are guarded by inFlightMutex.
	inFlightMessages map[MessageID]*Message
	inFlightPQ       inFlightPqueue
	inFlightMutex    sync.Mutex
}
    73  
    74  // NewChannel creates a new instance of the Channel type and returns a pointer
    75  func NewChannel(topicName string, channelName string, nsqd *NSQD,
    76  	deleteCallback func(*Channel)) *Channel {
    77  
    78  	c := &Channel{
    79  		topicName:      topicName,
    80  		name:           channelName,
    81  		memoryMsgChan:  nil,
    82  		clients:        make(map[int64]Consumer),
    83  		deleteCallback: deleteCallback,
    84  		nsqd:           nsqd,
    85  		ephemeral:      strings.HasSuffix(channelName, "#ephemeral"),
    86  	}
    87  	// avoid mem-queue if size == 0 for more consistent ordering
    88  	if nsqd.getOpts().MemQueueSize > 0 || c.ephemeral {
    89  		c.memoryMsgChan = make(chan *Message, nsqd.getOpts().MemQueueSize)
    90  	}
    91  	if len(nsqd.getOpts().E2EProcessingLatencyPercentiles) > 0 {
    92  		c.e2eProcessingLatencyStream = quantile.New(
    93  			nsqd.getOpts().E2EProcessingLatencyWindowTime,
    94  			nsqd.getOpts().E2EProcessingLatencyPercentiles,
    95  		)
    96  	}
    97  
    98  	c.initPQ()
    99  
   100  	if c.ephemeral {
   101  		c.backend = newDummyBackendQueue()
   102  	} else {
   103  		dqLogf := func(level diskqueue.LogLevel, f string, args ...interface{}) {
   104  			opts := nsqd.getOpts()
   105  			lg.Logf(opts.Logger, opts.LogLevel, lg.LogLevel(level), f, args...)
   106  		}
   107  		// backend names, for uniqueness, automatically include the topic...
   108  		backendName := getBackendName(topicName, channelName)
   109  		c.backend = diskqueue.New(
   110  			backendName,
   111  			nsqd.getOpts().DataPath,
   112  			nsqd.getOpts().MaxBytesPerFile,
   113  			int32(minValidMsgLength),
   114  			int32(nsqd.getOpts().MaxMsgSize)+minValidMsgLength,
   115  			nsqd.getOpts().SyncEvery,
   116  			nsqd.getOpts().SyncTimeout,
   117  			dqLogf,
   118  		)
   119  	}
   120  
   121  	c.nsqd.Notify(c, !c.ephemeral)
   122  
   123  	return c
   124  }
   125  
   126  func (c *Channel) initPQ() {
   127  	pqSize := int(math.Max(1, float64(c.nsqd.getOpts().MemQueueSize)/10))
   128  
   129  	c.inFlightMutex.Lock()
   130  	c.inFlightMessages = make(map[MessageID]*Message)
   131  	c.inFlightPQ = newInFlightPqueue(pqSize)
   132  	c.inFlightMutex.Unlock()
   133  
   134  	c.deferredMutex.Lock()
   135  	c.deferredMessages = make(map[MessageID]*pqueue.Item)
   136  	c.deferredPQ = pqueue.New(pqSize)
   137  	c.deferredMutex.Unlock()
   138  }
   139  
   140  // Exiting returns a boolean indicating if this channel is closed/exiting
   141  func (c *Channel) Exiting() bool {
   142  	return atomic.LoadInt32(&c.exitFlag) == 1
   143  }
   144  
   145  // Delete empties the channel and closes
   146  func (c *Channel) Delete() error {
   147  	return c.exit(true)
   148  }
   149  
   150  // Close cleanly closes the Channel
   151  func (c *Channel) Close() error {
   152  	return c.exit(false)
   153  }
   154  
   155  func (c *Channel) exit(deleted bool) error {
   156  	c.exitMutex.Lock()
   157  	defer c.exitMutex.Unlock()
   158  
   159  	if !atomic.CompareAndSwapInt32(&c.exitFlag, 0, 1) {
   160  		return errors.New("exiting")
   161  	}
   162  
   163  	if deleted {
   164  		c.nsqd.logf(LOG_INFO, "CHANNEL(%s): deleting", c.name)
   165  
   166  		// since we are explicitly deleting a channel (not just at system exit time)
   167  		// de-register this from the lookupd
   168  		c.nsqd.Notify(c, !c.ephemeral)
   169  	} else {
   170  		c.nsqd.logf(LOG_INFO, "CHANNEL(%s): closing", c.name)
   171  	}
   172  
   173  	// this forceably closes client connections
   174  	c.RLock()
   175  	for _, client := range c.clients {
   176  		client.Close()
   177  	}
   178  	c.RUnlock()
   179  
   180  	if deleted {
   181  		// empty the queue (deletes the backend files, too)
   182  		c.Empty()
   183  		return c.backend.Delete()
   184  	}
   185  
   186  	// write anything leftover to disk
   187  	c.flush()
   188  	return c.backend.Close()
   189  }
   190  
   191  func (c *Channel) Empty() error {
   192  	c.Lock()
   193  	defer c.Unlock()
   194  
   195  	c.initPQ()
   196  	for _, client := range c.clients {
   197  		client.Empty()
   198  	}
   199  
   200  	for {
   201  		select {
   202  		case <-c.memoryMsgChan:
   203  		default:
   204  			goto finish
   205  		}
   206  	}
   207  
   208  finish:
   209  	return c.backend.Empty()
   210  }
   211  
   212  // flush persists all the messages in internal memory buffers to the backend
   213  // it does not drain inflight/deferred because it is only called in Close()
   214  func (c *Channel) flush() error {
   215  	if len(c.memoryMsgChan) > 0 || len(c.inFlightMessages) > 0 || len(c.deferredMessages) > 0 {
   216  		c.nsqd.logf(LOG_INFO, "CHANNEL(%s): flushing %d memory %d in-flight %d deferred messages to backend",
   217  			c.name, len(c.memoryMsgChan), len(c.inFlightMessages), len(c.deferredMessages))
   218  	}
   219  
   220  	for {
   221  		select {
   222  		case msg := <-c.memoryMsgChan:
   223  			err := writeMessageToBackend(msg, c.backend)
   224  			if err != nil {
   225  				c.nsqd.logf(LOG_ERROR, "failed to write message to backend - %s", err)
   226  			}
   227  		default:
   228  			goto finish
   229  		}
   230  	}
   231  
   232  finish:
   233  	c.inFlightMutex.Lock()
   234  	for _, msg := range c.inFlightMessages {
   235  		err := writeMessageToBackend(msg, c.backend)
   236  		if err != nil {
   237  			c.nsqd.logf(LOG_ERROR, "failed to write message to backend - %s", err)
   238  		}
   239  	}
   240  	c.inFlightMutex.Unlock()
   241  
   242  	c.deferredMutex.Lock()
   243  	for _, item := range c.deferredMessages {
   244  		msg := item.Value.(*Message)
   245  		err := writeMessageToBackend(msg, c.backend)
   246  		if err != nil {
   247  			c.nsqd.logf(LOG_ERROR, "failed to write message to backend - %s", err)
   248  		}
   249  	}
   250  	c.deferredMutex.Unlock()
   251  
   252  	return nil
   253  }
   254  
   255  func (c *Channel) Depth() int64 {
   256  	return int64(len(c.memoryMsgChan)) + c.backend.Depth()
   257  }
   258  
   259  func (c *Channel) Pause() error {
   260  	return c.doPause(true)
   261  }
   262  
   263  func (c *Channel) UnPause() error {
   264  	return c.doPause(false)
   265  }
   266  
   267  func (c *Channel) doPause(pause bool) error {
   268  	if pause {
   269  		atomic.StoreInt32(&c.paused, 1)
   270  	} else {
   271  		atomic.StoreInt32(&c.paused, 0)
   272  	}
   273  
   274  	c.RLock()
   275  	for _, client := range c.clients {
   276  		if pause {
   277  			client.Pause()
   278  		} else {
   279  			client.UnPause()
   280  		}
   281  	}
   282  	c.RUnlock()
   283  	return nil
   284  }
   285  
   286  func (c *Channel) IsPaused() bool {
   287  	return atomic.LoadInt32(&c.paused) == 1
   288  }
   289  
   290  // PutMessage writes a Message to the queue
   291  func (c *Channel) PutMessage(m *Message) error {
   292  	c.exitMutex.RLock()
   293  	defer c.exitMutex.RUnlock()
   294  	if c.Exiting() {
   295  		return errors.New("exiting")
   296  	}
   297  	err := c.put(m)
   298  	if err != nil {
   299  		return err
   300  	}
   301  	atomic.AddUint64(&c.messageCount, 1)
   302  	return nil
   303  }
   304  
   305  func (c *Channel) put(m *Message) error {
   306  	select {
   307  	case c.memoryMsgChan <- m:
   308  	default:
   309  		err := writeMessageToBackend(m, c.backend)
   310  		c.nsqd.SetHealth(err)
   311  		if err != nil {
   312  			c.nsqd.logf(LOG_ERROR, "CHANNEL(%s): failed to write message to backend - %s",
   313  				c.name, err)
   314  			return err
   315  		}
   316  	}
   317  	return nil
   318  }
   319  
   320  func (c *Channel) PutMessageDeferred(msg *Message, timeout time.Duration) {
   321  	atomic.AddUint64(&c.messageCount, 1)
   322  	c.StartDeferredTimeout(msg, timeout)
   323  }
   324  
   325  // TouchMessage resets the timeout for an in-flight message
   326  func (c *Channel) TouchMessage(clientID int64, id MessageID, clientMsgTimeout time.Duration) error {
   327  	msg, err := c.popInFlightMessage(clientID, id)
   328  	if err != nil {
   329  		return err
   330  	}
   331  	c.removeFromInFlightPQ(msg)
   332  
   333  	newTimeout := time.Now().Add(clientMsgTimeout)
   334  	if newTimeout.Sub(msg.deliveryTS) >=
   335  		c.nsqd.getOpts().MaxMsgTimeout {
   336  		// we would have gone over, set to the max
   337  		newTimeout = msg.deliveryTS.Add(c.nsqd.getOpts().MaxMsgTimeout)
   338  	}
   339  
   340  	msg.pri = newTimeout.UnixNano()
   341  	err = c.pushInFlightMessage(msg)
   342  	if err != nil {
   343  		return err
   344  	}
   345  	c.addToInFlightPQ(msg)
   346  	return nil
   347  }
   348  
   349  // FinishMessage successfully discards an in-flight message
   350  func (c *Channel) FinishMessage(clientID int64, id MessageID) error {
   351  	msg, err := c.popInFlightMessage(clientID, id)
   352  	if err != nil {
   353  		return err
   354  	}
   355  	c.removeFromInFlightPQ(msg)
   356  	if c.e2eProcessingLatencyStream != nil {
   357  		c.e2eProcessingLatencyStream.Insert(msg.Timestamp)
   358  	}
   359  	return nil
   360  }
   361  
   362  // RequeueMessage requeues a message based on `time.Duration`, ie:
   363  //
   364  // `timeoutMs` == 0 - requeue a message immediately
   365  // `timeoutMs`  > 0 - asynchronously wait for the specified timeout
   366  //
   367  //	and requeue a message (aka "deferred requeue")
   368  func (c *Channel) RequeueMessage(clientID int64, id MessageID, timeout time.Duration) error {
   369  	// remove from inflight first
   370  	msg, err := c.popInFlightMessage(clientID, id)
   371  	if err != nil {
   372  		return err
   373  	}
   374  	c.removeFromInFlightPQ(msg)
   375  	atomic.AddUint64(&c.requeueCount, 1)
   376  
   377  	if timeout == 0 {
   378  		c.exitMutex.RLock()
   379  		if c.Exiting() {
   380  			c.exitMutex.RUnlock()
   381  			return errors.New("exiting")
   382  		}
   383  		err := c.put(msg)
   384  		c.exitMutex.RUnlock()
   385  		return err
   386  	}
   387  
   388  	// deferred requeue
   389  	return c.StartDeferredTimeout(msg, timeout)
   390  }
   391  
   392  // AddClient adds a client to the Channel's client list
   393  func (c *Channel) AddClient(clientID int64, client Consumer) error {
   394  	c.exitMutex.RLock()
   395  	defer c.exitMutex.RUnlock()
   396  
   397  	if c.Exiting() {
   398  		return errors.New("exiting")
   399  	}
   400  
   401  	c.RLock()
   402  	_, ok := c.clients[clientID]
   403  	numClients := len(c.clients)
   404  	c.RUnlock()
   405  	if ok {
   406  		return nil
   407  	}
   408  
   409  	maxChannelConsumers := c.nsqd.getOpts().MaxChannelConsumers
   410  	if maxChannelConsumers != 0 && numClients >= maxChannelConsumers {
   411  		return fmt.Errorf("consumers for %s:%s exceeds limit of %d",
   412  			c.topicName, c.name, maxChannelConsumers)
   413  	}
   414  
   415  	c.Lock()
   416  	c.clients[clientID] = client
   417  	c.Unlock()
   418  	return nil
   419  }
   420  
   421  // RemoveClient removes a client from the Channel's client list
   422  func (c *Channel) RemoveClient(clientID int64) {
   423  	c.exitMutex.RLock()
   424  	defer c.exitMutex.RUnlock()
   425  
   426  	if c.Exiting() {
   427  		return
   428  	}
   429  
   430  	c.RLock()
   431  	_, ok := c.clients[clientID]
   432  	c.RUnlock()
   433  	if !ok {
   434  		return
   435  	}
   436  
   437  	c.Lock()
   438  	delete(c.clients, clientID)
   439  	numClients := len(c.clients)
   440  	c.Unlock()
   441  
   442  	if numClients == 0 && c.ephemeral {
   443  		go c.deleter.Do(func() { c.deleteCallback(c) })
   444  	}
   445  }
   446  
   447  func (c *Channel) StartInFlightTimeout(msg *Message, clientID int64, timeout time.Duration) error {
   448  	now := time.Now()
   449  	msg.clientID = clientID
   450  	msg.deliveryTS = now
   451  	msg.pri = now.Add(timeout).UnixNano()
   452  	err := c.pushInFlightMessage(msg)
   453  	if err != nil {
   454  		return err
   455  	}
   456  	c.addToInFlightPQ(msg)
   457  	return nil
   458  }
   459  
   460  func (c *Channel) StartDeferredTimeout(msg *Message, timeout time.Duration) error {
   461  	absTs := time.Now().Add(timeout).UnixNano()
   462  	item := &pqueue.Item{Value: msg, Priority: absTs}
   463  	err := c.pushDeferredMessage(item)
   464  	if err != nil {
   465  		return err
   466  	}
   467  	c.addToDeferredPQ(item)
   468  	return nil
   469  }
   470  
   471  // pushInFlightMessage atomically adds a message to the in-flight dictionary
   472  func (c *Channel) pushInFlightMessage(msg *Message) error {
   473  	c.inFlightMutex.Lock()
   474  	_, ok := c.inFlightMessages[msg.ID]
   475  	if ok {
   476  		c.inFlightMutex.Unlock()
   477  		return errors.New("ID already in flight")
   478  	}
   479  	c.inFlightMessages[msg.ID] = msg
   480  	c.inFlightMutex.Unlock()
   481  	return nil
   482  }
   483  
   484  // popInFlightMessage atomically removes a message from the in-flight dictionary
   485  func (c *Channel) popInFlightMessage(clientID int64, id MessageID) (*Message, error) {
   486  	c.inFlightMutex.Lock()
   487  	msg, ok := c.inFlightMessages[id]
   488  	if !ok {
   489  		c.inFlightMutex.Unlock()
   490  		return nil, errors.New("ID not in flight")
   491  	}
   492  	if msg.clientID != clientID {
   493  		c.inFlightMutex.Unlock()
   494  		return nil, errors.New("client does not own message")
   495  	}
   496  	delete(c.inFlightMessages, id)
   497  	c.inFlightMutex.Unlock()
   498  	return msg, nil
   499  }
   500  
   501  func (c *Channel) addToInFlightPQ(msg *Message) {
   502  	c.inFlightMutex.Lock()
   503  	c.inFlightPQ.Push(msg)
   504  	c.inFlightMutex.Unlock()
   505  }
   506  
   507  func (c *Channel) removeFromInFlightPQ(msg *Message) {
   508  	c.inFlightMutex.Lock()
   509  	if msg.index == -1 {
   510  		// this item has already been popped off the pqueue
   511  		c.inFlightMutex.Unlock()
   512  		return
   513  	}
   514  	c.inFlightPQ.Remove(msg.index)
   515  	c.inFlightMutex.Unlock()
   516  }
   517  
   518  func (c *Channel) pushDeferredMessage(item *pqueue.Item) error {
   519  	c.deferredMutex.Lock()
   520  	// TODO: these map lookups are costly
   521  	id := item.Value.(*Message).ID
   522  	_, ok := c.deferredMessages[id]
   523  	if ok {
   524  		c.deferredMutex.Unlock()
   525  		return errors.New("ID already deferred")
   526  	}
   527  	c.deferredMessages[id] = item
   528  	c.deferredMutex.Unlock()
   529  	return nil
   530  }
   531  
   532  func (c *Channel) popDeferredMessage(id MessageID) (*pqueue.Item, error) {
   533  	c.deferredMutex.Lock()
   534  	// TODO: these map lookups are costly
   535  	item, ok := c.deferredMessages[id]
   536  	if !ok {
   537  		c.deferredMutex.Unlock()
   538  		return nil, errors.New("ID not deferred")
   539  	}
   540  	delete(c.deferredMessages, id)
   541  	c.deferredMutex.Unlock()
   542  	return item, nil
   543  }
   544  
   545  func (c *Channel) addToDeferredPQ(item *pqueue.Item) {
   546  	c.deferredMutex.Lock()
   547  	heap.Push(&c.deferredPQ, item)
   548  	c.deferredMutex.Unlock()
   549  }
   550  
   551  func (c *Channel) processDeferredQueue(t int64) bool {
   552  	c.exitMutex.RLock()
   553  	defer c.exitMutex.RUnlock()
   554  
   555  	if c.Exiting() {
   556  		return false
   557  	}
   558  
   559  	dirty := false
   560  	for {
   561  		c.deferredMutex.Lock()
   562  		item, _ := c.deferredPQ.PeekAndShift(t)
   563  		c.deferredMutex.Unlock()
   564  
   565  		if item == nil {
   566  			goto exit
   567  		}
   568  		dirty = true
   569  
   570  		msg := item.Value.(*Message)
   571  		_, err := c.popDeferredMessage(msg.ID)
   572  		if err != nil {
   573  			goto exit
   574  		}
   575  		c.put(msg)
   576  	}
   577  
   578  exit:
   579  	return dirty
   580  }
   581  
   582  func (c *Channel) processInFlightQueue(t int64) bool {
   583  	c.exitMutex.RLock()
   584  	defer c.exitMutex.RUnlock()
   585  
   586  	if c.Exiting() {
   587  		return false
   588  	}
   589  
   590  	dirty := false
   591  	for {
   592  		c.inFlightMutex.Lock()
   593  		msg, _ := c.inFlightPQ.PeekAndShift(t)
   594  		c.inFlightMutex.Unlock()
   595  
   596  		if msg == nil {
   597  			goto exit
   598  		}
   599  		dirty = true
   600  
   601  		_, err := c.popInFlightMessage(msg.clientID, msg.ID)
   602  		if err != nil {
   603  			goto exit
   604  		}
   605  		atomic.AddUint64(&c.timeoutCount, 1)
   606  		c.RLock()
   607  		client, ok := c.clients[msg.clientID]
   608  		c.RUnlock()
   609  		if ok {
   610  			client.TimedOutMessage()
   611  		}
   612  		c.put(msg)
   613  	}
   614  
   615  exit:
   616  	return dirty
   617  }