github.com/nsqio/nsq@v1.3.0/nsqd/topic.go (about)

     1  package nsqd
     2  
     3  import (
     4  	"errors"
     5  	"strings"
     6  	"sync"
     7  	"sync/atomic"
     8  	"time"
     9  
    10  	"github.com/nsqio/go-diskqueue"
    11  	"github.com/nsqio/nsq/internal/lg"
    12  	"github.com/nsqio/nsq/internal/quantile"
    13  	"github.com/nsqio/nsq/internal/util"
    14  )
    15  
// Topic holds the state of a single topic: its name, the channels attached
// to it, and the in-memory and backend queues that buffer published messages
// until messagePump copies them to every channel.
type Topic struct {
	// 64bit atomic vars need to be first for proper alignment on 32bit platforms
	// messageCount is the number of messages accepted by PutMessage/PutMessages.
	messageCount uint64
	// messageBytes is the sum of the body lengths of the accepted messages.
	messageBytes uint64

	// The embedded lock guards channelMap; PutMessage and PutMessages also
	// hold the read lock while they enqueue.
	sync.RWMutex

	name              string
	// channelMap holds the topic's channels keyed by channel name.
	channelMap        map[string]*Channel
	// backend receives messages that do not go to memoryMsgChan; ephemeral
	// topics get a dummy backend queue.
	backend           BackendQueue
	// memoryMsgChan is the in-memory queue, created with capacity MemQueueSize.
	memoryMsgChan     chan *Message
	// startChan (buffered, size 1) is signalled by Start to release messagePump.
	startChan         chan int
	// exitChan is closed by exit to stop messagePump and unblock senders.
	exitChan          chan int
	// channelUpdateChan tells messagePump that channelMap has changed.
	channelUpdateChan chan int
	// waitGroup tracks the messagePump goroutine so exit can wait for it.
	waitGroup         util.WaitGroupWrapper
	// exitFlag is set to 1 (atomically) once exit has begun.
	exitFlag          int32
	// idFactory produces the GUIDs returned by GenerateID.
	idFactory         *guidFactory

	// ephemeral is true when the topic name ends in "#ephemeral".
	ephemeral      bool
	// deleteCallback is invoked when an ephemeral topic loses its last channel.
	deleteCallback func(*Topic)
	// deleter ensures deleteCallback is started at most once.
	deleter        sync.Once

	// paused is 1 while the topic is paused (accessed atomically).
	paused    int32
	// pauseChan tells messagePump that paused has changed.
	pauseChan chan int

	nsqd *NSQD
}
    43  
    44  // Topic constructor
    45  func NewTopic(topicName string, nsqd *NSQD, deleteCallback func(*Topic)) *Topic {
    46  	t := &Topic{
    47  		name:              topicName,
    48  		channelMap:        make(map[string]*Channel),
    49  		memoryMsgChan:     make(chan *Message, nsqd.getOpts().MemQueueSize),
    50  		startChan:         make(chan int, 1),
    51  		exitChan:          make(chan int),
    52  		channelUpdateChan: make(chan int),
    53  		nsqd:              nsqd,
    54  		paused:            0,
    55  		pauseChan:         make(chan int),
    56  		deleteCallback:    deleteCallback,
    57  		idFactory:         NewGUIDFactory(nsqd.getOpts().ID),
    58  	}
    59  	if strings.HasSuffix(topicName, "#ephemeral") {
    60  		t.ephemeral = true
    61  		t.backend = newDummyBackendQueue()
    62  	} else {
    63  		dqLogf := func(level diskqueue.LogLevel, f string, args ...interface{}) {
    64  			opts := nsqd.getOpts()
    65  			lg.Logf(opts.Logger, opts.LogLevel, lg.LogLevel(level), f, args...)
    66  		}
    67  		t.backend = diskqueue.New(
    68  			topicName,
    69  			nsqd.getOpts().DataPath,
    70  			nsqd.getOpts().MaxBytesPerFile,
    71  			int32(minValidMsgLength),
    72  			int32(nsqd.getOpts().MaxMsgSize)+minValidMsgLength,
    73  			nsqd.getOpts().SyncEvery,
    74  			nsqd.getOpts().SyncTimeout,
    75  			dqLogf,
    76  		)
    77  	}
    78  
    79  	t.waitGroup.Wrap(t.messagePump)
    80  
    81  	t.nsqd.Notify(t, !t.ephemeral)
    82  
    83  	return t
    84  }
    85  
    86  func (t *Topic) Start() {
    87  	select {
    88  	case t.startChan <- 1:
    89  	default:
    90  	}
    91  }
    92  
    93  // Exiting returns a boolean indicating if this topic is closed/exiting
    94  func (t *Topic) Exiting() bool {
    95  	return atomic.LoadInt32(&t.exitFlag) == 1
    96  }
    97  
    98  // GetChannel performs a thread safe operation
    99  // to return a pointer to a Channel object (potentially new)
   100  // for the given Topic
   101  func (t *Topic) GetChannel(channelName string) *Channel {
   102  	t.Lock()
   103  	channel, isNew := t.getOrCreateChannel(channelName)
   104  	t.Unlock()
   105  
   106  	if isNew {
   107  		// update messagePump state
   108  		select {
   109  		case t.channelUpdateChan <- 1:
   110  		case <-t.exitChan:
   111  		}
   112  	}
   113  
   114  	return channel
   115  }
   116  
   117  // this expects the caller to handle locking
   118  func (t *Topic) getOrCreateChannel(channelName string) (*Channel, bool) {
   119  	channel, ok := t.channelMap[channelName]
   120  	if !ok {
   121  		deleteCallback := func(c *Channel) {
   122  			t.DeleteExistingChannel(c.name)
   123  		}
   124  		channel = NewChannel(t.name, channelName, t.nsqd, deleteCallback)
   125  		t.channelMap[channelName] = channel
   126  		t.nsqd.logf(LOG_INFO, "TOPIC(%s): new channel(%s)", t.name, channel.name)
   127  		return channel, true
   128  	}
   129  	return channel, false
   130  }
   131  
   132  func (t *Topic) GetExistingChannel(channelName string) (*Channel, error) {
   133  	t.RLock()
   134  	defer t.RUnlock()
   135  	channel, ok := t.channelMap[channelName]
   136  	if !ok {
   137  		return nil, errors.New("channel does not exist")
   138  	}
   139  	return channel, nil
   140  }
   141  
   142  // DeleteExistingChannel removes a channel from the topic only if it exists
   143  func (t *Topic) DeleteExistingChannel(channelName string) error {
   144  	t.RLock()
   145  	channel, ok := t.channelMap[channelName]
   146  	t.RUnlock()
   147  	if !ok {
   148  		return errors.New("channel does not exist")
   149  	}
   150  
   151  	t.nsqd.logf(LOG_INFO, "TOPIC(%s): deleting channel %s", t.name, channel.name)
   152  
   153  	// delete empties the channel before closing
   154  	// (so that we dont leave any messages around)
   155  	//
   156  	// we do this before removing the channel from map below (with no lock)
   157  	// so that any incoming subs will error and not create a new channel
   158  	// to enforce ordering
   159  	channel.Delete()
   160  
   161  	t.Lock()
   162  	delete(t.channelMap, channelName)
   163  	numChannels := len(t.channelMap)
   164  	t.Unlock()
   165  
   166  	// update messagePump state
   167  	select {
   168  	case t.channelUpdateChan <- 1:
   169  	case <-t.exitChan:
   170  	}
   171  
   172  	if numChannels == 0 && t.ephemeral {
   173  		go t.deleter.Do(func() { t.deleteCallback(t) })
   174  	}
   175  
   176  	return nil
   177  }
   178  
   179  // PutMessage writes a Message to the queue
   180  func (t *Topic) PutMessage(m *Message) error {
   181  	t.RLock()
   182  	defer t.RUnlock()
   183  	if atomic.LoadInt32(&t.exitFlag) == 1 {
   184  		return errors.New("exiting")
   185  	}
   186  	err := t.put(m)
   187  	if err != nil {
   188  		return err
   189  	}
   190  	atomic.AddUint64(&t.messageCount, 1)
   191  	atomic.AddUint64(&t.messageBytes, uint64(len(m.Body)))
   192  	return nil
   193  }
   194  
   195  // PutMessages writes multiple Messages to the queue
   196  func (t *Topic) PutMessages(msgs []*Message) error {
   197  	t.RLock()
   198  	defer t.RUnlock()
   199  	if atomic.LoadInt32(&t.exitFlag) == 1 {
   200  		return errors.New("exiting")
   201  	}
   202  
   203  	messageTotalBytes := 0
   204  
   205  	for i, m := range msgs {
   206  		err := t.put(m)
   207  		if err != nil {
   208  			atomic.AddUint64(&t.messageCount, uint64(i))
   209  			atomic.AddUint64(&t.messageBytes, uint64(messageTotalBytes))
   210  			return err
   211  		}
   212  		messageTotalBytes += len(m.Body)
   213  	}
   214  
   215  	atomic.AddUint64(&t.messageBytes, uint64(messageTotalBytes))
   216  	atomic.AddUint64(&t.messageCount, uint64(len(msgs)))
   217  	return nil
   218  }
   219  
   220  func (t *Topic) put(m *Message) error {
   221  	// If mem-queue-size == 0, avoid memory chan, for more consistent ordering,
   222  	// but try to use memory chan for deferred messages (they lose deferred timer
   223  	// in backend queue) or if topic is ephemeral (there is no backend queue).
   224  	if cap(t.memoryMsgChan) > 0 || t.ephemeral || m.deferred != 0 {
   225  		select {
   226  		case t.memoryMsgChan <- m:
   227  			return nil
   228  		default:
   229  			break // write to backend
   230  		}
   231  	}
   232  	err := writeMessageToBackend(m, t.backend)
   233  	t.nsqd.SetHealth(err)
   234  	if err != nil {
   235  		t.nsqd.logf(LOG_ERROR,
   236  			"TOPIC(%s) ERROR: failed to write message to backend - %s",
   237  			t.name, err)
   238  		return err
   239  	}
   240  	return nil
   241  }
   242  
   243  func (t *Topic) Depth() int64 {
   244  	return int64(len(t.memoryMsgChan)) + t.backend.Depth()
   245  }
   246  
// messagePump selects over the in-memory and backend queue and
// writes messages to every channel for this topic.
//
// It runs in two phases: it first waits for Start (or exit), then enters the
// main loop, which reads messages and fans them out until exitChan is
// closed. While the topic has no channels or is paused, the two message
// sources are set to nil so that their select cases can never fire.
func (t *Topic) messagePump() {
	var msg *Message
	var buf []byte
	var err error
	var chans []*Channel
	var memoryMsgChan chan *Message
	var backendChan <-chan []byte

	// do not pass messages before Start(), but avoid blocking Pause() or GetChannel()
	// (their notifications are received and discarded here; the actual state
	// is read once the start signal arrives)
	for {
		select {
		case <-t.channelUpdateChan:
			continue
		case <-t.pauseChan:
			continue
		case <-t.exitChan:
			goto exit
		case <-t.startChan:
		}
		break
	}
	// take the initial snapshot of the topic's channels
	t.RLock()
	for _, c := range t.channelMap {
		chans = append(chans, c)
	}
	t.RUnlock()
	// only enable the message sources when there is somewhere to deliver to
	// and the topic is not paused
	if len(chans) > 0 && !t.IsPaused() {
		memoryMsgChan = t.memoryMsgChan
		backendChan = t.backend.ReadChan()
	}

	// main message loop
	for {
		select {
		case msg = <-memoryMsgChan:
		case buf = <-backendChan:
			msg, err = decodeMessage(buf)
			if err != nil {
				// an undecodable backend entry is logged and skipped
				t.nsqd.logf(LOG_ERROR, "failed to decode message - %s", err)
				continue
			}
		case <-t.channelUpdateChan:
			// the channel set changed: rebuild the snapshot (reusing the
			// slice's storage) and re-evaluate whether to read messages
			chans = chans[:0]
			t.RLock()
			for _, c := range t.channelMap {
				chans = append(chans, c)
			}
			t.RUnlock()
			if len(chans) == 0 || t.IsPaused() {
				memoryMsgChan = nil
				backendChan = nil
			} else {
				memoryMsgChan = t.memoryMsgChan
				backendChan = t.backend.ReadChan()
			}
			continue
		case <-t.pauseChan:
			// the paused flag changed: re-evaluate whether to read messages
			if len(chans) == 0 || t.IsPaused() {
				memoryMsgChan = nil
				backendChan = nil
			} else {
				memoryMsgChan = t.memoryMsgChan
				backendChan = t.backend.ReadChan()
			}
			continue
		case <-t.exitChan:
			goto exit
		}

		// deliver the message that was just read to every channel
		for i, channel := range chans {
			chanMsg := msg
			// copy the message because each channel
			// needs a unique instance but...
			// fastpath to avoid copy if its the first channel
			// (the topic already created the first copy)
			if i > 0 {
				chanMsg = NewMessage(msg.ID, msg.Body)
				chanMsg.Timestamp = msg.Timestamp
				chanMsg.deferred = msg.deferred
			}
			// deferred messages go through the channel's deferred path
			if chanMsg.deferred != 0 {
				channel.PutMessageDeferred(chanMsg, chanMsg.deferred)
				continue
			}
			// note: this err shadows the function-level err; a failure for
			// one channel is logged and does not stop delivery to the rest
			err := channel.PutMessage(chanMsg)
			if err != nil {
				t.nsqd.logf(LOG_ERROR,
					"TOPIC(%s) ERROR: failed to put msg(%s) to channel(%s) - %s",
					t.name, msg.ID, channel.name, err)
			}
		}
	}

exit:
	t.nsqd.logf(LOG_INFO, "TOPIC(%s): closing ... messagePump", t.name)
}
   345  
   346  // Delete empties the topic and all its channels and closes
   347  func (t *Topic) Delete() error {
   348  	return t.exit(true)
   349  }
   350  
   351  // Close persists all outstanding topic data and closes all its channels
   352  func (t *Topic) Close() error {
   353  	return t.exit(false)
   354  }
   355  
   356  func (t *Topic) exit(deleted bool) error {
   357  	if !atomic.CompareAndSwapInt32(&t.exitFlag, 0, 1) {
   358  		return errors.New("exiting")
   359  	}
   360  
   361  	if deleted {
   362  		t.nsqd.logf(LOG_INFO, "TOPIC(%s): deleting", t.name)
   363  
   364  		// since we are explicitly deleting a topic (not just at system exit time)
   365  		// de-register this from the lookupd
   366  		t.nsqd.Notify(t, !t.ephemeral)
   367  	} else {
   368  		t.nsqd.logf(LOG_INFO, "TOPIC(%s): closing", t.name)
   369  	}
   370  
   371  	close(t.exitChan)
   372  
   373  	// synchronize the close of messagePump()
   374  	t.waitGroup.Wait()
   375  
   376  	if deleted {
   377  		t.Lock()
   378  		for _, channel := range t.channelMap {
   379  			delete(t.channelMap, channel.name)
   380  			channel.Delete()
   381  		}
   382  		t.Unlock()
   383  
   384  		// empty the queue (deletes the backend files, too)
   385  		t.Empty()
   386  		return t.backend.Delete()
   387  	}
   388  
   389  	// close all the channels
   390  	t.RLock()
   391  	for _, channel := range t.channelMap {
   392  		err := channel.Close()
   393  		if err != nil {
   394  			// we need to continue regardless of error to close all the channels
   395  			t.nsqd.logf(LOG_ERROR, "channel(%s) close - %s", channel.name, err)
   396  		}
   397  	}
   398  	t.RUnlock()
   399  
   400  	// write anything leftover to disk
   401  	t.flush()
   402  	return t.backend.Close()
   403  }
   404  
   405  func (t *Topic) Empty() error {
   406  	for {
   407  		select {
   408  		case <-t.memoryMsgChan:
   409  		default:
   410  			goto finish
   411  		}
   412  	}
   413  
   414  finish:
   415  	return t.backend.Empty()
   416  }
   417  
   418  func (t *Topic) flush() error {
   419  	if len(t.memoryMsgChan) > 0 {
   420  		t.nsqd.logf(LOG_INFO,
   421  			"TOPIC(%s): flushing %d memory messages to backend",
   422  			t.name, len(t.memoryMsgChan))
   423  	}
   424  
   425  	for {
   426  		select {
   427  		case msg := <-t.memoryMsgChan:
   428  			err := writeMessageToBackend(msg, t.backend)
   429  			if err != nil {
   430  				t.nsqd.logf(LOG_ERROR,
   431  					"ERROR: failed to write message to backend - %s", err)
   432  			}
   433  		default:
   434  			goto finish
   435  		}
   436  	}
   437  
   438  finish:
   439  	return nil
   440  }
   441  
   442  func (t *Topic) AggregateChannelE2eProcessingLatency() *quantile.Quantile {
   443  	var latencyStream *quantile.Quantile
   444  	t.RLock()
   445  	realChannels := make([]*Channel, 0, len(t.channelMap))
   446  	for _, c := range t.channelMap {
   447  		realChannels = append(realChannels, c)
   448  	}
   449  	t.RUnlock()
   450  	for _, c := range realChannels {
   451  		if c.e2eProcessingLatencyStream == nil {
   452  			continue
   453  		}
   454  		if latencyStream == nil {
   455  			latencyStream = quantile.New(
   456  				t.nsqd.getOpts().E2EProcessingLatencyWindowTime,
   457  				t.nsqd.getOpts().E2EProcessingLatencyPercentiles)
   458  		}
   459  		latencyStream.Merge(c.e2eProcessingLatencyStream)
   460  	}
   461  	return latencyStream
   462  }
   463  
   464  func (t *Topic) Pause() error {
   465  	return t.doPause(true)
   466  }
   467  
   468  func (t *Topic) UnPause() error {
   469  	return t.doPause(false)
   470  }
   471  
   472  func (t *Topic) doPause(pause bool) error {
   473  	if pause {
   474  		atomic.StoreInt32(&t.paused, 1)
   475  	} else {
   476  		atomic.StoreInt32(&t.paused, 0)
   477  	}
   478  
   479  	select {
   480  	case t.pauseChan <- 1:
   481  	case <-t.exitChan:
   482  	}
   483  
   484  	return nil
   485  }
   486  
   487  func (t *Topic) IsPaused() bool {
   488  	return atomic.LoadInt32(&t.paused) == 1
   489  }
   490  
   491  func (t *Topic) GenerateID() MessageID {
   492  	var i int64 = 0
   493  	for {
   494  		id, err := t.idFactory.NewGUID()
   495  		if err == nil {
   496  			return id.Hex()
   497  		}
   498  		if i%10000 == 0 {
   499  			t.nsqd.logf(LOG_ERROR, "TOPIC(%s): failed to create guid - %s", t.name, err)
   500  		}
   501  		time.Sleep(time.Millisecond)
   502  		i++
   503  	}
   504  }