vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/messager/message_manager.go

/*
Copyright 2019 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package messager

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"math/rand"
	"sync"
	"time"

	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/sqltypes"
	"vitess.io/vitess/go/stats"
	"vitess.io/vitess/go/sync2"
	"vitess.io/vitess/go/timer"
	"vitess.io/vitess/go/vt/log"
	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
	querypb "vitess.io/vitess/go/vt/proto/query"
	"vitess.io/vitess/go/vt/sqlparser"
	"vitess.io/vitess/go/vt/vtgate/evalengine"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/schema"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
)

var (
	// MessageStats tracks stats for messages.
	MessageStats = stats.NewGaugesWithMultiLabels(
		"Messages",
		"Stats for messages",
		[]string{"TableName", "Metric"})
)

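// QueryGenerator generates the ack, postpone, and purge queries for a
// message table. messageManager implements it.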
type QueryGenerator interface {
	GenerateAckQuery(ids []string) (string, map[string]*querypb.BindVariable)
	GeneratePostponeQuery(ids []string) (string, map[string]*querypb.BindVariable)
	GeneratePurgeQuery(timeCutoff int64) (string, map[string]*querypb.BindVariable)
}

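// messageReceiver wraps a subscriber's send callback together with the
// context that bounds the lifetime of the subscription.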
type messageReceiver struct {
	ctx     context.Context
	errChan chan error
	send    func(*sqltypes.Result) error
	cancel  context.CancelFunc
}

func newMessageReceiver(ctx context.Context, send func(*sqltypes.Result) error) (*messageReceiver, <-chan struct{}) {
	ctx, cancel := context.WithCancel(ctx)
	rcv := &messageReceiver{
		ctx:     ctx,
		errChan: make(chan error, 1),
		send:    send,
		cancel:  cancel,
	}
	return rcv, ctx.Done()
}

func (rcv *messageReceiver) Send(qr *sqltypes.Result) error {
	// We have to use a channel so we can also
	// monitor the context.
	go func() {
		rcv.errChan <- rcv.send(qr)
	}()
	select {
	case <-rcv.ctx.Done():
		return io.EOF
	case err := <-rcv.errChan:
		if err == io.EOF {
			// This is only a failsafe. If we received an EOF,
			// grpc would have already canceled the context.
			rcv.cancel()
		}
		return err
	}
}

// receiverWithStatus is a separate struct to signify
// that the busy flag is controlled by the messageManager
// mutex.
type receiverWithStatus struct {
	receiver *messageReceiver
	busy     bool
}

// messageManager manages messages for a message table.
//
// messageManager has three core components that interact with each other.
// 1. The cache: this is essentially the send queue. Its size is limited by the
// number of rows.
// 2. The send loop: this loop pulls items out of the cache and sends them to the
// various clients.
// 3. The poller: this wakes up periodically to fill the cache with values by
// reading the message table from the database.
// The message manager operates in three modes:
//
// Idle mode
// This mode is entered if there is no client connected or if there are
// no outstanding messages to be sent. In this mode:
// The cache is empty.
// The send loop is in a cond.Wait state doing nothing.
// The poller wakes up periodically, but terminates immediately.
// Idle mode is exited when there is at least one client connected
// and there are pending messages to be sent.
//
// Steady state mode
// In this mode, there are connected clients and there is a continuous
// stream of messages being sent. The cache is not full, and there are
// occasional resends.
// Message creation inserts rows to the database and adds them to the cache.
// Each addition makes sure the send loop is awakened if it's waiting.
// The send loop continuously pulls items out of the cache and sends them.
// After every successful send, the messages are postponed, and are
// also removed from the cache.
// The poller wakes up periodically, loads messages that are due and adds them
// to the cache. Most of these items are likely to be those that did not
// receive a timely ack.
//
// messagesPending mode
// This mode is a variation of the steady state mode. It is
// entered when there are outstanding items in the database that need to be sent
// but are not present in the cache. This state can be entered in one
// of two ways:
// 1. The poller read returns as many rows as the cache size.
// 2. The Add of a message fails (cache full). This is invoked from the vstream.
// In either case, the messagesPending flag gets turned on.
// In this phase, the send loop proactively wakes up the poller every time
// it clears the cache.
// The system exits the messagesPending state if the number of items the poller
// loads is less than the cache size and all cache adds are successful.
// If so, the system reverts to the steady state mode.
//
// Rate limiting
// There are two ways for the system to rate-limit:
// 1. Client ingestion rate. If clients ingest messages slowly,
// the senders have to wait on them before sending more messages.
// 2. Postpone rate limiting: A client is considered to be non-busy only
// after it has postponed the message it has sent. This way, if postpones
// are too slow, the clients become less available and essentially
// limit the send rate to how fast messages can be postponed.
// The postpone functions also need to obtain a semaphore that limits
// the number of tx pool connections they can occupy.
//
// Client load balancing
// The messages are sent to the clients in a round-robin fashion.
// If, for some reason, a client is closed, the load balancer resets
// by starting with the first non-busy client.
//
// The Purge thread
// This thread is mostly independent. It wakes up periodically
// to delete old rows that were successfully acked.
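//
// For illustration, a message table compatible with the queries built below
// might be declared like this (a sketch only; the payload columns and the
// comment attribute values are application-specific, with the first
// user-defined column serving as the message id):
//
//	create table my_message(
//		id bigint not null,
//		priority bigint not null default 0,
//		epoch bigint,
//		time_next bigint default 0,
//		time_acked bigint,
//		message varchar(128),
//		primary key(id),
//		index poller_idx(time_acked, priority, time_next desc)
//	) comment 'vitess_message,vt_ack_wait=30,vt_purge_after=86400'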
type messageManager struct {
	tsv TabletService
	vs  VStreamer

	name         sqlparser.IdentifierCS
	fieldResult  *sqltypes.Result
	ackWaitTime  time.Duration
	purgeAfter   time.Duration
	minBackoff   time.Duration
	maxBackoff   time.Duration
	batchSize    int
	pollerTicks  *timer.Timer
	purgeTicks   *timer.Timer
	postponeSema *sync2.Semaphore

	mu     sync.Mutex
	isOpen bool
	// cond waits on curReceiver == -1 || cache.IsEmpty():
	// No current receivers available or cache is empty.
	cond            sync.Cond
	cache           *cache
	receivers       []*receiverWithStatus
	curReceiver     int
	messagesPending bool
	// streamCancel is set when a vstream is running, and is reset
	// to nil after a cancel. This allows for startVStream and stopVStream
	// to be idempotent.
	// This is implicitly protected by the main mutex because startVStream
	// and stopVStream are called while holding the main mutex.
	streamCancel func()
	// cacheManagementMu keeps the cache and database consistent with each
	// other by ensuring that only one of the streams is processing messages
	// and updating the cache at a time. The poller uses a results streamer to
	// pull directly from the message table and the message manager uses a
	// binlog streamer to process change events. This mutex ensures that only
	// one of them is updating the cache at any one time.
	// It prevents items from being removed from cache while the poller
	// reads from the db and adds items to it. Otherwise, the poller
	// might add an older snapshot of a row that was just postponed.
	// It blocks the vstream (binlog streamer) from receiving messages while
	// the poller reads a snapshot and updates lastPollPosition. Any events
	// older than lastPollPosition must be ignored by the vstream. It
	// consequently also blocks vstream from updating the cache while the
	// poller is active.
	// TODO(mattlord): since this is primarily a flow control mechanism, we
	// should do it in a more idiomatic go way using channels or cond vars.
	cacheManagementMu sync.Mutex
	// The lastPollPosition variable is the main point of coordination
	// between the poller and the binlog streamer to ensure that we are
	// not re-processing older events and moving along linearly in the
	// shared virtual GTID stream within the message manager.
	// It is theoretically possible for the binlog streamer to be ahead of
	// the lastPollPosition. This is because of how semi-sync works today,
	// where a replica could have received and processed a GTID that the primary
	// may not have yet committed; but this is harmless because any events missed
	// will be picked up during the next poller run.
	lastPollPosition *mysql.Position

	// wg is for ensuring all running goroutines have returned
	// before we can close the manager. You need to Add before
	// launching any goroutine while holding a lock on mu.
	// The goroutine must in turn defer on Done.
	wg sync.WaitGroup

	vsFilter                  *binlogdatapb.Filter
	readByPriorityAndTimeNext *sqlparser.ParsedQuery
	ackQuery                  *sqlparser.ParsedQuery
	postponeQuery             *sqlparser.ParsedQuery
	purgeQuery                *sqlparser.ParsedQuery
}

// newMessageManager creates a new message manager.
// Calls into tsv have to be made asynchronously. Otherwise,
// it can lead to deadlocks.
func newMessageManager(tsv TabletService, vs VStreamer, table *schema.Table, postponeSema *sync2.Semaphore) *messageManager {
	mm := &messageManager{
		tsv:  tsv,
		vs:   vs,
		name: table.Name,
		fieldResult: &sqltypes.Result{
			Fields: table.MessageInfo.Fields,
		},
		ackWaitTime:     table.MessageInfo.AckWaitDuration,
		purgeAfter:      table.MessageInfo.PurgeAfterDuration,
		minBackoff:      table.MessageInfo.MinBackoff,
		maxBackoff:      table.MessageInfo.MaxBackoff,
		batchSize:       table.MessageInfo.BatchSize,
		cache:           newCache(table.MessageInfo.CacheSize),
		pollerTicks:     timer.NewTimer(table.MessageInfo.PollInterval),
		purgeTicks:      timer.NewTimer(table.MessageInfo.PollInterval),
		postponeSema:    postponeSema,
		messagesPending: true,
	}
	mm.cond.L = &mm.mu

	columnList := buildSelectColumnList(table)
	vsQuery := fmt.Sprintf("select priority, time_next, epoch, time_acked, %s from %v", columnList, mm.name)
	mm.vsFilter = &binlogdatapb.Filter{
		Rules: []*binlogdatapb.Rule{{
			Match:  table.Name.String(),
			Filter: vsQuery,
		}},
	}
	mm.readByPriorityAndTimeNext = sqlparser.BuildParsedQuery(
		// There should be a poller_idx defined on (time_acked, priority, time_next desc)
		// for this to be as efficient as possible.
		"select priority, time_next, epoch, time_acked, %s from %v where time_acked is null and time_next < %a order by priority, time_next desc limit %a",
		columnList, mm.name, ":time_next", ":max")
	mm.ackQuery = sqlparser.BuildParsedQuery(
		"update %v set time_acked = %a, time_next = null where id in %a and time_acked is null",
		mm.name, ":time_acked", "::ids")
	mm.purgeQuery = sqlparser.BuildParsedQuery(
		"delete from %v where time_acked < %a limit 500", mm.name, ":time_acked")

	mm.postponeQuery = buildPostponeQuery(mm.name, mm.minBackoff, mm.maxBackoff)

	return mm
}

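// buildPostponeQuery builds the parsed postpone query for the given table and
// backoff bounds. With a max backoff configured, the generated statement has
// roughly this shape (bind variables shown as :name, with ::ids expanding to
// the list of ids):
//
//	update <table> set time_next = :time_now + :wait_time +
//		IF(FLOOR((:min_backoff<<ifnull(epoch, 0)) * :jitter) < :min_backoff,
//			:min_backoff,
//			IF(FLOOR((:min_backoff<<ifnull(epoch, 0)) * :jitter) > :max_backoff,
//				:max_backoff,
//				FLOOR((:min_backoff<<ifnull(epoch, 0)) * :jitter))),
//		epoch = ifnull(epoch, 0)+1
//	where id in ::ids and time_acked is null
//
// When maxBackoff is zero, the inner IF is replaced by the bare jittered
// backoff expression.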
func buildPostponeQuery(name sqlparser.IdentifierCS, minBackoff, maxBackoff time.Duration) *sqlparser.ParsedQuery {
	var args []any

	// Since messages are immediately postponed upon sending, we need to add exponential backoff on top
	// of the ackWaitTime, otherwise messages will be resent too quickly.
	buf := bytes.NewBufferString("update %v set time_next = %a + %a + ")
	args = append(args, name, ":time_now", ":wait_time")

	// jitteredBackoff is min_backoff shifted left by the epoch and scaled by
	// the jitter bind var (+/- 33%). Whenever it is injected into the query,
	// append (:min_backoff, :jitter) to args.
	jitteredBackoff := "FLOOR((%a<<ifnull(epoch, 0)) * %a)"

	// If the jittered backoff is less than min_backoff, just set it to :min_backoff.
	buf.WriteString(fmt.Sprintf("IF(%s < %%a, %%a, ", jitteredBackoff))
	// jitteredBackoff < :min_backoff
	args = append(args, ":min_backoff", ":jitter", ":min_backoff")
	// if it is less, then use :min_backoff
	args = append(args, ":min_backoff")

	// Now fill in the false branch of the IF statement above.
	if maxBackoff == 0 {
		// if there is no max_backoff, just use jitteredBackoff
		buf.WriteString(jitteredBackoff)
		args = append(args, ":min_backoff", ":jitter")
	} else {
		// make sure that it doesn't exceed max_backoff
		buf.WriteString(fmt.Sprintf("IF(%s > %%a, %%a, %s)", jitteredBackoff, jitteredBackoff))
		// jitteredBackoff > :max_backoff
		args = append(args, ":min_backoff", ":jitter", ":max_backoff")
		// if it is greater, then use :max_backoff
		args = append(args, ":max_backoff")
		// otherwise just use jitteredBackoff
		args = append(args, ":min_backoff", ":jitter")
	}

	// close the if statement
	buf.WriteString(")")

	// now that we've identified time_next, finish the statement
	buf.WriteString(", epoch = ifnull(epoch, 0)+1 where id in %a and time_acked is null")
	args = append(args, "::ids")

	return sqlparser.BuildParsedQuery(buf.String(), args...)
}

// buildSelectColumnList is a convenience function that
// builds a 'select' list for the user-defined columns.
func buildSelectColumnList(t *schema.Table) string {
	buf := sqlparser.NewTrackedBuffer(nil)
	for i, c := range t.MessageInfo.Fields {
		// Column names may have to be escaped.
		if i == 0 {
			buf.Myprintf("%v", sqlparser.NewIdentifierCI(c.Name))
		} else {
			buf.Myprintf(", %v", sqlparser.NewIdentifierCI(c.Name))
		}
	}
	return buf.String()
}

// Open starts the messageManager service.
func (mm *messageManager) Open() {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	if mm.isOpen {
		return
	}
	mm.isOpen = true
	mm.curReceiver = -1

	mm.wg.Add(1)
	go mm.runSend() // calls the offsetting mm.wg.Done()
	// TODO(sougou): improve ticks to add randomness.
	mm.pollerTicks.Start(mm.runPoller)
	mm.purgeTicks.Start(mm.runPurge)
}

// Close stops the messageManager service.
func (mm *messageManager) Close() {
	log.Infof("messageManager - started execution of Close")
	mm.pollerTicks.Stop()
	mm.purgeTicks.Stop()
	log.Infof("messageManager - stopped the ticks. Acquiring mu Lock")

	mm.mu.Lock()
	log.Infof("messageManager - acquired mu Lock")
	if !mm.isOpen {
		log.Infof("messageManager - manager is not open")
		mm.mu.Unlock()
		return
	}
	mm.isOpen = false
	log.Infof("messageManager - cancelling all receivers")
	for _, rcvr := range mm.receivers {
		rcvr.receiver.cancel()
	}
	mm.receivers = nil
	MessageStats.Set([]string{mm.name.String(), "ClientCount"}, 0)
	log.Infof("messageManager - clearing cache")
	mm.cache.Clear()
	log.Infof("messageManager - sending a broadcast")
	// This broadcast will cause runSend to exit.
	mm.cond.Broadcast()
	log.Infof("messageManager - stopping VStream")
	mm.stopVStream()
	mm.mu.Unlock()

	log.Infof("messageManager - Waiting for the wait group")
	mm.wg.Wait()
	log.Infof("messageManager - closed")
}

// Subscribe registers the send function as a receiver of messages
// and returns a 'done' channel that will be closed when the subscription
// ends. There are many reasons for a subscription to end: a grpc context
// cancel or timeout, or tabletserver shutdown, etc.
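//
// A minimal caller sketch (hypothetical; in practice the messager engine is
// the caller, and process is a stand-in for the subscriber's handler):
//
//	done := mm.Subscribe(ctx, func(qr *sqltypes.Result) error {
//		// The first result carries only the field info; subsequent
//		// results carry rows. Returning an error (or canceling ctx)
//		// ends the subscription.
//		return process(qr)
//	})
//	<-done // closed once the subscription ends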
func (mm *messageManager) Subscribe(ctx context.Context, send func(*sqltypes.Result) error) <-chan struct{} {
	receiver, done := newMessageReceiver(ctx, send)

	mm.mu.Lock()
	defer mm.mu.Unlock()
	if !mm.isOpen {
		receiver.cancel()
		return done
	}

	if err := receiver.Send(mm.fieldResult); err != nil {
		log.Errorf("Terminating connection due to error sending field info: %v", err)
		receiver.cancel()
		return done
	}

	withStatus := &receiverWithStatus{
		receiver: receiver,
	}
	if len(mm.receivers) == 0 {
		mm.startVStream()
	}
	mm.receivers = append(mm.receivers, withStatus)
	MessageStats.Set([]string{mm.name.String(), "ClientCount"}, int64(len(mm.receivers)))
	if mm.curReceiver == -1 {
		mm.rescanReceivers(-1)
	}

	// Track the context and unsubscribe if it gets canceled.
	go func() {
		<-done
		mm.unsubscribe(receiver)
	}()
	return done
}

func (mm *messageManager) unsubscribe(receiver *messageReceiver) {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	for i, rcv := range mm.receivers {
		if rcv.receiver != receiver {
			continue
		}
		// Delete the item at the current position.
		n := len(mm.receivers)
		copy(mm.receivers[i:n-1], mm.receivers[i+1:n])
		mm.receivers = mm.receivers[0 : n-1]
		MessageStats.Set([]string{mm.name.String(), "ClientCount"}, int64(len(mm.receivers)))
		break
	}
	// curReceiver is obsolete. Recompute.
	mm.rescanReceivers(-1)
	// If there are no receivers left, shut down the vstream and clear the cache.
	if len(mm.receivers) == 0 {
		mm.stopVStream()
		mm.cache.Clear()
	}
}

   462  
   463  // rescanReceivers finds the next available receiver
   464  // using start as the starting point. If one was found,
   465  // it sets curReceiver to that index. If curReceiver
   466  // was previously -1, it broadcasts. If none was found,
   467  // curReceiver is set to -1. If there's no starting point,
   468  // it must be specified as -1.
   469  func (mm *messageManager) rescanReceivers(start int) {
   470  	cur := start
   471  	for range mm.receivers {
   472  		cur = (cur + 1) % len(mm.receivers)
   473  		if !mm.receivers[cur].busy {
   474  			if mm.curReceiver == -1 {
   475  				mm.cond.Broadcast()
   476  			}
   477  			mm.curReceiver = cur
   478  			return
   479  		}
   480  	}
   481  	// Nothing was found.
   482  	mm.curReceiver = -1
   483  }
   484  
   485  // Add adds the message to the cache. It returns true
   486  // if successful. If the message is already present,
   487  // it still returns true.
   488  func (mm *messageManager) Add(mr *MessageRow) bool {
   489  	mm.mu.Lock()
   490  	defer mm.mu.Unlock()
   491  	if len(mm.receivers) == 0 {
   492  		return false
   493  	}
   494  	// If cache is empty, we have to broadcast that we're not empty
   495  	// any more.
   496  	if mm.cache.IsEmpty() {
   497  		defer mm.cond.Broadcast()
   498  	}
   499  	if !mm.cache.Add(mr) {
   500  		// Cache is full. Enter "messagesPending" mode.
   501  		mm.messagesPending = true
   502  		return false
   503  	}
   504  	return true
   505  }
   506  
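// runSend is the send loop. It waits until there is at least one non-busy
// receiver and a non-empty cache, pops up to batchSize rows, and hands them
// to a receiver asynchronously via send.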
func (mm *messageManager) runSend() {
	defer func() {
		mm.tsv.LogError()
		mm.wg.Done()
	}()

	mm.mu.Lock()
	defer mm.mu.Unlock()

	for {
		// It's theoretically possible that this loop can keep going without
		// a wait. If so, the lock will never be released for other functions
		// like Close to take action. So, let's release and acquire the lock
		// to avoid starving other contenders.
		mm.mu.Unlock()
		mm.mu.Lock()

		var rows [][]sqltypes.Value
		for {
			if !mm.isOpen {
				return
			}

			// If the cache is empty, there are messages pending, and there are
			// subscribed receivers, we have to trigger the poller to fetch more.
			if mm.cache.IsEmpty() && mm.messagesPending && len(mm.receivers) != 0 {
				// Do this as a separate goroutine. Otherwise, this could cause
				// the following deadlock:
				// 1. runSend obtains the lock.
				// 2. The poller gets triggered, and waits for the lock.
				// 3. runSend calls Trigger, which hangs because Trigger
				// cannot return until the poller returns.
				go mm.pollerTicks.Trigger()
			}

			// If there are no receivers or the cache is empty, we wait.
			if mm.curReceiver == -1 || mm.cache.IsEmpty() {
				mm.cond.Wait()
				continue
			}

			// Fetch rows from the cache.
			lateCount := int64(0)
			for i := 0; i < mm.batchSize; i++ {
				mr := mm.cache.Pop()
				if mr == nil {
					break
				}
				if mr.Epoch >= 1 {
					lateCount++
				}
				rows = append(rows, mr.Row)
			}
			MessageStats.Add([]string{mm.name.String(), "Delayed"}, lateCount)

			// If we have rows to send, break out of this loop.
			if rows != nil {
				break
			}
		}
		MessageStats.Add([]string{mm.name.String(), "Sent"}, int64(len(rows)))
		// If we're here, there is a current receiver, and messages
		// to send. Reserve the receiver and find the next one.
		receiver := mm.receivers[mm.curReceiver]
		receiver.busy = true
		mm.rescanReceivers(mm.curReceiver)

		// Send the message asynchronously.
		mm.wg.Add(1)
		go mm.send(receiver, &sqltypes.Result{Rows: rows}) // calls the offsetting mm.wg.Done()
	}
}

func (mm *messageManager) send(receiver *receiverWithStatus, qr *sqltypes.Result) {
	defer func() {
		mm.tsv.LogError()
		mm.wg.Done()
	}()

	ids := make([]string, len(qr.Rows))
	for i, row := range qr.Rows {
		ids[i] = row[0].ToString()
	}

	defer func() {
		// Hold cacheManagementMu so the ids are not discarded from the
		// cache while the poller is active. Otherwise, the poller could
		// have read a snapshot of a row from before the postponement and
		// would requeue the message.
		mm.cacheManagementMu.Lock()
		defer mm.cacheManagementMu.Unlock()
		mm.cache.Discard(ids)
	}()

	defer func() {
		mm.mu.Lock()
		defer mm.mu.Unlock()

		receiver.busy = false
		// Rescan if there were no previously available receivers
		// because the current receiver became non-busy.
		if mm.curReceiver == -1 {
			mm.rescanReceivers(-1)
		}
	}()

	if err := receiver.receiver.Send(qr); err != nil {
		// Log the error, but we still want to postpone the message.
		// Otherwise, if this is a chronic failure like "message too
		// big", we'll end up spamming non-stop.
		log.Errorf("Error sending messages: %v: %v", qr, err)
	}
	mm.postpone(mm.tsv, mm.ackWaitTime, ids)
}

func (mm *messageManager) postpone(tsv TabletService, ackWaitTime time.Duration, ids []string) {
	// Use the semaphore to limit parallelism.
	if !mm.postponeSema.Acquire() {
		// Unreachable.
		return
	}
	defer mm.postponeSema.Release()
	ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), ackWaitTime)
	defer cancel()
	if _, err := tsv.PostponeMessages(ctx, nil, mm, ids); err != nil {
		// This can happen during spikes. Record the incident for monitoring.
		MessageStats.Add([]string{mm.name.String(), "PostponeFailed"}, 1)
	}
}

func (mm *messageManager) startVStream() {
	if mm.streamCancel != nil {
		return
	}
	var ctx context.Context
	ctx, mm.streamCancel = context.WithCancel(tabletenv.LocalContext())
	go mm.runVStream(ctx)
}

func (mm *messageManager) stopVStream() {
	log.Infof("messageManager - calling stream cancel")
	if mm.streamCancel != nil {
		mm.streamCancel()
		mm.streamCancel = nil
	}
}

func (mm *messageManager) runVStream(ctx context.Context) {
	for {
		err := mm.runOneVStream(ctx)
		select {
		case <-ctx.Done():
			log.Info("Context canceled, exiting vstream")
			return
		default:
		}
		MessageStats.Add([]string{mm.name.String(), "VStreamFailed"}, 1)
		log.Infof("VStream ended: %v, retrying in 5 seconds", err)
		time.Sleep(5 * time.Second)
	}
}

// runOneVStream watches for any new rows or rows that have been modified.
// Whether it's an insert or an update, if the new value of the
// row indicates that the message is eligible to be sent, it's added to
// the cache.
// Deletes are ignored.
// If the poller updates lastPollPosition, then all GTIDs up to that
// point are deemed obsolete and are skipped.
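// For example, if the send loop has just postponed a message and the poller
// then reads a snapshot at position P, an older ROW event would still carry
// the pre-postponement copy of that row; skipping all events up to P avoids
// requeueing it.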
func (mm *messageManager) runOneVStream(ctx context.Context) error {
	var curPos string
	var fields []*querypb.Field

	err := mm.vs.Stream(ctx, "current", nil, mm.vsFilter, func(events []*binlogdatapb.VEvent) error {
		// We need to get the flow control lock.
		mm.cacheManagementMu.Lock()
		defer mm.cacheManagementMu.Unlock()

		select {
		case <-ctx.Done():
			return io.EOF
		default:
		}

		mustSkip := func() (bool, error) {
			if mm.lastPollPosition == nil {
				return false, nil
			}
			if curPos == "" {
				return true, nil
			}
			cur, err := mysql.DecodePosition(curPos)
			if err != nil {
				return false, err
			}
			if cur.AtLeast(*mm.lastPollPosition) {
				mm.lastPollPosition = nil
				return false, nil
			}
			return true, nil
		}
		skipEvents, err := mustSkip()
		if err != nil {
			return err
		}
		var newPos string
		for _, ev := range events {
			switch ev.Type {
			case binlogdatapb.VEventType_FIELD:
				fields = ev.FieldEvent.Fields
			case binlogdatapb.VEventType_ROW:
				if skipEvents {
					continue
				}
				if err := mm.processRowEvent(fields, ev.RowEvent); err != nil {
					return err
				}
			case binlogdatapb.VEventType_GTID:
				newPos = ev.Gtid
			case binlogdatapb.VEventType_COMMIT, binlogdatapb.VEventType_DDL, binlogdatapb.VEventType_OTHER:
				// Update curPos only when the transaction concludes, which is
				// signaled by one of the above events.
				curPos = newPos
				skipEvents, err = mustSkip()
				if err != nil {
					return err
				}
			}
		}
		return nil
	})
	return err
}

func (mm *messageManager) processRowEvent(fields []*querypb.Field, rowEvent *binlogdatapb.RowEvent) error {
	if fields == nil {
		// Unreachable.
		return fmt.Errorf("internal error: unexpected rows without fields")
	}

	now := time.Now().UnixNano()
	for _, rc := range rowEvent.RowChanges {
		if rc.After == nil {
			continue
		}
		row := sqltypes.MakeRowTrusted(fields, rc.After)
		mr, err := BuildMessageRow(row)
		if err != nil {
			return err
		}
		if mr.TimeAcked != 0 || mr.TimeNext > now {
			continue
		}
		mm.Add(mr)
	}
	return nil
}

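// runPoller loads messages from the message table into the cache: it reads up
// to a cache-size worth of rows that are due (time_acked is null and
// time_next has passed) and, via readPending, records lastPollPosition so the
// vstream can skip events it has already seen.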
func (mm *messageManager) runPoller() {
	// We need to get the flow control lock first.
	mm.cacheManagementMu.Lock()
	defer mm.cacheManagementMu.Unlock()
	// Now we can get the main/structure lock and ensure, for example, that
	// the receiver count does not change during the run.
	mm.mu.Lock()
	defer mm.mu.Unlock()

	// Fast-path. Skip all the work.
	if len(mm.receivers) == 0 {
		return
	}

	ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), mm.pollerTicks.Interval())
	defer func() {
		mm.tsv.LogError()
		cancel()
	}()

	size := mm.cache.Size()
	bindVars := map[string]*querypb.BindVariable{
		"time_next": sqltypes.Int64BindVariable(time.Now().UnixNano()),
		"max":       sqltypes.Int64BindVariable(int64(size)),
	}

	qr, err := mm.readPending(ctx, bindVars)
	if err != nil {
		return
	}

	mm.messagesPending = false
	if len(qr.Rows) >= size {
		// There are probably more messages to be sent.
		mm.messagesPending = true
	}
	if len(qr.Rows) != 0 {
		// We've most likely added items.
		// Wake up the sender.
		defer mm.cond.Broadcast()
	}
	for _, row := range qr.Rows {
		mr, err := BuildMessageRow(row)
		if err != nil {
			mm.tsv.Stats().InternalErrors.Add("Messages", 1)
			log.Errorf("Error reading message row: %v", err)
			continue
		}
		if !mm.cache.Add(mr) {
			mm.messagesPending = true
			return
		}
	}
}

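// runPurge asynchronously deletes rows that were acked more than purgeAfter
// ago, looping in batches of up to 500 rows (the limit in purgeQuery).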
func (mm *messageManager) runPurge() {
	go func() {
		ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), mm.purgeTicks.Interval())
		defer func() {
			mm.tsv.LogError()
			cancel()
		}()
		for {
			count, err := mm.tsv.PurgeMessages(ctx, nil, mm, time.Now().Add(-mm.purgeAfter).UnixNano())
			if err != nil {
				MessageStats.Add([]string{mm.name.String(), "PurgeFailed"}, 1)
				log.Errorf("Unable to delete messages: %v", err)
			} else {
				MessageStats.Add([]string{mm.name.String(), "Purged"}, count)
			}
			// If we deleted 500 rows (the purge query's limit), there may be
			// more to delete, so keep going.
			if count < 500 {
				return
			}
		}
	}()
}

// GenerateAckQuery returns the query and bind vars for acking a message.
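// The generated statement has the shape (see ackQuery in newMessageManager):
//
//	update <table> set time_acked = :time_acked, time_next = null
//	where id in ::ids and time_acked is null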
func (mm *messageManager) GenerateAckQuery(ids []string) (string, map[string]*querypb.BindVariable) {
	idbvs := &querypb.BindVariable{
		Type:   querypb.Type_TUPLE,
		Values: make([]*querypb.Value, 0, len(ids)),
	}
	for _, id := range ids {
		idbvs.Values = append(idbvs.Values, &querypb.Value{
			Type:  querypb.Type_VARBINARY,
			Value: []byte(id),
		})
	}
	return mm.ackQuery.Query, map[string]*querypb.BindVariable{
		"time_acked": sqltypes.Int64BindVariable(time.Now().UnixNano()),
		"ids":        idbvs,
	}
}

// GeneratePostponeQuery returns the query and bind vars for postponing a message.
func (mm *messageManager) GeneratePostponeQuery(ids []string) (string, map[string]*querypb.BindVariable) {
	idbvs := &querypb.BindVariable{
		Type:   querypb.Type_TUPLE,
		Values: make([]*querypb.Value, 0, len(ids)),
	}
	for _, id := range ids {
		idbvs.Values = append(idbvs.Values, &querypb.Value{
			Type:  querypb.Type_VARBINARY,
			Value: []byte(id),
		})
	}

	bvs := map[string]*querypb.BindVariable{
		"time_now":    sqltypes.Int64BindVariable(time.Now().UnixNano()),
		"wait_time":   sqltypes.Int64BindVariable(int64(mm.ackWaitTime)),
		"min_backoff": sqltypes.Int64BindVariable(int64(mm.minBackoff)),
   878  		"jitter":      sqltypes.Float64BindVariable(.666666 + rand.Float64()*.666666),
   879  		"ids":         idbvs,
   880  	}
   881  
   882  	if mm.maxBackoff > 0 {
   883  		bvs["max_backoff"] = sqltypes.Int64BindVariable(int64(mm.maxBackoff))
   884  	}
   885  
   886  	return mm.postponeQuery.Query, bvs
   887  }
   888  
   889  // GeneratePurgeQuery returns the query and bind vars for purging messages.
   890  func (mm *messageManager) GeneratePurgeQuery(timeCutoff int64) (string, map[string]*querypb.BindVariable) {
   891  	return mm.purgeQuery.Query, map[string]*querypb.BindVariable{
   892  		"time_acked": sqltypes.Int64BindVariable(timeCutoff),
   893  	}
   894  }
   895  
   896  // BuildMessageRow builds a MessageRow from a db row.
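// The row must be laid out as (priority, time_next, epoch, time_acked,
// user-defined columns...), matching the select lists built in
// newMessageManager. Everything from row[4:] onward becomes the payload
// delivered to subscribers, with the first user-defined column expected to
// be the message id (send identifies messages by row[0] of the payload).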
func BuildMessageRow(row []sqltypes.Value) (*MessageRow, error) {
	mr := &MessageRow{Row: row[4:]}
	if !row[0].IsNull() {
		v, err := evalengine.ToInt64(row[0])
		if err != nil {
			return nil, err
		}
		mr.Priority = v
	}
	if !row[1].IsNull() {
		v, err := evalengine.ToInt64(row[1])
		if err != nil {
			return nil, err
		}
		mr.TimeNext = v
	}
	if !row[2].IsNull() {
		v, err := evalengine.ToInt64(row[2])
		if err != nil {
			return nil, err
		}
		mr.Epoch = v
	}
	if !row[3].IsNull() {
		v, err := evalengine.ToInt64(row[3])
		if err != nil {
			return nil, err
		}
		mr.TimeAcked = v
	}
	return mr, nil
}

func (mm *messageManager) readPending(ctx context.Context, bindVars map[string]*querypb.BindVariable) (*sqltypes.Result, error) {
	query, err := mm.readByPriorityAndTimeNext.GenerateQuery(bindVars, nil)
	if err != nil {
		mm.tsv.Stats().InternalErrors.Add("Messages", 1)
		log.Errorf("Error reading rows from message table: %v", err)
		return nil, err
	}
	qr := &sqltypes.Result{}
	err = mm.vs.StreamResults(ctx, query, func(response *binlogdatapb.VStreamResultsResponse) error {
		if response.Fields != nil {
			qr.Fields = response.Fields
		}
		if response.Gtid != "" {
			pos, err := mysql.DecodePosition(response.Gtid)
			if err != nil {
				return err
			}
			mm.lastPollPosition = &pos
		}
		for _, row := range response.Rows {
			qr.Rows = append(qr.Rows, sqltypes.MakeRowTrusted(qr.Fields, row))
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return qr, nil
}

func (mm *messageManager) getReceiverCount() int {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	return len(mm.receivers)
}

func (mm *messageManager) getLastPollPosition() *mysql.Position {
	mm.cacheManagementMu.Lock()
	defer mm.cacheManagementMu.Unlock()
	return mm.lastPollPosition
}