vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vreplication/engine.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vreplication
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"sort"
    24  	"strconv"
    25  	"sync"
    26  	"time"
    27  
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"vitess.io/vitess/go/mysql"
    31  	"vitess.io/vitess/go/sqltypes"
    32  	"vitess.io/vitess/go/sync2"
    33  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    34  	"vitess.io/vitess/go/vt/dbconfigs"
    35  	"vitess.io/vitess/go/vt/log"
    36  	"vitess.io/vitess/go/vt/mysqlctl"
    37  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    38  	querypb "vitess.io/vitess/go/vt/proto/query"
    39  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    40  	"vitess.io/vitess/go/vt/topo"
    41  	"vitess.io/vitess/go/vt/vterrors"
    42  	"vitess.io/vitess/go/vt/vtgate/evalengine"
    43  	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
    44  	"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle"
    45  )
    46  
// Sidecar table names, row limits and throttler application names used by
// the vreplication package.
const (
	// Fully qualified names of the tables in the _vt database.
	reshardingJournalTableName = "_vt.resharding_journal"
	vreplicationTableName      = "_vt.vreplication"
	copyStateTableName         = "_vt.copy_state"
	postCopyActionTableName    = "_vt.post_copy_action"

	// maxRows is the row limit passed to ExecuteFetch by most queries in
	// this file. throttlerVReplicationAppName is the application name that
	// NewEngine hands to the throttler client.
	// NOTE(review): throttlerOnlineDDLAppName is not referenced in this
	// file; presumably it is used for streams owned by online DDL - confirm.
	maxRows                      = 10000
	throttlerVReplicationAppName = "vreplication"
	throttlerOnlineDDLAppName    = "online-ddl"
)
    57  
// Values of PostCopyActionType, numbered from zero via iota.
const (
	// PostCopyActionNone is the zero value of PostCopyActionType.
	PostCopyActionNone PostCopyActionType = iota
	// PostCopyActionSQL has the value 1.
	// NOTE(review): presumably marks a PostCopyAction whose Task is a SQL
	// statement - confirm against the code that consumes it.
	PostCopyActionSQL
)
    62  
// waitRetryTime is the polling interval WaitForPos uses between reads of the
// stream status. It can be changed to a smaller value for tests.
//
// General note on the Engine (kept here from the original comment):
// A VReplication stream can be created by sending an insert statement
// to the Engine. Such a stream can also be updated or deleted. The fields
// of the table are described in binlogplayer.binlog_player.go. Changing
// values in a vreplication row will cause the Engine to accordingly react.
// For example, setting the state to 'Stopped' will cause that stream to
// stop replicating.
var waitRetryTime = 1 * time.Second
    71  
// rowsCopiedUpdateInterval is how frequently vcopier will update
// _vt.vreplication rows_copied.
var rowsCopiedUpdateInterval = 30 * time.Second
    74  
// copyStateGCInterval is how frequently vcopier will garbage collect old
// copy_state rows. By default, do it in between every 2nd and 3rd rows copied
// update: three update intervals minus half an interval, i.e. 2.5 times
// rowsCopiedUpdateInterval.
var copyStateGCInterval = (rowsCopiedUpdateInterval * 3) - (rowsCopiedUpdateInterval / 2)
    78  
// Engine is the engine for handling vreplication. It owns one controller per
// vreplication stream and keeps them in sync with the _vt.vreplication rows
// that are created, updated and deleted through Exec/ExecWithDBA.
type Engine struct {
	// mu synchronizes isOpen, cancelRetry, controllers and wg.
	mu     sync.Mutex
	isOpen bool
	// If cancelRetry is set, then a retry loop is running.
	// Invoking the function guarantees that there will be
	// no more retries.
	cancelRetry context.CancelFunc
	// controllers maps a vreplication stream id to its controller.
	controllers map[int]*controller
	// wg is used by in-flight functions that can run for long periods.
	// Close waits on it before marking the engine closed.
	wg sync.WaitGroup

	// ctx is the root context for all controllers.
	ctx context.Context
	// cancel will cancel the root context, thereby all controllers.
	cancel context.CancelFunc

	// ts being nil means the Engine is disabled: Open returns immediately.
	ts     *topo.Server
	cell   string
	mysqld mysqlctl.MysqlDaemon
	// dbClientFactoryFiltered and dbClientFactoryDba create DB clients for
	// the filtered and DBA users respectively. They are set by InitDBConfig
	// or injected by the test constructors.
	dbClientFactoryFiltered func() binlogplayer.DBClient
	dbClientFactoryDba      func() binlogplayer.DBClient
	dbName                  string

	// journaler tracks in-progress journal events, keyed by
	// "<workflow>:<journal id>" (see registerJournal).
	journaler map[string]*journalEvent
	ec        *externalConnector

	// throttlerClient is only populated by NewEngine; the test constructors
	// leave it nil.
	throttlerClient *throttle.Client

	// This should only be set in Test Engines in order to short
	// circuit functions as needed in unit tests. It's automatically
	// enabled in NewSimpleTestEngine. This should NOT be used in
	// production.
	shortcircuit bool
}
   115  
// journalEvent accumulates the state of one journal event while the
// participating streams of a workflow register themselves through
// registerJournal.
type journalEvent struct {
	// journal is the journal received by the first stream that registered.
	journal *binlogdatapb.Journal
	// participants maps "<keyspace>:<shard>" to the id of the stream that
	// has joined for that source; 0 means that source has not joined yet.
	participants map[string]int
	// shardGTIDs maps a shard name to the ShardGtid carried by the journal.
	shardGTIDs map[string]*binlogdatapb.ShardGtid
}
   121  
// PostCopyActionType enumerates the kinds of PostCopyAction; its values are
// the PostCopyAction* constants.
type PostCopyActionType int

// PostCopyAction pairs an action type with its task. It serializes to JSON
// with the keys "type" and "task".
// NOTE(review): presumably stored in _vt.post_copy_action and run once the
// copy phase completes - confirm against the code that consumes it.
type PostCopyAction struct {
	Type PostCopyActionType `json:"type"`
	Task string             `json:"task"`
}
   127  
   128  // NewEngine creates a new Engine.
   129  // A nil ts means that the Engine is disabled.
   130  func NewEngine(config *tabletenv.TabletConfig, ts *topo.Server, cell string, mysqld mysqlctl.MysqlDaemon, lagThrottler *throttle.Throttler) *Engine {
   131  	vre := &Engine{
   132  		controllers:     make(map[int]*controller),
   133  		ts:              ts,
   134  		cell:            cell,
   135  		mysqld:          mysqld,
   136  		journaler:       make(map[string]*journalEvent),
   137  		ec:              newExternalConnector(config.ExternalConnections),
   138  		throttlerClient: throttle.NewBackgroundClient(lagThrottler, throttlerVReplicationAppName, throttle.ThrottleCheckPrimaryWrite),
   139  	}
   140  
   141  	return vre
   142  }
   143  
   144  // InitDBConfig should be invoked after the db name is computed.
   145  func (vre *Engine) InitDBConfig(dbcfgs *dbconfigs.DBConfigs) {
   146  	// If we're already initilized, it's a test engine. Ignore the call.
   147  	if vre.dbClientFactoryFiltered != nil && vre.dbClientFactoryDba != nil {
   148  		return
   149  	}
   150  	vre.dbClientFactoryFiltered = func() binlogplayer.DBClient {
   151  		return binlogplayer.NewDBClient(dbcfgs.FilteredWithDB())
   152  	}
   153  	vre.dbClientFactoryDba = func() binlogplayer.DBClient {
   154  		return binlogplayer.NewDBClient(dbcfgs.DbaWithDB())
   155  	}
   156  	vre.dbName = dbcfgs.DBName
   157  }
   158  
   159  // NewTestEngine creates a new Engine for testing.
   160  func NewTestEngine(ts *topo.Server, cell string, mysqld mysqlctl.MysqlDaemon, dbClientFactoryFiltered func() binlogplayer.DBClient, dbClientFactoryDba func() binlogplayer.DBClient, dbname string, externalConfig map[string]*dbconfigs.DBConfigs) *Engine {
   161  	vre := &Engine{
   162  		controllers:             make(map[int]*controller),
   163  		ts:                      ts,
   164  		cell:                    cell,
   165  		mysqld:                  mysqld,
   166  		dbClientFactoryFiltered: dbClientFactoryFiltered,
   167  		dbClientFactoryDba:      dbClientFactoryDba,
   168  		dbName:                  dbname,
   169  		journaler:               make(map[string]*journalEvent),
   170  		ec:                      newExternalConnector(externalConfig),
   171  	}
   172  	return vre
   173  }
   174  
   175  // NewSimpleTestEngine creates a new Engine for testing that can
   176  // also short curcuit functions as needed.
   177  func NewSimpleTestEngine(ts *topo.Server, cell string, mysqld mysqlctl.MysqlDaemon, dbClientFactoryFiltered func() binlogplayer.DBClient, dbClientFactoryDba func() binlogplayer.DBClient, dbname string, externalConfig map[string]*dbconfigs.DBConfigs) *Engine {
   178  	vre := &Engine{
   179  		controllers:             make(map[int]*controller),
   180  		ts:                      ts,
   181  		cell:                    cell,
   182  		mysqld:                  mysqld,
   183  		dbClientFactoryFiltered: dbClientFactoryFiltered,
   184  		dbClientFactoryDba:      dbClientFactoryDba,
   185  		dbName:                  dbname,
   186  		journaler:               make(map[string]*journalEvent),
   187  		ec:                      newExternalConnector(externalConfig),
   188  		shortcircuit:            true,
   189  	}
   190  	return vre
   191  }
   192  
   193  // Open starts the Engine service.
   194  func (vre *Engine) Open(ctx context.Context) {
   195  	vre.mu.Lock()
   196  	defer vre.mu.Unlock()
   197  
   198  	if vre.ts == nil {
   199  		return
   200  	}
   201  	if vre.isOpen {
   202  		return
   203  	}
   204  	log.Infof("VReplication Engine: opening")
   205  
   206  	// Cancel any existing retry loops.
   207  	// This guarantees that there will be no more
   208  	// retries unless we start a new loop.
   209  	if vre.cancelRetry != nil {
   210  		vre.cancelRetry()
   211  		vre.cancelRetry = nil
   212  	}
   213  
   214  	if err := vre.openLocked(ctx); err != nil {
   215  		log.Infof("openLocked error: %s", err)
   216  		ctx, cancel := context.WithCancel(ctx)
   217  		vre.cancelRetry = cancel
   218  		go vre.retry(ctx, err)
   219  	}
   220  	log.Infof("VReplication engine opened successfully")
   221  }
   222  
   223  func (vre *Engine) openLocked(ctx context.Context) error {
   224  
   225  	rows, err := vre.readAllRows(ctx)
   226  	if err != nil {
   227  		return err
   228  	}
   229  
   230  	vre.ctx, vre.cancel = context.WithCancel(ctx)
   231  	vre.isOpen = true
   232  	vre.initControllers(rows)
   233  	vre.updateStats()
   234  	return nil
   235  }
   236  
// openRetryInterval is the delay between attempts made by the retry loop
// that Open starts when the first open attempt fails. It is read with Get()
// on every iteration.
var openRetryInterval = sync2.NewAtomicDuration(1 * time.Second)
   238  
   239  func (vre *Engine) retry(ctx context.Context, err error) {
   240  	log.Errorf("Error starting vreplication engine: %v, will keep retrying.", err)
   241  	for {
   242  		timer := time.NewTimer(openRetryInterval.Get())
   243  		select {
   244  		case <-ctx.Done():
   245  			timer.Stop()
   246  			return
   247  		case <-timer.C:
   248  		}
   249  		vre.mu.Lock()
   250  		// Recheck the context within the lock.
   251  		// This guarantees that we will not retry
   252  		// after the context was canceled. This
   253  		// can almost never happen.
   254  		select {
   255  		case <-ctx.Done():
   256  			vre.mu.Unlock()
   257  			return
   258  		default:
   259  		}
   260  		if err := vre.openLocked(ctx); err == nil {
   261  			// Don't invoke cancelRetry because openLocked
   262  			// will hold on to this context for later cancelation.
   263  			vre.cancelRetry = nil
   264  			vre.mu.Unlock()
   265  			return
   266  		}
   267  		vre.mu.Unlock()
   268  	}
   269  }
   270  
   271  func (vre *Engine) initControllers(rows []map[string]string) {
   272  	for _, row := range rows {
   273  		ct, err := newController(vre.ctx, row, vre.dbClientFactoryFiltered, vre.mysqld, vre.ts, vre.cell, tabletTypesStr, nil, vre)
   274  		if err != nil {
   275  			log.Errorf("Controller could not be initialized for stream: %v", row)
   276  			continue
   277  		}
   278  		vre.controllers[int(ct.id)] = ct
   279  	}
   280  }
   281  
   282  // IsOpen returns true if Engine is open.
   283  func (vre *Engine) IsOpen() bool {
   284  	vre.mu.Lock()
   285  	defer vre.mu.Unlock()
   286  	return vre.isOpen
   287  }
   288  
   289  // Close closes the Engine service.
   290  func (vre *Engine) Close() {
   291  	vre.mu.Lock()
   292  	defer vre.mu.Unlock()
   293  
   294  	// If we're retrying, we're not open.
   295  	// Just cancel the retry loop.
   296  	if vre.cancelRetry != nil {
   297  		vre.cancelRetry()
   298  		vre.cancelRetry = nil
   299  		return
   300  	}
   301  
   302  	if !vre.isOpen {
   303  		return
   304  	}
   305  
   306  	vre.ec.Close()
   307  	vre.cancel()
   308  	// We still have to wait for all controllers to stop.
   309  	for _, ct := range vre.controllers {
   310  		ct.Stop()
   311  	}
   312  	vre.controllers = make(map[int]*controller)
   313  
   314  	// Wait for long-running functions to exit.
   315  	vre.wg.Wait()
   316  
   317  	vre.mysqld.DisableBinlogPlayback()
   318  	vre.isOpen = false
   319  
   320  	vre.updateStats()
   321  	log.Infof("VReplication Engine: closed")
   322  }
   323  
   324  func (vre *Engine) getDBClient(isAdmin bool) binlogplayer.DBClient {
   325  	if isAdmin {
   326  		return vre.dbClientFactoryDba()
   327  	}
   328  	return vre.dbClientFactoryFiltered()
   329  }
   330  
   331  // ExecWithDBA runs the specified query as the DBA user
   332  func (vre *Engine) ExecWithDBA(query string) (*sqltypes.Result, error) {
   333  	return vre.exec(query, true /*runAsAdmin*/)
   334  }
   335  
   336  // Exec runs the specified query as the Filtered user
   337  func (vre *Engine) Exec(query string) (*sqltypes.Result, error) {
   338  	return vre.exec(query, false /*runAsAdmin*/)
   339  }
   340  
// exec executes the query and the related actions. It is the shared
// implementation behind Exec and ExecWithDBA; runAsAdmin selects the DBA
// connection instead of the filtered one.
//
// The engine lock is held for the whole call. An UNAVAILABLE error is
// returned when the engine is closed or still retrying to open. The query is
// turned into a controller plan and handled according to its opcode:
//   - insert: runs the insert, then creates a controller and a log entry for
//     each inserted id.
//   - update: stops the affected controllers, applies the update, then
//     recreates each controller (inheriting the previous stats).
//   - delete: stops and removes the affected controllers, then deletes the
//     rows selected by the plan inside a transaction.
//   - select / resharding journal: passed through unchanged.
//
// Stats are refreshed on return once the open checks have passed.
//
// Example insert statement:
// insert into _vt.vreplication
//
//	(workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state)
//	values ('Resharding', 'keyspace:"ks" shard:"0" tables:"a" tables:"b" ', 'MariaDB/0-1-1083', 9223372036854775807, 9223372036854775807, 481823, 0, 'Running')`
//
// Example update statement:
// update _vt.vreplication set state='Stopped', message='testing stop' where id=1
// Example delete: delete from _vt.vreplication where id=1
// Example select: select * from _vt.vreplication
func (vre *Engine) exec(query string, runAsAdmin bool) (*sqltypes.Result, error) {
	vre.mu.Lock()
	defer vre.mu.Unlock()
	if !vre.isOpen {
		return nil, vterrors.New(vtrpcpb.Code_UNAVAILABLE, "vreplication engine is closed")
	}
	if vre.cancelRetry != nil {
		return nil, vterrors.New(vtrpcpb.Code_UNAVAILABLE, "engine is still trying to open")
	}
	// Whatever happens below, publish the resulting controller set.
	defer vre.updateStats()

	plan, err := buildControllerPlan(query)
	if err != nil {
		return nil, err
	}

	dbClient := vre.getDBClient(runAsAdmin)
	if err := dbClient.Connect(); err != nil {
		return nil, err
	}
	defer dbClient.Close()

	// Change the database to ensure that these events don't get
	// replicated by another vreplication. This can happen when
	// we reverse replication.
	if _, err := dbClient.ExecuteFetch("use _vt", 1); err != nil {
		return nil, err
	}

	switch plan.opcode {
	case insertQuery:
		qr, err := dbClient.ExecuteFetch(plan.query, 1)
		if err != nil {
			return nil, err
		}
		if qr.InsertID == 0 {
			return nil, fmt.Errorf("insert failed to generate an id")
		}
		vdbc := newVDBClient(dbClient, binlogplayer.NewStats())
		// The new rows are assumed to have consecutive ids starting at
		// InsertID; one controller is created per row.
		for id := int(qr.InsertID); id < int(qr.InsertID)+plan.numInserts; id++ {
			if ct := vre.controllers[id]; ct != nil {
				// Unreachable. Just a failsafe.
				ct.Stop()
				delete(vre.controllers, id)
			}
			params, err := readRow(dbClient, id)
			if err != nil {
				return nil, err
			}
			ct, err := newController(vre.ctx, params, vre.dbClientFactoryFiltered, vre.mysqld, vre.ts, vre.cell, tabletTypesStr, nil, vre)
			if err != nil {
				return nil, err
			}
			vre.controllers[id] = ct
			if err := insertLogWithParams(vdbc, LogStreamCreate, uint32(id), params); err != nil {
				return nil, err
			}
		}
		return qr, nil
	case updateQuery:
		ids, bv, err := vre.fetchIDs(dbClient, plan.selector)
		if err != nil {
			return nil, err
		}
		if len(ids) == 0 {
			return &sqltypes.Result{}, nil
		}
		// Remember the stats of every stopped controller so that its
		// replacement can carry them forward.
		blpStats := make(map[int]*binlogplayer.Stats)
		for _, id := range ids {
			if ct := vre.controllers[id]; ct != nil {
				// Stop the current controller.
				ct.Stop()
				blpStats[id] = ct.blpStats
			}
		}
		query, err = plan.applier.GenerateQuery(bv, nil)
		if err != nil {
			return nil, err
		}
		qr, err := dbClient.ExecuteFetch(query, maxRows)
		if err != nil {
			return nil, err
		}
		vdbc := newVDBClient(dbClient, binlogplayer.NewStats())
		for _, id := range ids {
			params, err := readRow(dbClient, id)
			if err != nil {
				return nil, err
			}
			// Create a new controller in place of the old one.
			// For continuity, the new controller inherits the previous stats.
			ct, err := newController(vre.ctx, params, vre.dbClientFactoryFiltered, vre.mysqld, vre.ts, vre.cell, tabletTypesStr, blpStats[id], vre)
			if err != nil {
				return nil, err
			}
			vre.controllers[id] = ct
			if err := insertLog(vdbc, LogStateChange, uint32(id), params["state"], ""); err != nil {
				return nil, err
			}
		}
		return qr, nil
	case deleteQuery:
		ids, bv, err := vre.fetchIDs(dbClient, plan.selector)
		if err != nil {
			return nil, err
		}
		if len(ids) == 0 {
			return &sqltypes.Result{}, nil
		}
		// Stop and delete the current controllers.
		vdbc := newVDBClient(dbClient, binlogplayer.NewStats())
		for _, id := range ids {
			if ct := vre.controllers[id]; ct != nil {
				ct.Stop()
				delete(vre.controllers, id)
			}
			if err := insertLogWithParams(vdbc, LogStreamDelete, uint32(id), nil); err != nil {
				return nil, err
			}
		}
		// The three deletes below run in one transaction.
		// NOTE(review): error paths return without an explicit rollback;
		// presumably the deferred dbClient.Close() discards the open
		// transaction - confirm.
		if err := dbClient.Begin(); err != nil {
			return nil, err
		}
		query, err := plan.applier.GenerateQuery(bv, nil)
		if err != nil {
			return nil, err
		}
		qr, err := dbClient.ExecuteFetch(query, maxRows)
		if err != nil {
			return nil, err
		}
		// NOTE(review): judging by the plan field names, the next two
		// statements remove the copy state and post copy action rows of the
		// deleted streams - confirm in buildControllerPlan.
		delQuery, err := plan.delCopyState.GenerateQuery(bv, nil)
		if err != nil {
			return nil, err
		}
		_, err = dbClient.ExecuteFetch(delQuery, maxRows)
		if err != nil {
			return nil, err
		}
		delQuery, err = plan.delPostCopyAction.GenerateQuery(bv, nil)
		if err != nil {
			return nil, err
		}
		_, err = dbClient.ExecuteFetch(delQuery, maxRows)
		if err != nil {
			return nil, err
		}
		if err := dbClient.Commit(); err != nil {
			return nil, err
		}
		// The result of the main delete statement is what callers get back.
		return qr, nil
	case selectQuery, reshardingJournalQuery:
		// select and resharding journal queries are passed through.
		return dbClient.ExecuteFetch(plan.query, maxRows)
	}
	panic("unreachable")
}
   509  
   510  func (vre *Engine) fetchIDs(dbClient binlogplayer.DBClient, selector string) (ids []int, bv map[string]*querypb.BindVariable, err error) {
   511  	qr, err := dbClient.ExecuteFetch(selector, 10000)
   512  	if err != nil {
   513  		return nil, nil, err
   514  	}
   515  	for _, row := range qr.Rows {
   516  		id, err := evalengine.ToInt64(row[0])
   517  		if err != nil {
   518  			return nil, nil, err
   519  		}
   520  		ids = append(ids, int(id))
   521  	}
   522  	bvval, err := sqltypes.BuildBindVariable(ids)
   523  	if err != nil {
   524  		// Unreachable.
   525  		return nil, nil, err
   526  	}
   527  	bv = map[string]*querypb.BindVariable{"ids": bvval}
   528  	return ids, bv, nil
   529  }
   530  
   531  // registerJournal is invoked if any of the vreplication streams encounters a journal event.
   532  // Multiple registerJournal functions collaborate to converge on the final action.
   533  // The first invocation creates an entry in vre.journaler. The entry is initialized
   534  // with the list of participants that also need to converge.
   535  // The middle invocation happens on the first and subsequent calls: the current participant
   536  // marks itself as having joined the wait.
   537  // The final invocation happens for the last participant that joins. Having confirmed
   538  // that all the participants have joined, transitionJournal is invoked, which deletes
   539  // all current participant streams and creates new ones to replace them.
   540  // A unified journal event is identified by the workflow name and journal id.
   541  // Multiple independent journal events can go through this cycle concurrently.
   542  func (vre *Engine) registerJournal(journal *binlogdatapb.Journal, id int) error {
   543  	vre.mu.Lock()
   544  	defer vre.mu.Unlock()
   545  	if !vre.isOpen {
   546  		// Unreachable.
   547  		return nil
   548  	}
   549  
   550  	workflow := vre.controllers[id].workflow
   551  	key := fmt.Sprintf("%s:%d", workflow, journal.Id)
   552  	ks := fmt.Sprintf("%s:%s", vre.controllers[id].source.Keyspace, vre.controllers[id].source.Shard)
   553  	log.Infof("Journal encountered for (%s %s): %v", key, ks, journal)
   554  	je, ok := vre.journaler[key]
   555  	if !ok {
   556  		log.Infof("First stream for workflow %s has joined, creating journaler entry", workflow)
   557  		je = &journalEvent{
   558  			journal:      journal,
   559  			participants: make(map[string]int),
   560  			shardGTIDs:   make(map[string]*binlogdatapb.ShardGtid),
   561  		}
   562  		vre.journaler[key] = je
   563  	}
   564  	// Middle invocation. Register yourself
   565  	controllerSources := make(map[string]bool)
   566  	for _, ct := range vre.controllers {
   567  		if ct.workflow != workflow {
   568  			// Only compare with streams that belong to the current workflow.
   569  			continue
   570  		}
   571  		ks := fmt.Sprintf("%s:%s", ct.source.Keyspace, ct.source.Shard)
   572  		controllerSources[ks] = true
   573  	}
   574  	for _, jks := range journal.Participants {
   575  		ks := fmt.Sprintf("%s:%s", jks.Keyspace, jks.Shard)
   576  		if _, ok := controllerSources[ks]; !ok {
   577  			log.Errorf("cannot redirect on journal: not all sources are present in this workflow: missing %v", ks)
   578  			return fmt.Errorf("cannot redirect on journal: not all sources are present in this workflow: missing %v", ks)
   579  		}
   580  		if _, ok := je.participants[ks]; !ok {
   581  			log.Infof("New participant %s found for workflow %s", ks, workflow)
   582  			je.participants[ks] = 0
   583  		} else {
   584  			log.Infof("Participant %s:%d already exists for workflow %s", ks, je.participants[ks], workflow)
   585  		}
   586  	}
   587  	for _, gtid := range journal.ShardGtids {
   588  		je.shardGTIDs[gtid.Shard] = gtid
   589  	}
   590  
   591  	je.participants[ks] = id
   592  	// Check if all participants have joined.
   593  	for ks, pid := range je.participants {
   594  		if pid == 0 {
   595  			// Still need to wait.
   596  			log.Infof("Not all participants have joined, including %s", ks)
   597  			return nil
   598  		}
   599  	}
   600  	// Final invocation. Perform the transition.
   601  	delete(vre.journaler, key)
   602  	go vre.transitionJournal(je)
   603  	return nil
   604  }
   605  
   606  // transitionJournal stops all existing participants, deletes their vreplication
   607  // entries, and creates new ones as instructed by the journal metadata.
   608  func (vre *Engine) transitionJournal(je *journalEvent) {
   609  	vre.mu.Lock()
   610  	defer vre.mu.Unlock()
   611  	if !vre.isOpen {
   612  		return
   613  	}
   614  
   615  	log.Infof("Transitioning for journal:workload %v", je)
   616  
   617  	//sort both participants and shardgtids
   618  	participants := make([]string, 0)
   619  	for ks := range je.participants {
   620  		participants = append(participants, ks)
   621  	}
   622  	sort.Sort(ShardSorter(participants))
   623  	log.Infof("Participants %+v, oldParticipants %+v", participants, je.participants)
   624  	shardGTIDs := make([]string, 0)
   625  	for shard := range je.shardGTIDs {
   626  		shardGTIDs = append(shardGTIDs, shard)
   627  	}
   628  	sort.Strings(shardGTIDs)
   629  
   630  	// Wait for participating controllers to stop.
   631  	// Also collect one id reference.
   632  	refid := 0
   633  	for id := range participants {
   634  		ks := participants[id]
   635  		refid = je.participants[ks]
   636  		vre.controllers[refid].Stop()
   637  	}
   638  
   639  	dbClient := vre.dbClientFactoryFiltered()
   640  	if err := dbClient.Connect(); err != nil {
   641  		log.Errorf("transitionJournal: unable to connect to the database: %v", err)
   642  		return
   643  	}
   644  	defer dbClient.Close()
   645  
   646  	if err := dbClient.Begin(); err != nil {
   647  		log.Errorf("transitionJournal: %v", err)
   648  		return
   649  	}
   650  
   651  	// Use the reference row to copy other fields like cell, tablet_types, etc.
   652  	params, err := readRow(dbClient, refid)
   653  	if err != nil {
   654  		log.Errorf("transitionJournal: %v", err)
   655  		return
   656  	}
   657  	var newids []int
   658  	for _, shard := range shardGTIDs {
   659  		sgtid := je.shardGTIDs[shard]
   660  		bls := proto.Clone(vre.controllers[refid].source).(*binlogdatapb.BinlogSource)
   661  		bls.Keyspace, bls.Shard = sgtid.Keyspace, sgtid.Shard
   662  
   663  		workflowType, _ := strconv.ParseInt(params["workflow_type"], 10, 64)
   664  		workflowSubType, _ := strconv.ParseInt(params["workflow_sub_type"], 10, 64)
   665  		deferSecondaryKeys, _ := strconv.ParseBool(params["defer_secondary_keys"])
   666  		ig := NewInsertGenerator(binlogplayer.BlpRunning, vre.dbName)
   667  		ig.AddRow(params["workflow"], bls, sgtid.Gtid, params["cell"], params["tablet_types"], workflowType, workflowSubType, deferSecondaryKeys)
   668  		qr, err := dbClient.ExecuteFetch(ig.String(), maxRows)
   669  		if err != nil {
   670  			log.Errorf("transitionJournal: %v", err)
   671  			return
   672  		}
   673  		log.Infof("Created stream: %v for %v", qr.InsertID, sgtid)
   674  		newids = append(newids, int(qr.InsertID))
   675  	}
   676  	for _, ks := range participants {
   677  		id := je.participants[ks]
   678  		_, err := dbClient.ExecuteFetch(binlogplayer.DeleteVReplication(uint32(id)), maxRows)
   679  		if err != nil {
   680  			log.Errorf("transitionJournal: %v", err)
   681  			return
   682  		}
   683  		log.Infof("Deleted stream: %v", id)
   684  	}
   685  	if err := dbClient.Commit(); err != nil {
   686  		log.Errorf("transitionJournal: %v", err)
   687  		return
   688  	}
   689  
   690  	for id := range participants {
   691  		ks := participants[id]
   692  		id := je.participants[ks]
   693  		delete(vre.controllers, id)
   694  	}
   695  
   696  	for _, id := range newids {
   697  		params, err := readRow(dbClient, id)
   698  		if err != nil {
   699  			log.Errorf("transitionJournal: %v", err)
   700  			return
   701  		}
   702  		ct, err := newController(vre.ctx, params, vre.dbClientFactoryFiltered, vre.mysqld, vre.ts, vre.cell, tabletTypesStr, nil, vre)
   703  		if err != nil {
   704  			log.Errorf("transitionJournal: %v", err)
   705  			return
   706  		}
   707  		vre.controllers[id] = ct
   708  	}
   709  	log.Infof("Completed transition for journal:workload %v", je)
   710  }
   711  
   712  // WaitForPos waits for the replication to reach the specified position.
   713  func (vre *Engine) WaitForPos(ctx context.Context, id int, pos string) error {
   714  	start := time.Now()
   715  	mPos, err := binlogplayer.DecodePosition(pos)
   716  	if err != nil {
   717  		return err
   718  	}
   719  
   720  	if err := func() error {
   721  		vre.mu.Lock()
   722  		defer vre.mu.Unlock()
   723  		if !vre.isOpen {
   724  			return errors.New("vreplication engine is closed")
   725  		}
   726  
   727  		// Ensure that the engine won't be closed while this is running.
   728  		vre.wg.Add(1)
   729  		return nil
   730  	}(); err != nil {
   731  		return err
   732  	}
   733  	defer vre.wg.Done()
   734  
   735  	if vre.shortcircuit {
   736  		return nil
   737  	}
   738  
   739  	dbClient := vre.dbClientFactoryFiltered()
   740  	if err := dbClient.Connect(); err != nil {
   741  		return err
   742  	}
   743  	defer dbClient.Close()
   744  
   745  	tkr := time.NewTicker(waitRetryTime)
   746  	defer tkr.Stop()
   747  	for {
   748  		qr, err := dbClient.ExecuteFetch(binlogplayer.ReadVReplicationStatus(uint32(id)), 10)
   749  		switch {
   750  		case err != nil:
   751  			// We have high contention on the _vt.vreplication row, so retry if our read gets
   752  			// killed off by the deadlock detector and should be re-tried.
   753  			// The full error we get back from MySQL in that case is:
   754  			// Deadlock found when trying to get lock; try restarting transaction (errno 1213) (sqlstate 40001)
   755  			// Docs: https://dev.mysql.com/doc/mysql-errors/en/server-error-reference.html#error_er_lock_deadlock
   756  			if sqlErr, ok := err.(*mysql.SQLError); ok && sqlErr.Number() == mysql.ERLockDeadlock {
   757  				log.Infof("Deadlock detected waiting for pos %s: %v; will retry", pos, err)
   758  			} else {
   759  				return err
   760  			}
   761  		case len(qr.Rows) == 0:
   762  			return fmt.Errorf("vreplication stream %d not found", id)
   763  		case len(qr.Rows) > 1 || len(qr.Rows[0]) != 3:
   764  			return fmt.Errorf("unexpected result: %v", qr)
   765  		}
   766  
   767  		// When err is not nil then we got a retryable error and will loop again
   768  		if err == nil {
   769  			current, dcerr := binlogplayer.DecodePosition(qr.Rows[0][0].ToString())
   770  			if dcerr != nil {
   771  				return dcerr
   772  			}
   773  
   774  			if current.AtLeast(mPos) {
   775  				log.Infof("position: %s reached, wait time: %v", pos, time.Since(start))
   776  				return nil
   777  			}
   778  
   779  			if qr.Rows[0][1].ToString() == binlogplayer.BlpStopped {
   780  				return fmt.Errorf("replication has stopped at %v before reaching position %v, message: %s", current, mPos, qr.Rows[0][2].ToString())
   781  			}
   782  		}
   783  
   784  		select {
   785  		case <-ctx.Done():
   786  			var doneErr error
   787  			if err != nil { // we had a retryable error and never got status info
   788  				doneErr = fmt.Errorf("error waiting for pos: %s, unable to get vreplication status for id %d: %v, wait time: %v",
   789  					pos, id, err, time.Since(start))
   790  			} else {
   791  				doneErr = fmt.Errorf("error waiting for pos: %s, last pos: %s: %v, wait time: %v: %s",
   792  					pos, qr.Rows[0][0].ToString(), ctx.Err(), time.Since(start),
   793  					"possibly no tablets are available to stream in the source keyspace for your cell and tablet_types setting")
   794  			}
   795  			log.Error(doneErr.Error())
   796  			return doneErr
   797  		case <-vre.ctx.Done():
   798  			return fmt.Errorf("vreplication is closing: %v", vre.ctx.Err())
   799  		case <-tkr.C:
   800  		}
   801  	}
   802  }
   803  
   804  // UpdateStats must be called with lock held.
   805  func (vre *Engine) updateStats() {
   806  	globalStats.mu.Lock()
   807  	defer globalStats.mu.Unlock()
   808  
   809  	globalStats.isOpen = vre.isOpen
   810  	globalStats.controllers = make(map[int]*controller, len(vre.controllers))
   811  	for id, ct := range vre.controllers {
   812  		globalStats.controllers[id] = ct
   813  	}
   814  }
   815  
   816  func (vre *Engine) readAllRows(ctx context.Context) ([]map[string]string, error) {
   817  	dbClient := vre.dbClientFactoryFiltered()
   818  	if err := dbClient.Connect(); err != nil {
   819  		return nil, err
   820  	}
   821  	defer dbClient.Close()
   822  	qr, err := dbClient.ExecuteFetch(fmt.Sprintf("select * from _vt.vreplication where db_name=%v", encodeString(vre.dbName)), maxRows)
   823  	if err != nil {
   824  		return nil, err
   825  	}
   826  	maps := make([]map[string]string, len(qr.Rows))
   827  	for i := range qr.Rows {
   828  		mrow, err := rowToMap(qr, i)
   829  		if err != nil {
   830  			return nil, err
   831  		}
   832  		maps[i] = mrow
   833  	}
   834  	return maps, nil
   835  }
   836  
   837  func readRow(dbClient binlogplayer.DBClient, id int) (map[string]string, error) {
   838  	qr, err := dbClient.ExecuteFetch(fmt.Sprintf("select * from _vt.vreplication where id = %d", id), 10)
   839  	if err != nil {
   840  		return nil, err
   841  	}
   842  	if len(qr.Rows) != 1 {
   843  		return nil, fmt.Errorf("unexpected number of rows: %v", qr)
   844  	}
   845  	if len(qr.Fields) != len(qr.Rows[0]) {
   846  		return nil, fmt.Errorf("fields don't match rows: %v", qr)
   847  	}
   848  	row, err := rowToMap(qr, 0)
   849  	if err != nil {
   850  		return nil, err
   851  	}
   852  	gtid, ok := row["pos"]
   853  	if ok {
   854  		b := binlogplayer.MysqlUncompress(gtid)
   855  		if b != nil {
   856  			gtid = string(b)
   857  			row["pos"] = gtid
   858  		}
   859  	}
   860  	return row, nil
   861  }
   862  
   863  // rowToMap converts a row into a map for easier processing.
   864  func rowToMap(qr *sqltypes.Result, rownum int) (map[string]string, error) {
   865  	row := qr.Rows[rownum]
   866  	m := make(map[string]string, len(row))
   867  	for i, fld := range qr.Fields {
   868  		if row[i].IsNull() {
   869  			continue
   870  		}
   871  		m[fld.Name] = row[i].ToString()
   872  	}
   873  	return m, nil
   874  }