vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/tm_init.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package tabletmanager exports the TabletManager object. It keeps the local tablet
    19  state, starts / stops all associated services (query service,
    20  update stream, binlog players, ...), and handles tabletmanager RPCs
    21  to update the state.
    22  
    23  The tm is responsible for maintaining the tablet record in the
    24  topology server. Only 'vtctl DeleteTablet'
    25  should be run by other processes, everything else should ask
    26  the tablet server to make the change.
    27  
    28  Most RPC calls obtain the actionSema, except the easy read-only ones.
    29  RPC calls that change the tablet record will also call updateState.
    30  
    31  See rpc_server.go for all cases, and which actions take the actionSema,
    32  and which run changeCallback.
    33  */
    34  package tabletmanager
    35  
    36  import (
    37  	"context"
    38  	"encoding/hex"
    39  	"fmt"
    40  	"math/rand"
    41  	"regexp"
    42  	"strings"
    43  	"sync"
    44  	"time"
    45  
    46  	"github.com/spf13/pflag"
    47  	"k8s.io/apimachinery/pkg/util/sets"
    48  
    49  	"vitess.io/vitess/go/flagutil"
    50  	"vitess.io/vitess/go/mysql/collations"
    51  	"vitess.io/vitess/go/netutil"
    52  	"vitess.io/vitess/go/stats"
    53  	"vitess.io/vitess/go/sync2"
    54  	"vitess.io/vitess/go/vt/binlog"
    55  	"vitess.io/vitess/go/vt/dbconfigs"
    56  	"vitess.io/vitess/go/vt/dbconnpool"
    57  	"vitess.io/vitess/go/vt/key"
    58  	"vitess.io/vitess/go/vt/log"
    59  	"vitess.io/vitess/go/vt/logutil"
    60  	"vitess.io/vitess/go/vt/mysqlctl"
    61  	querypb "vitess.io/vitess/go/vt/proto/query"
    62  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    63  	"vitess.io/vitess/go/vt/servenv"
    64  	"vitess.io/vitess/go/vt/topo"
    65  	"vitess.io/vitess/go/vt/topo/topoproto"
    66  	"vitess.io/vitess/go/vt/topotools"
    67  	"vitess.io/vitess/go/vt/vtctl/reparentutil"
    68  	"vitess.io/vitess/go/vt/vterrors"
    69  	"vitess.io/vitess/go/vt/vttablet/tabletmanager/vdiff"
    70  	"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
    71  	"vitess.io/vitess/go/vt/vttablet/tabletserver"
    72  )
    73  
    74  // Query rules from denylist
    75  const denyListQueryList string = "DenyListQueryRules"
    76  
    77  var (
    78  	// The following flags initialize the tablet record.
    79  	tabletHostname     string
    80  	initKeyspace       string
    81  	initShard          string
    82  	initTabletType     string
    83  	initDbNameOverride string
    84  	skipBuildInfoTags  = "/.*/"
    85  	initTags           flagutil.StringMapValue
    86  
    87  	initPopulateMetadata bool
    88  	initTimeout          = 1 * time.Minute
    89  )
    90  
    91  func registerInitFlags(fs *pflag.FlagSet) {
    92  	fs.StringVar(&tabletHostname, "tablet_hostname", tabletHostname, "if not empty, this hostname will be assumed instead of trying to resolve it")
    93  	fs.StringVar(&initKeyspace, "init_keyspace", initKeyspace, "(init parameter) keyspace to use for this tablet")
    94  	fs.StringVar(&initShard, "init_shard", initShard, "(init parameter) shard to use for this tablet")
    95  	fs.StringVar(&initTabletType, "init_tablet_type", initTabletType, "(init parameter) the tablet type to use for this tablet.")
    96  	fs.StringVar(&initDbNameOverride, "init_db_name_override", initDbNameOverride, "(init parameter) override the name of the db used by vttablet. Without this flag, the db name defaults to vt_<keyspacename>")
    97  	fs.StringVar(&skipBuildInfoTags, "vttablet_skip_buildinfo_tags", skipBuildInfoTags, "comma-separated list of buildinfo tags to skip from merging with --init_tags. each tag is either an exact match or a regular expression of the form '/regexp/'.")
    98  	fs.Var(&initTags, "init_tags", "(init parameter) comma separated list of key:value pairs used to tag the tablet")
    99  
   100  	fs.BoolVar(&initPopulateMetadata, "init_populate_metadata", initPopulateMetadata, "(init parameter) populate metadata tables even if restore_from_backup is disabled. If restore_from_backup is enabled, metadata tables are always populated regardless of this flag.")
   101  	fs.MarkDeprecated("init_populate_metadata", "this flag is no longer being used and will be removed in future versions")
   102  	fs.DurationVar(&initTimeout, "init_timeout", initTimeout, "(init parameter) timeout to use for the init phase.")
   103  }
   104  
   105  var (
   106  	// statsTabletType is set to expose the current tablet type.
   107  	statsTabletType *stats.String
   108  
   109  	// statsTabletTypeCount exposes the current tablet type as a label,
   110  	// with the value counting the occurrences of the respective tablet type.
   111  	// Useful for Prometheus which doesn't support exporting strings as stat values.
   112  	statsTabletTypeCount *stats.CountersWithSingleLabel
   113  
   114  	// statsBackupIsRunning is set to 1 (true) if a backup is running.
   115  	statsBackupIsRunning *stats.GaugesWithMultiLabels
   116  
   117  	// statsIsInSrvKeyspace is set to 1 (true), 0 (false) whether the tablet is in the serving keyspace
   118  	statsIsInSrvKeyspace *stats.Gauge
   119  
   120  	statsKeyspace      = stats.NewString("TabletKeyspace")
   121  	statsShard         = stats.NewString("TabletShard")
   122  	statsKeyRangeStart = stats.NewString("TabletKeyRangeStart")
   123  	statsKeyRangeEnd   = stats.NewString("TabletKeyRangeEnd")
   124  	statsAlias         = stats.NewString("TabletAlias")
   125  
   126  	// The following variables can be changed to speed up tests.
   127  	mysqlPortRetryInterval       = 1 * time.Second
   128  	rebuildKeyspaceRetryInterval = 1 * time.Second
   129  )
   130  
   131  func init() {
   132  	servenv.OnParseFor("vtcombo", registerInitFlags)
   133  	servenv.OnParseFor("vttablet", registerInitFlags)
   134  
   135  	statsTabletType = stats.NewString("TabletType")
   136  	statsTabletTypeCount = stats.NewCountersWithSingleLabel("TabletTypeCount", "Number of times the tablet changed to the labeled type", "type")
   137  	statsBackupIsRunning = stats.NewGaugesWithMultiLabels("BackupIsRunning", "Whether a backup is running", []string{"mode"})
   138  	statsIsInSrvKeyspace = stats.NewGauge("IsInSrvKeyspace", "Whether the vttablet is in the serving keyspace (1 = true / 0 = false)")
   139  }
   140  
   141  // TabletManager is the main class for the tablet manager.
   142  type TabletManager struct {
   143  	// The following fields are set during creation
   144  	BatchCtx            context.Context
   145  	TopoServer          *topo.Server
   146  	Cnf                 *mysqlctl.Mycnf
   147  	MysqlDaemon         mysqlctl.MysqlDaemon
   148  	DBConfigs           *dbconfigs.DBConfigs
   149  	QueryServiceControl tabletserver.Controller
   150  	UpdateStream        binlog.UpdateStreamControl
   151  	VREngine            *vreplication.Engine
   152  	VDiffEngine         *vdiff.Engine
   153  
   154  	// tmState manages the TabletManager state.
   155  	tmState *tmState
   156  
   157  	// tabletAlias is saved away from tablet for read-only access
   158  	tabletAlias *topodatapb.TabletAlias
   159  
   160  	// baseTabletType is the tablet type we revert back to
   161  	// when we transition back from something like PRIMARY.
   162  	baseTabletType topodatapb.TabletType
   163  
   164  	// actionSema is there to run only one action at a time.
   165  	// This semaphore can be held for long periods of time (hours),
   166  	// like in the case of a restore. This semaphore must be obtained
   167  	// first before other mutexes.
   168  	actionSema *sync2.Semaphore
   169  
   170  	// mutex protects all the following fields (that start with '_'),
   171  	// only hold the mutex to update the fields, nothing else.
   172  	mutex sync.Mutex
   173  
   174  	// _shardSyncChan is a channel for informing the shard sync goroutine that
   175  	// it should wake up and recheck the tablet state, to make sure it and the
   176  	// shard record are in sync.
   177  	//
   178  	// Call tm.notifyShardSync() instead of sending directly to this channel.
   179  	_shardSyncChan chan struct{}
   180  
   181  	// _shardSyncDone is a channel for waiting until the shard sync goroutine
   182  	// has really finished after _shardSyncCancel was called.
   183  	_shardSyncDone chan struct{}
   184  
   185  	// _shardSyncCancel is the function to stop the background shard sync goroutine.
   186  	_shardSyncCancel context.CancelFunc
   187  
   188  	// _rebuildKeyspaceDone is a channel for waiting until the current keyspace
   189  	// has been rebuilt
   190  	_rebuildKeyspaceDone chan struct{}
   191  
   192  	// _rebuildKeyspaceCancel is the function to stop a keyspace rebuild currently
   193  	// in progress
   194  	_rebuildKeyspaceCancel context.CancelFunc
   195  
   196  	// _lockTablesConnection is used to get and release the table read locks to pause replication
   197  	_lockTablesConnection *dbconnpool.DBConnection
   198  	_lockTablesTimer      *time.Timer
   199  	// _isBackupRunning tells us whether there is a backup that is currently running
   200  	_isBackupRunning bool
   201  }
   202  
   203  // BuildTabletFromInput builds a tablet record from input parameters.
   204  func BuildTabletFromInput(alias *topodatapb.TabletAlias, port, grpcPort int32, dbServerVersion string, db *dbconfigs.DBConfigs) (*topodatapb.Tablet, error) {
   205  	hostname := tabletHostname
   206  	if hostname == "" {
   207  		var err error
   208  		hostname, err = netutil.FullyQualifiedHostname()
   209  		if err != nil {
   210  			return nil, err
   211  		}
   212  		log.Infof("Using detected machine hostname: %v, to change this, fix your machine network configuration or override it with --tablet_hostname. Tablet %s", hostname, alias.String())
   213  	} else {
   214  		log.Infof("Using hostname: %v from --tablet_hostname flag. Tablet %s", hostname, alias.String())
   215  	}
   216  
   217  	if initKeyspace == "" || initShard == "" {
   218  		return nil, fmt.Errorf("init_keyspace and init_shard must be specified")
   219  	}
   220  
   221  	// parse and validate shard name
   222  	shard, keyRange, err := topo.ValidateShardName(initShard)
   223  	if err != nil {
   224  		return nil, vterrors.Wrapf(err, "cannot validate shard name %v", initShard)
   225  	}
   226  
   227  	tabletType, err := topoproto.ParseTabletType(initTabletType)
   228  	if err != nil {
   229  		return nil, err
   230  	}
   231  	switch tabletType {
   232  	case topodatapb.TabletType_SPARE, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY:
   233  	default:
   234  		return nil, fmt.Errorf("invalid init_tablet_type %v; can only be REPLICA, RDONLY or SPARE", tabletType)
   235  	}
   236  
   237  	buildTags, err := getBuildTags(servenv.AppVersion.ToStringMap(), skipBuildInfoTags)
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  
   242  	var charset uint8
   243  	if db != nil && db.Charset != "" {
   244  		charset, err = collations.Local().ParseConnectionCharset(db.Charset)
   245  		if err != nil {
   246  			return nil, err
   247  		}
   248  	} else {
   249  		charset = collations.Local().DefaultConnectionCharset()
   250  	}
   251  
   252  	return &topodatapb.Tablet{
   253  		Alias:    alias,
   254  		Hostname: hostname,
   255  		PortMap: map[string]int32{
   256  			"vt":   port,
   257  			"grpc": grpcPort,
   258  		},
   259  		Keyspace:             initKeyspace,
   260  		Shard:                shard,
   261  		KeyRange:             keyRange,
   262  		Type:                 tabletType,
   263  		DbNameOverride:       initDbNameOverride,
   264  		Tags:                 mergeTags(buildTags, initTags),
   265  		DbServerVersion:      dbServerVersion,
   266  		DefaultConnCollation: uint32(charset),
   267  	}, nil
   268  }
   269  
   270  func getBuildTags(buildTags map[string]string, skipTagsCSV string) (map[string]string, error) {
   271  	if skipTagsCSV == "" {
   272  		return buildTags, nil
   273  	}
   274  
   275  	skipTags := strings.Split(skipTagsCSV, ",")
   276  	skippers := make([]func(string) bool, len(skipTags))
   277  	for i, skipTag := range skipTags {
   278  		skipTag := skipTag // copy to preserve iteration scope in the closures below
   279  		if strings.HasPrefix(skipTag, "/") && strings.HasSuffix(skipTag, "/") && len(skipTag) > 1 {
   280  			// regexp mode
   281  			tagRegexp, err := regexp.Compile(skipTag[1 : len(skipTag)-1])
   282  			if err != nil {
   283  				return nil, err
   284  			}
   285  
   286  			skippers[i] = func(s string) bool {
   287  				return tagRegexp.MatchString(s)
   288  			}
   289  		} else {
   290  			skippers[i] = func(s string) bool {
   291  				log.Warningf(skipTag)
   292  				return s == skipTag
   293  			}
   294  		}
   295  	}
   296  
   297  	skippedTags := sets.New[string]()
   298  	for tag := range buildTags {
   299  		for _, skipFn := range skippers {
   300  			if skipFn(tag) {
   301  				skippedTags.Insert(tag)
   302  				break
   303  			}
   304  		}
   305  	}
   306  
   307  	result := make(map[string]string, len(buildTags)-skippedTags.Len())
   308  	for tag, val := range buildTags {
   309  		if skippedTags.Has(tag) {
   310  			continue
   311  		}
   312  
   313  		result[tag] = val
   314  	}
   315  
   316  	return result, nil
   317  }
   318  
   319  func mergeTags(a, b map[string]string) map[string]string {
   320  	maxCap := len(a)
   321  	if x := len(b); x > maxCap {
   322  		maxCap = x
   323  	}
   324  
   325  	result := make(map[string]string, maxCap)
   326  	for k, v := range a {
   327  		result[k] = v
   328  	}
   329  
   330  	for k, v := range b {
   331  		result[k] = v
   332  	}
   333  
   334  	return result
   335  }
   336  
   337  // Start starts the TabletManager.
   338  func (tm *TabletManager) Start(tablet *topodatapb.Tablet, healthCheckInterval time.Duration) error {
   339  	defer func() {
   340  		log.Infof("TabletManager Start took ~%d ms", time.Since(servenv.GetInitStartTime()).Milliseconds())
   341  	}()
   342  	log.Infof("TabletManager Start")
   343  	tm.DBConfigs.DBName = topoproto.TabletDbName(tablet)
   344  	tm.tabletAlias = tablet.Alias
   345  	tm.tmState = newTMState(tm, tablet)
   346  	tm.actionSema = sync2.NewSemaphore(1, 0)
   347  
   348  	tm.baseTabletType = tablet.Type
   349  
   350  	ctx, cancel := context.WithTimeout(tm.BatchCtx, initTimeout)
   351  	defer cancel()
   352  	si, err := tm.createKeyspaceShard(ctx)
   353  	if err != nil {
   354  		return err
   355  	}
   356  	if err := tm.checkPrimaryShip(ctx, si); err != nil {
   357  		return err
   358  	}
   359  	if err := tm.checkMysql(ctx); err != nil {
   360  		return err
   361  	}
   362  	if err := tm.initTablet(ctx); err != nil {
   363  		return err
   364  	}
   365  
   366  	err = tm.QueryServiceControl.InitDBConfig(&querypb.Target{
   367  		Keyspace:   tablet.Keyspace,
   368  		Shard:      tablet.Shard,
   369  		TabletType: tablet.Type,
   370  	}, tm.DBConfigs, tm.MysqlDaemon)
   371  	if err != nil {
   372  		return vterrors.Wrap(err, "failed to InitDBConfig")
   373  	}
   374  	tm.QueryServiceControl.RegisterQueryRuleSource(denyListQueryList)
   375  
   376  	if tm.UpdateStream != nil {
   377  		tm.UpdateStream.InitDBConfig(tm.DBConfigs)
   378  		servenv.OnRun(tm.UpdateStream.RegisterService)
   379  		servenv.OnTerm(tm.UpdateStream.Disable)
   380  	}
   381  
   382  	if tm.VREngine != nil {
   383  		tm.VREngine.InitDBConfig(tm.DBConfigs)
   384  		servenv.OnTerm(tm.VREngine.Close)
   385  	}
   386  
   387  	if tm.VDiffEngine != nil {
   388  		tm.VDiffEngine.InitDBConfig(tm.DBConfigs)
   389  		servenv.OnTerm(tm.VDiffEngine.Close)
   390  	}
   391  
   392  	// The following initializations don't need to be done
   393  	// in any specific order.
   394  	tm.startShardSync()
   395  	tm.exportStats()
   396  	servenv.OnRun(tm.registerTabletManager)
   397  
   398  	restoring, err := tm.handleRestore(tm.BatchCtx)
   399  	if err != nil {
   400  		return err
   401  	}
   402  	if restoring {
   403  		// If restore was triggered, it will take care
   404  		// of updating the tablet state and initializing replication.
   405  		return nil
   406  	}
   407  	// We should be re-read the tablet from tabletManager and use the type specified there.
   408  	// We shouldn't use the base tablet type directly, since the type could have changed to PRIMARY
   409  	// earlier in tm.checkPrimaryShip code.
   410  	_, err = tm.initializeReplication(ctx, tm.Tablet().Type)
   411  	tm.tmState.Open()
   412  	return err
   413  }
   414  
   415  // Close prepares a tablet for shutdown. First we check our tablet ownership and
   416  // then prune the tablet topology entry of all post-init fields. This prevents
   417  // stale identifiers from hanging around in topology.
   418  func (tm *TabletManager) Close() {
   419  	// Stop the shard sync loop and wait for it to exit. We do this in Close()
   420  	// rather than registering it as an OnTerm hook so the shard sync loop keeps
   421  	// running during lame duck.
   422  	tm.stopShardSync()
   423  	tm.stopRebuildKeyspace()
   424  
   425  	// cleanup initialized fields in the tablet entry
   426  	f := func(tablet *topodatapb.Tablet) error {
   427  		if err := topotools.CheckOwnership(tm.Tablet(), tablet); err != nil {
   428  			return err
   429  		}
   430  		tablet.Hostname = ""
   431  		tablet.MysqlHostname = ""
   432  		tablet.PortMap = nil
   433  		return nil
   434  	}
   435  
   436  	updateCtx, updateCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
   437  	defer updateCancel()
   438  
   439  	if _, err := tm.TopoServer.UpdateTabletFields(updateCtx, tm.tabletAlias, f); err != nil {
   440  		log.Warningf("Failed to update tablet record, may contain stale identifiers: %v", err)
   441  	}
   442  
   443  	tm.tmState.Close()
   444  }
   445  
   446  // Stop shuts down the tm. Normally this is not necessary, since we use
   447  // servenv OnTerm and OnClose hooks to coordinate shutdown automatically,
   448  // while taking lameduck into account. However, this may be useful for tests,
   449  // when you want to clean up an tm immediately.
   450  func (tm *TabletManager) Stop() {
   451  	// Stop the shard sync loop and wait for it to exit. This needs to be done
   452  	// here in addition to in Close() because tests do not call Close().
   453  	tm.stopShardSync()
   454  	tm.stopRebuildKeyspace()
   455  
   456  	if tm.UpdateStream != nil {
   457  		tm.UpdateStream.Disable()
   458  	}
   459  
   460  	if tm.VREngine != nil {
   461  		tm.VREngine.Close()
   462  	}
   463  
   464  	if tm.VDiffEngine != nil {
   465  		tm.VDiffEngine.Close()
   466  	}
   467  
   468  	tm.MysqlDaemon.Close()
   469  	tm.tmState.Close()
   470  }
   471  
   472  func (tm *TabletManager) createKeyspaceShard(ctx context.Context) (*topo.ShardInfo, error) {
   473  	// mutex is needed because we set _shardInfo and _srvKeyspace
   474  	tm.mutex.Lock()
   475  	defer tm.mutex.Unlock()
   476  
   477  	tablet := tm.Tablet()
   478  	log.Infof("Reading/creating keyspace and shard records for %v/%v", tablet.Keyspace, tablet.Shard)
   479  
   480  	// Read the shard, create it if necessary.
   481  	var shardInfo *topo.ShardInfo
   482  	if err := tm.withRetry(ctx, "creating keyspace and shard", func() error {
   483  		var err error
   484  		shardInfo, err = tm.TopoServer.GetOrCreateShard(ctx, tablet.Keyspace, tablet.Shard)
   485  		return err
   486  	}); err != nil {
   487  		return nil, vterrors.Wrap(err, "createKeyspaceShard: cannot GetOrCreateShard shard")
   488  	}
   489  	tm.tmState.RefreshFromTopoInfo(ctx, shardInfo, nil)
   490  
   491  	// Rebuild keyspace if this the first tablet in this keyspace/cell
   492  	srvKeyspace, err := tm.TopoServer.GetSrvKeyspace(ctx, tm.tabletAlias.Cell, tablet.Keyspace)
   493  	switch {
   494  	case err == nil:
   495  		tm.tmState.RefreshFromTopoInfo(ctx, nil, srvKeyspace)
   496  	case topo.IsErrType(err, topo.NoNode):
   497  		var rebuildKsCtx context.Context
   498  		rebuildKsCtx, tm._rebuildKeyspaceCancel = context.WithCancel(tm.BatchCtx)
   499  		tm._rebuildKeyspaceDone = make(chan struct{})
   500  		go tm.rebuildKeyspace(rebuildKsCtx, tm._rebuildKeyspaceDone, tablet.Keyspace, rebuildKeyspaceRetryInterval)
   501  	default:
   502  		return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to read SrvKeyspace")
   503  	}
   504  
   505  	// Rebuild vschema graph if this is the first tablet in this keyspace/cell.
   506  	srvVSchema, err := tm.TopoServer.GetSrvVSchema(ctx, tm.tabletAlias.Cell)
   507  	switch {
   508  	case err == nil:
   509  		// Check if vschema was rebuilt after the initial creation of the keyspace.
   510  		if _, keyspaceExists := srvVSchema.GetKeyspaces()[tablet.Keyspace]; !keyspaceExists {
   511  			if err := tm.TopoServer.RebuildSrvVSchema(ctx, []string{tm.tabletAlias.Cell}); err != nil {
   512  				return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to RebuildSrvVSchema")
   513  			}
   514  		}
   515  	case topo.IsErrType(err, topo.NoNode):
   516  		// There is no SrvSchema in this cell at all, so we definitely need to rebuild.
   517  		if err := tm.TopoServer.RebuildSrvVSchema(ctx, []string{tm.tabletAlias.Cell}); err != nil {
   518  			return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to RebuildSrvVSchema")
   519  		}
   520  	default:
   521  		return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to read SrvVSchema")
   522  	}
   523  	return shardInfo, nil
   524  }
   525  
   526  func (tm *TabletManager) stopRebuildKeyspace() {
   527  	var doneChan <-chan struct{}
   528  
   529  	tm.mutex.Lock()
   530  	if tm._rebuildKeyspaceCancel != nil {
   531  		tm._rebuildKeyspaceCancel()
   532  	}
   533  	doneChan = tm._rebuildKeyspaceDone
   534  	tm.mutex.Unlock()
   535  
   536  	if doneChan != nil {
   537  		<-doneChan
   538  	}
   539  }
   540  
   541  func (tm *TabletManager) rebuildKeyspace(ctx context.Context, done chan<- struct{}, keyspace string, retryInterval time.Duration) {
   542  	var srvKeyspace *topodatapb.SrvKeyspace
   543  
   544  	defer func() {
   545  		log.Infof("Keyspace rebuilt: %v", keyspace)
   546  		if ctx.Err() == nil {
   547  			tm.tmState.RefreshFromTopoInfo(tm.BatchCtx, nil, srvKeyspace)
   548  		}
   549  		close(done)
   550  	}()
   551  
   552  	// RebuildKeyspace will fail until at least one tablet is up for every shard.
   553  	firstTime := true
   554  	var err error
   555  	for {
   556  		if ctx.Err() != nil {
   557  			return
   558  		}
   559  		if !firstTime {
   560  			// If keyspace was rebuilt by someone else, we can just exit.
   561  			srvKeyspace, err = tm.TopoServer.GetSrvKeyspace(ctx, tm.tabletAlias.Cell, keyspace)
   562  			if err == nil || ctx.Err() != nil {
   563  				return
   564  			}
   565  		}
   566  		err = topotools.RebuildKeyspace(ctx, logutil.NewConsoleLogger(), tm.TopoServer, keyspace, []string{tm.tabletAlias.Cell}, false)
   567  		if err == nil {
   568  			srvKeyspace, err = tm.TopoServer.GetSrvKeyspace(ctx, tm.tabletAlias.Cell, keyspace)
   569  			if err == nil || ctx.Err() != nil {
   570  				return
   571  			}
   572  		}
   573  		if firstTime {
   574  			log.Warningf("rebuildKeyspace failed, will retry every %v: %v", retryInterval, err)
   575  		}
   576  		firstTime = false
   577  		time.Sleep(retryInterval)
   578  	}
   579  }
   580  
   581  func (tm *TabletManager) checkPrimaryShip(ctx context.Context, si *topo.ShardInfo) error {
   582  	if si.PrimaryAlias != nil && topoproto.TabletAliasEqual(si.PrimaryAlias, tm.tabletAlias) {
   583  		// We're marked as primary in the shard record, which could mean the primary
   584  		// tablet process was just restarted. However, we need to check if a new
   585  		// primary is in the process of taking over. In that case, it will let us
   586  		// know by forcibly updating the old primary's tablet record.
   587  		oldTablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias)
   588  		switch {
   589  		case topo.IsErrType(err, topo.NoNode):
   590  			// There's no existing tablet record, so we can assume
   591  			// no one has left us a message to step down.
   592  			log.Infof("Shard primary alias matches, but there is no existing tablet record. Switching to primary with 'Now' as time")
   593  			tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   594  				tablet.Type = topodatapb.TabletType_PRIMARY
   595  				// Update the primary term start time (current value is 0) because we
   596  				// assume that we are actually the PRIMARY and in case of a tiebreak,
   597  				// vtgate should prefer us.
   598  				tablet.PrimaryTermStartTime = logutil.TimeToProto(time.Now())
   599  			})
   600  		case err == nil:
   601  			if oldTablet.Type == topodatapb.TabletType_PRIMARY {
   602  				log.Infof("Shard primary alias matches, and existing tablet agrees. Switching to primary with tablet's primary term start time: %v", oldTablet.PrimaryTermStartTime)
   603  				// We're marked as primary in the shard record,
   604  				// and our existing tablet record agrees.
   605  				tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   606  					tablet.Type = topodatapb.TabletType_PRIMARY
   607  					tablet.PrimaryTermStartTime = oldTablet.PrimaryTermStartTime
   608  				})
   609  			} else {
   610  				log.Warningf("Shard primary alias matches, but existing tablet is not primary. Switching from %v to primary with the shard's primary term start time: %v", oldTablet.Type, si.PrimaryTermStartTime)
   611  				tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   612  					tablet.Type = topodatapb.TabletType_PRIMARY
   613  					tablet.PrimaryTermStartTime = si.PrimaryTermStartTime
   614  				})
   615  			}
   616  		default:
   617  			return vterrors.Wrap(err, "InitTablet failed to read existing tablet record")
   618  		}
   619  	} else {
   620  		oldTablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias)
   621  		switch {
   622  		case topo.IsErrType(err, topo.NoNode):
   623  			// There's no existing tablet record, so there is nothing to do
   624  		case err == nil:
   625  			if oldTablet.Type == topodatapb.TabletType_PRIMARY {
   626  				// Our existing tablet type is primary, but the shard record does not agree.
   627  				// Only take over if our primary_term_start_time is after what is in the shard record
   628  				oldPrimaryTermStartTime := oldTablet.GetPrimaryTermStartTime()
   629  				currentShardTime := si.GetPrimaryTermStartTime()
   630  				if oldPrimaryTermStartTime.After(currentShardTime) {
   631  					log.Infof("Shard primary alias does not match, but the tablet's primary term start time is newer. Switching to primary with tablet's primary term start time: %v", oldTablet.PrimaryTermStartTime)
   632  					tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   633  						tablet.Type = topodatapb.TabletType_PRIMARY
   634  						tablet.PrimaryTermStartTime = oldTablet.PrimaryTermStartTime
   635  					})
   636  				} else {
   637  					log.Infof("Existing tablet type is primary, but the shard record has a different primary with a newer timestamp. Remaining a replica")
   638  				}
   639  			}
   640  		default:
   641  			return vterrors.Wrap(err, "InitTablet failed to read existing tablet record")
   642  		}
   643  	}
   644  	return nil
   645  }
   646  
   647  func (tm *TabletManager) checkMysql(ctx context.Context) error {
   648  	appConfig, err := tm.DBConfigs.AppWithDB().MysqlParams()
   649  	if err != nil {
   650  		return err
   651  	}
   652  	if appConfig.Host != "" {
   653  		tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   654  			tablet.MysqlHostname = appConfig.Host
   655  			tablet.MysqlPort = int32(appConfig.Port)
   656  		})
   657  	} else {
   658  		// Assume unix socket was specified and try to get the port from mysqld
   659  		tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   660  			tablet.MysqlHostname = tablet.Hostname
   661  		})
   662  		mysqlPort, err := tm.MysqlDaemon.GetMysqlPort()
   663  		if err != nil {
   664  			log.Warningf("Cannot get current mysql port, will keep retrying every %v: %v", mysqlPortRetryInterval, err)
   665  			go tm.findMysqlPort(mysqlPortRetryInterval)
   666  		} else {
   667  			tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
   668  				tablet.MysqlPort = mysqlPort
   669  			})
   670  		}
   671  	}
   672  	return nil
   673  }
   674  
   675  func (tm *TabletManager) findMysqlPort(retryInterval time.Duration) {
   676  	for {
   677  		time.Sleep(retryInterval)
   678  		mport, err := tm.MysqlDaemon.GetMysqlPort()
   679  		if err != nil {
   680  			continue
   681  		}
   682  		log.Infof("Identified mysql port: %v", mport)
   683  		tm.tmState.SetMysqlPort(mport)
   684  		return
   685  	}
   686  }
   687  
   688  func (tm *TabletManager) initTablet(ctx context.Context) error {
   689  	tablet := tm.Tablet()
   690  	err := tm.TopoServer.CreateTablet(ctx, tablet)
   691  	switch {
   692  	case err == nil:
   693  		// It worked, we're good.
   694  	case topo.IsErrType(err, topo.NodeExists):
   695  		// The node already exists, will just try to update
   696  		// it. So we read it first.
   697  		oldTablet, err := tm.TopoServer.GetTablet(ctx, tablet.Alias)
   698  		if err != nil {
   699  			return vterrors.Wrap(err, "initTablet failed to read existing tablet record")
   700  		}
   701  
   702  		// Sanity check the keyspace and shard
   703  		if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard {
   704  			return fmt.Errorf("initTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
   705  		}
   706  
   707  		// Update ShardReplication in any case, to be sure.  This is
   708  		// meant to fix the case when a Tablet record was created, but
   709  		// then the ShardReplication record was not (because for
   710  		// instance of a startup timeout). Upon running this code
   711  		// again, we want to fix ShardReplication.
   712  		if updateErr := topo.UpdateTabletReplicationData(ctx, tm.TopoServer, tablet); updateErr != nil {
   713  			return vterrors.Wrap(updateErr, "UpdateTabletReplicationData failed")
   714  		}
   715  
   716  		// Then overwrite everything, ignoring version mismatch.
   717  		if err := tm.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, nil)); err != nil {
   718  			return vterrors.Wrap(err, "UpdateTablet failed")
   719  		}
   720  	default:
   721  		return vterrors.Wrap(err, "CreateTablet failed")
   722  	}
   723  	return nil
   724  }
   725  
   726  func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) {
   727  	// Sanity check for inconsistent flags
   728  	if tm.Cnf == nil && restoreFromBackup {
   729  		return false, fmt.Errorf("you cannot enable --restore_from_backup without a my.cnf file")
   730  	}
   731  
   732  	// Restore in the background
   733  	if restoreFromBackup {
   734  		go func() {
   735  			// Open the state manager after restore is done.
   736  			defer tm.tmState.Open()
   737  
   738  			// Zero date will cause us to use the latest, which is the default
   739  			backupTime := time.Time{}
   740  
   741  			// Or if a backup timestamp was specified then we use the last backup taken at or before that time
   742  			if restoreFromBackupTsStr != "" {
   743  				var err error
   744  				backupTime, err = time.Parse(mysqlctl.BackupTimestampFormat, restoreFromBackupTsStr)
   745  				if err != nil {
   746  					log.Exitf(fmt.Sprintf("RestoreFromBackup failed: unable to parse the backup timestamp value provided of '%s'", restoreFromBackupTsStr))
   747  				}
   748  			}
   749  
   750  			// restoreFromBackup will just be a regular action
   751  			// (same as if it was triggered remotely)
   752  			if err := tm.RestoreData(ctx, logutil.NewConsoleLogger(), waitForBackupInterval, false /* deleteBeforeRestore */, backupTime); err != nil {
   753  				log.Exitf("RestoreFromBackup failed: %v", err)
   754  			}
   755  		}()
   756  		return true, nil
   757  	}
   758  
   759  	return false, nil
   760  }
   761  
   762  func (tm *TabletManager) exportStats() {
   763  	tablet := tm.Tablet()
   764  	statsKeyspace.Set(tablet.Keyspace)
   765  	statsShard.Set(tablet.Shard)
   766  	statsTabletType.Set(topoproto.TabletTypeLString(tm.tmState.tablet.Type))
   767  	statsTabletTypeCount.Add(topoproto.TabletTypeLString(tm.tmState.tablet.Type), 1)
   768  	if key.KeyRangeIsPartial(tablet.KeyRange) {
   769  		statsKeyRangeStart.Set(hex.EncodeToString(tablet.KeyRange.Start))
   770  		statsKeyRangeEnd.Set(hex.EncodeToString(tablet.KeyRange.End))
   771  	}
   772  	statsAlias.Set(topoproto.TabletAliasString(tablet.Alias))
   773  }
   774  
   775  // withRetry will exponentially back off and retry a function upon
   776  // failure, until the context is Done(), or the function returned with
   777  // no error. We use this at startup with a context timeout set to the
   778  // value of the init_timeout flag, so we can try to modify the
   779  // topology over a longer period instead of dying right away.
   780  func (tm *TabletManager) withRetry(ctx context.Context, description string, work func() error) error {
   781  	backoff := 1 * time.Second
   782  	for {
   783  		err := work()
   784  		if err == nil || err == context.Canceled || err == context.DeadlineExceeded {
   785  			return err
   786  		}
   787  
   788  		log.Warningf("%v failed (%v), backing off %v before retrying", description, err, backoff)
   789  		select {
   790  		case <-ctx.Done():
   791  			return ctx.Err()
   792  		case <-time.After(backoff):
   793  			// Exponential backoff with 1.3 as a factor,
   794  			// and randomized down by at most 20
   795  			// percent. The generated time series looks
   796  			// good.  Also note rand.Seed is called at
   797  			// init() time in binlog_players.go.
   798  			f := float64(backoff) * 1.3
   799  			f -= f * 0.2 * rand.Float64()
   800  			backoff = time.Duration(f)
   801  		}
   802  	}
   803  }
   804  
   805  // Tablet reads the stored Tablet from the tm.
   806  func (tm *TabletManager) Tablet() *topodatapb.Tablet {
   807  	return tm.tmState.Tablet()
   808  }
   809  
   810  // DeniedTables returns the list of currently denied tables.
   811  func (tm *TabletManager) DeniedTables() []string {
   812  	return tm.tmState.DeniedTables()
   813  }
   814  
   815  // hookExtraEnv returns the map to pass to local hooks
   816  func (tm *TabletManager) hookExtraEnv() map[string]string {
   817  	tablet := tm.Tablet()
   818  
   819  	return map[string]string{
   820  		"TABLET_ALIAS": topoproto.TabletAliasString(tm.tabletAlias),
   821  		"KEYSPACE":     tablet.Keyspace,
   822  		"SHARD":        tablet.Shard,
   823  	}
   824  }
   825  
   826  // initializeReplication is used to initialize the replication when the tablet starts.
   827  // It returns the current primary tablet for use externally
   828  func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType topodatapb.TabletType) (primary *topo.TabletInfo, err error) {
   829  	// If active reparents are disabled, we do not touch replication.
   830  	// There is nothing to do
   831  	if mysqlctl.DisableActiveReparents {
   832  		return nil, nil
   833  	}
   834  
   835  	// If the desired tablet type is primary, then we shouldn't be setting our replication source.
   836  	// So there is nothing to do.
   837  	if tabletType == topodatapb.TabletType_PRIMARY {
   838  		return nil, nil
   839  	}
   840  
   841  	// Read the shard to find the current primary, and its location.
   842  	tablet := tm.Tablet()
   843  	si, err := tm.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard)
   844  	if err != nil {
   845  		return nil, vterrors.Wrap(err, "cannot read shard")
   846  	}
   847  	if si.PrimaryAlias == nil {
   848  		// There's no primary. This is fine, since there might be no primary currently
   849  		log.Warningf("cannot start replication during initialization: shard %v/%v has no primary.", tablet.Keyspace, tablet.Shard)
   850  		return nil, nil
   851  	}
   852  	if topoproto.TabletAliasEqual(si.PrimaryAlias, tablet.Alias) {
   853  		// We used to be the primary before we got restarted,
   854  		// and no other primary has been elected in the meantime.
   855  		// There isn't anything to do here either.
   856  		log.Warningf("cannot start replication during initialization: primary in shard record still points to this tablet.")
   857  		return nil, nil
   858  	}
   859  	currentPrimary, err := tm.TopoServer.GetTablet(ctx, si.PrimaryAlias)
   860  	if err != nil {
   861  		return nil, vterrors.Wrapf(err, "cannot read primary tablet %v", si.PrimaryAlias)
   862  	}
   863  
   864  	durabilityName, err := tm.TopoServer.GetKeyspaceDurability(ctx, tablet.Keyspace)
   865  	if err != nil {
   866  		return nil, vterrors.Wrapf(err, "cannot read keyspace durability policy %v", tablet.Keyspace)
   867  	}
   868  	log.Infof("Getting a new durability policy for %v", durabilityName)
   869  	durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
   870  	if err != nil {
   871  		return nil, vterrors.Wrapf(err, "cannot get durability policy %v", durabilityName)
   872  	}
   873  	// If using semi-sync, we need to enable it before connecting to primary.
   874  	// We should set the correct type, since it is used in replica semi-sync
   875  	tablet.Type = tabletType
   876  	if err := tm.fixSemiSync(tabletType, convertBoolToSemiSyncAction(reparentutil.IsReplicaSemiSync(durability, currentPrimary.Tablet, tablet))); err != nil {
   877  		return nil, err
   878  	}
   879  
   880  	// Set primary and start replication.
   881  	if currentPrimary.Tablet.MysqlHostname == "" {
   882  		log.Warningf("primary tablet in the shard record does not have mysql hostname specified, possibly because that tablet has been shut down.")
   883  		return nil, nil
   884  	}
   885  	if err := tm.MysqlDaemon.SetReplicationSource(ctx, currentPrimary.Tablet.MysqlHostname, int(currentPrimary.Tablet.MysqlPort), true /* stopReplicationBefore */, true /* startReplicationAfter */); err != nil {
   886  		return nil, vterrors.Wrap(err, "MysqlDaemon.SetReplicationSource failed")
   887  	}
   888  
   889  	return currentPrimary, nil
   890  }