vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/tm_init.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 Package tabletmanager exports the TabletManager object. It keeps the local tablet 19 state, starts / stops all associated services (query service, 20 update stream, binlog players, ...), and handles tabletmanager RPCs 21 to update the state. 22 23 The tm is responsible for maintaining the tablet record in the 24 topology server. Only 'vtctl DeleteTablet' 25 should be run by other processes, everything else should ask 26 the tablet server to make the change. 27 28 Most RPC calls obtain the actionSema, except the easy read-only ones. 29 RPC calls that change the tablet record will also call updateState. 30 31 See rpc_server.go for all cases, and which actions take the actionSema, 32 and which run changeCallback. 33 */ 34 package tabletmanager 35 36 import ( 37 "context" 38 "encoding/hex" 39 "fmt" 40 "math/rand" 41 "regexp" 42 "strings" 43 "sync" 44 "time" 45 46 "github.com/spf13/pflag" 47 "k8s.io/apimachinery/pkg/util/sets" 48 49 "vitess.io/vitess/go/flagutil" 50 "vitess.io/vitess/go/mysql/collations" 51 "vitess.io/vitess/go/netutil" 52 "vitess.io/vitess/go/stats" 53 "vitess.io/vitess/go/sync2" 54 "vitess.io/vitess/go/vt/binlog" 55 "vitess.io/vitess/go/vt/dbconfigs" 56 "vitess.io/vitess/go/vt/dbconnpool" 57 "vitess.io/vitess/go/vt/key" 58 "vitess.io/vitess/go/vt/log" 59 "vitess.io/vitess/go/vt/logutil" 60 "vitess.io/vitess/go/vt/mysqlctl" 61 querypb "vitess.io/vitess/go/vt/proto/query" 62 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 63 "vitess.io/vitess/go/vt/servenv" 64 "vitess.io/vitess/go/vt/topo" 65 "vitess.io/vitess/go/vt/topo/topoproto" 66 "vitess.io/vitess/go/vt/topotools" 67 "vitess.io/vitess/go/vt/vtctl/reparentutil" 68 "vitess.io/vitess/go/vt/vterrors" 69 "vitess.io/vitess/go/vt/vttablet/tabletmanager/vdiff" 70 "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" 71 "vitess.io/vitess/go/vt/vttablet/tabletserver" 72 ) 73 74 // Query rules from denylist 75 const denyListQueryList string = "DenyListQueryRules" 76 77 var ( 78 // The following flags initialize the tablet record. 79 tabletHostname string 80 initKeyspace string 81 initShard string 82 initTabletType string 83 initDbNameOverride string 84 skipBuildInfoTags = "/.*/" 85 initTags flagutil.StringMapValue 86 87 initPopulateMetadata bool 88 initTimeout = 1 * time.Minute 89 ) 90 91 func registerInitFlags(fs *pflag.FlagSet) { 92 fs.StringVar(&tabletHostname, "tablet_hostname", tabletHostname, "if not empty, this hostname will be assumed instead of trying to resolve it") 93 fs.StringVar(&initKeyspace, "init_keyspace", initKeyspace, "(init parameter) keyspace to use for this tablet") 94 fs.StringVar(&initShard, "init_shard", initShard, "(init parameter) shard to use for this tablet") 95 fs.StringVar(&initTabletType, "init_tablet_type", initTabletType, "(init parameter) the tablet type to use for this tablet.") 96 fs.StringVar(&initDbNameOverride, "init_db_name_override", initDbNameOverride, "(init parameter) override the name of the db used by vttablet. Without this flag, the db name defaults to vt_<keyspacename>") 97 fs.StringVar(&skipBuildInfoTags, "vttablet_skip_buildinfo_tags", skipBuildInfoTags, "comma-separated list of buildinfo tags to skip from merging with --init_tags. each tag is either an exact match or a regular expression of the form '/regexp/'.") 98 fs.Var(&initTags, "init_tags", "(init parameter) comma separated list of key:value pairs used to tag the tablet") 99 100 fs.BoolVar(&initPopulateMetadata, "init_populate_metadata", initPopulateMetadata, "(init parameter) populate metadata tables even if restore_from_backup is disabled. If restore_from_backup is enabled, metadata tables are always populated regardless of this flag.") 101 fs.MarkDeprecated("init_populate_metadata", "this flag is no longer being used and will be removed in future versions") 102 fs.DurationVar(&initTimeout, "init_timeout", initTimeout, "(init parameter) timeout to use for the init phase.") 103 } 104 105 var ( 106 // statsTabletType is set to expose the current tablet type. 107 statsTabletType *stats.String 108 109 // statsTabletTypeCount exposes the current tablet type as a label, 110 // with the value counting the occurrences of the respective tablet type. 111 // Useful for Prometheus which doesn't support exporting strings as stat values. 112 statsTabletTypeCount *stats.CountersWithSingleLabel 113 114 // statsBackupIsRunning is set to 1 (true) if a backup is running. 115 statsBackupIsRunning *stats.GaugesWithMultiLabels 116 117 // statsIsInSrvKeyspace is set to 1 (true), 0 (false) whether the tablet is in the serving keyspace 118 statsIsInSrvKeyspace *stats.Gauge 119 120 statsKeyspace = stats.NewString("TabletKeyspace") 121 statsShard = stats.NewString("TabletShard") 122 statsKeyRangeStart = stats.NewString("TabletKeyRangeStart") 123 statsKeyRangeEnd = stats.NewString("TabletKeyRangeEnd") 124 statsAlias = stats.NewString("TabletAlias") 125 126 // The following variables can be changed to speed up tests. 127 mysqlPortRetryInterval = 1 * time.Second 128 rebuildKeyspaceRetryInterval = 1 * time.Second 129 ) 130 131 func init() { 132 servenv.OnParseFor("vtcombo", registerInitFlags) 133 servenv.OnParseFor("vttablet", registerInitFlags) 134 135 statsTabletType = stats.NewString("TabletType") 136 statsTabletTypeCount = stats.NewCountersWithSingleLabel("TabletTypeCount", "Number of times the tablet changed to the labeled type", "type") 137 statsBackupIsRunning = stats.NewGaugesWithMultiLabels("BackupIsRunning", "Whether a backup is running", []string{"mode"}) 138 statsIsInSrvKeyspace = stats.NewGauge("IsInSrvKeyspace", "Whether the vttablet is in the serving keyspace (1 = true / 0 = false)") 139 } 140 141 // TabletManager is the main class for the tablet manager. 142 type TabletManager struct { 143 // The following fields are set during creation 144 BatchCtx context.Context 145 TopoServer *topo.Server 146 Cnf *mysqlctl.Mycnf 147 MysqlDaemon mysqlctl.MysqlDaemon 148 DBConfigs *dbconfigs.DBConfigs 149 QueryServiceControl tabletserver.Controller 150 UpdateStream binlog.UpdateStreamControl 151 VREngine *vreplication.Engine 152 VDiffEngine *vdiff.Engine 153 154 // tmState manages the TabletManager state. 155 tmState *tmState 156 157 // tabletAlias is saved away from tablet for read-only access 158 tabletAlias *topodatapb.TabletAlias 159 160 // baseTabletType is the tablet type we revert back to 161 // when we transition back from something like PRIMARY. 162 baseTabletType topodatapb.TabletType 163 164 // actionSema is there to run only one action at a time. 165 // This semaphore can be held for long periods of time (hours), 166 // like in the case of a restore. This semaphore must be obtained 167 // first before other mutexes. 168 actionSema *sync2.Semaphore 169 170 // mutex protects all the following fields (that start with '_'), 171 // only hold the mutex to update the fields, nothing else. 172 mutex sync.Mutex 173 174 // _shardSyncChan is a channel for informing the shard sync goroutine that 175 // it should wake up and recheck the tablet state, to make sure it and the 176 // shard record are in sync. 177 // 178 // Call tm.notifyShardSync() instead of sending directly to this channel. 179 _shardSyncChan chan struct{} 180 181 // _shardSyncDone is a channel for waiting until the shard sync goroutine 182 // has really finished after _shardSyncCancel was called. 183 _shardSyncDone chan struct{} 184 185 // _shardSyncCancel is the function to stop the background shard sync goroutine. 186 _shardSyncCancel context.CancelFunc 187 188 // _rebuildKeyspaceDone is a channel for waiting until the current keyspace 189 // has been rebuilt 190 _rebuildKeyspaceDone chan struct{} 191 192 // _rebuildKeyspaceCancel is the function to stop a keyspace rebuild currently 193 // in progress 194 _rebuildKeyspaceCancel context.CancelFunc 195 196 // _lockTablesConnection is used to get and release the table read locks to pause replication 197 _lockTablesConnection *dbconnpool.DBConnection 198 _lockTablesTimer *time.Timer 199 // _isBackupRunning tells us whether there is a backup that is currently running 200 _isBackupRunning bool 201 } 202 203 // BuildTabletFromInput builds a tablet record from input parameters. 204 func BuildTabletFromInput(alias *topodatapb.TabletAlias, port, grpcPort int32, dbServerVersion string, db *dbconfigs.DBConfigs) (*topodatapb.Tablet, error) { 205 hostname := tabletHostname 206 if hostname == "" { 207 var err error 208 hostname, err = netutil.FullyQualifiedHostname() 209 if err != nil { 210 return nil, err 211 } 212 log.Infof("Using detected machine hostname: %v, to change this, fix your machine network configuration or override it with --tablet_hostname. Tablet %s", hostname, alias.String()) 213 } else { 214 log.Infof("Using hostname: %v from --tablet_hostname flag. Tablet %s", hostname, alias.String()) 215 } 216 217 if initKeyspace == "" || initShard == "" { 218 return nil, fmt.Errorf("init_keyspace and init_shard must be specified") 219 } 220 221 // parse and validate shard name 222 shard, keyRange, err := topo.ValidateShardName(initShard) 223 if err != nil { 224 return nil, vterrors.Wrapf(err, "cannot validate shard name %v", initShard) 225 } 226 227 tabletType, err := topoproto.ParseTabletType(initTabletType) 228 if err != nil { 229 return nil, err 230 } 231 switch tabletType { 232 case topodatapb.TabletType_SPARE, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY: 233 default: 234 return nil, fmt.Errorf("invalid init_tablet_type %v; can only be REPLICA, RDONLY or SPARE", tabletType) 235 } 236 237 buildTags, err := getBuildTags(servenv.AppVersion.ToStringMap(), skipBuildInfoTags) 238 if err != nil { 239 return nil, err 240 } 241 242 var charset uint8 243 if db != nil && db.Charset != "" { 244 charset, err = collations.Local().ParseConnectionCharset(db.Charset) 245 if err != nil { 246 return nil, err 247 } 248 } else { 249 charset = collations.Local().DefaultConnectionCharset() 250 } 251 252 return &topodatapb.Tablet{ 253 Alias: alias, 254 Hostname: hostname, 255 PortMap: map[string]int32{ 256 "vt": port, 257 "grpc": grpcPort, 258 }, 259 Keyspace: initKeyspace, 260 Shard: shard, 261 KeyRange: keyRange, 262 Type: tabletType, 263 DbNameOverride: initDbNameOverride, 264 Tags: mergeTags(buildTags, initTags), 265 DbServerVersion: dbServerVersion, 266 DefaultConnCollation: uint32(charset), 267 }, nil 268 } 269 270 func getBuildTags(buildTags map[string]string, skipTagsCSV string) (map[string]string, error) { 271 if skipTagsCSV == "" { 272 return buildTags, nil 273 } 274 275 skipTags := strings.Split(skipTagsCSV, ",") 276 skippers := make([]func(string) bool, len(skipTags)) 277 for i, skipTag := range skipTags { 278 skipTag := skipTag // copy to preserve iteration scope in the closures below 279 if strings.HasPrefix(skipTag, "/") && strings.HasSuffix(skipTag, "/") && len(skipTag) > 1 { 280 // regexp mode 281 tagRegexp, err := regexp.Compile(skipTag[1 : len(skipTag)-1]) 282 if err != nil { 283 return nil, err 284 } 285 286 skippers[i] = func(s string) bool { 287 return tagRegexp.MatchString(s) 288 } 289 } else { 290 skippers[i] = func(s string) bool { 291 log.Warningf(skipTag) 292 return s == skipTag 293 } 294 } 295 } 296 297 skippedTags := sets.New[string]() 298 for tag := range buildTags { 299 for _, skipFn := range skippers { 300 if skipFn(tag) { 301 skippedTags.Insert(tag) 302 break 303 } 304 } 305 } 306 307 result := make(map[string]string, len(buildTags)-skippedTags.Len()) 308 for tag, val := range buildTags { 309 if skippedTags.Has(tag) { 310 continue 311 } 312 313 result[tag] = val 314 } 315 316 return result, nil 317 } 318 319 func mergeTags(a, b map[string]string) map[string]string { 320 maxCap := len(a) 321 if x := len(b); x > maxCap { 322 maxCap = x 323 } 324 325 result := make(map[string]string, maxCap) 326 for k, v := range a { 327 result[k] = v 328 } 329 330 for k, v := range b { 331 result[k] = v 332 } 333 334 return result 335 } 336 337 // Start starts the TabletManager. 338 func (tm *TabletManager) Start(tablet *topodatapb.Tablet, healthCheckInterval time.Duration) error { 339 defer func() { 340 log.Infof("TabletManager Start took ~%d ms", time.Since(servenv.GetInitStartTime()).Milliseconds()) 341 }() 342 log.Infof("TabletManager Start") 343 tm.DBConfigs.DBName = topoproto.TabletDbName(tablet) 344 tm.tabletAlias = tablet.Alias 345 tm.tmState = newTMState(tm, tablet) 346 tm.actionSema = sync2.NewSemaphore(1, 0) 347 348 tm.baseTabletType = tablet.Type 349 350 ctx, cancel := context.WithTimeout(tm.BatchCtx, initTimeout) 351 defer cancel() 352 si, err := tm.createKeyspaceShard(ctx) 353 if err != nil { 354 return err 355 } 356 if err := tm.checkPrimaryShip(ctx, si); err != nil { 357 return err 358 } 359 if err := tm.checkMysql(ctx); err != nil { 360 return err 361 } 362 if err := tm.initTablet(ctx); err != nil { 363 return err 364 } 365 366 err = tm.QueryServiceControl.InitDBConfig(&querypb.Target{ 367 Keyspace: tablet.Keyspace, 368 Shard: tablet.Shard, 369 TabletType: tablet.Type, 370 }, tm.DBConfigs, tm.MysqlDaemon) 371 if err != nil { 372 return vterrors.Wrap(err, "failed to InitDBConfig") 373 } 374 tm.QueryServiceControl.RegisterQueryRuleSource(denyListQueryList) 375 376 if tm.UpdateStream != nil { 377 tm.UpdateStream.InitDBConfig(tm.DBConfigs) 378 servenv.OnRun(tm.UpdateStream.RegisterService) 379 servenv.OnTerm(tm.UpdateStream.Disable) 380 } 381 382 if tm.VREngine != nil { 383 tm.VREngine.InitDBConfig(tm.DBConfigs) 384 servenv.OnTerm(tm.VREngine.Close) 385 } 386 387 if tm.VDiffEngine != nil { 388 tm.VDiffEngine.InitDBConfig(tm.DBConfigs) 389 servenv.OnTerm(tm.VDiffEngine.Close) 390 } 391 392 // The following initializations don't need to be done 393 // in any specific order. 394 tm.startShardSync() 395 tm.exportStats() 396 servenv.OnRun(tm.registerTabletManager) 397 398 restoring, err := tm.handleRestore(tm.BatchCtx) 399 if err != nil { 400 return err 401 } 402 if restoring { 403 // If restore was triggered, it will take care 404 // of updating the tablet state and initializing replication. 405 return nil 406 } 407 // We should be re-read the tablet from tabletManager and use the type specified there. 408 // We shouldn't use the base tablet type directly, since the type could have changed to PRIMARY 409 // earlier in tm.checkPrimaryShip code. 410 _, err = tm.initializeReplication(ctx, tm.Tablet().Type) 411 tm.tmState.Open() 412 return err 413 } 414 415 // Close prepares a tablet for shutdown. First we check our tablet ownership and 416 // then prune the tablet topology entry of all post-init fields. This prevents 417 // stale identifiers from hanging around in topology. 418 func (tm *TabletManager) Close() { 419 // Stop the shard sync loop and wait for it to exit. We do this in Close() 420 // rather than registering it as an OnTerm hook so the shard sync loop keeps 421 // running during lame duck. 422 tm.stopShardSync() 423 tm.stopRebuildKeyspace() 424 425 // cleanup initialized fields in the tablet entry 426 f := func(tablet *topodatapb.Tablet) error { 427 if err := topotools.CheckOwnership(tm.Tablet(), tablet); err != nil { 428 return err 429 } 430 tablet.Hostname = "" 431 tablet.MysqlHostname = "" 432 tablet.PortMap = nil 433 return nil 434 } 435 436 updateCtx, updateCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout) 437 defer updateCancel() 438 439 if _, err := tm.TopoServer.UpdateTabletFields(updateCtx, tm.tabletAlias, f); err != nil { 440 log.Warningf("Failed to update tablet record, may contain stale identifiers: %v", err) 441 } 442 443 tm.tmState.Close() 444 } 445 446 // Stop shuts down the tm. Normally this is not necessary, since we use 447 // servenv OnTerm and OnClose hooks to coordinate shutdown automatically, 448 // while taking lameduck into account. However, this may be useful for tests, 449 // when you want to clean up an tm immediately. 450 func (tm *TabletManager) Stop() { 451 // Stop the shard sync loop and wait for it to exit. This needs to be done 452 // here in addition to in Close() because tests do not call Close(). 453 tm.stopShardSync() 454 tm.stopRebuildKeyspace() 455 456 if tm.UpdateStream != nil { 457 tm.UpdateStream.Disable() 458 } 459 460 if tm.VREngine != nil { 461 tm.VREngine.Close() 462 } 463 464 if tm.VDiffEngine != nil { 465 tm.VDiffEngine.Close() 466 } 467 468 tm.MysqlDaemon.Close() 469 tm.tmState.Close() 470 } 471 472 func (tm *TabletManager) createKeyspaceShard(ctx context.Context) (*topo.ShardInfo, error) { 473 // mutex is needed because we set _shardInfo and _srvKeyspace 474 tm.mutex.Lock() 475 defer tm.mutex.Unlock() 476 477 tablet := tm.Tablet() 478 log.Infof("Reading/creating keyspace and shard records for %v/%v", tablet.Keyspace, tablet.Shard) 479 480 // Read the shard, create it if necessary. 481 var shardInfo *topo.ShardInfo 482 if err := tm.withRetry(ctx, "creating keyspace and shard", func() error { 483 var err error 484 shardInfo, err = tm.TopoServer.GetOrCreateShard(ctx, tablet.Keyspace, tablet.Shard) 485 return err 486 }); err != nil { 487 return nil, vterrors.Wrap(err, "createKeyspaceShard: cannot GetOrCreateShard shard") 488 } 489 tm.tmState.RefreshFromTopoInfo(ctx, shardInfo, nil) 490 491 // Rebuild keyspace if this the first tablet in this keyspace/cell 492 srvKeyspace, err := tm.TopoServer.GetSrvKeyspace(ctx, tm.tabletAlias.Cell, tablet.Keyspace) 493 switch { 494 case err == nil: 495 tm.tmState.RefreshFromTopoInfo(ctx, nil, srvKeyspace) 496 case topo.IsErrType(err, topo.NoNode): 497 var rebuildKsCtx context.Context 498 rebuildKsCtx, tm._rebuildKeyspaceCancel = context.WithCancel(tm.BatchCtx) 499 tm._rebuildKeyspaceDone = make(chan struct{}) 500 go tm.rebuildKeyspace(rebuildKsCtx, tm._rebuildKeyspaceDone, tablet.Keyspace, rebuildKeyspaceRetryInterval) 501 default: 502 return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to read SrvKeyspace") 503 } 504 505 // Rebuild vschema graph if this is the first tablet in this keyspace/cell. 506 srvVSchema, err := tm.TopoServer.GetSrvVSchema(ctx, tm.tabletAlias.Cell) 507 switch { 508 case err == nil: 509 // Check if vschema was rebuilt after the initial creation of the keyspace. 510 if _, keyspaceExists := srvVSchema.GetKeyspaces()[tablet.Keyspace]; !keyspaceExists { 511 if err := tm.TopoServer.RebuildSrvVSchema(ctx, []string{tm.tabletAlias.Cell}); err != nil { 512 return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to RebuildSrvVSchema") 513 } 514 } 515 case topo.IsErrType(err, topo.NoNode): 516 // There is no SrvSchema in this cell at all, so we definitely need to rebuild. 517 if err := tm.TopoServer.RebuildSrvVSchema(ctx, []string{tm.tabletAlias.Cell}); err != nil { 518 return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to RebuildSrvVSchema") 519 } 520 default: 521 return nil, vterrors.Wrap(err, "initeKeyspaceShardTopo: failed to read SrvVSchema") 522 } 523 return shardInfo, nil 524 } 525 526 func (tm *TabletManager) stopRebuildKeyspace() { 527 var doneChan <-chan struct{} 528 529 tm.mutex.Lock() 530 if tm._rebuildKeyspaceCancel != nil { 531 tm._rebuildKeyspaceCancel() 532 } 533 doneChan = tm._rebuildKeyspaceDone 534 tm.mutex.Unlock() 535 536 if doneChan != nil { 537 <-doneChan 538 } 539 } 540 541 func (tm *TabletManager) rebuildKeyspace(ctx context.Context, done chan<- struct{}, keyspace string, retryInterval time.Duration) { 542 var srvKeyspace *topodatapb.SrvKeyspace 543 544 defer func() { 545 log.Infof("Keyspace rebuilt: %v", keyspace) 546 if ctx.Err() == nil { 547 tm.tmState.RefreshFromTopoInfo(tm.BatchCtx, nil, srvKeyspace) 548 } 549 close(done) 550 }() 551 552 // RebuildKeyspace will fail until at least one tablet is up for every shard. 553 firstTime := true 554 var err error 555 for { 556 if ctx.Err() != nil { 557 return 558 } 559 if !firstTime { 560 // If keyspace was rebuilt by someone else, we can just exit. 561 srvKeyspace, err = tm.TopoServer.GetSrvKeyspace(ctx, tm.tabletAlias.Cell, keyspace) 562 if err == nil || ctx.Err() != nil { 563 return 564 } 565 } 566 err = topotools.RebuildKeyspace(ctx, logutil.NewConsoleLogger(), tm.TopoServer, keyspace, []string{tm.tabletAlias.Cell}, false) 567 if err == nil { 568 srvKeyspace, err = tm.TopoServer.GetSrvKeyspace(ctx, tm.tabletAlias.Cell, keyspace) 569 if err == nil || ctx.Err() != nil { 570 return 571 } 572 } 573 if firstTime { 574 log.Warningf("rebuildKeyspace failed, will retry every %v: %v", retryInterval, err) 575 } 576 firstTime = false 577 time.Sleep(retryInterval) 578 } 579 } 580 581 func (tm *TabletManager) checkPrimaryShip(ctx context.Context, si *topo.ShardInfo) error { 582 if si.PrimaryAlias != nil && topoproto.TabletAliasEqual(si.PrimaryAlias, tm.tabletAlias) { 583 // We're marked as primary in the shard record, which could mean the primary 584 // tablet process was just restarted. However, we need to check if a new 585 // primary is in the process of taking over. In that case, it will let us 586 // know by forcibly updating the old primary's tablet record. 587 oldTablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias) 588 switch { 589 case topo.IsErrType(err, topo.NoNode): 590 // There's no existing tablet record, so we can assume 591 // no one has left us a message to step down. 592 log.Infof("Shard primary alias matches, but there is no existing tablet record. Switching to primary with 'Now' as time") 593 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 594 tablet.Type = topodatapb.TabletType_PRIMARY 595 // Update the primary term start time (current value is 0) because we 596 // assume that we are actually the PRIMARY and in case of a tiebreak, 597 // vtgate should prefer us. 598 tablet.PrimaryTermStartTime = logutil.TimeToProto(time.Now()) 599 }) 600 case err == nil: 601 if oldTablet.Type == topodatapb.TabletType_PRIMARY { 602 log.Infof("Shard primary alias matches, and existing tablet agrees. Switching to primary with tablet's primary term start time: %v", oldTablet.PrimaryTermStartTime) 603 // We're marked as primary in the shard record, 604 // and our existing tablet record agrees. 605 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 606 tablet.Type = topodatapb.TabletType_PRIMARY 607 tablet.PrimaryTermStartTime = oldTablet.PrimaryTermStartTime 608 }) 609 } else { 610 log.Warningf("Shard primary alias matches, but existing tablet is not primary. Switching from %v to primary with the shard's primary term start time: %v", oldTablet.Type, si.PrimaryTermStartTime) 611 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 612 tablet.Type = topodatapb.TabletType_PRIMARY 613 tablet.PrimaryTermStartTime = si.PrimaryTermStartTime 614 }) 615 } 616 default: 617 return vterrors.Wrap(err, "InitTablet failed to read existing tablet record") 618 } 619 } else { 620 oldTablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias) 621 switch { 622 case topo.IsErrType(err, topo.NoNode): 623 // There's no existing tablet record, so there is nothing to do 624 case err == nil: 625 if oldTablet.Type == topodatapb.TabletType_PRIMARY { 626 // Our existing tablet type is primary, but the shard record does not agree. 627 // Only take over if our primary_term_start_time is after what is in the shard record 628 oldPrimaryTermStartTime := oldTablet.GetPrimaryTermStartTime() 629 currentShardTime := si.GetPrimaryTermStartTime() 630 if oldPrimaryTermStartTime.After(currentShardTime) { 631 log.Infof("Shard primary alias does not match, but the tablet's primary term start time is newer. Switching to primary with tablet's primary term start time: %v", oldTablet.PrimaryTermStartTime) 632 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 633 tablet.Type = topodatapb.TabletType_PRIMARY 634 tablet.PrimaryTermStartTime = oldTablet.PrimaryTermStartTime 635 }) 636 } else { 637 log.Infof("Existing tablet type is primary, but the shard record has a different primary with a newer timestamp. Remaining a replica") 638 } 639 } 640 default: 641 return vterrors.Wrap(err, "InitTablet failed to read existing tablet record") 642 } 643 } 644 return nil 645 } 646 647 func (tm *TabletManager) checkMysql(ctx context.Context) error { 648 appConfig, err := tm.DBConfigs.AppWithDB().MysqlParams() 649 if err != nil { 650 return err 651 } 652 if appConfig.Host != "" { 653 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 654 tablet.MysqlHostname = appConfig.Host 655 tablet.MysqlPort = int32(appConfig.Port) 656 }) 657 } else { 658 // Assume unix socket was specified and try to get the port from mysqld 659 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 660 tablet.MysqlHostname = tablet.Hostname 661 }) 662 mysqlPort, err := tm.MysqlDaemon.GetMysqlPort() 663 if err != nil { 664 log.Warningf("Cannot get current mysql port, will keep retrying every %v: %v", mysqlPortRetryInterval, err) 665 go tm.findMysqlPort(mysqlPortRetryInterval) 666 } else { 667 tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { 668 tablet.MysqlPort = mysqlPort 669 }) 670 } 671 } 672 return nil 673 } 674 675 func (tm *TabletManager) findMysqlPort(retryInterval time.Duration) { 676 for { 677 time.Sleep(retryInterval) 678 mport, err := tm.MysqlDaemon.GetMysqlPort() 679 if err != nil { 680 continue 681 } 682 log.Infof("Identified mysql port: %v", mport) 683 tm.tmState.SetMysqlPort(mport) 684 return 685 } 686 } 687 688 func (tm *TabletManager) initTablet(ctx context.Context) error { 689 tablet := tm.Tablet() 690 err := tm.TopoServer.CreateTablet(ctx, tablet) 691 switch { 692 case err == nil: 693 // It worked, we're good. 694 case topo.IsErrType(err, topo.NodeExists): 695 // The node already exists, will just try to update 696 // it. So we read it first. 697 oldTablet, err := tm.TopoServer.GetTablet(ctx, tablet.Alias) 698 if err != nil { 699 return vterrors.Wrap(err, "initTablet failed to read existing tablet record") 700 } 701 702 // Sanity check the keyspace and shard 703 if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { 704 return fmt.Errorf("initTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) 705 } 706 707 // Update ShardReplication in any case, to be sure. This is 708 // meant to fix the case when a Tablet record was created, but 709 // then the ShardReplication record was not (because for 710 // instance of a startup timeout). Upon running this code 711 // again, we want to fix ShardReplication. 712 if updateErr := topo.UpdateTabletReplicationData(ctx, tm.TopoServer, tablet); updateErr != nil { 713 return vterrors.Wrap(updateErr, "UpdateTabletReplicationData failed") 714 } 715 716 // Then overwrite everything, ignoring version mismatch. 717 if err := tm.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, nil)); err != nil { 718 return vterrors.Wrap(err, "UpdateTablet failed") 719 } 720 default: 721 return vterrors.Wrap(err, "CreateTablet failed") 722 } 723 return nil 724 } 725 726 func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) { 727 // Sanity check for inconsistent flags 728 if tm.Cnf == nil && restoreFromBackup { 729 return false, fmt.Errorf("you cannot enable --restore_from_backup without a my.cnf file") 730 } 731 732 // Restore in the background 733 if restoreFromBackup { 734 go func() { 735 // Open the state manager after restore is done. 736 defer tm.tmState.Open() 737 738 // Zero date will cause us to use the latest, which is the default 739 backupTime := time.Time{} 740 741 // Or if a backup timestamp was specified then we use the last backup taken at or before that time 742 if restoreFromBackupTsStr != "" { 743 var err error 744 backupTime, err = time.Parse(mysqlctl.BackupTimestampFormat, restoreFromBackupTsStr) 745 if err != nil { 746 log.Exitf(fmt.Sprintf("RestoreFromBackup failed: unable to parse the backup timestamp value provided of '%s'", restoreFromBackupTsStr)) 747 } 748 } 749 750 // restoreFromBackup will just be a regular action 751 // (same as if it was triggered remotely) 752 if err := tm.RestoreData(ctx, logutil.NewConsoleLogger(), waitForBackupInterval, false /* deleteBeforeRestore */, backupTime); err != nil { 753 log.Exitf("RestoreFromBackup failed: %v", err) 754 } 755 }() 756 return true, nil 757 } 758 759 return false, nil 760 } 761 762 func (tm *TabletManager) exportStats() { 763 tablet := tm.Tablet() 764 statsKeyspace.Set(tablet.Keyspace) 765 statsShard.Set(tablet.Shard) 766 statsTabletType.Set(topoproto.TabletTypeLString(tm.tmState.tablet.Type)) 767 statsTabletTypeCount.Add(topoproto.TabletTypeLString(tm.tmState.tablet.Type), 1) 768 if key.KeyRangeIsPartial(tablet.KeyRange) { 769 statsKeyRangeStart.Set(hex.EncodeToString(tablet.KeyRange.Start)) 770 statsKeyRangeEnd.Set(hex.EncodeToString(tablet.KeyRange.End)) 771 } 772 statsAlias.Set(topoproto.TabletAliasString(tablet.Alias)) 773 } 774 775 // withRetry will exponentially back off and retry a function upon 776 // failure, until the context is Done(), or the function returned with 777 // no error. We use this at startup with a context timeout set to the 778 // value of the init_timeout flag, so we can try to modify the 779 // topology over a longer period instead of dying right away. 780 func (tm *TabletManager) withRetry(ctx context.Context, description string, work func() error) error { 781 backoff := 1 * time.Second 782 for { 783 err := work() 784 if err == nil || err == context.Canceled || err == context.DeadlineExceeded { 785 return err 786 } 787 788 log.Warningf("%v failed (%v), backing off %v before retrying", description, err, backoff) 789 select { 790 case <-ctx.Done(): 791 return ctx.Err() 792 case <-time.After(backoff): 793 // Exponential backoff with 1.3 as a factor, 794 // and randomized down by at most 20 795 // percent. The generated time series looks 796 // good. Also note rand.Seed is called at 797 // init() time in binlog_players.go. 798 f := float64(backoff) * 1.3 799 f -= f * 0.2 * rand.Float64() 800 backoff = time.Duration(f) 801 } 802 } 803 } 804 805 // Tablet reads the stored Tablet from the tm. 806 func (tm *TabletManager) Tablet() *topodatapb.Tablet { 807 return tm.tmState.Tablet() 808 } 809 810 // DeniedTables returns the list of currently denied tables. 811 func (tm *TabletManager) DeniedTables() []string { 812 return tm.tmState.DeniedTables() 813 } 814 815 // hookExtraEnv returns the map to pass to local hooks 816 func (tm *TabletManager) hookExtraEnv() map[string]string { 817 tablet := tm.Tablet() 818 819 return map[string]string{ 820 "TABLET_ALIAS": topoproto.TabletAliasString(tm.tabletAlias), 821 "KEYSPACE": tablet.Keyspace, 822 "SHARD": tablet.Shard, 823 } 824 } 825 826 // initializeReplication is used to initialize the replication when the tablet starts. 827 // It returns the current primary tablet for use externally 828 func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType topodatapb.TabletType) (primary *topo.TabletInfo, err error) { 829 // If active reparents are disabled, we do not touch replication. 830 // There is nothing to do 831 if mysqlctl.DisableActiveReparents { 832 return nil, nil 833 } 834 835 // If the desired tablet type is primary, then we shouldn't be setting our replication source. 836 // So there is nothing to do. 837 if tabletType == topodatapb.TabletType_PRIMARY { 838 return nil, nil 839 } 840 841 // Read the shard to find the current primary, and its location. 842 tablet := tm.Tablet() 843 si, err := tm.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard) 844 if err != nil { 845 return nil, vterrors.Wrap(err, "cannot read shard") 846 } 847 if si.PrimaryAlias == nil { 848 // There's no primary. This is fine, since there might be no primary currently 849 log.Warningf("cannot start replication during initialization: shard %v/%v has no primary.", tablet.Keyspace, tablet.Shard) 850 return nil, nil 851 } 852 if topoproto.TabletAliasEqual(si.PrimaryAlias, tablet.Alias) { 853 // We used to be the primary before we got restarted, 854 // and no other primary has been elected in the meantime. 855 // There isn't anything to do here either. 856 log.Warningf("cannot start replication during initialization: primary in shard record still points to this tablet.") 857 return nil, nil 858 } 859 currentPrimary, err := tm.TopoServer.GetTablet(ctx, si.PrimaryAlias) 860 if err != nil { 861 return nil, vterrors.Wrapf(err, "cannot read primary tablet %v", si.PrimaryAlias) 862 } 863 864 durabilityName, err := tm.TopoServer.GetKeyspaceDurability(ctx, tablet.Keyspace) 865 if err != nil { 866 return nil, vterrors.Wrapf(err, "cannot read keyspace durability policy %v", tablet.Keyspace) 867 } 868 log.Infof("Getting a new durability policy for %v", durabilityName) 869 durability, err := reparentutil.GetDurabilityPolicy(durabilityName) 870 if err != nil { 871 return nil, vterrors.Wrapf(err, "cannot get durability policy %v", durabilityName) 872 } 873 // If using semi-sync, we need to enable it before connecting to primary. 874 // We should set the correct type, since it is used in replica semi-sync 875 tablet.Type = tabletType 876 if err := tm.fixSemiSync(tabletType, convertBoolToSemiSyncAction(reparentutil.IsReplicaSemiSync(durability, currentPrimary.Tablet, tablet))); err != nil { 877 return nil, err 878 } 879 880 // Set primary and start replication. 881 if currentPrimary.Tablet.MysqlHostname == "" { 882 log.Warningf("primary tablet in the shard record does not have mysql hostname specified, possibly because that tablet has been shut down.") 883 return nil, nil 884 } 885 if err := tm.MysqlDaemon.SetReplicationSource(ctx, currentPrimary.Tablet.MysqlHostname, int(currentPrimary.Tablet.MysqlPort), true /* stopReplicationBefore */, true /* startReplicationAfter */); err != nil { 886 return nil, vterrors.Wrap(err, "MysqlDaemon.SetReplicationSource failed") 887 } 888 889 return currentPrimary, nil 890 }