vitess.io/vitess@v0.16.2/go/vt/vtctl/grpcvtctldserver/server.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package grpcvtctldserver 18 19 import ( 20 "context" 21 "encoding/json" 22 "errors" 23 "fmt" 24 "io" 25 "net/http" 26 "path/filepath" 27 "sort" 28 "strings" 29 "sync" 30 "time" 31 32 "google.golang.org/grpc" 33 "google.golang.org/protobuf/proto" 34 "k8s.io/apimachinery/pkg/util/sets" 35 36 "vitess.io/vitess/go/event" 37 "vitess.io/vitess/go/netutil" 38 "vitess.io/vitess/go/protoutil" 39 "vitess.io/vitess/go/sqlescape" 40 "vitess.io/vitess/go/sync2" 41 "vitess.io/vitess/go/trace" 42 "vitess.io/vitess/go/vt/callerid" 43 "vitess.io/vitess/go/vt/concurrency" 44 hk "vitess.io/vitess/go/vt/hook" 45 "vitess.io/vitess/go/vt/key" 46 "vitess.io/vitess/go/vt/log" 47 "vitess.io/vitess/go/vt/logutil" 48 "vitess.io/vitess/go/vt/mysqlctl" 49 "vitess.io/vitess/go/vt/mysqlctl/backupstorage" 50 "vitess.io/vitess/go/vt/mysqlctl/mysqlctlproto" 51 "vitess.io/vitess/go/vt/mysqlctl/tmutils" 52 "vitess.io/vitess/go/vt/schema" 53 "vitess.io/vitess/go/vt/schemamanager" 54 "vitess.io/vitess/go/vt/sqlparser" 55 "vitess.io/vitess/go/vt/topo" 56 "vitess.io/vitess/go/vt/topo/topoproto" 57 "vitess.io/vitess/go/vt/topotools" 58 "vitess.io/vitess/go/vt/topotools/events" 59 "vitess.io/vitess/go/vt/vtctl/reparentutil" 60 "vitess.io/vitess/go/vt/vtctl/schematools" 61 "vitess.io/vitess/go/vt/vtctl/workflow" 62 "vitess.io/vitess/go/vt/vterrors" 63 "vitess.io/vitess/go/vt/vttablet/tmclient" 64 65 logutilpb "vitess.io/vitess/go/vt/proto/logutil" 66 mysqlctlpb "vitess.io/vitess/go/vt/proto/mysqlctl" 67 querypb "vitess.io/vitess/go/vt/proto/query" 68 replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" 69 tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" 70 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 71 vschemapb "vitess.io/vitess/go/vt/proto/vschema" 72 vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" 73 vtctlservicepb "vitess.io/vitess/go/vt/proto/vtctlservice" 74 "vitess.io/vitess/go/vt/proto/vtrpc" 75 ) 76 77 const ( 78 initShardPrimaryOperation = "InitShardPrimary" 79 ) 80 81 // VtctldServer implements the Vtctld RPC service protocol. 82 type VtctldServer struct { 83 vtctlservicepb.UnimplementedVtctldServer 84 ts *topo.Server 85 tmc tmclient.TabletManagerClient 86 ws *workflow.Server 87 } 88 89 // NewVtctldServer returns a new VtctldServer for the given topo server. 90 func NewVtctldServer(ts *topo.Server) *VtctldServer { 91 tmc := tmclient.NewTabletManagerClient() 92 93 return &VtctldServer{ 94 ts: ts, 95 tmc: tmc, 96 ws: workflow.NewServer(ts, tmc), 97 } 98 } 99 100 // NewTestVtctldServer returns a new VtctldServer for the given topo server 101 // AND tmclient for use in tests. This should NOT be used in production. 102 func NewTestVtctldServer(ts *topo.Server, tmc tmclient.TabletManagerClient) *VtctldServer { 103 return &VtctldServer{ 104 ts: ts, 105 tmc: tmc, 106 ws: workflow.NewServer(ts, tmc), 107 } 108 } 109 110 func panicHandler(err *error) { 111 if x := recover(); x != nil { 112 *err = fmt.Errorf("uncaught panic: %v", x) 113 } 114 } 115 116 // AddCellInfo is part of the vtctlservicepb.VtctldServer interface. 117 func (s *VtctldServer) AddCellInfo(ctx context.Context, req *vtctldatapb.AddCellInfoRequest) (resp *vtctldatapb.AddCellInfoResponse, err error) { 118 span, ctx := trace.NewSpan(ctx, "VtctldServer.AddCellInfo") 119 defer span.Finish() 120 121 defer panicHandler(&err) 122 123 if req.CellInfo.Root == "" { 124 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "CellInfo.Root must be non-empty") 125 return nil, err 126 } 127 128 span.Annotate("cell", req.Name) 129 span.Annotate("cell_root", req.CellInfo.Root) 130 span.Annotate("cell_address", req.CellInfo.ServerAddress) 131 132 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 133 defer cancel() 134 135 if err = s.ts.CreateCellInfo(ctx, req.Name, req.CellInfo); err != nil { 136 return nil, err 137 } 138 139 return &vtctldatapb.AddCellInfoResponse{}, nil 140 } 141 142 // AddCellsAlias is part of the vtctlservicepb.VtctldServer interface. 143 func (s *VtctldServer) AddCellsAlias(ctx context.Context, req *vtctldatapb.AddCellsAliasRequest) (resp *vtctldatapb.AddCellsAliasResponse, err error) { 144 span, ctx := trace.NewSpan(ctx, "VtctldServer.AddCellsAlias") 145 defer span.Finish() 146 147 defer panicHandler(&err) 148 149 span.Annotate("cells_alias", req.Name) 150 span.Annotate("cells", strings.Join(req.Cells, ",")) 151 152 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 153 defer cancel() 154 155 if err = s.ts.CreateCellsAlias(ctx, req.Name, &topodatapb.CellsAlias{Cells: req.Cells}); err != nil { 156 return nil, err 157 } 158 159 return &vtctldatapb.AddCellsAliasResponse{}, nil 160 } 161 162 // ApplyRoutingRules is part of the vtctlservicepb.VtctldServer interface. 163 func (s *VtctldServer) ApplyRoutingRules(ctx context.Context, req *vtctldatapb.ApplyRoutingRulesRequest) (resp *vtctldatapb.ApplyRoutingRulesResponse, err error) { 164 span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplyRoutingRules") 165 defer span.Finish() 166 167 defer panicHandler(&err) 168 169 span.Annotate("skip_rebuild", req.SkipRebuild) 170 span.Annotate("rebuild_cells", strings.Join(req.RebuildCells, ",")) 171 172 if err = s.ts.SaveRoutingRules(ctx, req.RoutingRules); err != nil { 173 return nil, err 174 } 175 176 resp = &vtctldatapb.ApplyRoutingRulesResponse{} 177 178 if req.SkipRebuild { 179 log.Warningf("Skipping rebuild of SrvVSchema, will need to run RebuildVSchemaGraph for changes to take effect") 180 return resp, nil 181 } 182 183 if err = s.ts.RebuildSrvVSchema(ctx, req.RebuildCells); err != nil { 184 err = vterrors.Wrapf(err, "RebuildSrvVSchema(%v) failed: %v", req.RebuildCells, err) 185 return nil, err 186 } 187 188 return resp, nil 189 } 190 191 // ApplyShardRoutingRules is part of the vtctlservicepb.VtctldServer interface. 192 func (s *VtctldServer) ApplyShardRoutingRules(ctx context.Context, req *vtctldatapb.ApplyShardRoutingRulesRequest) (*vtctldatapb.ApplyShardRoutingRulesResponse, error) { 193 span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplyShardRoutingRules") 194 defer span.Finish() 195 196 span.Annotate("skip_rebuild", req.SkipRebuild) 197 span.Annotate("rebuild_cells", strings.Join(req.RebuildCells, ",")) 198 199 if err := s.ts.SaveShardRoutingRules(ctx, req.ShardRoutingRules); err != nil { 200 return nil, err 201 } 202 203 resp := &vtctldatapb.ApplyShardRoutingRulesResponse{} 204 205 if req.SkipRebuild { 206 log.Warningf("Skipping rebuild of SrvVSchema as requested, you will need to run RebuildVSchemaGraph for changes to take effect") 207 return resp, nil 208 } 209 210 if err := s.ts.RebuildSrvVSchema(ctx, req.RebuildCells); err != nil { 211 return nil, vterrors.Wrapf(err, "RebuildSrvVSchema(%v) failed: %v", req.RebuildCells, err) 212 } 213 214 return resp, nil 215 } 216 217 // ApplySchema is part of the vtctlservicepb.VtctldServer interface. 218 func (s *VtctldServer) ApplySchema(ctx context.Context, req *vtctldatapb.ApplySchemaRequest) (resp *vtctldatapb.ApplySchemaResponse, err error) { 219 span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplySchema") 220 defer span.Finish() 221 222 defer panicHandler(&err) 223 224 span.Annotate("keyspace", req.Keyspace) 225 span.Annotate("skip_preflight", req.SkipPreflight) 226 span.Annotate("ddl_strategy", req.DdlStrategy) 227 228 if len(req.Sql) == 0 { 229 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "Sql must be a non-empty array") 230 return nil, err 231 } 232 233 // Attach the callerID as the EffectiveCallerID. 234 if req.CallerId != nil { 235 span.Annotate("caller_id", req.CallerId.Principal) 236 ctx = callerid.NewContext(ctx, req.CallerId, &querypb.VTGateCallerID{Username: req.CallerId.Principal}) 237 } 238 239 executionUUID, err := schema.CreateUUID() 240 if err != nil { 241 err = vterrors.Wrapf(err, "unable to create execution UUID") 242 return resp, err 243 } 244 245 migrationContext := req.MigrationContext 246 if migrationContext == "" { 247 migrationContext = fmt.Sprintf("vtctl:%s", executionUUID) 248 } 249 250 waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout) 251 if err != nil { 252 err = vterrors.Wrapf(err, "unable to parse WaitReplicasTimeout into a valid duration") 253 return nil, err 254 } else if !ok { 255 waitReplicasTimeout = time.Second * 30 256 } 257 258 m := sync.RWMutex{} 259 logstream := []*logutilpb.Event{} 260 logger := logutil.NewCallbackLogger(func(e *logutilpb.Event) { 261 m.Lock() 262 defer m.Unlock() 263 264 logstream = append(logstream, e) 265 }) 266 267 executor := schemamanager.NewTabletExecutor(migrationContext, s.ts, s.tmc, logger, waitReplicasTimeout) 268 if req.AllowLongUnavailability { 269 executor.AllowBigSchemaChange() 270 } 271 if req.SkipPreflight { 272 executor.SkipPreflight() 273 } 274 275 if err = executor.SetDDLStrategy(req.DdlStrategy); err != nil { 276 err = vterrors.Wrapf(err, "invalid DdlStrategy: %s", req.DdlStrategy) 277 return resp, err 278 } 279 280 if len(req.UuidList) > 0 { 281 if err = executor.SetUUIDList(req.UuidList); err != nil { 282 err = vterrors.Wrapf(err, "invalid UuidList: %s", req.UuidList) 283 return resp, err 284 } 285 } 286 287 execResult, err := schemamanager.Run( 288 ctx, 289 schemamanager.NewPlainController(req.Sql, req.Keyspace), 290 executor, 291 ) 292 293 if err != nil { 294 return &vtctldatapb.ApplySchemaResponse{}, err 295 } 296 297 return &vtctldatapb.ApplySchemaResponse{ 298 UuidList: execResult.UUIDs, 299 }, err 300 } 301 302 // ApplyVSchema is part of the vtctlservicepb.VtctldServer interface. 303 func (s *VtctldServer) ApplyVSchema(ctx context.Context, req *vtctldatapb.ApplyVSchemaRequest) (resp *vtctldatapb.ApplyVSchemaResponse, err error) { 304 span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplyVSchema") 305 defer span.Finish() 306 307 defer panicHandler(&err) 308 309 span.Annotate("keyspace", req.Keyspace) 310 span.Annotate("cells", strings.Join(req.Cells, ",")) 311 span.Annotate("skip_rebuild", req.SkipRebuild) 312 span.Annotate("dry_run", req.DryRun) 313 314 if _, err = s.ts.GetKeyspace(ctx, req.Keyspace); err != nil { 315 if topo.IsErrType(err, topo.NoNode) { 316 err = vterrors.Wrapf(err, "keyspace(%s) doesn't exist, check if the keyspace is initialized", req.Keyspace) 317 } else { 318 err = vterrors.Wrapf(err, "GetKeyspace(%s)", req.Keyspace) 319 } 320 321 return nil, err 322 } 323 324 if (req.Sql != "" && req.VSchema != nil) || (req.Sql == "" && req.VSchema == nil) { 325 err = vterrors.New(vtrpc.Code_INVALID_ARGUMENT, "must pass exactly one of req.VSchema and req.Sql") 326 return nil, err 327 } 328 329 var vs *vschemapb.Keyspace 330 331 if req.Sql != "" { 332 span.Annotate("sql_mode", true) 333 334 var stmt sqlparser.Statement 335 stmt, err = sqlparser.Parse(req.Sql) 336 if err != nil { 337 err = vterrors.Wrapf(err, "Parse(%s)", req.Sql) 338 return nil, err 339 } 340 ddl, ok := stmt.(*sqlparser.AlterVschema) 341 if !ok { 342 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "error parsing VSchema DDL statement `%s`", req.Sql) 343 return nil, err 344 } 345 346 vs, err = s.ts.GetVSchema(ctx, req.Keyspace) 347 if err != nil && !topo.IsErrType(err, topo.NoNode) { 348 err = vterrors.Wrapf(err, "GetVSchema(%s)", req.Keyspace) 349 return nil, err 350 } // otherwise, we keep the empty vschema object from above 351 352 vs, err = topotools.ApplyVSchemaDDL(req.Keyspace, vs, ddl) 353 if err != nil { 354 err = vterrors.Wrapf(err, "ApplyVSchemaDDL(%s,%v,%v)", req.Keyspace, vs, ddl) 355 return nil, err 356 } 357 } else { // "jsonMode" 358 span.Annotate("sql_mode", false) 359 vs = req.VSchema 360 } 361 362 if req.DryRun { // we return what was passed in and parsed, rather than current 363 return &vtctldatapb.ApplyVSchemaResponse{VSchema: vs}, nil 364 } 365 366 if err = s.ts.SaveVSchema(ctx, req.Keyspace, vs); err != nil { 367 err = vterrors.Wrapf(err, "SaveVSchema(%s, %v)", req.Keyspace, req.VSchema) 368 return nil, err 369 } 370 371 if !req.SkipRebuild { 372 if err = s.ts.RebuildSrvVSchema(ctx, req.Cells); err != nil { 373 err = vterrors.Wrapf(err, "RebuildSrvVSchema") 374 return nil, err 375 } 376 } 377 updatedVS, err := s.ts.GetVSchema(ctx, req.Keyspace) 378 if err != nil { 379 err = vterrors.Wrapf(err, "GetVSchema(%s)", req.Keyspace) 380 return nil, err 381 } 382 return &vtctldatapb.ApplyVSchemaResponse{VSchema: updatedVS}, nil 383 } 384 385 // Backup is part of the vtctlservicepb.VtctldServer interface. 386 func (s *VtctldServer) Backup(req *vtctldatapb.BackupRequest, stream vtctlservicepb.Vtctld_BackupServer) (err error) { 387 span, ctx := trace.NewSpan(stream.Context(), "VtctldServer.Backup") 388 defer span.Finish() 389 390 defer panicHandler(&err) 391 392 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 393 span.Annotate("allow_primary", req.AllowPrimary) 394 span.Annotate("concurrency", req.Concurrency) 395 span.Annotate("incremental_from_pos", req.IncrementalFromPos) 396 397 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 398 if err != nil { 399 return err 400 } 401 402 span.Annotate("keyspace", ti.Keyspace) 403 span.Annotate("shard", ti.Shard) 404 405 err = s.backupTablet(ctx, ti.Tablet, req, stream) 406 return err 407 } 408 409 // BackupShard is part of the vtctlservicepb.VtctldServer interface. 410 func (s *VtctldServer) BackupShard(req *vtctldatapb.BackupShardRequest, stream vtctlservicepb.Vtctld_BackupShardServer) (err error) { 411 span, ctx := trace.NewSpan(stream.Context(), "VtctldServer.BackupShard") 412 defer span.Finish() 413 414 defer panicHandler(&err) 415 416 span.Annotate("keyspace", req.Keyspace) 417 span.Annotate("shard", req.Shard) 418 span.Annotate("allow_primary", req.AllowPrimary) 419 span.Annotate("concurrency", req.Concurrency) 420 421 tablets, stats, err := reparentutil.ShardReplicationStatuses(ctx, s.ts, s.tmc, req.Keyspace, req.Shard) 422 if err != nil { 423 return err 424 } 425 426 var ( 427 backupTablet *topodatapb.Tablet 428 backupTabletLag uint32 429 ) 430 431 for i, tablet := range tablets { 432 switch tablet.Type { 433 case topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY, topodatapb.TabletType_SPARE: 434 default: 435 continue 436 } 437 438 if lag := stats[i].ReplicationLagSeconds; backupTablet == nil || lag < backupTabletLag { 439 backupTablet = tablet.Tablet 440 backupTabletLag = lag 441 } 442 } 443 444 if backupTablet == nil && req.AllowPrimary { 445 for _, tablet := range tablets { 446 if tablet.Type != topodatapb.TabletType_PRIMARY { 447 continue 448 } 449 450 backupTablet = tablet.Tablet 451 break 452 } 453 } 454 455 if backupTablet == nil { 456 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no tablet available for backup") 457 return err 458 } 459 460 span.Annotate("tablet_alias", topoproto.TabletAliasString(backupTablet.Alias)) 461 462 r := &vtctldatapb.BackupRequest{Concurrency: req.Concurrency, AllowPrimary: req.AllowPrimary} 463 err = s.backupTablet(ctx, backupTablet, r, stream) 464 return err 465 } 466 467 func (s *VtctldServer) backupTablet(ctx context.Context, tablet *topodatapb.Tablet, req *vtctldatapb.BackupRequest, stream interface { 468 Send(resp *vtctldatapb.BackupResponse) error 469 }) error { 470 r := &tabletmanagerdatapb.BackupRequest{ 471 Concurrency: int64(req.Concurrency), 472 AllowPrimary: req.AllowPrimary, 473 IncrementalFromPos: req.IncrementalFromPos, 474 } 475 logStream, err := s.tmc.Backup(ctx, tablet, r) 476 if err != nil { 477 return err 478 } 479 480 logger := logutil.NewConsoleLogger() 481 for { 482 event, err := logStream.Recv() 483 switch err { 484 case nil: 485 logutil.LogEvent(logger, event) 486 resp := &vtctldatapb.BackupResponse{ 487 TabletAlias: tablet.Alias, 488 Keyspace: tablet.Keyspace, 489 Shard: tablet.Shard, 490 Event: event, 491 } 492 if err := stream.Send(resp); err != nil { 493 logger.Errorf("failed to send stream response %+v: %v", resp, err) 494 } 495 case io.EOF: 496 // Do not do anything for primary tablets and when active reparenting is disabled 497 if mysqlctl.DisableActiveReparents || tablet.Type == topodatapb.TabletType_PRIMARY { 498 return nil 499 } 500 501 // Otherwise we find the correct primary tablet and set the replication source, 502 // since the primary could have changed while we executed the backup which can 503 // also affect whether we want to send semi sync acks or not. 504 tabletInfo, err := s.ts.GetTablet(ctx, tablet.Alias) 505 if err != nil { 506 return err 507 } 508 509 return reparentutil.SetReplicationSource(ctx, s.ts, s.tmc, tabletInfo.Tablet) 510 default: 511 return err 512 } 513 } 514 } 515 516 // ChangeTabletType is part of the vtctlservicepb.VtctldServer interface. 517 func (s *VtctldServer) ChangeTabletType(ctx context.Context, req *vtctldatapb.ChangeTabletTypeRequest) (resp *vtctldatapb.ChangeTabletTypeResponse, err error) { 518 span, ctx := trace.NewSpan(ctx, "VtctldServer.ChangeTabletType") 519 defer span.Finish() 520 521 defer panicHandler(&err) 522 523 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 524 span.Annotate("dry_run", req.DryRun) 525 span.Annotate("tablet_type", topoproto.TabletTypeLString(req.DbType)) 526 527 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 528 defer cancel() 529 530 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 531 if err != nil { 532 return nil, err 533 } 534 535 span.Annotate("before_tablet_type", topoproto.TabletTypeLString(tablet.Type)) 536 537 if !topo.IsTrivialTypeChange(tablet.Type, req.DbType) { 538 err = fmt.Errorf("tablet %v type change %v -> %v is not an allowed transition for ChangeTabletType", req.TabletAlias, tablet.Type, req.DbType) 539 return nil, err 540 } 541 542 if req.DryRun { 543 afterTablet := proto.Clone(tablet.Tablet).(*topodatapb.Tablet) 544 afterTablet.Type = req.DbType 545 546 return &vtctldatapb.ChangeTabletTypeResponse{ 547 BeforeTablet: tablet.Tablet, 548 AfterTablet: afterTablet, 549 WasDryRun: true, 550 }, nil 551 } 552 553 shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) 554 if err != nil { 555 return nil, err 556 } 557 558 durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace) 559 if err != nil { 560 return nil, err 561 } 562 log.Infof("Getting a new durability policy for %v", durabilityName) 563 durability, err := reparentutil.GetDurabilityPolicy(durabilityName) 564 if err != nil { 565 return nil, err 566 } 567 568 if !shard.HasPrimary() { 569 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no primary tablet for shard %v/%v", tablet.Keyspace, tablet.Shard) 570 return nil, err 571 } 572 573 shardPrimary, err := s.ts.GetTablet(ctx, shard.PrimaryAlias) 574 if err != nil { 575 err = fmt.Errorf("cannot lookup primary tablet %v for shard %v/%v: %w", topoproto.TabletAliasString(shard.PrimaryAlias), tablet.Keyspace, tablet.Shard, err) 576 return nil, err 577 } 578 579 if shardPrimary.Type != topodatapb.TabletType_PRIMARY { 580 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "TopologyServer has incosistent state for shard primary %v", topoproto.TabletAliasString(shard.PrimaryAlias)) 581 return nil, err 582 } 583 584 if shardPrimary.Keyspace != tablet.Keyspace || shardPrimary.Shard != tablet.Shard { 585 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary %v and potential replica %v not in same keypace shard (%v/%v)", topoproto.TabletAliasString(shard.PrimaryAlias), req.TabletAlias, tablet.Keyspace, tablet.Shard) 586 return nil, err 587 } 588 589 // We should clone the tablet and change its type to the expected type before checking the durability rules 590 // Since we want to check the durability rules for the desired state and not before we make that change 591 expectedTablet := proto.Clone(tablet.Tablet).(*topodatapb.Tablet) 592 expectedTablet.Type = req.DbType 593 err = s.tmc.ChangeType(ctx, tablet.Tablet, req.DbType, reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, expectedTablet)) 594 if err != nil { 595 return nil, err 596 } 597 598 var changedTablet *topodatapb.Tablet 599 600 changedTabletInfo, err := s.ts.GetTablet(ctx, req.TabletAlias) 601 if err != nil { 602 log.Warningf("error while reading the tablet we just changed back out of the topo: %v", err) 603 } else { 604 changedTablet = changedTabletInfo.Tablet 605 } 606 607 return &vtctldatapb.ChangeTabletTypeResponse{ 608 BeforeTablet: tablet.Tablet, 609 AfterTablet: changedTablet, 610 WasDryRun: false, 611 }, nil 612 } 613 614 // CreateKeyspace is part of the vtctlservicepb.VtctldServer interface. 615 func (s *VtctldServer) CreateKeyspace(ctx context.Context, req *vtctldatapb.CreateKeyspaceRequest) (resp *vtctldatapb.CreateKeyspaceResponse, err error) { 616 span, ctx := trace.NewSpan(ctx, "VtctldServer.CreateKeyspace") 617 defer span.Finish() 618 619 defer panicHandler(&err) 620 621 span.Annotate("keyspace", req.Name) 622 span.Annotate("keyspace_type", topoproto.KeyspaceTypeLString(req.Type)) 623 span.Annotate("force", req.Force) 624 span.Annotate("allow_empty_vschema", req.AllowEmptyVSchema) 625 span.Annotate("durability_policy", req.DurabilityPolicy) 626 627 switch req.Type { 628 case topodatapb.KeyspaceType_NORMAL: 629 case topodatapb.KeyspaceType_SNAPSHOT: 630 if req.BaseKeyspace == "" { 631 err = errors.New("BaseKeyspace is required for SNAPSHOT keyspaces") 632 return nil, err 633 } 634 635 if req.SnapshotTime == nil { 636 err = errors.New("SnapshotTime is required for SNAPSHOT keyspaces") 637 return nil, err 638 } 639 640 span.Annotate("base_keyspace", req.BaseKeyspace) 641 span.Annotate("snapshot_time", req.SnapshotTime) // TODO: get a proper string repr 642 default: 643 return nil, fmt.Errorf("unknown keyspace type %v", req.Type) 644 } 645 646 ki := &topodatapb.Keyspace{ 647 KeyspaceType: req.Type, 648 ServedFroms: req.ServedFroms, 649 BaseKeyspace: req.BaseKeyspace, 650 SnapshotTime: req.SnapshotTime, 651 DurabilityPolicy: req.DurabilityPolicy, 652 } 653 654 err = s.ts.CreateKeyspace(ctx, req.Name, ki) 655 if req.Force && topo.IsErrType(err, topo.NodeExists) { 656 log.Infof("keyspace %v already exists (ignoring error with Force=true)", req.Name) 657 err = nil 658 659 // Get the actual keyspace out of the topo; it may differ in structure, 660 // and we want to return the authoritative version as the "created" one 661 // to the client. 662 var ks *topo.KeyspaceInfo 663 ks, _ = s.ts.GetKeyspace(ctx, req.Name) 664 ki = ks.Keyspace 665 } 666 667 if err != nil { 668 return nil, err 669 } 670 671 if !req.AllowEmptyVSchema { 672 if err = s.ts.EnsureVSchema(ctx, req.Name); err != nil { 673 return nil, err 674 } 675 } 676 677 if req.Type == topodatapb.KeyspaceType_SNAPSHOT { 678 var vs *vschemapb.Keyspace 679 vs, err = s.ts.GetVSchema(ctx, req.BaseKeyspace) 680 if err != nil { 681 log.Infof("error from GetVSchema(%v) = %v", req.BaseKeyspace, err) 682 if topo.IsErrType(err, topo.NoNode) { 683 log.Infof("base keyspace %v does not exist; continuing with bare, unsharded vschema", req.BaseKeyspace) 684 vs = &vschemapb.Keyspace{ 685 Sharded: false, 686 Tables: map[string]*vschemapb.Table{}, 687 Vindexes: map[string]*vschemapb.Vindex{}, 688 } 689 } else { 690 return nil, err 691 } 692 } 693 694 // SNAPSHOT keyspaces are excluded from global routing. 695 vs.RequireExplicitRouting = true 696 697 if err = s.ts.SaveVSchema(ctx, req.Name, vs); err != nil { 698 err = fmt.Errorf("SaveVSchema(%v) = %w", vs, err) 699 return nil, err 700 } 701 } 702 703 cells := []string{} 704 err = s.ts.RebuildSrvVSchema(ctx, cells) 705 if err != nil { 706 return nil, fmt.Errorf("RebuildSrvVSchema(%v) = %w", cells, err) 707 } 708 709 return &vtctldatapb.CreateKeyspaceResponse{ 710 Keyspace: &vtctldatapb.Keyspace{ 711 Name: req.Name, 712 Keyspace: ki, 713 }, 714 }, nil 715 } 716 717 // CreateShard is part of the vtctlservicepb.VtctldServer interface. 718 func (s *VtctldServer) CreateShard(ctx context.Context, req *vtctldatapb.CreateShardRequest) (resp *vtctldatapb.CreateShardResponse, err error) { 719 span, ctx := trace.NewSpan(ctx, "VtctldServer.CreateShard") 720 defer span.Finish() 721 722 defer panicHandler(&err) 723 724 span.Annotate("keyspace", req.Keyspace) 725 span.Annotate("shard", req.ShardName) 726 span.Annotate("force", req.Force) 727 span.Annotate("include_parent", req.IncludeParent) 728 729 if req.IncludeParent { 730 log.Infof("Creating empty keyspace for %s", req.Keyspace) 731 if err2 := s.ts.CreateKeyspace(ctx, req.Keyspace, &topodatapb.Keyspace{}); err2 != nil { 732 if req.Force && topo.IsErrType(err2, topo.NodeExists) { 733 log.Infof("keyspace %v already exists; ignoring error because Force = true", req.Keyspace) 734 } else { 735 err = err2 736 return nil, err 737 } 738 } 739 } 740 741 shardExists := false 742 743 if err = s.ts.CreateShard(ctx, req.Keyspace, req.ShardName); err != nil { 744 if req.Force && topo.IsErrType(err, topo.NodeExists) { 745 log.Infof("shard %v/%v already exists; ignoring error because Force = true", req.Keyspace, req.ShardName) 746 shardExists = true 747 err = nil 748 } else { 749 return nil, err 750 } 751 } 752 753 // Fetch what we just created out of the topo. Errors should never happen 754 // here, but we'll check them anyway. 755 756 ks, err := s.ts.GetKeyspace(ctx, req.Keyspace) 757 if err != nil { 758 return nil, err 759 } 760 761 shard, err := s.ts.GetShard(ctx, req.Keyspace, req.ShardName) 762 if err != nil { 763 return nil, err 764 } 765 766 return &vtctldatapb.CreateShardResponse{ 767 Keyspace: &vtctldatapb.Keyspace{ 768 Name: req.Keyspace, 769 Keyspace: ks.Keyspace, 770 }, 771 Shard: &vtctldatapb.Shard{ 772 Keyspace: req.Keyspace, 773 Name: req.ShardName, 774 Shard: shard.Shard, 775 }, 776 ShardAlreadyExists: shardExists, 777 }, nil 778 } 779 780 // DeleteCellInfo is part of the vtctlservicepb.VtctldServer interface. 781 func (s *VtctldServer) DeleteCellInfo(ctx context.Context, req *vtctldatapb.DeleteCellInfoRequest) (resp *vtctldatapb.DeleteCellInfoResponse, err error) { 782 span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteCellInfo") 783 defer span.Finish() 784 785 defer panicHandler(&err) 786 787 span.Annotate("cell", req.Name) 788 span.Annotate("force", req.Force) 789 790 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 791 defer cancel() 792 793 if err = s.ts.DeleteCellInfo(ctx, req.Name, req.Force); err != nil { 794 return nil, err 795 } 796 797 return &vtctldatapb.DeleteCellInfoResponse{}, nil 798 } 799 800 // DeleteCellsAlias is part of the vtctlservicepb.VtctldServer interface. 801 func (s *VtctldServer) DeleteCellsAlias(ctx context.Context, req *vtctldatapb.DeleteCellsAliasRequest) (resp *vtctldatapb.DeleteCellsAliasResponse, err error) { 802 span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteCellsAlias") 803 defer span.Finish() 804 805 defer panicHandler(&err) 806 807 span.Annotate("cells_alias", req.Name) 808 809 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 810 defer cancel() 811 812 if err = s.ts.DeleteCellsAlias(ctx, req.Name); err != nil { 813 return nil, err 814 } 815 816 return &vtctldatapb.DeleteCellsAliasResponse{}, nil 817 } 818 819 // DeleteKeyspace is part of the vtctlservicepb.VtctldServer interface. 820 func (s *VtctldServer) DeleteKeyspace(ctx context.Context, req *vtctldatapb.DeleteKeyspaceRequest) (resp *vtctldatapb.DeleteKeyspaceResponse, err error) { 821 span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteKeyspace") 822 defer span.Finish() 823 824 defer panicHandler(&err) 825 826 span.Annotate("keyspace", req.Keyspace) 827 span.Annotate("recursive", req.Recursive) 828 span.Annotate("force", req.Force) 829 830 lctx, unlock, lerr := s.ts.LockKeyspace(ctx, req.Keyspace, "DeleteKeyspace") 831 switch { 832 case lerr == nil: 833 ctx = lctx 834 case !req.Force: 835 err = fmt.Errorf("failed to lock %s; if you really want to delete this keyspace, re-run with Force=true: %w", req.Keyspace, lerr) 836 return nil, err 837 default: 838 log.Warningf("%s: failed to lock keyspace %s for deletion, but force=true, proceeding anyway ...", lerr, req.Keyspace) 839 } 840 841 if unlock != nil { 842 defer func() { 843 // Attempting to unlock a keyspace we successfully deleted results 844 // in ts.unlockKeyspace returning an error, which can make the 845 // overall RPC _seem_ like it failed. 846 // 847 // So, we do this extra checking to allow for specifically this 848 // scenario to result in "success." 849 origErr := err 850 unlock(&err) 851 if origErr == nil && topo.IsErrType(err, topo.NoNode) { 852 err = nil 853 } 854 }() 855 } 856 857 shards, err := s.ts.GetShardNames(ctx, req.Keyspace) 858 if err != nil { 859 return nil, err 860 } 861 862 if len(shards) > 0 { 863 if !req.Recursive { 864 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "keyspace %v still has %d shards; use Recursive=true or remove them manually", req.Keyspace, len(shards)) 865 return nil, err 866 } 867 868 log.Infof("Deleting all %d shards (and their tablets) in keyspace %v", len(shards), req.Keyspace) 869 recursive := true 870 evenIfServing := true 871 force := req.Force 872 873 for _, shard := range shards { 874 log.Infof("Recursively deleting shard %v/%v", req.Keyspace, shard) 875 err = deleteShard(ctx, s.ts, req.Keyspace, shard, recursive, evenIfServing, force) 876 if err != nil { 877 err = fmt.Errorf("cannot delete shard %v/%v: %w", req.Keyspace, shard, err) 878 return nil, err 879 } 880 } 881 } 882 883 cells, err := s.ts.GetKnownCells(ctx) 884 if err != nil { 885 return nil, err 886 } 887 888 for _, cell := range cells { 889 if err := s.ts.DeleteKeyspaceReplication(ctx, cell, req.Keyspace); err != nil && !topo.IsErrType(err, topo.NoNode) { 890 log.Warningf("Cannot delete KeyspaceReplication in cell %v for %v: %v", cell, req.Keyspace, err) 891 } 892 893 if err := s.ts.DeleteSrvKeyspace(ctx, cell, req.Keyspace); err != nil && !topo.IsErrType(err, topo.NoNode) { 894 log.Warningf("Cannot delete SrvKeyspace in cell %v for %v: %v", cell, req.Keyspace, err) 895 } 896 } 897 898 err = s.ts.DeleteKeyspace(ctx, req.Keyspace) 899 if err != nil { 900 return nil, err 901 } 902 903 return &vtctldatapb.DeleteKeyspaceResponse{}, nil 904 } 905 906 // DeleteShards is part of the vtctlservicepb.VtctldServer interface. 907 func (s *VtctldServer) DeleteShards(ctx context.Context, req *vtctldatapb.DeleteShardsRequest) (resp *vtctldatapb.DeleteShardsResponse, err error) { 908 span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteShards") 909 defer span.Finish() 910 911 defer panicHandler(&err) 912 913 span.Annotate("num_shards", len(req.Shards)) 914 span.Annotate("even_if_serving", req.EvenIfServing) 915 span.Annotate("recursive", req.Recursive) 916 span.Annotate("force", req.Force) 917 918 for _, shard := range req.Shards { 919 if err2 := deleteShard(ctx, s.ts, shard.Keyspace, shard.Name, req.Recursive, req.EvenIfServing, req.Force); err2 != nil { 920 err = err2 921 return nil, err 922 } 923 } 924 925 return &vtctldatapb.DeleteShardsResponse{}, nil 926 } 927 928 // DeleteSrvVSchema is part of the vtctlservicepb.VtctldServer interface. 929 func (s *VtctldServer) DeleteSrvVSchema(ctx context.Context, req *vtctldatapb.DeleteSrvVSchemaRequest) (resp *vtctldatapb.DeleteSrvVSchemaResponse, err error) { 930 span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteSrvVSchema") 931 defer span.Finish() 932 933 defer panicHandler(&err) 934 935 if req.Cell == "" { 936 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "cell must be non-empty") 937 return nil, err 938 } 939 940 span.Annotate("cell", req.Cell) 941 942 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 943 defer cancel() 944 945 if err = s.ts.DeleteSrvVSchema(ctx, req.Cell); err != nil { 946 return nil, err 947 } 948 949 return &vtctldatapb.DeleteSrvVSchemaResponse{}, nil 950 } 951 952 // DeleteTablets is part of the vtctlservicepb.VtctldServer interface. 953 func (s *VtctldServer) DeleteTablets(ctx context.Context, req *vtctldatapb.DeleteTabletsRequest) (resp *vtctldatapb.DeleteTabletsResponse, err error) { 954 span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteTablets") 955 defer span.Finish() 956 957 defer panicHandler(&err) 958 959 span.Annotate("num_tablets", len(req.TabletAliases)) 960 span.Annotate("allow_primary", req.AllowPrimary) 961 962 for _, alias := range req.TabletAliases { 963 if err2 := deleteTablet(ctx, s.ts, alias, req.AllowPrimary); err2 != nil { 964 err = err2 965 return nil, err 966 } 967 } 968 969 return &vtctldatapb.DeleteTabletsResponse{}, nil 970 } 971 972 // EmergencyReparentShard is part of the vtctldservicepb.VtctldServer interface. 973 func (s *VtctldServer) EmergencyReparentShard(ctx context.Context, req *vtctldatapb.EmergencyReparentShardRequest) (resp *vtctldatapb.EmergencyReparentShardResponse, err error) { 974 span, ctx := trace.NewSpan(ctx, "VtctldServer.EmergencyReparentShard") 975 defer span.Finish() 976 977 defer panicHandler(&err) 978 979 span.Annotate("keyspace", req.Keyspace) 980 span.Annotate("shard", req.Shard) 981 span.Annotate("new_primary_alias", topoproto.TabletAliasString(req.NewPrimary)) 982 983 ignoreReplicaAliases := topoproto.TabletAliasList(req.IgnoreReplicas).ToStringSlice() 984 span.Annotate("ignore_replicas", strings.Join(ignoreReplicaAliases, ",")) 985 986 waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout) 987 if err != nil { 988 return nil, err 989 } else if !ok { 990 waitReplicasTimeout = time.Second * 30 991 } 992 993 span.Annotate("wait_replicas_timeout_sec", waitReplicasTimeout.Seconds()) 994 span.Annotate("prevent_cross_cell_promotion", req.PreventCrossCellPromotion) 995 996 m := sync.RWMutex{} 997 logstream := []*logutilpb.Event{} 998 logger := logutil.NewCallbackLogger(func(e *logutilpb.Event) { 999 m.Lock() 1000 defer m.Unlock() 1001 1002 logstream = append(logstream, e) 1003 }) 1004 1005 ev, err := reparentutil.NewEmergencyReparenter(s.ts, s.tmc, logger).ReparentShard(ctx, 1006 req.Keyspace, 1007 req.Shard, 1008 reparentutil.EmergencyReparentOptions{ 1009 NewPrimaryAlias: req.NewPrimary, 1010 IgnoreReplicas: sets.New[string](ignoreReplicaAliases...), 1011 WaitReplicasTimeout: waitReplicasTimeout, 1012 PreventCrossCellPromotion: req.PreventCrossCellPromotion, 1013 }, 1014 ) 1015 1016 resp = &vtctldatapb.EmergencyReparentShardResponse{ 1017 Keyspace: req.Keyspace, 1018 Shard: req.Shard, 1019 } 1020 1021 if ev != nil { 1022 resp.Keyspace = ev.ShardInfo.Keyspace() 1023 resp.Shard = ev.ShardInfo.ShardName() 1024 1025 if ev.NewPrimary != nil && !topoproto.TabletAliasIsZero(ev.NewPrimary.Alias) { 1026 resp.PromotedPrimary = ev.NewPrimary.Alias 1027 } 1028 } 1029 1030 m.RLock() 1031 defer m.RUnlock() 1032 1033 resp.Events = make([]*logutilpb.Event, len(logstream)) 1034 copy(resp.Events, logstream) 1035 1036 return resp, err 1037 } 1038 1039 // ExecuteFetchAsApp is part of the vtctlservicepb.VtctldServer interface. 1040 func (s *VtctldServer) ExecuteFetchAsApp(ctx context.Context, req *vtctldatapb.ExecuteFetchAsAppRequest) (resp *vtctldatapb.ExecuteFetchAsAppResponse, err error) { 1041 span, ctx := trace.NewSpan(ctx, "VtctldServer.ExecuteFetchAsApp") 1042 defer span.Finish() 1043 1044 defer panicHandler(&err) 1045 1046 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1047 span.Annotate("max_rows", req.MaxRows) 1048 span.Annotate("use_pool", req.UsePool) 1049 1050 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 1051 if err != nil { 1052 return nil, err 1053 } 1054 1055 qr, err := s.tmc.ExecuteFetchAsApp(ctx, ti.Tablet, req.UsePool, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ 1056 Query: []byte(req.Query), 1057 MaxRows: uint64(req.MaxRows), 1058 }) 1059 if err != nil { 1060 return nil, err 1061 } 1062 1063 return &vtctldatapb.ExecuteFetchAsAppResponse{Result: qr}, nil 1064 } 1065 1066 // ExecuteFetchAsDBA is part of the vtctlservicepb.VtctldServer interface. 1067 func (s *VtctldServer) ExecuteFetchAsDBA(ctx context.Context, req *vtctldatapb.ExecuteFetchAsDBARequest) (resp *vtctldatapb.ExecuteFetchAsDBAResponse, err error) { 1068 span, ctx := trace.NewSpan(ctx, "VtctldServer.ExecuteFetchAsDBA") 1069 defer span.Finish() 1070 1071 defer panicHandler(&err) 1072 1073 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1074 span.Annotate("max_rows", req.MaxRows) 1075 span.Annotate("disable_binlogs", req.DisableBinlogs) 1076 span.Annotate("reload_schema", req.ReloadSchema) 1077 1078 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 1079 if err != nil { 1080 return nil, err 1081 } 1082 1083 qr, err := s.tmc.ExecuteFetchAsDba(ctx, ti.Tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ 1084 Query: []byte(req.Query), 1085 MaxRows: uint64(req.MaxRows), 1086 DisableBinlogs: req.DisableBinlogs, 1087 ReloadSchema: req.ReloadSchema, 1088 }) 1089 if err != nil { 1090 return nil, err 1091 } 1092 1093 return &vtctldatapb.ExecuteFetchAsDBAResponse{Result: qr}, nil 1094 } 1095 1096 // ExecuteHook is part of the vtctlservicepb.VtctldServer interface. 1097 func (s *VtctldServer) ExecuteHook(ctx context.Context, req *vtctldatapb.ExecuteHookRequest) (resp *vtctldatapb.ExecuteHookResponse, err error) { 1098 span, ctx := trace.NewSpan(ctx, "VtctldServer.ExecuteHook") 1099 defer span.Finish() 1100 1101 defer panicHandler(&err) 1102 1103 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1104 1105 if req.TabletHookRequest == nil { 1106 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "TabletHookRequest cannot be nil") 1107 return nil, err 1108 } 1109 1110 span.Annotate("hook_name", req.TabletHookRequest.Name) 1111 1112 if strings.Contains(req.TabletHookRequest.Name, "/") { 1113 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "hook name cannot contain a '/'; was %v", req.TabletHookRequest.Name) 1114 return nil, err 1115 } 1116 1117 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 1118 if err != nil { 1119 return nil, err 1120 } 1121 1122 hook := hk.NewHookWithEnv(req.TabletHookRequest.Name, req.TabletHookRequest.Parameters, req.TabletHookRequest.ExtraEnv) 1123 hr, err := s.tmc.ExecuteHook(ctx, ti.Tablet, hook) 1124 if err != nil { 1125 return nil, err 1126 } 1127 1128 return &vtctldatapb.ExecuteHookResponse{HookResult: &tabletmanagerdatapb.ExecuteHookResponse{ 1129 ExitStatus: int64(hr.ExitStatus), 1130 Stdout: hr.Stdout, 1131 Stderr: hr.Stderr, 1132 }}, nil 1133 } 1134 1135 // FindAllShardsInKeyspace is part of the vtctlservicepb.VtctldServer interface. 1136 func (s *VtctldServer) FindAllShardsInKeyspace(ctx context.Context, req *vtctldatapb.FindAllShardsInKeyspaceRequest) (resp *vtctldatapb.FindAllShardsInKeyspaceResponse, err error) { 1137 span, ctx := trace.NewSpan(ctx, "VtctldServer.FindAllShardsInKeyspace") 1138 defer span.Finish() 1139 1140 defer panicHandler(&err) 1141 1142 span.Annotate("keyspace", req.Keyspace) 1143 1144 result, err := s.ts.FindAllShardsInKeyspace(ctx, req.Keyspace) 1145 if err != nil { 1146 return nil, err 1147 } 1148 1149 shards := map[string]*vtctldatapb.Shard{} 1150 for _, shard := range result { 1151 shards[shard.ShardName()] = &vtctldatapb.Shard{ 1152 Keyspace: req.Keyspace, 1153 Name: shard.ShardName(), 1154 Shard: shard.Shard, 1155 } 1156 } 1157 1158 return &vtctldatapb.FindAllShardsInKeyspaceResponse{ 1159 Shards: shards, 1160 }, nil 1161 } 1162 1163 // GetBackups is part of the vtctldservicepb.VtctldServer interface. 1164 func (s *VtctldServer) GetBackups(ctx context.Context, req *vtctldatapb.GetBackupsRequest) (resp *vtctldatapb.GetBackupsResponse, err error) { 1165 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetBackups") 1166 defer span.Finish() 1167 1168 defer panicHandler(&err) 1169 1170 span.Annotate("keyspace", req.Keyspace) 1171 span.Annotate("shard", req.Shard) 1172 span.Annotate("limit", req.Limit) 1173 span.Annotate("detailed", req.Detailed) 1174 span.Annotate("detailed_limit", req.DetailedLimit) 1175 1176 bs, err := backupstorage.GetBackupStorage() 1177 if err != nil { 1178 return nil, err 1179 } 1180 defer bs.Close() 1181 1182 bucket := filepath.Join(req.Keyspace, req.Shard) 1183 span.Annotate("backup_path", bucket) 1184 1185 bhs, err := bs.ListBackups(ctx, bucket) 1186 if err != nil { 1187 return nil, err 1188 } 1189 1190 totalBackups := len(bhs) 1191 if req.Limit > 0 { 1192 totalBackups = int(req.Limit) 1193 } 1194 1195 totalDetailedBackups := len(bhs) 1196 if req.DetailedLimit > 0 { 1197 totalDetailedBackups = int(req.DetailedLimit) 1198 } 1199 1200 backups := make([]*mysqlctlpb.BackupInfo, 0, totalBackups) 1201 backupsToSkip := len(bhs) - totalBackups 1202 backupsToSkipDetails := len(bhs) - totalDetailedBackups 1203 1204 for i, bh := range bhs { 1205 if i < backupsToSkip { 1206 continue 1207 } 1208 1209 bi := mysqlctlproto.BackupHandleToProto(bh) 1210 bi.Keyspace = req.Keyspace 1211 bi.Shard = req.Shard 1212 1213 if req.Detailed { 1214 if i >= backupsToSkipDetails { // nolint:staticcheck 1215 // (TODO:@ajm188) Update backupengine/backupstorage implementations 1216 // to get Status info for backups. 1217 } 1218 } 1219 1220 backups = append(backups, bi) 1221 } 1222 1223 return &vtctldatapb.GetBackupsResponse{ 1224 Backups: backups, 1225 }, nil 1226 } 1227 1228 // GetCellInfoNames is part of the vtctlservicepb.VtctldServer interface. 1229 func (s *VtctldServer) GetCellInfoNames(ctx context.Context, req *vtctldatapb.GetCellInfoNamesRequest) (resp *vtctldatapb.GetCellInfoNamesResponse, err error) { 1230 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetCellInfoNames") 1231 defer span.Finish() 1232 1233 defer panicHandler(&err) 1234 1235 names, err := s.ts.GetCellInfoNames(ctx) 1236 if err != nil { 1237 return nil, err 1238 } 1239 1240 return &vtctldatapb.GetCellInfoNamesResponse{Names: names}, nil 1241 } 1242 1243 // GetCellInfo is part of the vtctlservicepb.VtctldServer interface. 1244 func (s *VtctldServer) GetCellInfo(ctx context.Context, req *vtctldatapb.GetCellInfoRequest) (resp *vtctldatapb.GetCellInfoResponse, err error) { 1245 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetCellInfo") 1246 defer span.Finish() 1247 1248 defer panicHandler(&err) 1249 1250 if req.Cell == "" { 1251 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "cell field is required") 1252 return nil, err 1253 } 1254 1255 span.Annotate("cell", req.Cell) 1256 1257 // We use a strong read, because users using this command want the latest 1258 // data, and this is user-generated, not used in any automated process. 1259 strongRead := true 1260 ci, err := s.ts.GetCellInfo(ctx, req.Cell, strongRead) 1261 if err != nil { 1262 return nil, err 1263 } 1264 1265 return &vtctldatapb.GetCellInfoResponse{CellInfo: ci}, nil 1266 } 1267 1268 // GetCellsAliases is part of the vtctlservicepb.VtctldServer interface. 1269 func (s *VtctldServer) GetCellsAliases(ctx context.Context, req *vtctldatapb.GetCellsAliasesRequest) (resp *vtctldatapb.GetCellsAliasesResponse, err error) { 1270 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetCellsAliases") 1271 defer span.Finish() 1272 1273 defer panicHandler(&err) 1274 1275 strongRead := true 1276 aliases, err := s.ts.GetCellsAliases(ctx, strongRead) 1277 if err != nil { 1278 return nil, err 1279 } 1280 1281 return &vtctldatapb.GetCellsAliasesResponse{Aliases: aliases}, nil 1282 } 1283 1284 // GetFullStatus is part of the vtctlservicepb.VtctldServer interface. 1285 func (s *VtctldServer) GetFullStatus(ctx context.Context, req *vtctldatapb.GetFullStatusRequest) (resp *vtctldatapb.GetFullStatusResponse, err error) { 1286 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetFullStatus") 1287 defer span.Finish() 1288 1289 defer panicHandler(&err) 1290 1291 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1292 1293 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 1294 if err != nil { 1295 return nil, err 1296 } 1297 1298 res, err := s.tmc.FullStatus(ctx, ti.Tablet) 1299 if err != nil { 1300 return nil, err 1301 } 1302 1303 return &vtctldatapb.GetFullStatusResponse{ 1304 Status: res, 1305 }, nil 1306 } 1307 1308 // GetKeyspace is part of the vtctlservicepb.VtctldServer interface. 1309 func (s *VtctldServer) GetKeyspace(ctx context.Context, req *vtctldatapb.GetKeyspaceRequest) (resp *vtctldatapb.GetKeyspaceResponse, err error) { 1310 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetKeyspace") 1311 defer span.Finish() 1312 1313 defer panicHandler(&err) 1314 1315 span.Annotate("keyspace", req.Keyspace) 1316 1317 keyspace, err := s.ts.GetKeyspace(ctx, req.Keyspace) 1318 if err != nil { 1319 return nil, err 1320 } 1321 1322 return &vtctldatapb.GetKeyspaceResponse{ 1323 Keyspace: &vtctldatapb.Keyspace{ 1324 Name: req.Keyspace, 1325 Keyspace: keyspace.Keyspace, 1326 }, 1327 }, nil 1328 } 1329 1330 // GetKeyspaces is part of the vtctlservicepb.VtctldServer interface. 1331 func (s *VtctldServer) GetKeyspaces(ctx context.Context, req *vtctldatapb.GetKeyspacesRequest) (resp *vtctldatapb.GetKeyspacesResponse, err error) { 1332 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetKeyspaces") 1333 defer span.Finish() 1334 1335 defer panicHandler(&err) 1336 1337 names, err := s.ts.GetKeyspaces(ctx) 1338 if err != nil { 1339 return nil, err 1340 } 1341 1342 keyspaces := make([]*vtctldatapb.Keyspace, len(names)) 1343 1344 for i, name := range names { 1345 ks, err2 := s.GetKeyspace(ctx, &vtctldatapb.GetKeyspaceRequest{Keyspace: name}) 1346 if err2 != nil { 1347 err = err2 1348 return nil, err 1349 } 1350 1351 keyspaces[i] = ks.Keyspace 1352 } 1353 1354 return &vtctldatapb.GetKeyspacesResponse{Keyspaces: keyspaces}, nil 1355 } 1356 1357 // GetPermissions is part of the vtctlservicepb.VtctldServer interface. 1358 func (s *VtctldServer) GetPermissions(ctx context.Context, req *vtctldatapb.GetPermissionsRequest) (resp *vtctldatapb.GetPermissionsResponse, err error) { 1359 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetPermissions") 1360 defer span.Finish() 1361 1362 defer panicHandler(&err) 1363 1364 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1365 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 1366 if err != nil { 1367 err = vterrors.Errorf(vtrpc.Code_NOT_FOUND, "Failed to get tablet %v: %v", req.TabletAlias, err) 1368 return nil, err 1369 } 1370 1371 p, err := s.tmc.GetPermissions(ctx, ti.Tablet) 1372 if err != nil { 1373 return nil, err 1374 } 1375 1376 return &vtctldatapb.GetPermissionsResponse{ 1377 Permissions: p, 1378 }, nil 1379 } 1380 1381 // GetRoutingRules is part of the vtctlservicepb.VtctldServer interface. 1382 func (s *VtctldServer) GetRoutingRules(ctx context.Context, req *vtctldatapb.GetRoutingRulesRequest) (resp *vtctldatapb.GetRoutingRulesResponse, err error) { 1383 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetRoutingRules") 1384 defer span.Finish() 1385 1386 defer panicHandler(&err) 1387 1388 rr, err := s.ts.GetRoutingRules(ctx) 1389 if err != nil { 1390 return nil, err 1391 } 1392 1393 return &vtctldatapb.GetRoutingRulesResponse{ 1394 RoutingRules: rr, 1395 }, nil 1396 } 1397 1398 // GetShardRoutingRules is part of the vtctlservicepb.VtctldServer interface. 1399 func (s *VtctldServer) GetShardRoutingRules(ctx context.Context, req *vtctldatapb.GetShardRoutingRulesRequest) (*vtctldatapb.GetShardRoutingRulesResponse, error) { 1400 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetShardRoutingRules") 1401 defer span.Finish() 1402 1403 srr, err := s.ts.GetShardRoutingRules(ctx) 1404 if err != nil { 1405 return nil, err 1406 } 1407 1408 return &vtctldatapb.GetShardRoutingRulesResponse{ 1409 ShardRoutingRules: srr, 1410 }, nil 1411 } 1412 1413 // GetSchema is part of the vtctlservicepb.VtctldServer interface. 1414 func (s *VtctldServer) GetSchema(ctx context.Context, req *vtctldatapb.GetSchemaRequest) (resp *vtctldatapb.GetSchemaResponse, err error) { 1415 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSchema") 1416 defer span.Finish() 1417 1418 defer panicHandler(&err) 1419 1420 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1421 span.Annotate("tables", strings.Join(req.Tables, ",")) 1422 span.Annotate("exclude_tables", strings.Join(req.ExcludeTables, ",")) 1423 span.Annotate("include_views", req.IncludeViews) 1424 span.Annotate("table_names_only", req.TableNamesOnly) 1425 span.Annotate("table_sizes_only", req.TableSizesOnly) 1426 span.Annotate("table_schema_only", req.TableSchemaOnly) 1427 1428 r := &tabletmanagerdatapb.GetSchemaRequest{Tables: req.Tables, ExcludeTables: req.ExcludeTables, IncludeViews: req.IncludeViews, TableSchemaOnly: req.TableSchemaOnly} 1429 sd, err := schematools.GetSchema(ctx, s.ts, s.tmc, req.TabletAlias, r) 1430 if err != nil { 1431 return nil, err 1432 } 1433 1434 if req.TableNamesOnly { 1435 nameTds := make([]*tabletmanagerdatapb.TableDefinition, len(sd.TableDefinitions)) 1436 1437 for i, td := range sd.TableDefinitions { 1438 nameTds[i] = &tabletmanagerdatapb.TableDefinition{ 1439 Name: td.Name, 1440 } 1441 } 1442 1443 sd.TableDefinitions = nameTds 1444 } else if req.TableSizesOnly { 1445 sizeTds := make([]*tabletmanagerdatapb.TableDefinition, len(sd.TableDefinitions)) 1446 1447 for i, td := range sd.TableDefinitions { 1448 sizeTds[i] = &tabletmanagerdatapb.TableDefinition{ 1449 Name: td.Name, 1450 Type: td.Type, 1451 RowCount: td.RowCount, 1452 DataLength: td.DataLength, 1453 } 1454 } 1455 1456 sd.TableDefinitions = sizeTds 1457 } 1458 1459 return &vtctldatapb.GetSchemaResponse{ 1460 Schema: sd, 1461 }, nil 1462 } 1463 1464 // GetShard is part of the vtctlservicepb.VtctldServer interface. 1465 func (s *VtctldServer) GetShard(ctx context.Context, req *vtctldatapb.GetShardRequest) (resp *vtctldatapb.GetShardResponse, err error) { 1466 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetShard") 1467 defer span.Finish() 1468 1469 defer panicHandler(&err) 1470 1471 span.Annotate("keyspace", req.Keyspace) 1472 span.Annotate("shard", req.ShardName) 1473 1474 shard, err := s.ts.GetShard(ctx, req.Keyspace, req.ShardName) 1475 if err != nil { 1476 return nil, err 1477 } 1478 1479 return &vtctldatapb.GetShardResponse{ 1480 Shard: &vtctldatapb.Shard{ 1481 Keyspace: req.Keyspace, 1482 Name: req.ShardName, 1483 Shard: shard.Shard, 1484 }, 1485 }, nil 1486 } 1487 1488 // GetSrvKeyspaceNames is part of the vtctlservicepb.VtctldServer interface. 1489 func (s *VtctldServer) GetSrvKeyspaceNames(ctx context.Context, req *vtctldatapb.GetSrvKeyspaceNamesRequest) (resp *vtctldatapb.GetSrvKeyspaceNamesResponse, err error) { 1490 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvKeyspaceNames") 1491 defer span.Finish() 1492 1493 defer panicHandler(&err) 1494 1495 cells := req.Cells 1496 if len(cells) == 0 { 1497 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 1498 defer cancel() 1499 1500 cells, err = s.ts.GetCellInfoNames(ctx) 1501 if err != nil { 1502 return nil, err 1503 } 1504 } 1505 1506 namesByCell := make(map[string]*vtctldatapb.GetSrvKeyspaceNamesResponse_NameList, len(cells)) 1507 1508 // Contact each cell sequentially, each cell is bounded by *topo.RemoteOperationTimeout. 1509 // Total runtime is O(len(cells) * topo.RemoteOperationTimeout). 1510 for _, cell := range cells { 1511 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 1512 names, err2 := s.ts.GetSrvKeyspaceNames(ctx, cell) 1513 if err2 != nil { 1514 cancel() 1515 err = err2 1516 return nil, err 1517 } 1518 1519 cancel() 1520 namesByCell[cell] = &vtctldatapb.GetSrvKeyspaceNamesResponse_NameList{Names: names} 1521 } 1522 1523 return &vtctldatapb.GetSrvKeyspaceNamesResponse{ 1524 Names: namesByCell, 1525 }, nil 1526 } 1527 1528 // GetSrvKeyspaces is part of the vtctlservicepb.VtctldServer interface. 1529 func (s *VtctldServer) GetSrvKeyspaces(ctx context.Context, req *vtctldatapb.GetSrvKeyspacesRequest) (resp *vtctldatapb.GetSrvKeyspacesResponse, err error) { 1530 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvKeyspaces") 1531 defer span.Finish() 1532 1533 defer panicHandler(&err) 1534 1535 cells := req.Cells 1536 1537 if len(cells) == 0 { 1538 cells, err = s.ts.GetCellInfoNames(ctx) 1539 if err != nil { 1540 return nil, err 1541 } 1542 } 1543 1544 span.Annotate("cells", strings.Join(cells, ",")) 1545 1546 srvKeyspaces := make(map[string]*topodatapb.SrvKeyspace, len(cells)) 1547 1548 for _, cell := range cells { 1549 var srvKeyspace *topodatapb.SrvKeyspace 1550 srvKeyspace, err = s.ts.GetSrvKeyspace(ctx, cell, req.Keyspace) 1551 1552 if err != nil { 1553 if !topo.IsErrType(err, topo.NoNode) { 1554 return nil, err 1555 } 1556 1557 log.Warningf("no srvkeyspace for keyspace %s in cell %s", req.Keyspace, cell) 1558 1559 srvKeyspace = nil 1560 } 1561 1562 srvKeyspaces[cell] = srvKeyspace 1563 } 1564 1565 return &vtctldatapb.GetSrvKeyspacesResponse{ 1566 SrvKeyspaces: srvKeyspaces, 1567 }, nil 1568 } 1569 1570 // UpdateThrottlerConfig updates throttler config for all cells 1571 func (s *VtctldServer) UpdateThrottlerConfig(ctx context.Context, req *vtctldatapb.UpdateThrottlerConfigRequest) (resp *vtctldatapb.UpdateThrottlerConfigResponse, err error) { 1572 span, ctx := trace.NewSpan(ctx, "VtctldServer.UpdateThrottlerConfig") 1573 defer span.Finish() 1574 1575 defer panicHandler(&err) 1576 1577 if req.Enable && req.Disable { 1578 return nil, fmt.Errorf("--enable and --disable are mutually exclusive") 1579 } 1580 if req.CheckAsCheckSelf && req.CheckAsCheckShard { 1581 return nil, fmt.Errorf("--check-as-check-self and --check-as-check-shard are mutually exclusive") 1582 } 1583 1584 update := func(throttlerConfig *topodatapb.ThrottlerConfig) *topodatapb.ThrottlerConfig { 1585 if throttlerConfig == nil { 1586 throttlerConfig = &topodatapb.ThrottlerConfig{} 1587 } 1588 if req.CustomQuerySet { 1589 // custom query provided 1590 throttlerConfig.CustomQuery = req.CustomQuery 1591 throttlerConfig.Threshold = req.Threshold // allowed to be zero/negative because who knows what kind of custom query this is 1592 } else { 1593 // no custom query, throttler works by querying replication lag. We only allow positive values 1594 if req.Threshold > 0 { 1595 throttlerConfig.Threshold = req.Threshold 1596 } 1597 } 1598 if req.Enable { 1599 throttlerConfig.Enabled = true 1600 } 1601 if req.Disable { 1602 throttlerConfig.Enabled = false 1603 } 1604 if req.CheckAsCheckSelf { 1605 throttlerConfig.CheckAsCheckSelf = true 1606 } 1607 if req.CheckAsCheckShard { 1608 throttlerConfig.CheckAsCheckSelf = false 1609 } 1610 return throttlerConfig 1611 } 1612 1613 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "UpdateThrottlerConfig") 1614 if lockErr != nil { 1615 return nil, lockErr 1616 } 1617 defer unlock(&err) 1618 1619 ki, err := s.ts.GetKeyspace(ctx, req.Keyspace) 1620 if err != nil { 1621 return nil, err 1622 } 1623 1624 ki.ThrottlerConfig = update(ki.ThrottlerConfig) 1625 1626 err = s.ts.UpdateKeyspace(ctx, ki) 1627 if err != nil { 1628 return nil, err 1629 } 1630 1631 _, err = s.ts.UpdateSrvKeyspaceThrottlerConfig(ctx, req.Keyspace, []string{}, update) 1632 1633 return &vtctldatapb.UpdateThrottlerConfigResponse{}, err 1634 } 1635 1636 // GetSrvVSchema is part of the vtctlservicepb.VtctldServer interface. 1637 func (s *VtctldServer) GetSrvVSchema(ctx context.Context, req *vtctldatapb.GetSrvVSchemaRequest) (resp *vtctldatapb.GetSrvVSchemaResponse, err error) { 1638 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvVSchema") 1639 defer span.Finish() 1640 1641 defer panicHandler(&err) 1642 1643 span.Annotate("cell", req.Cell) 1644 1645 vschema, err := s.ts.GetSrvVSchema(ctx, req.Cell) 1646 if err != nil { 1647 return nil, err 1648 } 1649 1650 return &vtctldatapb.GetSrvVSchemaResponse{ 1651 SrvVSchema: vschema, 1652 }, nil 1653 } 1654 1655 // GetSrvVSchemas is part of the vtctlservicepb.VtctldServer interface. 1656 func (s *VtctldServer) GetSrvVSchemas(ctx context.Context, req *vtctldatapb.GetSrvVSchemasRequest) (resp *vtctldatapb.GetSrvVSchemasResponse, err error) { 1657 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvVSchemas") 1658 defer span.Finish() 1659 1660 defer panicHandler(&err) 1661 1662 allCells, err := s.ts.GetCellInfoNames(ctx) 1663 if err != nil { 1664 return nil, err 1665 } 1666 1667 cells := allCells 1668 1669 // Omit any cell names in the request that don't map to existing cells 1670 if len(req.Cells) > 0 { 1671 s1 := sets.New[string](allCells...) 1672 s2 := sets.New[string](req.Cells...) 1673 1674 cells = sets.List(s1.Intersection(s2)) 1675 } 1676 1677 span.Annotate("cells", strings.Join(cells, ",")) 1678 svs := make(map[string]*vschemapb.SrvVSchema, len(cells)) 1679 1680 for _, cell := range cells { 1681 var sv *vschemapb.SrvVSchema 1682 sv, err = s.ts.GetSrvVSchema(ctx, cell) 1683 1684 if err != nil { 1685 if !topo.IsErrType(err, topo.NoNode) { 1686 return nil, err 1687 } 1688 1689 log.Warningf("no SrvVSchema for cell %s", cell) 1690 sv = nil 1691 } 1692 1693 svs[cell] = sv 1694 } 1695 1696 return &vtctldatapb.GetSrvVSchemasResponse{ 1697 SrvVSchemas: svs, 1698 }, nil 1699 } 1700 1701 // GetTablet is part of the vtctlservicepb.VtctldServer interface. 1702 func (s *VtctldServer) GetTablet(ctx context.Context, req *vtctldatapb.GetTabletRequest) (resp *vtctldatapb.GetTabletResponse, err error) { 1703 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetTablet") 1704 defer span.Finish() 1705 1706 defer panicHandler(&err) 1707 1708 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 1709 1710 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 1711 if err != nil { 1712 return nil, err 1713 } 1714 1715 return &vtctldatapb.GetTabletResponse{ 1716 Tablet: ti.Tablet, 1717 }, nil 1718 } 1719 1720 // GetTablets is part of the vtctlservicepb.VtctldServer interface. 1721 func (s *VtctldServer) GetTablets(ctx context.Context, req *vtctldatapb.GetTabletsRequest) (resp *vtctldatapb.GetTabletsResponse, err error) { 1722 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetTablets") 1723 defer span.Finish() 1724 1725 defer panicHandler(&err) 1726 1727 span.Annotate("cells", strings.Join(req.Cells, ",")) 1728 if req.TabletType != topodatapb.TabletType_UNKNOWN { 1729 span.Annotate("tablet_type", topodatapb.TabletType_name[int32(req.TabletType)]) 1730 } 1731 span.Annotate("strict", req.Strict) 1732 1733 // It is possible that an old primary has not yet updated its type in the 1734 // topo. In that case, report its type as UNKNOWN. It used to be PRIMARY but 1735 // is no longer the serving primary. 1736 adjustTypeForStalePrimary := func(ti *topo.TabletInfo, mtst time.Time) { 1737 if ti.Type == topodatapb.TabletType_PRIMARY && ti.GetPrimaryTermStartTime().Before(mtst) { 1738 ti.Tablet.Type = topodatapb.TabletType_UNKNOWN 1739 } 1740 } 1741 1742 // Create a context for our per-cell RPCs, with a timeout upper-bounded at 1743 // the RemoteOperationTimeout. 1744 // 1745 // Per-cell goroutines may also cancel this context if they fail and the 1746 // request specified Strict=true to allow us to fail faster. 1747 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 1748 defer cancel() 1749 1750 var tabletMap map[string]*topo.TabletInfo 1751 1752 switch { 1753 case len(req.TabletAliases) > 0: 1754 span.Annotate("tablet_aliases", strings.Join(topoproto.TabletAliasList(req.TabletAliases).ToStringSlice(), ",")) 1755 1756 tabletMap, err = s.ts.GetTabletMap(ctx, req.TabletAliases) 1757 if err != nil { 1758 err = fmt.Errorf("GetTabletMap(%v) failed: %w", req.TabletAliases, err) 1759 } 1760 case req.Keyspace != "" && req.Shard != "": 1761 span.Annotate("keyspace", req.Keyspace) 1762 span.Annotate("shard", req.Shard) 1763 1764 tabletMap, err = s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard) 1765 if err != nil { 1766 err = fmt.Errorf("GetTabletMapForShard(%s, %s) failed: %w", req.Keyspace, req.Shard, err) 1767 } 1768 default: 1769 // goto the req.Cells branch 1770 tabletMap = nil 1771 } 1772 1773 if err != nil { 1774 switch { 1775 case topo.IsErrType(err, topo.PartialResult): 1776 if req.Strict { 1777 return nil, err 1778 } 1779 1780 log.Warningf("GetTablets encountered non-fatal error %s; continuing because Strict=false", err) 1781 default: 1782 return nil, err 1783 } 1784 } 1785 1786 if tabletMap != nil { 1787 var truePrimaryTimestamp time.Time 1788 for _, ti := range tabletMap { 1789 if ti.Type == topodatapb.TabletType_PRIMARY { 1790 primaryTimestamp := ti.GetPrimaryTermStartTime() 1791 if primaryTimestamp.After(truePrimaryTimestamp) { 1792 truePrimaryTimestamp = primaryTimestamp 1793 } 1794 } 1795 } 1796 1797 tablets := make([]*topodatapb.Tablet, 0, len(tabletMap)) 1798 for _, ti := range tabletMap { 1799 adjustTypeForStalePrimary(ti, truePrimaryTimestamp) 1800 tablets = append(tablets, ti.Tablet) 1801 } 1802 1803 return &vtctldatapb.GetTabletsResponse{Tablets: tablets}, nil 1804 } 1805 1806 cells := req.Cells 1807 if len(cells) == 0 { 1808 var c []string 1809 c, err = s.ts.GetKnownCells(ctx) 1810 if err != nil { 1811 return nil, err 1812 } 1813 1814 cells = c 1815 } 1816 1817 var ( 1818 m sync.Mutex 1819 wg sync.WaitGroup 1820 rec concurrency.AllErrorRecorder 1821 allTablets []*topo.TabletInfo 1822 ) 1823 1824 for _, cell := range cells { 1825 wg.Add(1) 1826 1827 go func(cell string) { 1828 defer wg.Done() 1829 1830 tablets, err := s.ts.GetTabletsByCell(ctx, cell) 1831 if err != nil { 1832 if req.Strict { 1833 log.Infof("GetTablets got an error from cell %s: %s. Running in strict mode, so canceling other cell RPCs", cell, err) 1834 cancel() 1835 } 1836 rec.RecordError(fmt.Errorf("GetTabletsByCell(%s) failed: %w", cell, err)) 1837 return 1838 } 1839 1840 m.Lock() 1841 defer m.Unlock() 1842 allTablets = append(allTablets, tablets...) 1843 }(cell) 1844 } 1845 1846 wg.Wait() 1847 1848 if rec.HasErrors() { 1849 if req.Strict || len(rec.Errors) == len(cells) { 1850 err = rec.Error() 1851 return nil, err 1852 } 1853 } 1854 1855 // Collect true primary term start times, and optionally filter out any 1856 // tablets by keyspace according to the request. 1857 PrimaryTermStartTimes := map[string]time.Time{} 1858 filteredTablets := make([]*topo.TabletInfo, 0, len(allTablets)) 1859 1860 for _, tablet := range allTablets { 1861 if req.Keyspace != "" && tablet.Keyspace != req.Keyspace { 1862 continue 1863 } 1864 if req.TabletType != 0 && tablet.Type != req.TabletType { 1865 continue 1866 } 1867 1868 key := tablet.Keyspace + "." + tablet.Shard 1869 if v, ok := PrimaryTermStartTimes[key]; ok { 1870 if tablet.GetPrimaryTermStartTime().After(v) { 1871 PrimaryTermStartTimes[key] = tablet.GetPrimaryTermStartTime() 1872 } 1873 } else { 1874 PrimaryTermStartTimes[key] = tablet.GetPrimaryTermStartTime() 1875 } 1876 1877 filteredTablets = append(filteredTablets, tablet) 1878 } 1879 1880 adjustedTablets := make([]*topodatapb.Tablet, len(filteredTablets)) 1881 1882 // collect the tablets with adjusted primary term start times. they've 1883 // already been filtered by the above loop, so no keyspace filtering 1884 // here. 1885 for i, ti := range filteredTablets { 1886 key := ti.Keyspace + "." + ti.Shard 1887 adjustTypeForStalePrimary(ti, PrimaryTermStartTimes[key]) 1888 1889 adjustedTablets[i] = ti.Tablet 1890 } 1891 1892 return &vtctldatapb.GetTabletsResponse{ 1893 Tablets: adjustedTablets, 1894 }, nil 1895 } 1896 1897 // GetTopologyPath is part of the vtctlservicepb.VtctldServer interface. 1898 // It returns the cell located at the provided path in the topology server. 1899 func (s *VtctldServer) GetTopologyPath(ctx context.Context, req *vtctldatapb.GetTopologyPathRequest) (*vtctldatapb.GetTopologyPathResponse, error) { 1900 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetTopology") 1901 defer span.Finish() 1902 1903 // handle toplevel display: global, then one line per cell. 1904 if req.Path == "/" { 1905 cells, err := s.ts.GetKnownCells(ctx) 1906 if err != nil { 1907 return nil, err 1908 } 1909 resp := vtctldatapb.GetTopologyPathResponse{ 1910 Cell: &vtctldatapb.TopologyCell{ 1911 Path: req.Path, 1912 // the toplevel display has no name, just children 1913 Children: append([]string{topo.GlobalCell}, cells...), 1914 }, 1915 } 1916 return &resp, nil 1917 } 1918 1919 // otherwise, delegate to getTopologyCell to parse the path and return the cell there 1920 cell, err := s.getTopologyCell(ctx, req.Path) 1921 if err != nil { 1922 return nil, err 1923 } 1924 1925 return &vtctldatapb.GetTopologyPathResponse{ 1926 Cell: cell, 1927 }, nil 1928 } 1929 1930 // GetVersion returns the version of a tablet from its debug vars 1931 func (s *VtctldServer) GetVersion(ctx context.Context, req *vtctldatapb.GetVersionRequest) (resp *vtctldatapb.GetVersionResponse, err error) { 1932 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetVersion") 1933 defer span.Finish() 1934 1935 defer panicHandler(&err) 1936 1937 tabletAlias := req.TabletAlias 1938 tablet, err := s.ts.GetTablet(ctx, tabletAlias) 1939 if err != nil { 1940 return nil, err 1941 } 1942 1943 version, err := GetVersionFunc()(tablet.Addr()) 1944 if err != nil { 1945 return nil, err 1946 } 1947 log.Infof("Tablet %v is running version '%v'", topoproto.TabletAliasString(tabletAlias), version) 1948 return &vtctldatapb.GetVersionResponse{Version: version}, err 1949 } 1950 1951 // GetVSchema is part of the vtctlservicepb.VtctldServer interface. 1952 func (s *VtctldServer) GetVSchema(ctx context.Context, req *vtctldatapb.GetVSchemaRequest) (resp *vtctldatapb.GetVSchemaResponse, err error) { 1953 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetVSchema") 1954 defer span.Finish() 1955 1956 defer panicHandler(&err) 1957 1958 span.Annotate("keyspace", req.Keyspace) 1959 1960 vschema, err := s.ts.GetVSchema(ctx, req.Keyspace) 1961 if err != nil { 1962 return nil, err 1963 } 1964 1965 return &vtctldatapb.GetVSchemaResponse{ 1966 VSchema: vschema, 1967 }, nil 1968 } 1969 1970 // GetWorkflows is part of the vtctlservicepb.VtctldServer interface. 1971 func (s *VtctldServer) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflowsRequest) (resp *vtctldatapb.GetWorkflowsResponse, err error) { 1972 span, ctx := trace.NewSpan(ctx, "VtctldServer.GetWorkflows") 1973 defer span.Finish() 1974 1975 defer panicHandler(&err) 1976 1977 span.Annotate("keyspace", req.Keyspace) 1978 span.Annotate("active_only", req.ActiveOnly) 1979 1980 resp, err = s.ws.GetWorkflows(ctx, req) 1981 return resp, err 1982 } 1983 1984 // InitShardPrimary is part of the vtctlservicepb.VtctldServer interface. 1985 func (s *VtctldServer) InitShardPrimary(ctx context.Context, req *vtctldatapb.InitShardPrimaryRequest) (resp *vtctldatapb.InitShardPrimaryResponse, err error) { 1986 span, ctx := trace.NewSpan(ctx, "VtctldServer.InitShardPrimary") 1987 defer span.Finish() 1988 1989 defer panicHandler(&err) 1990 1991 if req.Keyspace == "" { 1992 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "keyspace field is required") 1993 return nil, err 1994 } 1995 1996 if req.Shard == "" { 1997 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "shard field is required") 1998 return nil, err 1999 } 2000 2001 waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout) 2002 if err != nil { 2003 return nil, err 2004 } else if !ok { 2005 waitReplicasTimeout = time.Second * 30 2006 } 2007 2008 span.Annotate("keyspace", req.Keyspace) 2009 span.Annotate("shard", req.Shard) 2010 span.Annotate("wait_replicas_timeout_sec", waitReplicasTimeout.Seconds()) 2011 span.Annotate("force", req.Force) 2012 2013 ctx, unlock, err := s.ts.LockShard(ctx, req.Keyspace, req.Shard, fmt.Sprintf("InitShardPrimary(%v)", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))) 2014 if err != nil { 2015 return nil, err 2016 } 2017 defer unlock(&err) 2018 2019 m := sync.RWMutex{} 2020 ev := &events.Reparent{} 2021 logstream := []*logutilpb.Event{} 2022 2023 resp = &vtctldatapb.InitShardPrimaryResponse{} 2024 err = s.InitShardPrimaryLocked(ctx, ev, req, waitReplicasTimeout, s.tmc, logutil.NewCallbackLogger(func(e *logutilpb.Event) { 2025 m.Lock() 2026 defer m.Unlock() 2027 2028 logstream = append(logstream, e) 2029 })) 2030 if err != nil { 2031 event.DispatchUpdate(ev, "failed InitShardPrimary: "+err.Error()) 2032 } else { 2033 event.DispatchUpdate(ev, "finished InitShardPrimary") 2034 } 2035 2036 m.RLock() 2037 defer m.RUnlock() 2038 2039 resp.Events = make([]*logutilpb.Event, len(logstream)) 2040 copy(resp.Events, logstream) 2041 2042 return resp, err 2043 } 2044 2045 // InitShardPrimaryLocked is the main work of doing an InitShardPrimary. It 2046 // should only called by callers that have already locked the shard in the topo. 2047 // It is only public so that it can be used in wrangler and legacy vtctl server. 2048 func (s *VtctldServer) InitShardPrimaryLocked( 2049 ctx context.Context, 2050 ev *events.Reparent, 2051 req *vtctldatapb.InitShardPrimaryRequest, 2052 waitReplicasTimeout time.Duration, 2053 tmc tmclient.TabletManagerClient, 2054 logger logutil.Logger, 2055 ) error { 2056 // (TODO:@amason) The code below this point is a verbatim copy of 2057 // initShardMasterLocked in package wrangler, modulo the following: 2058 // - s/keyspace/req.Keyspace 2059 // - s/shard/req.Shard 2060 // - s/masterElectTabletAlias/req.PrimaryElectTabletAlias 2061 // - s/wr.logger/logger 2062 // - s/wr.tmc/tmc 2063 // - s/wr.ts/s.ts 2064 // 2065 // It is also sufficiently complex and critical code that I feel it's unwise 2066 // to port and refactor in one change; so, this comment serves both as an 2067 // acknowledgement of that, as well as a TODO marker for us to revisit this. 2068 shardInfo, err := s.ts.GetShard(ctx, req.Keyspace, req.Shard) 2069 if err != nil { 2070 return err 2071 } 2072 ev.ShardInfo = *shardInfo 2073 2074 durabilityName, err := s.ts.GetKeyspaceDurability(ctx, req.Keyspace) 2075 if err != nil { 2076 return err 2077 } 2078 log.Infof("Getting a new durability policy for %v", durabilityName) 2079 durability, err := reparentutil.GetDurabilityPolicy(durabilityName) 2080 if err != nil { 2081 return err 2082 } 2083 2084 event.DispatchUpdate(ev, "reading tablet map") 2085 tabletMap, err := s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard) 2086 if err != nil { 2087 return err 2088 } 2089 2090 // Check the primary elect is in tabletMap. 2091 primaryElectTabletAliasStr := topoproto.TabletAliasString(req.PrimaryElectTabletAlias) 2092 primaryElectTabletInfo, ok := tabletMap[primaryElectTabletAliasStr] 2093 if !ok { 2094 return fmt.Errorf("primary-elect tablet %v is not in the shard", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2095 } 2096 ev.NewPrimary = proto.Clone(primaryElectTabletInfo.Tablet).(*topodatapb.Tablet) 2097 2098 // Check the primary is the only primary is the shard, or -force was used. 2099 _, primaryTabletMap := topotools.SortedTabletMap(tabletMap) 2100 if !topoproto.TabletAliasEqual(shardInfo.PrimaryAlias, req.PrimaryElectTabletAlias) { 2101 if !req.Force { 2102 return fmt.Errorf("primary-elect tablet %v is not the shard primary, use -force to proceed anyway", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2103 } 2104 2105 logger.Warningf("primary-elect tablet %v is not the shard primary, proceeding anyway as -force was used", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2106 } 2107 if _, ok := primaryTabletMap[primaryElectTabletAliasStr]; !ok { 2108 if !req.Force { 2109 return fmt.Errorf("primary-elect tablet %v is not a primary in the shard, use -force to proceed anyway", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2110 } 2111 logger.Warningf("primary-elect tablet %v is not a primary in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2112 } 2113 haveOtherPrimary := false 2114 for alias := range primaryTabletMap { 2115 if primaryElectTabletAliasStr != alias { 2116 haveOtherPrimary = true 2117 } 2118 } 2119 if haveOtherPrimary { 2120 if !req.Force { 2121 return fmt.Errorf("primary-elect tablet %v is not the only primary in the shard, use -force to proceed anyway", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2122 } 2123 logger.Warningf("primary-elect tablet %v is not the only primary in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2124 } 2125 2126 // First phase: reset replication on all tablets. If anyone fails, 2127 // we stop. It is probably because it is unreachable, and may leave 2128 // an unstable database process in the mix, with a database daemon 2129 // at a wrong replication spot. 2130 2131 // Create a context for the following RPCs that respects waitReplicasTimeout 2132 resetCtx, resetCancel := context.WithTimeout(ctx, waitReplicasTimeout) 2133 defer resetCancel() 2134 2135 event.DispatchUpdate(ev, "resetting replication on all tablets") 2136 wg := sync.WaitGroup{} 2137 rec := concurrency.AllErrorRecorder{} 2138 for alias, tabletInfo := range tabletMap { 2139 wg.Add(1) 2140 go func(alias string, tabletInfo *topo.TabletInfo) { 2141 defer wg.Done() 2142 logger.Infof("resetting replication on tablet %v", alias) 2143 if err := tmc.ResetReplication(resetCtx, tabletInfo.Tablet); err != nil { 2144 rec.RecordError(fmt.Errorf("tablet %v ResetReplication failed (either fix it, or Scrap it): %v", alias, err)) 2145 } 2146 }(alias, tabletInfo) 2147 } 2148 wg.Wait() 2149 if err := rec.Error(); err != nil { 2150 // if any of the replicas failed 2151 return err 2152 } 2153 2154 // Check we still have the topology lock. 2155 if err := topo.CheckShardLocked(ctx, req.Keyspace, req.Shard); err != nil { 2156 return fmt.Errorf("lost topology lock, aborting: %v", err) 2157 } 2158 2159 // Tell the new primary to break its replicas, return its replication 2160 // position 2161 logger.Infof("initializing primary on %v", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)) 2162 event.DispatchUpdate(ev, "initializing primary") 2163 rp, err := tmc.InitPrimary(ctx, primaryElectTabletInfo.Tablet, reparentutil.SemiSyncAckers(durability, primaryElectTabletInfo.Tablet) > 0) 2164 if err != nil { 2165 return err 2166 } 2167 2168 // Check we stil have the topology lock. 2169 if err := topo.CheckShardLocked(ctx, req.Keyspace, req.Shard); err != nil { 2170 return fmt.Errorf("lost topology lock, aborting: %v", err) 2171 } 2172 2173 // Create a cancelable context for the following RPCs. 2174 // If error conditions happen, we can cancel all outgoing RPCs. 2175 replCtx, replCancel := context.WithTimeout(ctx, waitReplicasTimeout) 2176 defer replCancel() 2177 2178 // Now tell the new primary to insert the reparent_journal row, 2179 // and tell everybody else to become a replica of the new primary, 2180 // and wait for the row in the reparent_journal table. 2181 // We start all these in parallel, to handle the semi-sync 2182 // case: for the primary to be able to commit its row in the 2183 // reparent_journal table, it needs connected replicas. 2184 event.DispatchUpdate(ev, "reparenting all tablets") 2185 now := time.Now().UnixNano() 2186 wgPrimary := sync.WaitGroup{} 2187 wgReplicas := sync.WaitGroup{} 2188 var primaryErr error 2189 for alias, tabletInfo := range tabletMap { 2190 if alias == primaryElectTabletAliasStr { 2191 wgPrimary.Add(1) 2192 go func(alias string, tabletInfo *topo.TabletInfo) { 2193 defer wgPrimary.Done() 2194 logger.Infof("populating reparent journal on new primary %v", alias) 2195 primaryErr = tmc.PopulateReparentJournal(replCtx, tabletInfo.Tablet, now, 2196 initShardPrimaryOperation, 2197 req.PrimaryElectTabletAlias, rp) 2198 }(alias, tabletInfo) 2199 } else { 2200 wgReplicas.Add(1) 2201 go func(alias string, tabletInfo *topo.TabletInfo) { 2202 defer wgReplicas.Done() 2203 logger.Infof("initializing replica %v", alias) 2204 if err := tmc.InitReplica(replCtx, tabletInfo.Tablet, req.PrimaryElectTabletAlias, rp, now, reparentutil.IsReplicaSemiSync(durability, primaryElectTabletInfo.Tablet, tabletInfo.Tablet)); err != nil { 2205 rec.RecordError(fmt.Errorf("tablet %v InitReplica failed: %v", alias, err)) 2206 } 2207 }(alias, tabletInfo) 2208 } 2209 } 2210 2211 // After the primary is done, we can update the shard record 2212 // (note with semi-sync, it also means at least one replica is done). 2213 wgPrimary.Wait() 2214 if primaryErr != nil { 2215 // The primary failed, there is no way the 2216 // replicas will work. So we cancel them all. 2217 logger.Warningf("primary failed to PopulateReparentJournal, canceling replicas") 2218 replCancel() 2219 wgReplicas.Wait() 2220 return fmt.Errorf("failed to PopulateReparentJournal on primary: %v", primaryErr) 2221 } 2222 if !topoproto.TabletAliasEqual(shardInfo.PrimaryAlias, req.PrimaryElectTabletAlias) { 2223 if _, err := s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error { 2224 si.PrimaryAlias = req.PrimaryElectTabletAlias 2225 return nil 2226 }); err != nil { 2227 wgReplicas.Wait() 2228 return fmt.Errorf("failed to update shard primary record: %v", err) 2229 } 2230 } 2231 2232 // Wait for the replicas to complete. If some of them fail, we 2233 // don't want to rebuild the shard serving graph (the failure 2234 // will most likely be a timeout, and our context will be 2235 // expired, so the rebuild will fail anyway) 2236 wgReplicas.Wait() 2237 if err := rec.Error(); err != nil { 2238 return err 2239 } 2240 2241 // Create database if necessary on the primary. replicas will get it too through 2242 // replication. Since the user called InitShardPrimary, they've told us to 2243 // assume that whatever data is on all the replicas is what they intended. 2244 // If the database doesn't exist, it means the user intends for these tablets 2245 // to begin serving with no data (i.e. first time initialization). 2246 createDB := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", sqlescape.EscapeID(topoproto.TabletDbName(primaryElectTabletInfo.Tablet))) 2247 if _, err := tmc.ExecuteFetchAsDba(ctx, primaryElectTabletInfo.Tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ 2248 Query: []byte(createDB), 2249 MaxRows: 1, 2250 ReloadSchema: true, 2251 }); err != nil { 2252 return fmt.Errorf("failed to create database: %v", err) 2253 } 2254 // Refresh the state to force the tabletserver to reconnect after db has been created. 2255 if err := tmc.RefreshState(ctx, primaryElectTabletInfo.Tablet); err != nil { 2256 log.Warningf("RefreshState failed: %v", err) 2257 } 2258 2259 return nil 2260 } 2261 2262 // PingTablet is part of the vtctlservicepb.VtctldServer interface. 2263 func (s *VtctldServer) PingTablet(ctx context.Context, req *vtctldatapb.PingTabletRequest) (resp *vtctldatapb.PingTabletResponse, err error) { 2264 span, ctx := trace.NewSpan(ctx, "VtctldServer.PingTablet") 2265 defer span.Finish() 2266 2267 defer panicHandler(&err) 2268 2269 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 2270 2271 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 2272 if err != nil { 2273 return nil, err 2274 } 2275 2276 err = s.tmc.Ping(ctx, tablet.Tablet) 2277 if err != nil { 2278 return nil, err 2279 } 2280 2281 return &vtctldatapb.PingTabletResponse{}, nil 2282 } 2283 2284 // PlannedReparentShard is part of the vtctldservicepb.VtctldServer interface. 2285 func (s *VtctldServer) PlannedReparentShard(ctx context.Context, req *vtctldatapb.PlannedReparentShardRequest) (resp *vtctldatapb.PlannedReparentShardResponse, err error) { 2286 span, ctx := trace.NewSpan(ctx, "VtctldServer.PlannedReparentShard") 2287 defer span.Finish() 2288 2289 defer panicHandler(&err) 2290 2291 waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout) 2292 if err != nil { 2293 return nil, err 2294 } else if !ok { 2295 waitReplicasTimeout = time.Second * 30 2296 } 2297 2298 span.Annotate("keyspace", req.Keyspace) 2299 span.Annotate("shard", req.Shard) 2300 span.Annotate("wait_replicas_timeout_sec", waitReplicasTimeout.Seconds()) 2301 2302 if req.AvoidPrimary != nil { 2303 span.Annotate("avoid_primary_alias", topoproto.TabletAliasString(req.AvoidPrimary)) 2304 } 2305 2306 if req.NewPrimary != nil { 2307 span.Annotate("new_primary_alias", topoproto.TabletAliasString(req.NewPrimary)) 2308 } 2309 2310 m := sync.RWMutex{} 2311 logstream := []*logutilpb.Event{} 2312 logger := logutil.NewCallbackLogger(func(e *logutilpb.Event) { 2313 m.Lock() 2314 defer m.Unlock() 2315 2316 logstream = append(logstream, e) 2317 }) 2318 2319 ev, err := reparentutil.NewPlannedReparenter(s.ts, s.tmc, logger).ReparentShard(ctx, 2320 req.Keyspace, 2321 req.Shard, 2322 reparentutil.PlannedReparentOptions{ 2323 AvoidPrimaryAlias: req.AvoidPrimary, 2324 NewPrimaryAlias: req.NewPrimary, 2325 WaitReplicasTimeout: waitReplicasTimeout, 2326 }, 2327 ) 2328 2329 resp = &vtctldatapb.PlannedReparentShardResponse{ 2330 Keyspace: req.Keyspace, 2331 Shard: req.Shard, 2332 } 2333 2334 if ev != nil { 2335 resp.Keyspace = ev.ShardInfo.Keyspace() 2336 resp.Shard = ev.ShardInfo.ShardName() 2337 2338 if !topoproto.TabletAliasIsZero(ev.NewPrimary.Alias) { 2339 resp.PromotedPrimary = ev.NewPrimary.Alias 2340 } 2341 } 2342 2343 m.RLock() 2344 defer m.RUnlock() 2345 2346 resp.Events = make([]*logutilpb.Event, len(logstream)) 2347 copy(resp.Events, logstream) 2348 2349 return resp, err 2350 } 2351 2352 // RebuildKeyspaceGraph is part of the vtctlservicepb.VtctldServer interface. 2353 func (s *VtctldServer) RebuildKeyspaceGraph(ctx context.Context, req *vtctldatapb.RebuildKeyspaceGraphRequest) (resp *vtctldatapb.RebuildKeyspaceGraphResponse, err error) { 2354 span, ctx := trace.NewSpan(ctx, "VtctldServer.RebuildKeyspaceGraph") 2355 defer span.Finish() 2356 2357 defer panicHandler(&err) 2358 2359 span.Annotate("keyspace", req.Keyspace) 2360 span.Annotate("cells", strings.Join(req.Cells, ",")) 2361 span.Annotate("allow_partial", req.AllowPartial) 2362 2363 if err = topotools.RebuildKeyspace(ctx, logutil.NewCallbackLogger(func(e *logutilpb.Event) {}), s.ts, req.Keyspace, req.Cells, req.AllowPartial); err != nil { 2364 return nil, err 2365 } 2366 2367 return &vtctldatapb.RebuildKeyspaceGraphResponse{}, nil 2368 } 2369 2370 // RebuildVSchemaGraph is part of the vtctlservicepb.VtctldServer interface. 2371 func (s *VtctldServer) RebuildVSchemaGraph(ctx context.Context, req *vtctldatapb.RebuildVSchemaGraphRequest) (resp *vtctldatapb.RebuildVSchemaGraphResponse, err error) { 2372 span, ctx := trace.NewSpan(ctx, "VtctldServer.RebuildVSchemaGraph") 2373 defer span.Finish() 2374 2375 defer panicHandler(&err) 2376 2377 span.Annotate("cells", strings.Join(req.Cells, ",")) 2378 2379 if err = s.ts.RebuildSrvVSchema(ctx, req.Cells); err != nil { 2380 return nil, err 2381 } 2382 2383 return &vtctldatapb.RebuildVSchemaGraphResponse{}, nil 2384 } 2385 2386 // RefreshState is part of the vtctldservicepb.VtctldServer interface. 2387 func (s *VtctldServer) RefreshState(ctx context.Context, req *vtctldatapb.RefreshStateRequest) (resp *vtctldatapb.RefreshStateResponse, err error) { 2388 span, ctx := trace.NewSpan(ctx, "VtctldServer.RefreshState") 2389 defer span.Finish() 2390 2391 defer panicHandler(&err) 2392 2393 if req.TabletAlias == nil { 2394 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "RefreshState requires a tablet alias") 2395 return nil, err 2396 } 2397 2398 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 2399 defer cancel() 2400 2401 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 2402 if err != nil { 2403 err = fmt.Errorf("Failed to get tablet %s: %w", topoproto.TabletAliasString(req.TabletAlias), err) 2404 return nil, err 2405 } 2406 2407 if err = s.tmc.RefreshState(ctx, tablet.Tablet); err != nil { 2408 return nil, err 2409 } 2410 2411 return &vtctldatapb.RefreshStateResponse{}, nil 2412 } 2413 2414 // RefreshStateByShard is part of the vtctldservicepb.VtctldServer interface. 2415 func (s *VtctldServer) RefreshStateByShard(ctx context.Context, req *vtctldatapb.RefreshStateByShardRequest) (resp *vtctldatapb.RefreshStateByShardResponse, err error) { 2416 span, ctx := trace.NewSpan(ctx, "VtctldServer.RefreshStateByShard") 2417 defer span.Finish() 2418 2419 defer panicHandler(&err) 2420 2421 if req.Keyspace == "" { 2422 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "RefreshStateByShard requires a keyspace") 2423 return nil, err 2424 } 2425 2426 if req.Shard == "" { 2427 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "RefreshStateByShard requires a shard") 2428 return nil, err 2429 } 2430 2431 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 2432 defer cancel() 2433 2434 si, err := s.ts.GetShard(ctx, req.Keyspace, req.Shard) 2435 if err != nil { 2436 err = fmt.Errorf("Failed to get shard %s/%s/: %w", req.Keyspace, req.Shard, err) 2437 return nil, err 2438 } 2439 2440 isPartial, partialDetails, err := topotools.RefreshTabletsByShard(ctx, s.ts, s.tmc, si, req.Cells, logutil.NewCallbackLogger(func(e *logutilpb.Event) { 2441 switch e.Level { 2442 case logutilpb.Level_WARNING: 2443 log.Warningf(e.Value) 2444 case logutilpb.Level_ERROR: 2445 log.Errorf(e.Value) 2446 default: 2447 log.Infof(e.Value) 2448 } 2449 })) 2450 if err != nil { 2451 return nil, err 2452 } 2453 2454 return &vtctldatapb.RefreshStateByShardResponse{ 2455 IsPartialRefresh: isPartial, 2456 PartialRefreshDetails: partialDetails, 2457 }, nil 2458 } 2459 2460 // ReloadSchema is part of the vtctlservicepb.VtctldServer interface. 2461 func (s *VtctldServer) ReloadSchema(ctx context.Context, req *vtctldatapb.ReloadSchemaRequest) (resp *vtctldatapb.ReloadSchemaResponse, err error) { 2462 span, ctx := trace.NewSpan(ctx, "VtctldServer.ReloadSchema") 2463 defer span.Finish() 2464 2465 defer panicHandler(&err) 2466 2467 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 2468 2469 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 2470 if err != nil { 2471 err = vterrors.Errorf(vtrpc.Code_NOT_FOUND, "GetTablet(%v) failed: %v", req.TabletAlias, err) 2472 return nil, err 2473 } 2474 2475 err = s.tmc.ReloadSchema(ctx, ti.Tablet, "") 2476 if err != nil { 2477 return nil, err 2478 } 2479 2480 return &vtctldatapb.ReloadSchemaResponse{}, nil 2481 } 2482 2483 // ReloadSchemaShard is part of the vtctlservicepb.VtctldServer interface. 2484 func (s *VtctldServer) ReloadSchemaShard(ctx context.Context, req *vtctldatapb.ReloadSchemaShardRequest) (resp *vtctldatapb.ReloadSchemaShardResponse, err error) { 2485 defer panicHandler(&err) 2486 2487 logger, getEvents := eventStreamLogger() 2488 2489 var sema *sync2.Semaphore 2490 if req.Concurrency > 0 { 2491 sema = sync2.NewSemaphore(int(req.Concurrency), 0) 2492 } 2493 2494 s.reloadSchemaShard(ctx, req, sema, logger) 2495 2496 return &vtctldatapb.ReloadSchemaShardResponse{ 2497 Events: getEvents(), 2498 }, nil 2499 } 2500 2501 func (s *VtctldServer) reloadSchemaShard(ctx context.Context, req *vtctldatapb.ReloadSchemaShardRequest, sema *sync2.Semaphore, logger logutil.Logger) { 2502 span, ctx := trace.NewSpan(ctx, "VtctldServer.ReloadSchemaShard") 2503 defer span.Finish() 2504 2505 span.Annotate("keyspace", req.Keyspace) 2506 span.Annotate("shard", req.Shard) 2507 span.Annotate("concurrency", req.Concurrency) 2508 span.Annotate("include_primary", req.IncludePrimary) 2509 span.Annotate("wait_position", req.WaitPosition) 2510 2511 isPartial, ok := schematools.ReloadShard(ctx, s.ts, s.tmc, logger, req.Keyspace, req.Shard, req.WaitPosition, sema, req.IncludePrimary) 2512 if !ok { 2513 return 2514 } 2515 2516 span.Annotate("is_partial_result", isPartial) 2517 } 2518 2519 // ReloadSchemaKeyspace is part of the vtctlservicepb.VtctldServer interface. 2520 func (s *VtctldServer) ReloadSchemaKeyspace(ctx context.Context, req *vtctldatapb.ReloadSchemaKeyspaceRequest) (resp *vtctldatapb.ReloadSchemaKeyspaceResponse, err error) { 2521 span, ctx := trace.NewSpan(ctx, "VtctldServer.ReloadSchemaKeyspace") 2522 defer span.Finish() 2523 2524 defer panicHandler(&err) 2525 2526 span.Annotate("keyspace", req.Keyspace) 2527 span.Annotate("concurrency", req.Concurrency) 2528 span.Annotate("include_primary", req.IncludePrimary) 2529 span.Annotate("wait_position", req.WaitPosition) 2530 2531 shards, err := s.ts.GetShardNames(ctx, req.Keyspace) 2532 if err != nil { 2533 err = vterrors.Errorf(vtrpc.Code_INTERNAL, "GetShardNames(%v) failed: %v", req.Keyspace, err) 2534 return nil, err 2535 } 2536 2537 var ( 2538 wg sync.WaitGroup 2539 sema *sync2.Semaphore 2540 logger, getEvents = eventStreamLogger() 2541 ) 2542 2543 if req.Concurrency > 0 { 2544 sema = sync2.NewSemaphore(int(req.Concurrency), 0) 2545 } 2546 2547 for _, shard := range shards { 2548 wg.Add(1) 2549 go func(shard string) { 2550 defer wg.Done() 2551 s.reloadSchemaShard(ctx, &vtctldatapb.ReloadSchemaShardRequest{ 2552 Keyspace: req.Keyspace, 2553 Shard: shard, 2554 IncludePrimary: req.IncludePrimary, 2555 WaitPosition: req.WaitPosition, 2556 }, sema, logger) 2557 }(shard) 2558 } 2559 2560 wg.Wait() 2561 2562 return &vtctldatapb.ReloadSchemaKeyspaceResponse{ 2563 Events: getEvents(), 2564 }, nil 2565 } 2566 2567 // RemoveBackup is part of the vtctlservicepb.VtctldServer interface. 2568 func (s *VtctldServer) RemoveBackup(ctx context.Context, req *vtctldatapb.RemoveBackupRequest) (resp *vtctldatapb.RemoveBackupResponse, err error) { 2569 span, ctx := trace.NewSpan(ctx, "VtctldServer.RemoveBackup") 2570 defer span.Finish() 2571 2572 defer panicHandler(&err) 2573 2574 bucket := fmt.Sprintf("%v/%v", req.Keyspace, req.Shard) 2575 2576 span.Annotate("keyspace", req.Keyspace) 2577 span.Annotate("shard", req.Shard) 2578 span.Annotate("bucket", bucket) 2579 span.Annotate("backup_name", req.Name) 2580 2581 bs, err := backupstorage.GetBackupStorage() 2582 if err != nil { 2583 return nil, err 2584 } 2585 defer bs.Close() 2586 2587 if err = bs.RemoveBackup(ctx, bucket, req.Name); err != nil { 2588 return nil, err 2589 } 2590 2591 return &vtctldatapb.RemoveBackupResponse{}, nil 2592 } 2593 2594 // RemoveKeyspaceCell is part of the vtctlservicepb.VtctldServer interface. 2595 func (s *VtctldServer) RemoveKeyspaceCell(ctx context.Context, req *vtctldatapb.RemoveKeyspaceCellRequest) (resp *vtctldatapb.RemoveKeyspaceCellResponse, err error) { 2596 span, ctx := trace.NewSpan(ctx, "VtctldServer.RemoveKeyspaceCell") 2597 defer span.Finish() 2598 2599 defer panicHandler(&err) 2600 2601 span.Annotate("keyspace", req.Keyspace) 2602 span.Annotate("cell", req.Cell) 2603 span.Annotate("force", req.Force) 2604 span.Annotate("recursive", req.Recursive) 2605 2606 shards, err := s.ts.GetShardNames(ctx, req.Keyspace) 2607 if err != nil { 2608 return nil, err 2609 } 2610 2611 // Remove all the shards, serially. Stop immediately if any fail. 2612 for _, shard := range shards { 2613 log.Infof("Removing cell %v from shard %v/%v", req.Cell, req.Keyspace, shard) 2614 if err2 := removeShardCell(ctx, s.ts, req.Cell, req.Keyspace, shard, req.Recursive, req.Force); err2 != nil { 2615 err = fmt.Errorf("cannot remove cell %v from shard %v/%v: %w", req.Cell, req.Keyspace, shard, err2) 2616 return nil, err 2617 } 2618 } 2619 2620 // Last, remove the SrvKeyspace object. 2621 log.Infof("Removing cell %v keyspace %v SrvKeyspace object", req.Cell, req.Keyspace) 2622 if err = s.ts.DeleteSrvKeyspace(ctx, req.Cell, req.Keyspace); err != nil { 2623 err = fmt.Errorf("cannot delete SrvKeyspace from cell %v for keyspace %v: %w", req.Cell, req.Keyspace, err) 2624 return nil, err 2625 } 2626 2627 return &vtctldatapb.RemoveKeyspaceCellResponse{}, nil 2628 } 2629 2630 // RemoveShardCell is part of the vtctlservicepb.VtctldServer interface. 2631 func (s *VtctldServer) RemoveShardCell(ctx context.Context, req *vtctldatapb.RemoveShardCellRequest) (resp *vtctldatapb.RemoveShardCellResponse, err error) { 2632 span, ctx := trace.NewSpan(ctx, "VtctldServer.RemoveShardCell") 2633 defer span.Finish() 2634 2635 defer panicHandler(&err) 2636 2637 span.Annotate("keyspace", req.Keyspace) 2638 span.Annotate("shard", req.ShardName) 2639 span.Annotate("cell", req.Cell) 2640 span.Annotate("force", req.Force) 2641 span.Annotate("recursive", req.Recursive) 2642 2643 if err = removeShardCell(ctx, s.ts, req.Cell, req.Keyspace, req.ShardName, req.Recursive, req.Force); err != nil { 2644 return nil, err 2645 } 2646 2647 return &vtctldatapb.RemoveShardCellResponse{}, nil 2648 } 2649 2650 // ReparentTablet is part of the vtctldservicepb.VtctldServer interface. 2651 func (s *VtctldServer) ReparentTablet(ctx context.Context, req *vtctldatapb.ReparentTabletRequest) (resp *vtctldatapb.ReparentTabletResponse, err error) { 2652 span, ctx := trace.NewSpan(ctx, "VtctldServer.ReparentTablet") 2653 defer span.Finish() 2654 2655 defer panicHandler(&err) 2656 2657 if req.Tablet == nil { 2658 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "tablet alias must not be nil") 2659 return nil, err 2660 } 2661 2662 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.Tablet)) 2663 2664 tablet, err := s.ts.GetTablet(ctx, req.Tablet) 2665 if err != nil { 2666 return nil, err 2667 } 2668 2669 shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) 2670 if err != nil { 2671 return nil, err 2672 } 2673 2674 if !shard.HasPrimary() { 2675 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no primary tablet for shard %v/%v", tablet.Keyspace, tablet.Shard) 2676 return nil, err 2677 } 2678 2679 shardPrimary, err := s.ts.GetTablet(ctx, shard.PrimaryAlias) 2680 if err != nil { 2681 err = fmt.Errorf("cannot lookup primary tablet %v for shard %v/%v: %w", topoproto.TabletAliasString(shard.PrimaryAlias), tablet.Keyspace, tablet.Shard, err) 2682 return nil, err 2683 } 2684 2685 if shardPrimary.Type != topodatapb.TabletType_PRIMARY { 2686 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "TopologyServer has incosistent state for shard primary %v", topoproto.TabletAliasString(shard.PrimaryAlias)) 2687 return nil, err 2688 } 2689 2690 if shardPrimary.Keyspace != tablet.Keyspace || shardPrimary.Shard != tablet.Shard { 2691 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary %v and potential replica %v not in same keypace shard (%v/%v)", topoproto.TabletAliasString(shard.PrimaryAlias), topoproto.TabletAliasString(req.Tablet), tablet.Keyspace, tablet.Shard) 2692 return nil, err 2693 } 2694 2695 if topoproto.TabletAliasEqual(req.Tablet, shardPrimary.Alias) { 2696 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "cannot ReparentTablet current shard primary (%v) onto itself", topoproto.TabletAliasString(req.Tablet)) 2697 return nil, err 2698 } 2699 2700 durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace) 2701 if err != nil { 2702 return nil, err 2703 } 2704 log.Infof("Getting a new durability policy for %v", durabilityName) 2705 durability, err := reparentutil.GetDurabilityPolicy(durabilityName) 2706 if err != nil { 2707 return nil, err 2708 } 2709 2710 if err = s.tmc.SetReplicationSource(ctx, tablet.Tablet, shard.PrimaryAlias, 0, "", false, reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, tablet.Tablet)); err != nil { 2711 return nil, err 2712 } 2713 2714 return &vtctldatapb.ReparentTabletResponse{ 2715 Keyspace: tablet.Keyspace, 2716 Shard: tablet.Shard, 2717 Primary: shard.PrimaryAlias, 2718 }, nil 2719 } 2720 2721 func (s *VtctldServer) RestoreFromBackup(req *vtctldatapb.RestoreFromBackupRequest, stream vtctlservicepb.Vtctld_RestoreFromBackupServer) (err error) { 2722 span, ctx := trace.NewSpan(stream.Context(), "VtctldServer.RestoreFromBackup") 2723 defer span.Finish() 2724 2725 defer panicHandler(&err) 2726 2727 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 2728 backupTime := protoutil.TimeFromProto(req.BackupTime) 2729 if !backupTime.IsZero() { 2730 span.Annotate("backup_timestamp", backupTime.Format(mysqlctl.BackupTimestampFormat)) 2731 } 2732 2733 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 2734 if err != nil { 2735 return err 2736 } 2737 2738 span.Annotate("keyspace", ti.Keyspace) 2739 span.Annotate("shard", ti.Shard) 2740 2741 r := &tabletmanagerdatapb.RestoreFromBackupRequest{ 2742 BackupTime: req.BackupTime, 2743 RestoreToPos: req.RestoreToPos, 2744 DryRun: req.DryRun, 2745 } 2746 logStream, err := s.tmc.RestoreFromBackup(ctx, ti.Tablet, r) 2747 if err != nil { 2748 return err 2749 } 2750 2751 logger := logutil.NewConsoleLogger() 2752 2753 for { 2754 var event *logutilpb.Event 2755 event, err = logStream.Recv() 2756 switch err { 2757 case nil: 2758 logutil.LogEvent(logger, event) 2759 resp := &vtctldatapb.RestoreFromBackupResponse{ 2760 TabletAlias: req.TabletAlias, 2761 Keyspace: ti.Keyspace, 2762 Shard: ti.Shard, 2763 Event: event, 2764 } 2765 if err = stream.Send(resp); err != nil { 2766 logger.Errorf("failed to send stream response %+v: %v", resp, err) 2767 } 2768 case io.EOF: 2769 // Do not do anything when active reparenting is disabled. 2770 if mysqlctl.DisableActiveReparents { 2771 return nil 2772 } 2773 if req.RestoreToPos != "" && !req.DryRun { 2774 // point in time recovery. Do not restore replication 2775 return nil 2776 } 2777 2778 // Otherwise, we find the correct primary tablet and set the 2779 // replication source on the freshly-restored tablet, since the 2780 // shard primary may have changed while it was restoring. 2781 // 2782 // This also affects whether or not we want to send semi-sync ACKs. 2783 var ti *topo.TabletInfo 2784 ti, err = s.ts.GetTablet(ctx, req.TabletAlias) 2785 if err != nil { 2786 return err 2787 } 2788 2789 err = reparentutil.SetReplicationSource(ctx, s.ts, s.tmc, ti.Tablet) 2790 return err 2791 default: 2792 return err 2793 } 2794 } 2795 } 2796 2797 // RunHealthCheck is part of the vtctlservicepb.VtctldServer interface. 2798 func (s *VtctldServer) RunHealthCheck(ctx context.Context, req *vtctldatapb.RunHealthCheckRequest) (resp *vtctldatapb.RunHealthCheckResponse, err error) { 2799 span, ctx := trace.NewSpan(ctx, "VtctldServer.RunHealthCheck") 2800 defer span.Finish() 2801 2802 defer panicHandler(&err) 2803 2804 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 2805 2806 ti, err := s.ts.GetTablet(ctx, req.TabletAlias) 2807 if err != nil { 2808 return nil, err 2809 } 2810 2811 err = s.tmc.RunHealthCheck(ctx, ti.Tablet) 2812 if err != nil { 2813 return nil, err 2814 } 2815 2816 return &vtctldatapb.RunHealthCheckResponse{}, nil 2817 } 2818 2819 // SetKeyspaceDurabilityPolicy is part of the vtctlservicepb.VtctldServer interface. 2820 func (s *VtctldServer) SetKeyspaceDurabilityPolicy(ctx context.Context, req *vtctldatapb.SetKeyspaceDurabilityPolicyRequest) (resp *vtctldatapb.SetKeyspaceDurabilityPolicyResponse, err error) { 2821 span, ctx := trace.NewSpan(ctx, "VtctldServer.SetKeyspaceDurabilityPolicy") 2822 defer span.Finish() 2823 2824 defer panicHandler(&err) 2825 2826 span.Annotate("keyspace", req.Keyspace) 2827 span.Annotate("durability_policy", req.DurabilityPolicy) 2828 2829 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "SetKeyspaceDurabilityPolicy") 2830 if lockErr != nil { 2831 err = lockErr 2832 return nil, err 2833 } 2834 2835 defer unlock(&err) 2836 2837 ki, err := s.ts.GetKeyspace(ctx, req.Keyspace) 2838 if err != nil { 2839 return nil, err 2840 } 2841 2842 policyValid := reparentutil.CheckDurabilityPolicyExists(req.DurabilityPolicy) 2843 if !policyValid { 2844 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "durability policy <%v> is not a valid policy. Please register it as a policy first", req.DurabilityPolicy) 2845 return nil, err 2846 } 2847 2848 ki.DurabilityPolicy = req.DurabilityPolicy 2849 2850 err = s.ts.UpdateKeyspace(ctx, ki) 2851 if err != nil { 2852 return nil, err 2853 } 2854 2855 return &vtctldatapb.SetKeyspaceDurabilityPolicyResponse{ 2856 Keyspace: ki.Keyspace, 2857 }, nil 2858 } 2859 2860 // SetKeyspaceServedFrom is part of the vtctlservicepb.VtctldServer interface. 2861 func (s *VtctldServer) SetKeyspaceServedFrom(ctx context.Context, req *vtctldatapb.SetKeyspaceServedFromRequest) (resp *vtctldatapb.SetKeyspaceServedFromResponse, err error) { 2862 span, ctx := trace.NewSpan(ctx, "VtctldServer.SetKeyspaceServedFrom") 2863 defer span.Finish() 2864 2865 defer panicHandler(&err) 2866 2867 span.Annotate("keyspace", req.Keyspace) 2868 span.Annotate("tablet_type", topoproto.TabletTypeLString(req.TabletType)) 2869 span.Annotate("cells", strings.Join(req.Cells, ",")) 2870 span.Annotate("remove", req.Remove) 2871 span.Annotate("source_keyspace", req.SourceKeyspace) 2872 2873 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "SetKeyspaceServedFrom") 2874 if lockErr != nil { 2875 err = lockErr 2876 return nil, err 2877 } 2878 2879 defer unlock(&err) 2880 2881 ki, err := s.ts.GetKeyspace(ctx, req.Keyspace) 2882 if err != nil { 2883 return nil, err 2884 } 2885 2886 err = ki.UpdateServedFromMap(req.TabletType, req.Cells, req.SourceKeyspace, req.Remove, nil) 2887 if err != nil { 2888 return nil, err 2889 } 2890 2891 err = s.ts.UpdateKeyspace(ctx, ki) 2892 if err != nil { 2893 return nil, err 2894 } 2895 2896 return &vtctldatapb.SetKeyspaceServedFromResponse{ 2897 Keyspace: ki.Keyspace, 2898 }, nil 2899 } 2900 2901 // SetShardIsPrimaryServing is part of the vtctlservicepb.VtctldServer interface. 2902 func (s *VtctldServer) SetShardIsPrimaryServing(ctx context.Context, req *vtctldatapb.SetShardIsPrimaryServingRequest) (resp *vtctldatapb.SetShardIsPrimaryServingResponse, err error) { 2903 span, ctx := trace.NewSpan(ctx, "VtctldServer.SetShardIsPrimaryServing") 2904 defer span.Finish() 2905 2906 defer panicHandler(&err) 2907 2908 span.Annotate("keyspace", req.Keyspace) 2909 span.Annotate("shard", req.Shard) 2910 span.Annotate("is_serving", req.IsServing) 2911 2912 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, fmt.Sprintf("SetShardIsPrimaryServing(%v,%v,%v)", req.Keyspace, req.Shard, req.IsServing)) 2913 if lockErr != nil { 2914 err = lockErr 2915 return nil, err 2916 } 2917 2918 defer unlock(&err) 2919 2920 si, err := s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error { 2921 si.IsPrimaryServing = req.IsServing 2922 return nil 2923 }) 2924 if err != nil { 2925 return nil, err 2926 } 2927 2928 return &vtctldatapb.SetShardIsPrimaryServingResponse{ 2929 Shard: si.Shard, 2930 }, nil 2931 } 2932 2933 // SetShardTabletControl is part of the vtctlservicepb.VtctldServer interface. 2934 func (s *VtctldServer) SetShardTabletControl(ctx context.Context, req *vtctldatapb.SetShardTabletControlRequest) (resp *vtctldatapb.SetShardTabletControlResponse, err error) { 2935 span, ctx := trace.NewSpan(ctx, "VtctldServer.SetShardTabletControl") 2936 defer span.Finish() 2937 2938 span.Annotate("keyspace", req.Keyspace) 2939 span.Annotate("shard", req.Shard) 2940 span.Annotate("tablet_type", topoproto.TabletTypeLString(req.TabletType)) 2941 span.Annotate("cells", strings.Join(req.Cells, ",")) 2942 span.Annotate("denied_tables", strings.Join(req.DeniedTables, ",")) 2943 span.Annotate("disable_query_service", req.DisableQueryService) 2944 span.Annotate("remove", req.Remove) 2945 2946 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "SetShardTabletControl") 2947 if lockErr != nil { 2948 err = lockErr 2949 return nil, err 2950 } 2951 2952 defer unlock(&err) 2953 2954 si, err := s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error { 2955 return si.UpdateSourceDeniedTables(ctx, req.TabletType, req.Cells, req.Remove, req.DeniedTables) 2956 }) 2957 2958 switch { 2959 case topo.IsErrType(err, topo.NoUpdateNeeded): 2960 // ok, fallthrough to DisableQueryService 2961 case err != nil: 2962 return nil, err 2963 } 2964 2965 if si == nil { // occurs only when UpdateShardFields above returns NoUpdateNeeded 2966 si, err = s.ts.GetShard(ctx, req.Keyspace, req.Shard) 2967 if err != nil { 2968 return nil, err 2969 } 2970 } 2971 if !req.Remove && len(req.DeniedTables) == 0 { 2972 err = s.ts.UpdateDisableQueryService(ctx, req.Keyspace, []*topo.ShardInfo{si}, req.TabletType, req.Cells, req.DisableQueryService) 2973 if err != nil { 2974 return nil, err 2975 } 2976 } 2977 2978 return &vtctldatapb.SetShardTabletControlResponse{ 2979 Shard: si.Shard, 2980 }, nil 2981 } 2982 2983 // SetWritable is part of the vtctldservicepb.VtctldServer interface. 2984 func (s *VtctldServer) SetWritable(ctx context.Context, req *vtctldatapb.SetWritableRequest) (resp *vtctldatapb.SetWritableResponse, err error) { 2985 span, ctx := trace.NewSpan(ctx, "VtctldServer.SetWritable") 2986 defer span.Finish() 2987 2988 defer panicHandler(&err) 2989 2990 if req.TabletAlias == nil { 2991 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "SetWritable.TabletAlias is required") 2992 return nil, err 2993 } 2994 2995 alias := topoproto.TabletAliasString(req.TabletAlias) 2996 span.Annotate("tablet_alias", alias) 2997 span.Annotate("writable", req.Writable) 2998 2999 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 3000 if err != nil { 3001 log.Errorf("SetWritable: failed to read tablet record for %v: %v", alias, err) 3002 return nil, err 3003 } 3004 3005 var f func(context.Context, *topodatapb.Tablet) error 3006 switch req.Writable { 3007 case true: 3008 f = s.tmc.SetReadWrite 3009 case false: 3010 f = s.tmc.SetReadOnly 3011 } 3012 3013 if err = f(ctx, tablet.Tablet); err != nil { 3014 log.Errorf("SetWritable: failed to set writable=%v on %v: %v", req.Writable, alias, err) 3015 return nil, err 3016 } 3017 3018 return &vtctldatapb.SetWritableResponse{}, nil 3019 } 3020 3021 // ShardReplicationAdd is part of the vtctlservicepb.VtctldServer interface. 3022 func (s *VtctldServer) ShardReplicationAdd(ctx context.Context, req *vtctldatapb.ShardReplicationAddRequest) (resp *vtctldatapb.ShardReplicationAddResponse, err error) { 3023 span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationAdd") 3024 defer span.Finish() 3025 3026 defer panicHandler(&err) 3027 3028 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 3029 span.Annotate("keyspace", req.Keyspace) 3030 span.Annotate("shard", req.Shard) 3031 3032 if err = topo.UpdateShardReplicationRecord(ctx, s.ts, req.Keyspace, req.Shard, req.TabletAlias); err != nil { 3033 return nil, err 3034 } 3035 3036 return &vtctldatapb.ShardReplicationAddResponse{}, nil 3037 } 3038 3039 // ShardReplicationFix is part of the vtctlservicepb.VtctldServer interface. 3040 func (s *VtctldServer) ShardReplicationFix(ctx context.Context, req *vtctldatapb.ShardReplicationFixRequest) (resp *vtctldatapb.ShardReplicationFixResponse, err error) { 3041 span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationFix") 3042 defer span.Finish() 3043 3044 defer panicHandler(&err) 3045 3046 span.Annotate("keyspace", req.Keyspace) 3047 span.Annotate("shard", req.Shard) 3048 span.Annotate("cell", req.Cell) 3049 3050 problem, err := topo.FixShardReplication(ctx, s.ts, logutil.NewConsoleLogger(), req.Cell, req.Keyspace, req.Shard) 3051 if err != nil { 3052 return nil, err 3053 } 3054 3055 if problem != nil { 3056 span.Annotate("problem_tablet", topoproto.TabletAliasString(problem.TabletAlias)) 3057 span.Annotate("problem_type", strings.ToLower(topoproto.ShardReplicationErrorTypeString(problem.Type))) 3058 } 3059 3060 return &vtctldatapb.ShardReplicationFixResponse{ 3061 Error: problem, 3062 }, nil 3063 } 3064 3065 // ShardReplicationPositions is part of the vtctldservicepb.VtctldServer interface. 3066 func (s *VtctldServer) ShardReplicationPositions(ctx context.Context, req *vtctldatapb.ShardReplicationPositionsRequest) (resp *vtctldatapb.ShardReplicationPositionsResponse, err error) { 3067 span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationPositions") 3068 defer span.Finish() 3069 3070 defer panicHandler(&err) 3071 3072 span.Annotate("keyspace", req.Keyspace) 3073 span.Annotate("shard", req.Shard) 3074 3075 tabletInfoMap, err := s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard) 3076 if err != nil { 3077 err = fmt.Errorf("GetTabletMapForShard(%s, %s) failed: %w", req.Keyspace, req.Shard, err) 3078 return nil, err 3079 } 3080 3081 log.Infof("Gathering tablet replication status for: %v", tabletInfoMap) 3082 3083 var ( 3084 m sync.Mutex 3085 wg sync.WaitGroup 3086 rec concurrency.AllErrorRecorder 3087 results = make(map[string]*replicationdatapb.Status, len(tabletInfoMap)) 3088 tabletMap = make(map[string]*topodatapb.Tablet, len(tabletInfoMap)) 3089 ) 3090 3091 // For each tablet, we're going to create an individual context, using 3092 // *topo.RemoteOperationTimeout as the maximum timeout (but we'll respect 3093 // any stricter timeout in the parent context). If an individual tablet 3094 // times out fetching its replication position, we won't fail the overall 3095 // request. Instead, we'll log a warning and record a nil entry in the 3096 // result map; that way, the caller can tell the difference between a tablet 3097 // that timed out vs a tablet that didn't get queried at all. 3098 3099 for alias, tabletInfo := range tabletInfoMap { 3100 switch { 3101 case tabletInfo.Type == topodatapb.TabletType_PRIMARY: 3102 wg.Add(1) 3103 3104 go func(ctx context.Context, alias string, tablet *topodatapb.Tablet) { 3105 defer wg.Done() 3106 3107 span, ctx := trace.NewSpan(ctx, "VtctldServer.getPrimaryPosition") 3108 defer span.Finish() 3109 3110 span.Annotate("tablet_alias", alias) 3111 3112 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3113 defer cancel() 3114 3115 var status *replicationdatapb.Status 3116 3117 pos, err := s.tmc.PrimaryPosition(ctx, tablet) 3118 if err != nil { 3119 switch ctx.Err() { 3120 case context.Canceled: 3121 log.Warningf("context canceled before obtaining primary position from %s: %s", alias, err) 3122 case context.DeadlineExceeded: 3123 log.Warningf("context deadline exceeded before obtaining primary position from %s: %s", alias, err) 3124 default: 3125 // The RPC was not timed out or canceled. We treat this 3126 // as a fatal error for the overall request. 3127 rec.RecordError(fmt.Errorf("PrimaryPosition(%s) failed: %w", alias, err)) 3128 return 3129 } 3130 } else { 3131 // No error, record a valid status for this tablet. 3132 status = &replicationdatapb.Status{ 3133 Position: pos, 3134 } 3135 } 3136 3137 m.Lock() 3138 defer m.Unlock() 3139 3140 results[alias] = status 3141 tabletMap[alias] = tablet 3142 }(ctx, alias, tabletInfo.Tablet) 3143 case tabletInfo.IsReplicaType(): 3144 wg.Add(1) 3145 3146 go func(ctx context.Context, alias string, tablet *topodatapb.Tablet) { 3147 defer wg.Done() 3148 3149 span, ctx := trace.NewSpan(ctx, "VtctldServer.getReplicationStatus") 3150 defer span.Finish() 3151 3152 span.Annotate("tablet_alias", alias) 3153 3154 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3155 defer cancel() 3156 3157 status, err := s.tmc.ReplicationStatus(ctx, tablet) 3158 if err != nil { 3159 switch ctx.Err() { 3160 case context.Canceled: 3161 log.Warningf("context canceled before obtaining replication position from %s: %s", alias, err) 3162 case context.DeadlineExceeded: 3163 log.Warningf("context deadline exceeded before obtaining replication position from %s: %s", alias, err) 3164 default: 3165 // The RPC was not timed out or canceled. We treat this 3166 // as a fatal error for the overall request. 3167 rec.RecordError(fmt.Errorf("ReplicationStatus(%s) failed: %s", alias, err)) 3168 return 3169 } 3170 3171 status = nil // Don't record any position for this tablet. 3172 } 3173 3174 m.Lock() 3175 defer m.Unlock() 3176 3177 results[alias] = status 3178 tabletMap[alias] = tablet 3179 }(ctx, alias, tabletInfo.Tablet) 3180 } 3181 } 3182 3183 wg.Wait() 3184 3185 if rec.HasErrors() { 3186 err = rec.Error() 3187 return nil, err 3188 } 3189 3190 return &vtctldatapb.ShardReplicationPositionsResponse{ 3191 ReplicationStatuses: results, 3192 TabletMap: tabletMap, 3193 }, nil 3194 } 3195 3196 // ShardReplicationRemove is part of the vtctlservicepb.VtctldServer interface. 3197 func (s *VtctldServer) ShardReplicationRemove(ctx context.Context, req *vtctldatapb.ShardReplicationRemoveRequest) (resp *vtctldatapb.ShardReplicationRemoveResponse, err error) { 3198 span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationRemove") 3199 defer span.Finish() 3200 3201 defer panicHandler(&err) 3202 3203 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 3204 span.Annotate("keyspace", req.Keyspace) 3205 span.Annotate("shard", req.Shard) 3206 3207 if err = topo.RemoveShardReplicationRecord(ctx, s.ts, req.TabletAlias.Cell, req.Keyspace, req.Shard, req.TabletAlias); err != nil { 3208 return nil, err 3209 } 3210 3211 return &vtctldatapb.ShardReplicationRemoveResponse{}, nil 3212 } 3213 3214 // SleepTablet is part of the vtctlservicepb.VtctldServer interface. 3215 func (s *VtctldServer) SleepTablet(ctx context.Context, req *vtctldatapb.SleepTabletRequest) (resp *vtctldatapb.SleepTabletResponse, err error) { 3216 span, ctx := trace.NewSpan(ctx, "VtctldServer.SleepTablet") 3217 defer span.Finish() 3218 3219 defer panicHandler(&err) 3220 3221 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias)) 3222 3223 dur, ok, err := protoutil.DurationFromProto(req.Duration) 3224 if err != nil { 3225 return nil, err 3226 } else if !ok { 3227 dur = topo.RemoteOperationTimeout 3228 } 3229 3230 span.Annotate("sleep_duration", dur.String()) 3231 3232 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 3233 if err != nil { 3234 return nil, err 3235 } 3236 3237 err = s.tmc.Sleep(ctx, tablet.Tablet, dur) 3238 if err != nil { 3239 return nil, err 3240 } 3241 3242 return &vtctldatapb.SleepTabletResponse{}, nil 3243 } 3244 3245 // SourceShardAdd is part of the vtctlservicepb.VtctldServer interface. 3246 func (s *VtctldServer) SourceShardAdd(ctx context.Context, req *vtctldatapb.SourceShardAddRequest) (resp *vtctldatapb.SourceShardAddResponse, err error) { 3247 span, ctx := trace.NewSpan(ctx, "VtctldServer.SourceShardAdd") 3248 defer span.Finish() 3249 3250 defer panicHandler(&err) 3251 3252 span.Annotate("keyspace", req.Keyspace) 3253 span.Annotate("shard", req.Shard) 3254 span.Annotate("uid", req.Uid) 3255 span.Annotate("source_keyspace", req.SourceKeyspace) 3256 span.Annotate("source_shard", req.SourceShard) 3257 span.Annotate("keyrange", key.KeyRangeString(req.KeyRange)) 3258 span.Annotate("tables", strings.Join(req.Tables, ",")) 3259 3260 var si *topo.ShardInfo 3261 3262 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, fmt.Sprintf("SourceShardAdd(%v)", req.Uid)) 3263 if lockErr != nil { 3264 err = lockErr 3265 return nil, err 3266 } 3267 defer unlock(&err) 3268 3269 si, err = s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error { 3270 for _, ss := range si.SourceShards { 3271 if ss.Uid == req.Uid { 3272 return fmt.Errorf("%w: uid %v is already in use", topo.NewError(topo.NoUpdateNeeded, fmt.Sprintf("%s/%s", req.Keyspace, req.Shard)), req.Uid) 3273 } 3274 } 3275 3276 si.SourceShards = append(si.SourceShards, &topodatapb.Shard_SourceShard{ 3277 Keyspace: req.SourceKeyspace, 3278 Shard: req.SourceShard, 3279 Uid: req.Uid, 3280 KeyRange: req.KeyRange, 3281 Tables: req.Tables, 3282 }) 3283 return nil 3284 }) 3285 if err != nil { 3286 return nil, err 3287 } 3288 3289 resp = &vtctldatapb.SourceShardAddResponse{} 3290 switch si { 3291 case nil: 3292 // If we return NoUpdateNeeded from ts.UpdateShardFields, then we don't 3293 // get a ShardInfo back. 3294 default: 3295 resp.Shard = si.Shard 3296 } 3297 3298 return resp, err 3299 } 3300 3301 // SourceShardDelete is part of the vtctlservicepb.VtctldServer interface. 3302 func (s *VtctldServer) SourceShardDelete(ctx context.Context, req *vtctldatapb.SourceShardDeleteRequest) (resp *vtctldatapb.SourceShardDeleteResponse, err error) { 3303 span, ctx := trace.NewSpan(ctx, "VtctldServer.SourceShardDelete") 3304 defer span.Finish() 3305 3306 defer panicHandler(&err) 3307 3308 span.Annotate("keyspace", req.Keyspace) 3309 span.Annotate("shard", req.Shard) 3310 span.Annotate("uid", req.Uid) 3311 3312 var si *topo.ShardInfo 3313 3314 ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, fmt.Sprintf("SourceShardDelete(%v)", req.Uid)) 3315 if lockErr != nil { 3316 err = lockErr 3317 return nil, err 3318 } 3319 defer unlock(&err) 3320 3321 si, err = s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error { 3322 var newSourceShards []*topodatapb.Shard_SourceShard 3323 for _, ss := range si.SourceShards { 3324 if ss.Uid != req.Uid { 3325 newSourceShards = append(newSourceShards, ss) 3326 } 3327 } 3328 3329 if len(newSourceShards) == len(si.SourceShards) { 3330 return fmt.Errorf("%w: no SourceShard with uid %v", topo.NewError(topo.NoUpdateNeeded, fmt.Sprintf("%s/%s", req.Keyspace, req.Shard)), req.Uid) 3331 } 3332 3333 si.SourceShards = newSourceShards 3334 return nil 3335 }) 3336 if err != nil { 3337 return nil, err 3338 } 3339 3340 resp = &vtctldatapb.SourceShardDeleteResponse{} 3341 switch si { 3342 case nil: 3343 // If we return NoUpdateNeeded from ts.UpdateShardFields, then we don't 3344 // get a ShardInfo back. 3345 default: 3346 resp.Shard = si.Shard 3347 } 3348 3349 return resp, err 3350 } 3351 3352 // StartReplication is part of the vtctldservicepb.VtctldServer interface. 3353 func (s *VtctldServer) StartReplication(ctx context.Context, req *vtctldatapb.StartReplicationRequest) (resp *vtctldatapb.StartReplicationResponse, err error) { 3354 span, ctx := trace.NewSpan(ctx, "VtctldServer.StartReplication") 3355 defer span.Finish() 3356 3357 defer panicHandler(&err) 3358 3359 if req.TabletAlias == nil { 3360 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "StartReplication.TabletAlias is required") 3361 return nil, err 3362 } 3363 3364 alias := topoproto.TabletAliasString(req.TabletAlias) 3365 span.Annotate("tablet_alias", alias) 3366 3367 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 3368 if err != nil { 3369 log.Errorf("StartReplication: failed to read tablet record for %v: %v", alias, err) 3370 return nil, err 3371 } 3372 3373 shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) 3374 if err != nil { 3375 return nil, err 3376 } 3377 3378 if !shard.HasPrimary() { 3379 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no primary tablet for shard %v/%v", tablet.Keyspace, tablet.Shard) 3380 return nil, err 3381 } 3382 3383 shardPrimary, err := s.ts.GetTablet(ctx, shard.PrimaryAlias) 3384 if err != nil { 3385 err = fmt.Errorf("cannot lookup primary tablet %v for shard %v/%v: %w", topoproto.TabletAliasString(shard.PrimaryAlias), tablet.Keyspace, tablet.Shard, err) 3386 return nil, err 3387 } 3388 3389 if shardPrimary.Type != topodatapb.TabletType_PRIMARY { 3390 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "TopologyServer has incosistent state for shard primary %v", topoproto.TabletAliasString(shard.PrimaryAlias)) 3391 return nil, err 3392 } 3393 3394 if shardPrimary.Keyspace != tablet.Keyspace || shardPrimary.Shard != tablet.Shard { 3395 err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary %v and replica %v not in same keypace shard (%v/%v)", topoproto.TabletAliasString(shard.PrimaryAlias), topoproto.TabletAliasString(tablet.Alias), tablet.Keyspace, tablet.Shard) 3396 return nil, err 3397 } 3398 3399 durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace) 3400 if err != nil { 3401 return nil, err 3402 } 3403 log.Infof("Getting a new durability policy for %v", durabilityName) 3404 durability, err := reparentutil.GetDurabilityPolicy(durabilityName) 3405 if err != nil { 3406 return nil, err 3407 } 3408 3409 if err = s.tmc.StartReplication(ctx, tablet.Tablet, reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, tablet.Tablet)); err != nil { 3410 log.Errorf("StartReplication: failed to start replication on %v: %v", alias, err) 3411 return nil, err 3412 } 3413 3414 return &vtctldatapb.StartReplicationResponse{}, nil 3415 } 3416 3417 // StopReplication is part of the vtctldservicepb.VtctldServer interface. 3418 func (s *VtctldServer) StopReplication(ctx context.Context, req *vtctldatapb.StopReplicationRequest) (resp *vtctldatapb.StopReplicationResponse, err error) { 3419 span, ctx := trace.NewSpan(ctx, "VtctldServer.StopReplication") 3420 defer span.Finish() 3421 3422 defer panicHandler(&err) 3423 3424 if req.TabletAlias == nil { 3425 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "StopReplication.TabletAlias is required") 3426 return nil, err 3427 } 3428 3429 alias := topoproto.TabletAliasString(req.TabletAlias) 3430 span.Annotate("tablet_alias", alias) 3431 3432 tablet, err := s.ts.GetTablet(ctx, req.TabletAlias) 3433 if err != nil { 3434 log.Errorf("StopReplication: failed to read tablet record for %v: %v", alias, err) 3435 return nil, err 3436 } 3437 3438 if err := s.tmc.StopReplication(ctx, tablet.Tablet); err != nil { 3439 log.Errorf("StopReplication: failed to stop replication on %v: %v", alias, err) 3440 return nil, err 3441 } 3442 3443 return &vtctldatapb.StopReplicationResponse{}, nil 3444 } 3445 3446 // TabletExternallyReparented is part of the vtctldservicepb.VtctldServer interface. 3447 func (s *VtctldServer) TabletExternallyReparented(ctx context.Context, req *vtctldatapb.TabletExternallyReparentedRequest) (resp *vtctldatapb.TabletExternallyReparentedResponse, err error) { 3448 span, ctx := trace.NewSpan(ctx, "VtctldServer.TabletExternallyReparented") 3449 defer span.Finish() 3450 3451 defer panicHandler(&err) 3452 3453 if req.Tablet == nil { 3454 err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "TabletExternallyReparentedRequest.Tablet must not be nil") 3455 return nil, err 3456 } 3457 3458 span.Annotate("tablet_alias", topoproto.TabletAliasString(req.Tablet)) 3459 3460 tablet, err := s.ts.GetTablet(ctx, req.Tablet) 3461 if err != nil { 3462 log.Warningf("TabletExternallyReparented: failed to read tablet record for %v: %v", topoproto.TabletAliasString(req.Tablet), err) 3463 return nil, err 3464 } 3465 3466 shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard) 3467 if err != nil { 3468 log.Warningf("TabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err) 3469 return nil, err 3470 } 3471 3472 resp = &vtctldatapb.TabletExternallyReparentedResponse{ 3473 Keyspace: shard.Keyspace(), 3474 Shard: shard.ShardName(), 3475 NewPrimary: req.Tablet, 3476 OldPrimary: shard.PrimaryAlias, 3477 } 3478 3479 // If the externally reparented (new primary) tablet is already PRIMARY in 3480 // the topo, this is a no-op. 3481 if tablet.Type == topodatapb.TabletType_PRIMARY { 3482 return resp, nil 3483 } 3484 3485 log.Infof("TabletExternallyReparented: executing tablet type change %v -> PRIMARY on %v", tablet.Type, topoproto.TabletAliasString(req.Tablet)) 3486 ev := &events.Reparent{ 3487 ShardInfo: *shard, 3488 NewPrimary: proto.Clone(tablet.Tablet).(*topodatapb.Tablet), 3489 OldPrimary: &topodatapb.Tablet{ 3490 Alias: shard.PrimaryAlias, 3491 Type: topodatapb.TabletType_PRIMARY, 3492 }, 3493 } 3494 3495 defer func() { 3496 // Ensure we dispatch an update with any failure. 3497 if err != nil { 3498 event.DispatchUpdate(ev, "failed: "+err.Error()) 3499 } 3500 }() 3501 3502 event.DispatchUpdate(ev, "starting external reparent") 3503 3504 durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace) 3505 if err != nil { 3506 return nil, err 3507 } 3508 log.Infof("Getting a new durability policy for %v", durabilityName) 3509 durability, err := reparentutil.GetDurabilityPolicy(durabilityName) 3510 if err != nil { 3511 return nil, err 3512 } 3513 3514 if err = s.tmc.ChangeType(ctx, tablet.Tablet, topodatapb.TabletType_PRIMARY, reparentutil.SemiSyncAckers(durability, tablet.Tablet) > 0); err != nil { 3515 log.Warningf("ChangeType(%v, PRIMARY): %v", topoproto.TabletAliasString(req.Tablet), err) 3516 return nil, err 3517 } 3518 3519 event.DispatchUpdate(ev, "finished") 3520 3521 return resp, nil 3522 } 3523 3524 // UpdateCellInfo is part of the vtctlservicepb.VtctldServer interface. 3525 func (s *VtctldServer) UpdateCellInfo(ctx context.Context, req *vtctldatapb.UpdateCellInfoRequest) (resp *vtctldatapb.UpdateCellInfoResponse, err error) { 3526 span, ctx := trace.NewSpan(ctx, "VtctldServer.UpdateCellInfo") 3527 defer span.Finish() 3528 3529 defer panicHandler(&err) 3530 3531 span.Annotate("cell", req.Name) 3532 span.Annotate("cell_server_address", req.CellInfo.ServerAddress) 3533 span.Annotate("cell_root", req.CellInfo.Root) 3534 3535 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3536 defer cancel() 3537 3538 var updatedCi *topodatapb.CellInfo 3539 err = s.ts.UpdateCellInfoFields(ctx, req.Name, func(ci *topodatapb.CellInfo) error { 3540 defer func() { 3541 updatedCi = proto.Clone(ci).(*topodatapb.CellInfo) 3542 }() 3543 3544 changed := false 3545 3546 if req.CellInfo.ServerAddress != "" && req.CellInfo.ServerAddress != ci.ServerAddress { 3547 changed = true 3548 ci.ServerAddress = req.CellInfo.ServerAddress 3549 } 3550 3551 if req.CellInfo.Root != "" && req.CellInfo.Root != ci.Root { 3552 changed = true 3553 ci.Root = req.CellInfo.Root 3554 } 3555 3556 if !changed { 3557 return topo.NewError(topo.NoUpdateNeeded, req.Name) 3558 } 3559 3560 return nil 3561 }) 3562 3563 if err != nil { 3564 return nil, err 3565 } 3566 3567 return &vtctldatapb.UpdateCellInfoResponse{ 3568 Name: req.Name, 3569 CellInfo: updatedCi, 3570 }, nil 3571 } 3572 3573 // UpdateCellsAlias is part of the vtctlservicepb.VtctldServer interface. 3574 func (s *VtctldServer) UpdateCellsAlias(ctx context.Context, req *vtctldatapb.UpdateCellsAliasRequest) (resp *vtctldatapb.UpdateCellsAliasResponse, err error) { 3575 span, ctx := trace.NewSpan(ctx, "VtctldServer.UpdateCellsAlias") 3576 defer span.Finish() 3577 3578 defer panicHandler(&err) 3579 3580 span.Annotate("cells_alias", req.Name) 3581 span.Annotate("cells_alias_cells", strings.Join(req.CellsAlias.Cells, ",")) 3582 3583 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3584 defer cancel() 3585 3586 var updatedCa *topodatapb.CellsAlias 3587 err = s.ts.UpdateCellsAlias(ctx, req.Name, func(ca *topodatapb.CellsAlias) error { 3588 defer func() { 3589 updatedCa = proto.Clone(ca).(*topodatapb.CellsAlias) 3590 }() 3591 3592 ca.Cells = req.CellsAlias.Cells 3593 return nil 3594 }) 3595 3596 if err != nil { 3597 return nil, err 3598 } 3599 3600 return &vtctldatapb.UpdateCellsAliasResponse{ 3601 Name: req.Name, 3602 CellsAlias: updatedCa, 3603 }, nil 3604 } 3605 3606 // Validate is part of the vtctlservicepb.VtctldServer interface. 3607 func (s *VtctldServer) Validate(ctx context.Context, req *vtctldatapb.ValidateRequest) (resp *vtctldatapb.ValidateResponse, err error) { 3608 span, ctx := trace.NewSpan(ctx, "VtctldServer.Validate") 3609 defer span.Finish() 3610 3611 defer panicHandler(&err) 3612 3613 span.Annotate("ping_tablets", req.PingTablets) 3614 3615 resp = &vtctldatapb.ValidateResponse{} 3616 getKeyspacesCtx, getKeyspacesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3617 defer getKeyspacesCancel() 3618 3619 keyspaces, err := s.ts.GetKeyspaces(getKeyspacesCtx) 3620 if err != nil { 3621 resp.Results = append(resp.Results, fmt.Sprintf("GetKeyspaces failed: %v", err)) 3622 return resp, nil 3623 } 3624 3625 var ( 3626 m sync.Mutex 3627 wg sync.WaitGroup 3628 ) 3629 3630 wg.Add(1) 3631 go func() { 3632 defer wg.Done() 3633 validateAllTablets := func(ctx context.Context, keyspaces []string) { 3634 span, ctx := trace.NewSpan(ctx, "VtctldServer.validateAllTablets") 3635 defer span.Finish() 3636 3637 cellSet := sets.New[string]() 3638 for _, keyspace := range keyspaces { 3639 getShardNamesCtx, getShardNamesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3640 shards, err := s.ts.GetShardNames(getShardNamesCtx, keyspace) 3641 getShardNamesCancel() // don't defer in a loop 3642 3643 if err != nil { 3644 m.Lock() 3645 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err)) 3646 m.Unlock() 3647 continue 3648 } 3649 3650 for _, shard := range shards { 3651 findAllTabletAliasesCtx, findAllTabletAliasesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3652 aliases, err := s.ts.FindAllTabletAliasesInShard(findAllTabletAliasesCtx, keyspace, shard) 3653 findAllTabletAliasesCancel() // don't defer in a loop 3654 3655 if err != nil { 3656 m.Lock() 3657 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.FindAllTabletAliasesInShard(%v/%v) failed: %v", keyspace, shard, err)) 3658 m.Unlock() 3659 continue 3660 } 3661 3662 for _, alias := range aliases { 3663 cellSet.Insert(alias.Cell) 3664 } 3665 } 3666 } 3667 3668 for _, cell := range sets.List(cellSet) { 3669 getTabletsByCellCtx, getTabletsByCellCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3670 aliases, err := s.ts.GetTabletAliasesByCell(getTabletsByCellCtx, cell) 3671 getTabletsByCellCancel() // don't defer in a loop 3672 3673 if err != nil { 3674 m.Lock() 3675 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetTabletsByCell(%v) failed: %v", cell, err)) 3676 m.Unlock() 3677 continue 3678 } 3679 3680 for _, alias := range aliases { 3681 wg.Add(1) 3682 go func(alias *topodatapb.TabletAlias) { 3683 defer wg.Done() 3684 3685 span, ctx := trace.NewSpan(ctx, "VtctldServer.validateTablet") 3686 defer span.Finish() 3687 3688 key := topoproto.TabletAliasString(alias) 3689 span.Annotate("tablet_alias", key) 3690 3691 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3692 defer cancel() 3693 3694 if err := topo.Validate(ctx, s.ts, alias); err != nil { 3695 m.Lock() 3696 defer m.Unlock() 3697 3698 resp.Results = append(resp.Results, fmt.Sprintf("topo.Validate(%v) failed: %v", key, err)) 3699 return 3700 } 3701 3702 log.Infof("tablet %v is valid", key) 3703 }(alias) 3704 } 3705 } 3706 } 3707 3708 validateAllTablets(ctx, keyspaces) 3709 }() 3710 3711 resp.ResultsByKeyspace = make(map[string]*vtctldatapb.ValidateKeyspaceResponse, len(keyspaces)) 3712 3713 for _, keyspace := range keyspaces { 3714 wg.Add(1) 3715 go func(keyspace string) { 3716 defer wg.Done() 3717 keyspaceResp, err := s.ValidateKeyspace(ctx, &vtctldatapb.ValidateKeyspaceRequest{ 3718 Keyspace: keyspace, 3719 PingTablets: req.PingTablets, 3720 }) 3721 3722 m.Lock() 3723 defer m.Unlock() 3724 3725 if err != nil { 3726 resp.ResultsByKeyspace[keyspace] = &vtctldatapb.ValidateKeyspaceResponse{ 3727 Results: []string{fmt.Sprintf("failed to validate: %v", err)}, 3728 } 3729 return 3730 } 3731 3732 resp.ResultsByKeyspace[keyspace] = keyspaceResp 3733 }(keyspace) 3734 } 3735 3736 wg.Wait() 3737 return resp, err 3738 } 3739 3740 // ValidateKeyspace is part of the vtctlservicepb.VtctldServer interface. 3741 func (s *VtctldServer) ValidateKeyspace(ctx context.Context, req *vtctldatapb.ValidateKeyspaceRequest) (resp *vtctldatapb.ValidateKeyspaceResponse, err error) { 3742 span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateKeyspace") 3743 defer span.Finish() 3744 3745 defer panicHandler(&err) 3746 3747 span.Annotate("keyspace", req.Keyspace) 3748 span.Annotate("ping_tablets", req.PingTablets) 3749 3750 resp = &vtctldatapb.ValidateKeyspaceResponse{} 3751 getShardNamesCtx, getShardNamesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3752 defer getShardNamesCancel() 3753 3754 shards, err := s.ts.GetShardNames(getShardNamesCtx, req.Keyspace) 3755 if err != nil { 3756 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", req.Keyspace, err)) 3757 err = nil 3758 return resp, err 3759 } 3760 3761 resp.ResultsByShard = make(map[string]*vtctldatapb.ValidateShardResponse, len(shards)) 3762 3763 var ( 3764 m sync.Mutex 3765 wg sync.WaitGroup 3766 ) 3767 for _, shard := range shards { 3768 wg.Add(1) 3769 go func(shard string) { 3770 defer wg.Done() 3771 shardResp, err := s.ValidateShard(ctx, &vtctldatapb.ValidateShardRequest{ 3772 Keyspace: req.Keyspace, 3773 Shard: shard, 3774 PingTablets: req.PingTablets, 3775 }) 3776 3777 m.Lock() 3778 defer m.Unlock() 3779 3780 if err != nil { 3781 resp.Results = append(resp.Results, fmt.Sprintf("error validating shard %v/%v: %v", req.Keyspace, shard, err)) 3782 return 3783 } 3784 3785 resp.ResultsByShard[shard] = shardResp 3786 }(shard) 3787 } 3788 3789 wg.Wait() 3790 return resp, err 3791 } 3792 3793 // ValidateSchemaKeyspace is a part of the vtctlservicepb.VtctldServer interface. 3794 // It will diff the schema from all the tablets in the keyspace. 3795 func (s *VtctldServer) ValidateSchemaKeyspace(ctx context.Context, req *vtctldatapb.ValidateSchemaKeyspaceRequest) (resp *vtctldatapb.ValidateSchemaKeyspaceResponse, err error) { 3796 span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateSchemaKeyspace") 3797 defer span.Finish() 3798 3799 defer panicHandler(&err) 3800 3801 span.Annotate("keyspace", req.Keyspace) 3802 keyspace := req.Keyspace 3803 3804 resp = &vtctldatapb.ValidateSchemaKeyspaceResponse{ 3805 Results: []string{}, 3806 } 3807 3808 shards, err := s.ts.GetShardNames(ctx, keyspace) 3809 if err != nil { 3810 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", req.Keyspace, err)) 3811 err = nil 3812 return resp, err 3813 } 3814 3815 resp.ResultsByShard = make(map[string]*vtctldatapb.ValidateShardResponse, len(shards)) 3816 3817 // Initiate individual shard results first 3818 for _, shard := range shards { 3819 resp.ResultsByShard[shard] = &vtctldatapb.ValidateShardResponse{ 3820 Results: []string{}, 3821 } 3822 } 3823 3824 if req.IncludeVschema { 3825 results, err2 := s.ValidateVSchema(ctx, &vtctldatapb.ValidateVSchemaRequest{ 3826 Keyspace: keyspace, 3827 Shards: shards, 3828 ExcludeTables: req.ExcludeTables, 3829 IncludeViews: req.IncludeViews, 3830 }) 3831 if err2 != nil { 3832 err = err2 3833 return nil, err 3834 } 3835 3836 if len(results.Results) > 0 { 3837 resp.Results = append(resp.Results, results.Results...) 3838 for shard, shardResults := range resp.ResultsByShard { 3839 resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, shardResults.Results...) 3840 } 3841 return resp, err 3842 } 3843 } 3844 3845 sort.Strings(shards) 3846 3847 var ( 3848 referenceSchema *tabletmanagerdatapb.SchemaDefinition 3849 referenceAlias *topodatapb.TabletAlias 3850 m sync.Mutex 3851 wg sync.WaitGroup 3852 ) 3853 3854 r := &tabletmanagerdatapb.GetSchemaRequest{ExcludeTables: req.ExcludeTables, IncludeViews: req.IncludeViews} 3855 for _, shard := range shards[0:] { 3856 wg.Add(1) 3857 go func(shard string) { 3858 defer wg.Done() 3859 3860 si, err := s.ts.GetShard(ctx, keyspace, shard) 3861 3862 m.Lock() 3863 defer m.Unlock() 3864 3865 if err != nil { 3866 errMessage := fmt.Sprintf("GetShard(%v, %v) failed: %v", keyspace, shard, err) 3867 resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage) 3868 resp.Results = append(resp.Results, errMessage) 3869 return 3870 } 3871 3872 if !si.HasPrimary() { 3873 if !req.SkipNoPrimary { 3874 errMessage := fmt.Sprintf("no primary in shard %v/%v", keyspace, shard) 3875 resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage) 3876 resp.Results = append(resp.Results, errMessage) 3877 } 3878 return 3879 } 3880 3881 if referenceSchema == nil { 3882 referenceAlias = si.PrimaryAlias 3883 referenceSchema, err = schematools.GetSchema(ctx, s.ts, s.tmc, referenceAlias, r) 3884 if err != nil { 3885 return 3886 } 3887 } 3888 3889 aliases, err := s.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) 3890 if err != nil { 3891 errMessage := fmt.Sprintf("FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err) 3892 resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage) 3893 resp.Results = append(resp.Results, errMessage) 3894 return 3895 } 3896 3897 aliasWg := sync.WaitGroup{} 3898 aliasErrs := concurrency.AllErrorRecorder{} 3899 3900 for _, alias := range aliases { 3901 if referenceAlias == alias { 3902 continue 3903 } 3904 aliasWg.Add(1) 3905 go func(alias *topodatapb.TabletAlias) { 3906 defer aliasWg.Done() 3907 replicaSchema, err := schematools.GetSchema(ctx, s.ts, s.tmc, alias, r) 3908 if err != nil { 3909 aliasErrs.RecordError(fmt.Errorf("GetSchema(%v, nil, %v, %v) failed: %v", alias, req.ExcludeTables, req.IncludeViews, err)) 3910 return 3911 } 3912 3913 tmutils.DiffSchema(topoproto.TabletAliasString(referenceAlias), referenceSchema, topoproto.TabletAliasString(alias), replicaSchema, &aliasErrs) 3914 }(alias) 3915 } 3916 aliasWg.Wait() 3917 3918 if aliasErrs.HasErrors() { 3919 for _, err := range aliasErrs.Errors { 3920 errMessage := err.Error() 3921 resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage) 3922 resp.Results = append(resp.Results, errMessage) 3923 } 3924 } 3925 }(shard) 3926 } 3927 3928 wg.Wait() 3929 3930 return resp, err 3931 } 3932 3933 // ValidateShard is part of the vtctlservicepb.VtctldServer interface. 3934 func (s *VtctldServer) ValidateShard(ctx context.Context, req *vtctldatapb.ValidateShardRequest) (resp *vtctldatapb.ValidateShardResponse, err error) { 3935 span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateShard") 3936 defer span.Finish() 3937 3938 defer panicHandler(&err) 3939 3940 span.Annotate("keyspace", req.Keyspace) 3941 span.Annotate("shard", req.Shard) 3942 span.Annotate("ping_tablets", req.PingTablets) 3943 3944 resp = &vtctldatapb.ValidateShardResponse{} 3945 getShardCtx, getShardCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3946 defer getShardCancel() 3947 3948 si, err := s.ts.GetShard(getShardCtx, req.Keyspace, req.Shard) 3949 if err != nil { 3950 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShard(%v, %v) failed: %v", req.Keyspace, req.Shard, err)) 3951 err = nil 3952 return resp, err 3953 } 3954 3955 findAllTabletAliasesCtx, findAllTabletAliasesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3956 defer findAllTabletAliasesCancel() 3957 3958 aliases, err := s.ts.FindAllTabletAliasesInShard(findAllTabletAliasesCtx, req.Keyspace, req.Shard) 3959 if err != nil { 3960 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", req.Keyspace, req.Shard, err)) 3961 err = nil 3962 return resp, err 3963 } 3964 3965 getTabletMapCtx, getTabletMapCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 3966 defer getTabletMapCancel() 3967 tabletMap, _ := s.ts.GetTabletMap(getTabletMapCtx, aliases) 3968 3969 var primaryAlias *topodatapb.TabletAlias 3970 for _, alias := range aliases { 3971 key := topoproto.TabletAliasString(alias) 3972 ti, ok := tabletMap[key] 3973 if !ok { 3974 resp.Results = append(resp.Results, fmt.Sprintf("tablet %v not found in map", key)) 3975 continue 3976 } 3977 3978 if ti.Type == topodatapb.TabletType_PRIMARY { 3979 switch primaryAlias { 3980 case nil: 3981 primaryAlias = alias 3982 default: 3983 resp.Results = append(resp.Results, fmt.Sprintf("shard %v/%v already has primary %v but found other primary %v", req.Keyspace, req.Shard, topoproto.TabletAliasString(primaryAlias), key)) 3984 } 3985 } 3986 } 3987 3988 if primaryAlias == nil { 3989 resp.Results = append(resp.Results, fmt.Sprintf("no primary for shard %v/%v", req.Keyspace, req.Shard)) 3990 } else if !topoproto.TabletAliasEqual(si.PrimaryAlias, primaryAlias) { 3991 resp.Results = append(resp.Results, fmt.Sprintf("primary mismatch for shard %v/%v: found %v, expected %v", si.Keyspace(), si.ShardName(), topoproto.TabletAliasString(primaryAlias), topoproto.TabletAliasString(si.PrimaryAlias))) 3992 } 3993 3994 var ( 3995 wg sync.WaitGroup 3996 results = make(chan string, len(aliases)) 3997 ) 3998 3999 for _, alias := range aliases { 4000 wg.Add(1) 4001 go func(alias *topodatapb.TabletAlias) { 4002 defer wg.Done() 4003 4004 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 4005 defer cancel() 4006 4007 if err := topo.Validate(ctx, s.ts, alias); err != nil { 4008 results <- fmt.Sprintf("topo.Validate(%v) failed: %v", topoproto.TabletAliasString(alias), err) 4009 return 4010 } 4011 4012 log.Infof("tablet %v is valid", topoproto.TabletAliasString(alias)) 4013 }(alias) 4014 } 4015 4016 if req.PingTablets { 4017 validateReplication := func(ctx context.Context, si *topo.ShardInfo, tabletMap map[string]*topo.TabletInfo, results chan<- string) { 4018 if si.PrimaryAlias == nil { 4019 results <- fmt.Sprintf("no primary in shard record %v/%v", si.Keyspace(), si.ShardName()) 4020 return 4021 } 4022 4023 shardPrimaryAliasStr := topoproto.TabletAliasString(si.PrimaryAlias) 4024 primaryTabletInfo, ok := tabletMap[shardPrimaryAliasStr] 4025 if !ok { 4026 results <- fmt.Sprintf("primary %v not in tablet map", shardPrimaryAliasStr) 4027 return 4028 } 4029 4030 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 4031 defer cancel() 4032 4033 replicaList, err := s.tmc.GetReplicas(ctx, primaryTabletInfo.Tablet) 4034 if err != nil { 4035 results <- fmt.Sprintf("GetReplicas(%v) failed: %v", primaryTabletInfo, err) 4036 return 4037 } 4038 4039 if len(replicaList) == 0 { 4040 results <- fmt.Sprintf("no replicas of tablet %v found", shardPrimaryAliasStr) 4041 return 4042 } 4043 4044 tabletIPMap := make(map[string]*topodatapb.Tablet) 4045 replicaIPMap := make(map[string]bool) 4046 for _, tablet := range tabletMap { 4047 ip, err := topoproto.MySQLIP(tablet.Tablet) 4048 if err != nil { 4049 results <- fmt.Sprintf("could not resolve IP for tablet %s: %v", tablet.Tablet.MysqlHostname, err) 4050 continue 4051 } 4052 4053 tabletIPMap[netutil.NormalizeIP(ip)] = tablet.Tablet 4054 } 4055 4056 // See if every replica is in the replication graph. 4057 for _, replicaAddr := range replicaList { 4058 if tabletIPMap[netutil.NormalizeIP(replicaAddr)] == nil { 4059 results <- fmt.Sprintf("replica %v not in replication graph for shard %v/%v (mysql instance without vttablet?)", replicaAddr, si.Keyspace(), si.ShardName()) 4060 } 4061 4062 replicaIPMap[netutil.NormalizeIP(replicaAddr)] = true 4063 } 4064 4065 // See if every entry in the replication graph is connected to the primary. 4066 for _, tablet := range tabletMap { 4067 if !tablet.IsReplicaType() { 4068 continue 4069 } 4070 4071 ip, err := topoproto.MySQLIP(tablet.Tablet) 4072 if err != nil { 4073 results <- fmt.Sprintf("could not resolve IP for tablet %s: %v", tablet.Tablet.MysqlHostname, err) 4074 continue 4075 } 4076 4077 if !replicaIPMap[netutil.NormalizeIP(ip)] { 4078 results <- fmt.Sprintf("replica %v not replicating: %v replica list: %q", topoproto.TabletAliasString(tablet.Alias), ip, replicaList) 4079 } 4080 } 4081 } 4082 pingTablets := func(ctx context.Context, tabletMap map[string]*topo.TabletInfo, results chan<- string) { 4083 for alias, ti := range tabletMap { 4084 wg.Add(1) 4085 go func(alias string, ti *topo.TabletInfo) { 4086 defer wg.Done() 4087 4088 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 4089 defer cancel() 4090 4091 if err := s.tmc.Ping(ctx, ti.Tablet); err != nil { 4092 results <- fmt.Sprintf("Ping(%v) failed: %v tablet hostname: %v", alias, err, ti.Hostname) 4093 } 4094 }(alias, ti) 4095 } 4096 } 4097 4098 validateReplication(ctx, si, tabletMap, results) // done synchronously 4099 pingTablets(ctx, tabletMap, results) // done async, using the waitgroup declared above in the main method body. 4100 } 4101 4102 done := make(chan bool) 4103 go func() { 4104 for result := range results { 4105 resp.Results = append(resp.Results, result) 4106 } 4107 done <- true 4108 }() 4109 4110 wg.Wait() 4111 close(results) 4112 <-done 4113 4114 return resp, err 4115 } 4116 4117 // ValidateVersionKeyspace validates all versions are the same in all 4118 // tablets in a keyspace 4119 func (s *VtctldServer) ValidateVersionKeyspace(ctx context.Context, req *vtctldatapb.ValidateVersionKeyspaceRequest) (resp *vtctldatapb.ValidateVersionKeyspaceResponse, err error) { 4120 span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateVersionKeyspace") 4121 defer span.Finish() 4122 4123 defer panicHandler(&err) 4124 4125 keyspace := req.Keyspace 4126 shards, err := s.ts.GetShardNames(ctx, keyspace) 4127 resp = &vtctldatapb.ValidateVersionKeyspaceResponse{ 4128 Results: []string{}, 4129 ResultsByShard: make(map[string]*vtctldatapb.ValidateShardResponse, len(shards)), 4130 } 4131 4132 if err != nil { 4133 resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err)) 4134 err = nil 4135 return 4136 } 4137 4138 if len(shards) == 0 { 4139 resp.Results = append(resp.Results, fmt.Sprintf("no shards in keyspace %v", keyspace)) 4140 return 4141 } 4142 4143 si, err := s.ts.GetShard(ctx, keyspace, shards[0]) 4144 if err != nil { 4145 resp.Results = append(resp.Results, fmt.Sprintf("unable to find primary shard %v/%v", keyspace, shards[0])) 4146 err = nil 4147 return 4148 } 4149 if !si.HasPrimary() { 4150 resp.Results = append(resp.Results, fmt.Sprintf("no primary in shard %v/%v", keyspace, shards[0])) 4151 return 4152 } 4153 4154 referenceAlias := si.PrimaryAlias 4155 referenceVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{TabletAlias: referenceAlias}) 4156 if err != nil { 4157 resp.Results = append(resp.Results, fmt.Sprintf("unable to get reference version of first shard's primary tablet: %v", err)) 4158 err = nil 4159 return 4160 } 4161 4162 var validateVersionKeyspaceResponseMutex sync.Mutex 4163 4164 for _, shard := range shards { 4165 shardResp := vtctldatapb.ValidateShardResponse{ 4166 Results: []string{}, 4167 } 4168 4169 var ( 4170 validateShardResponseMutex sync.Mutex 4171 tabletWaitGroup sync.WaitGroup 4172 ) 4173 4174 aliases, err := s.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard) 4175 if err != nil { 4176 errMessage := fmt.Sprintf("unable to find tablet aliases in shard %v: %v", shard, err) 4177 shardResp.Results = append(shardResp.Results, errMessage) 4178 validateVersionKeyspaceResponseMutex.Lock() 4179 resp.Results = append(resp.Results, errMessage) 4180 resp.ResultsByShard[shard] = &shardResp 4181 validateVersionKeyspaceResponseMutex.Unlock() 4182 continue 4183 } 4184 4185 for _, alias := range aliases { 4186 if topoproto.TabletAliasEqual(alias, si.PrimaryAlias) { 4187 continue 4188 } 4189 4190 tabletWaitGroup.Add(1) 4191 go func(alias *topodatapb.TabletAlias, m *sync.Mutex, ctx context.Context) { 4192 defer tabletWaitGroup.Done() 4193 replicaVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{TabletAlias: alias}) 4194 if err != nil { 4195 validateShardResponseMutex.Lock() 4196 shardResp.Results = append(shardResp.Results, fmt.Sprintf("unable to get version for tablet %v: %v", alias, err)) 4197 validateShardResponseMutex.Unlock() 4198 return 4199 } 4200 4201 if referenceVersion.Version != replicaVersion.Version { 4202 validateShardResponseMutex.Lock() 4203 shardResp.Results = append(shardResp.Results, fmt.Sprintf("primary %v version %v is different than replica %v version %v", topoproto.TabletAliasString(referenceAlias), referenceVersion, topoproto.TabletAliasString(alias), replicaVersion)) 4204 validateShardResponseMutex.Unlock() 4205 } 4206 }(alias, &validateShardResponseMutex, ctx) 4207 } 4208 4209 tabletWaitGroup.Wait() 4210 validateVersionKeyspaceResponseMutex.Lock() 4211 resp.Results = append(resp.Results, shardResp.Results...) 4212 resp.ResultsByShard[shard] = &shardResp 4213 validateVersionKeyspaceResponseMutex.Unlock() 4214 } 4215 4216 return resp, err 4217 } 4218 4219 // ValidateVersionShard validates all versions are the same in all 4220 // tablets in a shard 4221 func (s *VtctldServer) ValidateVersionShard(ctx context.Context, req *vtctldatapb.ValidateVersionShardRequest) (resp *vtctldatapb.ValidateVersionShardResponse, err error) { 4222 span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateVersionShard") 4223 defer span.Finish() 4224 4225 defer panicHandler(&err) 4226 4227 shard, err := s.ts.GetShard(ctx, req.Keyspace, req.Shard) 4228 if err != nil { 4229 err = fmt.Errorf("GetShard(%s) failed: %v", req.Shard, err) 4230 return nil, err 4231 } 4232 4233 if !shard.HasPrimary() { 4234 err = fmt.Errorf("no primary in shard %v/%v", req.Keyspace, req.Shard) 4235 return nil, err 4236 } 4237 4238 log.Infof("Gathering version for primary %v", topoproto.TabletAliasString(shard.PrimaryAlias)) 4239 primaryVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{ 4240 TabletAlias: shard.PrimaryAlias, 4241 }) 4242 if err != nil { 4243 err = fmt.Errorf("GetVersion(%s) failed: %v", topoproto.TabletAliasString(shard.PrimaryAlias), err) 4244 return nil, err 4245 } 4246 4247 aliases, err := s.ts.FindAllTabletAliasesInShard(ctx, req.Keyspace, req.Shard) 4248 if err != nil { 4249 err = fmt.Errorf("FindAllTabletAliasesInShard(%s, %s) failed: %v", req.Keyspace, req.Shard, err) 4250 return nil, err 4251 } 4252 4253 er := concurrency.AllErrorRecorder{} 4254 wg := sync.WaitGroup{} 4255 for _, alias := range aliases { 4256 if topoproto.TabletAliasEqual(alias, shard.PrimaryAlias) { 4257 continue 4258 } 4259 4260 wg.Add(1) 4261 go s.diffVersion(ctx, primaryVersion.Version, shard.PrimaryAlias, alias, &wg, &er) 4262 } 4263 4264 wg.Wait() 4265 4266 response := vtctldatapb.ValidateVersionShardResponse{} 4267 if er.HasErrors() { 4268 response.Results = append(response.Results, er.ErrorStrings()...) 4269 } 4270 4271 return &response, nil 4272 } 4273 4274 // ValidateVSchema compares the schema of each primary tablet in "keyspace/shards..." to the vschema and errs if there are differences 4275 func (s *VtctldServer) ValidateVSchema(ctx context.Context, req *vtctldatapb.ValidateVSchemaRequest) (resp *vtctldatapb.ValidateVSchemaResponse, err error) { 4276 span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateVSchema") 4277 defer span.Finish() 4278 4279 defer panicHandler(&err) 4280 keyspace := req.Keyspace 4281 shards := req.Shards 4282 excludeTables := req.ExcludeTables 4283 includeViews := req.IncludeViews 4284 4285 vschm, err := s.ts.GetVSchema(ctx, keyspace) 4286 if err != nil { 4287 err = fmt.Errorf("GetVSchema(%s) failed: %v", keyspace, err) 4288 return nil, err 4289 } 4290 4291 resp = &vtctldatapb.ValidateVSchemaResponse{ 4292 Results: []string{}, 4293 ResultsByShard: make(map[string]*vtctldatapb.ValidateShardResponse, len(shards)), 4294 } 4295 4296 var ( 4297 wg sync.WaitGroup 4298 m sync.Mutex 4299 ) 4300 4301 wg.Add(len(shards)) 4302 4303 for _, shard := range shards { 4304 go func(shard string) { 4305 defer wg.Done() 4306 4307 shardResult := vtctldatapb.ValidateShardResponse{ 4308 Results: []string{}, 4309 } 4310 4311 notFoundTables := []string{} 4312 si, err := s.ts.GetShard(ctx, keyspace, shard) 4313 if err != nil { 4314 errorMessage := fmt.Sprintf("GetShard(%v, %v) failed: %v", keyspace, shard, err) 4315 shardResult.Results = append(shardResult.Results, errorMessage) 4316 m.Lock() 4317 resp.Results = append(resp.Results, errorMessage) 4318 resp.ResultsByShard[shard] = &shardResult 4319 m.Unlock() 4320 return 4321 } 4322 r := &tabletmanagerdatapb.GetSchemaRequest{ExcludeTables: req.ExcludeTables, IncludeViews: req.IncludeViews} 4323 primarySchema, err := schematools.GetSchema(ctx, s.ts, s.tmc, si.PrimaryAlias, r) 4324 if err != nil { 4325 errorMessage := fmt.Sprintf("GetSchema(%s, nil, %v, %v) (%v/%v) failed: %v", si.PrimaryAlias.String(), 4326 excludeTables, includeViews, keyspace, shard, err, 4327 ) 4328 shardResult.Results = append(shardResult.Results, errorMessage) 4329 m.Lock() 4330 resp.Results = append(resp.Results, errorMessage) 4331 resp.ResultsByShard[shard] = &shardResult 4332 m.Unlock() 4333 return 4334 } 4335 for _, tableDef := range primarySchema.TableDefinitions { 4336 if _, ok := vschm.Tables[tableDef.Name]; !ok { 4337 if !schema.IsInternalOperationTableName(tableDef.Name) { 4338 notFoundTables = append(notFoundTables, tableDef.Name) 4339 } 4340 } 4341 } 4342 if len(notFoundTables) > 0 { 4343 errorMessage := fmt.Sprintf("%v/%v has tables that are not in the vschema: %v", keyspace, shard, notFoundTables) 4344 shardResult.Results = append(shardResult.Results, errorMessage) 4345 m.Lock() 4346 resp.Results = append(resp.Results, errorMessage) 4347 resp.ResultsByShard[shard] = &shardResult 4348 m.Unlock() 4349 } 4350 m.Lock() 4351 resp.ResultsByShard[shard] = &shardResult 4352 m.Unlock() 4353 }(shard) 4354 } 4355 wg.Wait() 4356 return resp, err 4357 } 4358 4359 // StartServer registers a VtctldServer for RPCs on the given gRPC server. 4360 func StartServer(s *grpc.Server, ts *topo.Server) { 4361 vtctlservicepb.RegisterVtctldServer(s, NewVtctldServer(ts)) 4362 } 4363 4364 // getTopologyCell is a helper method that returns a topology cell given its path. 4365 func (s *VtctldServer) getTopologyCell(ctx context.Context, cellPath string) (*vtctldatapb.TopologyCell, error) { 4366 // extract cell and relative path 4367 parts := strings.Split(cellPath, "/") 4368 if parts[0] != "" || len(parts) < 2 { 4369 err := vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid path: %s", cellPath) 4370 return nil, err 4371 } 4372 cell := parts[1] 4373 relativePath := cellPath[len(cell)+1:] 4374 topoCell := vtctldatapb.TopologyCell{Name: parts[len(parts)-1], Path: cellPath} 4375 4376 conn, err := s.ts.ConnForCell(ctx, cell) 4377 if err != nil { 4378 err := vterrors.Errorf(vtrpc.Code_UNAVAILABLE, "error fetching connection to cell %s: %v", cell, err) 4379 return nil, err 4380 } 4381 4382 data, _, dataErr := conn.Get(ctx, relativePath) 4383 4384 if dataErr == nil { 4385 result, err := topo.DecodeContent(relativePath, data, false) 4386 if err != nil { 4387 err := vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "error decoding file content for cell %s: %v", cellPath, err) 4388 return nil, err 4389 } 4390 topoCell.Data = result 4391 // since there is data at this cell, it cannot be a directory cell 4392 // so we can early return the topocell 4393 return &topoCell, nil 4394 } 4395 4396 children, childrenErr := conn.ListDir(ctx, relativePath, false /*full*/) 4397 4398 if childrenErr != nil && dataErr != nil { 4399 err := vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "cell %s with path %s has no file contents and no children: %v", cell, cellPath, err) 4400 return nil, err 4401 } 4402 4403 topoCell.Children = make([]string, len(children)) 4404 4405 for i, c := range children { 4406 topoCell.Children[i] = c.Name 4407 } 4408 4409 return &topoCell, nil 4410 } 4411 4412 // Helper function to get version of a tablet from its debug vars 4413 var getVersionFromTabletDebugVars = func(tabletAddr string) (string, error) { 4414 resp, err := http.Get("http://" + tabletAddr + "/debug/vars") 4415 if err != nil { 4416 return "", err 4417 } 4418 defer resp.Body.Close() 4419 body, err := io.ReadAll(resp.Body) 4420 if err != nil { 4421 return "", err 4422 } 4423 4424 var vars struct { 4425 BuildHost string 4426 BuildUser string 4427 BuildTimestamp int64 4428 BuildGitRev string 4429 } 4430 err = json.Unmarshal(body, &vars) 4431 if err != nil { 4432 return "", err 4433 } 4434 4435 version := fmt.Sprintf("%v", vars) 4436 return version, nil 4437 } 4438 4439 var versionFuncMu sync.Mutex 4440 var getVersionFromTablet = getVersionFromTabletDebugVars 4441 4442 func SetVersionFunc(versionFunc func(string) (string, error)) { 4443 versionFuncMu.Lock() 4444 defer versionFuncMu.Unlock() 4445 getVersionFromTablet = versionFunc 4446 } 4447 4448 func GetVersionFunc() func(string) (string, error) { 4449 versionFuncMu.Lock() 4450 defer versionFuncMu.Unlock() 4451 return getVersionFromTablet 4452 } 4453 4454 // helper method to asynchronously get and diff a version 4455 func (s *VtctldServer) diffVersion(ctx context.Context, primaryVersion string, primaryAlias *topodatapb.TabletAlias, alias *topodatapb.TabletAlias, wg *sync.WaitGroup, er concurrency.ErrorRecorder) { 4456 defer wg.Done() 4457 log.Infof("Gathering version for %v", topoproto.TabletAliasString(alias)) 4458 replicaVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{ 4459 TabletAlias: alias, 4460 }) 4461 if err != nil { 4462 er.RecordError(fmt.Errorf("unable to get version for tablet %v: %v", alias, err)) 4463 return 4464 } 4465 4466 if primaryVersion != replicaVersion.Version { 4467 er.RecordError(fmt.Errorf("primary %v version %v is different than replica %v version %v", topoproto.TabletAliasString(primaryAlias), primaryVersion, topoproto.TabletAliasString(alias), replicaVersion)) 4468 } 4469 }