vitess.io/vitess@v0.16.2/go/vt/vtctl/grpcvtctldserver/server.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package grpcvtctldserver
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"net/http"
    26  	"path/filepath"
    27  	"sort"
    28  	"strings"
    29  	"sync"
    30  	"time"
    31  
    32  	"google.golang.org/grpc"
    33  	"google.golang.org/protobuf/proto"
    34  	"k8s.io/apimachinery/pkg/util/sets"
    35  
    36  	"vitess.io/vitess/go/event"
    37  	"vitess.io/vitess/go/netutil"
    38  	"vitess.io/vitess/go/protoutil"
    39  	"vitess.io/vitess/go/sqlescape"
    40  	"vitess.io/vitess/go/sync2"
    41  	"vitess.io/vitess/go/trace"
    42  	"vitess.io/vitess/go/vt/callerid"
    43  	"vitess.io/vitess/go/vt/concurrency"
    44  	hk "vitess.io/vitess/go/vt/hook"
    45  	"vitess.io/vitess/go/vt/key"
    46  	"vitess.io/vitess/go/vt/log"
    47  	"vitess.io/vitess/go/vt/logutil"
    48  	"vitess.io/vitess/go/vt/mysqlctl"
    49  	"vitess.io/vitess/go/vt/mysqlctl/backupstorage"
    50  	"vitess.io/vitess/go/vt/mysqlctl/mysqlctlproto"
    51  	"vitess.io/vitess/go/vt/mysqlctl/tmutils"
    52  	"vitess.io/vitess/go/vt/schema"
    53  	"vitess.io/vitess/go/vt/schemamanager"
    54  	"vitess.io/vitess/go/vt/sqlparser"
    55  	"vitess.io/vitess/go/vt/topo"
    56  	"vitess.io/vitess/go/vt/topo/topoproto"
    57  	"vitess.io/vitess/go/vt/topotools"
    58  	"vitess.io/vitess/go/vt/topotools/events"
    59  	"vitess.io/vitess/go/vt/vtctl/reparentutil"
    60  	"vitess.io/vitess/go/vt/vtctl/schematools"
    61  	"vitess.io/vitess/go/vt/vtctl/workflow"
    62  	"vitess.io/vitess/go/vt/vterrors"
    63  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    64  
    65  	logutilpb "vitess.io/vitess/go/vt/proto/logutil"
    66  	mysqlctlpb "vitess.io/vitess/go/vt/proto/mysqlctl"
    67  	querypb "vitess.io/vitess/go/vt/proto/query"
    68  	replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
    69  	tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
    70  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    71  	vschemapb "vitess.io/vitess/go/vt/proto/vschema"
    72  	vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
    73  	vtctlservicepb "vitess.io/vitess/go/vt/proto/vtctlservice"
    74  	"vitess.io/vitess/go/vt/proto/vtrpc"
    75  )
    76  
const (
	// initShardPrimaryOperation holds the literal name "InitShardPrimary".
	// NOTE(review): its use is not visible in this part of the file;
	// presumably it labels the InitShardPrimary operation — confirm against
	// its references.
	initShardPrimaryOperation = "InitShardPrimary"
)
    80  
// VtctldServer implements the Vtctld RPC service protocol.
//
// It embeds vtctlservicepb.UnimplementedVtctldServer and carries the three
// collaborators the RPC handlers in this file call into.
type VtctldServer struct {
	vtctlservicepb.UnimplementedVtctldServer
	// ts is the topology server the handlers read from and write to.
	ts  *topo.Server
	// tmc is the tablet manager client used for per-tablet RPCs.
	tmc tmclient.TabletManagerClient
	// ws is the workflow server, built from ts and tmc by the constructors.
	ws  *workflow.Server
}
    88  
    89  // NewVtctldServer returns a new VtctldServer for the given topo server.
    90  func NewVtctldServer(ts *topo.Server) *VtctldServer {
    91  	tmc := tmclient.NewTabletManagerClient()
    92  
    93  	return &VtctldServer{
    94  		ts:  ts,
    95  		tmc: tmc,
    96  		ws:  workflow.NewServer(ts, tmc),
    97  	}
    98  }
    99  
   100  // NewTestVtctldServer returns a new VtctldServer for the given topo server
   101  // AND tmclient for use in tests. This should NOT be used in production.
   102  func NewTestVtctldServer(ts *topo.Server, tmc tmclient.TabletManagerClient) *VtctldServer {
   103  	return &VtctldServer{
   104  		ts:  ts,
   105  		tmc: tmc,
   106  		ws:  workflow.NewServer(ts, tmc),
   107  	}
   108  }
   109  
   110  func panicHandler(err *error) {
   111  	if x := recover(); x != nil {
   112  		*err = fmt.Errorf("uncaught panic: %v", x)
   113  	}
   114  }
   115  
   116  // AddCellInfo is part of the vtctlservicepb.VtctldServer interface.
   117  func (s *VtctldServer) AddCellInfo(ctx context.Context, req *vtctldatapb.AddCellInfoRequest) (resp *vtctldatapb.AddCellInfoResponse, err error) {
   118  	span, ctx := trace.NewSpan(ctx, "VtctldServer.AddCellInfo")
   119  	defer span.Finish()
   120  
   121  	defer panicHandler(&err)
   122  
   123  	if req.CellInfo.Root == "" {
   124  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "CellInfo.Root must be non-empty")
   125  		return nil, err
   126  	}
   127  
   128  	span.Annotate("cell", req.Name)
   129  	span.Annotate("cell_root", req.CellInfo.Root)
   130  	span.Annotate("cell_address", req.CellInfo.ServerAddress)
   131  
   132  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
   133  	defer cancel()
   134  
   135  	if err = s.ts.CreateCellInfo(ctx, req.Name, req.CellInfo); err != nil {
   136  		return nil, err
   137  	}
   138  
   139  	return &vtctldatapb.AddCellInfoResponse{}, nil
   140  }
   141  
   142  // AddCellsAlias is part of the vtctlservicepb.VtctldServer interface.
   143  func (s *VtctldServer) AddCellsAlias(ctx context.Context, req *vtctldatapb.AddCellsAliasRequest) (resp *vtctldatapb.AddCellsAliasResponse, err error) {
   144  	span, ctx := trace.NewSpan(ctx, "VtctldServer.AddCellsAlias")
   145  	defer span.Finish()
   146  
   147  	defer panicHandler(&err)
   148  
   149  	span.Annotate("cells_alias", req.Name)
   150  	span.Annotate("cells", strings.Join(req.Cells, ","))
   151  
   152  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
   153  	defer cancel()
   154  
   155  	if err = s.ts.CreateCellsAlias(ctx, req.Name, &topodatapb.CellsAlias{Cells: req.Cells}); err != nil {
   156  		return nil, err
   157  	}
   158  
   159  	return &vtctldatapb.AddCellsAliasResponse{}, nil
   160  }
   161  
   162  // ApplyRoutingRules is part of the vtctlservicepb.VtctldServer interface.
   163  func (s *VtctldServer) ApplyRoutingRules(ctx context.Context, req *vtctldatapb.ApplyRoutingRulesRequest) (resp *vtctldatapb.ApplyRoutingRulesResponse, err error) {
   164  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplyRoutingRules")
   165  	defer span.Finish()
   166  
   167  	defer panicHandler(&err)
   168  
   169  	span.Annotate("skip_rebuild", req.SkipRebuild)
   170  	span.Annotate("rebuild_cells", strings.Join(req.RebuildCells, ","))
   171  
   172  	if err = s.ts.SaveRoutingRules(ctx, req.RoutingRules); err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	resp = &vtctldatapb.ApplyRoutingRulesResponse{}
   177  
   178  	if req.SkipRebuild {
   179  		log.Warningf("Skipping rebuild of SrvVSchema, will need to run RebuildVSchemaGraph for changes to take effect")
   180  		return resp, nil
   181  	}
   182  
   183  	if err = s.ts.RebuildSrvVSchema(ctx, req.RebuildCells); err != nil {
   184  		err = vterrors.Wrapf(err, "RebuildSrvVSchema(%v) failed: %v", req.RebuildCells, err)
   185  		return nil, err
   186  	}
   187  
   188  	return resp, nil
   189  }
   190  
   191  // ApplyShardRoutingRules is part of the vtctlservicepb.VtctldServer interface.
   192  func (s *VtctldServer) ApplyShardRoutingRules(ctx context.Context, req *vtctldatapb.ApplyShardRoutingRulesRequest) (*vtctldatapb.ApplyShardRoutingRulesResponse, error) {
   193  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplyShardRoutingRules")
   194  	defer span.Finish()
   195  
   196  	span.Annotate("skip_rebuild", req.SkipRebuild)
   197  	span.Annotate("rebuild_cells", strings.Join(req.RebuildCells, ","))
   198  
   199  	if err := s.ts.SaveShardRoutingRules(ctx, req.ShardRoutingRules); err != nil {
   200  		return nil, err
   201  	}
   202  
   203  	resp := &vtctldatapb.ApplyShardRoutingRulesResponse{}
   204  
   205  	if req.SkipRebuild {
   206  		log.Warningf("Skipping rebuild of SrvVSchema as requested, you will need to run RebuildVSchemaGraph for changes to take effect")
   207  		return resp, nil
   208  	}
   209  
   210  	if err := s.ts.RebuildSrvVSchema(ctx, req.RebuildCells); err != nil {
   211  		return nil, vterrors.Wrapf(err, "RebuildSrvVSchema(%v) failed: %v", req.RebuildCells, err)
   212  	}
   213  
   214  	return resp, nil
   215  }
   216  
   217  // ApplySchema is part of the vtctlservicepb.VtctldServer interface.
   218  func (s *VtctldServer) ApplySchema(ctx context.Context, req *vtctldatapb.ApplySchemaRequest) (resp *vtctldatapb.ApplySchemaResponse, err error) {
   219  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplySchema")
   220  	defer span.Finish()
   221  
   222  	defer panicHandler(&err)
   223  
   224  	span.Annotate("keyspace", req.Keyspace)
   225  	span.Annotate("skip_preflight", req.SkipPreflight)
   226  	span.Annotate("ddl_strategy", req.DdlStrategy)
   227  
   228  	if len(req.Sql) == 0 {
   229  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "Sql must be a non-empty array")
   230  		return nil, err
   231  	}
   232  
   233  	// Attach the callerID as the EffectiveCallerID.
   234  	if req.CallerId != nil {
   235  		span.Annotate("caller_id", req.CallerId.Principal)
   236  		ctx = callerid.NewContext(ctx, req.CallerId, &querypb.VTGateCallerID{Username: req.CallerId.Principal})
   237  	}
   238  
   239  	executionUUID, err := schema.CreateUUID()
   240  	if err != nil {
   241  		err = vterrors.Wrapf(err, "unable to create execution UUID")
   242  		return resp, err
   243  	}
   244  
   245  	migrationContext := req.MigrationContext
   246  	if migrationContext == "" {
   247  		migrationContext = fmt.Sprintf("vtctl:%s", executionUUID)
   248  	}
   249  
   250  	waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout)
   251  	if err != nil {
   252  		err = vterrors.Wrapf(err, "unable to parse WaitReplicasTimeout into a valid duration")
   253  		return nil, err
   254  	} else if !ok {
   255  		waitReplicasTimeout = time.Second * 30
   256  	}
   257  
   258  	m := sync.RWMutex{}
   259  	logstream := []*logutilpb.Event{}
   260  	logger := logutil.NewCallbackLogger(func(e *logutilpb.Event) {
   261  		m.Lock()
   262  		defer m.Unlock()
   263  
   264  		logstream = append(logstream, e)
   265  	})
   266  
   267  	executor := schemamanager.NewTabletExecutor(migrationContext, s.ts, s.tmc, logger, waitReplicasTimeout)
   268  	if req.AllowLongUnavailability {
   269  		executor.AllowBigSchemaChange()
   270  	}
   271  	if req.SkipPreflight {
   272  		executor.SkipPreflight()
   273  	}
   274  
   275  	if err = executor.SetDDLStrategy(req.DdlStrategy); err != nil {
   276  		err = vterrors.Wrapf(err, "invalid DdlStrategy: %s", req.DdlStrategy)
   277  		return resp, err
   278  	}
   279  
   280  	if len(req.UuidList) > 0 {
   281  		if err = executor.SetUUIDList(req.UuidList); err != nil {
   282  			err = vterrors.Wrapf(err, "invalid UuidList: %s", req.UuidList)
   283  			return resp, err
   284  		}
   285  	}
   286  
   287  	execResult, err := schemamanager.Run(
   288  		ctx,
   289  		schemamanager.NewPlainController(req.Sql, req.Keyspace),
   290  		executor,
   291  	)
   292  
   293  	if err != nil {
   294  		return &vtctldatapb.ApplySchemaResponse{}, err
   295  	}
   296  
   297  	return &vtctldatapb.ApplySchemaResponse{
   298  		UuidList: execResult.UUIDs,
   299  	}, err
   300  }
   301  
   302  // ApplyVSchema is part of the vtctlservicepb.VtctldServer interface.
   303  func (s *VtctldServer) ApplyVSchema(ctx context.Context, req *vtctldatapb.ApplyVSchemaRequest) (resp *vtctldatapb.ApplyVSchemaResponse, err error) {
   304  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ApplyVSchema")
   305  	defer span.Finish()
   306  
   307  	defer panicHandler(&err)
   308  
   309  	span.Annotate("keyspace", req.Keyspace)
   310  	span.Annotate("cells", strings.Join(req.Cells, ","))
   311  	span.Annotate("skip_rebuild", req.SkipRebuild)
   312  	span.Annotate("dry_run", req.DryRun)
   313  
   314  	if _, err = s.ts.GetKeyspace(ctx, req.Keyspace); err != nil {
   315  		if topo.IsErrType(err, topo.NoNode) {
   316  			err = vterrors.Wrapf(err, "keyspace(%s) doesn't exist, check if the keyspace is initialized", req.Keyspace)
   317  		} else {
   318  			err = vterrors.Wrapf(err, "GetKeyspace(%s)", req.Keyspace)
   319  		}
   320  
   321  		return nil, err
   322  	}
   323  
   324  	if (req.Sql != "" && req.VSchema != nil) || (req.Sql == "" && req.VSchema == nil) {
   325  		err = vterrors.New(vtrpc.Code_INVALID_ARGUMENT, "must pass exactly one of req.VSchema and req.Sql")
   326  		return nil, err
   327  	}
   328  
   329  	var vs *vschemapb.Keyspace
   330  
   331  	if req.Sql != "" {
   332  		span.Annotate("sql_mode", true)
   333  
   334  		var stmt sqlparser.Statement
   335  		stmt, err = sqlparser.Parse(req.Sql)
   336  		if err != nil {
   337  			err = vterrors.Wrapf(err, "Parse(%s)", req.Sql)
   338  			return nil, err
   339  		}
   340  		ddl, ok := stmt.(*sqlparser.AlterVschema)
   341  		if !ok {
   342  			err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "error parsing VSchema DDL statement `%s`", req.Sql)
   343  			return nil, err
   344  		}
   345  
   346  		vs, err = s.ts.GetVSchema(ctx, req.Keyspace)
   347  		if err != nil && !topo.IsErrType(err, topo.NoNode) {
   348  			err = vterrors.Wrapf(err, "GetVSchema(%s)", req.Keyspace)
   349  			return nil, err
   350  		} // otherwise, we keep the empty vschema object from above
   351  
   352  		vs, err = topotools.ApplyVSchemaDDL(req.Keyspace, vs, ddl)
   353  		if err != nil {
   354  			err = vterrors.Wrapf(err, "ApplyVSchemaDDL(%s,%v,%v)", req.Keyspace, vs, ddl)
   355  			return nil, err
   356  		}
   357  	} else { // "jsonMode"
   358  		span.Annotate("sql_mode", false)
   359  		vs = req.VSchema
   360  	}
   361  
   362  	if req.DryRun { // we return what was passed in and parsed, rather than current
   363  		return &vtctldatapb.ApplyVSchemaResponse{VSchema: vs}, nil
   364  	}
   365  
   366  	if err = s.ts.SaveVSchema(ctx, req.Keyspace, vs); err != nil {
   367  		err = vterrors.Wrapf(err, "SaveVSchema(%s, %v)", req.Keyspace, req.VSchema)
   368  		return nil, err
   369  	}
   370  
   371  	if !req.SkipRebuild {
   372  		if err = s.ts.RebuildSrvVSchema(ctx, req.Cells); err != nil {
   373  			err = vterrors.Wrapf(err, "RebuildSrvVSchema")
   374  			return nil, err
   375  		}
   376  	}
   377  	updatedVS, err := s.ts.GetVSchema(ctx, req.Keyspace)
   378  	if err != nil {
   379  		err = vterrors.Wrapf(err, "GetVSchema(%s)", req.Keyspace)
   380  		return nil, err
   381  	}
   382  	return &vtctldatapb.ApplyVSchemaResponse{VSchema: updatedVS}, nil
   383  }
   384  
   385  // Backup is part of the vtctlservicepb.VtctldServer interface.
   386  func (s *VtctldServer) Backup(req *vtctldatapb.BackupRequest, stream vtctlservicepb.Vtctld_BackupServer) (err error) {
   387  	span, ctx := trace.NewSpan(stream.Context(), "VtctldServer.Backup")
   388  	defer span.Finish()
   389  
   390  	defer panicHandler(&err)
   391  
   392  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
   393  	span.Annotate("allow_primary", req.AllowPrimary)
   394  	span.Annotate("concurrency", req.Concurrency)
   395  	span.Annotate("incremental_from_pos", req.IncrementalFromPos)
   396  
   397  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
   398  	if err != nil {
   399  		return err
   400  	}
   401  
   402  	span.Annotate("keyspace", ti.Keyspace)
   403  	span.Annotate("shard", ti.Shard)
   404  
   405  	err = s.backupTablet(ctx, ti.Tablet, req, stream)
   406  	return err
   407  }
   408  
   409  // BackupShard is part of the vtctlservicepb.VtctldServer interface.
   410  func (s *VtctldServer) BackupShard(req *vtctldatapb.BackupShardRequest, stream vtctlservicepb.Vtctld_BackupShardServer) (err error) {
   411  	span, ctx := trace.NewSpan(stream.Context(), "VtctldServer.BackupShard")
   412  	defer span.Finish()
   413  
   414  	defer panicHandler(&err)
   415  
   416  	span.Annotate("keyspace", req.Keyspace)
   417  	span.Annotate("shard", req.Shard)
   418  	span.Annotate("allow_primary", req.AllowPrimary)
   419  	span.Annotate("concurrency", req.Concurrency)
   420  
   421  	tablets, stats, err := reparentutil.ShardReplicationStatuses(ctx, s.ts, s.tmc, req.Keyspace, req.Shard)
   422  	if err != nil {
   423  		return err
   424  	}
   425  
   426  	var (
   427  		backupTablet    *topodatapb.Tablet
   428  		backupTabletLag uint32
   429  	)
   430  
   431  	for i, tablet := range tablets {
   432  		switch tablet.Type {
   433  		case topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY, topodatapb.TabletType_SPARE:
   434  		default:
   435  			continue
   436  		}
   437  
   438  		if lag := stats[i].ReplicationLagSeconds; backupTablet == nil || lag < backupTabletLag {
   439  			backupTablet = tablet.Tablet
   440  			backupTabletLag = lag
   441  		}
   442  	}
   443  
   444  	if backupTablet == nil && req.AllowPrimary {
   445  		for _, tablet := range tablets {
   446  			if tablet.Type != topodatapb.TabletType_PRIMARY {
   447  				continue
   448  			}
   449  
   450  			backupTablet = tablet.Tablet
   451  			break
   452  		}
   453  	}
   454  
   455  	if backupTablet == nil {
   456  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no tablet available for backup")
   457  		return err
   458  	}
   459  
   460  	span.Annotate("tablet_alias", topoproto.TabletAliasString(backupTablet.Alias))
   461  
   462  	r := &vtctldatapb.BackupRequest{Concurrency: req.Concurrency, AllowPrimary: req.AllowPrimary}
   463  	err = s.backupTablet(ctx, backupTablet, r, stream)
   464  	return err
   465  }
   466  
   467  func (s *VtctldServer) backupTablet(ctx context.Context, tablet *topodatapb.Tablet, req *vtctldatapb.BackupRequest, stream interface {
   468  	Send(resp *vtctldatapb.BackupResponse) error
   469  }) error {
   470  	r := &tabletmanagerdatapb.BackupRequest{
   471  		Concurrency:        int64(req.Concurrency),
   472  		AllowPrimary:       req.AllowPrimary,
   473  		IncrementalFromPos: req.IncrementalFromPos,
   474  	}
   475  	logStream, err := s.tmc.Backup(ctx, tablet, r)
   476  	if err != nil {
   477  		return err
   478  	}
   479  
   480  	logger := logutil.NewConsoleLogger()
   481  	for {
   482  		event, err := logStream.Recv()
   483  		switch err {
   484  		case nil:
   485  			logutil.LogEvent(logger, event)
   486  			resp := &vtctldatapb.BackupResponse{
   487  				TabletAlias: tablet.Alias,
   488  				Keyspace:    tablet.Keyspace,
   489  				Shard:       tablet.Shard,
   490  				Event:       event,
   491  			}
   492  			if err := stream.Send(resp); err != nil {
   493  				logger.Errorf("failed to send stream response %+v: %v", resp, err)
   494  			}
   495  		case io.EOF:
   496  			// Do not do anything for primary tablets and when active reparenting is disabled
   497  			if mysqlctl.DisableActiveReparents || tablet.Type == topodatapb.TabletType_PRIMARY {
   498  				return nil
   499  			}
   500  
   501  			// Otherwise we find the correct primary tablet and set the replication source,
   502  			// since the primary could have changed while we executed the backup which can
   503  			// also affect whether we want to send semi sync acks or not.
   504  			tabletInfo, err := s.ts.GetTablet(ctx, tablet.Alias)
   505  			if err != nil {
   506  				return err
   507  			}
   508  
   509  			return reparentutil.SetReplicationSource(ctx, s.ts, s.tmc, tabletInfo.Tablet)
   510  		default:
   511  			return err
   512  		}
   513  	}
   514  }
   515  
   516  // ChangeTabletType is part of the vtctlservicepb.VtctldServer interface.
   517  func (s *VtctldServer) ChangeTabletType(ctx context.Context, req *vtctldatapb.ChangeTabletTypeRequest) (resp *vtctldatapb.ChangeTabletTypeResponse, err error) {
   518  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ChangeTabletType")
   519  	defer span.Finish()
   520  
   521  	defer panicHandler(&err)
   522  
   523  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
   524  	span.Annotate("dry_run", req.DryRun)
   525  	span.Annotate("tablet_type", topoproto.TabletTypeLString(req.DbType))
   526  
   527  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
   528  	defer cancel()
   529  
   530  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
   531  	if err != nil {
   532  		return nil, err
   533  	}
   534  
   535  	span.Annotate("before_tablet_type", topoproto.TabletTypeLString(tablet.Type))
   536  
   537  	if !topo.IsTrivialTypeChange(tablet.Type, req.DbType) {
   538  		err = fmt.Errorf("tablet %v type change %v -> %v is not an allowed transition for ChangeTabletType", req.TabletAlias, tablet.Type, req.DbType)
   539  		return nil, err
   540  	}
   541  
   542  	if req.DryRun {
   543  		afterTablet := proto.Clone(tablet.Tablet).(*topodatapb.Tablet)
   544  		afterTablet.Type = req.DbType
   545  
   546  		return &vtctldatapb.ChangeTabletTypeResponse{
   547  			BeforeTablet: tablet.Tablet,
   548  			AfterTablet:  afterTablet,
   549  			WasDryRun:    true,
   550  		}, nil
   551  	}
   552  
   553  	shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
   554  	if err != nil {
   555  		return nil, err
   556  	}
   557  
   558  	durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace)
   559  	if err != nil {
   560  		return nil, err
   561  	}
   562  	log.Infof("Getting a new durability policy for %v", durabilityName)
   563  	durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
   564  	if err != nil {
   565  		return nil, err
   566  	}
   567  
   568  	if !shard.HasPrimary() {
   569  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no primary tablet for shard %v/%v", tablet.Keyspace, tablet.Shard)
   570  		return nil, err
   571  	}
   572  
   573  	shardPrimary, err := s.ts.GetTablet(ctx, shard.PrimaryAlias)
   574  	if err != nil {
   575  		err = fmt.Errorf("cannot lookup primary tablet %v for shard %v/%v: %w", topoproto.TabletAliasString(shard.PrimaryAlias), tablet.Keyspace, tablet.Shard, err)
   576  		return nil, err
   577  	}
   578  
   579  	if shardPrimary.Type != topodatapb.TabletType_PRIMARY {
   580  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "TopologyServer has incosistent state for shard primary %v", topoproto.TabletAliasString(shard.PrimaryAlias))
   581  		return nil, err
   582  	}
   583  
   584  	if shardPrimary.Keyspace != tablet.Keyspace || shardPrimary.Shard != tablet.Shard {
   585  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary %v and potential replica %v not in same keypace shard (%v/%v)", topoproto.TabletAliasString(shard.PrimaryAlias), req.TabletAlias, tablet.Keyspace, tablet.Shard)
   586  		return nil, err
   587  	}
   588  
   589  	// We should clone the tablet and change its type to the expected type before checking the durability rules
   590  	// Since we want to check the durability rules for the desired state and not before we make that change
   591  	expectedTablet := proto.Clone(tablet.Tablet).(*topodatapb.Tablet)
   592  	expectedTablet.Type = req.DbType
   593  	err = s.tmc.ChangeType(ctx, tablet.Tablet, req.DbType, reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, expectedTablet))
   594  	if err != nil {
   595  		return nil, err
   596  	}
   597  
   598  	var changedTablet *topodatapb.Tablet
   599  
   600  	changedTabletInfo, err := s.ts.GetTablet(ctx, req.TabletAlias)
   601  	if err != nil {
   602  		log.Warningf("error while reading the tablet we just changed back out of the topo: %v", err)
   603  	} else {
   604  		changedTablet = changedTabletInfo.Tablet
   605  	}
   606  
   607  	return &vtctldatapb.ChangeTabletTypeResponse{
   608  		BeforeTablet: tablet.Tablet,
   609  		AfterTablet:  changedTablet,
   610  		WasDryRun:    false,
   611  	}, nil
   612  }
   613  
   614  // CreateKeyspace is part of the vtctlservicepb.VtctldServer interface.
   615  func (s *VtctldServer) CreateKeyspace(ctx context.Context, req *vtctldatapb.CreateKeyspaceRequest) (resp *vtctldatapb.CreateKeyspaceResponse, err error) {
   616  	span, ctx := trace.NewSpan(ctx, "VtctldServer.CreateKeyspace")
   617  	defer span.Finish()
   618  
   619  	defer panicHandler(&err)
   620  
   621  	span.Annotate("keyspace", req.Name)
   622  	span.Annotate("keyspace_type", topoproto.KeyspaceTypeLString(req.Type))
   623  	span.Annotate("force", req.Force)
   624  	span.Annotate("allow_empty_vschema", req.AllowEmptyVSchema)
   625  	span.Annotate("durability_policy", req.DurabilityPolicy)
   626  
   627  	switch req.Type {
   628  	case topodatapb.KeyspaceType_NORMAL:
   629  	case topodatapb.KeyspaceType_SNAPSHOT:
   630  		if req.BaseKeyspace == "" {
   631  			err = errors.New("BaseKeyspace is required for SNAPSHOT keyspaces")
   632  			return nil, err
   633  		}
   634  
   635  		if req.SnapshotTime == nil {
   636  			err = errors.New("SnapshotTime is required for SNAPSHOT keyspaces")
   637  			return nil, err
   638  		}
   639  
   640  		span.Annotate("base_keyspace", req.BaseKeyspace)
   641  		span.Annotate("snapshot_time", req.SnapshotTime) // TODO: get a proper string repr
   642  	default:
   643  		return nil, fmt.Errorf("unknown keyspace type %v", req.Type)
   644  	}
   645  
   646  	ki := &topodatapb.Keyspace{
   647  		KeyspaceType:     req.Type,
   648  		ServedFroms:      req.ServedFroms,
   649  		BaseKeyspace:     req.BaseKeyspace,
   650  		SnapshotTime:     req.SnapshotTime,
   651  		DurabilityPolicy: req.DurabilityPolicy,
   652  	}
   653  
   654  	err = s.ts.CreateKeyspace(ctx, req.Name, ki)
   655  	if req.Force && topo.IsErrType(err, topo.NodeExists) {
   656  		log.Infof("keyspace %v already exists (ignoring error with Force=true)", req.Name)
   657  		err = nil
   658  
   659  		// Get the actual keyspace out of the topo; it may differ in structure,
   660  		// and we want to return the authoritative version as the "created" one
   661  		// to the client.
   662  		var ks *topo.KeyspaceInfo
   663  		ks, _ = s.ts.GetKeyspace(ctx, req.Name)
   664  		ki = ks.Keyspace
   665  	}
   666  
   667  	if err != nil {
   668  		return nil, err
   669  	}
   670  
   671  	if !req.AllowEmptyVSchema {
   672  		if err = s.ts.EnsureVSchema(ctx, req.Name); err != nil {
   673  			return nil, err
   674  		}
   675  	}
   676  
   677  	if req.Type == topodatapb.KeyspaceType_SNAPSHOT {
   678  		var vs *vschemapb.Keyspace
   679  		vs, err = s.ts.GetVSchema(ctx, req.BaseKeyspace)
   680  		if err != nil {
   681  			log.Infof("error from GetVSchema(%v) = %v", req.BaseKeyspace, err)
   682  			if topo.IsErrType(err, topo.NoNode) {
   683  				log.Infof("base keyspace %v does not exist; continuing with bare, unsharded vschema", req.BaseKeyspace)
   684  				vs = &vschemapb.Keyspace{
   685  					Sharded:  false,
   686  					Tables:   map[string]*vschemapb.Table{},
   687  					Vindexes: map[string]*vschemapb.Vindex{},
   688  				}
   689  			} else {
   690  				return nil, err
   691  			}
   692  		}
   693  
   694  		// SNAPSHOT keyspaces are excluded from global routing.
   695  		vs.RequireExplicitRouting = true
   696  
   697  		if err = s.ts.SaveVSchema(ctx, req.Name, vs); err != nil {
   698  			err = fmt.Errorf("SaveVSchema(%v) = %w", vs, err)
   699  			return nil, err
   700  		}
   701  	}
   702  
   703  	cells := []string{}
   704  	err = s.ts.RebuildSrvVSchema(ctx, cells)
   705  	if err != nil {
   706  		return nil, fmt.Errorf("RebuildSrvVSchema(%v) = %w", cells, err)
   707  	}
   708  
   709  	return &vtctldatapb.CreateKeyspaceResponse{
   710  		Keyspace: &vtctldatapb.Keyspace{
   711  			Name:     req.Name,
   712  			Keyspace: ki,
   713  		},
   714  	}, nil
   715  }
   716  
   717  // CreateShard is part of the vtctlservicepb.VtctldServer interface.
   718  func (s *VtctldServer) CreateShard(ctx context.Context, req *vtctldatapb.CreateShardRequest) (resp *vtctldatapb.CreateShardResponse, err error) {
   719  	span, ctx := trace.NewSpan(ctx, "VtctldServer.CreateShard")
   720  	defer span.Finish()
   721  
   722  	defer panicHandler(&err)
   723  
   724  	span.Annotate("keyspace", req.Keyspace)
   725  	span.Annotate("shard", req.ShardName)
   726  	span.Annotate("force", req.Force)
   727  	span.Annotate("include_parent", req.IncludeParent)
   728  
   729  	if req.IncludeParent {
   730  		log.Infof("Creating empty keyspace for %s", req.Keyspace)
   731  		if err2 := s.ts.CreateKeyspace(ctx, req.Keyspace, &topodatapb.Keyspace{}); err2 != nil {
   732  			if req.Force && topo.IsErrType(err2, topo.NodeExists) {
   733  				log.Infof("keyspace %v already exists; ignoring error because Force = true", req.Keyspace)
   734  			} else {
   735  				err = err2
   736  				return nil, err
   737  			}
   738  		}
   739  	}
   740  
   741  	shardExists := false
   742  
   743  	if err = s.ts.CreateShard(ctx, req.Keyspace, req.ShardName); err != nil {
   744  		if req.Force && topo.IsErrType(err, topo.NodeExists) {
   745  			log.Infof("shard %v/%v already exists; ignoring error because Force = true", req.Keyspace, req.ShardName)
   746  			shardExists = true
   747  			err = nil
   748  		} else {
   749  			return nil, err
   750  		}
   751  	}
   752  
   753  	// Fetch what we just created out of the topo. Errors should never happen
   754  	// here, but we'll check them anyway.
   755  
   756  	ks, err := s.ts.GetKeyspace(ctx, req.Keyspace)
   757  	if err != nil {
   758  		return nil, err
   759  	}
   760  
   761  	shard, err := s.ts.GetShard(ctx, req.Keyspace, req.ShardName)
   762  	if err != nil {
   763  		return nil, err
   764  	}
   765  
   766  	return &vtctldatapb.CreateShardResponse{
   767  		Keyspace: &vtctldatapb.Keyspace{
   768  			Name:     req.Keyspace,
   769  			Keyspace: ks.Keyspace,
   770  		},
   771  		Shard: &vtctldatapb.Shard{
   772  			Keyspace: req.Keyspace,
   773  			Name:     req.ShardName,
   774  			Shard:    shard.Shard,
   775  		},
   776  		ShardAlreadyExists: shardExists,
   777  	}, nil
   778  }
   779  
   780  // DeleteCellInfo is part of the vtctlservicepb.VtctldServer interface.
   781  func (s *VtctldServer) DeleteCellInfo(ctx context.Context, req *vtctldatapb.DeleteCellInfoRequest) (resp *vtctldatapb.DeleteCellInfoResponse, err error) {
   782  	span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteCellInfo")
   783  	defer span.Finish()
   784  
   785  	defer panicHandler(&err)
   786  
   787  	span.Annotate("cell", req.Name)
   788  	span.Annotate("force", req.Force)
   789  
   790  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
   791  	defer cancel()
   792  
   793  	if err = s.ts.DeleteCellInfo(ctx, req.Name, req.Force); err != nil {
   794  		return nil, err
   795  	}
   796  
   797  	return &vtctldatapb.DeleteCellInfoResponse{}, nil
   798  }
   799  
   800  // DeleteCellsAlias is part of the vtctlservicepb.VtctldServer interface.
   801  func (s *VtctldServer) DeleteCellsAlias(ctx context.Context, req *vtctldatapb.DeleteCellsAliasRequest) (resp *vtctldatapb.DeleteCellsAliasResponse, err error) {
   802  	span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteCellsAlias")
   803  	defer span.Finish()
   804  
   805  	defer panicHandler(&err)
   806  
   807  	span.Annotate("cells_alias", req.Name)
   808  
   809  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
   810  	defer cancel()
   811  
   812  	if err = s.ts.DeleteCellsAlias(ctx, req.Name); err != nil {
   813  		return nil, err
   814  	}
   815  
   816  	return &vtctldatapb.DeleteCellsAliasResponse{}, nil
   817  }
   818  
   819  // DeleteKeyspace is part of the vtctlservicepb.VtctldServer interface.
   820  func (s *VtctldServer) DeleteKeyspace(ctx context.Context, req *vtctldatapb.DeleteKeyspaceRequest) (resp *vtctldatapb.DeleteKeyspaceResponse, err error) {
   821  	span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteKeyspace")
   822  	defer span.Finish()
   823  
   824  	defer panicHandler(&err)
   825  
   826  	span.Annotate("keyspace", req.Keyspace)
   827  	span.Annotate("recursive", req.Recursive)
   828  	span.Annotate("force", req.Force)
   829  
   830  	lctx, unlock, lerr := s.ts.LockKeyspace(ctx, req.Keyspace, "DeleteKeyspace")
   831  	switch {
   832  	case lerr == nil:
   833  		ctx = lctx
   834  	case !req.Force:
   835  		err = fmt.Errorf("failed to lock %s; if you really want to delete this keyspace, re-run with Force=true: %w", req.Keyspace, lerr)
   836  		return nil, err
   837  	default:
   838  		log.Warningf("%s: failed to lock keyspace %s for deletion, but force=true, proceeding anyway ...", lerr, req.Keyspace)
   839  	}
   840  
   841  	if unlock != nil {
   842  		defer func() {
   843  			// Attempting to unlock a keyspace we successfully deleted results
   844  			// in ts.unlockKeyspace returning an error, which can make the
   845  			// overall RPC _seem_ like it failed.
   846  			//
   847  			// So, we do this extra checking to allow for specifically this
   848  			// scenario to result in "success."
   849  			origErr := err
   850  			unlock(&err)
   851  			if origErr == nil && topo.IsErrType(err, topo.NoNode) {
   852  				err = nil
   853  			}
   854  		}()
   855  	}
   856  
   857  	shards, err := s.ts.GetShardNames(ctx, req.Keyspace)
   858  	if err != nil {
   859  		return nil, err
   860  	}
   861  
   862  	if len(shards) > 0 {
   863  		if !req.Recursive {
   864  			err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "keyspace %v still has %d shards; use Recursive=true or remove them manually", req.Keyspace, len(shards))
   865  			return nil, err
   866  		}
   867  
   868  		log.Infof("Deleting all %d shards (and their tablets) in keyspace %v", len(shards), req.Keyspace)
   869  		recursive := true
   870  		evenIfServing := true
   871  		force := req.Force
   872  
   873  		for _, shard := range shards {
   874  			log.Infof("Recursively deleting shard %v/%v", req.Keyspace, shard)
   875  			err = deleteShard(ctx, s.ts, req.Keyspace, shard, recursive, evenIfServing, force)
   876  			if err != nil {
   877  				err = fmt.Errorf("cannot delete shard %v/%v: %w", req.Keyspace, shard, err)
   878  				return nil, err
   879  			}
   880  		}
   881  	}
   882  
   883  	cells, err := s.ts.GetKnownCells(ctx)
   884  	if err != nil {
   885  		return nil, err
   886  	}
   887  
   888  	for _, cell := range cells {
   889  		if err := s.ts.DeleteKeyspaceReplication(ctx, cell, req.Keyspace); err != nil && !topo.IsErrType(err, topo.NoNode) {
   890  			log.Warningf("Cannot delete KeyspaceReplication in cell %v for %v: %v", cell, req.Keyspace, err)
   891  		}
   892  
   893  		if err := s.ts.DeleteSrvKeyspace(ctx, cell, req.Keyspace); err != nil && !topo.IsErrType(err, topo.NoNode) {
   894  			log.Warningf("Cannot delete SrvKeyspace in cell %v for %v: %v", cell, req.Keyspace, err)
   895  		}
   896  	}
   897  
   898  	err = s.ts.DeleteKeyspace(ctx, req.Keyspace)
   899  	if err != nil {
   900  		return nil, err
   901  	}
   902  
   903  	return &vtctldatapb.DeleteKeyspaceResponse{}, nil
   904  }
   905  
   906  // DeleteShards is part of the vtctlservicepb.VtctldServer interface.
   907  func (s *VtctldServer) DeleteShards(ctx context.Context, req *vtctldatapb.DeleteShardsRequest) (resp *vtctldatapb.DeleteShardsResponse, err error) {
   908  	span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteShards")
   909  	defer span.Finish()
   910  
   911  	defer panicHandler(&err)
   912  
   913  	span.Annotate("num_shards", len(req.Shards))
   914  	span.Annotate("even_if_serving", req.EvenIfServing)
   915  	span.Annotate("recursive", req.Recursive)
   916  	span.Annotate("force", req.Force)
   917  
   918  	for _, shard := range req.Shards {
   919  		if err2 := deleteShard(ctx, s.ts, shard.Keyspace, shard.Name, req.Recursive, req.EvenIfServing, req.Force); err2 != nil {
   920  			err = err2
   921  			return nil, err
   922  		}
   923  	}
   924  
   925  	return &vtctldatapb.DeleteShardsResponse{}, nil
   926  }
   927  
   928  // DeleteSrvVSchema is part of the vtctlservicepb.VtctldServer interface.
   929  func (s *VtctldServer) DeleteSrvVSchema(ctx context.Context, req *vtctldatapb.DeleteSrvVSchemaRequest) (resp *vtctldatapb.DeleteSrvVSchemaResponse, err error) {
   930  	span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteSrvVSchema")
   931  	defer span.Finish()
   932  
   933  	defer panicHandler(&err)
   934  
   935  	if req.Cell == "" {
   936  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "cell must be non-empty")
   937  		return nil, err
   938  	}
   939  
   940  	span.Annotate("cell", req.Cell)
   941  
   942  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
   943  	defer cancel()
   944  
   945  	if err = s.ts.DeleteSrvVSchema(ctx, req.Cell); err != nil {
   946  		return nil, err
   947  	}
   948  
   949  	return &vtctldatapb.DeleteSrvVSchemaResponse{}, nil
   950  }
   951  
   952  // DeleteTablets is part of the vtctlservicepb.VtctldServer interface.
   953  func (s *VtctldServer) DeleteTablets(ctx context.Context, req *vtctldatapb.DeleteTabletsRequest) (resp *vtctldatapb.DeleteTabletsResponse, err error) {
   954  	span, ctx := trace.NewSpan(ctx, "VtctldServer.DeleteTablets")
   955  	defer span.Finish()
   956  
   957  	defer panicHandler(&err)
   958  
   959  	span.Annotate("num_tablets", len(req.TabletAliases))
   960  	span.Annotate("allow_primary", req.AllowPrimary)
   961  
   962  	for _, alias := range req.TabletAliases {
   963  		if err2 := deleteTablet(ctx, s.ts, alias, req.AllowPrimary); err2 != nil {
   964  			err = err2
   965  			return nil, err
   966  		}
   967  	}
   968  
   969  	return &vtctldatapb.DeleteTabletsResponse{}, nil
   970  }
   971  
   972  // EmergencyReparentShard is part of the vtctldservicepb.VtctldServer interface.
   973  func (s *VtctldServer) EmergencyReparentShard(ctx context.Context, req *vtctldatapb.EmergencyReparentShardRequest) (resp *vtctldatapb.EmergencyReparentShardResponse, err error) {
   974  	span, ctx := trace.NewSpan(ctx, "VtctldServer.EmergencyReparentShard")
   975  	defer span.Finish()
   976  
   977  	defer panicHandler(&err)
   978  
   979  	span.Annotate("keyspace", req.Keyspace)
   980  	span.Annotate("shard", req.Shard)
   981  	span.Annotate("new_primary_alias", topoproto.TabletAliasString(req.NewPrimary))
   982  
   983  	ignoreReplicaAliases := topoproto.TabletAliasList(req.IgnoreReplicas).ToStringSlice()
   984  	span.Annotate("ignore_replicas", strings.Join(ignoreReplicaAliases, ","))
   985  
   986  	waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout)
   987  	if err != nil {
   988  		return nil, err
   989  	} else if !ok {
   990  		waitReplicasTimeout = time.Second * 30
   991  	}
   992  
   993  	span.Annotate("wait_replicas_timeout_sec", waitReplicasTimeout.Seconds())
   994  	span.Annotate("prevent_cross_cell_promotion", req.PreventCrossCellPromotion)
   995  
   996  	m := sync.RWMutex{}
   997  	logstream := []*logutilpb.Event{}
   998  	logger := logutil.NewCallbackLogger(func(e *logutilpb.Event) {
   999  		m.Lock()
  1000  		defer m.Unlock()
  1001  
  1002  		logstream = append(logstream, e)
  1003  	})
  1004  
  1005  	ev, err := reparentutil.NewEmergencyReparenter(s.ts, s.tmc, logger).ReparentShard(ctx,
  1006  		req.Keyspace,
  1007  		req.Shard,
  1008  		reparentutil.EmergencyReparentOptions{
  1009  			NewPrimaryAlias:           req.NewPrimary,
  1010  			IgnoreReplicas:            sets.New[string](ignoreReplicaAliases...),
  1011  			WaitReplicasTimeout:       waitReplicasTimeout,
  1012  			PreventCrossCellPromotion: req.PreventCrossCellPromotion,
  1013  		},
  1014  	)
  1015  
  1016  	resp = &vtctldatapb.EmergencyReparentShardResponse{
  1017  		Keyspace: req.Keyspace,
  1018  		Shard:    req.Shard,
  1019  	}
  1020  
  1021  	if ev != nil {
  1022  		resp.Keyspace = ev.ShardInfo.Keyspace()
  1023  		resp.Shard = ev.ShardInfo.ShardName()
  1024  
  1025  		if ev.NewPrimary != nil && !topoproto.TabletAliasIsZero(ev.NewPrimary.Alias) {
  1026  			resp.PromotedPrimary = ev.NewPrimary.Alias
  1027  		}
  1028  	}
  1029  
  1030  	m.RLock()
  1031  	defer m.RUnlock()
  1032  
  1033  	resp.Events = make([]*logutilpb.Event, len(logstream))
  1034  	copy(resp.Events, logstream)
  1035  
  1036  	return resp, err
  1037  }
  1038  
  1039  // ExecuteFetchAsApp is part of the vtctlservicepb.VtctldServer interface.
  1040  func (s *VtctldServer) ExecuteFetchAsApp(ctx context.Context, req *vtctldatapb.ExecuteFetchAsAppRequest) (resp *vtctldatapb.ExecuteFetchAsAppResponse, err error) {
  1041  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ExecuteFetchAsApp")
  1042  	defer span.Finish()
  1043  
  1044  	defer panicHandler(&err)
  1045  
  1046  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1047  	span.Annotate("max_rows", req.MaxRows)
  1048  	span.Annotate("use_pool", req.UsePool)
  1049  
  1050  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  1051  	if err != nil {
  1052  		return nil, err
  1053  	}
  1054  
  1055  	qr, err := s.tmc.ExecuteFetchAsApp(ctx, ti.Tablet, req.UsePool, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{
  1056  		Query:   []byte(req.Query),
  1057  		MaxRows: uint64(req.MaxRows),
  1058  	})
  1059  	if err != nil {
  1060  		return nil, err
  1061  	}
  1062  
  1063  	return &vtctldatapb.ExecuteFetchAsAppResponse{Result: qr}, nil
  1064  }
  1065  
  1066  // ExecuteFetchAsDBA is part of the vtctlservicepb.VtctldServer interface.
  1067  func (s *VtctldServer) ExecuteFetchAsDBA(ctx context.Context, req *vtctldatapb.ExecuteFetchAsDBARequest) (resp *vtctldatapb.ExecuteFetchAsDBAResponse, err error) {
  1068  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ExecuteFetchAsDBA")
  1069  	defer span.Finish()
  1070  
  1071  	defer panicHandler(&err)
  1072  
  1073  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1074  	span.Annotate("max_rows", req.MaxRows)
  1075  	span.Annotate("disable_binlogs", req.DisableBinlogs)
  1076  	span.Annotate("reload_schema", req.ReloadSchema)
  1077  
  1078  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  1079  	if err != nil {
  1080  		return nil, err
  1081  	}
  1082  
  1083  	qr, err := s.tmc.ExecuteFetchAsDba(ctx, ti.Tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{
  1084  		Query:          []byte(req.Query),
  1085  		MaxRows:        uint64(req.MaxRows),
  1086  		DisableBinlogs: req.DisableBinlogs,
  1087  		ReloadSchema:   req.ReloadSchema,
  1088  	})
  1089  	if err != nil {
  1090  		return nil, err
  1091  	}
  1092  
  1093  	return &vtctldatapb.ExecuteFetchAsDBAResponse{Result: qr}, nil
  1094  }
  1095  
  1096  // ExecuteHook is part of the vtctlservicepb.VtctldServer interface.
  1097  func (s *VtctldServer) ExecuteHook(ctx context.Context, req *vtctldatapb.ExecuteHookRequest) (resp *vtctldatapb.ExecuteHookResponse, err error) {
  1098  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ExecuteHook")
  1099  	defer span.Finish()
  1100  
  1101  	defer panicHandler(&err)
  1102  
  1103  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1104  
  1105  	if req.TabletHookRequest == nil {
  1106  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "TabletHookRequest cannot be nil")
  1107  		return nil, err
  1108  	}
  1109  
  1110  	span.Annotate("hook_name", req.TabletHookRequest.Name)
  1111  
  1112  	if strings.Contains(req.TabletHookRequest.Name, "/") {
  1113  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "hook name cannot contain a '/'; was %v", req.TabletHookRequest.Name)
  1114  		return nil, err
  1115  	}
  1116  
  1117  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  1118  	if err != nil {
  1119  		return nil, err
  1120  	}
  1121  
  1122  	hook := hk.NewHookWithEnv(req.TabletHookRequest.Name, req.TabletHookRequest.Parameters, req.TabletHookRequest.ExtraEnv)
  1123  	hr, err := s.tmc.ExecuteHook(ctx, ti.Tablet, hook)
  1124  	if err != nil {
  1125  		return nil, err
  1126  	}
  1127  
  1128  	return &vtctldatapb.ExecuteHookResponse{HookResult: &tabletmanagerdatapb.ExecuteHookResponse{
  1129  		ExitStatus: int64(hr.ExitStatus),
  1130  		Stdout:     hr.Stdout,
  1131  		Stderr:     hr.Stderr,
  1132  	}}, nil
  1133  }
  1134  
  1135  // FindAllShardsInKeyspace is part of the vtctlservicepb.VtctldServer interface.
  1136  func (s *VtctldServer) FindAllShardsInKeyspace(ctx context.Context, req *vtctldatapb.FindAllShardsInKeyspaceRequest) (resp *vtctldatapb.FindAllShardsInKeyspaceResponse, err error) {
  1137  	span, ctx := trace.NewSpan(ctx, "VtctldServer.FindAllShardsInKeyspace")
  1138  	defer span.Finish()
  1139  
  1140  	defer panicHandler(&err)
  1141  
  1142  	span.Annotate("keyspace", req.Keyspace)
  1143  
  1144  	result, err := s.ts.FindAllShardsInKeyspace(ctx, req.Keyspace)
  1145  	if err != nil {
  1146  		return nil, err
  1147  	}
  1148  
  1149  	shards := map[string]*vtctldatapb.Shard{}
  1150  	for _, shard := range result {
  1151  		shards[shard.ShardName()] = &vtctldatapb.Shard{
  1152  			Keyspace: req.Keyspace,
  1153  			Name:     shard.ShardName(),
  1154  			Shard:    shard.Shard,
  1155  		}
  1156  	}
  1157  
  1158  	return &vtctldatapb.FindAllShardsInKeyspaceResponse{
  1159  		Shards: shards,
  1160  	}, nil
  1161  }
  1162  
  1163  // GetBackups is part of the vtctldservicepb.VtctldServer interface.
  1164  func (s *VtctldServer) GetBackups(ctx context.Context, req *vtctldatapb.GetBackupsRequest) (resp *vtctldatapb.GetBackupsResponse, err error) {
  1165  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetBackups")
  1166  	defer span.Finish()
  1167  
  1168  	defer panicHandler(&err)
  1169  
  1170  	span.Annotate("keyspace", req.Keyspace)
  1171  	span.Annotate("shard", req.Shard)
  1172  	span.Annotate("limit", req.Limit)
  1173  	span.Annotate("detailed", req.Detailed)
  1174  	span.Annotate("detailed_limit", req.DetailedLimit)
  1175  
  1176  	bs, err := backupstorage.GetBackupStorage()
  1177  	if err != nil {
  1178  		return nil, err
  1179  	}
  1180  	defer bs.Close()
  1181  
  1182  	bucket := filepath.Join(req.Keyspace, req.Shard)
  1183  	span.Annotate("backup_path", bucket)
  1184  
  1185  	bhs, err := bs.ListBackups(ctx, bucket)
  1186  	if err != nil {
  1187  		return nil, err
  1188  	}
  1189  
  1190  	totalBackups := len(bhs)
  1191  	if req.Limit > 0 {
  1192  		totalBackups = int(req.Limit)
  1193  	}
  1194  
  1195  	totalDetailedBackups := len(bhs)
  1196  	if req.DetailedLimit > 0 {
  1197  		totalDetailedBackups = int(req.DetailedLimit)
  1198  	}
  1199  
  1200  	backups := make([]*mysqlctlpb.BackupInfo, 0, totalBackups)
  1201  	backupsToSkip := len(bhs) - totalBackups
  1202  	backupsToSkipDetails := len(bhs) - totalDetailedBackups
  1203  
  1204  	for i, bh := range bhs {
  1205  		if i < backupsToSkip {
  1206  			continue
  1207  		}
  1208  
  1209  		bi := mysqlctlproto.BackupHandleToProto(bh)
  1210  		bi.Keyspace = req.Keyspace
  1211  		bi.Shard = req.Shard
  1212  
  1213  		if req.Detailed {
  1214  			if i >= backupsToSkipDetails { // nolint:staticcheck
  1215  				// (TODO:@ajm188) Update backupengine/backupstorage implementations
  1216  				// to get Status info for backups.
  1217  			}
  1218  		}
  1219  
  1220  		backups = append(backups, bi)
  1221  	}
  1222  
  1223  	return &vtctldatapb.GetBackupsResponse{
  1224  		Backups: backups,
  1225  	}, nil
  1226  }
  1227  
  1228  // GetCellInfoNames is part of the vtctlservicepb.VtctldServer interface.
  1229  func (s *VtctldServer) GetCellInfoNames(ctx context.Context, req *vtctldatapb.GetCellInfoNamesRequest) (resp *vtctldatapb.GetCellInfoNamesResponse, err error) {
  1230  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetCellInfoNames")
  1231  	defer span.Finish()
  1232  
  1233  	defer panicHandler(&err)
  1234  
  1235  	names, err := s.ts.GetCellInfoNames(ctx)
  1236  	if err != nil {
  1237  		return nil, err
  1238  	}
  1239  
  1240  	return &vtctldatapb.GetCellInfoNamesResponse{Names: names}, nil
  1241  }
  1242  
  1243  // GetCellInfo is part of the vtctlservicepb.VtctldServer interface.
  1244  func (s *VtctldServer) GetCellInfo(ctx context.Context, req *vtctldatapb.GetCellInfoRequest) (resp *vtctldatapb.GetCellInfoResponse, err error) {
  1245  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetCellInfo")
  1246  	defer span.Finish()
  1247  
  1248  	defer panicHandler(&err)
  1249  
  1250  	if req.Cell == "" {
  1251  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "cell field is required")
  1252  		return nil, err
  1253  	}
  1254  
  1255  	span.Annotate("cell", req.Cell)
  1256  
  1257  	// We use a strong read, because users using this command want the latest
  1258  	// data, and this is user-generated, not used in any automated process.
  1259  	strongRead := true
  1260  	ci, err := s.ts.GetCellInfo(ctx, req.Cell, strongRead)
  1261  	if err != nil {
  1262  		return nil, err
  1263  	}
  1264  
  1265  	return &vtctldatapb.GetCellInfoResponse{CellInfo: ci}, nil
  1266  }
  1267  
  1268  // GetCellsAliases is part of the vtctlservicepb.VtctldServer interface.
  1269  func (s *VtctldServer) GetCellsAliases(ctx context.Context, req *vtctldatapb.GetCellsAliasesRequest) (resp *vtctldatapb.GetCellsAliasesResponse, err error) {
  1270  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetCellsAliases")
  1271  	defer span.Finish()
  1272  
  1273  	defer panicHandler(&err)
  1274  
  1275  	strongRead := true
  1276  	aliases, err := s.ts.GetCellsAliases(ctx, strongRead)
  1277  	if err != nil {
  1278  		return nil, err
  1279  	}
  1280  
  1281  	return &vtctldatapb.GetCellsAliasesResponse{Aliases: aliases}, nil
  1282  }
  1283  
  1284  // GetFullStatus is part of the vtctlservicepb.VtctldServer interface.
  1285  func (s *VtctldServer) GetFullStatus(ctx context.Context, req *vtctldatapb.GetFullStatusRequest) (resp *vtctldatapb.GetFullStatusResponse, err error) {
  1286  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetFullStatus")
  1287  	defer span.Finish()
  1288  
  1289  	defer panicHandler(&err)
  1290  
  1291  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1292  
  1293  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  1294  	if err != nil {
  1295  		return nil, err
  1296  	}
  1297  
  1298  	res, err := s.tmc.FullStatus(ctx, ti.Tablet)
  1299  	if err != nil {
  1300  		return nil, err
  1301  	}
  1302  
  1303  	return &vtctldatapb.GetFullStatusResponse{
  1304  		Status: res,
  1305  	}, nil
  1306  }
  1307  
  1308  // GetKeyspace is part of the vtctlservicepb.VtctldServer interface.
  1309  func (s *VtctldServer) GetKeyspace(ctx context.Context, req *vtctldatapb.GetKeyspaceRequest) (resp *vtctldatapb.GetKeyspaceResponse, err error) {
  1310  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetKeyspace")
  1311  	defer span.Finish()
  1312  
  1313  	defer panicHandler(&err)
  1314  
  1315  	span.Annotate("keyspace", req.Keyspace)
  1316  
  1317  	keyspace, err := s.ts.GetKeyspace(ctx, req.Keyspace)
  1318  	if err != nil {
  1319  		return nil, err
  1320  	}
  1321  
  1322  	return &vtctldatapb.GetKeyspaceResponse{
  1323  		Keyspace: &vtctldatapb.Keyspace{
  1324  			Name:     req.Keyspace,
  1325  			Keyspace: keyspace.Keyspace,
  1326  		},
  1327  	}, nil
  1328  }
  1329  
  1330  // GetKeyspaces is part of the vtctlservicepb.VtctldServer interface.
  1331  func (s *VtctldServer) GetKeyspaces(ctx context.Context, req *vtctldatapb.GetKeyspacesRequest) (resp *vtctldatapb.GetKeyspacesResponse, err error) {
  1332  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetKeyspaces")
  1333  	defer span.Finish()
  1334  
  1335  	defer panicHandler(&err)
  1336  
  1337  	names, err := s.ts.GetKeyspaces(ctx)
  1338  	if err != nil {
  1339  		return nil, err
  1340  	}
  1341  
  1342  	keyspaces := make([]*vtctldatapb.Keyspace, len(names))
  1343  
  1344  	for i, name := range names {
  1345  		ks, err2 := s.GetKeyspace(ctx, &vtctldatapb.GetKeyspaceRequest{Keyspace: name})
  1346  		if err2 != nil {
  1347  			err = err2
  1348  			return nil, err
  1349  		}
  1350  
  1351  		keyspaces[i] = ks.Keyspace
  1352  	}
  1353  
  1354  	return &vtctldatapb.GetKeyspacesResponse{Keyspaces: keyspaces}, nil
  1355  }
  1356  
  1357  // GetPermissions is part of the vtctlservicepb.VtctldServer interface.
  1358  func (s *VtctldServer) GetPermissions(ctx context.Context, req *vtctldatapb.GetPermissionsRequest) (resp *vtctldatapb.GetPermissionsResponse, err error) {
  1359  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetPermissions")
  1360  	defer span.Finish()
  1361  
  1362  	defer panicHandler(&err)
  1363  
  1364  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1365  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  1366  	if err != nil {
  1367  		err = vterrors.Errorf(vtrpc.Code_NOT_FOUND, "Failed to get tablet %v: %v", req.TabletAlias, err)
  1368  		return nil, err
  1369  	}
  1370  
  1371  	p, err := s.tmc.GetPermissions(ctx, ti.Tablet)
  1372  	if err != nil {
  1373  		return nil, err
  1374  	}
  1375  
  1376  	return &vtctldatapb.GetPermissionsResponse{
  1377  		Permissions: p,
  1378  	}, nil
  1379  }
  1380  
  1381  // GetRoutingRules is part of the vtctlservicepb.VtctldServer interface.
  1382  func (s *VtctldServer) GetRoutingRules(ctx context.Context, req *vtctldatapb.GetRoutingRulesRequest) (resp *vtctldatapb.GetRoutingRulesResponse, err error) {
  1383  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetRoutingRules")
  1384  	defer span.Finish()
  1385  
  1386  	defer panicHandler(&err)
  1387  
  1388  	rr, err := s.ts.GetRoutingRules(ctx)
  1389  	if err != nil {
  1390  		return nil, err
  1391  	}
  1392  
  1393  	return &vtctldatapb.GetRoutingRulesResponse{
  1394  		RoutingRules: rr,
  1395  	}, nil
  1396  }
  1397  
  1398  // GetShardRoutingRules is part of the vtctlservicepb.VtctldServer interface.
  1399  func (s *VtctldServer) GetShardRoutingRules(ctx context.Context, req *vtctldatapb.GetShardRoutingRulesRequest) (*vtctldatapb.GetShardRoutingRulesResponse, error) {
  1400  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetShardRoutingRules")
  1401  	defer span.Finish()
  1402  
  1403  	srr, err := s.ts.GetShardRoutingRules(ctx)
  1404  	if err != nil {
  1405  		return nil, err
  1406  	}
  1407  
  1408  	return &vtctldatapb.GetShardRoutingRulesResponse{
  1409  		ShardRoutingRules: srr,
  1410  	}, nil
  1411  }
  1412  
  1413  // GetSchema is part of the vtctlservicepb.VtctldServer interface.
  1414  func (s *VtctldServer) GetSchema(ctx context.Context, req *vtctldatapb.GetSchemaRequest) (resp *vtctldatapb.GetSchemaResponse, err error) {
  1415  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSchema")
  1416  	defer span.Finish()
  1417  
  1418  	defer panicHandler(&err)
  1419  
  1420  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1421  	span.Annotate("tables", strings.Join(req.Tables, ","))
  1422  	span.Annotate("exclude_tables", strings.Join(req.ExcludeTables, ","))
  1423  	span.Annotate("include_views", req.IncludeViews)
  1424  	span.Annotate("table_names_only", req.TableNamesOnly)
  1425  	span.Annotate("table_sizes_only", req.TableSizesOnly)
  1426  	span.Annotate("table_schema_only", req.TableSchemaOnly)
  1427  
  1428  	r := &tabletmanagerdatapb.GetSchemaRequest{Tables: req.Tables, ExcludeTables: req.ExcludeTables, IncludeViews: req.IncludeViews, TableSchemaOnly: req.TableSchemaOnly}
  1429  	sd, err := schematools.GetSchema(ctx, s.ts, s.tmc, req.TabletAlias, r)
  1430  	if err != nil {
  1431  		return nil, err
  1432  	}
  1433  
  1434  	if req.TableNamesOnly {
  1435  		nameTds := make([]*tabletmanagerdatapb.TableDefinition, len(sd.TableDefinitions))
  1436  
  1437  		for i, td := range sd.TableDefinitions {
  1438  			nameTds[i] = &tabletmanagerdatapb.TableDefinition{
  1439  				Name: td.Name,
  1440  			}
  1441  		}
  1442  
  1443  		sd.TableDefinitions = nameTds
  1444  	} else if req.TableSizesOnly {
  1445  		sizeTds := make([]*tabletmanagerdatapb.TableDefinition, len(sd.TableDefinitions))
  1446  
  1447  		for i, td := range sd.TableDefinitions {
  1448  			sizeTds[i] = &tabletmanagerdatapb.TableDefinition{
  1449  				Name:       td.Name,
  1450  				Type:       td.Type,
  1451  				RowCount:   td.RowCount,
  1452  				DataLength: td.DataLength,
  1453  			}
  1454  		}
  1455  
  1456  		sd.TableDefinitions = sizeTds
  1457  	}
  1458  
  1459  	return &vtctldatapb.GetSchemaResponse{
  1460  		Schema: sd,
  1461  	}, nil
  1462  }
  1463  
  1464  // GetShard is part of the vtctlservicepb.VtctldServer interface.
  1465  func (s *VtctldServer) GetShard(ctx context.Context, req *vtctldatapb.GetShardRequest) (resp *vtctldatapb.GetShardResponse, err error) {
  1466  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetShard")
  1467  	defer span.Finish()
  1468  
  1469  	defer panicHandler(&err)
  1470  
  1471  	span.Annotate("keyspace", req.Keyspace)
  1472  	span.Annotate("shard", req.ShardName)
  1473  
  1474  	shard, err := s.ts.GetShard(ctx, req.Keyspace, req.ShardName)
  1475  	if err != nil {
  1476  		return nil, err
  1477  	}
  1478  
  1479  	return &vtctldatapb.GetShardResponse{
  1480  		Shard: &vtctldatapb.Shard{
  1481  			Keyspace: req.Keyspace,
  1482  			Name:     req.ShardName,
  1483  			Shard:    shard.Shard,
  1484  		},
  1485  	}, nil
  1486  }
  1487  
  1488  // GetSrvKeyspaceNames is part of the vtctlservicepb.VtctldServer interface.
  1489  func (s *VtctldServer) GetSrvKeyspaceNames(ctx context.Context, req *vtctldatapb.GetSrvKeyspaceNamesRequest) (resp *vtctldatapb.GetSrvKeyspaceNamesResponse, err error) {
  1490  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvKeyspaceNames")
  1491  	defer span.Finish()
  1492  
  1493  	defer panicHandler(&err)
  1494  
  1495  	cells := req.Cells
  1496  	if len(cells) == 0 {
  1497  		ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  1498  		defer cancel()
  1499  
  1500  		cells, err = s.ts.GetCellInfoNames(ctx)
  1501  		if err != nil {
  1502  			return nil, err
  1503  		}
  1504  	}
  1505  
  1506  	namesByCell := make(map[string]*vtctldatapb.GetSrvKeyspaceNamesResponse_NameList, len(cells))
  1507  
  1508  	// Contact each cell sequentially, each cell is bounded by *topo.RemoteOperationTimeout.
  1509  	// Total runtime is O(len(cells) * topo.RemoteOperationTimeout).
  1510  	for _, cell := range cells {
  1511  		ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  1512  		names, err2 := s.ts.GetSrvKeyspaceNames(ctx, cell)
  1513  		if err2 != nil {
  1514  			cancel()
  1515  			err = err2
  1516  			return nil, err
  1517  		}
  1518  
  1519  		cancel()
  1520  		namesByCell[cell] = &vtctldatapb.GetSrvKeyspaceNamesResponse_NameList{Names: names}
  1521  	}
  1522  
  1523  	return &vtctldatapb.GetSrvKeyspaceNamesResponse{
  1524  		Names: namesByCell,
  1525  	}, nil
  1526  }
  1527  
  1528  // GetSrvKeyspaces is part of the vtctlservicepb.VtctldServer interface.
  1529  func (s *VtctldServer) GetSrvKeyspaces(ctx context.Context, req *vtctldatapb.GetSrvKeyspacesRequest) (resp *vtctldatapb.GetSrvKeyspacesResponse, err error) {
  1530  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvKeyspaces")
  1531  	defer span.Finish()
  1532  
  1533  	defer panicHandler(&err)
  1534  
  1535  	cells := req.Cells
  1536  
  1537  	if len(cells) == 0 {
  1538  		cells, err = s.ts.GetCellInfoNames(ctx)
  1539  		if err != nil {
  1540  			return nil, err
  1541  		}
  1542  	}
  1543  
  1544  	span.Annotate("cells", strings.Join(cells, ","))
  1545  
  1546  	srvKeyspaces := make(map[string]*topodatapb.SrvKeyspace, len(cells))
  1547  
  1548  	for _, cell := range cells {
  1549  		var srvKeyspace *topodatapb.SrvKeyspace
  1550  		srvKeyspace, err = s.ts.GetSrvKeyspace(ctx, cell, req.Keyspace)
  1551  
  1552  		if err != nil {
  1553  			if !topo.IsErrType(err, topo.NoNode) {
  1554  				return nil, err
  1555  			}
  1556  
  1557  			log.Warningf("no srvkeyspace for keyspace %s in cell %s", req.Keyspace, cell)
  1558  
  1559  			srvKeyspace = nil
  1560  		}
  1561  
  1562  		srvKeyspaces[cell] = srvKeyspace
  1563  	}
  1564  
  1565  	return &vtctldatapb.GetSrvKeyspacesResponse{
  1566  		SrvKeyspaces: srvKeyspaces,
  1567  	}, nil
  1568  }
  1569  
  1570  // UpdateThrottlerConfig updates throttler config for all cells
  1571  func (s *VtctldServer) UpdateThrottlerConfig(ctx context.Context, req *vtctldatapb.UpdateThrottlerConfigRequest) (resp *vtctldatapb.UpdateThrottlerConfigResponse, err error) {
  1572  	span, ctx := trace.NewSpan(ctx, "VtctldServer.UpdateThrottlerConfig")
  1573  	defer span.Finish()
  1574  
  1575  	defer panicHandler(&err)
  1576  
  1577  	if req.Enable && req.Disable {
  1578  		return nil, fmt.Errorf("--enable and --disable are mutually exclusive")
  1579  	}
  1580  	if req.CheckAsCheckSelf && req.CheckAsCheckShard {
  1581  		return nil, fmt.Errorf("--check-as-check-self and --check-as-check-shard are mutually exclusive")
  1582  	}
  1583  
  1584  	update := func(throttlerConfig *topodatapb.ThrottlerConfig) *topodatapb.ThrottlerConfig {
  1585  		if throttlerConfig == nil {
  1586  			throttlerConfig = &topodatapb.ThrottlerConfig{}
  1587  		}
  1588  		if req.CustomQuerySet {
  1589  			// custom query provided
  1590  			throttlerConfig.CustomQuery = req.CustomQuery
  1591  			throttlerConfig.Threshold = req.Threshold // allowed to be zero/negative because who knows what kind of custom query this is
  1592  		} else {
  1593  			// no custom query, throttler works by querying replication lag. We only allow positive values
  1594  			if req.Threshold > 0 {
  1595  				throttlerConfig.Threshold = req.Threshold
  1596  			}
  1597  		}
  1598  		if req.Enable {
  1599  			throttlerConfig.Enabled = true
  1600  		}
  1601  		if req.Disable {
  1602  			throttlerConfig.Enabled = false
  1603  		}
  1604  		if req.CheckAsCheckSelf {
  1605  			throttlerConfig.CheckAsCheckSelf = true
  1606  		}
  1607  		if req.CheckAsCheckShard {
  1608  			throttlerConfig.CheckAsCheckSelf = false
  1609  		}
  1610  		return throttlerConfig
  1611  	}
  1612  
  1613  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "UpdateThrottlerConfig")
  1614  	if lockErr != nil {
  1615  		return nil, lockErr
  1616  	}
  1617  	defer unlock(&err)
  1618  
  1619  	ki, err := s.ts.GetKeyspace(ctx, req.Keyspace)
  1620  	if err != nil {
  1621  		return nil, err
  1622  	}
  1623  
  1624  	ki.ThrottlerConfig = update(ki.ThrottlerConfig)
  1625  
  1626  	err = s.ts.UpdateKeyspace(ctx, ki)
  1627  	if err != nil {
  1628  		return nil, err
  1629  	}
  1630  
  1631  	_, err = s.ts.UpdateSrvKeyspaceThrottlerConfig(ctx, req.Keyspace, []string{}, update)
  1632  
  1633  	return &vtctldatapb.UpdateThrottlerConfigResponse{}, err
  1634  }
  1635  
  1636  // GetSrvVSchema is part of the vtctlservicepb.VtctldServer interface.
  1637  func (s *VtctldServer) GetSrvVSchema(ctx context.Context, req *vtctldatapb.GetSrvVSchemaRequest) (resp *vtctldatapb.GetSrvVSchemaResponse, err error) {
  1638  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvVSchema")
  1639  	defer span.Finish()
  1640  
  1641  	defer panicHandler(&err)
  1642  
  1643  	span.Annotate("cell", req.Cell)
  1644  
  1645  	vschema, err := s.ts.GetSrvVSchema(ctx, req.Cell)
  1646  	if err != nil {
  1647  		return nil, err
  1648  	}
  1649  
  1650  	return &vtctldatapb.GetSrvVSchemaResponse{
  1651  		SrvVSchema: vschema,
  1652  	}, nil
  1653  }
  1654  
  1655  // GetSrvVSchemas is part of the vtctlservicepb.VtctldServer interface.
  1656  func (s *VtctldServer) GetSrvVSchemas(ctx context.Context, req *vtctldatapb.GetSrvVSchemasRequest) (resp *vtctldatapb.GetSrvVSchemasResponse, err error) {
  1657  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetSrvVSchemas")
  1658  	defer span.Finish()
  1659  
  1660  	defer panicHandler(&err)
  1661  
  1662  	allCells, err := s.ts.GetCellInfoNames(ctx)
  1663  	if err != nil {
  1664  		return nil, err
  1665  	}
  1666  
  1667  	cells := allCells
  1668  
  1669  	// Omit any cell names in the request that don't map to existing cells
  1670  	if len(req.Cells) > 0 {
  1671  		s1 := sets.New[string](allCells...)
  1672  		s2 := sets.New[string](req.Cells...)
  1673  
  1674  		cells = sets.List(s1.Intersection(s2))
  1675  	}
  1676  
  1677  	span.Annotate("cells", strings.Join(cells, ","))
  1678  	svs := make(map[string]*vschemapb.SrvVSchema, len(cells))
  1679  
  1680  	for _, cell := range cells {
  1681  		var sv *vschemapb.SrvVSchema
  1682  		sv, err = s.ts.GetSrvVSchema(ctx, cell)
  1683  
  1684  		if err != nil {
  1685  			if !topo.IsErrType(err, topo.NoNode) {
  1686  				return nil, err
  1687  			}
  1688  
  1689  			log.Warningf("no SrvVSchema for cell %s", cell)
  1690  			sv = nil
  1691  		}
  1692  
  1693  		svs[cell] = sv
  1694  	}
  1695  
  1696  	return &vtctldatapb.GetSrvVSchemasResponse{
  1697  		SrvVSchemas: svs,
  1698  	}, nil
  1699  }
  1700  
  1701  // GetTablet is part of the vtctlservicepb.VtctldServer interface.
  1702  func (s *VtctldServer) GetTablet(ctx context.Context, req *vtctldatapb.GetTabletRequest) (resp *vtctldatapb.GetTabletResponse, err error) {
  1703  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetTablet")
  1704  	defer span.Finish()
  1705  
  1706  	defer panicHandler(&err)
  1707  
  1708  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  1709  
  1710  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  1711  	if err != nil {
  1712  		return nil, err
  1713  	}
  1714  
  1715  	return &vtctldatapb.GetTabletResponse{
  1716  		Tablet: ti.Tablet,
  1717  	}, nil
  1718  }
  1719  
  1720  // GetTablets is part of the vtctlservicepb.VtctldServer interface.
  1721  func (s *VtctldServer) GetTablets(ctx context.Context, req *vtctldatapb.GetTabletsRequest) (resp *vtctldatapb.GetTabletsResponse, err error) {
  1722  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetTablets")
  1723  	defer span.Finish()
  1724  
  1725  	defer panicHandler(&err)
  1726  
  1727  	span.Annotate("cells", strings.Join(req.Cells, ","))
  1728  	if req.TabletType != topodatapb.TabletType_UNKNOWN {
  1729  		span.Annotate("tablet_type", topodatapb.TabletType_name[int32(req.TabletType)])
  1730  	}
  1731  	span.Annotate("strict", req.Strict)
  1732  
  1733  	// It is possible that an old primary has not yet updated its type in the
  1734  	// topo. In that case, report its type as UNKNOWN. It used to be PRIMARY but
  1735  	// is no longer the serving primary.
  1736  	adjustTypeForStalePrimary := func(ti *topo.TabletInfo, mtst time.Time) {
  1737  		if ti.Type == topodatapb.TabletType_PRIMARY && ti.GetPrimaryTermStartTime().Before(mtst) {
  1738  			ti.Tablet.Type = topodatapb.TabletType_UNKNOWN
  1739  		}
  1740  	}
  1741  
  1742  	// Create a context for our per-cell RPCs, with a timeout upper-bounded at
  1743  	// the RemoteOperationTimeout.
  1744  	//
  1745  	// Per-cell goroutines may also cancel this context if they fail and the
  1746  	// request specified Strict=true to allow us to fail faster.
  1747  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  1748  	defer cancel()
  1749  
  1750  	var tabletMap map[string]*topo.TabletInfo
  1751  
  1752  	switch {
  1753  	case len(req.TabletAliases) > 0:
  1754  		span.Annotate("tablet_aliases", strings.Join(topoproto.TabletAliasList(req.TabletAliases).ToStringSlice(), ","))
  1755  
  1756  		tabletMap, err = s.ts.GetTabletMap(ctx, req.TabletAliases)
  1757  		if err != nil {
  1758  			err = fmt.Errorf("GetTabletMap(%v) failed: %w", req.TabletAliases, err)
  1759  		}
  1760  	case req.Keyspace != "" && req.Shard != "":
  1761  		span.Annotate("keyspace", req.Keyspace)
  1762  		span.Annotate("shard", req.Shard)
  1763  
  1764  		tabletMap, err = s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard)
  1765  		if err != nil {
  1766  			err = fmt.Errorf("GetTabletMapForShard(%s, %s) failed: %w", req.Keyspace, req.Shard, err)
  1767  		}
  1768  	default:
  1769  		// goto the req.Cells branch
  1770  		tabletMap = nil
  1771  	}
  1772  
  1773  	if err != nil {
  1774  		switch {
  1775  		case topo.IsErrType(err, topo.PartialResult):
  1776  			if req.Strict {
  1777  				return nil, err
  1778  			}
  1779  
  1780  			log.Warningf("GetTablets encountered non-fatal error %s; continuing because Strict=false", err)
  1781  		default:
  1782  			return nil, err
  1783  		}
  1784  	}
  1785  
  1786  	if tabletMap != nil {
  1787  		var truePrimaryTimestamp time.Time
  1788  		for _, ti := range tabletMap {
  1789  			if ti.Type == topodatapb.TabletType_PRIMARY {
  1790  				primaryTimestamp := ti.GetPrimaryTermStartTime()
  1791  				if primaryTimestamp.After(truePrimaryTimestamp) {
  1792  					truePrimaryTimestamp = primaryTimestamp
  1793  				}
  1794  			}
  1795  		}
  1796  
  1797  		tablets := make([]*topodatapb.Tablet, 0, len(tabletMap))
  1798  		for _, ti := range tabletMap {
  1799  			adjustTypeForStalePrimary(ti, truePrimaryTimestamp)
  1800  			tablets = append(tablets, ti.Tablet)
  1801  		}
  1802  
  1803  		return &vtctldatapb.GetTabletsResponse{Tablets: tablets}, nil
  1804  	}
  1805  
  1806  	cells := req.Cells
  1807  	if len(cells) == 0 {
  1808  		var c []string
  1809  		c, err = s.ts.GetKnownCells(ctx)
  1810  		if err != nil {
  1811  			return nil, err
  1812  		}
  1813  
  1814  		cells = c
  1815  	}
  1816  
  1817  	var (
  1818  		m          sync.Mutex
  1819  		wg         sync.WaitGroup
  1820  		rec        concurrency.AllErrorRecorder
  1821  		allTablets []*topo.TabletInfo
  1822  	)
  1823  
  1824  	for _, cell := range cells {
  1825  		wg.Add(1)
  1826  
  1827  		go func(cell string) {
  1828  			defer wg.Done()
  1829  
  1830  			tablets, err := s.ts.GetTabletsByCell(ctx, cell)
  1831  			if err != nil {
  1832  				if req.Strict {
  1833  					log.Infof("GetTablets got an error from cell %s: %s. Running in strict mode, so canceling other cell RPCs", cell, err)
  1834  					cancel()
  1835  				}
  1836  				rec.RecordError(fmt.Errorf("GetTabletsByCell(%s) failed: %w", cell, err))
  1837  				return
  1838  			}
  1839  
  1840  			m.Lock()
  1841  			defer m.Unlock()
  1842  			allTablets = append(allTablets, tablets...)
  1843  		}(cell)
  1844  	}
  1845  
  1846  	wg.Wait()
  1847  
  1848  	if rec.HasErrors() {
  1849  		if req.Strict || len(rec.Errors) == len(cells) {
  1850  			err = rec.Error()
  1851  			return nil, err
  1852  		}
  1853  	}
  1854  
  1855  	// Collect true primary term start times, and optionally filter out any
  1856  	// tablets by keyspace according to the request.
  1857  	PrimaryTermStartTimes := map[string]time.Time{}
  1858  	filteredTablets := make([]*topo.TabletInfo, 0, len(allTablets))
  1859  
  1860  	for _, tablet := range allTablets {
  1861  		if req.Keyspace != "" && tablet.Keyspace != req.Keyspace {
  1862  			continue
  1863  		}
  1864  		if req.TabletType != 0 && tablet.Type != req.TabletType {
  1865  			continue
  1866  		}
  1867  
  1868  		key := tablet.Keyspace + "." + tablet.Shard
  1869  		if v, ok := PrimaryTermStartTimes[key]; ok {
  1870  			if tablet.GetPrimaryTermStartTime().After(v) {
  1871  				PrimaryTermStartTimes[key] = tablet.GetPrimaryTermStartTime()
  1872  			}
  1873  		} else {
  1874  			PrimaryTermStartTimes[key] = tablet.GetPrimaryTermStartTime()
  1875  		}
  1876  
  1877  		filteredTablets = append(filteredTablets, tablet)
  1878  	}
  1879  
  1880  	adjustedTablets := make([]*topodatapb.Tablet, len(filteredTablets))
  1881  
  1882  	// collect the tablets with adjusted primary term start times. they've
  1883  	// already been filtered by the above loop, so no keyspace filtering
  1884  	// here.
  1885  	for i, ti := range filteredTablets {
  1886  		key := ti.Keyspace + "." + ti.Shard
  1887  		adjustTypeForStalePrimary(ti, PrimaryTermStartTimes[key])
  1888  
  1889  		adjustedTablets[i] = ti.Tablet
  1890  	}
  1891  
  1892  	return &vtctldatapb.GetTabletsResponse{
  1893  		Tablets: adjustedTablets,
  1894  	}, nil
  1895  }
  1896  
  1897  // GetTopologyPath is part of the vtctlservicepb.VtctldServer interface.
  1898  // It returns the cell located at the provided path in the topology server.
  1899  func (s *VtctldServer) GetTopologyPath(ctx context.Context, req *vtctldatapb.GetTopologyPathRequest) (*vtctldatapb.GetTopologyPathResponse, error) {
  1900  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetTopology")
  1901  	defer span.Finish()
  1902  
  1903  	// handle toplevel display: global, then one line per cell.
  1904  	if req.Path == "/" {
  1905  		cells, err := s.ts.GetKnownCells(ctx)
  1906  		if err != nil {
  1907  			return nil, err
  1908  		}
  1909  		resp := vtctldatapb.GetTopologyPathResponse{
  1910  			Cell: &vtctldatapb.TopologyCell{
  1911  				Path: req.Path,
  1912  				// the toplevel display has no name, just children
  1913  				Children: append([]string{topo.GlobalCell}, cells...),
  1914  			},
  1915  		}
  1916  		return &resp, nil
  1917  	}
  1918  
  1919  	// otherwise, delegate to getTopologyCell to parse the path and return the cell there
  1920  	cell, err := s.getTopologyCell(ctx, req.Path)
  1921  	if err != nil {
  1922  		return nil, err
  1923  	}
  1924  
  1925  	return &vtctldatapb.GetTopologyPathResponse{
  1926  		Cell: cell,
  1927  	}, nil
  1928  }
  1929  
  1930  // GetVersion returns the version of a tablet from its debug vars
  1931  func (s *VtctldServer) GetVersion(ctx context.Context, req *vtctldatapb.GetVersionRequest) (resp *vtctldatapb.GetVersionResponse, err error) {
  1932  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetVersion")
  1933  	defer span.Finish()
  1934  
  1935  	defer panicHandler(&err)
  1936  
  1937  	tabletAlias := req.TabletAlias
  1938  	tablet, err := s.ts.GetTablet(ctx, tabletAlias)
  1939  	if err != nil {
  1940  		return nil, err
  1941  	}
  1942  
  1943  	version, err := GetVersionFunc()(tablet.Addr())
  1944  	if err != nil {
  1945  		return nil, err
  1946  	}
  1947  	log.Infof("Tablet %v is running version '%v'", topoproto.TabletAliasString(tabletAlias), version)
  1948  	return &vtctldatapb.GetVersionResponse{Version: version}, err
  1949  }
  1950  
  1951  // GetVSchema is part of the vtctlservicepb.VtctldServer interface.
  1952  func (s *VtctldServer) GetVSchema(ctx context.Context, req *vtctldatapb.GetVSchemaRequest) (resp *vtctldatapb.GetVSchemaResponse, err error) {
  1953  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetVSchema")
  1954  	defer span.Finish()
  1955  
  1956  	defer panicHandler(&err)
  1957  
  1958  	span.Annotate("keyspace", req.Keyspace)
  1959  
  1960  	vschema, err := s.ts.GetVSchema(ctx, req.Keyspace)
  1961  	if err != nil {
  1962  		return nil, err
  1963  	}
  1964  
  1965  	return &vtctldatapb.GetVSchemaResponse{
  1966  		VSchema: vschema,
  1967  	}, nil
  1968  }
  1969  
  1970  // GetWorkflows is part of the vtctlservicepb.VtctldServer interface.
  1971  func (s *VtctldServer) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflowsRequest) (resp *vtctldatapb.GetWorkflowsResponse, err error) {
  1972  	span, ctx := trace.NewSpan(ctx, "VtctldServer.GetWorkflows")
  1973  	defer span.Finish()
  1974  
  1975  	defer panicHandler(&err)
  1976  
  1977  	span.Annotate("keyspace", req.Keyspace)
  1978  	span.Annotate("active_only", req.ActiveOnly)
  1979  
  1980  	resp, err = s.ws.GetWorkflows(ctx, req)
  1981  	return resp, err
  1982  }
  1983  
  1984  // InitShardPrimary is part of the vtctlservicepb.VtctldServer interface.
  1985  func (s *VtctldServer) InitShardPrimary(ctx context.Context, req *vtctldatapb.InitShardPrimaryRequest) (resp *vtctldatapb.InitShardPrimaryResponse, err error) {
  1986  	span, ctx := trace.NewSpan(ctx, "VtctldServer.InitShardPrimary")
  1987  	defer span.Finish()
  1988  
  1989  	defer panicHandler(&err)
  1990  
  1991  	if req.Keyspace == "" {
  1992  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "keyspace field is required")
  1993  		return nil, err
  1994  	}
  1995  
  1996  	if req.Shard == "" {
  1997  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "shard field is required")
  1998  		return nil, err
  1999  	}
  2000  
  2001  	waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout)
  2002  	if err != nil {
  2003  		return nil, err
  2004  	} else if !ok {
  2005  		waitReplicasTimeout = time.Second * 30
  2006  	}
  2007  
  2008  	span.Annotate("keyspace", req.Keyspace)
  2009  	span.Annotate("shard", req.Shard)
  2010  	span.Annotate("wait_replicas_timeout_sec", waitReplicasTimeout.Seconds())
  2011  	span.Annotate("force", req.Force)
  2012  
  2013  	ctx, unlock, err := s.ts.LockShard(ctx, req.Keyspace, req.Shard, fmt.Sprintf("InitShardPrimary(%v)", topoproto.TabletAliasString(req.PrimaryElectTabletAlias)))
  2014  	if err != nil {
  2015  		return nil, err
  2016  	}
  2017  	defer unlock(&err)
  2018  
  2019  	m := sync.RWMutex{}
  2020  	ev := &events.Reparent{}
  2021  	logstream := []*logutilpb.Event{}
  2022  
  2023  	resp = &vtctldatapb.InitShardPrimaryResponse{}
  2024  	err = s.InitShardPrimaryLocked(ctx, ev, req, waitReplicasTimeout, s.tmc, logutil.NewCallbackLogger(func(e *logutilpb.Event) {
  2025  		m.Lock()
  2026  		defer m.Unlock()
  2027  
  2028  		logstream = append(logstream, e)
  2029  	}))
  2030  	if err != nil {
  2031  		event.DispatchUpdate(ev, "failed InitShardPrimary: "+err.Error())
  2032  	} else {
  2033  		event.DispatchUpdate(ev, "finished InitShardPrimary")
  2034  	}
  2035  
  2036  	m.RLock()
  2037  	defer m.RUnlock()
  2038  
  2039  	resp.Events = make([]*logutilpb.Event, len(logstream))
  2040  	copy(resp.Events, logstream)
  2041  
  2042  	return resp, err
  2043  }
  2044  
// InitShardPrimaryLocked is the main work of doing an InitShardPrimary. It
// should only be called by callers that have already locked the shard in the topo.
// It is only public so that it can be used in wrangler and legacy vtctl server.
//
// The steps, in order, are:
//  1. Read the shard record, the keyspace durability policy and the shard's
//     tablet map, and check that the primary-elect is in the shard. If it is
//     not the recorded shard primary, not typed as a primary, or not the only
//     primary, the call fails unless req.Force is set (then it only warns).
//  2. Reset replication on every tablet in the shard, in parallel, bounded by
//     waitReplicasTimeout. Any failure aborts.
//  3. Call InitPrimary on the primary-elect to obtain its replication position.
//  4. In parallel, populate the reparent journal on the primary-elect and call
//     InitReplica on every other tablet, bounded by waitReplicasTimeout.
//  5. Once the primary-elect is done, update the shard record's PrimaryAlias
//     if it differs, then wait for the replicas.
//  6. Create the database on the primary-elect if it does not exist, and ask
//     it to refresh its state (a failure of the refresh is only logged).
//
// Progress is reported through ev via event.DispatchUpdate and through logger.
// The topology lock is re-checked after steps 2 and 3.
func (s *VtctldServer) InitShardPrimaryLocked(
	ctx context.Context,
	ev *events.Reparent,
	req *vtctldatapb.InitShardPrimaryRequest,
	waitReplicasTimeout time.Duration,
	tmc tmclient.TabletManagerClient,
	logger logutil.Logger,
) error {
	// (TODO:@amason) The code below this point is a verbatim copy of
	// initShardMasterLocked in package wrangler, modulo the following:
	// - s/keyspace/req.Keyspace
	// - s/shard/req.Shard
	// - s/masterElectTabletAlias/req.PrimaryElectTabletAlias
	// - s/wr.logger/logger
	// - s/wr.tmc/tmc
	// - s/wr.ts/s.ts
	//
	// It is also sufficiently complex and critical code that I feel it's unwise
	// to port and refactor in one change; so, this comment serves both as an
	// acknowledgement of that, as well as a TODO marker for us to revisit this.
	shardInfo, err := s.ts.GetShard(ctx, req.Keyspace, req.Shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	durabilityName, err := s.ts.GetKeyspaceDurability(ctx, req.Keyspace)
	if err != nil {
		return err
	}
	// NOTE(review): this message goes to the process-wide log, not to the
	// logger passed in by the caller, so it is not part of the captured events.
	log.Infof("Getting a new durability policy for %v", durabilityName)
	durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
	if err != nil {
		return err
	}

	event.DispatchUpdate(ev, "reading tablet map")
	tabletMap, err := s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard)
	if err != nil {
		return err
	}

	// Check the primary elect is in tabletMap.
	primaryElectTabletAliasStr := topoproto.TabletAliasString(req.PrimaryElectTabletAlias)
	primaryElectTabletInfo, ok := tabletMap[primaryElectTabletAliasStr]
	if !ok {
		return fmt.Errorf("primary-elect tablet %v is not in the shard", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
	}
	// Clone so the event keeps its own copy of the tablet record.
	ev.NewPrimary = proto.Clone(primaryElectTabletInfo.Tablet).(*topodatapb.Tablet)

	// Check the primary is the only primary in the shard, or -force was used.
	_, primaryTabletMap := topotools.SortedTabletMap(tabletMap)
	if !topoproto.TabletAliasEqual(shardInfo.PrimaryAlias, req.PrimaryElectTabletAlias) {
		if !req.Force {
			return fmt.Errorf("primary-elect tablet %v is not the shard primary, use -force to proceed anyway", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
		}

		logger.Warningf("primary-elect tablet %v is not the shard primary, proceeding anyway as -force was used", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
	}
	if _, ok := primaryTabletMap[primaryElectTabletAliasStr]; !ok {
		if !req.Force {
			return fmt.Errorf("primary-elect tablet %v is not a primary in the shard, use -force to proceed anyway", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
		}
		logger.Warningf("primary-elect tablet %v is not a primary in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
	}
	haveOtherPrimary := false
	for alias := range primaryTabletMap {
		if primaryElectTabletAliasStr != alias {
			haveOtherPrimary = true
		}
	}
	if haveOtherPrimary {
		if !req.Force {
			return fmt.Errorf("primary-elect tablet %v is not the only primary in the shard, use -force to proceed anyway", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
		}
		logger.Warningf("primary-elect tablet %v is not the only primary in the shard, proceeding anyway as -force was used", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
	}

	// First phase: reset replication on all tablets. If anyone fails,
	// we stop. It is probably because it is unreachable, and may leave
	// an unstable database process in the mix, with a database daemon
	// at a wrong replication spot.

	// Create a context for the following RPCs that respects waitReplicasTimeout
	resetCtx, resetCancel := context.WithTimeout(ctx, waitReplicasTimeout)
	defer resetCancel()

	event.DispatchUpdate(ev, "resetting replication on all tablets")
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for alias, tabletInfo := range tabletMap {
		wg.Add(1)
		// The loop variables are passed as arguments so each goroutine works
		// on its own copy.
		go func(alias string, tabletInfo *topo.TabletInfo) {
			defer wg.Done()
			logger.Infof("resetting replication on tablet %v", alias)
			if err := tmc.ResetReplication(resetCtx, tabletInfo.Tablet); err != nil {
				rec.RecordError(fmt.Errorf("tablet %v ResetReplication failed (either fix it, or Scrap it): %v", alias, err))
			}
		}(alias, tabletInfo)
	}
	wg.Wait()
	if err := rec.Error(); err != nil {
		// if any of the replicas failed
		return err
	}

	// Check we still have the topology lock.
	if err := topo.CheckShardLocked(ctx, req.Keyspace, req.Shard); err != nil {
		return fmt.Errorf("lost topology lock, aborting: %v", err)
	}

	// Tell the new primary to break its replicas, return its replication
	// position
	logger.Infof("initializing primary on %v", topoproto.TabletAliasString(req.PrimaryElectTabletAlias))
	event.DispatchUpdate(ev, "initializing primary")
	rp, err := tmc.InitPrimary(ctx, primaryElectTabletInfo.Tablet, reparentutil.SemiSyncAckers(durability, primaryElectTabletInfo.Tablet) > 0)
	if err != nil {
		return err
	}

	// Check we still have the topology lock.
	if err := topo.CheckShardLocked(ctx, req.Keyspace, req.Shard); err != nil {
		return fmt.Errorf("lost topology lock, aborting: %v", err)
	}

	// Create a cancelable context for the following RPCs.
	// If error conditions happen, we can cancel all outgoing RPCs.
	replCtx, replCancel := context.WithTimeout(ctx, waitReplicasTimeout)
	defer replCancel()

	// Now tell the new primary to insert the reparent_journal row,
	// and tell everybody else to become a replica of the new primary,
	// and wait for the row in the reparent_journal table.
	// We start all these in parallel, to handle the semi-sync
	// case: for the primary to be able to commit its row in the
	// reparent_journal table, it needs connected replicas.
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	wgPrimary := sync.WaitGroup{}
	wgReplicas := sync.WaitGroup{}
	// primaryErr is written by the single primary goroutine below and only
	// read after wgPrimary.Wait(). Replica errors go to rec, which is reused
	// from the reset phase (that phase returned early if it held any error).
	var primaryErr error
	for alias, tabletInfo := range tabletMap {
		if alias == primaryElectTabletAliasStr {
			wgPrimary.Add(1)
			go func(alias string, tabletInfo *topo.TabletInfo) {
				defer wgPrimary.Done()
				logger.Infof("populating reparent journal on new primary %v", alias)
				primaryErr = tmc.PopulateReparentJournal(replCtx, tabletInfo.Tablet, now,
					initShardPrimaryOperation,
					req.PrimaryElectTabletAlias, rp)
			}(alias, tabletInfo)
		} else {
			wgReplicas.Add(1)
			go func(alias string, tabletInfo *topo.TabletInfo) {
				defer wgReplicas.Done()
				logger.Infof("initializing replica %v", alias)
				if err := tmc.InitReplica(replCtx, tabletInfo.Tablet, req.PrimaryElectTabletAlias, rp, now, reparentutil.IsReplicaSemiSync(durability, primaryElectTabletInfo.Tablet, tabletInfo.Tablet)); err != nil {
					rec.RecordError(fmt.Errorf("tablet %v InitReplica failed: %v", alias, err))
				}
			}(alias, tabletInfo)
		}
	}

	// After the primary is done, we can update the shard record
	// (note with semi-sync, it also means at least one replica is done).
	wgPrimary.Wait()
	if primaryErr != nil {
		// The primary failed, there is no way the
		// replicas will work.  So we cancel them all.
		logger.Warningf("primary failed to PopulateReparentJournal, canceling replicas")
		replCancel()
		wgReplicas.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on primary: %v", primaryErr)
	}
	if !topoproto.TabletAliasEqual(shardInfo.PrimaryAlias, req.PrimaryElectTabletAlias) {
		if _, err := s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error {
			si.PrimaryAlias = req.PrimaryElectTabletAlias
			return nil
		}); err != nil {
			// Still wait for the replica goroutines before returning so none
			// of them outlives this call. Note replCtx is not canceled here,
			// so this can block until the replicas finish or time out.
			wgReplicas.Wait()
			return fmt.Errorf("failed to update shard primary record: %v", err)
		}
	}

	// Wait for the replicas to complete. If some of them fail, we
	// don't want to rebuild the shard serving graph (the failure
	// will most likely be a timeout, and our context will be
	// expired, so the rebuild will fail anyway)
	wgReplicas.Wait()
	if err := rec.Error(); err != nil {
		return err
	}

	// Create database if necessary on the primary. replicas will get it too through
	// replication. Since the user called InitShardPrimary, they've told us to
	// assume that whatever data is on all the replicas is what they intended.
	// If the database doesn't exist, it means the user intends for these tablets
	// to begin serving with no data (i.e. first time initialization).
	createDB := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", sqlescape.EscapeID(topoproto.TabletDbName(primaryElectTabletInfo.Tablet)))
	if _, err := tmc.ExecuteFetchAsDba(ctx, primaryElectTabletInfo.Tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{
		Query:        []byte(createDB),
		MaxRows:      1,
		ReloadSchema: true,
	}); err != nil {
		return fmt.Errorf("failed to create database: %v", err)
	}
	// Refresh the state to force the tabletserver to reconnect after db has been created.
	// A failure here is deliberately non-fatal: it is logged and the call still succeeds.
	if err := tmc.RefreshState(ctx, primaryElectTabletInfo.Tablet); err != nil {
		log.Warningf("RefreshState failed: %v", err)
	}

	return nil
}
  2261  
  2262  // PingTablet is part of the vtctlservicepb.VtctldServer interface.
  2263  func (s *VtctldServer) PingTablet(ctx context.Context, req *vtctldatapb.PingTabletRequest) (resp *vtctldatapb.PingTabletResponse, err error) {
  2264  	span, ctx := trace.NewSpan(ctx, "VtctldServer.PingTablet")
  2265  	defer span.Finish()
  2266  
  2267  	defer panicHandler(&err)
  2268  
  2269  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  2270  
  2271  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
  2272  	if err != nil {
  2273  		return nil, err
  2274  	}
  2275  
  2276  	err = s.tmc.Ping(ctx, tablet.Tablet)
  2277  	if err != nil {
  2278  		return nil, err
  2279  	}
  2280  
  2281  	return &vtctldatapb.PingTabletResponse{}, nil
  2282  }
  2283  
  2284  // PlannedReparentShard is part of the vtctldservicepb.VtctldServer interface.
  2285  func (s *VtctldServer) PlannedReparentShard(ctx context.Context, req *vtctldatapb.PlannedReparentShardRequest) (resp *vtctldatapb.PlannedReparentShardResponse, err error) {
  2286  	span, ctx := trace.NewSpan(ctx, "VtctldServer.PlannedReparentShard")
  2287  	defer span.Finish()
  2288  
  2289  	defer panicHandler(&err)
  2290  
  2291  	waitReplicasTimeout, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout)
  2292  	if err != nil {
  2293  		return nil, err
  2294  	} else if !ok {
  2295  		waitReplicasTimeout = time.Second * 30
  2296  	}
  2297  
  2298  	span.Annotate("keyspace", req.Keyspace)
  2299  	span.Annotate("shard", req.Shard)
  2300  	span.Annotate("wait_replicas_timeout_sec", waitReplicasTimeout.Seconds())
  2301  
  2302  	if req.AvoidPrimary != nil {
  2303  		span.Annotate("avoid_primary_alias", topoproto.TabletAliasString(req.AvoidPrimary))
  2304  	}
  2305  
  2306  	if req.NewPrimary != nil {
  2307  		span.Annotate("new_primary_alias", topoproto.TabletAliasString(req.NewPrimary))
  2308  	}
  2309  
  2310  	m := sync.RWMutex{}
  2311  	logstream := []*logutilpb.Event{}
  2312  	logger := logutil.NewCallbackLogger(func(e *logutilpb.Event) {
  2313  		m.Lock()
  2314  		defer m.Unlock()
  2315  
  2316  		logstream = append(logstream, e)
  2317  	})
  2318  
  2319  	ev, err := reparentutil.NewPlannedReparenter(s.ts, s.tmc, logger).ReparentShard(ctx,
  2320  		req.Keyspace,
  2321  		req.Shard,
  2322  		reparentutil.PlannedReparentOptions{
  2323  			AvoidPrimaryAlias:   req.AvoidPrimary,
  2324  			NewPrimaryAlias:     req.NewPrimary,
  2325  			WaitReplicasTimeout: waitReplicasTimeout,
  2326  		},
  2327  	)
  2328  
  2329  	resp = &vtctldatapb.PlannedReparentShardResponse{
  2330  		Keyspace: req.Keyspace,
  2331  		Shard:    req.Shard,
  2332  	}
  2333  
  2334  	if ev != nil {
  2335  		resp.Keyspace = ev.ShardInfo.Keyspace()
  2336  		resp.Shard = ev.ShardInfo.ShardName()
  2337  
  2338  		if !topoproto.TabletAliasIsZero(ev.NewPrimary.Alias) {
  2339  			resp.PromotedPrimary = ev.NewPrimary.Alias
  2340  		}
  2341  	}
  2342  
  2343  	m.RLock()
  2344  	defer m.RUnlock()
  2345  
  2346  	resp.Events = make([]*logutilpb.Event, len(logstream))
  2347  	copy(resp.Events, logstream)
  2348  
  2349  	return resp, err
  2350  }
  2351  
  2352  // RebuildKeyspaceGraph is part of the vtctlservicepb.VtctldServer interface.
  2353  func (s *VtctldServer) RebuildKeyspaceGraph(ctx context.Context, req *vtctldatapb.RebuildKeyspaceGraphRequest) (resp *vtctldatapb.RebuildKeyspaceGraphResponse, err error) {
  2354  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RebuildKeyspaceGraph")
  2355  	defer span.Finish()
  2356  
  2357  	defer panicHandler(&err)
  2358  
  2359  	span.Annotate("keyspace", req.Keyspace)
  2360  	span.Annotate("cells", strings.Join(req.Cells, ","))
  2361  	span.Annotate("allow_partial", req.AllowPartial)
  2362  
  2363  	if err = topotools.RebuildKeyspace(ctx, logutil.NewCallbackLogger(func(e *logutilpb.Event) {}), s.ts, req.Keyspace, req.Cells, req.AllowPartial); err != nil {
  2364  		return nil, err
  2365  	}
  2366  
  2367  	return &vtctldatapb.RebuildKeyspaceGraphResponse{}, nil
  2368  }
  2369  
  2370  // RebuildVSchemaGraph is part of the vtctlservicepb.VtctldServer interface.
  2371  func (s *VtctldServer) RebuildVSchemaGraph(ctx context.Context, req *vtctldatapb.RebuildVSchemaGraphRequest) (resp *vtctldatapb.RebuildVSchemaGraphResponse, err error) {
  2372  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RebuildVSchemaGraph")
  2373  	defer span.Finish()
  2374  
  2375  	defer panicHandler(&err)
  2376  
  2377  	span.Annotate("cells", strings.Join(req.Cells, ","))
  2378  
  2379  	if err = s.ts.RebuildSrvVSchema(ctx, req.Cells); err != nil {
  2380  		return nil, err
  2381  	}
  2382  
  2383  	return &vtctldatapb.RebuildVSchemaGraphResponse{}, nil
  2384  }
  2385  
  2386  // RefreshState is part of the vtctldservicepb.VtctldServer interface.
  2387  func (s *VtctldServer) RefreshState(ctx context.Context, req *vtctldatapb.RefreshStateRequest) (resp *vtctldatapb.RefreshStateResponse, err error) {
  2388  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RefreshState")
  2389  	defer span.Finish()
  2390  
  2391  	defer panicHandler(&err)
  2392  
  2393  	if req.TabletAlias == nil {
  2394  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "RefreshState requires a tablet alias")
  2395  		return nil, err
  2396  	}
  2397  
  2398  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  2399  	defer cancel()
  2400  
  2401  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
  2402  	if err != nil {
  2403  		err = fmt.Errorf("Failed to get tablet %s: %w", topoproto.TabletAliasString(req.TabletAlias), err)
  2404  		return nil, err
  2405  	}
  2406  
  2407  	if err = s.tmc.RefreshState(ctx, tablet.Tablet); err != nil {
  2408  		return nil, err
  2409  	}
  2410  
  2411  	return &vtctldatapb.RefreshStateResponse{}, nil
  2412  }
  2413  
  2414  // RefreshStateByShard is part of the vtctldservicepb.VtctldServer interface.
  2415  func (s *VtctldServer) RefreshStateByShard(ctx context.Context, req *vtctldatapb.RefreshStateByShardRequest) (resp *vtctldatapb.RefreshStateByShardResponse, err error) {
  2416  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RefreshStateByShard")
  2417  	defer span.Finish()
  2418  
  2419  	defer panicHandler(&err)
  2420  
  2421  	if req.Keyspace == "" {
  2422  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "RefreshStateByShard requires a keyspace")
  2423  		return nil, err
  2424  	}
  2425  
  2426  	if req.Shard == "" {
  2427  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "RefreshStateByShard requires a shard")
  2428  		return nil, err
  2429  	}
  2430  
  2431  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  2432  	defer cancel()
  2433  
  2434  	si, err := s.ts.GetShard(ctx, req.Keyspace, req.Shard)
  2435  	if err != nil {
  2436  		err = fmt.Errorf("Failed to get shard %s/%s/: %w", req.Keyspace, req.Shard, err)
  2437  		return nil, err
  2438  	}
  2439  
  2440  	isPartial, partialDetails, err := topotools.RefreshTabletsByShard(ctx, s.ts, s.tmc, si, req.Cells, logutil.NewCallbackLogger(func(e *logutilpb.Event) {
  2441  		switch e.Level {
  2442  		case logutilpb.Level_WARNING:
  2443  			log.Warningf(e.Value)
  2444  		case logutilpb.Level_ERROR:
  2445  			log.Errorf(e.Value)
  2446  		default:
  2447  			log.Infof(e.Value)
  2448  		}
  2449  	}))
  2450  	if err != nil {
  2451  		return nil, err
  2452  	}
  2453  
  2454  	return &vtctldatapb.RefreshStateByShardResponse{
  2455  		IsPartialRefresh:      isPartial,
  2456  		PartialRefreshDetails: partialDetails,
  2457  	}, nil
  2458  }
  2459  
  2460  // ReloadSchema is part of the vtctlservicepb.VtctldServer interface.
  2461  func (s *VtctldServer) ReloadSchema(ctx context.Context, req *vtctldatapb.ReloadSchemaRequest) (resp *vtctldatapb.ReloadSchemaResponse, err error) {
  2462  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ReloadSchema")
  2463  	defer span.Finish()
  2464  
  2465  	defer panicHandler(&err)
  2466  
  2467  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  2468  
  2469  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  2470  	if err != nil {
  2471  		err = vterrors.Errorf(vtrpc.Code_NOT_FOUND, "GetTablet(%v) failed: %v", req.TabletAlias, err)
  2472  		return nil, err
  2473  	}
  2474  
  2475  	err = s.tmc.ReloadSchema(ctx, ti.Tablet, "")
  2476  	if err != nil {
  2477  		return nil, err
  2478  	}
  2479  
  2480  	return &vtctldatapb.ReloadSchemaResponse{}, nil
  2481  }
  2482  
  2483  // ReloadSchemaShard is part of the vtctlservicepb.VtctldServer interface.
  2484  func (s *VtctldServer) ReloadSchemaShard(ctx context.Context, req *vtctldatapb.ReloadSchemaShardRequest) (resp *vtctldatapb.ReloadSchemaShardResponse, err error) {
  2485  	defer panicHandler(&err)
  2486  
  2487  	logger, getEvents := eventStreamLogger()
  2488  
  2489  	var sema *sync2.Semaphore
  2490  	if req.Concurrency > 0 {
  2491  		sema = sync2.NewSemaphore(int(req.Concurrency), 0)
  2492  	}
  2493  
  2494  	s.reloadSchemaShard(ctx, req, sema, logger)
  2495  
  2496  	return &vtctldatapb.ReloadSchemaShardResponse{
  2497  		Events: getEvents(),
  2498  	}, nil
  2499  }
  2500  
  2501  func (s *VtctldServer) reloadSchemaShard(ctx context.Context, req *vtctldatapb.ReloadSchemaShardRequest, sema *sync2.Semaphore, logger logutil.Logger) {
  2502  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ReloadSchemaShard")
  2503  	defer span.Finish()
  2504  
  2505  	span.Annotate("keyspace", req.Keyspace)
  2506  	span.Annotate("shard", req.Shard)
  2507  	span.Annotate("concurrency", req.Concurrency)
  2508  	span.Annotate("include_primary", req.IncludePrimary)
  2509  	span.Annotate("wait_position", req.WaitPosition)
  2510  
  2511  	isPartial, ok := schematools.ReloadShard(ctx, s.ts, s.tmc, logger, req.Keyspace, req.Shard, req.WaitPosition, sema, req.IncludePrimary)
  2512  	if !ok {
  2513  		return
  2514  	}
  2515  
  2516  	span.Annotate("is_partial_result", isPartial)
  2517  }
  2518  
  2519  // ReloadSchemaKeyspace is part of the vtctlservicepb.VtctldServer interface.
  2520  func (s *VtctldServer) ReloadSchemaKeyspace(ctx context.Context, req *vtctldatapb.ReloadSchemaKeyspaceRequest) (resp *vtctldatapb.ReloadSchemaKeyspaceResponse, err error) {
  2521  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ReloadSchemaKeyspace")
  2522  	defer span.Finish()
  2523  
  2524  	defer panicHandler(&err)
  2525  
  2526  	span.Annotate("keyspace", req.Keyspace)
  2527  	span.Annotate("concurrency", req.Concurrency)
  2528  	span.Annotate("include_primary", req.IncludePrimary)
  2529  	span.Annotate("wait_position", req.WaitPosition)
  2530  
  2531  	shards, err := s.ts.GetShardNames(ctx, req.Keyspace)
  2532  	if err != nil {
  2533  		err = vterrors.Errorf(vtrpc.Code_INTERNAL, "GetShardNames(%v) failed: %v", req.Keyspace, err)
  2534  		return nil, err
  2535  	}
  2536  
  2537  	var (
  2538  		wg                sync.WaitGroup
  2539  		sema              *sync2.Semaphore
  2540  		logger, getEvents = eventStreamLogger()
  2541  	)
  2542  
  2543  	if req.Concurrency > 0 {
  2544  		sema = sync2.NewSemaphore(int(req.Concurrency), 0)
  2545  	}
  2546  
  2547  	for _, shard := range shards {
  2548  		wg.Add(1)
  2549  		go func(shard string) {
  2550  			defer wg.Done()
  2551  			s.reloadSchemaShard(ctx, &vtctldatapb.ReloadSchemaShardRequest{
  2552  				Keyspace:       req.Keyspace,
  2553  				Shard:          shard,
  2554  				IncludePrimary: req.IncludePrimary,
  2555  				WaitPosition:   req.WaitPosition,
  2556  			}, sema, logger)
  2557  		}(shard)
  2558  	}
  2559  
  2560  	wg.Wait()
  2561  
  2562  	return &vtctldatapb.ReloadSchemaKeyspaceResponse{
  2563  		Events: getEvents(),
  2564  	}, nil
  2565  }
  2566  
  2567  // RemoveBackup is part of the vtctlservicepb.VtctldServer interface.
  2568  func (s *VtctldServer) RemoveBackup(ctx context.Context, req *vtctldatapb.RemoveBackupRequest) (resp *vtctldatapb.RemoveBackupResponse, err error) {
  2569  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RemoveBackup")
  2570  	defer span.Finish()
  2571  
  2572  	defer panicHandler(&err)
  2573  
  2574  	bucket := fmt.Sprintf("%v/%v", req.Keyspace, req.Shard)
  2575  
  2576  	span.Annotate("keyspace", req.Keyspace)
  2577  	span.Annotate("shard", req.Shard)
  2578  	span.Annotate("bucket", bucket)
  2579  	span.Annotate("backup_name", req.Name)
  2580  
  2581  	bs, err := backupstorage.GetBackupStorage()
  2582  	if err != nil {
  2583  		return nil, err
  2584  	}
  2585  	defer bs.Close()
  2586  
  2587  	if err = bs.RemoveBackup(ctx, bucket, req.Name); err != nil {
  2588  		return nil, err
  2589  	}
  2590  
  2591  	return &vtctldatapb.RemoveBackupResponse{}, nil
  2592  }
  2593  
  2594  // RemoveKeyspaceCell is part of the vtctlservicepb.VtctldServer interface.
  2595  func (s *VtctldServer) RemoveKeyspaceCell(ctx context.Context, req *vtctldatapb.RemoveKeyspaceCellRequest) (resp *vtctldatapb.RemoveKeyspaceCellResponse, err error) {
  2596  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RemoveKeyspaceCell")
  2597  	defer span.Finish()
  2598  
  2599  	defer panicHandler(&err)
  2600  
  2601  	span.Annotate("keyspace", req.Keyspace)
  2602  	span.Annotate("cell", req.Cell)
  2603  	span.Annotate("force", req.Force)
  2604  	span.Annotate("recursive", req.Recursive)
  2605  
  2606  	shards, err := s.ts.GetShardNames(ctx, req.Keyspace)
  2607  	if err != nil {
  2608  		return nil, err
  2609  	}
  2610  
  2611  	// Remove all the shards, serially. Stop immediately if any fail.
  2612  	for _, shard := range shards {
  2613  		log.Infof("Removing cell %v from shard %v/%v", req.Cell, req.Keyspace, shard)
  2614  		if err2 := removeShardCell(ctx, s.ts, req.Cell, req.Keyspace, shard, req.Recursive, req.Force); err2 != nil {
  2615  			err = fmt.Errorf("cannot remove cell %v from shard %v/%v: %w", req.Cell, req.Keyspace, shard, err2)
  2616  			return nil, err
  2617  		}
  2618  	}
  2619  
  2620  	// Last, remove the SrvKeyspace object.
  2621  	log.Infof("Removing cell %v keyspace %v SrvKeyspace object", req.Cell, req.Keyspace)
  2622  	if err = s.ts.DeleteSrvKeyspace(ctx, req.Cell, req.Keyspace); err != nil {
  2623  		err = fmt.Errorf("cannot delete SrvKeyspace from cell %v for keyspace %v: %w", req.Cell, req.Keyspace, err)
  2624  		return nil, err
  2625  	}
  2626  
  2627  	return &vtctldatapb.RemoveKeyspaceCellResponse{}, nil
  2628  }
  2629  
  2630  // RemoveShardCell is part of the vtctlservicepb.VtctldServer interface.
  2631  func (s *VtctldServer) RemoveShardCell(ctx context.Context, req *vtctldatapb.RemoveShardCellRequest) (resp *vtctldatapb.RemoveShardCellResponse, err error) {
  2632  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RemoveShardCell")
  2633  	defer span.Finish()
  2634  
  2635  	defer panicHandler(&err)
  2636  
  2637  	span.Annotate("keyspace", req.Keyspace)
  2638  	span.Annotate("shard", req.ShardName)
  2639  	span.Annotate("cell", req.Cell)
  2640  	span.Annotate("force", req.Force)
  2641  	span.Annotate("recursive", req.Recursive)
  2642  
  2643  	if err = removeShardCell(ctx, s.ts, req.Cell, req.Keyspace, req.ShardName, req.Recursive, req.Force); err != nil {
  2644  		return nil, err
  2645  	}
  2646  
  2647  	return &vtctldatapb.RemoveShardCellResponse{}, nil
  2648  }
  2649  
  2650  // ReparentTablet is part of the vtctldservicepb.VtctldServer interface.
  2651  func (s *VtctldServer) ReparentTablet(ctx context.Context, req *vtctldatapb.ReparentTabletRequest) (resp *vtctldatapb.ReparentTabletResponse, err error) {
  2652  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ReparentTablet")
  2653  	defer span.Finish()
  2654  
  2655  	defer panicHandler(&err)
  2656  
  2657  	if req.Tablet == nil {
  2658  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "tablet alias must not be nil")
  2659  		return nil, err
  2660  	}
  2661  
  2662  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.Tablet))
  2663  
  2664  	tablet, err := s.ts.GetTablet(ctx, req.Tablet)
  2665  	if err != nil {
  2666  		return nil, err
  2667  	}
  2668  
  2669  	shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
  2670  	if err != nil {
  2671  		return nil, err
  2672  	}
  2673  
  2674  	if !shard.HasPrimary() {
  2675  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no primary tablet for shard %v/%v", tablet.Keyspace, tablet.Shard)
  2676  		return nil, err
  2677  	}
  2678  
  2679  	shardPrimary, err := s.ts.GetTablet(ctx, shard.PrimaryAlias)
  2680  	if err != nil {
  2681  		err = fmt.Errorf("cannot lookup primary tablet %v for shard %v/%v: %w", topoproto.TabletAliasString(shard.PrimaryAlias), tablet.Keyspace, tablet.Shard, err)
  2682  		return nil, err
  2683  	}
  2684  
  2685  	if shardPrimary.Type != topodatapb.TabletType_PRIMARY {
  2686  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "TopologyServer has incosistent state for shard primary %v", topoproto.TabletAliasString(shard.PrimaryAlias))
  2687  		return nil, err
  2688  	}
  2689  
  2690  	if shardPrimary.Keyspace != tablet.Keyspace || shardPrimary.Shard != tablet.Shard {
  2691  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary %v and potential replica %v not in same keypace shard (%v/%v)", topoproto.TabletAliasString(shard.PrimaryAlias), topoproto.TabletAliasString(req.Tablet), tablet.Keyspace, tablet.Shard)
  2692  		return nil, err
  2693  	}
  2694  
  2695  	if topoproto.TabletAliasEqual(req.Tablet, shardPrimary.Alias) {
  2696  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "cannot ReparentTablet current shard primary (%v) onto itself", topoproto.TabletAliasString(req.Tablet))
  2697  		return nil, err
  2698  	}
  2699  
  2700  	durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace)
  2701  	if err != nil {
  2702  		return nil, err
  2703  	}
  2704  	log.Infof("Getting a new durability policy for %v", durabilityName)
  2705  	durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
  2706  	if err != nil {
  2707  		return nil, err
  2708  	}
  2709  
  2710  	if err = s.tmc.SetReplicationSource(ctx, tablet.Tablet, shard.PrimaryAlias, 0, "", false, reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, tablet.Tablet)); err != nil {
  2711  		return nil, err
  2712  	}
  2713  
  2714  	return &vtctldatapb.ReparentTabletResponse{
  2715  		Keyspace: tablet.Keyspace,
  2716  		Shard:    tablet.Shard,
  2717  		Primary:  shard.PrimaryAlias,
  2718  	}, nil
  2719  }
  2720  
  2721  func (s *VtctldServer) RestoreFromBackup(req *vtctldatapb.RestoreFromBackupRequest, stream vtctlservicepb.Vtctld_RestoreFromBackupServer) (err error) {
  2722  	span, ctx := trace.NewSpan(stream.Context(), "VtctldServer.RestoreFromBackup")
  2723  	defer span.Finish()
  2724  
  2725  	defer panicHandler(&err)
  2726  
  2727  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  2728  	backupTime := protoutil.TimeFromProto(req.BackupTime)
  2729  	if !backupTime.IsZero() {
  2730  		span.Annotate("backup_timestamp", backupTime.Format(mysqlctl.BackupTimestampFormat))
  2731  	}
  2732  
  2733  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  2734  	if err != nil {
  2735  		return err
  2736  	}
  2737  
  2738  	span.Annotate("keyspace", ti.Keyspace)
  2739  	span.Annotate("shard", ti.Shard)
  2740  
  2741  	r := &tabletmanagerdatapb.RestoreFromBackupRequest{
  2742  		BackupTime:   req.BackupTime,
  2743  		RestoreToPos: req.RestoreToPos,
  2744  		DryRun:       req.DryRun,
  2745  	}
  2746  	logStream, err := s.tmc.RestoreFromBackup(ctx, ti.Tablet, r)
  2747  	if err != nil {
  2748  		return err
  2749  	}
  2750  
  2751  	logger := logutil.NewConsoleLogger()
  2752  
  2753  	for {
  2754  		var event *logutilpb.Event
  2755  		event, err = logStream.Recv()
  2756  		switch err {
  2757  		case nil:
  2758  			logutil.LogEvent(logger, event)
  2759  			resp := &vtctldatapb.RestoreFromBackupResponse{
  2760  				TabletAlias: req.TabletAlias,
  2761  				Keyspace:    ti.Keyspace,
  2762  				Shard:       ti.Shard,
  2763  				Event:       event,
  2764  			}
  2765  			if err = stream.Send(resp); err != nil {
  2766  				logger.Errorf("failed to send stream response %+v: %v", resp, err)
  2767  			}
  2768  		case io.EOF:
  2769  			// Do not do anything when active reparenting is disabled.
  2770  			if mysqlctl.DisableActiveReparents {
  2771  				return nil
  2772  			}
  2773  			if req.RestoreToPos != "" && !req.DryRun {
  2774  				// point in time recovery. Do not restore replication
  2775  				return nil
  2776  			}
  2777  
  2778  			// Otherwise, we find the correct primary tablet and set the
  2779  			// replication source on the freshly-restored tablet, since the
  2780  			// shard primary may have changed while it was restoring.
  2781  			//
  2782  			// This also affects whether or not we want to send semi-sync ACKs.
  2783  			var ti *topo.TabletInfo
  2784  			ti, err = s.ts.GetTablet(ctx, req.TabletAlias)
  2785  			if err != nil {
  2786  				return err
  2787  			}
  2788  
  2789  			err = reparentutil.SetReplicationSource(ctx, s.ts, s.tmc, ti.Tablet)
  2790  			return err
  2791  		default:
  2792  			return err
  2793  		}
  2794  	}
  2795  }
  2796  
  2797  // RunHealthCheck is part of the vtctlservicepb.VtctldServer interface.
  2798  func (s *VtctldServer) RunHealthCheck(ctx context.Context, req *vtctldatapb.RunHealthCheckRequest) (resp *vtctldatapb.RunHealthCheckResponse, err error) {
  2799  	span, ctx := trace.NewSpan(ctx, "VtctldServer.RunHealthCheck")
  2800  	defer span.Finish()
  2801  
  2802  	defer panicHandler(&err)
  2803  
  2804  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  2805  
  2806  	ti, err := s.ts.GetTablet(ctx, req.TabletAlias)
  2807  	if err != nil {
  2808  		return nil, err
  2809  	}
  2810  
  2811  	err = s.tmc.RunHealthCheck(ctx, ti.Tablet)
  2812  	if err != nil {
  2813  		return nil, err
  2814  	}
  2815  
  2816  	return &vtctldatapb.RunHealthCheckResponse{}, nil
  2817  }
  2818  
  2819  // SetKeyspaceDurabilityPolicy is part of the vtctlservicepb.VtctldServer interface.
  2820  func (s *VtctldServer) SetKeyspaceDurabilityPolicy(ctx context.Context, req *vtctldatapb.SetKeyspaceDurabilityPolicyRequest) (resp *vtctldatapb.SetKeyspaceDurabilityPolicyResponse, err error) {
  2821  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SetKeyspaceDurabilityPolicy")
  2822  	defer span.Finish()
  2823  
  2824  	defer panicHandler(&err)
  2825  
  2826  	span.Annotate("keyspace", req.Keyspace)
  2827  	span.Annotate("durability_policy", req.DurabilityPolicy)
  2828  
  2829  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "SetKeyspaceDurabilityPolicy")
  2830  	if lockErr != nil {
  2831  		err = lockErr
  2832  		return nil, err
  2833  	}
  2834  
  2835  	defer unlock(&err)
  2836  
  2837  	ki, err := s.ts.GetKeyspace(ctx, req.Keyspace)
  2838  	if err != nil {
  2839  		return nil, err
  2840  	}
  2841  
  2842  	policyValid := reparentutil.CheckDurabilityPolicyExists(req.DurabilityPolicy)
  2843  	if !policyValid {
  2844  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "durability policy <%v> is not a valid policy. Please register it as a policy first", req.DurabilityPolicy)
  2845  		return nil, err
  2846  	}
  2847  
  2848  	ki.DurabilityPolicy = req.DurabilityPolicy
  2849  
  2850  	err = s.ts.UpdateKeyspace(ctx, ki)
  2851  	if err != nil {
  2852  		return nil, err
  2853  	}
  2854  
  2855  	return &vtctldatapb.SetKeyspaceDurabilityPolicyResponse{
  2856  		Keyspace: ki.Keyspace,
  2857  	}, nil
  2858  }
  2859  
  2860  // SetKeyspaceServedFrom is part of the vtctlservicepb.VtctldServer interface.
  2861  func (s *VtctldServer) SetKeyspaceServedFrom(ctx context.Context, req *vtctldatapb.SetKeyspaceServedFromRequest) (resp *vtctldatapb.SetKeyspaceServedFromResponse, err error) {
  2862  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SetKeyspaceServedFrom")
  2863  	defer span.Finish()
  2864  
  2865  	defer panicHandler(&err)
  2866  
  2867  	span.Annotate("keyspace", req.Keyspace)
  2868  	span.Annotate("tablet_type", topoproto.TabletTypeLString(req.TabletType))
  2869  	span.Annotate("cells", strings.Join(req.Cells, ","))
  2870  	span.Annotate("remove", req.Remove)
  2871  	span.Annotate("source_keyspace", req.SourceKeyspace)
  2872  
  2873  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "SetKeyspaceServedFrom")
  2874  	if lockErr != nil {
  2875  		err = lockErr
  2876  		return nil, err
  2877  	}
  2878  
  2879  	defer unlock(&err)
  2880  
  2881  	ki, err := s.ts.GetKeyspace(ctx, req.Keyspace)
  2882  	if err != nil {
  2883  		return nil, err
  2884  	}
  2885  
  2886  	err = ki.UpdateServedFromMap(req.TabletType, req.Cells, req.SourceKeyspace, req.Remove, nil)
  2887  	if err != nil {
  2888  		return nil, err
  2889  	}
  2890  
  2891  	err = s.ts.UpdateKeyspace(ctx, ki)
  2892  	if err != nil {
  2893  		return nil, err
  2894  	}
  2895  
  2896  	return &vtctldatapb.SetKeyspaceServedFromResponse{
  2897  		Keyspace: ki.Keyspace,
  2898  	}, nil
  2899  }
  2900  
  2901  // SetShardIsPrimaryServing is part of the vtctlservicepb.VtctldServer interface.
  2902  func (s *VtctldServer) SetShardIsPrimaryServing(ctx context.Context, req *vtctldatapb.SetShardIsPrimaryServingRequest) (resp *vtctldatapb.SetShardIsPrimaryServingResponse, err error) {
  2903  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SetShardIsPrimaryServing")
  2904  	defer span.Finish()
  2905  
  2906  	defer panicHandler(&err)
  2907  
  2908  	span.Annotate("keyspace", req.Keyspace)
  2909  	span.Annotate("shard", req.Shard)
  2910  	span.Annotate("is_serving", req.IsServing)
  2911  
  2912  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, fmt.Sprintf("SetShardIsPrimaryServing(%v,%v,%v)", req.Keyspace, req.Shard, req.IsServing))
  2913  	if lockErr != nil {
  2914  		err = lockErr
  2915  		return nil, err
  2916  	}
  2917  
  2918  	defer unlock(&err)
  2919  
  2920  	si, err := s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error {
  2921  		si.IsPrimaryServing = req.IsServing
  2922  		return nil
  2923  	})
  2924  	if err != nil {
  2925  		return nil, err
  2926  	}
  2927  
  2928  	return &vtctldatapb.SetShardIsPrimaryServingResponse{
  2929  		Shard: si.Shard,
  2930  	}, nil
  2931  }
  2932  
  2933  // SetShardTabletControl is part of the vtctlservicepb.VtctldServer interface.
  2934  func (s *VtctldServer) SetShardTabletControl(ctx context.Context, req *vtctldatapb.SetShardTabletControlRequest) (resp *vtctldatapb.SetShardTabletControlResponse, err error) {
  2935  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SetShardTabletControl")
  2936  	defer span.Finish()
  2937  
  2938  	span.Annotate("keyspace", req.Keyspace)
  2939  	span.Annotate("shard", req.Shard)
  2940  	span.Annotate("tablet_type", topoproto.TabletTypeLString(req.TabletType))
  2941  	span.Annotate("cells", strings.Join(req.Cells, ","))
  2942  	span.Annotate("denied_tables", strings.Join(req.DeniedTables, ","))
  2943  	span.Annotate("disable_query_service", req.DisableQueryService)
  2944  	span.Annotate("remove", req.Remove)
  2945  
  2946  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, "SetShardTabletControl")
  2947  	if lockErr != nil {
  2948  		err = lockErr
  2949  		return nil, err
  2950  	}
  2951  
  2952  	defer unlock(&err)
  2953  
  2954  	si, err := s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error {
  2955  		return si.UpdateSourceDeniedTables(ctx, req.TabletType, req.Cells, req.Remove, req.DeniedTables)
  2956  	})
  2957  
  2958  	switch {
  2959  	case topo.IsErrType(err, topo.NoUpdateNeeded):
  2960  		// ok, fallthrough to DisableQueryService
  2961  	case err != nil:
  2962  		return nil, err
  2963  	}
  2964  
  2965  	if si == nil { // occurs only when UpdateShardFields above returns NoUpdateNeeded
  2966  		si, err = s.ts.GetShard(ctx, req.Keyspace, req.Shard)
  2967  		if err != nil {
  2968  			return nil, err
  2969  		}
  2970  	}
  2971  	if !req.Remove && len(req.DeniedTables) == 0 {
  2972  		err = s.ts.UpdateDisableQueryService(ctx, req.Keyspace, []*topo.ShardInfo{si}, req.TabletType, req.Cells, req.DisableQueryService)
  2973  		if err != nil {
  2974  			return nil, err
  2975  		}
  2976  	}
  2977  
  2978  	return &vtctldatapb.SetShardTabletControlResponse{
  2979  		Shard: si.Shard,
  2980  	}, nil
  2981  }
  2982  
  2983  // SetWritable is part of the vtctldservicepb.VtctldServer interface.
  2984  func (s *VtctldServer) SetWritable(ctx context.Context, req *vtctldatapb.SetWritableRequest) (resp *vtctldatapb.SetWritableResponse, err error) {
  2985  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SetWritable")
  2986  	defer span.Finish()
  2987  
  2988  	defer panicHandler(&err)
  2989  
  2990  	if req.TabletAlias == nil {
  2991  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "SetWritable.TabletAlias is required")
  2992  		return nil, err
  2993  	}
  2994  
  2995  	alias := topoproto.TabletAliasString(req.TabletAlias)
  2996  	span.Annotate("tablet_alias", alias)
  2997  	span.Annotate("writable", req.Writable)
  2998  
  2999  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
  3000  	if err != nil {
  3001  		log.Errorf("SetWritable: failed to read tablet record for %v: %v", alias, err)
  3002  		return nil, err
  3003  	}
  3004  
  3005  	var f func(context.Context, *topodatapb.Tablet) error
  3006  	switch req.Writable {
  3007  	case true:
  3008  		f = s.tmc.SetReadWrite
  3009  	case false:
  3010  		f = s.tmc.SetReadOnly
  3011  	}
  3012  
  3013  	if err = f(ctx, tablet.Tablet); err != nil {
  3014  		log.Errorf("SetWritable: failed to set writable=%v on %v: %v", req.Writable, alias, err)
  3015  		return nil, err
  3016  	}
  3017  
  3018  	return &vtctldatapb.SetWritableResponse{}, nil
  3019  }
  3020  
  3021  // ShardReplicationAdd is part of the vtctlservicepb.VtctldServer interface.
  3022  func (s *VtctldServer) ShardReplicationAdd(ctx context.Context, req *vtctldatapb.ShardReplicationAddRequest) (resp *vtctldatapb.ShardReplicationAddResponse, err error) {
  3023  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationAdd")
  3024  	defer span.Finish()
  3025  
  3026  	defer panicHandler(&err)
  3027  
  3028  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  3029  	span.Annotate("keyspace", req.Keyspace)
  3030  	span.Annotate("shard", req.Shard)
  3031  
  3032  	if err = topo.UpdateShardReplicationRecord(ctx, s.ts, req.Keyspace, req.Shard, req.TabletAlias); err != nil {
  3033  		return nil, err
  3034  	}
  3035  
  3036  	return &vtctldatapb.ShardReplicationAddResponse{}, nil
  3037  }
  3038  
  3039  // ShardReplicationFix is part of the vtctlservicepb.VtctldServer interface.
  3040  func (s *VtctldServer) ShardReplicationFix(ctx context.Context, req *vtctldatapb.ShardReplicationFixRequest) (resp *vtctldatapb.ShardReplicationFixResponse, err error) {
  3041  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationFix")
  3042  	defer span.Finish()
  3043  
  3044  	defer panicHandler(&err)
  3045  
  3046  	span.Annotate("keyspace", req.Keyspace)
  3047  	span.Annotate("shard", req.Shard)
  3048  	span.Annotate("cell", req.Cell)
  3049  
  3050  	problem, err := topo.FixShardReplication(ctx, s.ts, logutil.NewConsoleLogger(), req.Cell, req.Keyspace, req.Shard)
  3051  	if err != nil {
  3052  		return nil, err
  3053  	}
  3054  
  3055  	if problem != nil {
  3056  		span.Annotate("problem_tablet", topoproto.TabletAliasString(problem.TabletAlias))
  3057  		span.Annotate("problem_type", strings.ToLower(topoproto.ShardReplicationErrorTypeString(problem.Type)))
  3058  	}
  3059  
  3060  	return &vtctldatapb.ShardReplicationFixResponse{
  3061  		Error: problem,
  3062  	}, nil
  3063  }
  3064  
  3065  // ShardReplicationPositions is part of the vtctldservicepb.VtctldServer interface.
  3066  func (s *VtctldServer) ShardReplicationPositions(ctx context.Context, req *vtctldatapb.ShardReplicationPositionsRequest) (resp *vtctldatapb.ShardReplicationPositionsResponse, err error) {
  3067  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationPositions")
  3068  	defer span.Finish()
  3069  
  3070  	defer panicHandler(&err)
  3071  
  3072  	span.Annotate("keyspace", req.Keyspace)
  3073  	span.Annotate("shard", req.Shard)
  3074  
  3075  	tabletInfoMap, err := s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard)
  3076  	if err != nil {
  3077  		err = fmt.Errorf("GetTabletMapForShard(%s, %s) failed: %w", req.Keyspace, req.Shard, err)
  3078  		return nil, err
  3079  	}
  3080  
  3081  	log.Infof("Gathering tablet replication status for: %v", tabletInfoMap)
  3082  
  3083  	var (
  3084  		m         sync.Mutex
  3085  		wg        sync.WaitGroup
  3086  		rec       concurrency.AllErrorRecorder
  3087  		results   = make(map[string]*replicationdatapb.Status, len(tabletInfoMap))
  3088  		tabletMap = make(map[string]*topodatapb.Tablet, len(tabletInfoMap))
  3089  	)
  3090  
  3091  	// For each tablet, we're going to create an individual context, using
  3092  	// *topo.RemoteOperationTimeout as the maximum timeout (but we'll respect
  3093  	// any stricter timeout in the parent context). If an individual tablet
  3094  	// times out fetching its replication position, we won't fail the overall
  3095  	// request. Instead, we'll log a warning and record a nil entry in the
  3096  	// result map; that way, the caller can tell the difference between a tablet
  3097  	// that timed out vs a tablet that didn't get queried at all.
  3098  
  3099  	for alias, tabletInfo := range tabletInfoMap {
  3100  		switch {
  3101  		case tabletInfo.Type == topodatapb.TabletType_PRIMARY:
  3102  			wg.Add(1)
  3103  
  3104  			go func(ctx context.Context, alias string, tablet *topodatapb.Tablet) {
  3105  				defer wg.Done()
  3106  
  3107  				span, ctx := trace.NewSpan(ctx, "VtctldServer.getPrimaryPosition")
  3108  				defer span.Finish()
  3109  
  3110  				span.Annotate("tablet_alias", alias)
  3111  
  3112  				ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3113  				defer cancel()
  3114  
  3115  				var status *replicationdatapb.Status
  3116  
  3117  				pos, err := s.tmc.PrimaryPosition(ctx, tablet)
  3118  				if err != nil {
  3119  					switch ctx.Err() {
  3120  					case context.Canceled:
  3121  						log.Warningf("context canceled before obtaining primary position from %s: %s", alias, err)
  3122  					case context.DeadlineExceeded:
  3123  						log.Warningf("context deadline exceeded before obtaining primary position from %s: %s", alias, err)
  3124  					default:
  3125  						// The RPC was not timed out or canceled. We treat this
  3126  						// as a fatal error for the overall request.
  3127  						rec.RecordError(fmt.Errorf("PrimaryPosition(%s) failed: %w", alias, err))
  3128  						return
  3129  					}
  3130  				} else {
  3131  					// No error, record a valid status for this tablet.
  3132  					status = &replicationdatapb.Status{
  3133  						Position: pos,
  3134  					}
  3135  				}
  3136  
  3137  				m.Lock()
  3138  				defer m.Unlock()
  3139  
  3140  				results[alias] = status
  3141  				tabletMap[alias] = tablet
  3142  			}(ctx, alias, tabletInfo.Tablet)
  3143  		case tabletInfo.IsReplicaType():
  3144  			wg.Add(1)
  3145  
  3146  			go func(ctx context.Context, alias string, tablet *topodatapb.Tablet) {
  3147  				defer wg.Done()
  3148  
  3149  				span, ctx := trace.NewSpan(ctx, "VtctldServer.getReplicationStatus")
  3150  				defer span.Finish()
  3151  
  3152  				span.Annotate("tablet_alias", alias)
  3153  
  3154  				ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3155  				defer cancel()
  3156  
  3157  				status, err := s.tmc.ReplicationStatus(ctx, tablet)
  3158  				if err != nil {
  3159  					switch ctx.Err() {
  3160  					case context.Canceled:
  3161  						log.Warningf("context canceled before obtaining replication position from %s: %s", alias, err)
  3162  					case context.DeadlineExceeded:
  3163  						log.Warningf("context deadline exceeded before obtaining replication position from %s: %s", alias, err)
  3164  					default:
  3165  						// The RPC was not timed out or canceled. We treat this
  3166  						// as a fatal error for the overall request.
  3167  						rec.RecordError(fmt.Errorf("ReplicationStatus(%s) failed: %s", alias, err))
  3168  						return
  3169  					}
  3170  
  3171  					status = nil // Don't record any position for this tablet.
  3172  				}
  3173  
  3174  				m.Lock()
  3175  				defer m.Unlock()
  3176  
  3177  				results[alias] = status
  3178  				tabletMap[alias] = tablet
  3179  			}(ctx, alias, tabletInfo.Tablet)
  3180  		}
  3181  	}
  3182  
  3183  	wg.Wait()
  3184  
  3185  	if rec.HasErrors() {
  3186  		err = rec.Error()
  3187  		return nil, err
  3188  	}
  3189  
  3190  	return &vtctldatapb.ShardReplicationPositionsResponse{
  3191  		ReplicationStatuses: results,
  3192  		TabletMap:           tabletMap,
  3193  	}, nil
  3194  }
  3195  
  3196  // ShardReplicationRemove is part of the vtctlservicepb.VtctldServer interface.
  3197  func (s *VtctldServer) ShardReplicationRemove(ctx context.Context, req *vtctldatapb.ShardReplicationRemoveRequest) (resp *vtctldatapb.ShardReplicationRemoveResponse, err error) {
  3198  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ShardReplicationRemove")
  3199  	defer span.Finish()
  3200  
  3201  	defer panicHandler(&err)
  3202  
  3203  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  3204  	span.Annotate("keyspace", req.Keyspace)
  3205  	span.Annotate("shard", req.Shard)
  3206  
  3207  	if err = topo.RemoveShardReplicationRecord(ctx, s.ts, req.TabletAlias.Cell, req.Keyspace, req.Shard, req.TabletAlias); err != nil {
  3208  		return nil, err
  3209  	}
  3210  
  3211  	return &vtctldatapb.ShardReplicationRemoveResponse{}, nil
  3212  }
  3213  
  3214  // SleepTablet is part of the vtctlservicepb.VtctldServer interface.
  3215  func (s *VtctldServer) SleepTablet(ctx context.Context, req *vtctldatapb.SleepTabletRequest) (resp *vtctldatapb.SleepTabletResponse, err error) {
  3216  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SleepTablet")
  3217  	defer span.Finish()
  3218  
  3219  	defer panicHandler(&err)
  3220  
  3221  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
  3222  
  3223  	dur, ok, err := protoutil.DurationFromProto(req.Duration)
  3224  	if err != nil {
  3225  		return nil, err
  3226  	} else if !ok {
  3227  		dur = topo.RemoteOperationTimeout
  3228  	}
  3229  
  3230  	span.Annotate("sleep_duration", dur.String())
  3231  
  3232  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
  3233  	if err != nil {
  3234  		return nil, err
  3235  	}
  3236  
  3237  	err = s.tmc.Sleep(ctx, tablet.Tablet, dur)
  3238  	if err != nil {
  3239  		return nil, err
  3240  	}
  3241  
  3242  	return &vtctldatapb.SleepTabletResponse{}, nil
  3243  }
  3244  
  3245  // SourceShardAdd is part of the vtctlservicepb.VtctldServer interface.
  3246  func (s *VtctldServer) SourceShardAdd(ctx context.Context, req *vtctldatapb.SourceShardAddRequest) (resp *vtctldatapb.SourceShardAddResponse, err error) {
  3247  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SourceShardAdd")
  3248  	defer span.Finish()
  3249  
  3250  	defer panicHandler(&err)
  3251  
  3252  	span.Annotate("keyspace", req.Keyspace)
  3253  	span.Annotate("shard", req.Shard)
  3254  	span.Annotate("uid", req.Uid)
  3255  	span.Annotate("source_keyspace", req.SourceKeyspace)
  3256  	span.Annotate("source_shard", req.SourceShard)
  3257  	span.Annotate("keyrange", key.KeyRangeString(req.KeyRange))
  3258  	span.Annotate("tables", strings.Join(req.Tables, ","))
  3259  
  3260  	var si *topo.ShardInfo
  3261  
  3262  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, fmt.Sprintf("SourceShardAdd(%v)", req.Uid))
  3263  	if lockErr != nil {
  3264  		err = lockErr
  3265  		return nil, err
  3266  	}
  3267  	defer unlock(&err)
  3268  
  3269  	si, err = s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error {
  3270  		for _, ss := range si.SourceShards {
  3271  			if ss.Uid == req.Uid {
  3272  				return fmt.Errorf("%w: uid %v is already in use", topo.NewError(topo.NoUpdateNeeded, fmt.Sprintf("%s/%s", req.Keyspace, req.Shard)), req.Uid)
  3273  			}
  3274  		}
  3275  
  3276  		si.SourceShards = append(si.SourceShards, &topodatapb.Shard_SourceShard{
  3277  			Keyspace: req.SourceKeyspace,
  3278  			Shard:    req.SourceShard,
  3279  			Uid:      req.Uid,
  3280  			KeyRange: req.KeyRange,
  3281  			Tables:   req.Tables,
  3282  		})
  3283  		return nil
  3284  	})
  3285  	if err != nil {
  3286  		return nil, err
  3287  	}
  3288  
  3289  	resp = &vtctldatapb.SourceShardAddResponse{}
  3290  	switch si {
  3291  	case nil:
  3292  		// If we return NoUpdateNeeded from ts.UpdateShardFields, then we don't
  3293  		// get a ShardInfo back.
  3294  	default:
  3295  		resp.Shard = si.Shard
  3296  	}
  3297  
  3298  	return resp, err
  3299  }
  3300  
  3301  // SourceShardDelete is part of the vtctlservicepb.VtctldServer interface.
  3302  func (s *VtctldServer) SourceShardDelete(ctx context.Context, req *vtctldatapb.SourceShardDeleteRequest) (resp *vtctldatapb.SourceShardDeleteResponse, err error) {
  3303  	span, ctx := trace.NewSpan(ctx, "VtctldServer.SourceShardDelete")
  3304  	defer span.Finish()
  3305  
  3306  	defer panicHandler(&err)
  3307  
  3308  	span.Annotate("keyspace", req.Keyspace)
  3309  	span.Annotate("shard", req.Shard)
  3310  	span.Annotate("uid", req.Uid)
  3311  
  3312  	var si *topo.ShardInfo
  3313  
  3314  	ctx, unlock, lockErr := s.ts.LockKeyspace(ctx, req.Keyspace, fmt.Sprintf("SourceShardDelete(%v)", req.Uid))
  3315  	if lockErr != nil {
  3316  		err = lockErr
  3317  		return nil, err
  3318  	}
  3319  	defer unlock(&err)
  3320  
  3321  	si, err = s.ts.UpdateShardFields(ctx, req.Keyspace, req.Shard, func(si *topo.ShardInfo) error {
  3322  		var newSourceShards []*topodatapb.Shard_SourceShard
  3323  		for _, ss := range si.SourceShards {
  3324  			if ss.Uid != req.Uid {
  3325  				newSourceShards = append(newSourceShards, ss)
  3326  			}
  3327  		}
  3328  
  3329  		if len(newSourceShards) == len(si.SourceShards) {
  3330  			return fmt.Errorf("%w: no SourceShard with uid %v", topo.NewError(topo.NoUpdateNeeded, fmt.Sprintf("%s/%s", req.Keyspace, req.Shard)), req.Uid)
  3331  		}
  3332  
  3333  		si.SourceShards = newSourceShards
  3334  		return nil
  3335  	})
  3336  	if err != nil {
  3337  		return nil, err
  3338  	}
  3339  
  3340  	resp = &vtctldatapb.SourceShardDeleteResponse{}
  3341  	switch si {
  3342  	case nil:
  3343  		// If we return NoUpdateNeeded from ts.UpdateShardFields, then we don't
  3344  		// get a ShardInfo back.
  3345  	default:
  3346  		resp.Shard = si.Shard
  3347  	}
  3348  
  3349  	return resp, err
  3350  }
  3351  
  3352  // StartReplication is part of the vtctldservicepb.VtctldServer interface.
  3353  func (s *VtctldServer) StartReplication(ctx context.Context, req *vtctldatapb.StartReplicationRequest) (resp *vtctldatapb.StartReplicationResponse, err error) {
  3354  	span, ctx := trace.NewSpan(ctx, "VtctldServer.StartReplication")
  3355  	defer span.Finish()
  3356  
  3357  	defer panicHandler(&err)
  3358  
  3359  	if req.TabletAlias == nil {
  3360  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "StartReplication.TabletAlias is required")
  3361  		return nil, err
  3362  	}
  3363  
  3364  	alias := topoproto.TabletAliasString(req.TabletAlias)
  3365  	span.Annotate("tablet_alias", alias)
  3366  
  3367  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
  3368  	if err != nil {
  3369  		log.Errorf("StartReplication: failed to read tablet record for %v: %v", alias, err)
  3370  		return nil, err
  3371  	}
  3372  
  3373  	shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
  3374  	if err != nil {
  3375  		return nil, err
  3376  	}
  3377  
  3378  	if !shard.HasPrimary() {
  3379  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "no primary tablet for shard %v/%v", tablet.Keyspace, tablet.Shard)
  3380  		return nil, err
  3381  	}
  3382  
  3383  	shardPrimary, err := s.ts.GetTablet(ctx, shard.PrimaryAlias)
  3384  	if err != nil {
  3385  		err = fmt.Errorf("cannot lookup primary tablet %v for shard %v/%v: %w", topoproto.TabletAliasString(shard.PrimaryAlias), tablet.Keyspace, tablet.Shard, err)
  3386  		return nil, err
  3387  	}
  3388  
  3389  	if shardPrimary.Type != topodatapb.TabletType_PRIMARY {
  3390  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "TopologyServer has incosistent state for shard primary %v", topoproto.TabletAliasString(shard.PrimaryAlias))
  3391  		return nil, err
  3392  	}
  3393  
  3394  	if shardPrimary.Keyspace != tablet.Keyspace || shardPrimary.Shard != tablet.Shard {
  3395  		err = vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "primary %v and replica %v not in same keypace shard (%v/%v)", topoproto.TabletAliasString(shard.PrimaryAlias), topoproto.TabletAliasString(tablet.Alias), tablet.Keyspace, tablet.Shard)
  3396  		return nil, err
  3397  	}
  3398  
  3399  	durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace)
  3400  	if err != nil {
  3401  		return nil, err
  3402  	}
  3403  	log.Infof("Getting a new durability policy for %v", durabilityName)
  3404  	durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
  3405  	if err != nil {
  3406  		return nil, err
  3407  	}
  3408  
  3409  	if err = s.tmc.StartReplication(ctx, tablet.Tablet, reparentutil.IsReplicaSemiSync(durability, shardPrimary.Tablet, tablet.Tablet)); err != nil {
  3410  		log.Errorf("StartReplication: failed to start replication on %v: %v", alias, err)
  3411  		return nil, err
  3412  	}
  3413  
  3414  	return &vtctldatapb.StartReplicationResponse{}, nil
  3415  }
  3416  
  3417  // StopReplication is part of the vtctldservicepb.VtctldServer interface.
  3418  func (s *VtctldServer) StopReplication(ctx context.Context, req *vtctldatapb.StopReplicationRequest) (resp *vtctldatapb.StopReplicationResponse, err error) {
  3419  	span, ctx := trace.NewSpan(ctx, "VtctldServer.StopReplication")
  3420  	defer span.Finish()
  3421  
  3422  	defer panicHandler(&err)
  3423  
  3424  	if req.TabletAlias == nil {
  3425  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "StopReplication.TabletAlias is required")
  3426  		return nil, err
  3427  	}
  3428  
  3429  	alias := topoproto.TabletAliasString(req.TabletAlias)
  3430  	span.Annotate("tablet_alias", alias)
  3431  
  3432  	tablet, err := s.ts.GetTablet(ctx, req.TabletAlias)
  3433  	if err != nil {
  3434  		log.Errorf("StopReplication: failed to read tablet record for %v: %v", alias, err)
  3435  		return nil, err
  3436  	}
  3437  
  3438  	if err := s.tmc.StopReplication(ctx, tablet.Tablet); err != nil {
  3439  		log.Errorf("StopReplication: failed to stop replication on %v: %v", alias, err)
  3440  		return nil, err
  3441  	}
  3442  
  3443  	return &vtctldatapb.StopReplicationResponse{}, nil
  3444  }
  3445  
  3446  // TabletExternallyReparented is part of the vtctldservicepb.VtctldServer interface.
  3447  func (s *VtctldServer) TabletExternallyReparented(ctx context.Context, req *vtctldatapb.TabletExternallyReparentedRequest) (resp *vtctldatapb.TabletExternallyReparentedResponse, err error) {
  3448  	span, ctx := trace.NewSpan(ctx, "VtctldServer.TabletExternallyReparented")
  3449  	defer span.Finish()
  3450  
  3451  	defer panicHandler(&err)
  3452  
  3453  	if req.Tablet == nil {
  3454  		err = vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "TabletExternallyReparentedRequest.Tablet must not be nil")
  3455  		return nil, err
  3456  	}
  3457  
  3458  	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.Tablet))
  3459  
  3460  	tablet, err := s.ts.GetTablet(ctx, req.Tablet)
  3461  	if err != nil {
  3462  		log.Warningf("TabletExternallyReparented: failed to read tablet record for %v: %v", topoproto.TabletAliasString(req.Tablet), err)
  3463  		return nil, err
  3464  	}
  3465  
  3466  	shard, err := s.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
  3467  	if err != nil {
  3468  		log.Warningf("TabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
  3469  		return nil, err
  3470  	}
  3471  
  3472  	resp = &vtctldatapb.TabletExternallyReparentedResponse{
  3473  		Keyspace:   shard.Keyspace(),
  3474  		Shard:      shard.ShardName(),
  3475  		NewPrimary: req.Tablet,
  3476  		OldPrimary: shard.PrimaryAlias,
  3477  	}
  3478  
  3479  	// If the externally reparented (new primary) tablet is already PRIMARY in
  3480  	// the topo, this is a no-op.
  3481  	if tablet.Type == topodatapb.TabletType_PRIMARY {
  3482  		return resp, nil
  3483  	}
  3484  
  3485  	log.Infof("TabletExternallyReparented: executing tablet type change %v -> PRIMARY on %v", tablet.Type, topoproto.TabletAliasString(req.Tablet))
  3486  	ev := &events.Reparent{
  3487  		ShardInfo:  *shard,
  3488  		NewPrimary: proto.Clone(tablet.Tablet).(*topodatapb.Tablet),
  3489  		OldPrimary: &topodatapb.Tablet{
  3490  			Alias: shard.PrimaryAlias,
  3491  			Type:  topodatapb.TabletType_PRIMARY,
  3492  		},
  3493  	}
  3494  
  3495  	defer func() {
  3496  		// Ensure we dispatch an update with any failure.
  3497  		if err != nil {
  3498  			event.DispatchUpdate(ev, "failed: "+err.Error())
  3499  		}
  3500  	}()
  3501  
  3502  	event.DispatchUpdate(ev, "starting external reparent")
  3503  
  3504  	durabilityName, err := s.ts.GetKeyspaceDurability(ctx, tablet.Keyspace)
  3505  	if err != nil {
  3506  		return nil, err
  3507  	}
  3508  	log.Infof("Getting a new durability policy for %v", durabilityName)
  3509  	durability, err := reparentutil.GetDurabilityPolicy(durabilityName)
  3510  	if err != nil {
  3511  		return nil, err
  3512  	}
  3513  
  3514  	if err = s.tmc.ChangeType(ctx, tablet.Tablet, topodatapb.TabletType_PRIMARY, reparentutil.SemiSyncAckers(durability, tablet.Tablet) > 0); err != nil {
  3515  		log.Warningf("ChangeType(%v, PRIMARY): %v", topoproto.TabletAliasString(req.Tablet), err)
  3516  		return nil, err
  3517  	}
  3518  
  3519  	event.DispatchUpdate(ev, "finished")
  3520  
  3521  	return resp, nil
  3522  }
  3523  
  3524  // UpdateCellInfo is part of the vtctlservicepb.VtctldServer interface.
  3525  func (s *VtctldServer) UpdateCellInfo(ctx context.Context, req *vtctldatapb.UpdateCellInfoRequest) (resp *vtctldatapb.UpdateCellInfoResponse, err error) {
  3526  	span, ctx := trace.NewSpan(ctx, "VtctldServer.UpdateCellInfo")
  3527  	defer span.Finish()
  3528  
  3529  	defer panicHandler(&err)
  3530  
  3531  	span.Annotate("cell", req.Name)
  3532  	span.Annotate("cell_server_address", req.CellInfo.ServerAddress)
  3533  	span.Annotate("cell_root", req.CellInfo.Root)
  3534  
  3535  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3536  	defer cancel()
  3537  
  3538  	var updatedCi *topodatapb.CellInfo
  3539  	err = s.ts.UpdateCellInfoFields(ctx, req.Name, func(ci *topodatapb.CellInfo) error {
  3540  		defer func() {
  3541  			updatedCi = proto.Clone(ci).(*topodatapb.CellInfo)
  3542  		}()
  3543  
  3544  		changed := false
  3545  
  3546  		if req.CellInfo.ServerAddress != "" && req.CellInfo.ServerAddress != ci.ServerAddress {
  3547  			changed = true
  3548  			ci.ServerAddress = req.CellInfo.ServerAddress
  3549  		}
  3550  
  3551  		if req.CellInfo.Root != "" && req.CellInfo.Root != ci.Root {
  3552  			changed = true
  3553  			ci.Root = req.CellInfo.Root
  3554  		}
  3555  
  3556  		if !changed {
  3557  			return topo.NewError(topo.NoUpdateNeeded, req.Name)
  3558  		}
  3559  
  3560  		return nil
  3561  	})
  3562  
  3563  	if err != nil {
  3564  		return nil, err
  3565  	}
  3566  
  3567  	return &vtctldatapb.UpdateCellInfoResponse{
  3568  		Name:     req.Name,
  3569  		CellInfo: updatedCi,
  3570  	}, nil
  3571  }
  3572  
  3573  // UpdateCellsAlias is part of the vtctlservicepb.VtctldServer interface.
  3574  func (s *VtctldServer) UpdateCellsAlias(ctx context.Context, req *vtctldatapb.UpdateCellsAliasRequest) (resp *vtctldatapb.UpdateCellsAliasResponse, err error) {
  3575  	span, ctx := trace.NewSpan(ctx, "VtctldServer.UpdateCellsAlias")
  3576  	defer span.Finish()
  3577  
  3578  	defer panicHandler(&err)
  3579  
  3580  	span.Annotate("cells_alias", req.Name)
  3581  	span.Annotate("cells_alias_cells", strings.Join(req.CellsAlias.Cells, ","))
  3582  
  3583  	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3584  	defer cancel()
  3585  
  3586  	var updatedCa *topodatapb.CellsAlias
  3587  	err = s.ts.UpdateCellsAlias(ctx, req.Name, func(ca *topodatapb.CellsAlias) error {
  3588  		defer func() {
  3589  			updatedCa = proto.Clone(ca).(*topodatapb.CellsAlias)
  3590  		}()
  3591  
  3592  		ca.Cells = req.CellsAlias.Cells
  3593  		return nil
  3594  	})
  3595  
  3596  	if err != nil {
  3597  		return nil, err
  3598  	}
  3599  
  3600  	return &vtctldatapb.UpdateCellsAliasResponse{
  3601  		Name:       req.Name,
  3602  		CellsAlias: updatedCa,
  3603  	}, nil
  3604  }
  3605  
  3606  // Validate is part of the vtctlservicepb.VtctldServer interface.
  3607  func (s *VtctldServer) Validate(ctx context.Context, req *vtctldatapb.ValidateRequest) (resp *vtctldatapb.ValidateResponse, err error) {
  3608  	span, ctx := trace.NewSpan(ctx, "VtctldServer.Validate")
  3609  	defer span.Finish()
  3610  
  3611  	defer panicHandler(&err)
  3612  
  3613  	span.Annotate("ping_tablets", req.PingTablets)
  3614  
  3615  	resp = &vtctldatapb.ValidateResponse{}
  3616  	getKeyspacesCtx, getKeyspacesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3617  	defer getKeyspacesCancel()
  3618  
  3619  	keyspaces, err := s.ts.GetKeyspaces(getKeyspacesCtx)
  3620  	if err != nil {
  3621  		resp.Results = append(resp.Results, fmt.Sprintf("GetKeyspaces failed: %v", err))
  3622  		return resp, nil
  3623  	}
  3624  
  3625  	var (
  3626  		m  sync.Mutex
  3627  		wg sync.WaitGroup
  3628  	)
  3629  
  3630  	wg.Add(1)
  3631  	go func() {
  3632  		defer wg.Done()
  3633  		validateAllTablets := func(ctx context.Context, keyspaces []string) {
  3634  			span, ctx := trace.NewSpan(ctx, "VtctldServer.validateAllTablets")
  3635  			defer span.Finish()
  3636  
  3637  			cellSet := sets.New[string]()
  3638  			for _, keyspace := range keyspaces {
  3639  				getShardNamesCtx, getShardNamesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3640  				shards, err := s.ts.GetShardNames(getShardNamesCtx, keyspace)
  3641  				getShardNamesCancel() // don't defer in a loop
  3642  
  3643  				if err != nil {
  3644  					m.Lock()
  3645  					resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err))
  3646  					m.Unlock()
  3647  					continue
  3648  				}
  3649  
  3650  				for _, shard := range shards {
  3651  					findAllTabletAliasesCtx, findAllTabletAliasesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3652  					aliases, err := s.ts.FindAllTabletAliasesInShard(findAllTabletAliasesCtx, keyspace, shard)
  3653  					findAllTabletAliasesCancel() // don't defer in a loop
  3654  
  3655  					if err != nil {
  3656  						m.Lock()
  3657  						resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.FindAllTabletAliasesInShard(%v/%v) failed: %v", keyspace, shard, err))
  3658  						m.Unlock()
  3659  						continue
  3660  					}
  3661  
  3662  					for _, alias := range aliases {
  3663  						cellSet.Insert(alias.Cell)
  3664  					}
  3665  				}
  3666  			}
  3667  
  3668  			for _, cell := range sets.List(cellSet) {
  3669  				getTabletsByCellCtx, getTabletsByCellCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3670  				aliases, err := s.ts.GetTabletAliasesByCell(getTabletsByCellCtx, cell)
  3671  				getTabletsByCellCancel() // don't defer in a loop
  3672  
  3673  				if err != nil {
  3674  					m.Lock()
  3675  					resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetTabletsByCell(%v) failed: %v", cell, err))
  3676  					m.Unlock()
  3677  					continue
  3678  				}
  3679  
  3680  				for _, alias := range aliases {
  3681  					wg.Add(1)
  3682  					go func(alias *topodatapb.TabletAlias) {
  3683  						defer wg.Done()
  3684  
  3685  						span, ctx := trace.NewSpan(ctx, "VtctldServer.validateTablet")
  3686  						defer span.Finish()
  3687  
  3688  						key := topoproto.TabletAliasString(alias)
  3689  						span.Annotate("tablet_alias", key)
  3690  
  3691  						ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3692  						defer cancel()
  3693  
  3694  						if err := topo.Validate(ctx, s.ts, alias); err != nil {
  3695  							m.Lock()
  3696  							defer m.Unlock()
  3697  
  3698  							resp.Results = append(resp.Results, fmt.Sprintf("topo.Validate(%v) failed: %v", key, err))
  3699  							return
  3700  						}
  3701  
  3702  						log.Infof("tablet %v is valid", key)
  3703  					}(alias)
  3704  				}
  3705  			}
  3706  		}
  3707  
  3708  		validateAllTablets(ctx, keyspaces)
  3709  	}()
  3710  
  3711  	resp.ResultsByKeyspace = make(map[string]*vtctldatapb.ValidateKeyspaceResponse, len(keyspaces))
  3712  
  3713  	for _, keyspace := range keyspaces {
  3714  		wg.Add(1)
  3715  		go func(keyspace string) {
  3716  			defer wg.Done()
  3717  			keyspaceResp, err := s.ValidateKeyspace(ctx, &vtctldatapb.ValidateKeyspaceRequest{
  3718  				Keyspace:    keyspace,
  3719  				PingTablets: req.PingTablets,
  3720  			})
  3721  
  3722  			m.Lock()
  3723  			defer m.Unlock()
  3724  
  3725  			if err != nil {
  3726  				resp.ResultsByKeyspace[keyspace] = &vtctldatapb.ValidateKeyspaceResponse{
  3727  					Results: []string{fmt.Sprintf("failed to validate: %v", err)},
  3728  				}
  3729  				return
  3730  			}
  3731  
  3732  			resp.ResultsByKeyspace[keyspace] = keyspaceResp
  3733  		}(keyspace)
  3734  	}
  3735  
  3736  	wg.Wait()
  3737  	return resp, err
  3738  }
  3739  
  3740  // ValidateKeyspace is part of the vtctlservicepb.VtctldServer interface.
  3741  func (s *VtctldServer) ValidateKeyspace(ctx context.Context, req *vtctldatapb.ValidateKeyspaceRequest) (resp *vtctldatapb.ValidateKeyspaceResponse, err error) {
  3742  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateKeyspace")
  3743  	defer span.Finish()
  3744  
  3745  	defer panicHandler(&err)
  3746  
  3747  	span.Annotate("keyspace", req.Keyspace)
  3748  	span.Annotate("ping_tablets", req.PingTablets)
  3749  
  3750  	resp = &vtctldatapb.ValidateKeyspaceResponse{}
  3751  	getShardNamesCtx, getShardNamesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3752  	defer getShardNamesCancel()
  3753  
  3754  	shards, err := s.ts.GetShardNames(getShardNamesCtx, req.Keyspace)
  3755  	if err != nil {
  3756  		resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", req.Keyspace, err))
  3757  		err = nil
  3758  		return resp, err
  3759  	}
  3760  
  3761  	resp.ResultsByShard = make(map[string]*vtctldatapb.ValidateShardResponse, len(shards))
  3762  
  3763  	var (
  3764  		m  sync.Mutex
  3765  		wg sync.WaitGroup
  3766  	)
  3767  	for _, shard := range shards {
  3768  		wg.Add(1)
  3769  		go func(shard string) {
  3770  			defer wg.Done()
  3771  			shardResp, err := s.ValidateShard(ctx, &vtctldatapb.ValidateShardRequest{
  3772  				Keyspace:    req.Keyspace,
  3773  				Shard:       shard,
  3774  				PingTablets: req.PingTablets,
  3775  			})
  3776  
  3777  			m.Lock()
  3778  			defer m.Unlock()
  3779  
  3780  			if err != nil {
  3781  				resp.Results = append(resp.Results, fmt.Sprintf("error validating shard %v/%v: %v", req.Keyspace, shard, err))
  3782  				return
  3783  			}
  3784  
  3785  			resp.ResultsByShard[shard] = shardResp
  3786  		}(shard)
  3787  	}
  3788  
  3789  	wg.Wait()
  3790  	return resp, err
  3791  }
  3792  
  3793  // ValidateSchemaKeyspace is a part of the vtctlservicepb.VtctldServer interface.
  3794  // It will diff the schema from all the tablets in the keyspace.
  3795  func (s *VtctldServer) ValidateSchemaKeyspace(ctx context.Context, req *vtctldatapb.ValidateSchemaKeyspaceRequest) (resp *vtctldatapb.ValidateSchemaKeyspaceResponse, err error) {
  3796  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateSchemaKeyspace")
  3797  	defer span.Finish()
  3798  
  3799  	defer panicHandler(&err)
  3800  
  3801  	span.Annotate("keyspace", req.Keyspace)
  3802  	keyspace := req.Keyspace
  3803  
  3804  	resp = &vtctldatapb.ValidateSchemaKeyspaceResponse{
  3805  		Results: []string{},
  3806  	}
  3807  
  3808  	shards, err := s.ts.GetShardNames(ctx, keyspace)
  3809  	if err != nil {
  3810  		resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", req.Keyspace, err))
  3811  		err = nil
  3812  		return resp, err
  3813  	}
  3814  
  3815  	resp.ResultsByShard = make(map[string]*vtctldatapb.ValidateShardResponse, len(shards))
  3816  
  3817  	// Initiate individual shard results first
  3818  	for _, shard := range shards {
  3819  		resp.ResultsByShard[shard] = &vtctldatapb.ValidateShardResponse{
  3820  			Results: []string{},
  3821  		}
  3822  	}
  3823  
  3824  	if req.IncludeVschema {
  3825  		results, err2 := s.ValidateVSchema(ctx, &vtctldatapb.ValidateVSchemaRequest{
  3826  			Keyspace:      keyspace,
  3827  			Shards:        shards,
  3828  			ExcludeTables: req.ExcludeTables,
  3829  			IncludeViews:  req.IncludeViews,
  3830  		})
  3831  		if err2 != nil {
  3832  			err = err2
  3833  			return nil, err
  3834  		}
  3835  
  3836  		if len(results.Results) > 0 {
  3837  			resp.Results = append(resp.Results, results.Results...)
  3838  			for shard, shardResults := range resp.ResultsByShard {
  3839  				resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, shardResults.Results...)
  3840  			}
  3841  			return resp, err
  3842  		}
  3843  	}
  3844  
  3845  	sort.Strings(shards)
  3846  
  3847  	var (
  3848  		referenceSchema *tabletmanagerdatapb.SchemaDefinition
  3849  		referenceAlias  *topodatapb.TabletAlias
  3850  		m               sync.Mutex
  3851  		wg              sync.WaitGroup
  3852  	)
  3853  
  3854  	r := &tabletmanagerdatapb.GetSchemaRequest{ExcludeTables: req.ExcludeTables, IncludeViews: req.IncludeViews}
  3855  	for _, shard := range shards[0:] {
  3856  		wg.Add(1)
  3857  		go func(shard string) {
  3858  			defer wg.Done()
  3859  
  3860  			si, err := s.ts.GetShard(ctx, keyspace, shard)
  3861  
  3862  			m.Lock()
  3863  			defer m.Unlock()
  3864  
  3865  			if err != nil {
  3866  				errMessage := fmt.Sprintf("GetShard(%v, %v) failed: %v", keyspace, shard, err)
  3867  				resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage)
  3868  				resp.Results = append(resp.Results, errMessage)
  3869  				return
  3870  			}
  3871  
  3872  			if !si.HasPrimary() {
  3873  				if !req.SkipNoPrimary {
  3874  					errMessage := fmt.Sprintf("no primary in shard %v/%v", keyspace, shard)
  3875  					resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage)
  3876  					resp.Results = append(resp.Results, errMessage)
  3877  				}
  3878  				return
  3879  			}
  3880  
  3881  			if referenceSchema == nil {
  3882  				referenceAlias = si.PrimaryAlias
  3883  				referenceSchema, err = schematools.GetSchema(ctx, s.ts, s.tmc, referenceAlias, r)
  3884  				if err != nil {
  3885  					return
  3886  				}
  3887  			}
  3888  
  3889  			aliases, err := s.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard)
  3890  			if err != nil {
  3891  				errMessage := fmt.Sprintf("FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err)
  3892  				resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage)
  3893  				resp.Results = append(resp.Results, errMessage)
  3894  				return
  3895  			}
  3896  
  3897  			aliasWg := sync.WaitGroup{}
  3898  			aliasErrs := concurrency.AllErrorRecorder{}
  3899  
  3900  			for _, alias := range aliases {
  3901  				if referenceAlias == alias {
  3902  					continue
  3903  				}
  3904  				aliasWg.Add(1)
  3905  				go func(alias *topodatapb.TabletAlias) {
  3906  					defer aliasWg.Done()
  3907  					replicaSchema, err := schematools.GetSchema(ctx, s.ts, s.tmc, alias, r)
  3908  					if err != nil {
  3909  						aliasErrs.RecordError(fmt.Errorf("GetSchema(%v, nil, %v, %v) failed: %v", alias, req.ExcludeTables, req.IncludeViews, err))
  3910  						return
  3911  					}
  3912  
  3913  					tmutils.DiffSchema(topoproto.TabletAliasString(referenceAlias), referenceSchema, topoproto.TabletAliasString(alias), replicaSchema, &aliasErrs)
  3914  				}(alias)
  3915  			}
  3916  			aliasWg.Wait()
  3917  
  3918  			if aliasErrs.HasErrors() {
  3919  				for _, err := range aliasErrs.Errors {
  3920  					errMessage := err.Error()
  3921  					resp.ResultsByShard[shard].Results = append(resp.ResultsByShard[shard].Results, errMessage)
  3922  					resp.Results = append(resp.Results, errMessage)
  3923  				}
  3924  			}
  3925  		}(shard)
  3926  	}
  3927  
  3928  	wg.Wait()
  3929  
  3930  	return resp, err
  3931  }
  3932  
  3933  // ValidateShard is part of the vtctlservicepb.VtctldServer interface.
  3934  func (s *VtctldServer) ValidateShard(ctx context.Context, req *vtctldatapb.ValidateShardRequest) (resp *vtctldatapb.ValidateShardResponse, err error) {
  3935  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateShard")
  3936  	defer span.Finish()
  3937  
  3938  	defer panicHandler(&err)
  3939  
  3940  	span.Annotate("keyspace", req.Keyspace)
  3941  	span.Annotate("shard", req.Shard)
  3942  	span.Annotate("ping_tablets", req.PingTablets)
  3943  
  3944  	resp = &vtctldatapb.ValidateShardResponse{}
  3945  	getShardCtx, getShardCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3946  	defer getShardCancel()
  3947  
  3948  	si, err := s.ts.GetShard(getShardCtx, req.Keyspace, req.Shard)
  3949  	if err != nil {
  3950  		resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShard(%v, %v) failed: %v", req.Keyspace, req.Shard, err))
  3951  		err = nil
  3952  		return resp, err
  3953  	}
  3954  
  3955  	findAllTabletAliasesCtx, findAllTabletAliasesCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3956  	defer findAllTabletAliasesCancel()
  3957  
  3958  	aliases, err := s.ts.FindAllTabletAliasesInShard(findAllTabletAliasesCtx, req.Keyspace, req.Shard)
  3959  	if err != nil {
  3960  		resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", req.Keyspace, req.Shard, err))
  3961  		err = nil
  3962  		return resp, err
  3963  	}
  3964  
  3965  	getTabletMapCtx, getTabletMapCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  3966  	defer getTabletMapCancel()
  3967  	tabletMap, _ := s.ts.GetTabletMap(getTabletMapCtx, aliases)
  3968  
  3969  	var primaryAlias *topodatapb.TabletAlias
  3970  	for _, alias := range aliases {
  3971  		key := topoproto.TabletAliasString(alias)
  3972  		ti, ok := tabletMap[key]
  3973  		if !ok {
  3974  			resp.Results = append(resp.Results, fmt.Sprintf("tablet %v not found in map", key))
  3975  			continue
  3976  		}
  3977  
  3978  		if ti.Type == topodatapb.TabletType_PRIMARY {
  3979  			switch primaryAlias {
  3980  			case nil:
  3981  				primaryAlias = alias
  3982  			default:
  3983  				resp.Results = append(resp.Results, fmt.Sprintf("shard %v/%v already has primary %v but found other primary %v", req.Keyspace, req.Shard, topoproto.TabletAliasString(primaryAlias), key))
  3984  			}
  3985  		}
  3986  	}
  3987  
  3988  	if primaryAlias == nil {
  3989  		resp.Results = append(resp.Results, fmt.Sprintf("no primary for shard %v/%v", req.Keyspace, req.Shard))
  3990  	} else if !topoproto.TabletAliasEqual(si.PrimaryAlias, primaryAlias) {
  3991  		resp.Results = append(resp.Results, fmt.Sprintf("primary mismatch for shard %v/%v: found %v, expected %v", si.Keyspace(), si.ShardName(), topoproto.TabletAliasString(primaryAlias), topoproto.TabletAliasString(si.PrimaryAlias)))
  3992  	}
  3993  
  3994  	var (
  3995  		wg      sync.WaitGroup
  3996  		results = make(chan string, len(aliases))
  3997  	)
  3998  
  3999  	for _, alias := range aliases {
  4000  		wg.Add(1)
  4001  		go func(alias *topodatapb.TabletAlias) {
  4002  			defer wg.Done()
  4003  
  4004  			ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  4005  			defer cancel()
  4006  
  4007  			if err := topo.Validate(ctx, s.ts, alias); err != nil {
  4008  				results <- fmt.Sprintf("topo.Validate(%v) failed: %v", topoproto.TabletAliasString(alias), err)
  4009  				return
  4010  			}
  4011  
  4012  			log.Infof("tablet %v is valid", topoproto.TabletAliasString(alias))
  4013  		}(alias)
  4014  	}
  4015  
  4016  	if req.PingTablets {
  4017  		validateReplication := func(ctx context.Context, si *topo.ShardInfo, tabletMap map[string]*topo.TabletInfo, results chan<- string) {
  4018  			if si.PrimaryAlias == nil {
  4019  				results <- fmt.Sprintf("no primary in shard record %v/%v", si.Keyspace(), si.ShardName())
  4020  				return
  4021  			}
  4022  
  4023  			shardPrimaryAliasStr := topoproto.TabletAliasString(si.PrimaryAlias)
  4024  			primaryTabletInfo, ok := tabletMap[shardPrimaryAliasStr]
  4025  			if !ok {
  4026  				results <- fmt.Sprintf("primary %v not in tablet map", shardPrimaryAliasStr)
  4027  				return
  4028  			}
  4029  
  4030  			ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  4031  			defer cancel()
  4032  
  4033  			replicaList, err := s.tmc.GetReplicas(ctx, primaryTabletInfo.Tablet)
  4034  			if err != nil {
  4035  				results <- fmt.Sprintf("GetReplicas(%v) failed: %v", primaryTabletInfo, err)
  4036  				return
  4037  			}
  4038  
  4039  			if len(replicaList) == 0 {
  4040  				results <- fmt.Sprintf("no replicas of tablet %v found", shardPrimaryAliasStr)
  4041  				return
  4042  			}
  4043  
  4044  			tabletIPMap := make(map[string]*topodatapb.Tablet)
  4045  			replicaIPMap := make(map[string]bool)
  4046  			for _, tablet := range tabletMap {
  4047  				ip, err := topoproto.MySQLIP(tablet.Tablet)
  4048  				if err != nil {
  4049  					results <- fmt.Sprintf("could not resolve IP for tablet %s: %v", tablet.Tablet.MysqlHostname, err)
  4050  					continue
  4051  				}
  4052  
  4053  				tabletIPMap[netutil.NormalizeIP(ip)] = tablet.Tablet
  4054  			}
  4055  
  4056  			// See if every replica is in the replication graph.
  4057  			for _, replicaAddr := range replicaList {
  4058  				if tabletIPMap[netutil.NormalizeIP(replicaAddr)] == nil {
  4059  					results <- fmt.Sprintf("replica %v not in replication graph for shard %v/%v (mysql instance without vttablet?)", replicaAddr, si.Keyspace(), si.ShardName())
  4060  				}
  4061  
  4062  				replicaIPMap[netutil.NormalizeIP(replicaAddr)] = true
  4063  			}
  4064  
  4065  			// See if every entry in the replication graph is connected to the primary.
  4066  			for _, tablet := range tabletMap {
  4067  				if !tablet.IsReplicaType() {
  4068  					continue
  4069  				}
  4070  
  4071  				ip, err := topoproto.MySQLIP(tablet.Tablet)
  4072  				if err != nil {
  4073  					results <- fmt.Sprintf("could not resolve IP for tablet %s: %v", tablet.Tablet.MysqlHostname, err)
  4074  					continue
  4075  				}
  4076  
  4077  				if !replicaIPMap[netutil.NormalizeIP(ip)] {
  4078  					results <- fmt.Sprintf("replica %v not replicating: %v replica list: %q", topoproto.TabletAliasString(tablet.Alias), ip, replicaList)
  4079  				}
  4080  			}
  4081  		}
  4082  		pingTablets := func(ctx context.Context, tabletMap map[string]*topo.TabletInfo, results chan<- string) {
  4083  			for alias, ti := range tabletMap {
  4084  				wg.Add(1)
  4085  				go func(alias string, ti *topo.TabletInfo) {
  4086  					defer wg.Done()
  4087  
  4088  					ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
  4089  					defer cancel()
  4090  
  4091  					if err := s.tmc.Ping(ctx, ti.Tablet); err != nil {
  4092  						results <- fmt.Sprintf("Ping(%v) failed: %v tablet hostname: %v", alias, err, ti.Hostname)
  4093  					}
  4094  				}(alias, ti)
  4095  			}
  4096  		}
  4097  
  4098  		validateReplication(ctx, si, tabletMap, results) // done synchronously
  4099  		pingTablets(ctx, tabletMap, results)             // done async, using the waitgroup declared above in the main method body.
  4100  	}
  4101  
  4102  	done := make(chan bool)
  4103  	go func() {
  4104  		for result := range results {
  4105  			resp.Results = append(resp.Results, result)
  4106  		}
  4107  		done <- true
  4108  	}()
  4109  
  4110  	wg.Wait()
  4111  	close(results)
  4112  	<-done
  4113  
  4114  	return resp, err
  4115  }
  4116  
  4117  // ValidateVersionKeyspace validates all versions are the same in all
  4118  // tablets in a keyspace
  4119  func (s *VtctldServer) ValidateVersionKeyspace(ctx context.Context, req *vtctldatapb.ValidateVersionKeyspaceRequest) (resp *vtctldatapb.ValidateVersionKeyspaceResponse, err error) {
  4120  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateVersionKeyspace")
  4121  	defer span.Finish()
  4122  
  4123  	defer panicHandler(&err)
  4124  
  4125  	keyspace := req.Keyspace
  4126  	shards, err := s.ts.GetShardNames(ctx, keyspace)
  4127  	resp = &vtctldatapb.ValidateVersionKeyspaceResponse{
  4128  		Results:        []string{},
  4129  		ResultsByShard: make(map[string]*vtctldatapb.ValidateShardResponse, len(shards)),
  4130  	}
  4131  
  4132  	if err != nil {
  4133  		resp.Results = append(resp.Results, fmt.Sprintf("TopologyServer.GetShardNames(%v) failed: %v", keyspace, err))
  4134  		err = nil
  4135  		return
  4136  	}
  4137  
  4138  	if len(shards) == 0 {
  4139  		resp.Results = append(resp.Results, fmt.Sprintf("no shards in keyspace %v", keyspace))
  4140  		return
  4141  	}
  4142  
  4143  	si, err := s.ts.GetShard(ctx, keyspace, shards[0])
  4144  	if err != nil {
  4145  		resp.Results = append(resp.Results, fmt.Sprintf("unable to find primary shard %v/%v", keyspace, shards[0]))
  4146  		err = nil
  4147  		return
  4148  	}
  4149  	if !si.HasPrimary() {
  4150  		resp.Results = append(resp.Results, fmt.Sprintf("no primary in shard %v/%v", keyspace, shards[0]))
  4151  		return
  4152  	}
  4153  
  4154  	referenceAlias := si.PrimaryAlias
  4155  	referenceVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{TabletAlias: referenceAlias})
  4156  	if err != nil {
  4157  		resp.Results = append(resp.Results, fmt.Sprintf("unable to get reference version of first shard's primary tablet: %v", err))
  4158  		err = nil
  4159  		return
  4160  	}
  4161  
  4162  	var validateVersionKeyspaceResponseMutex sync.Mutex
  4163  
  4164  	for _, shard := range shards {
  4165  		shardResp := vtctldatapb.ValidateShardResponse{
  4166  			Results: []string{},
  4167  		}
  4168  
  4169  		var (
  4170  			validateShardResponseMutex sync.Mutex
  4171  			tabletWaitGroup            sync.WaitGroup
  4172  		)
  4173  
  4174  		aliases, err := s.ts.FindAllTabletAliasesInShard(ctx, keyspace, shard)
  4175  		if err != nil {
  4176  			errMessage := fmt.Sprintf("unable to find tablet aliases in shard %v: %v", shard, err)
  4177  			shardResp.Results = append(shardResp.Results, errMessage)
  4178  			validateVersionKeyspaceResponseMutex.Lock()
  4179  			resp.Results = append(resp.Results, errMessage)
  4180  			resp.ResultsByShard[shard] = &shardResp
  4181  			validateVersionKeyspaceResponseMutex.Unlock()
  4182  			continue
  4183  		}
  4184  
  4185  		for _, alias := range aliases {
  4186  			if topoproto.TabletAliasEqual(alias, si.PrimaryAlias) {
  4187  				continue
  4188  			}
  4189  
  4190  			tabletWaitGroup.Add(1)
  4191  			go func(alias *topodatapb.TabletAlias, m *sync.Mutex, ctx context.Context) {
  4192  				defer tabletWaitGroup.Done()
  4193  				replicaVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{TabletAlias: alias})
  4194  				if err != nil {
  4195  					validateShardResponseMutex.Lock()
  4196  					shardResp.Results = append(shardResp.Results, fmt.Sprintf("unable to get version for tablet %v: %v", alias, err))
  4197  					validateShardResponseMutex.Unlock()
  4198  					return
  4199  				}
  4200  
  4201  				if referenceVersion.Version != replicaVersion.Version {
  4202  					validateShardResponseMutex.Lock()
  4203  					shardResp.Results = append(shardResp.Results, fmt.Sprintf("primary %v version %v is different than replica %v version %v", topoproto.TabletAliasString(referenceAlias), referenceVersion, topoproto.TabletAliasString(alias), replicaVersion))
  4204  					validateShardResponseMutex.Unlock()
  4205  				}
  4206  			}(alias, &validateShardResponseMutex, ctx)
  4207  		}
  4208  
  4209  		tabletWaitGroup.Wait()
  4210  		validateVersionKeyspaceResponseMutex.Lock()
  4211  		resp.Results = append(resp.Results, shardResp.Results...)
  4212  		resp.ResultsByShard[shard] = &shardResp
  4213  		validateVersionKeyspaceResponseMutex.Unlock()
  4214  	}
  4215  
  4216  	return resp, err
  4217  }
  4218  
  4219  // ValidateVersionShard validates all versions are the same in all
  4220  // tablets in a shard
  4221  func (s *VtctldServer) ValidateVersionShard(ctx context.Context, req *vtctldatapb.ValidateVersionShardRequest) (resp *vtctldatapb.ValidateVersionShardResponse, err error) {
  4222  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateVersionShard")
  4223  	defer span.Finish()
  4224  
  4225  	defer panicHandler(&err)
  4226  
  4227  	shard, err := s.ts.GetShard(ctx, req.Keyspace, req.Shard)
  4228  	if err != nil {
  4229  		err = fmt.Errorf("GetShard(%s) failed: %v", req.Shard, err)
  4230  		return nil, err
  4231  	}
  4232  
  4233  	if !shard.HasPrimary() {
  4234  		err = fmt.Errorf("no primary in shard %v/%v", req.Keyspace, req.Shard)
  4235  		return nil, err
  4236  	}
  4237  
  4238  	log.Infof("Gathering version for primary %v", topoproto.TabletAliasString(shard.PrimaryAlias))
  4239  	primaryVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{
  4240  		TabletAlias: shard.PrimaryAlias,
  4241  	})
  4242  	if err != nil {
  4243  		err = fmt.Errorf("GetVersion(%s) failed: %v", topoproto.TabletAliasString(shard.PrimaryAlias), err)
  4244  		return nil, err
  4245  	}
  4246  
  4247  	aliases, err := s.ts.FindAllTabletAliasesInShard(ctx, req.Keyspace, req.Shard)
  4248  	if err != nil {
  4249  		err = fmt.Errorf("FindAllTabletAliasesInShard(%s, %s) failed: %v", req.Keyspace, req.Shard, err)
  4250  		return nil, err
  4251  	}
  4252  
  4253  	er := concurrency.AllErrorRecorder{}
  4254  	wg := sync.WaitGroup{}
  4255  	for _, alias := range aliases {
  4256  		if topoproto.TabletAliasEqual(alias, shard.PrimaryAlias) {
  4257  			continue
  4258  		}
  4259  
  4260  		wg.Add(1)
  4261  		go s.diffVersion(ctx, primaryVersion.Version, shard.PrimaryAlias, alias, &wg, &er)
  4262  	}
  4263  
  4264  	wg.Wait()
  4265  
  4266  	response := vtctldatapb.ValidateVersionShardResponse{}
  4267  	if er.HasErrors() {
  4268  		response.Results = append(response.Results, er.ErrorStrings()...)
  4269  	}
  4270  
  4271  	return &response, nil
  4272  }
  4273  
  4274  // ValidateVSchema compares the schema of each primary tablet in "keyspace/shards..." to the vschema and errs if there are differences
  4275  func (s *VtctldServer) ValidateVSchema(ctx context.Context, req *vtctldatapb.ValidateVSchemaRequest) (resp *vtctldatapb.ValidateVSchemaResponse, err error) {
  4276  	span, ctx := trace.NewSpan(ctx, "VtctldServer.ValidateVSchema")
  4277  	defer span.Finish()
  4278  
  4279  	defer panicHandler(&err)
  4280  	keyspace := req.Keyspace
  4281  	shards := req.Shards
  4282  	excludeTables := req.ExcludeTables
  4283  	includeViews := req.IncludeViews
  4284  
  4285  	vschm, err := s.ts.GetVSchema(ctx, keyspace)
  4286  	if err != nil {
  4287  		err = fmt.Errorf("GetVSchema(%s) failed: %v", keyspace, err)
  4288  		return nil, err
  4289  	}
  4290  
  4291  	resp = &vtctldatapb.ValidateVSchemaResponse{
  4292  		Results:        []string{},
  4293  		ResultsByShard: make(map[string]*vtctldatapb.ValidateShardResponse, len(shards)),
  4294  	}
  4295  
  4296  	var (
  4297  		wg sync.WaitGroup
  4298  		m  sync.Mutex
  4299  	)
  4300  
  4301  	wg.Add(len(shards))
  4302  
  4303  	for _, shard := range shards {
  4304  		go func(shard string) {
  4305  			defer wg.Done()
  4306  
  4307  			shardResult := vtctldatapb.ValidateShardResponse{
  4308  				Results: []string{},
  4309  			}
  4310  
  4311  			notFoundTables := []string{}
  4312  			si, err := s.ts.GetShard(ctx, keyspace, shard)
  4313  			if err != nil {
  4314  				errorMessage := fmt.Sprintf("GetShard(%v, %v) failed: %v", keyspace, shard, err)
  4315  				shardResult.Results = append(shardResult.Results, errorMessage)
  4316  				m.Lock()
  4317  				resp.Results = append(resp.Results, errorMessage)
  4318  				resp.ResultsByShard[shard] = &shardResult
  4319  				m.Unlock()
  4320  				return
  4321  			}
  4322  			r := &tabletmanagerdatapb.GetSchemaRequest{ExcludeTables: req.ExcludeTables, IncludeViews: req.IncludeViews}
  4323  			primarySchema, err := schematools.GetSchema(ctx, s.ts, s.tmc, si.PrimaryAlias, r)
  4324  			if err != nil {
  4325  				errorMessage := fmt.Sprintf("GetSchema(%s, nil, %v, %v) (%v/%v) failed: %v", si.PrimaryAlias.String(),
  4326  					excludeTables, includeViews, keyspace, shard, err,
  4327  				)
  4328  				shardResult.Results = append(shardResult.Results, errorMessage)
  4329  				m.Lock()
  4330  				resp.Results = append(resp.Results, errorMessage)
  4331  				resp.ResultsByShard[shard] = &shardResult
  4332  				m.Unlock()
  4333  				return
  4334  			}
  4335  			for _, tableDef := range primarySchema.TableDefinitions {
  4336  				if _, ok := vschm.Tables[tableDef.Name]; !ok {
  4337  					if !schema.IsInternalOperationTableName(tableDef.Name) {
  4338  						notFoundTables = append(notFoundTables, tableDef.Name)
  4339  					}
  4340  				}
  4341  			}
  4342  			if len(notFoundTables) > 0 {
  4343  				errorMessage := fmt.Sprintf("%v/%v has tables that are not in the vschema: %v", keyspace, shard, notFoundTables)
  4344  				shardResult.Results = append(shardResult.Results, errorMessage)
  4345  				m.Lock()
  4346  				resp.Results = append(resp.Results, errorMessage)
  4347  				resp.ResultsByShard[shard] = &shardResult
  4348  				m.Unlock()
  4349  			}
  4350  			m.Lock()
  4351  			resp.ResultsByShard[shard] = &shardResult
  4352  			m.Unlock()
  4353  		}(shard)
  4354  	}
  4355  	wg.Wait()
  4356  	return resp, err
  4357  }
  4358  
  4359  // StartServer registers a VtctldServer for RPCs on the given gRPC server.
  4360  func StartServer(s *grpc.Server, ts *topo.Server) {
  4361  	vtctlservicepb.RegisterVtctldServer(s, NewVtctldServer(ts))
  4362  }
  4363  
  4364  // getTopologyCell is a helper method that returns a topology cell given its path.
  4365  func (s *VtctldServer) getTopologyCell(ctx context.Context, cellPath string) (*vtctldatapb.TopologyCell, error) {
  4366  	// extract cell and relative path
  4367  	parts := strings.Split(cellPath, "/")
  4368  	if parts[0] != "" || len(parts) < 2 {
  4369  		err := vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid path: %s", cellPath)
  4370  		return nil, err
  4371  	}
  4372  	cell := parts[1]
  4373  	relativePath := cellPath[len(cell)+1:]
  4374  	topoCell := vtctldatapb.TopologyCell{Name: parts[len(parts)-1], Path: cellPath}
  4375  
  4376  	conn, err := s.ts.ConnForCell(ctx, cell)
  4377  	if err != nil {
  4378  		err := vterrors.Errorf(vtrpc.Code_UNAVAILABLE, "error fetching connection to cell %s: %v", cell, err)
  4379  		return nil, err
  4380  	}
  4381  
  4382  	data, _, dataErr := conn.Get(ctx, relativePath)
  4383  
  4384  	if dataErr == nil {
  4385  		result, err := topo.DecodeContent(relativePath, data, false)
  4386  		if err != nil {
  4387  			err := vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "error decoding file content for cell %s: %v", cellPath, err)
  4388  			return nil, err
  4389  		}
  4390  		topoCell.Data = result
  4391  		// since there is data at this cell, it cannot be a directory cell
  4392  		// so we can early return the topocell
  4393  		return &topoCell, nil
  4394  	}
  4395  
  4396  	children, childrenErr := conn.ListDir(ctx, relativePath, false /*full*/)
  4397  
  4398  	if childrenErr != nil && dataErr != nil {
  4399  		err := vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "cell %s with path %s has no file contents and no children: %v", cell, cellPath, err)
  4400  		return nil, err
  4401  	}
  4402  
  4403  	topoCell.Children = make([]string, len(children))
  4404  
  4405  	for i, c := range children {
  4406  		topoCell.Children[i] = c.Name
  4407  	}
  4408  
  4409  	return &topoCell, nil
  4410  }
  4411  
  4412  // Helper function to get version of a tablet from its debug vars
  4413  var getVersionFromTabletDebugVars = func(tabletAddr string) (string, error) {
  4414  	resp, err := http.Get("http://" + tabletAddr + "/debug/vars")
  4415  	if err != nil {
  4416  		return "", err
  4417  	}
  4418  	defer resp.Body.Close()
  4419  	body, err := io.ReadAll(resp.Body)
  4420  	if err != nil {
  4421  		return "", err
  4422  	}
  4423  
  4424  	var vars struct {
  4425  		BuildHost      string
  4426  		BuildUser      string
  4427  		BuildTimestamp int64
  4428  		BuildGitRev    string
  4429  	}
  4430  	err = json.Unmarshal(body, &vars)
  4431  	if err != nil {
  4432  		return "", err
  4433  	}
  4434  
  4435  	version := fmt.Sprintf("%v", vars)
  4436  	return version, nil
  4437  }
  4438  
  4439  var versionFuncMu sync.Mutex
  4440  var getVersionFromTablet = getVersionFromTabletDebugVars
  4441  
  4442  func SetVersionFunc(versionFunc func(string) (string, error)) {
  4443  	versionFuncMu.Lock()
  4444  	defer versionFuncMu.Unlock()
  4445  	getVersionFromTablet = versionFunc
  4446  }
  4447  
  4448  func GetVersionFunc() func(string) (string, error) {
  4449  	versionFuncMu.Lock()
  4450  	defer versionFuncMu.Unlock()
  4451  	return getVersionFromTablet
  4452  }
  4453  
  4454  // helper method to asynchronously get and diff a version
  4455  func (s *VtctldServer) diffVersion(ctx context.Context, primaryVersion string, primaryAlias *topodatapb.TabletAlias, alias *topodatapb.TabletAlias, wg *sync.WaitGroup, er concurrency.ErrorRecorder) {
  4456  	defer wg.Done()
  4457  	log.Infof("Gathering version for %v", topoproto.TabletAliasString(alias))
  4458  	replicaVersion, err := s.GetVersion(ctx, &vtctldatapb.GetVersionRequest{
  4459  		TabletAlias: alias,
  4460  	})
  4461  	if err != nil {
  4462  		er.RecordError(fmt.Errorf("unable to get version for tablet %v: %v", alias, err))
  4463  		return
  4464  	}
  4465  
  4466  	if primaryVersion != replicaVersion.Version {
  4467  		er.RecordError(fmt.Errorf("primary %v version %v is different than replica %v version %v", topoproto.TabletAliasString(primaryAlias), primaryVersion, topoproto.TabletAliasString(alias), replicaVersion))
  4468  	}
  4469  }