vitess.io/vitess@v0.16.2/go/vt/vtadmin/cluster/cluster.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cluster 18 19 import ( 20 "context" 21 "database/sql" 22 "encoding/json" 23 stderrors "errors" 24 "fmt" 25 "io" 26 "math/rand" 27 "sort" 28 "strings" 29 "sync" 30 "text/template" 31 "time" 32 33 "google.golang.org/protobuf/proto" 34 "k8s.io/apimachinery/pkg/util/sets" 35 36 "vitess.io/vitess/go/pools" 37 "vitess.io/vitess/go/protoutil" 38 "vitess.io/vitess/go/textutil" 39 "vitess.io/vitess/go/trace" 40 "vitess.io/vitess/go/vt/concurrency" 41 "vitess.io/vitess/go/vt/log" 42 "vitess.io/vitess/go/vt/logutil" 43 "vitess.io/vitess/go/vt/topo/topoproto" 44 "vitess.io/vitess/go/vt/vtadmin/cache" 45 "vitess.io/vitess/go/vt/vtadmin/cluster/discovery" 46 "vitess.io/vitess/go/vt/vtadmin/cluster/internal/caches/schemacache" 47 "vitess.io/vitess/go/vt/vtadmin/debug" 48 "vitess.io/vitess/go/vt/vtadmin/errors" 49 "vitess.io/vitess/go/vt/vtadmin/vtadminproto" 50 "vitess.io/vitess/go/vt/vtadmin/vtctldclient" 51 "vitess.io/vitess/go/vt/vtadmin/vtsql" 52 53 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 54 vtadminpb "vitess.io/vitess/go/vt/proto/vtadmin" 55 vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" 56 ) 57 58 // Cluster is the self-contained unit of services required for vtadmin to talk 59 // to a vitess cluster. This consists of a discovery service, a database 60 // connection, and a vtctl client. 61 type Cluster struct { 62 ID string 63 Name string 64 Discovery discovery.Discovery 65 66 DB vtsql.DB 67 Vtctld vtctldclient.Proxy 68 69 // These fields are kept to power debug endpoints. 70 // (TODO|@amason): Figure out if these are needed or if there's a way to 71 // push down to the credentials / vtsql. 72 // vtgateCredentialsPath string 73 74 // Fields for generating FQDNs for tablets 75 TabletFQDNTmpl *template.Template 76 77 backupReadPool *pools.RPCPool 78 schemaReadPool *pools.RPCPool 79 topoRWPool *pools.RPCPool 80 topoReadPool *pools.RPCPool 81 workflowReadPool *pools.RPCPool 82 83 emergencyFailoverPool *pools.RPCPool // ERS-only 84 failoverPool *pools.RPCPool // PRS-only 85 86 // schemaCache caches schema(s) for different GetSchema(s) requests. 87 // 88 // - if we call GetSchema, then getSchemaCacheRequest.Keyspace will be 89 // non-empty and the cached schemas slice will contain exactly one element, 90 // namely for that keyspace's schema. 91 // - if we call GetSchemas, then getSchemaCacheRequest == "", and the cached 92 // schemas slice will contain one element per keyspace* in the cluster 93 // *: at the time it was cached; if keyspaces were created/destroyed in 94 // the interim, we won't pick that up until something refreshes the cache. 95 schemaCache *cache.Cache[schemacache.Key, []*vtadminpb.Schema] 96 97 cfg Config 98 } 99 100 // New creates a new Cluster from a Config. 
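//
// A minimal construction sketch (illustrative only; the ID, name, and
// discovery implementation shown are hypothetical, and the discovery/VTGate/
// vtctld flag maps, pool configs, and cache config are omitted):
//
//	c, err := cluster.New(ctx, cluster.Config{
//		ID:            "local",
//		Name:          "local",
//		DiscoveryImpl: "staticfile",
//	})
//	if err != nil {
//		return err
//	}
//	defer c.Close()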
101 func New(ctx context.Context, cfg Config) (*Cluster, error) { 102 cluster := &Cluster{ 103 ID: cfg.ID, 104 Name: cfg.Name, 105 cfg: cfg, 106 } 107 108 discoargs := buildPFlagSlice(cfg.DiscoveryFlagsByImpl[cfg.DiscoveryImpl]) 109 110 disco, err := discovery.New(cfg.DiscoveryImpl, cluster.ToProto(), discoargs) 111 if err != nil { 112 return nil, fmt.Errorf("error creating discovery impl (%s): %w", cfg.DiscoveryImpl, err) 113 } 114 115 cluster.Discovery = disco 116 117 protocluster := cluster.ToProto() 118 119 vtsqlargs := buildPFlagSlice(cfg.VtSQLFlags) 120 121 vtsqlCfg, err := vtsql.Parse(protocluster, disco, vtsqlargs) 122 if err != nil { 123 return nil, fmt.Errorf("error creating vtsql connection config: %w", err) 124 } 125 126 for _, opt := range cfg.vtsqlConfigOpts { 127 vtsqlCfg = opt(vtsqlCfg) 128 } 129 130 vtctldargs := buildPFlagSlice(cfg.VtctldFlags) 131 132 vtctldCfg, err := vtctldclient.Parse(protocluster, disco, vtctldargs) 133 if err != nil { 134 return nil, fmt.Errorf("error creating vtctldclient proxy config: %w", err) 135 } 136 137 for _, opt := range cfg.vtctldConfigOpts { 138 vtctldCfg = opt(vtctldCfg) 139 } 140 141 cluster.DB, err = vtsql.New(ctx, vtsqlCfg) 142 if err != nil { 143 return nil, fmt.Errorf("error creating vtsql proxy: %w", err) 144 } 145 146 cluster.Vtctld, err = vtctldclient.New(ctx, vtctldCfg) 147 if err != nil { 148 return nil, fmt.Errorf("error creating vtctldclient: %w", err) 149 } 150 151 if cfg.TabletFQDNTmplStr != "" { 152 cluster.TabletFQDNTmpl, err = template.New(cluster.ID + "-tablet-fqdn").Parse(cfg.TabletFQDNTmplStr) 153 if err != nil { 154 return nil, fmt.Errorf("failed to parse tablet fqdn template %s: %w", cfg.TabletFQDNTmplStr, err) 155 } 156 } 157 158 cluster.backupReadPool = cfg.BackupReadPoolConfig.NewReadPool() 159 cluster.schemaReadPool = cfg.SchemaReadPoolConfig.NewReadPool() 160 cluster.topoRWPool = cfg.TopoRWPoolConfig.NewRWPool() 161 cluster.topoReadPool = cfg.TopoReadPoolConfig.NewReadPool() 162 cluster.workflowReadPool = cfg.WorkflowReadPoolConfig.NewReadPool() 163 164 cluster.emergencyFailoverPool = cfg.EmergencyFailoverPoolConfig.NewRWPool() 165 cluster.failoverPool = cfg.FailoverPoolConfig.NewRWPool() 166 167 if cluster.cfg.SchemaCacheConfig == nil { 168 cluster.cfg.SchemaCacheConfig = &cache.Config{} 169 } 170 cluster.schemaCache = cache.New(func(ctx context.Context, key schemacache.Key) ([]*vtadminpb.Schema, error) { 171 // TODO: make a private method to separate the fetching bits from the cache bits 172 if key.Keyspace == "" { 173 return cluster.GetSchemas(ctx, GetSchemaOptions{ 174 BaseRequest: &vtctldatapb.GetSchemaRequest{ 175 IncludeViews: true, 176 }, 177 TableSizeOptions: &vtadminpb.GetSchemaTableSizeOptions{ 178 AggregateSizes: true, 179 IncludeNonServingShards: key.IncludeNonServingShards, 180 }, 181 isBackfill: true, 182 }) 183 } 184 185 schema, err := cluster.GetSchema(ctx, key.Keyspace, GetSchemaOptions{ 186 BaseRequest: &vtctldatapb.GetSchemaRequest{ 187 IncludeViews: true, 188 }, 189 TableSizeOptions: &vtadminpb.GetSchemaTableSizeOptions{ 190 AggregateSizes: true, 191 IncludeNonServingShards: key.IncludeNonServingShards, 192 }, 193 isBackfill: true, 194 }) 195 if err != nil { 196 return nil, err 197 } 198 199 return []*vtadminpb.Schema{schema}, nil 200 }, *cluster.cfg.SchemaCacheConfig) 201 202 return cluster, nil 203 } 204 205 // Close closes a cluster, gracefully closing any open proxy connections to 206 // Vtctld(s) or VTGate(s) in the cluster, as well as gracefully shutting-down 207 // any background 
cache goroutines. 208 // 209 // Its primary functions are to avoid leaking connections and other resources 210 // when dynamic clusters are evicted from an API using dynamic clusters, and 211 // to avoid data races in tests (the latter of these is caused by the cache 212 // goroutines). 213 // 214 // Sub-components of the cluster are `Close`-d concurrently, caches first, then 215 // proxy connections. 216 func (c *Cluster) Close() error { 217 var ( 218 wg sync.WaitGroup 219 rec concurrency.AllErrorRecorder 220 ) 221 222 // First, close any caches, which may have connections to DB or Vtctld 223 // (N.B. (andrew) when we have multiple caches, we can close them 224 // concurrently, like we do with the proxies). 225 rec.RecordError(c.schemaCache.Close()) 226 227 for _, closer := range []io.Closer{c.DB, c.Vtctld} { 228 wg.Add(1) 229 go func(closer io.Closer) { 230 defer wg.Done() 231 rec.RecordError(closer.Close()) 232 }(closer) 233 } 234 235 if rec.HasErrors() { 236 return fmt.Errorf("failed to cleanly close cluster (id=%s): %w", c.ID, rec.Error()) 237 } 238 239 return nil 240 } 241 242 // ToProto returns a value-copy protobuf equivalent of the cluster. 243 func (c Cluster) ToProto() *vtadminpb.Cluster { 244 return &vtadminpb.Cluster{ 245 Id: c.ID, 246 Name: c.Name, 247 } 248 } 249 250 func buildPFlagSlice(flags map[string]string) []string { 251 args := make([]string, 0, len(flags)) 252 for k, v := range flags { 253 // The k=v syntax is needed to account for negating boolean flags. 254 args = append(args, "--"+k+"="+v) 255 } 256 257 return args 258 } 259 260 // parseTablets converts a set of *sql.Rows into a slice of Tablets, for the 261 // given cluster. 262 func (c *Cluster) parseTablets(rows *sql.Rows) ([]*vtadminpb.Tablet, error) { 263 var tablets []*vtadminpb.Tablet 264 265 for rows.Next() { 266 if err := rows.Err(); err != nil { 267 return nil, err 268 } 269 270 tablet, err := c.parseTablet(rows) 271 if err != nil { 272 return nil, err 273 } 274 275 tablets = append(tablets, tablet) 276 } 277 278 if err := rows.Err(); err != nil { 279 return nil, err 280 } 281 282 return tablets, nil 283 } 284 285 // Fields are: 286 // Cell | Keyspace | Shard | TabletType (string) | ServingState (string) | Alias | Hostname | PrimaryTermStartTime. 287 func (c *Cluster) parseTablet(rows *sql.Rows) (*vtadminpb.Tablet, error) { 288 var ( 289 cell string 290 tabletTypeStr string 291 servingStateStr string 292 aliasStr string 293 mtstStr string 294 topotablet topodatapb.Tablet 295 296 err error 297 ) 298 299 if err := rows.Scan( 300 &cell, 301 &topotablet.Keyspace, 302 &topotablet.Shard, 303 &tabletTypeStr, 304 &servingStateStr, 305 &aliasStr, 306 &topotablet.Hostname, 307 &mtstStr, 308 ); err != nil { 309 return nil, err 310 } 311 312 tablet := &vtadminpb.Tablet{ 313 Cluster: &vtadminpb.Cluster{ 314 Id: c.ID, 315 Name: c.Name, 316 }, 317 Tablet: &topotablet, 318 } 319 320 topotablet.Type, err = topoproto.ParseTabletType(tabletTypeStr) 321 if err != nil { 322 return nil, err 323 } 324 325 tablet.State = vtadminproto.ParseTabletServingState(servingStateStr) 326 327 topotablet.Alias, err = topoproto.ParseTabletAlias(aliasStr) 328 if err != nil { 329 return nil, fmt.Errorf("failed to parse tablet_alias %s: %w", aliasStr, err) 330 } 331 332 if topotablet.Alias.Cell != cell { 333 // (TODO:@amason) ??? 334 log.Warningf("tablet cell %s does not match alias %s. 
ignoring for now", cell, topoproto.TabletAliasString(topotablet.Alias)) 335 } 336 337 if mtstStr != "" { 338 timeTime, err := time.Parse(time.RFC3339, mtstStr) 339 if err != nil { 340 return nil, fmt.Errorf("failed parsing primary_term_start_time %s: %w", mtstStr, err) 341 } 342 343 topotablet.PrimaryTermStartTime = logutil.TimeToProto(timeTime) 344 } 345 346 if c.TabletFQDNTmpl != nil { 347 tablet.FQDN, err = textutil.ExecuteTemplate(c.TabletFQDNTmpl, tablet) 348 if err != nil { 349 return nil, fmt.Errorf("failed to execute tablet FQDN template for %+v: %w", tablet, err) 350 } 351 } 352 353 return tablet, nil 354 } 355 356 // CreateKeyspace creates a keyspace in the given cluster, proxying a 357 // CreateKeyspaceRequest to a vtctld in that cluster. 358 func (c *Cluster) CreateKeyspace(ctx context.Context, req *vtctldatapb.CreateKeyspaceRequest) (*vtadminpb.Keyspace, error) { 359 span, ctx := trace.NewSpan(ctx, "Cluster.CreateKeyspace") 360 defer span.Finish() 361 362 AnnotateSpan(c, span) 363 364 if req == nil { 365 return nil, fmt.Errorf("%w: request cannot be nil", errors.ErrInvalidRequest) 366 } 367 368 if req.Name == "" { 369 return nil, fmt.Errorf("%w: keyspace name is required", errors.ErrInvalidRequest) 370 } 371 372 span.Annotate("keyspace", req.Name) 373 374 if err := c.topoRWPool.Acquire(ctx); err != nil { 375 return nil, fmt.Errorf("CreateKeyspace(%+v) failed to acquire topoRWPool: %w", req, err) 376 } 377 defer c.topoRWPool.Release() 378 379 resp, err := c.Vtctld.CreateKeyspace(ctx, req) 380 if err != nil { 381 return nil, err 382 } 383 384 return &vtadminpb.Keyspace{ 385 Cluster: c.ToProto(), 386 Keyspace: resp.Keyspace, 387 Shards: map[string]*vtctldatapb.Shard{}, 388 }, nil 389 } 390 391 // CreateShard creates a shard in the given cluster, proxying a 392 // CreateShardRequest to a vtctld in that cluster. 393 func (c *Cluster) CreateShard(ctx context.Context, req *vtctldatapb.CreateShardRequest) (*vtctldatapb.CreateShardResponse, error) { 394 span, ctx := trace.NewSpan(ctx, "Cluster.CreateShard") 395 defer span.Finish() 396 397 AnnotateSpan(c, span) 398 399 if req == nil { 400 return nil, fmt.Errorf("%w: request cannot be nil", errors.ErrInvalidRequest) 401 } 402 403 span.Annotate("keyspace", req.Keyspace) 404 span.Annotate("shard", req.ShardName) 405 span.Annotate("force", req.Force) 406 span.Annotate("include_parent", req.IncludeParent) 407 408 if req.Keyspace == "" { 409 return nil, fmt.Errorf("%w: keyspace name is required", errors.ErrInvalidRequest) 410 } 411 412 if req.ShardName == "" { 413 return nil, fmt.Errorf("%w: shard name is required", errors.ErrInvalidRequest) 414 } 415 416 if err := c.topoRWPool.Acquire(ctx); err != nil { 417 return nil, fmt.Errorf("CreateShard(%+v) failed to acquire topoRWPool: %w", req, err) 418 } 419 defer c.topoRWPool.Release() 420 421 return c.Vtctld.CreateShard(ctx, req) 422 } 423 424 // DeleteKeyspace deletes a keyspace in the given cluster, proxying a 425 // DeleteKeyspaceRequest to a vtctld in that cluster. 
426 func (c *Cluster) DeleteKeyspace(ctx context.Context, req *vtctldatapb.DeleteKeyspaceRequest) (*vtctldatapb.DeleteKeyspaceResponse, error) { 427 span, ctx := trace.NewSpan(ctx, "Cluster.DeleteKeyspace") 428 defer span.Finish() 429 430 AnnotateSpan(c, span) 431 432 if req == nil { 433 return nil, fmt.Errorf("%w: request cannot be nil", errors.ErrInvalidRequest) 434 } 435 436 if req.Keyspace == "" { 437 return nil, fmt.Errorf("%w: keyspace name is required", errors.ErrInvalidRequest) 438 } 439 440 span.Annotate("keyspace", req.Keyspace) 441 442 if err := c.topoRWPool.Acquire(ctx); err != nil { 443 return nil, fmt.Errorf("DeleteKeyspace(%+v) failed to acquire topoRWPool: %w", req, err) 444 } 445 defer c.topoRWPool.Release() 446 447 return c.Vtctld.DeleteKeyspace(ctx, req) 448 } 449 450 // DeleteShards deletes one or more shards in the given cluster, proxying a 451 // single DeleteShardsRequest to a vtctld in that cluster. 452 func (c *Cluster) DeleteShards(ctx context.Context, req *vtctldatapb.DeleteShardsRequest) (*vtctldatapb.DeleteShardsResponse, error) { 453 span, ctx := trace.NewSpan(ctx, "Cluster.DeleteShards") 454 defer span.Finish() 455 456 AnnotateSpan(c, span) 457 458 if req == nil { 459 return nil, fmt.Errorf("%w: request cannot be nil", errors.ErrInvalidRequest) 460 } 461 462 shards := make([]string, len(req.Shards)) 463 for i, shard := range req.Shards { 464 shards[i] = fmt.Sprintf("%s/%s", shard.Keyspace, shard.Name) 465 } 466 467 sort.Strings(shards) 468 469 span.Annotate("num_shards", len(shards)) 470 span.Annotate("shards", strings.Join(shards, ", ")) 471 span.Annotate("recursive", req.Recursive) 472 span.Annotate("even_if_serving", req.EvenIfServing) 473 474 if err := c.topoRWPool.Acquire(ctx); err != nil { 475 return nil, fmt.Errorf("DeleteShards(%+v) failed to acquire topoRWPool: %w", req, err) 476 } 477 defer c.topoRWPool.Release() 478 479 return c.Vtctld.DeleteShards(ctx, req) 480 } 481 482 // DeleteTablets deletes one or more tablets in the given cluster. 483 func (c *Cluster) DeleteTablets(ctx context.Context, req *vtctldatapb.DeleteTabletsRequest) (*vtctldatapb.DeleteTabletsResponse, error) { 484 span, ctx := trace.NewSpan(ctx, "Cluster.DeleteTablets") 485 defer span.Finish() 486 487 AnnotateSpan(c, span) 488 span.Annotate("tablet_aliases", strings.Join(topoproto.TabletAliasList(req.TabletAliases).ToStringSlice(), ",")) 489 490 if err := c.topoRWPool.Acquire(ctx); err != nil { 491 return nil, fmt.Errorf("DeleteTablets(%+v) failed to acquire topoRWPool: %w", req, err) 492 } 493 defer c.topoRWPool.Release() 494 495 return c.Vtctld.DeleteTablets(ctx, req) 496 } 497 498 // EmergencyFailoverShard fails over a shard to a new primary. It assumes the 499 // old primary is dead or otherwise not responding. 
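//
// Illustrative call sketch (the keyspace, shard, and tablet alias values are
// hypothetical):
//
//	resp, err := c.EmergencyFailoverShard(ctx, &vtctldatapb.EmergencyReparentShardRequest{
//		Keyspace:   "commerce",
//		Shard:      "-80",
//		NewPrimary: &topodatapb.TabletAlias{Cell: "zone1", Uid: 102},
//	})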
500 func (c *Cluster) EmergencyFailoverShard(ctx context.Context, req *vtctldatapb.EmergencyReparentShardRequest) (*vtadminpb.EmergencyFailoverShardResponse, error) { 501 span, ctx := trace.NewSpan(ctx, "Cluster.EmergencyFailoverShard") 502 defer span.Finish() 503 504 AnnotateSpan(c, span) 505 span.Annotate("keyspace", req.Keyspace) 506 span.Annotate("shard", req.Shard) 507 span.Annotate("new_primary", topoproto.TabletAliasString(req.NewPrimary)) 508 span.Annotate("ignore_replicas", strings.Join(topoproto.TabletAliasList(req.IgnoreReplicas).ToStringSlice(), ",")) 509 span.Annotate("prevent_cross_cell_promotion", req.PreventCrossCellPromotion) 510 511 if d, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout); ok && err == nil { 512 span.Annotate("wait_replicas_timeout", d.String()) 513 } 514 515 if err := c.emergencyFailoverPool.Acquire(ctx); err != nil { 516 return nil, fmt.Errorf("EmergencyFailoverShard(%s/%s) failed to acquire emergencyFailoverPool: %w", req.Keyspace, req.Shard, err) 517 } 518 defer c.emergencyFailoverPool.Release() 519 520 resp, err := c.Vtctld.EmergencyReparentShard(ctx, req) 521 if err != nil { 522 return nil, err 523 } 524 525 return &vtadminpb.EmergencyFailoverShardResponse{ 526 Cluster: c.ToProto(), 527 Keyspace: resp.Keyspace, 528 Shard: resp.Shard, 529 PromotedPrimary: resp.PromotedPrimary, 530 Events: resp.Events, 531 }, nil 532 } 533 534 // FindAllShardsInKeyspaceOptions modify the behavior of a cluster's 535 // FindAllShardsInKeyspace method. 536 type FindAllShardsInKeyspaceOptions struct { 537 // skipPool indicates that the caller has already made a successful call to 538 // Acquire on the topoReadPool. It is not exported, because the cluster 539 // pools are not exported, so it's not possible to manually Acquire from 540 // outside this package. 541 skipPool bool 542 } 543 544 // FindAllShardsInKeyspace proxies a FindAllShardsInKeyspace RPC to a cluster's 545 // vtctld, unpacking the response struct. 546 // 547 // It can also optionally ensure the vtctldclient has a valid connection before 548 // making the RPC call. 549 func (c *Cluster) FindAllShardsInKeyspace(ctx context.Context, keyspace string, opts FindAllShardsInKeyspaceOptions) (map[string]*vtctldatapb.Shard, error) { 550 span, ctx := trace.NewSpan(ctx, "Cluster.FindAllShardsInKeyspace") 551 defer span.Finish() 552 553 AnnotateSpan(c, span) 554 span.Annotate("keyspace", keyspace) 555 556 if !opts.skipPool { 557 if err := c.topoReadPool.Acquire(ctx); err != nil { 558 return nil, fmt.Errorf("FindAllShardsInKeyspace(%s) failed to acquire topoReadPool: %w", keyspace, err) 559 } 560 defer c.topoReadPool.Release() 561 } 562 563 resp, err := c.Vtctld.FindAllShardsInKeyspace(ctx, &vtctldatapb.FindAllShardsInKeyspaceRequest{ 564 Keyspace: keyspace, 565 }) 566 if err != nil { 567 return nil, fmt.Errorf("FindAllShardsInKeyspace(cluster = %s, keyspace = %s) failed: %w", c.ID, keyspace, err) 568 } 569 570 return resp.Shards, nil 571 } 572 573 // FindTablet returns the first tablet in a given cluster that satisfies the filter function. 
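//
// For example, to pick one serving tablet in a (hypothetical) keyspace:
//
//	tablet, err := c.FindTablet(ctx, func(t *vtadminpb.Tablet) bool {
//		return t.Tablet.Keyspace == "commerce" && t.State == vtadminpb.Tablet_SERVING
//	})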
574 func (c *Cluster) FindTablet(ctx context.Context, filter func(*vtadminpb.Tablet) bool) (*vtadminpb.Tablet, error) { 575 span, ctx := trace.NewSpan(ctx, "Cluster.FindTablet") 576 defer span.Finish() 577 578 AnnotateSpan(c, span) 579 580 tablets, err := c.findTablets(ctx, filter, 1) 581 if err != nil { 582 return nil, err 583 } 584 585 if len(tablets) != 1 { 586 return nil, errors.ErrNoTablet 587 } 588 589 return tablets[0], nil 590 } 591 592 // FindTablets returns the first N tablets in the given cluster that satisfy 593 // the filter function. If N = -1, then all matching tablets are returned. 594 // Ordering is not guaranteed, and callers should write their filter functions accordingly. 595 func (c *Cluster) FindTablets(ctx context.Context, filter func(*vtadminpb.Tablet) bool, n int) ([]*vtadminpb.Tablet, error) { 596 span, ctx := trace.NewSpan(ctx, "Cluster.FindTablets") 597 defer span.Finish() 598 599 AnnotateSpan(c, span) 600 601 return c.findTablets(ctx, filter, n) 602 } 603 604 func (c *Cluster) findTablets(ctx context.Context, filter func(*vtadminpb.Tablet) bool, n int) ([]*vtadminpb.Tablet, error) { 605 span, _ := trace.FromContext(ctx) 606 607 tablets, err := c.GetTablets(ctx) 608 if err != nil { 609 return nil, err 610 } 611 612 if n == -1 { 613 n = len(tablets) 614 } 615 616 if span != nil { 617 span.Annotate("max_result_length", n) // this is a bad name; I didn't want just "n", but it's more like, "requested result length". 618 } 619 620 return vtadminproto.FilterTablets(filter, tablets, n), nil 621 } 622 623 // FindWorkflowsOptions is the set of options for FindWorkflows requests. 624 type FindWorkflowsOptions struct { 625 ActiveOnly bool 626 IgnoreKeyspaces sets.Set[string] 627 Filter func(workflow *vtadminpb.Workflow) bool 628 } 629 630 // FindWorkflows returns a list of Workflows in this cluster, across the given 631 // keyspaces and filtering according to the options passed in. 632 // 633 // If the list of keyspaces to check is empty, then FindWorkflows will use the 634 // result of GetKeyspaces to search all keyspaces in the cluster. In this case, 635 // opts.IgnoreKeyspaces is respected. 636 // 637 // Callers should use this function when they want more fine-grained filtering, 638 // and GetWorkflows when they just want to filter on keyspace name. 639 // 640 // Note that if only a subset of keyspaces error on their vtctld GetWorkflows 641 // rpc, this is treated as a partial success, and the ClusterWorkflows response 642 // will include any errors in the Warnings slice. If all keyspaces fail, or if 643 // non-(Vtctld.GetWorkflows) calls fail, this is treated as an error by this 644 // function. 
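//
// A sketch of a filtered call (the keyspace and workflow-name prefix are
// hypothetical):
//
//	workflows, err := c.FindWorkflows(ctx, []string{"commerce"}, cluster.FindWorkflowsOptions{
//		ActiveOnly: true,
//		Filter: func(wf *vtadminpb.Workflow) bool {
//			return strings.HasPrefix(wf.Workflow.Name, "commerce2")
//		},
//	})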
645 func (c *Cluster) FindWorkflows(ctx context.Context, keyspaces []string, opts FindWorkflowsOptions) (*vtadminpb.ClusterWorkflows, error) { 646 span, ctx := trace.NewSpan(ctx, "Cluster.FindWorkflows") 647 defer span.Finish() 648 649 AnnotateSpan(c, span) 650 span.Annotate("active_only", opts.ActiveOnly) 651 652 return c.findWorkflows(ctx, keyspaces, opts) 653 } 654 655 func (c *Cluster) findWorkflows(ctx context.Context, keyspaces []string, opts FindWorkflowsOptions) (*vtadminpb.ClusterWorkflows, error) { 656 if opts.Filter == nil { 657 opts.Filter = func(_ *vtadminpb.Workflow) bool { return true } 658 } 659 660 if opts.IgnoreKeyspaces == nil { 661 opts.IgnoreKeyspaces = sets.New[string]() 662 } 663 664 if len(keyspaces) == 0 { 665 span, ctx := trace.NewSpan(ctx, "Cluster.GetKeyspaces") 666 AnnotateSpan(c, span) 667 668 if err := c.topoReadPool.Acquire(ctx); err != nil { 669 span.Finish() 670 return nil, fmt.Errorf("findWorkflows(keyspaces = %v, opts = %+v) failed to acquire topoReadPool: %w", keyspaces, opts, err) 671 } 672 673 resp, err := c.Vtctld.GetKeyspaces(ctx, &vtctldatapb.GetKeyspacesRequest{}) 674 c.topoReadPool.Release() 675 676 if err != nil { 677 span.Finish() 678 return nil, fmt.Errorf("GetKeyspaces(cluster = %s) failed: %w", c.ID, err) 679 } 680 681 for _, ks := range resp.Keyspaces { 682 keyspaces = append(keyspaces, ks.Name) 683 } 684 685 span.Finish() 686 } else if opts.IgnoreKeyspaces.Len() > 0 { 687 log.Warningf("Cluster.findWorkflows: IgnoreKeyspaces was set, but Keyspaces was not empty; ignoring IgnoreKeyspaces in favor of explicitly checking everything in Keyspaces: (%s)", strings.Join(keyspaces, ", ")) 688 opts.IgnoreKeyspaces = sets.New[string]() 689 } 690 691 // Annotate the parent span with some additional information about the call. 
692 if span, _ := trace.FromContext(ctx); span != nil { 693 span.Annotate("num_keyspaces", len(keyspaces)) 694 span.Annotate("keyspaces", strings.Join(keyspaces, ",")) 695 span.Annotate("num_ignore_keyspaces", opts.IgnoreKeyspaces.Len()) 696 span.Annotate("ignore_keyspaces", strings.Join(sets.List(opts.IgnoreKeyspaces), ",")) 697 } 698 699 clusterpb := c.ToProto() 700 701 var ( 702 m sync.Mutex 703 wg sync.WaitGroup 704 rec concurrency.AllErrorRecorder 705 results []*vtadminpb.Workflow 706 ) 707 708 for _, ks := range keyspaces { 709 if opts.IgnoreKeyspaces.Has(ks) { 710 log.Infof("Cluster.findWorkflows: ignoring keyspace %s", ks) 711 712 continue 713 } 714 715 wg.Add(1) 716 717 go func(ks string) { 718 defer wg.Done() 719 720 span, ctx := trace.NewSpan(ctx, "Cluster.GetWorkflowsForKeyspace") 721 defer span.Finish() 722 723 AnnotateSpan(c, span) 724 span.Annotate("keyspace", ks) 725 span.Annotate("active_only", opts.ActiveOnly) 726 727 if err := c.workflowReadPool.Acquire(ctx); err != nil { 728 err = fmt.Errorf("GetWorkflows(keyspace = %s, active_only = %v) failed to acquire workflowReadPool: %w", ks, opts.ActiveOnly, err) 729 rec.RecordError(err) 730 731 return 732 } 733 734 resp, err := c.Vtctld.GetWorkflows(ctx, &vtctldatapb.GetWorkflowsRequest{ 735 Keyspace: ks, 736 ActiveOnly: opts.ActiveOnly, 737 }) 738 c.workflowReadPool.Release() 739 740 if err != nil { 741 err = fmt.Errorf("GetWorkflows(keyspace = %s, active_only = %v) failed: %w", ks, opts.ActiveOnly, err) 742 rec.RecordError(err) 743 744 return 745 } 746 747 workflows := make([]*vtadminpb.Workflow, 0, len(resp.Workflows)) 748 for _, wf := range resp.Workflows { 749 workflow := &vtadminpb.Workflow{ 750 Cluster: clusterpb, 751 Keyspace: ks, 752 Workflow: wf, 753 } 754 755 if opts.Filter(workflow) { 756 workflows = append(workflows, workflow) 757 } 758 } 759 760 m.Lock() 761 results = append(results, workflows...) 762 m.Unlock() 763 }(ks) 764 } 765 766 wg.Wait() 767 768 // If every keyspace failed, treat this as an error. 769 if rec.HasErrors() && len(rec.Errors) == len(keyspaces) { 770 return nil, rec.Error() 771 } 772 773 // Otherwise, append any failures into the warnings slice, and return what 774 // results we have. 775 return &vtadminpb.ClusterWorkflows{ 776 Workflows: results, 777 Warnings: rec.ErrorStrings(), 778 }, nil 779 } 780 781 // GetBackups returns a ClusterBackups object for all backups in the cluster. 
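//
// Keyspaces and KeyspaceShards on the request narrow the search (see
// getShardSets below). Illustrative sketch with hypothetical names; the
// RequestOptions field is assumed here to be a *vtctldatapb.GetBackupsRequest
// whose Limit/Detailed/DetailedLimit values are forwarded to each per-shard
// vtctld GetBackups call:
//
//	backups, err := c.GetBackups(ctx, &vtadminpb.GetBackupsRequest{
//		Keyspaces:      []string{"commerce"},
//		KeyspaceShards: []string{"customer/-80"},
//		RequestOptions: &vtctldatapb.GetBackupsRequest{Detailed: true, Limit: 5},
//	})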
782 func (c *Cluster) GetBackups(ctx context.Context, req *vtadminpb.GetBackupsRequest) ([]*vtadminpb.ClusterBackup, error) { 783 span, ctx := trace.NewSpan(ctx, "Cluster.GetBackups") 784 defer span.Finish() 785 786 AnnotateSpan(c, span) 787 788 shardsByKeyspace, err := c.getShardSets(ctx, req.Keyspaces, req.KeyspaceShards) 789 if err != nil { 790 return nil, err 791 } 792 793 var ( 794 m sync.Mutex 795 wg sync.WaitGroup 796 rec concurrency.AllErrorRecorder 797 backups []*vtadminpb.ClusterBackup 798 clusterProto = c.ToProto() 799 ) 800 801 for ks, shardSet := range shardsByKeyspace { 802 for _, shard := range sets.List(shardSet) { 803 wg.Add(1) 804 805 go func(keyspace, shard string) { 806 defer wg.Done() 807 808 span, ctx := trace.NewSpan(ctx, "Cluster.getBackupsForShard") 809 defer span.Finish() 810 811 AnnotateSpan(c, span) 812 span.Annotate("keyspace", keyspace) 813 span.Annotate("shard", shard) 814 815 if err := c.backupReadPool.Acquire(ctx); err != nil { 816 rec.RecordError(fmt.Errorf("GetBackups(%s/%s) failed to acquire backupReadPool: %w", keyspace, shard, err)) 817 return 818 } 819 820 resp, err := c.Vtctld.GetBackups(ctx, &vtctldatapb.GetBackupsRequest{ 821 Keyspace: keyspace, 822 Shard: shard, 823 Limit: req.RequestOptions.Limit, 824 Detailed: req.RequestOptions.Detailed, 825 DetailedLimit: req.RequestOptions.DetailedLimit, 826 }) 827 c.backupReadPool.Release() 828 829 if err != nil { 830 rec.RecordError(fmt.Errorf("GetBackups(%s/%s): %w", keyspace, shard, err)) 831 return 832 } 833 834 shardBackups := make([]*vtadminpb.ClusterBackup, len(resp.Backups)) 835 for i, backup := range resp.Backups { 836 shardBackups[i] = &vtadminpb.ClusterBackup{ 837 Cluster: clusterProto, 838 Backup: backup, 839 } 840 } 841 842 m.Lock() 843 defer m.Unlock() 844 845 backups = append(backups, shardBackups...) 846 }(ks, shard) 847 } 848 } 849 850 wg.Wait() 851 852 if rec.HasErrors() { 853 return nil, rec.Error() 854 } 855 856 return backups, nil 857 } 858 859 func (c *Cluster) getShardSets(ctx context.Context, keyspaces []string, keyspaceShards []string) (map[string]sets.Set[string], error) { 860 shardsByKeyspace := map[string]sets.Set[string]{} 861 862 if len(keyspaces) == 0 && len(keyspaceShards) == 0 { 863 // Special case: if nothing was explicitly passed, get all shards in 864 // all keyspaces. 865 kss, err := c.GetKeyspaces(ctx) 866 if err != nil { 867 return nil, err 868 } 869 870 for _, ks := range kss { 871 shardsByKeyspace[ks.Keyspace.Name] = sets.New[string]() 872 for _, shard := range ks.Shards { 873 shardsByKeyspace[ks.Keyspace.Name].Insert(shard.Name) 874 } 875 } 876 877 return shardsByKeyspace, nil 878 } 879 880 for _, ksShard := range keyspaceShards { 881 ks, shard, err := topoproto.ParseKeyspaceShard(ksShard) 882 if err != nil { 883 return nil, err 884 } 885 886 if _, ok := shardsByKeyspace[ks]; !ok { 887 shardsByKeyspace[ks] = sets.New[string](shard) 888 continue 889 } 890 891 shardsByKeyspace[ks].Insert(shard) 892 } 893 894 for _, ks := range keyspaces { 895 // For each keyspace specified, if it was also one of the keyspaceShards, 896 // we added the set in the above loop, so nothing to do. If not, add an 897 // empty set to indicate we should take all shards in the GetKeyspace 898 // section below. 
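		// (Illustratively: a keyspaces entry such as "commerce" that did not
		// also appear in keyspaceShards gets an empty set here and is later
		// expanded to every shard in that keyspace, while explicit
		// "customer/-80"-style entries keep only the shards named.)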
899 if _, ok := shardsByKeyspace[ks]; !ok { 900 shardsByKeyspace[ks] = sets.New[string]() 901 } 902 } 903 904 var ( 905 m sync.Mutex 906 wg sync.WaitGroup 907 rec concurrency.AllErrorRecorder 908 ) 909 910 m.Lock() // lock the map while we're iterating over it 911 912 for ksName, shardSet := range shardsByKeyspace { 913 wg.Add(1) 914 915 go func(ksName string, shardSet sets.Set[string]) { 916 defer wg.Done() 917 918 keyspace, err := c.GetKeyspace(ctx, ksName) 919 if err != nil { 920 if strings.Contains(err.Error(), "node doesn't exist") { 921 // (TODO:@ajm188) Make better use of error codes on the 922 // vtctld side, and we can do better checking here. 923 // Since this is on the client-side of an RPC we can't 924 // even use topo.IsErrType(topo.NoNode) :( 925 log.Warningf("getShardSets(): keyspace %s does not exist in cluster %s", ksName, c.ID) 926 m.Lock() 927 defer m.Unlock() 928 929 delete(shardsByKeyspace, ksName) 930 return 931 } 932 933 rec.RecordError(err) 934 return 935 } 936 937 fullShardSet := sets.New[string]() 938 for _, shard := range keyspace.Shards { 939 fullShardSet.Insert(shard.Name) 940 } 941 942 if shardSet.Len() == 0 { 943 m.Lock() 944 defer m.Unlock() 945 946 shardsByKeyspace[ksName] = fullShardSet 947 return 948 } 949 950 overlap := shardSet.Intersection(fullShardSet) 951 if overlap.Len() != shardSet.Len() { 952 log.Warningf("getShardSets(): keyspace %s is missing specified shards in cluster %s: %v", ksName, c.ID, sets.List(shardSet.Difference(overlap))) 953 } 954 955 m.Lock() 956 defer m.Unlock() 957 958 shardsByKeyspace[ksName] = overlap 959 }(ksName, shardSet) 960 } 961 962 m.Unlock() 963 wg.Wait() 964 965 if rec.HasErrors() { 966 return nil, rec.Error() 967 } 968 969 return shardsByKeyspace, nil 970 } 971 972 // GetCellInfos returns a list of ClusterCellInfo objects for cells in the 973 // given cluster. 974 // 975 // If req.Cells is set, cells are restricted only to cells with those names. 976 // Note: specifying a cell name that does not exist in the cluster fails the 977 // overall request. 978 // 979 // If req.NamesOnly is set, each ClusterCellInfo will only contain the Cluster 980 // and Name fields. req.Cells takes precedence over this option. 
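//
// Illustrative sketch (the cell name is hypothetical):
//
//	infos, err := c.GetCellInfos(ctx, &vtadminpb.GetCellInfosRequest{
//		Cells: []string{"zone1"},
//	})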
981 func (c *Cluster) GetCellInfos(ctx context.Context, req *vtadminpb.GetCellInfosRequest) ([]*vtadminpb.ClusterCellInfo, error) { 982 span, ctx := trace.NewSpan(ctx, "Cluster.GetCellInfos") 983 defer span.Finish() 984 985 names := req.Cells 986 if len(names) == 0 { 987 if err := c.topoReadPool.Acquire(ctx); err != nil { 988 return nil, fmt.Errorf("GetCellInfoNames() failed to acquire topoReadPool: %w", err) 989 } 990 resp, err := c.Vtctld.GetCellInfoNames(ctx, &vtctldatapb.GetCellInfoNamesRequest{}) 991 c.topoReadPool.Release() 992 993 if err != nil { 994 return nil, fmt.Errorf("failed to GetCellInfoNames: %w", err) 995 } 996 997 names = resp.Names 998 } 999 1000 namesOnly := req.NamesOnly 1001 if namesOnly && len(req.Cells) > 0 { 1002 log.Warning("Cluster.GetCellInfos: req.Cells and req.NamesOnly set, ignoring NamesOnly") 1003 namesOnly = false 1004 } 1005 1006 span.Annotate("names_only", namesOnly) 1007 span.Annotate("cells", req.Cells) // deliberately not the cellnames we (maybe) fetched above 1008 1009 cpb := c.ToProto() 1010 infos := make([]*vtadminpb.ClusterCellInfo, 0, len(names)) 1011 if namesOnly { 1012 for _, name := range names { 1013 infos = append(infos, &vtadminpb.ClusterCellInfo{ 1014 Cluster: cpb, 1015 Name: name, 1016 }) 1017 } 1018 1019 return infos, nil 1020 } 1021 1022 var ( 1023 m sync.Mutex 1024 wg sync.WaitGroup 1025 rec concurrency.AllErrorRecorder 1026 ) 1027 1028 for _, name := range names { 1029 wg.Add(1) 1030 go func(name string) { 1031 defer wg.Done() 1032 1033 if err := c.topoReadPool.Acquire(ctx); err != nil { 1034 rec.RecordError(fmt.Errorf("GetCellInfo(%s) failed to acquire topoReadPool: %w", name, err)) 1035 return 1036 } 1037 resp, err := c.Vtctld.GetCellInfo(ctx, &vtctldatapb.GetCellInfoRequest{ 1038 Cell: name, 1039 }) 1040 c.topoReadPool.Release() 1041 1042 if err != nil { 1043 rec.RecordError(fmt.Errorf("GetCellInfo(%s) failed: %w", name, err)) 1044 return 1045 } 1046 1047 m.Lock() 1048 defer m.Unlock() 1049 infos = append(infos, &vtadminpb.ClusterCellInfo{ 1050 Cluster: cpb, 1051 Name: name, 1052 CellInfo: resp.CellInfo, 1053 }) 1054 }(name) 1055 } 1056 1057 wg.Wait() 1058 if rec.HasErrors() { 1059 return nil, rec.Error() 1060 } 1061 1062 return infos, nil 1063 } 1064 1065 // GetCellsAliases returns all CellsAliases in the cluster. 1066 func (c *Cluster) GetCellsAliases(ctx context.Context) (*vtadminpb.ClusterCellsAliases, error) { 1067 span, ctx := trace.NewSpan(ctx, "Cluster.GetCellsAliases") 1068 defer span.Finish() 1069 1070 if err := c.topoReadPool.Acquire(ctx); err != nil { 1071 return nil, fmt.Errorf("GetCellsAliases() failed to acquire topoReadPool: %w", err) 1072 } 1073 defer c.topoReadPool.Release() 1074 1075 resp, err := c.Vtctld.GetCellsAliases(ctx, &vtctldatapb.GetCellsAliasesRequest{}) 1076 if err != nil { 1077 return nil, err 1078 } 1079 1080 return &vtadminpb.ClusterCellsAliases{ 1081 Cluster: c.ToProto(), 1082 Aliases: resp.Aliases, 1083 }, nil 1084 } 1085 1086 // GetGates returns the list of all VTGates in the cluster. 1087 func (c *Cluster) GetGates(ctx context.Context) ([]*vtadminpb.VTGate, error) { 1088 // (TODO|@ajm188) Support tags in the vtadmin RPC request and pass them 1089 // through here. 1090 gates, err := c.Discovery.DiscoverVTGates(ctx, []string{}) 1091 if err != nil { 1092 return nil, fmt.Errorf("DiscoverVTGates(cluster = %s): %w", c.ID, err) 1093 } 1094 1095 // This overwrites any Cluster field populated by a particular discovery 1096 // implementation. 
1097 cpb := c.ToProto() 1098 1099 for _, g := range gates { 1100 g.Cluster = cpb 1101 } 1102 1103 return gates, nil 1104 } 1105 1106 // GetKeyspace returns a single keyspace in the cluster. 1107 func (c *Cluster) GetKeyspace(ctx context.Context, name string) (*vtadminpb.Keyspace, error) { 1108 span, ctx := trace.NewSpan(ctx, "Cluster.GetKeyspace") 1109 defer span.Finish() 1110 1111 AnnotateSpan(c, span) 1112 span.Annotate("keyspace", name) 1113 1114 if err := c.topoReadPool.Acquire(ctx); err != nil { 1115 return nil, fmt.Errorf("GetKeyspace(%s) failed to acquire topoReadPool: %w", name, err) 1116 } 1117 defer c.topoReadPool.Release() 1118 1119 resp, err := c.Vtctld.GetKeyspace(ctx, &vtctldatapb.GetKeyspaceRequest{ 1120 Keyspace: name, 1121 }) 1122 if err != nil { 1123 return nil, err 1124 } 1125 1126 shards, err := c.FindAllShardsInKeyspace(ctx, name, FindAllShardsInKeyspaceOptions{ 1127 skipPool: true, // we already acquired before making the GetKeyspace call 1128 }) 1129 if err != nil { 1130 return nil, err 1131 } 1132 1133 return &vtadminpb.Keyspace{ 1134 Cluster: c.ToProto(), 1135 Keyspace: resp.Keyspace, 1136 Shards: shards, 1137 }, nil 1138 } 1139 1140 // GetKeyspaces returns all keyspaces, with their shard maps, in the cluster. 1141 func (c *Cluster) GetKeyspaces(ctx context.Context) ([]*vtadminpb.Keyspace, error) { 1142 span, ctx := trace.NewSpan(ctx, "Cluster.GetKeyspaces") 1143 defer span.Finish() 1144 1145 AnnotateSpan(c, span) 1146 1147 if err := c.topoReadPool.Acquire(ctx); err != nil { 1148 return nil, fmt.Errorf("GetKeyspaces() failed to acquire topoReadPool: %w", err) 1149 } 1150 1151 resp, err := c.Vtctld.GetKeyspaces(ctx, &vtctldatapb.GetKeyspacesRequest{}) 1152 c.topoReadPool.Release() 1153 1154 if err != nil { 1155 return nil, err 1156 } 1157 1158 var ( 1159 m sync.Mutex 1160 wg sync.WaitGroup 1161 rec concurrency.AllErrorRecorder 1162 keyspaces = make([]*vtadminpb.Keyspace, len(resp.Keyspaces)) 1163 ) 1164 1165 for i, ks := range resp.Keyspaces { 1166 wg.Add(1) 1167 go func(i int, ks *vtctldatapb.Keyspace) { 1168 defer wg.Done() 1169 1170 shards, err := c.FindAllShardsInKeyspace(ctx, ks.Name, FindAllShardsInKeyspaceOptions{}) 1171 if err != nil { 1172 rec.RecordError(err) 1173 return 1174 } 1175 1176 keyspace := &vtadminpb.Keyspace{ 1177 Cluster: c.ToProto(), 1178 Keyspace: ks, 1179 Shards: shards, 1180 } 1181 1182 m.Lock() 1183 defer m.Unlock() 1184 keyspaces[i] = keyspace 1185 }(i, ks) 1186 } 1187 1188 wg.Wait() 1189 if rec.HasErrors() { 1190 return nil, rec.Error() 1191 } 1192 1193 return keyspaces, nil 1194 } 1195 1196 // GetTablets returns all tablets in the cluster. 1197 func (c *Cluster) GetTablets(ctx context.Context) ([]*vtadminpb.Tablet, error) { 1198 span, ctx := trace.NewSpan(ctx, "Cluster.GetTablets") 1199 defer span.Finish() 1200 1201 AnnotateSpan(c, span) 1202 1203 return c.getTablets(ctx) 1204 } 1205 1206 func (c *Cluster) getTablets(ctx context.Context) ([]*vtadminpb.Tablet, error) { 1207 rows, err := c.DB.ShowTablets(ctx) 1208 if err != nil { 1209 return nil, err 1210 } 1211 1212 return c.parseTablets(rows) 1213 } 1214 1215 // GetSchemaOptions contains the options that modify the behavior of the 1216 // (*Cluster).GetSchema method. 
type GetSchemaOptions struct {
	// BaseRequest is used to share some common parameters to use for the
	// individual tablet GetSchema RPCs made by (*Cluster).GetSchema, which
	// takes a copy of this request in order to make certain overrides as
	// needed, so these mutations are transparent to the caller.
	//
	// The TabletAlias field is ignored completely by (*Cluster).GetSchema, as
	// it is overwritten for each tablet RPC that method makes.
	//
	// The TableSizesOnly field is overwritten only in certain tablet RPCs when
	// SizeOpts.AggregateSizes is true. In order to move minimal bytes over the
	// wire, we assume that schema definitions match across all shards, so we
	// can get the full schema from just one tablet, and then just the table
	// size information from the other N-1 tablets.
	//
	// The TableNamesOnly field is untouched by (*Cluster).GetSchema when not
	// doing size aggregation. However, when doing size aggregation, if
	// TableNamesOnly is true, we log a warning and override it. This is
	// because TableNamesOnly is mutually exclusive with TableSizesOnly, and
	// size aggregation requires setting TableSizesOnly in the cases described
	// above.
	BaseRequest *vtctldatapb.GetSchemaRequest
	// TableSizeOptions control whether the (*Cluster).GetSchema method performs
	// cross-shard table size aggregation (via the AggregateSizes field).
	//
	// If the AggregateSizes field is false, the rest of this struct is ignored,
	// no size aggregation is done, and (*Cluster).GetSchema will make exactly
	// one GetSchema RPC to a SERVING tablet in the keyspace.
	//
	// If the AggregateSizes field is true, (*Cluster).GetSchema will make a
	// FindAllShardsInKeyspace vtctld RPC, and then filter the given Tablets
	// (described above) to find one SERVING tablet for each shard in the
	// keyspace, skipping any non-serving shards in the keyspace.
	TableSizeOptions *vtadminpb.GetSchemaTableSizeOptions

	isBackfill bool
}

// GetSchema returns the schema for a given keyspace. GetSchema has a few
// different behaviors depending on the GetSchemaOptions provided, as follows:
//
// (1) If opts.SizeOpts.AggregateSizes is true, we will also make a call to
// FindAllShardsInKeyspace, in order to fan out GetSchema RPCs to a tablet in
// each shard. If this option is false, we make exactly one GetSchema request
// to a single, randomly-chosen, tablet in the keyspace.
//
// (1.1) If, in size aggregation mode, opts.SizeOpts.IncludeNonServingShards is
// false (the default), then we will filter out any shards for which
// IsPrimaryServing is false in the topo, and make GetSchema RPCs to one tablet
// in every _serving_ shard. Otherwise we will make a GetSchema RPC to one
// tablet in _every_ shard.
//
// (2) Irrespective of whether we're including nonserving shards, or whether
// we're doing size aggregation at all, we will only make GetSchema RPCs to
// tablets that are in SERVING state; we don't want to use a tablet that might
// be in a bad state as the source of truth for a schema. Therefore if we can't
// find a SERVING tablet for the keyspace (in non-aggregation mode) or for a
// shard in that keyspace (in aggregation mode), then we will return an error
// back to the caller.
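//
// A sketch of a size-aggregating call (the keyspace name is hypothetical),
// mirroring how the schema cache's fill function invokes this method:
//
//	schema, err := c.GetSchema(ctx, "commerce", cluster.GetSchemaOptions{
//		BaseRequest: &vtctldatapb.GetSchemaRequest{IncludeViews: true},
//		TableSizeOptions: &vtadminpb.GetSchemaTableSizeOptions{
//			AggregateSizes:          true,
//			IncludeNonServingShards: false,
//		},
//	})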
1275 func (c *Cluster) GetSchema(ctx context.Context, keyspace string, opts GetSchemaOptions) (*vtadminpb.Schema, error) { 1276 span, ctx := trace.NewSpan(ctx, "Cluster.GetSchema") 1277 defer span.Finish() 1278 1279 if opts.TableSizeOptions == nil { 1280 opts.TableSizeOptions = &vtadminpb.GetSchemaTableSizeOptions{ 1281 AggregateSizes: false, 1282 IncludeNonServingShards: false, 1283 } 1284 } 1285 1286 if opts.BaseRequest == nil { 1287 opts.BaseRequest = &vtctldatapb.GetSchemaRequest{} 1288 } 1289 1290 if opts.TableSizeOptions.AggregateSizes && opts.BaseRequest.TableNamesOnly { 1291 log.Warningf("GetSchema(cluster = %s) size aggregation is incompatible with TableNamesOnly, ignoring the latter in favor of aggregating sizes", c.ID) 1292 opts.BaseRequest.TableNamesOnly = false 1293 } 1294 1295 AnnotateSpan(c, span) 1296 span.Annotate("keyspace", keyspace) 1297 annotateGetSchemaRequest(opts.BaseRequest, span) 1298 vtadminproto.AnnotateSpanWithGetSchemaTableSizeOptions(opts.TableSizeOptions, span) 1299 span.Annotate("is_backfill", opts.isBackfill) 1300 1301 key := schemacache.Key{ 1302 ClusterID: c.ID, 1303 Keyspace: keyspace, 1304 IncludeNonServingShards: opts.TableSizeOptions.IncludeNonServingShards, 1305 } 1306 if !(opts.isBackfill || cache.ShouldRefreshFromIncomingContext(ctx)) { 1307 schema, ok, err := schemacache.LoadOne(c.schemaCache, key, schemacache.LoadOptions{ 1308 BaseRequest: opts.BaseRequest, 1309 AggregateSizes: opts.TableSizeOptions.AggregateSizes, 1310 }) 1311 1312 span.Annotate("cache_hit", ok) 1313 if ok { 1314 return schema, err 1315 } 1316 } 1317 1318 // Fetch all tablets for the keyspace. 1319 tablets, err := c.FindTablets(ctx, func(tablet *vtadminpb.Tablet) bool { 1320 return tablet.Tablet.Keyspace == keyspace 1321 }, -1) 1322 if err != nil { 1323 return nil, fmt.Errorf("%w for keyspace %s", errors.ErrNoTablet, keyspace) 1324 } 1325 1326 tabletsToQuery, err := c.getTabletsToQueryForSchemas(ctx, keyspace, tablets, opts) 1327 if err != nil { 1328 return nil, err 1329 } 1330 1331 schema, err := c.getSchemaFromTablets(ctx, keyspace, tabletsToQuery, opts) 1332 if err != nil { 1333 return nil, err 1334 } 1335 1336 go schemacache.AddOrBackfill(c.schemaCache, []*vtadminpb.Schema{schema}, key, cache.DefaultExpiration, schemacache.LoadOptions{ 1337 BaseRequest: opts.BaseRequest, 1338 AggregateSizes: opts.TableSizeOptions.AggregateSizes, 1339 }) 1340 1341 return schema, nil 1342 } 1343 1344 // GetSchemas returns all of the schemas across all keyspaces in the cluster. 
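//
// Keyspaces with no serving tablets, and keyspaces whose schemas contain no
// tables, are skipped rather than treated as errors. Illustrative sketch:
//
//	schemas, err := c.GetSchemas(ctx, cluster.GetSchemaOptions{
//		TableSizeOptions: &vtadminpb.GetSchemaTableSizeOptions{AggregateSizes: true},
//	})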
1345 func (c *Cluster) GetSchemas(ctx context.Context, opts GetSchemaOptions) ([]*vtadminpb.Schema, error) { 1346 span, ctx := trace.NewSpan(ctx, "Cluster.GetSchemas") 1347 defer span.Finish() 1348 1349 if opts.TableSizeOptions == nil { 1350 opts.TableSizeOptions = &vtadminpb.GetSchemaTableSizeOptions{ 1351 AggregateSizes: false, 1352 IncludeNonServingShards: false, 1353 } 1354 } 1355 1356 if opts.BaseRequest == nil { 1357 opts.BaseRequest = &vtctldatapb.GetSchemaRequest{} 1358 } 1359 1360 if opts.TableSizeOptions.AggregateSizes && opts.BaseRequest.TableNamesOnly { 1361 log.Warningf("GetSchemas(cluster = %s) size aggregation is incompatible with TableNamesOnly, ignoring the latter in favor of aggregating sizes", c.ID) 1362 opts.BaseRequest.TableNamesOnly = false 1363 } 1364 1365 AnnotateSpan(c, span) 1366 annotateGetSchemaRequest(opts.BaseRequest, span) 1367 vtadminproto.AnnotateSpanWithGetSchemaTableSizeOptions(opts.TableSizeOptions, span) 1368 span.Annotate("is_backfill", opts.isBackfill) 1369 1370 key := schemacache.Key{ 1371 ClusterID: c.ID, 1372 Keyspace: "", 1373 IncludeNonServingShards: opts.TableSizeOptions.IncludeNonServingShards, 1374 } 1375 if !(opts.isBackfill || cache.ShouldRefreshFromIncomingContext(ctx)) { 1376 schemas, ok, err := schemacache.LoadAll(c.schemaCache, key, schemacache.LoadOptions{ 1377 BaseRequest: opts.BaseRequest, 1378 AggregateSizes: opts.TableSizeOptions.AggregateSizes, 1379 }) 1380 1381 span.Annotate("cache_hit", ok) 1382 if ok { 1383 return schemas, err 1384 } 1385 } 1386 1387 var ( 1388 m sync.Mutex 1389 wg sync.WaitGroup 1390 rec concurrency.AllErrorRecorder 1391 1392 tablets []*vtadminpb.Tablet 1393 keyspaces []*vtadminpb.Keyspace 1394 1395 schemas []*vtadminpb.Schema 1396 ) 1397 1398 // Start by collecting the tablets and keyspace names concurrently. 1399 wg.Add(1) 1400 go func() { 1401 defer wg.Done() 1402 1403 var err error 1404 tablets, err = c.GetTablets(ctx) 1405 if err != nil { 1406 rec.RecordError(err) 1407 return 1408 } 1409 }() 1410 1411 wg.Add(1) 1412 go func() { 1413 defer wg.Done() 1414 1415 // TODO: (ajm188) we can't use c.GetKeyspaces because it also makes a 1416 // FindAllShardsInKeyspace call for each keyspace, which we may or may 1417 // not need. Refactor that method so we can get better code reuse. 1418 span, ctx := trace.NewSpan(ctx, "Cluster.GetKeyspaces") 1419 defer span.Finish() 1420 1421 if err := c.topoReadPool.Acquire(ctx); err != nil { 1422 rec.RecordError(fmt.Errorf("GetKeyspaces() failed to acquire topoReadPool: %w", err)) 1423 return 1424 } 1425 1426 resp, err := c.Vtctld.GetKeyspaces(ctx, &vtctldatapb.GetKeyspacesRequest{}) 1427 c.topoReadPool.Release() 1428 1429 if err != nil { 1430 rec.RecordError(err) 1431 return 1432 } 1433 1434 keyspaces = make([]*vtadminpb.Keyspace, len(resp.Keyspaces)) 1435 for i, ks := range resp.Keyspaces { 1436 keyspaces[i] = &vtadminpb.Keyspace{ 1437 Cluster: c.ToProto(), 1438 Keyspace: ks, 1439 } 1440 } 1441 }() 1442 1443 wg.Wait() 1444 if rec.HasErrors() { 1445 return nil, rec.Error() 1446 } 1447 1448 // Now, fan out to collect the schemas. 1449 for _, ks := range keyspaces { 1450 wg.Add(1) 1451 go func(ctx context.Context, ks *vtadminpb.Keyspace) { 1452 defer wg.Done() 1453 1454 tablets, err := c.getTabletsToQueryForSchemas(ctx, ks.Keyspace.Name, tablets, opts) 1455 if err != nil { 1456 // Ignore keyspaces without any serving tablets. 
1457 if stderrors.Is(err, errors.ErrNoServingTablet) { 1458 log.Infof(err.Error()) 1459 return 1460 } 1461 1462 rec.RecordError(fmt.Errorf("opts %+v, err: %w", opts, err)) 1463 return 1464 } 1465 1466 schema, err := c.getSchemaFromTablets(ctx, ks.Keyspace.Name, tablets, opts) 1467 if err != nil { 1468 rec.RecordError(err) 1469 return 1470 } 1471 1472 // Ignore keyspaces without schemas 1473 if schema == nil { 1474 log.Infof("No schemas for %s", ks.Keyspace.Name) 1475 return 1476 } 1477 1478 if len(schema.TableDefinitions) == 0 { 1479 log.Infof("No tables in schema for %s", ks.Keyspace.Name) 1480 return 1481 } 1482 1483 m.Lock() 1484 schemas = append(schemas, schema) 1485 m.Unlock() 1486 }(ctx, ks) 1487 } 1488 1489 wg.Wait() 1490 if rec.HasErrors() { 1491 return nil, rec.Error() 1492 } 1493 1494 go schemacache.AddOrBackfill(c.schemaCache, schemas, key, cache.DefaultExpiration, schemacache.LoadOptions{ 1495 BaseRequest: opts.BaseRequest, 1496 AggregateSizes: opts.TableSizeOptions.AggregateSizes, 1497 }) 1498 1499 return schemas, nil 1500 } 1501 1502 // Note that for this function we use the tablets parameter, ignoring the 1503 // opts.Tablets value completely. 1504 func (c *Cluster) getSchemaFromTablets(ctx context.Context, keyspace string, tablets []*vtadminpb.Tablet, opts GetSchemaOptions) (*vtadminpb.Schema, error) { 1505 var ( 1506 m sync.Mutex 1507 wg sync.WaitGroup 1508 rec concurrency.AllErrorRecorder 1509 schema = &vtadminpb.Schema{ 1510 Cluster: c.ToProto(), 1511 Keyspace: keyspace, 1512 TableSizes: map[string]*vtadminpb.Schema_TableSize{}, 1513 } 1514 // Instead of starting at false, we start with whatever the base request 1515 // specified. If we have exactly one tablet to query (i.e. we're not 1516 // doing multi-shard aggregation), it's possible the request was to 1517 // literally just get the table sizes; we shouldn't assume. If we have 1518 // more than one tablet to query, then we are doing size aggregation, 1519 // and we'll flip this to true after spawning the first GetSchema rpc. 
		sizesOnly = opts.BaseRequest.TableSizesOnly
	)

	for _, tablet := range tablets {
		wg.Add(1)

		go func(tablet *vtadminpb.Tablet, sizesOnly bool) {
			defer wg.Done()

			span, ctx := trace.NewSpan(ctx, "Vtctld.GetSchema")
			defer span.Finish()

			req := proto.Clone(opts.BaseRequest).(*vtctldatapb.GetSchemaRequest)
			req.TableSizesOnly = sizesOnly
			req.TabletAlias = tablet.Tablet.Alias

			AnnotateSpan(c, span)
			annotateGetSchemaRequest(req, span)
			span.Annotate("keyspace", keyspace)
			span.Annotate("shard", tablet.Tablet.Shard)

			if err := c.schemaReadPool.Acquire(ctx); err != nil {
				err = fmt.Errorf("GetSchema(cluster = %s, keyspace = %s, tablet = %s) failed to acquire schemaReadPool: %w", c.ID, keyspace, tablet.Tablet.Alias, err)
				rec.RecordError(err)
				return
			}

			resp, err := c.Vtctld.GetSchema(ctx, req)
			c.schemaReadPool.Release()

			if err != nil {
				err = fmt.Errorf("GetSchema(cluster = %s, keyspace = %s, tablet = %s) failed: %w", c.ID, keyspace, tablet.Tablet.Alias, err)
				rec.RecordError(err)

				return
			}

			if resp == nil || resp.Schema == nil {
				return
			}

			m.Lock()
			defer m.Unlock()

			if !sizesOnly {
				schema.TableDefinitions = resp.Schema.TableDefinitions
			}

			if !opts.TableSizeOptions.AggregateSizes {
				return
			}

			for _, td := range resp.Schema.TableDefinitions {
				tableSize, ok := schema.TableSizes[td.Name]
				if !ok {
					tableSize = &vtadminpb.Schema_TableSize{
						ByShard: map[string]*vtadminpb.Schema_ShardTableSize{},
					}
					schema.TableSizes[td.Name] = tableSize
				}

				if _, ok = tableSize.ByShard[tablet.Tablet.Shard]; ok {
					err := fmt.Errorf("duplicate shard queries for table %s on shard %s/%s", td.Name, keyspace, tablet.Tablet.Shard)
					log.Warningf("Impossible: %s", err)
					rec.RecordError(err)

					return
				}

				tableSize.RowCount += td.RowCount
				tableSize.DataLength += td.DataLength

				tableSize.ByShard[tablet.Tablet.Shard] = &vtadminpb.Schema_ShardTableSize{
					RowCount:   td.RowCount,
					DataLength: td.DataLength,
				}
			}
		}(tablet, sizesOnly)

		// If we have more than one tablet to query, we definitely don't want to
		// get more than the sizes twice, so invariably set this to true for
		// subsequent iterations
		sizesOnly = true
	}

	wg.Wait()

	if rec.HasErrors() {
		return nil, rec.Error()
	}

	return schema, nil
}

func (c *Cluster) getTabletsToQueryForSchemas(ctx context.Context, keyspace string, tablets []*vtadminpb.Tablet, opts GetSchemaOptions) ([]*vtadminpb.Tablet, error) {
	if opts.TableSizeOptions.AggregateSizes {
		shards, err := c.FindAllShardsInKeyspace(ctx, keyspace, FindAllShardsInKeyspaceOptions{})
		if err != nil {
			return nil, err
		}

		tabletsToQuery := make([]*vtadminpb.Tablet, 0, len(shards))

		for _, shard := range shards {
			// In certain setups, empty but "serving" shards may be required to
			// provide a contiguous keyspace so that certain keyspace-level
			// operations will work. In our case, we care about whether the
			// shard is truly serving, which we define as also having a known
			// primary (via PrimaryAlias) in addition to the IsPrimaryServing bit.
1629 if !shard.Shard.IsPrimaryServing || shard.Shard.PrimaryAlias == nil { 1630 if !opts.TableSizeOptions.IncludeNonServingShards { 1631 log.Infof("%s/%s is not serving; ignoring because IncludeNonServingShards = false", keyspace, shard.Name) 1632 continue 1633 } 1634 } 1635 1636 shardTablets := vtadminproto.FilterTablets(func(tablet *vtadminpb.Tablet) bool { 1637 return tablet.Tablet.Keyspace == keyspace && tablet.Tablet.Shard == shard.Name && tablet.State == vtadminpb.Tablet_SERVING 1638 }, tablets, len(tablets)) 1639 1640 if len(shardTablets) == 0 { 1641 return nil, fmt.Errorf("%w for shard %s/%s", errors.ErrNoServingTablet, shard.Keyspace, shard.Name) 1642 } 1643 1644 randomServingTablet := shardTablets[rand.Intn(len(shardTablets))] 1645 tabletsToQuery = append(tabletsToQuery, randomServingTablet) 1646 } 1647 1648 return tabletsToQuery, nil 1649 } 1650 1651 keyspaceTablets := vtadminproto.FilterTablets(func(tablet *vtadminpb.Tablet) bool { 1652 return tablet.Tablet.Keyspace == keyspace && tablet.State == vtadminpb.Tablet_SERVING 1653 }, tablets, len(tablets)) 1654 1655 if len(keyspaceTablets) == 0 { 1656 err := fmt.Errorf("%w for keyspace %s", errors.ErrNoServingTablet, keyspace) 1657 log.Warningf("%s. Searched tablets: %v", err, vtadminproto.Tablets(tablets).AliasStringList()) 1658 return nil, err 1659 } 1660 1661 randomServingTablet := keyspaceTablets[rand.Intn(len(keyspaceTablets))] 1662 return []*vtadminpb.Tablet{randomServingTablet}, nil 1663 } 1664 1665 // GetShardReplicationPositions returns a ClusterShardReplicationPosition object 1666 // for each keyspace/shard in the cluster. 1667 func (c *Cluster) GetShardReplicationPositions(ctx context.Context, req *vtadminpb.GetShardReplicationPositionsRequest) ([]*vtadminpb.ClusterShardReplicationPosition, error) { 1668 span, ctx := trace.NewSpan(ctx, "Cluster.GetShardReplicationPositions") 1669 defer span.Finish() 1670 1671 AnnotateSpan(c, span) 1672 1673 shardsByKeyspace, err := c.getShardSets(ctx, req.Keyspaces, req.KeyspaceShards) 1674 if err != nil { 1675 return nil, err 1676 } 1677 1678 var ( 1679 m sync.Mutex 1680 wg sync.WaitGroup 1681 rec concurrency.AllErrorRecorder 1682 positions []*vtadminpb.ClusterShardReplicationPosition 1683 clusterProto = c.ToProto() 1684 ) 1685 1686 for ks, shardSet := range shardsByKeyspace { 1687 for _, shard := range sets.List(shardSet) { 1688 wg.Add(1) 1689 1690 go func(keyspace, shard string) { 1691 defer wg.Done() 1692 1693 span, ctx := trace.NewSpan(ctx, "Cluster.getShardReplicationPositionsForShard") 1694 defer span.Finish() 1695 1696 AnnotateSpan(c, span) 1697 span.Annotate("keyspace", keyspace) 1698 span.Annotate("shard", shard) 1699 1700 if err := c.topoReadPool.Acquire(ctx); err != nil { 1701 rec.RecordError(fmt.Errorf("ShardReplicationPositions(%s/%s) failed to acquire topoReadPool: %w", keyspace, shard, err)) 1702 return 1703 } 1704 1705 resp, err := c.Vtctld.ShardReplicationPositions(ctx, &vtctldatapb.ShardReplicationPositionsRequest{ 1706 Keyspace: keyspace, 1707 Shard: shard, 1708 }) 1709 c.topoReadPool.Release() 1710 1711 if err != nil { 1712 rec.RecordError(fmt.Errorf("ShardReplicationPositions(%s/%s): %w", keyspace, shard, err)) 1713 return 1714 } 1715 1716 m.Lock() 1717 defer m.Unlock() 1718 1719 positions = append(positions, &vtadminpb.ClusterShardReplicationPosition{ 1720 Cluster: clusterProto, 1721 Keyspace: keyspace, 1722 Shard: shard, 1723 PositionInfo: resp, 1724 }) 1725 }(ks, shard) 1726 } 1727 } 1728 1729 wg.Wait() 1730 1731 if rec.HasErrors() { 1732 return nil, rec.Error() 
1733 } 1734 1735 return positions, nil 1736 } 1737 1738 // GetSrvVSchema returns the SrvVSchema for a given cell in the cluster. 1739 func (c *Cluster) GetSrvVSchema(ctx context.Context, cell string) (*vtadminpb.SrvVSchema, error) { 1740 span, ctx := trace.NewSpan(ctx, "Cluster.GetVSchema") 1741 defer span.Finish() 1742 1743 AnnotateSpan(c, span) 1744 span.Annotate("cell", cell) 1745 1746 if err := c.topoReadPool.Acquire(ctx); err != nil { 1747 return nil, fmt.Errorf("GetSrvVSchema(%s) failed to acquire topoReadPool: %w", cell, err) 1748 } 1749 defer c.topoReadPool.Release() 1750 1751 sv, err := c.Vtctld.GetSrvVSchema(ctx, &vtctldatapb.GetSrvVSchemaRequest{ 1752 Cell: cell, 1753 }) 1754 1755 if err != nil { 1756 return nil, err 1757 } 1758 1759 return &vtadminpb.SrvVSchema{ 1760 Cell: cell, 1761 Cluster: c.ToProto(), 1762 SrvVSchema: sv.SrvVSchema, 1763 }, nil 1764 } 1765 1766 // GetSrvVSchemas returns the SrvVSchema for all cells in the cluster, 1767 // optionally filtered by cell. 1768 func (c *Cluster) GetSrvVSchemas(ctx context.Context, cells []string) ([]*vtadminpb.SrvVSchema, error) { 1769 span, ctx := trace.NewSpan(ctx, "Cluster.GetVSchemas") 1770 defer span.Finish() 1771 1772 AnnotateSpan(c, span) 1773 1774 if err := c.topoReadPool.Acquire(ctx); err != nil { 1775 return nil, fmt.Errorf("GetSrvVSchema(cluster = %s, cells = %v) failed to acquire topoReadPool: %w", c.ID, cells, err) 1776 } 1777 resp, err := c.Vtctld.GetSrvVSchemas(ctx, &vtctldatapb.GetSrvVSchemasRequest{ 1778 Cells: cells, 1779 }) 1780 c.topoReadPool.Release() 1781 1782 if err != nil { 1783 return nil, err 1784 } 1785 1786 svs := make([]*vtadminpb.SrvVSchema, 0, len(resp.SrvVSchemas)) 1787 1788 for cell, s := range resp.SrvVSchemas { 1789 svs = append(svs, &vtadminpb.SrvVSchema{ 1790 Cell: cell, 1791 Cluster: c.ToProto(), 1792 SrvVSchema: s, 1793 }) 1794 } 1795 1796 return svs, nil 1797 } 1798 1799 // GetVSchema returns the vschema for a given keyspace in this cluster. The 1800 // caller is responsible for making at least one call to c.Vtctld.Dial prior to 1801 // calling this function. 1802 func (c *Cluster) GetVSchema(ctx context.Context, keyspace string) (*vtadminpb.VSchema, error) { 1803 span, ctx := trace.NewSpan(ctx, "Cluster.GetVSchema") 1804 defer span.Finish() 1805 1806 AnnotateSpan(c, span) 1807 span.Annotate("keyspace", keyspace) 1808 1809 if err := c.topoReadPool.Acquire(ctx); err != nil { 1810 return nil, fmt.Errorf("GetVSchema(%s) failed to acquire topoReadPool: %w", keyspace, err) 1811 } 1812 defer c.topoReadPool.Release() 1813 1814 vschema, err := c.Vtctld.GetVSchema(ctx, &vtctldatapb.GetVSchemaRequest{ 1815 Keyspace: keyspace, 1816 }) 1817 1818 if err != nil { 1819 return nil, err 1820 } 1821 1822 return &vtadminpb.VSchema{ 1823 Cluster: c.ToProto(), 1824 Name: keyspace, 1825 VSchema: vschema.VSchema, 1826 }, nil 1827 } 1828 1829 // GetVtctlds returns a list of all Vtctlds in the cluster. 1830 func (c *Cluster) GetVtctlds(ctx context.Context) ([]*vtadminpb.Vtctld, error) { 1831 vtctlds, err := c.Discovery.DiscoverVtctlds(ctx, []string{}) 1832 if err != nil { 1833 return nil, fmt.Errorf("DiscoverVtctlds(cluster = %s): %w", c.ID, err) 1834 } 1835 1836 // This overwrites any Cluster field populated by a particular discovery 1837 // implementation. 1838 cpb := c.ToProto() 1839 1840 for _, v := range vtctlds { 1841 v.Cluster = cpb 1842 } 1843 1844 return vtctlds, nil 1845 } 1846 1847 // GetWorkflowOptions is the set of filtering options for GetWorkflow requests. 
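//
// These options are passed to (*Cluster).GetWorkflow; for example (the
// keyspace and workflow name are hypothetical):
//
//	wf, err := c.GetWorkflow(ctx, "customer", "commerce2customer", cluster.GetWorkflowOptions{
//		ActiveOnly: true,
//	})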

// GetWorkflowOptions is the set of filtering options for GetWorkflow requests.
type GetWorkflowOptions struct {
	ActiveOnly bool
}

// GetWorkflow returns the single Workflow in this cluster for the given
// keyspace and workflow name. It returns an error if either no workflows or
// multiple workflows are found.
func (c *Cluster) GetWorkflow(ctx context.Context, keyspace string, name string, opts GetWorkflowOptions) (*vtadminpb.Workflow, error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.GetWorkflow")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("active_only", opts.ActiveOnly)
	span.Annotate("keyspace", keyspace)
	span.Annotate("workflow_name", name)

	workflows, err := c.findWorkflows(ctx, []string{keyspace}, FindWorkflowsOptions{
		ActiveOnly: opts.ActiveOnly,
		Filter: func(workflow *vtadminpb.Workflow) bool {
			return workflow.Workflow.Name == name
		},
	})
	if err != nil {
		return nil, err
	}

	switch len(workflows.Workflows) {
	case 0:
		msg := "%w for keyspace %s and name %s (active_only = %v)"
		if len(workflows.Warnings) > 0 {
			return nil, fmt.Errorf(msg+"; warnings: %v", errors.ErrNoWorkflow, keyspace, name, opts.ActiveOnly, workflows.Warnings)
		}

		return nil, fmt.Errorf(msg, errors.ErrNoWorkflow, keyspace, name, opts.ActiveOnly)
	case 1:
		return workflows.Workflows[0], nil
	default:
		return nil, fmt.Errorf("%w: found %d workflows in keyspace %s with name %s (active_only = %v); this should be impossible", errors.ErrAmbiguousWorkflow, len(workflows.Workflows), keyspace, name, opts.ActiveOnly)
	}
}

// GetWorkflowsOptions is the set of filtering options for GetWorkflows
// requests.
type GetWorkflowsOptions struct {
	ActiveOnly      bool
	IgnoreKeyspaces sets.Set[string]
}

// GetWorkflows returns a list of Workflows in this cluster, across the given
// keyspaces and filtering according to the options passed in.
//
// If the list of keyspaces to check is empty, then GetWorkflows will use the
// result of GetKeyspaces to search all keyspaces in the cluster. In this case,
// opts.IgnoreKeyspaces is respected.
func (c *Cluster) GetWorkflows(ctx context.Context, keyspaces []string, opts GetWorkflowsOptions) (*vtadminpb.ClusterWorkflows, error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.GetWorkflows")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("active_only", opts.ActiveOnly)

	return c.findWorkflows(ctx, keyspaces, FindWorkflowsOptions{
		ActiveOnly:      opts.ActiveOnly,
		IgnoreKeyspaces: opts.IgnoreKeyspaces,
		Filter:          func(_ *vtadminpb.Workflow) bool { return true },
	})
}
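
// Illustrative sketch, not part of the upstream file: looking up a single
// workflow by name, then listing every workflow in the cluster except those in
// an ignored keyspace. The keyspace and workflow names are hypothetical.
func exampleWorkflowLookupSketch(ctx context.Context, c *Cluster) error {
	wf, err := c.GetWorkflow(ctx, "customer", "commerce2customer", GetWorkflowOptions{ActiveOnly: true})
	if err != nil {
		// errors.ErrNoWorkflow and errors.ErrAmbiguousWorkflow are wrapped
		// with %w above, so the caller can unwrap and match on them.
		return err
	}
	log.Infof("found workflow %s", wf.Workflow.Name)

	// Passing a nil keyspace list searches every keyspace in the cluster,
	// minus anything in IgnoreKeyspaces.
	all, err := c.GetWorkflows(ctx, nil, GetWorkflowsOptions{
		ActiveOnly:      false,
		IgnoreKeyspaces: sets.New[string]("internal"),
	})
	if err != nil {
		return err
	}
	log.Infof("cluster has %d workflows (%d warnings)", len(all.Workflows), len(all.Warnings))
	return nil
}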

// PlannedFailoverShard fails over the shard either to a new primary or away
// from an old primary. Both the current and candidate primaries must be
// reachable and running.
func (c *Cluster) PlannedFailoverShard(ctx context.Context, req *vtctldatapb.PlannedReparentShardRequest) (*vtadminpb.PlannedFailoverShardResponse, error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.PlannedFailoverShard")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("keyspace", req.Keyspace)
	span.Annotate("shard", req.Shard)
	span.Annotate("new_primary", topoproto.TabletAliasString(req.NewPrimary))
	span.Annotate("avoid_primary", topoproto.TabletAliasString(req.AvoidPrimary))

	if d, ok, err := protoutil.DurationFromProto(req.WaitReplicasTimeout); ok && err == nil {
		span.Annotate("wait_replicas_timeout", d.String())
	}

	if err := c.failoverPool.Acquire(ctx); err != nil {
		return nil, fmt.Errorf("PlannedFailoverShard(%s/%s): failed to acquire failoverPool: %w", req.Keyspace, req.Shard, err)
	}
	defer c.failoverPool.Release()

	resp, err := c.Vtctld.PlannedReparentShard(ctx, req)
	if err != nil {
		return nil, err
	}

	return &vtadminpb.PlannedFailoverShardResponse{
		Cluster:         c.ToProto(),
		Keyspace:        resp.Keyspace,
		Shard:           resp.Shard,
		PromotedPrimary: resp.PromotedPrimary,
		Events:          resp.Events,
	}, nil
}

// RefreshState reloads the tablet record from the cluster's topo onto the
// specified tablet.
func (c *Cluster) RefreshState(ctx context.Context, tablet *vtadminpb.Tablet) error {
	span, ctx := trace.NewSpan(ctx, "Cluster.RefreshState")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("tablet_alias", topoproto.TabletAliasString(tablet.Tablet.Alias))

	if err := c.topoReadPool.Acquire(ctx); err != nil {
		return fmt.Errorf("RefreshState(%v) failed to acquire topoReadPool: %w", topoproto.TabletAliasString(tablet.Tablet.Alias), err)
	}
	defer c.topoReadPool.Release()

	_, err := c.Vtctld.RefreshState(ctx, &vtctldatapb.RefreshStateRequest{
		TabletAlias: tablet.Tablet.Alias,
	})
	return err
}

// RefreshTabletReplicationSource performs a `CHANGE REPLICATION SOURCE TO` on
// a tablet to replicate from the current primary in the shard.
func (c *Cluster) RefreshTabletReplicationSource(ctx context.Context, tablet *vtadminpb.Tablet) (*vtadminpb.RefreshTabletReplicationSourceResponse, error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.RefreshTabletReplicationSource")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("tablet_alias", topoproto.TabletAliasString(tablet.Tablet.Alias))

	if err := c.topoRWPool.Acquire(ctx); err != nil {
		return nil, fmt.Errorf("RefreshTabletReplicationSource(%v) failed to acquire topoRWPool: %w", topoproto.TabletAliasString(tablet.Tablet.Alias), err)
	}
	defer c.topoRWPool.Release()

	resp, err := c.Vtctld.ReparentTablet(ctx, &vtctldatapb.ReparentTabletRequest{Tablet: tablet.Tablet.Alias})
	if err != nil {
		return nil, err
	}

	return &vtadminpb.RefreshTabletReplicationSourceResponse{
		Keyspace: resp.Keyspace,
		Shard:    resp.Shard,
		Primary:  resp.Primary,
		Cluster:  c.ToProto(),
	}, nil
}
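
// Illustrative sketch, not part of the upstream file: a planned failover of
// one shard to a specific candidate primary, followed by re-pointing a lagging
// replica at the new primary. The cell, uid, keyspace, and shard values are
// hypothetical; the replica tablet is supplied by the caller.
func examplePlannedFailoverSketch(ctx context.Context, c *Cluster, replica *vtadminpb.Tablet) error {
	resp, err := c.PlannedFailoverShard(ctx, &vtctldatapb.PlannedReparentShardRequest{
		Keyspace:   "commerce",
		Shard:      "-80",
		NewPrimary: &topodatapb.TabletAlias{Cell: "zone1", Uid: 101},
	})
	if err != nil {
		return err
	}
	log.Infof("promoted %s in %s/%s", topoproto.TabletAliasString(resp.PromotedPrimary), resp.Keyspace, resp.Shard)

	// RefreshTabletReplicationSource (ReparentTablet under the hood) makes the
	// replica replicate from the shard's current primary.
	if _, err := c.RefreshTabletReplicationSource(ctx, replica); err != nil {
		return err
	}
	return nil
}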

// ReloadSchemas reloads schemas in one or more keyspaces, shards, or tablets
// in the cluster, depending on the request parameters.
func (c *Cluster) ReloadSchemas(ctx context.Context, req *vtadminpb.ReloadSchemasRequest) (*vtadminpb.ReloadSchemasResponse, error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.ReloadSchemas")
	defer span.Finish()

	AnnotateSpan(c, span)

	var (
		resp vtadminpb.ReloadSchemasResponse
		err  error
	)
	switch {
	case len(req.Tablets) > 0:
		resp.TabletResults, err = c.reloadTabletSchemas(ctx, req)
	case len(req.KeyspaceShards) > 0:
		resp.ShardResults, err = c.reloadShardSchemas(ctx, req)
	default:
		resp.KeyspaceResults, err = c.reloadKeyspaceSchemas(ctx, req)
	}

	if err != nil {
		return nil, err
	}

	return &resp, nil
}

// reloadKeyspaceSchemas reloads schemas in one or more keyspaces in the
// cluster.
func (c *Cluster) reloadKeyspaceSchemas(ctx context.Context, req *vtadminpb.ReloadSchemasRequest) ([]*vtadminpb.ReloadSchemasResponse_KeyspaceResult, error) {
	keyspaces, err := func() (keyspaces []*vtctldatapb.Keyspace, err error) {
		span, ctx := trace.NewSpan(ctx, "Cluster.GetKeyspaces")
		defer span.Finish()

		if err := c.topoReadPool.Acquire(ctx); err != nil {
			return nil, fmt.Errorf("ReloadSchemas: failed to acquire topoReadPool: %w", err)
		}
		defer c.topoReadPool.Release()

		// Load all keyspaces up front so we don't have to make one trip per
		// keyspace to check its existence.
		resp, err := c.Vtctld.GetKeyspaces(ctx, &vtctldatapb.GetKeyspacesRequest{})
		if err != nil {
			return nil, err
		}

		// The request specified no keyspace names, so default to all of them.
		if len(req.Keyspaces) == 0 {
			return resp.Keyspaces, nil
		}

		keyspaceNames := sets.New[string](req.Keyspaces...)

		for _, ks := range resp.Keyspaces {
			if keyspaceNames.Has(ks.Name) {
				keyspaces = append(keyspaces, ks)
			}
		}

		return keyspaces, nil
	}()
	if err != nil {
		return nil, err
	}

	var (
		m   sync.Mutex
		wg  sync.WaitGroup
		rec concurrency.AllErrorRecorder

		cpb     = c.ToProto()
		results = make([]*vtadminpb.ReloadSchemasResponse_KeyspaceResult, 0, len(keyspaces))
	)

	for _, ks := range keyspaces {
		wg.Add(1)
		go func(ks *vtctldatapb.Keyspace) {
			defer wg.Done()

			span, ctx := trace.NewSpan(ctx, "Cluster.ReloadSchemaKeyspace")
			defer span.Finish()

			AnnotateSpan(c, span)
			span.Annotate("keyspace", ks.Name)
			span.Annotate("concurrency", req.Concurrency)
			span.Annotate("include_primary", req.IncludePrimary)
			span.Annotate("wait_position", req.WaitPosition)

			resp, err := c.Vtctld.ReloadSchemaKeyspace(ctx, &vtctldatapb.ReloadSchemaKeyspaceRequest{
				Keyspace:       ks.Name,
				Concurrency:    req.Concurrency,
				IncludePrimary: req.IncludePrimary,
				WaitPosition:   req.WaitPosition,
			})
			if err != nil {
				rec.RecordError(fmt.Errorf("ReloadSchemaKeyspace(%s) failed: %w", ks.Name, err))
				return
			}

			m.Lock()
			defer m.Unlock()
			results = append(results, &vtadminpb.ReloadSchemasResponse_KeyspaceResult{
				Keyspace: &vtadminpb.Keyspace{
					Cluster:  cpb,
					Keyspace: ks,
				},
				Events: resp.Events,
			})
		}(ks)
	}

	wg.Wait()
	if rec.HasErrors() {
		return nil, rec.Error()
	}

	return results, nil
}
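
// Illustrative sketch, not part of the upstream file: ReloadSchemas dispatches
// on the request shape, and this shows the keyspace-scoped form. Leaving
// Keyspaces, KeyspaceShards, and Tablets all empty would instead reload every
// keyspace in the cluster. The keyspace names and concurrency are hypothetical.
func exampleReloadKeyspacesSketch(ctx context.Context, c *Cluster) error {
	resp, err := c.ReloadSchemas(ctx, &vtadminpb.ReloadSchemasRequest{
		Keyspaces:      []string{"commerce", "customer"},
		Concurrency:    8,    // per-keyspace reload parallelism, passed through to ReloadSchemaKeyspace
		IncludePrimary: true, // also reload on primary tablets
	})
	if err != nil {
		return err
	}
	for _, result := range resp.KeyspaceResults {
		log.Infof("reloaded keyspace %s (%d events)", result.Keyspace.Keyspace.Name, len(result.Events))
	}
	return nil
}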

// reloadShardSchemas reloads schemas in one or more shards in the cluster.
func (c *Cluster) reloadShardSchemas(ctx context.Context, req *vtadminpb.ReloadSchemasRequest) ([]*vtadminpb.ReloadSchemasResponse_ShardResult, error) {
	shardSets, err := c.getShardSets(ctx, nil, req.KeyspaceShards)
	if err != nil {
		return nil, err
	}

	var (
		m   sync.Mutex
		wg  sync.WaitGroup
		rec concurrency.AllErrorRecorder

		cpb     = c.ToProto()
		results = make([]*vtadminpb.ReloadSchemasResponse_ShardResult, 0, len(shardSets))
	)

	for ks, shards := range shardSets {
		for _, shard := range shards.UnsortedList() {
			wg.Add(1)
			go func(keyspace, shard string) {
				defer wg.Done()

				span, ctx := trace.NewSpan(ctx, "Cluster.reloadShardSchema")
				defer span.Finish()

				AnnotateSpan(c, span)
				span.Annotate("keyspace", keyspace)
				span.Annotate("shard", shard)
				span.Annotate("concurrency", req.Concurrency)
				span.Annotate("include_primary", req.IncludePrimary)
				span.Annotate("wait_position", req.WaitPosition)

				resp, err := c.Vtctld.ReloadSchemaShard(ctx, &vtctldatapb.ReloadSchemaShardRequest{
					Keyspace:       keyspace,
					Shard:          shard,
					Concurrency:    req.Concurrency,
					IncludePrimary: req.IncludePrimary,
					WaitPosition:   req.WaitPosition,
				})
				if err != nil {
					rec.RecordError(fmt.Errorf("ReloadSchemaShard(%s/%s) failed: %w", keyspace, shard, err))
					return
				}

				m.Lock()
				defer m.Unlock()
				results = append(results, &vtadminpb.ReloadSchemasResponse_ShardResult{
					Shard: &vtadminpb.Shard{
						Cluster: cpb,
						Shard: &vtctldatapb.Shard{
							Keyspace: keyspace,
							Name:     shard,
						},
					},
					Events: resp.Events,
				})
			}(ks, shard)
		}
	}

	wg.Wait()
	if rec.HasErrors() {
		return nil, rec.Error()
	}

	return results, nil
}
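
// Illustrative sketch, not part of the upstream file: the shard-scoped form of
// ReloadSchemas. KeyspaceShards entries are assumed to use the same
// "keyspace/shard" strings that getShardSets accepts; the values here are
// hypothetical.
func exampleReloadShardsSketch(ctx context.Context, c *Cluster) error {
	resp, err := c.ReloadSchemas(ctx, &vtadminpb.ReloadSchemasRequest{
		KeyspaceShards: []string{"commerce/-80", "commerce/80-"},
	})
	if err != nil {
		return err
	}
	for _, result := range resp.ShardResults {
		log.Infof("reloaded %s/%s (%d events)", result.Shard.Shard.Keyspace, result.Shard.Shard.Name, len(result.Events))
	}
	return nil
}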

// reloadTabletSchemas reloads schemas in one or more tablets in the cluster.
func (c *Cluster) reloadTabletSchemas(ctx context.Context, req *vtadminpb.ReloadSchemasRequest) ([]*vtadminpb.ReloadSchemasResponse_TabletResult, error) {
	aliasSet := sets.New[string]()
	for _, alias := range req.Tablets {
		aliasSet.Insert(topoproto.TabletAliasString(alias))
	}

	tablets, err := c.FindTablets(ctx, func(t *vtadminpb.Tablet) bool {
		return aliasSet.Has(topoproto.TabletAliasString(t.Tablet.Alias))
	}, -1)
	if err != nil {
		return nil, err
	}

	var (
		m           sync.Mutex
		wg          sync.WaitGroup
		ch          = make(chan *vtadminpb.Tablet)
		concurrency = int(req.Concurrency)

		results = make([]*vtadminpb.ReloadSchemasResponse_TabletResult, 0, len(tablets))
	)

	if concurrency < 1 {
		concurrency = len(tablets)
	}

	reloadTablet := func(t *vtadminpb.Tablet) *vtadminpb.ReloadSchemasResponse_TabletResult {
		span, ctx := trace.NewSpan(ctx, "Cluster.reloadTabletSchema")
		defer span.Finish()

		AnnotateSpan(c, span)
		span.Annotate("tablet_alias", topoproto.TabletAliasString(t.Tablet.Alias))

		result := &vtadminpb.ReloadSchemasResponse_TabletResult{
			Tablet: t,
			Result: "ok",
		}
		_, err := c.Vtctld.ReloadSchema(ctx, &vtctldatapb.ReloadSchemaRequest{
			TabletAlias: t.Tablet.Alias,
		})
		if err != nil {
			result.Result = err.Error()
		}

		return result
	}

	wg.Add(concurrency)
	for i := 0; i < concurrency; i++ {
		go func() {
			defer wg.Done()
			for tablet := range ch {
				result := reloadTablet(tablet)

				m.Lock()
				results = append(results, result)
				m.Unlock()
			}
		}()
	}

	for _, t := range tablets {
		ch <- t
	}

	close(ch)
	wg.Wait()

	return results, nil
}

// SetWritable toggles the writability of a tablet, setting it to either
// read-write or read-only.
func (c *Cluster) SetWritable(ctx context.Context, req *vtctldatapb.SetWritableRequest) error {
	span, ctx := trace.NewSpan(ctx, "Cluster.SetWritable")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("tablet_alias", topoproto.TabletAliasString(req.TabletAlias))
	span.Annotate("writable", req.Writable)

	_, err := c.Vtctld.SetWritable(ctx, req)
	return err
}
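
// The worker-pool shape used by reloadTabletSchemas above, reduced to a
// minimal generic sketch that is not part of the upstream file: N goroutines
// drain an unbuffered channel and append to a mutex-guarded slice, so at most
// N pieces of work are in flight at once.
func exampleBoundedFanOutSketch[T, R any](items []T, concurrency int, work func(T) R) []R {
	if concurrency < 1 {
		// Mirror reloadTabletSchemas: a non-positive concurrency means "no
		// limit", i.e. one worker per item.
		concurrency = len(items)
	}

	var (
		m       sync.Mutex
		wg      sync.WaitGroup
		ch      = make(chan T)
		results = make([]R, 0, len(items))
	)

	wg.Add(concurrency)
	for i := 0; i < concurrency; i++ {
		go func() {
			defer wg.Done()
			for item := range ch {
				r := work(item)

				m.Lock()
				results = append(results, r)
				m.Unlock()
			}
		}()
	}

	// Feed the workers, then close the channel so the range loops terminate.
	for _, item := range items {
		ch <- item
	}
	close(ch)
	wg.Wait()

	return results
}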

// TabletExternallyPromoted updates the topo record for a shard to reflect a
// tablet that was promoted to primary external to Vitess (e.g. orchestrator).
func (c *Cluster) TabletExternallyPromoted(ctx context.Context, tablet *vtadminpb.Tablet) (*vtadminpb.TabletExternallyPromotedResponse, error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.TabletExternallyPromoted")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("tablet_alias", topoproto.TabletAliasString(tablet.Tablet.Alias))

	if err := c.topoRWPool.Acquire(ctx); err != nil {
		return nil, fmt.Errorf("TabletExternallyPromoted(%s): failed to acquire topoRWPool: %w", topoproto.TabletAliasString(tablet.Tablet.Alias), err)
	}
	defer c.topoRWPool.Release()

	resp, err := c.Vtctld.TabletExternallyReparented(ctx, &vtctldatapb.TabletExternallyReparentedRequest{
		Tablet: tablet.Tablet.Alias,
	})
	if err != nil {
		return nil, err
	}

	return &vtadminpb.TabletExternallyPromotedResponse{
		Cluster:    c.ToProto(),
		Keyspace:   resp.Keyspace,
		Shard:      resp.Shard,
		NewPrimary: resp.NewPrimary,
		OldPrimary: resp.OldPrimary,
	}, nil
}

// ToggleTabletReplication either starts or stops replication on the specified
// tablet.
func (c *Cluster) ToggleTabletReplication(ctx context.Context, tablet *vtadminpb.Tablet, start bool) (err error) {
	span, ctx := trace.NewSpan(ctx, "Cluster.ToggleTabletReplication")
	defer span.Finish()

	AnnotateSpan(c, span)
	span.Annotate("tablet_alias", topoproto.TabletAliasString(tablet.Tablet.Alias))
	span.Annotate("start", start)
	span.Annotate("stop", !start)

	if start {
		_, err = c.Vtctld.StartReplication(ctx, &vtctldatapb.StartReplicationRequest{
			TabletAlias: tablet.Tablet.Alias,
		})
	} else {
		_, err = c.Vtctld.StopReplication(ctx, &vtctldatapb.StopReplicationRequest{
			TabletAlias: tablet.Tablet.Alias,
		})
	}

	return err
}
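
// Illustrative sketch, not part of the upstream file: acknowledging a
// promotion performed outside Vitess (e.g. by orchestrator) and then resuming
// replication on the demoted tablet. Both tablets are supplied by the caller.
func exampleExternalPromotionSketch(ctx context.Context, c *Cluster, newPrimary, oldPrimary *vtadminpb.Tablet) error {
	resp, err := c.TabletExternallyPromoted(ctx, newPrimary)
	if err != nil {
		return err
	}
	log.Infof("topo now records %s as primary of %s/%s",
		topoproto.TabletAliasString(resp.NewPrimary), resp.Keyspace, resp.Shard)

	// Restart replication on the old primary so it can rejoin as a replica.
	return c.ToggleTabletReplication(ctx, oldPrimary, true /* start */)
}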

// Debug returns a map of debug information for a cluster.
func (c *Cluster) Debug() map[string]any {
	m := map[string]any{
		"cluster": c.ToProto(),
		"config":  c.cfg,
		"pools": map[string]json.RawMessage{
			"backup_read_pool":        json.RawMessage(c.backupReadPool.StatsJSON()),
			"schema_read_pool":        json.RawMessage(c.schemaReadPool.StatsJSON()),
			"topo_read_pool":          json.RawMessage(c.topoReadPool.StatsJSON()),
			"topo_rw_pool":            json.RawMessage(c.topoRWPool.StatsJSON()),
			"workflow_read_pool":      json.RawMessage(c.workflowReadPool.StatsJSON()),
			"emergency_failover_pool": json.RawMessage(c.emergencyFailoverPool.StatsJSON()),
			"failover_pool":           json.RawMessage(c.failoverPool.StatsJSON()),
		},
		"caches": map[string]any{
			"schemas": c.schemaCache.Debug(),
		},
	}

	if vtsql, ok := c.DB.(debug.Debuggable); ok {
		m["vtsql"] = vtsql.Debug()
	}

	if vtctld, ok := c.Vtctld.(debug.Debuggable); ok {
		m["vtctld"] = vtctld.Debug()
	}

	return m
}

// Equal compares the vtctld and vtgate addresses of the clusters for equality.
func (c *Cluster) Equal(otherCluster *Cluster) (bool, error) {
	ctx := context.Background()
	vtgateAddresses, err := c.Discovery.DiscoverVTGateAddrs(ctx, []string{})
	if err != nil {
		return false, err
	}
	otherVtgateAddresses, err := otherCluster.Discovery.DiscoverVTGateAddrs(ctx, []string{})
	if err != nil {
		return false, err
	}

	vtctldAddresses, err := c.Discovery.DiscoverVtctldAddrs(ctx, []string{})
	if err != nil {
		return false, err
	}

	otherVtctldAddresses, err := otherCluster.Discovery.DiscoverVtctldAddrs(ctx, []string{})
	if err != nil {
		return false, err
	}

	return equalAddresses(vtgateAddresses, otherVtgateAddresses) && equalAddresses(vtctldAddresses, otherVtctldAddresses), nil
}

func equalAddresses(list1 []string, list2 []string) bool {
	if len(list1) != len(list2) {
		return false
	}

	sort.Strings(list1)
	sort.Strings(list2)
	for i, e1 := range list1 {
		e2 := list2[i]
		if e1 != e2 {
			return false
		}
	}

	return true
}
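
// Illustrative sketch, not part of the upstream file: the call shapes of Equal
// and Debug. Equal compares the discovered vtgate and vtctld addresses of two
// clusters; Debug returns a plain map that can be serialized for debug
// endpoints.
func exampleEqualAndDebugSketch(c, other *Cluster) {
	same, err := c.Equal(other)
	if err != nil {
		log.Errorf("failed to compare clusters %s and %s: %v", c.ID, other.ID, err)
		return
	}
	log.Infof("clusters discover the same vtgate/vtctld addresses: %v", same)

	// The Debug map includes pool stats as json.RawMessage, so it can be
	// marshaled directly.
	if buf, err := json.Marshal(c.Debug()); err == nil {
		log.Infof("cluster debug info: %s", buf)
	}
}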