vitess.io/vitess@v0.16.2/go/vt/topo/shard.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package topo
    18  
    19  import (
    20  	"context"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"path"
    24  	"reflect"
    25  	"sort"
    26  	"strings"
    27  	"sync"
    28  	"time"
    29  
    30  	"google.golang.org/protobuf/proto"
    31  
    32  	"vitess.io/vitess/go/vt/logutil"
    33  	"vitess.io/vitess/go/vt/proto/vtrpc"
    34  	"vitess.io/vitess/go/vt/vterrors"
    35  
    36  	"vitess.io/vitess/go/event"
    37  	"vitess.io/vitess/go/trace"
    38  	"vitess.io/vitess/go/vt/concurrency"
    39  	"vitess.io/vitess/go/vt/key"
    40  	"vitess.io/vitess/go/vt/log"
    41  	"vitess.io/vitess/go/vt/topo/events"
    42  	"vitess.io/vitess/go/vt/topo/topoproto"
    43  
    44  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    45  )
    46  
// Error messages used by the denied-tables handling of a shard's tablet
// controls (see UpdateSourceDeniedTables and updatePrimaryTabletControl).
const (
	// dlTablesAlreadyPresent is returned when adding tables to a primary
	// deny list that already contains at least one of them.
	dlTablesAlreadyPresent = "one or more tables are already present in the denylist"
	// dlTablesNotPresent is returned when removing tables from a primary
	// deny list that does not contain all of them.
	dlTablesNotPresent     = "cannot remove tables since one or more do not exist in the denylist"
	// dlNoCellsForPrimary is returned when cells are supplied together with
	// the PRIMARY tablet type.
	dlNoCellsForPrimary    = "you cannot specify cells for a primary's tablet control"
)
    52  
    53  // Functions for dealing with shard representations in topology.
    54  
    55  // addCells will merge both cells list, settling on nil if either list is empty
    56  func addCells(left, right []string) []string {
    57  	if len(left) == 0 || len(right) == 0 {
    58  		return nil
    59  	}
    60  
    61  	for _, cell := range right {
    62  		if !InCellList(cell, left) {
    63  			left = append(left, cell)
    64  		}
    65  	}
    66  	return left
    67  }
    68  
    69  // removeCellsFromList will remove the cells from the provided list. It returns
    70  // the new list, and a boolean that indicates the returned list is empty.
    71  func removeCellsFromList(toRemove, fullList []string) []string {
    72  	leftoverCells := make([]string, 0)
    73  	for _, cell := range fullList {
    74  		if !InCellList(cell, toRemove) {
    75  			leftoverCells = append(leftoverCells, cell)
    76  		}
    77  	}
    78  	return leftoverCells
    79  }
    80  
    81  // removeCells will remove the cells from the provided list. It returns
    82  // the new list, and a boolean that indicates the returned list is empty.
    83  func removeCells(cells, toRemove, fullList []string) ([]string, bool) {
    84  	// The assumption here is we already migrated something,
    85  	// and we're reverting that part. So we're gonna remove
    86  	// records only.
    87  	leftoverCells := make([]string, 0, len(cells))
    88  	if len(cells) == 0 {
    89  		// we migrated all the cells already, take the full list
    90  		// and remove all the ones we're not reverting
    91  		for _, cell := range fullList {
    92  			if !InCellList(cell, toRemove) {
    93  				leftoverCells = append(leftoverCells, cell)
    94  			}
    95  		}
    96  	} else {
    97  		// we migrated a subset of the cells,
    98  		// remove the ones we're reverting
    99  		for _, cell := range cells {
   100  			if !InCellList(cell, toRemove) {
   101  				leftoverCells = append(leftoverCells, cell)
   102  			}
   103  		}
   104  	}
   105  
   106  	if len(leftoverCells) == 0 {
   107  		// we don't have any cell left, we need to clear this record
   108  		return nil, true
   109  	}
   110  
   111  	return leftoverCells, false
   112  }
   113  
   114  // IsShardUsingRangeBasedSharding returns true if the shard name
   115  // implies it is using range based sharding.
   116  func IsShardUsingRangeBasedSharding(shard string) bool {
   117  	return strings.Contains(shard, "-")
   118  }
   119  
   120  // ValidateShardName takes a shard name and sanitizes it, and also returns
   121  // the KeyRange.
   122  func ValidateShardName(shard string) (string, *topodatapb.KeyRange, error) {
   123  	if strings.Contains(shard, "/") {
   124  		return "", nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid shardId, may not contain '/': %v", shard)
   125  	}
   126  
   127  	if !IsShardUsingRangeBasedSharding(shard) {
   128  		return shard, nil, nil
   129  	}
   130  
   131  	parts := strings.Split(shard, "-")
   132  	if len(parts) != 2 {
   133  		return "", nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid shardId, can only contain one '-': %v", shard)
   134  	}
   135  
   136  	keyRange, err := key.ParseKeyRangeParts(parts[0], parts[1])
   137  	if err != nil {
   138  		return "", nil, err
   139  	}
   140  
   141  	if len(keyRange.End) > 0 && string(keyRange.Start) >= string(keyRange.End) {
   142  		return "", nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "out of order keys: %v is not strictly smaller than %v", hex.EncodeToString(keyRange.Start), hex.EncodeToString(keyRange.End))
   143  	}
   144  
   145  	return strings.ToLower(shard), keyRange, nil
   146  }
   147  
// ShardInfo is a meta struct that contains metadata to give the data
// more context and convenience. This is the main way we interact with a shard.
//
// The shard record itself is embedded, so its fields and methods are
// directly accessible on a ShardInfo.
type ShardInfo struct {
	// keyspace is the name of the keyspace, exposed through Keyspace().
	keyspace  string
	// shardName is the name of the shard, exposed through ShardName().
	shardName string
	// version is the topo version of the record, exposed through Version().
	version   Version
	*topodatapb.Shard
}
   156  
   157  // NewShardInfo returns a ShardInfo basing on shard with the
   158  // keyspace / shard. This function should be only used by Server
   159  // implementations.
   160  func NewShardInfo(keyspace, shard string, value *topodatapb.Shard, version Version) *ShardInfo {
   161  	return &ShardInfo{
   162  		keyspace:  keyspace,
   163  		shardName: shard,
   164  		version:   version,
   165  		Shard:     value,
   166  	}
   167  }
   168  
// Keyspace returns the name of the keyspace this shard belongs to, as
// recorded when the ShardInfo was built.
func (si *ShardInfo) Keyspace() string {
	return si.keyspace
}
   173  
// ShardName returns the name of the shard, as recorded when the ShardInfo
// was built.
func (si *ShardInfo) ShardName() string {
	return si.shardName
}
   178  
// Version returns the shard version from last time it was read or updated.
// updateShard refreshes it after every successful write.
func (si *ShardInfo) Version() Version {
	return si.version
}
   183  
   184  // HasPrimary returns true if the Shard has an assigned primary.
   185  func (si *ShardInfo) HasPrimary() bool {
   186  	return !topoproto.TabletAliasIsZero(si.Shard.PrimaryAlias)
   187  }
   188  
   189  // GetPrimaryTermStartTime returns the shard's primary term start time as a Time value.
   190  func (si *ShardInfo) GetPrimaryTermStartTime() time.Time {
   191  	return logutil.ProtoToTime(si.Shard.PrimaryTermStartTime)
   192  }
   193  
   194  // SetPrimaryTermStartTime sets the shard's primary term start time as a Time value.
   195  func (si *ShardInfo) SetPrimaryTermStartTime(t time.Time) {
   196  	si.Shard.PrimaryTermStartTime = logutil.TimeToProto(t)
   197  }
   198  
   199  // GetShard is a high level function to read shard data.
   200  // It generates trace spans.
   201  func (ts *Server) GetShard(ctx context.Context, keyspace, shard string) (*ShardInfo, error) {
   202  	span, ctx := trace.NewSpan(ctx, "TopoServer.GetShard")
   203  	span.Annotate("keyspace", keyspace)
   204  	span.Annotate("shard", shard)
   205  	defer span.Finish()
   206  
   207  	shardPath := shardFilePath(keyspace, shard)
   208  
   209  	data, version, err := ts.globalCell.Get(ctx, shardPath)
   210  
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  
   215  	value := &topodatapb.Shard{}
   216  	if err = proto.Unmarshal(data, value); err != nil {
   217  		return nil, vterrors.Wrapf(err, "GetShard(%v,%v): bad shard data", keyspace, shard)
   218  	}
   219  	return &ShardInfo{
   220  		keyspace:  keyspace,
   221  		shardName: shard,
   222  		version:   version,
   223  		Shard:     value,
   224  	}, nil
   225  }
   226  
   227  // updateShard updates the shard data, with the right version.
   228  // It also creates a span, and dispatches the event.
   229  func (ts *Server) updateShard(ctx context.Context, si *ShardInfo) error {
   230  	span, ctx := trace.NewSpan(ctx, "TopoServer.UpdateShard")
   231  	span.Annotate("keyspace", si.keyspace)
   232  	span.Annotate("shard", si.shardName)
   233  	defer span.Finish()
   234  
   235  	data, err := proto.Marshal(si.Shard)
   236  	if err != nil {
   237  		return err
   238  	}
   239  	shardPath := shardFilePath(si.keyspace, si.shardName)
   240  	newVersion, err := ts.globalCell.Update(ctx, shardPath, data, si.version)
   241  	if err != nil {
   242  		return err
   243  	}
   244  	si.version = newVersion
   245  
   246  	event.Dispatch(&events.ShardChange{
   247  		KeyspaceName: si.Keyspace(),
   248  		ShardName:    si.ShardName(),
   249  		Shard:        si.Shard,
   250  		Status:       "updated",
   251  	})
   252  	return nil
   253  }
   254  
   255  // UpdateShardFields is a high level helper to read a shard record, call an
   256  // update function on it, and then write it back. If the write fails due to
   257  // a version mismatch, it will re-read the record and retry the update.
   258  // If the update succeeds, it returns the updated ShardInfo.
   259  // If the update method returns ErrNoUpdateNeeded, nothing is written,
   260  // and nil,nil is returned.
   261  //
   262  // Note the callback method takes a ShardInfo, so it can get the
   263  // keyspace and shard from it, or use all the ShardInfo methods.
   264  func (ts *Server) UpdateShardFields(ctx context.Context, keyspace, shard string, update func(*ShardInfo) error) (*ShardInfo, error) {
   265  	for {
   266  		si, err := ts.GetShard(ctx, keyspace, shard)
   267  		if err != nil {
   268  			return nil, err
   269  		}
   270  		if err = update(si); err != nil {
   271  			if IsErrType(err, NoUpdateNeeded) {
   272  				return nil, nil
   273  			}
   274  			return nil, err
   275  		}
   276  		if err = ts.updateShard(ctx, si); !IsErrType(err, BadVersion) {
   277  			return si, err
   278  		}
   279  	}
   280  }
   281  
   282  // CreateShard creates a new shard and tries to fill in the right information.
   283  // This will lock the Keyspace, as we may be looking at other shard servedTypes.
   284  // Using GetOrCreateShard is probably a better idea for most use cases.
   285  func (ts *Server) CreateShard(ctx context.Context, keyspace, shard string) (err error) {
   286  	// Lock the keyspace, because we'll be looking at ServedTypes.
   287  	ctx, unlock, lockErr := ts.LockKeyspace(ctx, keyspace, "CreateShard")
   288  	if lockErr != nil {
   289  		return lockErr
   290  	}
   291  	defer unlock(&err)
   292  
   293  	// validate parameters
   294  	_, keyRange, err := ValidateShardName(shard)
   295  	if err != nil {
   296  		return err
   297  	}
   298  
   299  	value := &topodatapb.Shard{
   300  		KeyRange: keyRange,
   301  	}
   302  
   303  	// Set primary as serving only if its keyrange doesn't overlap
   304  	// with other shards. This applies to unsharded keyspaces also
   305  	value.IsPrimaryServing = true
   306  	sis, err := ts.FindAllShardsInKeyspace(ctx, keyspace)
   307  	if err != nil && !IsErrType(err, NoNode) {
   308  		return err
   309  	}
   310  	for _, si := range sis {
   311  		if si.KeyRange == nil || key.KeyRangesIntersect(si.KeyRange, keyRange) {
   312  			value.IsPrimaryServing = false
   313  			break
   314  		}
   315  	}
   316  
   317  	// Marshal and save.
   318  	data, err := proto.Marshal(value)
   319  	if err != nil {
   320  		return err
   321  	}
   322  	shardPath := shardFilePath(keyspace, shard)
   323  	if _, err := ts.globalCell.Create(ctx, shardPath, data); err != nil {
   324  		// Return error as is, we need to propagate
   325  		// ErrNodeExists for instance.
   326  		return err
   327  	}
   328  
   329  	event.Dispatch(&events.ShardChange{
   330  		KeyspaceName: keyspace,
   331  		ShardName:    shard,
   332  		Shard:        value,
   333  		Status:       "created",
   334  	})
   335  	return nil
   336  }
   337  
   338  // GetOrCreateShard will return the shard object, or create one if it doesn't
   339  // already exist. Note the shard creation is protected by a keyspace Lock.
   340  func (ts *Server) GetOrCreateShard(ctx context.Context, keyspace, shard string) (si *ShardInfo, err error) {
   341  	si, err = ts.GetShard(ctx, keyspace, shard)
   342  	if !IsErrType(err, NoNode) {
   343  		return
   344  	}
   345  
   346  	// create the keyspace, maybe it already exists
   347  	if err = ts.CreateKeyspace(ctx, keyspace, &topodatapb.Keyspace{}); err != nil && !IsErrType(err, NodeExists) {
   348  		return nil, vterrors.Wrapf(err, "CreateKeyspace(%v) failed", keyspace)
   349  	}
   350  
   351  	// make sure a valid vschema has been loaded
   352  	if err = ts.EnsureVSchema(ctx, keyspace); err != nil {
   353  		return nil, vterrors.Wrapf(err, "EnsureVSchema(%v) failed", keyspace)
   354  	}
   355  
   356  	// now try to create with the lock, may already exist
   357  	if err = ts.CreateShard(ctx, keyspace, shard); err != nil && !IsErrType(err, NodeExists) {
   358  		return nil, vterrors.Wrapf(err, "CreateShard(%v/%v) failed", keyspace, shard)
   359  	}
   360  
   361  	// try to read the shard again, maybe someone created it
   362  	// in between the original GetShard and the LockKeyspace
   363  	return ts.GetShard(ctx, keyspace, shard)
   364  }
   365  
   366  // DeleteShard wraps the underlying conn.Delete
   367  // and dispatches the event.
   368  func (ts *Server) DeleteShard(ctx context.Context, keyspace, shard string) error {
   369  	shardPath := shardFilePath(keyspace, shard)
   370  	if err := ts.globalCell.Delete(ctx, shardPath, nil); err != nil {
   371  		return err
   372  	}
   373  	event.Dispatch(&events.ShardChange{
   374  		KeyspaceName: keyspace,
   375  		ShardName:    shard,
   376  		Shard:        nil,
   377  		Status:       "deleted",
   378  	})
   379  	return nil
   380  }
   381  
   382  // GetTabletControl returns the Shard_TabletControl for the given tablet type,
   383  // or nil if it is not in the map.
   384  func (si *ShardInfo) GetTabletControl(tabletType topodatapb.TabletType) *topodatapb.Shard_TabletControl {
   385  	for _, tc := range si.TabletControls {
   386  		if tc.TabletType == tabletType {
   387  			return tc
   388  		}
   389  	}
   390  	return nil
   391  }
   392  
   393  // UpdateSourceDeniedTables will add or remove the listed tables
   394  // in the shard record's TabletControl structures. Note we don't
   395  // support a lot of the corner cases:
   396  //   - only support one table list per shard. If we encounter a different
   397  //     table list that the provided one, we error out.
   398  //   - we don't support DisableQueryService at the same time as DeniedTables,
   399  //     because it's not used in the same context (vertical vs horizontal sharding)
   400  //
   401  // This function should be called while holding the keyspace lock.
   402  func (si *ShardInfo) UpdateSourceDeniedTables(ctx context.Context, tabletType topodatapb.TabletType, cells []string, remove bool, tables []string) error {
   403  	if err := CheckKeyspaceLocked(ctx, si.keyspace); err != nil {
   404  		return err
   405  	}
   406  	if tabletType == topodatapb.TabletType_PRIMARY && len(cells) > 0 {
   407  		return fmt.Errorf(dlNoCellsForPrimary)
   408  	}
   409  	tc := si.GetTabletControl(tabletType)
   410  	if tc == nil {
   411  
   412  		// handle the case where the TabletControl object is new
   413  		if remove {
   414  			// we try to remove from something that doesn't exist,
   415  			// log, but we're done.
   416  			log.Warningf("Trying to remove TabletControl.DeniedTables for missing type %v in shard %v/%v", tabletType, si.keyspace, si.shardName)
   417  			return nil
   418  		}
   419  
   420  		// trying to add more constraints with no existing record
   421  		si.TabletControls = append(si.TabletControls, &topodatapb.Shard_TabletControl{
   422  			TabletType:   tabletType,
   423  			Cells:        cells,
   424  			DeniedTables: tables,
   425  		})
   426  		return nil
   427  	}
   428  
   429  	if tabletType == topodatapb.TabletType_PRIMARY {
   430  		if err := si.updatePrimaryTabletControl(tc, remove, tables); err != nil {
   431  			return err
   432  		}
   433  		return nil
   434  	}
   435  
   436  	// we have an existing record, check table lists matches and
   437  	if remove {
   438  		si.removeCellsFromTabletControl(tc, tabletType, cells)
   439  	} else {
   440  		if !reflect.DeepEqual(tc.DeniedTables, tables) {
   441  			return vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "trying to use two different sets of denied tables for shard %v/%v: %v and %v", si.keyspace, si.shardName, tc.DeniedTables, tables)
   442  		}
   443  
   444  		tc.Cells = addCells(tc.Cells, cells)
   445  	}
   446  	return nil
   447  }
   448  
   449  func (si *ShardInfo) updatePrimaryTabletControl(tc *topodatapb.Shard_TabletControl, remove bool, tables []string) error {
   450  	var newTables []string
   451  	for _, table := range tables {
   452  		exists := false
   453  		for _, blt := range tc.DeniedTables {
   454  			if blt == table {
   455  				exists = true
   456  				break
   457  			}
   458  		}
   459  		if !exists {
   460  			newTables = append(newTables, table)
   461  		}
   462  	}
   463  	if remove {
   464  		if len(newTables) != 0 {
   465  			return vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, dlTablesNotPresent)
   466  		}
   467  		var newDenyList []string
   468  		if len(tables) != 0 { // legacy uses
   469  			for _, blt := range tc.DeniedTables {
   470  				mustDelete := false
   471  				for _, table := range tables {
   472  					if blt == table {
   473  						mustDelete = true
   474  						break
   475  					}
   476  				}
   477  				if !mustDelete {
   478  					newDenyList = append(newDenyList, blt)
   479  				}
   480  			}
   481  		}
   482  		tc.DeniedTables = newDenyList
   483  		if len(tc.DeniedTables) == 0 {
   484  			si.removeTabletTypeFromTabletControl(topodatapb.TabletType_PRIMARY)
   485  		}
   486  		return nil
   487  	}
   488  	if len(newTables) != len(tables) {
   489  		return vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, dlTablesAlreadyPresent)
   490  	}
   491  	tc.DeniedTables = append(tc.DeniedTables, tables...)
   492  	return nil
   493  }
   494  
   495  func (si *ShardInfo) removeTabletTypeFromTabletControl(tabletType topodatapb.TabletType) {
   496  	var tabletControls []*topodatapb.Shard_TabletControl
   497  	for _, tc := range si.TabletControls {
   498  		if tc.TabletType != tabletType {
   499  			tabletControls = append(tabletControls, tc)
   500  		}
   501  	}
   502  	si.TabletControls = tabletControls
   503  }
   504  
   505  func (si *ShardInfo) removeCellsFromTabletControl(tc *topodatapb.Shard_TabletControl, tabletType topodatapb.TabletType, cells []string) {
   506  	result := removeCellsFromList(cells, tc.Cells)
   507  	if len(result) == 0 {
   508  		// we don't have any cell left, we need to clear this record
   509  		si.removeTabletTypeFromTabletControl(tabletType)
   510  	} else {
   511  		tc.Cells = result
   512  	}
   513  }
   514  
   515  //
   516  // Utility functions for shards
   517  //
   518  
   519  // InCellList returns true if the cell list is empty,
   520  // or if the passed cell is in the cell list.
   521  func InCellList(cell string, cells []string) bool {
   522  	if len(cells) == 0 {
   523  		return true
   524  	}
   525  	for _, c := range cells {
   526  		if c == cell {
   527  			return true
   528  		}
   529  	}
   530  	return false
   531  }
   532  
   533  // FindAllTabletAliasesInShard uses the replication graph to find all the
   534  // tablet aliases in the given shard.
   535  //
   536  // It can return ErrPartialResult if some cells were not fetched,
   537  // in which case the result only contains the cells that were fetched.
   538  //
   539  // The tablet aliases are sorted by cell, then by UID.
   540  func (ts *Server) FindAllTabletAliasesInShard(ctx context.Context, keyspace, shard string) ([]*topodatapb.TabletAlias, error) {
   541  	return ts.FindAllTabletAliasesInShardByCell(ctx, keyspace, shard, nil)
   542  }
   543  
   544  // FindAllTabletAliasesInShardByCell uses the replication graph to find all the
   545  // tablet aliases in the given shard.
   546  //
   547  // It can return ErrPartialResult if some cells were not fetched,
   548  // in which case the result only contains the cells that were fetched.
   549  //
   550  // The tablet aliases are sorted by cell, then by UID.
   551  func (ts *Server) FindAllTabletAliasesInShardByCell(ctx context.Context, keyspace, shard string, cells []string) ([]*topodatapb.TabletAlias, error) {
   552  	span, ctx := trace.NewSpan(ctx, "topo.FindAllTabletAliasesInShardbyCell")
   553  	span.Annotate("keyspace", keyspace)
   554  	span.Annotate("shard", shard)
   555  	span.Annotate("num_cells", len(cells))
   556  	defer span.Finish()
   557  	ctx = trace.NewContext(ctx, span)
   558  	var err error
   559  
   560  	// The caller intents to all cells
   561  	if len(cells) == 0 {
   562  		cells, err = ts.GetCellInfoNames(ctx)
   563  		if err != nil {
   564  			return nil, err
   565  		}
   566  	}
   567  
   568  	// read the shard information to find the cells
   569  	si, err := ts.GetShard(ctx, keyspace, shard)
   570  	if err != nil {
   571  		return nil, err
   572  	}
   573  
   574  	resultAsMap := make(map[string]*topodatapb.TabletAlias)
   575  	if si.HasPrimary() {
   576  		if InCellList(si.PrimaryAlias.Cell, cells) {
   577  			resultAsMap[topoproto.TabletAliasString(si.PrimaryAlias)] = si.PrimaryAlias
   578  		}
   579  	}
   580  
   581  	// read the replication graph in each cell and add all found tablets
   582  	wg := sync.WaitGroup{}
   583  	mutex := sync.Mutex{}
   584  	rec := concurrency.AllErrorRecorder{}
   585  	result := make([]*topodatapb.TabletAlias, 0, len(resultAsMap))
   586  	for _, cell := range cells {
   587  		wg.Add(1)
   588  		go func(cell string) {
   589  			defer wg.Done()
   590  			sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard)
   591  			switch {
   592  			case err == nil:
   593  				mutex.Lock()
   594  				for _, node := range sri.Nodes {
   595  					resultAsMap[topoproto.TabletAliasString(node.TabletAlias)] = node.TabletAlias
   596  				}
   597  				mutex.Unlock()
   598  			case IsErrType(err, NoNode):
   599  				// There is no shard replication for this shard in this cell. NOOP
   600  			default:
   601  				rec.RecordError(vterrors.Wrap(err, fmt.Sprintf("GetShardReplication(%v, %v, %v) failed.", cell, keyspace, shard)))
   602  				return
   603  			}
   604  		}(cell)
   605  	}
   606  	wg.Wait()
   607  	err = nil
   608  	if rec.HasErrors() {
   609  		log.Warningf("FindAllTabletAliasesInShard(%v,%v): got partial result: %v", keyspace, shard, rec.Error())
   610  		err = NewError(PartialResult, shard)
   611  	}
   612  
   613  	for _, a := range resultAsMap {
   614  		result = append(result, proto.Clone(a).(*topodatapb.TabletAlias))
   615  	}
   616  	sort.Sort(topoproto.TabletAliasList(result))
   617  	return result, err
   618  }
   619  
   620  // GetTabletMapForShard returns the tablets for a shard. It can return
   621  // ErrPartialResult if it couldn't read all the cells, or all
   622  // the individual tablets, in which case the map is valid, but partial.
   623  // The map is indexed by topoproto.TabletAliasString(tablet alias).
   624  func (ts *Server) GetTabletMapForShard(ctx context.Context, keyspace, shard string) (map[string]*TabletInfo, error) {
   625  	return ts.GetTabletMapForShardByCell(ctx, keyspace, shard, nil)
   626  }
   627  
   628  // GetTabletMapForShardByCell returns the tablets for a shard. It can return
   629  // ErrPartialResult if it couldn't read all the cells, or all
   630  // the individual tablets, in which case the map is valid, but partial.
   631  // The map is indexed by topoproto.TabletAliasString(tablet alias).
   632  func (ts *Server) GetTabletMapForShardByCell(ctx context.Context, keyspace, shard string, cells []string) (map[string]*TabletInfo, error) {
   633  	// if we get a partial result, we keep going. It most likely means
   634  	// a cell is out of commission.
   635  	aliases, err := ts.FindAllTabletAliasesInShardByCell(ctx, keyspace, shard, cells)
   636  	if err != nil && !IsErrType(err, PartialResult) {
   637  		return nil, err
   638  	}
   639  
   640  	// get the tablets for the cells we were able to reach, forward
   641  	// ErrPartialResult from FindAllTabletAliasesInShard
   642  	result, gerr := ts.GetTabletMap(ctx, aliases)
   643  	if gerr == nil && err != nil {
   644  		gerr = err
   645  	}
   646  	return result, gerr
   647  }
   648  
   649  func shardFilePath(keyspace, shard string) string {
   650  	return path.Join(KeyspacesPath, keyspace, ShardsPath, shard, ShardFile)
   651  }
   652  
// WatchShardData wraps the data we receive on the watch channel
// The WatchShard API guarantees exactly one of Value or Err will be set.
type WatchShardData struct {
	// Value is the unpacked Shard record of a successful notification.
	Value *topodatapb.Shard
	// Err is the error that ended the watch; it is the last value sent.
	Err   error
}
   659  
// WatchShard will set a watch on the Shard object.
// It has the same contract as conn.Watch, but it also unpacks the
// contents into a Shard object
//
// It returns the current value of the shard, a channel on which later
// changes are delivered, and an error. When the error is non-nil, the two
// other values are nil and the underlying watch has been cancelled. The
// changes channel is closed once the watch ends; the last value sent on it
// before that carries the error that ended the watch.
func (ts *Server) WatchShard(ctx context.Context, keyspace, shard string) (*WatchShardData, <-chan *WatchShardData, error) {
	shardPath := shardFilePath(keyspace, shard)
	// Derive a cancellable context so that the watch can be stopped from
	// here when unpacking fails.
	ctx, cancel := context.WithCancel(ctx)

	current, wdChannel, err := ts.globalCell.Watch(ctx, shardPath)
	if err != nil {
		cancel()
		return nil, nil, err
	}
	value := &topodatapb.Shard{}
	if err := proto.Unmarshal(current.Contents, value); err != nil {
		// Cancel the watch, drain channel.
		cancel()
		for range wdChannel {
		}
		return nil, nil, vterrors.Wrapf(err, "error unpacking initial Shard object")
	}

	// Buffered, so a few notifications can queue up before the caller
	// reads them.
	changes := make(chan *WatchShardData, 10)
	// The background routine reads any event from the watch channel,
	// translates it, and sends it to the caller.
	// If cancel() is called, the underlying Watch() code will
	// send an ErrInterrupted and then close the channel. We'll
	// just propagate that back to our caller.
	go func() {
		// Deferred calls run in reverse order: changes is closed first,
		// then the context is cancelled.
		defer cancel()
		defer close(changes)

		for wd := range wdChannel {
			if wd.Err != nil {
				// Last error value, we're done.
				// wdChannel will be closed right after
				// this, no need to do anything.
				changes <- &WatchShardData{Err: wd.Err}
				return
			}

			value := &topodatapb.Shard{}
			if err := proto.Unmarshal(wd.Contents, value); err != nil {
				// Stop the underlying watch and drain its channel
				// before reporting the unpacking error as the last
				// value.
				cancel()
				for range wdChannel {
				}
				changes <- &WatchShardData{Err: vterrors.Wrapf(err, "error unpacking Shard object")}
				return
			}

			changes <- &WatchShardData{Value: value}
		}
	}()

	return &WatchShardData{Value: value}, changes, nil
}