vitess.io/vitess@v0.16.2/go/vt/topo/srv_keyspace.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package topo
    18  
    19  import (
    20  	"context"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"path"
    24  	"sync"
    25  
    26  	"google.golang.org/protobuf/proto"
    27  
    28  	"vitess.io/vitess/go/vt/vterrors"
    29  
    30  	"vitess.io/vitess/go/vt/concurrency"
    31  	"vitess.io/vitess/go/vt/key"
    32  	"vitess.io/vitess/go/vt/topo/topoproto"
    33  
    34  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    35  )
    36  
    37  // This file contains the utility methods to manage SrvKeyspace objects.
    38  
    39  func srvKeyspaceFileName(keyspace string) string {
    40  	return path.Join(KeyspacesPath, keyspace, SrvKeyspaceFile)
    41  }
    42  
// WatchSrvKeyspaceData is returned / streamed by WatchSrvKeyspace.
// The WatchSrvKeyspace API guarantees exactly one of Value or Err will be set.
type WatchSrvKeyspaceData struct {
	// Value is the SrvKeyspace unpacked from the watched node's contents.
	Value *topodatapb.SrvKeyspace
	// Err is the error reported by the underlying watch, or the error hit
	// while unpacking the node's contents.
	Err   error
}
    49  
    50  // WatchSrvKeyspace will set a watch on the SrvKeyspace object.
    51  // It has the same contract as Conn.Watch, but it also unpacks the
    52  // contents into a SrvKeyspace object.
    53  func (ts *Server) WatchSrvKeyspace(ctx context.Context, cell, keyspace string) (*WatchSrvKeyspaceData, <-chan *WatchSrvKeyspaceData, error) {
    54  	conn, err := ts.ConnForCell(ctx, cell)
    55  	if err != nil {
    56  		return &WatchSrvKeyspaceData{Err: err}, nil, nil
    57  	}
    58  
    59  	filePath := srvKeyspaceFileName(keyspace)
    60  	ctx, cancel := context.WithCancel(ctx)
    61  	current, wdChannel, err := conn.Watch(ctx, filePath)
    62  	if err != nil {
    63  		cancel()
    64  		return nil, nil, err
    65  	}
    66  	value := &topodatapb.SrvKeyspace{}
    67  	if err := proto.Unmarshal(current.Contents, value); err != nil {
    68  		// Cancel the watch, drain channel.
    69  		cancel()
    70  		for range wdChannel {
    71  		}
    72  		return nil, nil, vterrors.Wrapf(err, "error unpacking initial SrvKeyspace object")
    73  	}
    74  
    75  	changes := make(chan *WatchSrvKeyspaceData, 10)
    76  
    77  	// The background routine reads any event from the watch channel,
    78  	// translates it, and sends it to the caller.
    79  	// If cancel() is called, the underlying Watch() code will
    80  	// send an ErrInterrupted and then close the channel. We'll
    81  	// just propagate that back to our caller.
    82  	go func() {
    83  		defer cancel()
    84  		defer close(changes)
    85  
    86  		for wd := range wdChannel {
    87  			if wd.Err != nil {
    88  				// Last error value, we're done.
    89  				// wdChannel will be closed right after
    90  				// this, no need to do anything.
    91  				changes <- &WatchSrvKeyspaceData{Err: wd.Err}
    92  				return
    93  			}
    94  
    95  			value := &topodatapb.SrvKeyspace{}
    96  			if err := proto.Unmarshal(wd.Contents, value); err != nil {
    97  				cancel()
    98  				for range wdChannel {
    99  				}
   100  				changes <- &WatchSrvKeyspaceData{Err: vterrors.Wrapf(err, "error unpacking SrvKeyspace object")}
   101  				return
   102  			}
   103  
   104  			changes <- &WatchSrvKeyspaceData{Value: value}
   105  		}
   106  	}()
   107  
   108  	return &WatchSrvKeyspaceData{Value: value}, changes, nil
   109  }
   110  
   111  // GetSrvKeyspaceNames returns the SrvKeyspace objects for a cell.
   112  func (ts *Server) GetSrvKeyspaceNames(ctx context.Context, cell string) ([]string, error) {
   113  	conn, err := ts.ConnForCell(ctx, cell)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	children, err := conn.ListDir(ctx, KeyspacesPath, false /*full*/)
   119  	switch {
   120  	case err == nil:
   121  		return DirEntriesToStringArray(children), nil
   122  	case IsErrType(err, NoNode):
   123  		return nil, nil
   124  	default:
   125  		return nil, err
   126  	}
   127  }
   128  
   129  // GetShardServingCells returns cells where this shard is serving
   130  func (ts *Server) GetShardServingCells(ctx context.Context, si *ShardInfo) (servingCells []string, err error) {
   131  	cells, err := ts.GetCellInfoNames(ctx)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	wg := sync.WaitGroup{}
   137  	rec := concurrency.AllErrorRecorder{}
   138  	servingCells = make([]string, 0)
   139  	var mu sync.Mutex
   140  	for _, cell := range cells {
   141  		wg.Add(1)
   142  		go func(cell, keyspace string) {
   143  			defer wg.Done()
   144  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, si.keyspace)
   145  			switch {
   146  			case err == nil:
   147  				for _, partition := range srvKeyspace.GetPartitions() {
   148  					for _, shardReference := range partition.ShardReferences {
   149  						if shardReference.GetName() == si.ShardName() {
   150  							func() {
   151  								mu.Lock()
   152  								defer mu.Unlock()
   153  								// Check that this cell hasn't been added already
   154  								for _, servingCell := range servingCells {
   155  									if servingCell == cell {
   156  										return
   157  									}
   158  								}
   159  								servingCells = append(servingCells, cell)
   160  							}()
   161  						}
   162  					}
   163  				}
   164  			case IsErrType(err, NoNode):
   165  				// NOOP
   166  				return
   167  			default:
   168  				rec.RecordError(err)
   169  				return
   170  			}
   171  		}(cell, si.Keyspace())
   172  	}
   173  	wg.Wait()
   174  	if rec.HasErrors() {
   175  		return nil, NewError(PartialResult, rec.Error().Error())
   176  	}
   177  	return servingCells, nil
   178  }
   179  
   180  // GetShardServingTypes returns served types for given shard across all cells
   181  func (ts *Server) GetShardServingTypes(ctx context.Context, si *ShardInfo) (servingTypes []topodatapb.TabletType, err error) {
   182  	cells, err := ts.GetCellInfoNames(ctx)
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  
   187  	wg := sync.WaitGroup{}
   188  	rec := concurrency.AllErrorRecorder{}
   189  	servingTypes = make([]topodatapb.TabletType, 0)
   190  	var mu sync.Mutex
   191  	for _, cell := range cells {
   192  		wg.Add(1)
   193  		go func(cell, keyspace string) {
   194  			defer wg.Done()
   195  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, si.keyspace)
   196  			switch {
   197  			case err == nil:
   198  				func() {
   199  					mu.Lock()
   200  					defer mu.Unlock()
   201  					for _, partition := range srvKeyspace.GetPartitions() {
   202  						partitionAlreadyAdded := false
   203  						for _, servingType := range servingTypes {
   204  							if servingType == partition.ServedType {
   205  								partitionAlreadyAdded = true
   206  								break
   207  							}
   208  						}
   209  
   210  						if !partitionAlreadyAdded {
   211  							for _, shardReference := range partition.ShardReferences {
   212  								if shardReference.GetName() == si.ShardName() {
   213  									servingTypes = append(servingTypes, partition.ServedType)
   214  									break
   215  								}
   216  							}
   217  						}
   218  
   219  					}
   220  				}()
   221  			case IsErrType(err, NoNode):
   222  				// NOOP
   223  				return
   224  			default:
   225  				rec.RecordError(err)
   226  				return
   227  			}
   228  		}(cell, si.Keyspace())
   229  	}
   230  	wg.Wait()
   231  	if rec.HasErrors() {
   232  		return nil, NewError(PartialResult, rec.Error().Error())
   233  	}
   234  	return servingTypes, nil
   235  }
   236  
   237  // AddSrvKeyspacePartitions adds partitions to srvKeyspace
   238  func (ts *Server) AddSrvKeyspacePartitions(ctx context.Context, keyspace string, shards []*ShardInfo, tabletType topodatapb.TabletType, cells []string) (err error) {
   239  	if err = CheckKeyspaceLocked(ctx, keyspace); err != nil {
   240  		return err
   241  	}
   242  
   243  	// The caller intents to update all cells in this case
   244  	if len(cells) == 0 {
   245  		cells, err = ts.GetCellInfoNames(ctx)
   246  		if err != nil {
   247  			return err
   248  		}
   249  	}
   250  
   251  	wg := sync.WaitGroup{}
   252  	rec := concurrency.AllErrorRecorder{}
   253  	for _, cell := range cells {
   254  		wg.Add(1)
   255  		go func(cell string) {
   256  			defer wg.Done()
   257  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   258  			switch {
   259  			case err == nil:
   260  				partitionFound := false
   261  
   262  				for _, partition := range srvKeyspace.GetPartitions() {
   263  					if partition.GetServedType() != tabletType {
   264  						continue
   265  					}
   266  					partitionFound = true
   267  
   268  					for _, si := range shards {
   269  						found := false
   270  						for _, shardReference := range partition.GetShardReferences() {
   271  							if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) {
   272  								found = true
   273  							}
   274  						}
   275  
   276  						if !found {
   277  							shardReference := &topodatapb.ShardReference{
   278  								Name:     si.ShardName(),
   279  								KeyRange: si.KeyRange,
   280  							}
   281  							partition.ShardReferences = append(partition.GetShardReferences(), shardReference)
   282  						}
   283  					}
   284  				}
   285  
   286  				// Partition does not exist at all, we need to create it
   287  				if !partitionFound {
   288  
   289  					partition := &topodatapb.SrvKeyspace_KeyspacePartition{
   290  						ServedType: tabletType,
   291  					}
   292  
   293  					shardReferences := make([]*topodatapb.ShardReference, 0)
   294  					for _, si := range shards {
   295  						shardReference := &topodatapb.ShardReference{
   296  							Name:     si.ShardName(),
   297  							KeyRange: si.KeyRange,
   298  						}
   299  						shardReferences = append(shardReferences, shardReference)
   300  					}
   301  
   302  					partition.ShardReferences = shardReferences
   303  
   304  					srvKeyspace.Partitions = append(srvKeyspace.GetPartitions(), partition)
   305  				}
   306  
   307  				err = ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace)
   308  				if err != nil {
   309  					rec.RecordError(err)
   310  					return
   311  				}
   312  			case IsErrType(err, NoNode):
   313  				// NOOP
   314  			default:
   315  				rec.RecordError(err)
   316  				return
   317  			}
   318  		}(cell)
   319  	}
   320  	wg.Wait()
   321  	if rec.HasErrors() {
   322  		return NewError(PartialResult, rec.Error().Error())
   323  	}
   324  	return nil
   325  }
   326  
   327  // DeleteSrvKeyspacePartitions deletes shards from srvKeyspace partitions
   328  func (ts *Server) DeleteSrvKeyspacePartitions(ctx context.Context, keyspace string, shards []*ShardInfo, tabletType topodatapb.TabletType, cells []string) (err error) {
   329  	if err = CheckKeyspaceLocked(ctx, keyspace); err != nil {
   330  		return err
   331  	}
   332  
   333  	// The caller intents to update all cells in this case
   334  	if len(cells) == 0 {
   335  		cells, err = ts.GetCellInfoNames(ctx)
   336  		if err != nil {
   337  			return err
   338  		}
   339  	}
   340  
   341  	wg := sync.WaitGroup{}
   342  	rec := concurrency.AllErrorRecorder{}
   343  	for _, cell := range cells {
   344  		wg.Add(1)
   345  		go func(cell string) {
   346  			defer wg.Done()
   347  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   348  			switch {
   349  			case err == nil:
   350  				for _, partition := range srvKeyspace.GetPartitions() {
   351  					if partition.GetServedType() != tabletType {
   352  						continue
   353  					}
   354  
   355  					for _, si := range shards {
   356  						found := false
   357  						for _, shardReference := range partition.GetShardReferences() {
   358  							// Use shard name rather than key range so it works
   359  							// for both range-based and non-range-based shards.
   360  							if shardReference.GetName() == si.ShardName() {
   361  								found = true
   362  							}
   363  						}
   364  
   365  						if found {
   366  							shardReferences := make([]*topodatapb.ShardReference, 0)
   367  							for _, shardReference := range partition.GetShardReferences() {
   368  								// Use shard name rather than key range so it works
   369  								// for both range-based and non-range-based shards.
   370  								if shardReference.GetName() != si.ShardName() {
   371  									shardReferences = append(shardReferences, shardReference)
   372  								}
   373  							}
   374  							partition.ShardReferences = shardReferences
   375  						}
   376  					}
   377  				}
   378  
   379  				err = ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace)
   380  				if err != nil {
   381  					rec.RecordError(err)
   382  					return
   383  				}
   384  			case IsErrType(err, NoNode):
   385  				// NOOP
   386  			default:
   387  				rec.RecordError(err)
   388  				return
   389  			}
   390  		}(cell)
   391  	}
   392  	wg.Wait()
   393  	if rec.HasErrors() {
   394  		return NewError(PartialResult, rec.Error().Error())
   395  	}
   396  	return nil
   397  }
   398  
   399  // UpdateSrvKeyspaceThrottlerConfig updates existing throttler configuration
   400  func (ts *Server) UpdateSrvKeyspaceThrottlerConfig(ctx context.Context, keyspace string, cells []string, update func(throttlerConfig *topodatapb.ThrottlerConfig) *topodatapb.ThrottlerConfig) (updatedCells []string, err error) {
   401  	if err = CheckKeyspaceLocked(ctx, keyspace); err != nil {
   402  		return updatedCells, err
   403  	}
   404  
   405  	// The caller intends to update all cells in this case
   406  	if len(cells) == 0 {
   407  		cells, err = ts.GetCellInfoNames(ctx)
   408  		if err != nil {
   409  			return updatedCells, err
   410  		}
   411  	}
   412  
   413  	wg := sync.WaitGroup{}
   414  	rec := concurrency.AllErrorRecorder{}
   415  	for _, cell := range cells {
   416  		wg.Add(1)
   417  		go func(cell string) {
   418  			defer wg.Done()
   419  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   420  			switch {
   421  			case err == nil:
   422  				srvKeyspace.ThrottlerConfig = update(srvKeyspace.ThrottlerConfig)
   423  				if err := ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace); err != nil {
   424  					rec.RecordError(err)
   425  					return
   426  				}
   427  				updatedCells = append(updatedCells, cell)
   428  				return
   429  			case IsErrType(err, NoNode):
   430  				// NOOP as not every cell will contain a serving tablet in the keyspace
   431  			default:
   432  				rec.RecordError(err)
   433  				return
   434  			}
   435  		}(cell)
   436  	}
   437  	wg.Wait()
   438  	if rec.HasErrors() {
   439  		return updatedCells, NewError(PartialResult, rec.Error().Error())
   440  	}
   441  	return updatedCells, nil
   442  }
   443  
   444  // UpdateDisableQueryService will make sure the disableQueryService is
   445  // set appropriately in tablet controls in srvKeyspace.
   446  func (ts *Server) UpdateDisableQueryService(ctx context.Context, keyspace string, shards []*ShardInfo, tabletType topodatapb.TabletType, cells []string, disableQueryService bool) (err error) {
   447  	if err = CheckKeyspaceLocked(ctx, keyspace); err != nil {
   448  		return err
   449  	}
   450  
   451  	// The caller intends to update all cells in this case
   452  	if len(cells) == 0 {
   453  		cells, err = ts.GetCellInfoNames(ctx)
   454  		if err != nil {
   455  			return err
   456  		}
   457  	}
   458  
   459  	for _, shard := range shards {
   460  		for _, tc := range shard.TabletControls {
   461  			if len(tc.DeniedTables) > 0 {
   462  				return fmt.Errorf("cannot safely alter DisableQueryService as DeniedTables is set for shard %v", shard)
   463  			}
   464  		}
   465  	}
   466  
   467  	if !disableQueryService {
   468  		for _, si := range shards {
   469  			tc := si.GetTabletControl(tabletType)
   470  			if tc == nil {
   471  				continue
   472  			}
   473  			if tc.Frozen {
   474  				return fmt.Errorf("migrate has gone past the point of no return, cannot re-enable serving for %v/%v", si.keyspace, si.shardName)
   475  			}
   476  		}
   477  	}
   478  
   479  	wg := sync.WaitGroup{}
   480  	rec := concurrency.AllErrorRecorder{}
   481  	for _, cell := range cells {
   482  		wg.Add(1)
   483  		go func(cell string) {
   484  			defer wg.Done()
   485  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   486  			switch {
   487  			case err == nil:
   488  				for _, partition := range srvKeyspace.GetPartitions() {
   489  					if partition.GetServedType() != tabletType {
   490  						continue
   491  					}
   492  
   493  					for _, si := range shards {
   494  						found := false
   495  						for _, tabletControl := range partition.GetShardTabletControls() {
   496  							if key.KeyRangeEqual(tabletControl.GetKeyRange(), si.GetKeyRange()) {
   497  								found = true
   498  								tabletControl.QueryServiceDisabled = disableQueryService
   499  							}
   500  						}
   501  
   502  						if !found {
   503  							shardTabletControl := &topodatapb.ShardTabletControl{
   504  								Name:                 si.ShardName(),
   505  								KeyRange:             si.KeyRange,
   506  								QueryServiceDisabled: disableQueryService,
   507  							}
   508  							partition.ShardTabletControls = append(partition.GetShardTabletControls(), shardTabletControl)
   509  						}
   510  					}
   511  				}
   512  
   513  				err = ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace)
   514  				if err != nil {
   515  					rec.RecordError(err)
   516  					return
   517  				}
   518  			case IsErrType(err, NoNode):
   519  				// NOOP
   520  			default:
   521  				rec.RecordError(err)
   522  				return
   523  			}
   524  		}(cell)
   525  	}
   526  	wg.Wait()
   527  	if rec.HasErrors() {
   528  		return NewError(PartialResult, rec.Error().Error())
   529  	}
   530  	return nil
   531  }
   532  
   533  // MigrateServedType removes/adds shards from srvKeyspace when migrating a served type.
   534  func (ts *Server) MigrateServedType(ctx context.Context, keyspace string, shardsToAdd, shardsToRemove []*ShardInfo, tabletType topodatapb.TabletType, cells []string) (err error) {
   535  	if err = CheckKeyspaceLocked(ctx, keyspace); err != nil {
   536  		return err
   537  	}
   538  
   539  	// The caller intents to update all cells in this case
   540  	if len(cells) == 0 {
   541  		cells, err = ts.GetCellInfoNames(ctx)
   542  		if err != nil {
   543  			return err
   544  		}
   545  	}
   546  
   547  	wg := sync.WaitGroup{}
   548  	rec := concurrency.AllErrorRecorder{}
   549  	for _, cell := range cells {
   550  		wg.Add(1)
   551  		go func(cell, keyspace string) {
   552  			defer wg.Done()
   553  			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   554  			switch {
   555  			case err == nil:
   556  				for _, partition := range srvKeyspace.GetPartitions() {
   557  
   558  					// We are finishing the migration, cleaning up tablet controls from the srvKeyspace
   559  					if tabletType == topodatapb.TabletType_PRIMARY {
   560  						partition.ShardTabletControls = nil
   561  					}
   562  
   563  					if partition.GetServedType() != tabletType {
   564  						continue
   565  					}
   566  
   567  					shardReferences := make([]*topodatapb.ShardReference, 0)
   568  
   569  					for _, shardReference := range partition.GetShardReferences() {
   570  						inShardsToRemove := false
   571  						for _, si := range shardsToRemove {
   572  							if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) {
   573  								inShardsToRemove = true
   574  								break
   575  							}
   576  						}
   577  
   578  						if !inShardsToRemove {
   579  							shardReferences = append(shardReferences, shardReference)
   580  						}
   581  					}
   582  
   583  					for _, si := range shardsToAdd {
   584  						alreadyAdded := false
   585  						for _, shardReference := range partition.GetShardReferences() {
   586  							if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) {
   587  								alreadyAdded = true
   588  								break
   589  							}
   590  						}
   591  
   592  						if !alreadyAdded {
   593  							shardReference := &topodatapb.ShardReference{
   594  								Name:     si.ShardName(),
   595  								KeyRange: si.KeyRange,
   596  							}
   597  							shardReferences = append(shardReferences, shardReference)
   598  						}
   599  					}
   600  
   601  					partition.ShardReferences = shardReferences
   602  				}
   603  
   604  				if err := OrderAndCheckPartitions(cell, srvKeyspace); err != nil {
   605  					rec.RecordError(err)
   606  					return
   607  				}
   608  
   609  				err = ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace)
   610  				if err != nil {
   611  					rec.RecordError(err)
   612  					return
   613  				}
   614  
   615  			case IsErrType(err, NoNode):
   616  				// Assuming this cell is not active, nothing to do.
   617  			default:
   618  				if err != nil {
   619  					rec.RecordError(err)
   620  					return
   621  				}
   622  			}
   623  		}(cell, keyspace)
   624  	}
   625  	wg.Wait()
   626  	if rec.HasErrors() {
   627  		return NewError(PartialResult, rec.Error().Error())
   628  	}
   629  	return nil
   630  }
   631  
   632  // UpdateSrvKeyspace saves a new SrvKeyspace. It is a blind write.
   633  func (ts *Server) UpdateSrvKeyspace(ctx context.Context, cell, keyspace string, srvKeyspace *topodatapb.SrvKeyspace) error {
   634  	conn, err := ts.ConnForCell(ctx, cell)
   635  	if err != nil {
   636  		return err
   637  	}
   638  
   639  	nodePath := srvKeyspaceFileName(keyspace)
   640  	data, err := proto.Marshal(srvKeyspace)
   641  	if err != nil {
   642  		return err
   643  	}
   644  	_, err = conn.Update(ctx, nodePath, data, nil)
   645  	return err
   646  }
   647  
   648  // DeleteSrvKeyspace deletes a SrvKeyspace.
   649  func (ts *Server) DeleteSrvKeyspace(ctx context.Context, cell, keyspace string) error {
   650  	conn, err := ts.ConnForCell(ctx, cell)
   651  	if err != nil {
   652  		return err
   653  	}
   654  
   655  	nodePath := srvKeyspaceFileName(keyspace)
   656  	return conn.Delete(ctx, nodePath, nil)
   657  }
   658  
   659  // GetSrvKeyspaceAllCells returns the SrvKeyspace for all cells
   660  func (ts *Server) GetSrvKeyspaceAllCells(ctx context.Context, keyspace string) ([]*topodatapb.SrvKeyspace, error) {
   661  	cells, err := ts.GetCellInfoNames(ctx)
   662  	if err != nil {
   663  		return nil, err
   664  	}
   665  
   666  	srvKeyspaces := make([]*topodatapb.SrvKeyspace, len(cells))
   667  	for _, cell := range cells {
   668  		srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   669  		switch {
   670  		case err == nil:
   671  			srvKeyspaces = append(srvKeyspaces, srvKeyspace)
   672  		case IsErrType(err, NoNode):
   673  			// NOOP
   674  		default:
   675  			return srvKeyspaces, err
   676  		}
   677  	}
   678  	return srvKeyspaces, nil
   679  }
   680  
   681  // GetSrvKeyspace returns the SrvKeyspace for a cell/keyspace.
   682  func (ts *Server) GetSrvKeyspace(ctx context.Context, cell, keyspace string) (*topodatapb.SrvKeyspace, error) {
   683  	conn, err := ts.ConnForCell(ctx, cell)
   684  	if err != nil {
   685  		return nil, err
   686  	}
   687  
   688  	nodePath := srvKeyspaceFileName(keyspace)
   689  	data, _, err := conn.Get(ctx, nodePath)
   690  	if err != nil {
   691  		return nil, err
   692  	}
   693  	srvKeyspace := &topodatapb.SrvKeyspace{}
   694  	if err := proto.Unmarshal(data, srvKeyspace); err != nil {
   695  		return nil, vterrors.Wrapf(err, "SrvKeyspace unmarshal failed: %v", data)
   696  	}
   697  	return srvKeyspace, nil
   698  }
   699  
   700  // OrderAndCheckPartitions will re-order the partition list, and check
   701  // it's correct.
   702  func OrderAndCheckPartitions(cell string, srvKeyspace *topodatapb.SrvKeyspace) error {
   703  	// now check them all
   704  	for _, partition := range srvKeyspace.Partitions {
   705  		tabletType := partition.ServedType
   706  		topoproto.ShardReferenceArray(partition.ShardReferences).Sort()
   707  
   708  		// check the first Start is MinKey, the last End is MaxKey,
   709  		// and the values in between match: End[i] == Start[i+1]
   710  		first := partition.ShardReferences[0]
   711  		if first.KeyRange != nil && len(first.KeyRange.Start) != 0 {
   712  			return fmt.Errorf("keyspace partition for %v in cell %v does not start with min key", tabletType, cell)
   713  		}
   714  		last := partition.ShardReferences[len(partition.ShardReferences)-1]
   715  		if last.KeyRange != nil && len(last.KeyRange.End) != 0 {
   716  			return fmt.Errorf("keyspace partition for %v in cell %v does not end with max key", tabletType, cell)
   717  		}
   718  		for i := range partition.ShardReferences[0 : len(partition.ShardReferences)-1] {
   719  			currShard := partition.ShardReferences[i]
   720  			nextShard := partition.ShardReferences[i+1]
   721  			currHasKeyRange := currShard.KeyRange != nil
   722  			nextHasKeyRange := nextShard.KeyRange != nil
   723  			if currHasKeyRange != nextHasKeyRange {
   724  				return fmt.Errorf("shards with inconsistent KeyRanges for %v in cell %v. shards: %v, %v", tabletType, cell, currShard, nextShard)
   725  			}
   726  			if !currHasKeyRange {
   727  				// this is the custom sharding case, all KeyRanges must be nil
   728  				continue
   729  			}
   730  			if !key.KeyRangeContiguous(currShard.KeyRange, nextShard.KeyRange) {
   731  				return fmt.Errorf("non-contiguous KeyRange values for %v in cell %v at shard %v to %v: %v != %v", tabletType, cell, i, i+1, hex.EncodeToString(currShard.KeyRange.End), hex.EncodeToString(nextShard.KeyRange.Start))
   732  			}
   733  		}
   734  	}
   735  
   736  	return nil
   737  }
   738  
   739  // ValidateSrvKeyspace validates that the SrvKeyspace for given keyspace in the provided cells is not corrupted
   740  func (ts *Server) ValidateSrvKeyspace(ctx context.Context, keyspace, cells string) error {
   741  	cellsToValidate, err := ts.ExpandCells(ctx, cells)
   742  	if err != nil {
   743  		return err
   744  	}
   745  	for _, cell := range cellsToValidate {
   746  		srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
   747  		if err != nil {
   748  			return err
   749  		}
   750  		err = OrderAndCheckPartitions(cell, srvKeyspace)
   751  		if err != nil {
   752  			return err
   753  		}
   754  	}
   755  	return nil
   756  }