vitess.io/vitess@v0.16.2/go/vt/topotools/rebuild_keyspace.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package topotools
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  
    24  	"vitess.io/vitess/go/vt/concurrency"
    25  	"vitess.io/vitess/go/vt/logutil"
    26  	"vitess.io/vitess/go/vt/topo"
    27  	"vitess.io/vitess/go/vt/topo/topoproto"
    28  
    29  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    30  )
    31  
    32  // RebuildKeyspace rebuilds the serving graph data while locking out other changes.
    33  func RebuildKeyspace(ctx context.Context, log logutil.Logger, ts *topo.Server, keyspace string, cells []string, allowPartial bool) (err error) {
    34  	ctx, unlock, lockErr := ts.LockKeyspace(ctx, keyspace, "RebuildKeyspace")
    35  	if lockErr != nil {
    36  		return lockErr
    37  	}
    38  	defer unlock(&err)
    39  
    40  	return RebuildKeyspaceLocked(ctx, log, ts, keyspace, cells, allowPartial)
    41  }
    42  
    43  // RebuildKeyspaceLocked should only be used with an action lock on the keyspace
    44  // - otherwise the consistency of the serving graph data can't be
    45  // guaranteed.
    46  //
    47  // Take data from the global keyspace and rebuild the local serving
    48  // copies in each cell.
    49  func RebuildKeyspaceLocked(ctx context.Context, log logutil.Logger, ts *topo.Server, keyspace string, cells []string, allowPartial bool) error {
    50  	if err := topo.CheckKeyspaceLocked(ctx, keyspace); err != nil {
    51  		return err
    52  	}
    53  
    54  	ki, err := ts.GetKeyspace(ctx, keyspace)
    55  	if err != nil {
    56  		return err
    57  	}
    58  
    59  	// The caller intents to update all cells in this case
    60  	if len(cells) == 0 {
    61  		cells, err = ts.GetCellInfoNames(ctx)
    62  		if err != nil {
    63  			return err
    64  		}
    65  	}
    66  
    67  	shards, err := ts.FindAllShardsInKeyspace(ctx, keyspace)
    68  	if err != nil {
    69  		return err
    70  	}
    71  
    72  	// This is safe to rebuild as long there are not srvKeyspaces with tablet controls set.
    73  	// Build the list of cells to work on: we get the union
    74  	// of all the Cells of all the Shards, limited to the provided cells.
    75  	//
    76  	// srvKeyspaceMap is a map:
    77  	//   key: cell
    78  	//   value: topo.SrvKeyspace object being built
    79  	srvKeyspaceMap := make(map[string]*topodatapb.SrvKeyspace)
    80  	for _, cell := range cells {
    81  		srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
    82  		switch {
    83  		case err == nil:
    84  			for _, partition := range srvKeyspace.GetPartitions() {
    85  				for _, shardTabletControl := range partition.GetShardTabletControls() {
    86  					if shardTabletControl.QueryServiceDisabled {
    87  						return fmt.Errorf("can't rebuild serving keyspace while a migration is on going. TabletControls is set for partition %v", partition)
    88  					}
    89  				}
    90  			}
    91  		case topo.IsErrType(err, topo.NoNode):
    92  			// NOOP
    93  		default:
    94  			return err
    95  		}
    96  		srvKeyspaceMap[cell] = &topodatapb.SrvKeyspace{
    97  			ServedFrom: ki.ComputeCellServedFrom(cell),
    98  		}
    99  		srvKeyspaceMap[cell].ThrottlerConfig = ki.ThrottlerConfig
   100  	}
   101  
   102  	servedTypes := []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}
   103  
   104  	// for each entry in the srvKeyspaceMap map, we do the following:
   105  	// - get the Shard structures for each shard / cell
   106  	// - if not present, build an empty one from global Shard
   107  	// - sort the shards in the list by range
   108  	// - check the ranges are compatible (no hole, covers everything)
   109  	for cell, srvKeyspace := range srvKeyspaceMap {
   110  		for _, si := range shards {
   111  			// We rebuild keyspace iff shard primary is in a serving state.
   112  			if !si.GetIsPrimaryServing() {
   113  				continue
   114  			}
   115  			// for each type this shard is supposed to serve,
   116  			// add it to srvKeyspace.Partitions
   117  			for _, tabletType := range servedTypes {
   118  				partition := topoproto.SrvKeyspaceGetPartition(srvKeyspace, tabletType)
   119  				if partition == nil {
   120  					partition = &topodatapb.SrvKeyspace_KeyspacePartition{
   121  						ServedType: tabletType,
   122  					}
   123  					srvKeyspace.Partitions = append(srvKeyspace.Partitions, partition)
   124  				}
   125  				partition.ShardReferences = append(partition.ShardReferences, &topodatapb.ShardReference{
   126  					Name:     si.ShardName(),
   127  					KeyRange: si.KeyRange,
   128  				})
   129  			}
   130  		}
   131  
   132  		if !(ki.KeyspaceType == topodatapb.KeyspaceType_SNAPSHOT && allowPartial) {
   133  			// skip this check for SNAPSHOT keyspaces so that incomplete keyspaces can still serve
   134  			if err := topo.OrderAndCheckPartitions(cell, srvKeyspace); err != nil {
   135  				return err
   136  			}
   137  		}
   138  
   139  	}
   140  	// And then finally save the keyspace objects, in parallel.
   141  	rec := concurrency.AllErrorRecorder{}
   142  	wg := sync.WaitGroup{}
   143  	for cell, srvKeyspace := range srvKeyspaceMap {
   144  		wg.Add(1)
   145  		go func(cell string, srvKeyspace *topodatapb.SrvKeyspace) {
   146  			defer wg.Done()
   147  			if err := ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace); err != nil {
   148  				rec.RecordError(fmt.Errorf("writing serving data failed: %v", err))
   149  			}
   150  		}(cell, srvKeyspace)
   151  	}
   152  	wg.Wait()
   153  	return rec.Error()
   154  }