vitess.io/vitess@v0.16.2/go/vt/wrangler/shard.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package wrangler
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	"vitess.io/vitess/go/vt/topo"
    24  
    25  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    26  	vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
    27  )
    28  
    29  // shard related methods for Wrangler
    30  
    31  // UpdateSrvKeyspacePartitions changes the SrvKeyspaceGraph
    32  // for a shard.  It updates serving graph
    33  //
    34  // This takes the keyspace lock as to not interfere with resharding operations.
    35  func (wr *Wrangler) UpdateSrvKeyspacePartitions(ctx context.Context, keyspace, shard string, tabletType topodatapb.TabletType, cells []string, remove bool) (err error) {
    36  	// lock the keyspace
    37  	ctx, unlock, lockErr := wr.ts.LockKeyspace(ctx, keyspace, "UpdateSrvKeyspacePartitions")
    38  	if lockErr != nil {
    39  		return lockErr
    40  	}
    41  	defer unlock(&err)
    42  
    43  	si, err := wr.ts.GetShard(ctx, keyspace, shard)
    44  	if err != nil {
    45  		return err
    46  	}
    47  
    48  	if remove {
    49  		return wr.ts.DeleteSrvKeyspacePartitions(ctx, keyspace, []*topo.ShardInfo{si}, tabletType, cells)
    50  	}
    51  	return wr.ts.AddSrvKeyspacePartitions(ctx, keyspace, []*topo.ShardInfo{si}, tabletType, cells)
    52  }
    53  
    54  // DeleteShard will do all the necessary changes in the topology server
    55  // to entirely remove a shard.
    56  func (wr *Wrangler) DeleteShard(ctx context.Context, keyspace, shard string, recursive, evenIfServing bool) error {
    57  	// Read the Shard object. If it's not there, try to clean up
    58  	// the topology anyway.
    59  	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
    60  	if err != nil {
    61  		if topo.IsErrType(err, topo.NoNode) {
    62  			wr.Logger().Infof("Shard %v/%v doesn't seem to exist, cleaning up any potential leftover", keyspace, shard)
    63  			return wr.ts.DeleteShard(ctx, keyspace, shard)
    64  		}
    65  		return err
    66  	}
    67  
    68  	servingCells, err := wr.ts.GetShardServingCells(ctx, shardInfo)
    69  	if err != nil {
    70  		return err
    71  	}
    72  	// Check the Serving map for the shard, we don't want to
    73  	// remove a serving shard if not absolutely sure.
    74  	if !evenIfServing && len(servingCells) > 0 {
    75  		return fmt.Errorf("shard %v/%v is still serving, cannot delete it, use even_if_serving flag if needed", keyspace, shard)
    76  	}
    77  
    78  	cells, err := wr.ts.GetCellInfoNames(ctx)
    79  	if err != nil {
    80  		return err
    81  	}
    82  
    83  	// Go through all the cells.
    84  	for _, cell := range cells {
    85  		var aliases []*topodatapb.TabletAlias
    86  
    87  		// Get the ShardReplication object for that cell. Try
    88  		// to find all tablets that may belong to our shard.
    89  		sri, err := wr.ts.GetShardReplication(ctx, cell, keyspace, shard)
    90  		switch {
    91  		case topo.IsErrType(err, topo.NoNode):
    92  			// No ShardReplication object. It means the
    93  			// topo is inconsistent. Let's read all the
    94  			// tablets for that cell, and if we find any
    95  			// in our keyspace / shard, either abort or
    96  			// try to delete them.
    97  			aliases, err = wr.ts.GetTabletAliasesByCell(ctx, cell)
    98  			if err != nil {
    99  				return fmt.Errorf("GetTabletsByCell(%v) failed: %v", cell, err)
   100  			}
   101  		case err == nil:
   102  			// We found a ShardReplication object. We
   103  			// trust it to have all tablet records.
   104  			aliases = make([]*topodatapb.TabletAlias, len(sri.Nodes))
   105  			for i, n := range sri.Nodes {
   106  				aliases[i] = n.TabletAlias
   107  			}
   108  		default:
   109  			return fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err)
   110  		}
   111  
   112  		// Get the corresponding Tablet records. Note
   113  		// GetTabletMap ignores ErrNoNode, and it's good for
   114  		// our purpose, it means a tablet was deleted but is
   115  		// still referenced.
   116  		tabletMap, err := wr.ts.GetTabletMap(ctx, aliases)
   117  		if err != nil {
   118  			return fmt.Errorf("GetTabletMap() failed: %v", err)
   119  		}
   120  
   121  		// Remove the tablets that don't belong to our
   122  		// keyspace/shard from the map.
   123  		for a, ti := range tabletMap {
   124  			if ti.Keyspace != keyspace || ti.Shard != shard {
   125  				delete(tabletMap, a)
   126  			}
   127  		}
   128  
   129  		// Now see if we need to DeleteTablet, and if we can, do it.
   130  		if len(tabletMap) > 0 {
   131  			if !recursive {
   132  				return fmt.Errorf("shard %v/%v still has %v tablets in cell %v; use -recursive or remove them manually", keyspace, shard, len(tabletMap), cell)
   133  			}
   134  
   135  			wr.Logger().Infof("Deleting all tablets in shard %v/%v cell %v", keyspace, shard, cell)
   136  			for tabletAlias, tabletInfo := range tabletMap {
   137  				// We don't care about scrapping or updating the replication graph,
   138  				// because we're about to delete the entire replication graph.
   139  				wr.Logger().Infof("Deleting tablet %v", tabletAlias)
   140  				if err := wr.TopoServer().DeleteTablet(ctx, tabletInfo.Alias); err != nil && !topo.IsErrType(err, topo.NoNode) {
   141  					// We don't want to continue if a DeleteTablet fails for
   142  					// any good reason (other than missing tablet, in which
   143  					// case it's just a topology server inconsistency we can
   144  					// ignore). If we continue and delete the replication
   145  					// graph, the tablet record will be orphaned, since
   146  					// we'll no longer know it belongs to this shard.
   147  					//
   148  					// If the problem is temporary, or resolved externally, re-running
   149  					// DeleteShard will skip over tablets that were already deleted.
   150  					return fmt.Errorf("can't delete tablet %v: %v", tabletAlias, err)
   151  				}
   152  			}
   153  		}
   154  	}
   155  
   156  	// Try to remove the replication graph and serving graph in each cell,
   157  	// regardless of its existence.
   158  	for _, cell := range cells {
   159  		if err := wr.ts.DeleteShardReplication(ctx, cell, keyspace, shard); err != nil && !topo.IsErrType(err, topo.NoNode) {
   160  			wr.Logger().Warningf("Cannot delete ShardReplication in cell %v for %v/%v: %v", cell, keyspace, shard, err)
   161  		}
   162  	}
   163  
   164  	return wr.ts.DeleteShard(ctx, keyspace, shard)
   165  }
   166  
   167  // SourceShardDelete will delete a SourceShard inside a shard, by index.
   168  //
   169  // This takes the keyspace lock as not to interfere with resharding operations.
   170  func (wr *Wrangler) SourceShardDelete(ctx context.Context, keyspace, shard string, uid uint32) (err error) {
   171  	resp, err := wr.VtctldServer().SourceShardDelete(ctx, &vtctldatapb.SourceShardDeleteRequest{
   172  		Keyspace: keyspace,
   173  		Shard:    shard,
   174  		Uid:      uid,
   175  	})
   176  	if err != nil {
   177  		return err
   178  	}
   179  
   180  	if resp.Shard == nil {
   181  		return fmt.Errorf("no SourceShard with uid %v", uid)
   182  	}
   183  
   184  	return nil
   185  }
   186  
   187  // SourceShardAdd will add a new SourceShard inside a shard.
   188  func (wr *Wrangler) SourceShardAdd(ctx context.Context, keyspace, shard string, uid uint32, skeyspace, sshard string, keyRange *topodatapb.KeyRange, tables []string) (err error) {
   189  	resp, err := wr.VtctldServer().SourceShardAdd(ctx, &vtctldatapb.SourceShardAddRequest{
   190  		Keyspace:       keyspace,
   191  		Shard:          shard,
   192  		Uid:            uid,
   193  		SourceKeyspace: skeyspace,
   194  		SourceShard:    sshard,
   195  		KeyRange:       keyRange,
   196  		Tables:         tables,
   197  	})
   198  	if err != nil {
   199  		return err
   200  	}
   201  
   202  	if resp.Shard == nil {
   203  		return fmt.Errorf("uid %v is already in use", uid)
   204  	}
   205  
   206  	return nil
   207  }