github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/store_remove_replica.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"sync/atomic"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/util/log"
    19  	"github.com/cockroachdb/errors"
    20  )
    21  
// RemoveOptions bundles boolean parameters for Store.RemoveReplica.
type RemoveOptions struct {
	// DestroyData controls whether removing an initialized replica also calls
	// Replica.destroyRaftMuLocked on it. When false, that call is skipped.
	DestroyData bool

	// ignoreDestroyStatus allows a caller to instruct the store to remove
	// replicas which are already marked as destroyed. This is helpful in cases
	// where the caller knows that it set the destroy status and cannot have raced
	// with another goroutine. See Replica.handleChangeReplicasResult().
	ignoreDestroyStatus bool
}
    32  
    33  // RemoveReplica removes the replica from the store's replica map and from the
    34  // sorted replicasByKey btree.
    35  //
    36  // The NextReplicaID from the replica descriptor that was used to make the
    37  // removal decision is passed in. Removal is aborted if the replica ID has
    38  // advanced to or beyond the NextReplicaID since the removal decision was made.
    39  //
    40  // If opts.DestroyReplica is false, replica.destroyRaftMuLocked is not called.
    41  //
    42  // The passed replica must be initialized.
    43  func (s *Store) RemoveReplica(
    44  	ctx context.Context, rep *Replica, nextReplicaID roachpb.ReplicaID, opts RemoveOptions,
    45  ) error {
    46  	rep.raftMu.Lock()
    47  	defer rep.raftMu.Unlock()
    48  	return s.removeInitializedReplicaRaftMuLocked(ctx, rep, nextReplicaID, opts)
    49  }
    50  
    51  // removeReplicaRaftMuLocked removes the passed replica. If the replica is
    52  // initialized the RemoveOptions will be consulted.
    53  func (s *Store) removeReplicaRaftMuLocked(
    54  	ctx context.Context, rep *Replica, nextReplicaID roachpb.ReplicaID, opts RemoveOptions,
    55  ) error {
    56  	rep.raftMu.AssertHeld()
    57  	if rep.IsInitialized() {
    58  		return errors.Wrap(s.removeInitializedReplicaRaftMuLocked(ctx, rep, nextReplicaID, opts),
    59  			"failed to remove replica")
    60  	}
    61  	s.removeUninitializedReplicaRaftMuLocked(ctx, rep, nextReplicaID)
    62  	return nil
    63  }
    64  
// removeInitializedReplicaRaftMuLocked is the implementation of RemoveReplica,
// which is sometimes called directly when the necessary lock is already held.
// It requires that Replica.raftMu is held and that s.mu is not held.
//
// The removal proceeds in stages: under rep.mu the replica is sanity-checked
// and its destroy status is set to "removed"; the store is then checked to
// still map rep.RangeID to rep; under s.mu the replica's MVCC stats and the
// replica count are subtracted from the store metrics; rangefeeds and
// replication are disconnected and, if opts.DestroyData is set,
// rep.destroyRaftMuLocked is called; finally, under s.mu, the replica is
// unlinked from the store's maps, a capacity-change gossip is considered, and
// the replica is removed from the scanner.
//
// It returns nil without doing anything if the replica is already marked as
// removed and opts.ignoreDestroyStatus is false. The only non-nil error it
// returns is the one from rep.destroyRaftMuLocked; every other failure is
// reported through log.Fatalf.
func (s *Store) removeInitializedReplicaRaftMuLocked(
	ctx context.Context, rep *Replica, nextReplicaID roachpb.ReplicaID, opts RemoveOptions,
) error {
	rep.raftMu.AssertHeld()

	// Sanity checks before committing to the removal by setting the
	// destroy status.
	var desc *roachpb.RangeDescriptor
	var replicaID roachpb.ReplicaID
	{
		rep.mu.Lock()

		// Detect if we were already removed.
		if !opts.ignoreDestroyStatus && rep.mu.destroyStatus.Removed() {
			rep.mu.Unlock()
			return nil // already removed, noop
		}

		// Capture the descriptor while holding rep.mu; it is used below for
		// the replicasByKey overlap checks.
		desc = rep.mu.state.Desc
		if repDesc, ok := desc.GetReplicaDescriptor(s.StoreID()); ok && repDesc.ReplicaID >= nextReplicaID {
			rep.mu.Unlock()
			// NB: This should not in any way be possible starting in 20.1.
			log.Fatalf(ctx, "replica descriptor's ID has changed (%s >= %s)",
				repDesc.ReplicaID, nextReplicaID)
		}

		// This is a fatal error as an initialized replica can never become
		// uninitialized.
		if !rep.isInitializedRLocked() {
			rep.mu.Unlock()
			log.Fatalf(ctx, "uninitialized replica cannot be removed with removeInitializedReplica: %v",
				rep)
		}

		// Mark the replica as removed before deleting data.
		rep.mu.destroyStatus.Set(roachpb.NewRangeNotFoundError(rep.RangeID, rep.StoreID()),
			destroyReasonRemoved)
		// Remember the replica ID for the log message below.
		replicaID = rep.mu.replicaID
		rep.mu.Unlock()
	}

	// Proceed with the removal, all errors encountered from here down are fatal.

	// Another sanity check that this replica is a part of this store.
	existing, err := s.GetReplica(rep.RangeID)
	if err != nil {
		log.Fatalf(ctx, "cannot remove replica which does not exist: %v", err)
	} else if existing != rep {
		log.Fatalf(ctx, "replica %v replaced by %v before being removed",
			rep, existing)
	}

	// During merges, the context might have the subsuming range, so we explicitly
	// log the replica to be removed.
	log.Infof(ctx, "removing replica r%d/%d", rep.RangeID, replicaID)

	s.mu.Lock()
	if placeholder := s.getOverlappingKeyRangeLocked(desc); placeholder != rep {
		// This is a fatal error because uninitialized replicas shouldn't make it
		// this far. This method will need some changes when we introduce GC of
		// uninitialized replicas.
		s.mu.Unlock()
		log.Fatalf(ctx, "replica %+v unexpectedly overlapped by %+v", rep, placeholder)
	}
	// Adjust stats before calling Destroy. This can be called before or after
	// Destroy, but this configuration helps avoid races in stat verification
	// tests.
	s.metrics.subtractMVCCStats(rep.GetMVCCStats())
	s.metrics.ReplicaCount.Dec(1)
	s.mu.Unlock()

	// The replica will no longer exist, so cancel any rangefeed registrations.
	rep.disconnectRangefeedWithReason(
		roachpb.RangeFeedRetryError_REASON_REPLICA_REMOVED,
	)

	// Mark the replica as destroyed and (optionally) destroy the on-disk data
	// while not holding Store.mu. This is safe because we're holding
	// Replica.raftMu and the replica is present in Store.mu.replicasByKey
	// (preventing any concurrent access to the replica's key range).
	rep.disconnectReplicationRaftMuLocked(ctx)
	if opts.DestroyData {
		if err := rep.destroyRaftMuLocked(ctx, nextReplicaID); err != nil {
			// NOTE(review): this is the one non-fatal error path. At this point
			// the destroy status has been set and the store metrics have been
			// decremented, but the replica is still linked in the store's maps.
			// Confirm that callers treat this error as fatal.
			return err
		}
	}

	// Unlink the replica from every store-level index while holding s.mu.
	s.mu.Lock()
	defer s.mu.Unlock()
	s.unlinkReplicaByRangeIDLocked(rep.RangeID)
	if placeholder := s.mu.replicasByKey.Delete(rep); placeholder != rep {
		// We already checked that our replica was present in replicasByKey
		// above. Nothing should have been able to change that.
		log.Fatalf(ctx, "replica %+v unexpectedly overlapped by %+v", rep, placeholder)
	}
	// With rep deleted, nothing else should cover its key range.
	if rep2 := s.getOverlappingKeyRangeLocked(desc); rep2 != nil {
		log.Fatalf(ctx, "corrupted replicasByKey map: %s and %s overlapped", rep, rep2)
	}
	delete(s.mu.replicaPlaceholders, rep.RangeID)
	// TODO(peter): Could release s.mu.Lock() here.
	s.maybeGossipOnCapacityChange(ctx, rangeRemoveEvent)
	s.scanner.RemoveReplica(rep)
	return nil
}
   172  
   173  // removeUninitializedReplicaRaftMuLocked removes an uninitialized replica.
   174  // All paths which call this code held the raftMu before calling this method
   175  // and ensured that the removal was sane given the current replicaID and
   176  // initialization status (which only changes under the raftMu).
   177  func (s *Store) removeUninitializedReplicaRaftMuLocked(
   178  	ctx context.Context, rep *Replica, nextReplicaID roachpb.ReplicaID,
   179  ) {
   180  	rep.raftMu.AssertHeld()
   181  
   182  	// Sanity check this removal and set the destroyStatus.
   183  	{
   184  		rep.mu.Lock()
   185  
   186  		// Detect if we were already removed, this is a fatal error
   187  		// because we should have already checked this under the raftMu
   188  		// before calling this method.
   189  		if rep.mu.destroyStatus.Removed() {
   190  			rep.mu.Unlock()
   191  			log.Fatalf(ctx, "uninitialized replica unexpectedly already removed")
   192  		}
   193  
   194  		if rep.isInitializedRLocked() {
   195  			rep.mu.Unlock()
   196  			log.Fatalf(ctx, "cannot remove initialized replica in removeUninitializedReplica: %v", rep)
   197  		}
   198  
   199  		// Mark the replica as removed before deleting data.
   200  		rep.mu.destroyStatus.Set(roachpb.NewRangeNotFoundError(rep.RangeID, rep.StoreID()),
   201  			destroyReasonRemoved)
   202  
   203  		rep.mu.Unlock()
   204  	}
   205  
   206  	// Proceed with the removal.
   207  
   208  	rep.disconnectReplicationRaftMuLocked(ctx)
   209  	if err := rep.destroyRaftMuLocked(ctx, nextReplicaID); err != nil {
   210  		log.Fatalf(ctx, "failed to remove uninitialized replica %v: %v", rep, err)
   211  	}
   212  
   213  	s.mu.Lock()
   214  	defer s.mu.Unlock()
   215  
   216  	// Sanity check, could be removed.
   217  	value, stillExists := s.mu.replicas.Load(int64(rep.RangeID))
   218  	if !stillExists {
   219  		log.Fatalf(ctx, "uninitialized replica was removed in the meantime")
   220  	}
   221  	existing := (*Replica)(value)
   222  	if existing == rep {
   223  		log.Infof(ctx, "removing uninitialized replica %v", rep)
   224  	} else {
   225  		log.Fatalf(ctx, "uninitialized replica %v was unexpectedly replaced", existing)
   226  	}
   227  
   228  	// Only an uninitialized replica can have a placeholder since, by
   229  	// definition, an initialized replica will be present in the
   230  	// replicasByKey map. While the replica will usually consume the
   231  	// placeholder itself, that isn't guaranteed and so this invocation
   232  	// here is crucial (i.e. don't remove it).
   233  	if s.removePlaceholderLocked(ctx, rep.RangeID) {
   234  		atomic.AddInt32(&s.counts.droppedPlaceholders, 1)
   235  	}
   236  	s.unlinkReplicaByRangeIDLocked(rep.RangeID)
   237  }
   238  
   239  // unlinkReplicaByRangeIDLocked removes all of the store's references to the
   240  // provided replica that are keyed by its range ID. The replica may also need
   241  // to be removed from the replicasByKey map.
   242  //
   243  // store.mu must be held.
   244  func (s *Store) unlinkReplicaByRangeIDLocked(rangeID roachpb.RangeID) {
   245  	s.mu.AssertHeld()
   246  	s.unquiescedReplicas.Lock()
   247  	delete(s.unquiescedReplicas.m, rangeID)
   248  	s.unquiescedReplicas.Unlock()
   249  	delete(s.mu.uninitReplicas, rangeID)
   250  	s.replicaQueues.Delete(int64(rangeID))
   251  	s.mu.replicas.Delete(int64(rangeID))
   252  }
   253  
   254  // removePlaceholder removes a placeholder for the specified range if it
   255  // exists, returning true if a placeholder was present and removed and false
   256  // otherwise. Requires that the raftMu of the replica whose place is being held
   257  // is locked.
   258  func (s *Store) removePlaceholder(ctx context.Context, rngID roachpb.RangeID) bool {
   259  	s.mu.Lock()
   260  	defer s.mu.Unlock()
   261  	return s.removePlaceholderLocked(ctx, rngID)
   262  }
   263  
   264  // removePlaceholderLocked removes the specified placeholder. Requires that
   265  // Store.mu and the raftMu of the replica whose place is being held are locked.
   266  func (s *Store) removePlaceholderLocked(ctx context.Context, rngID roachpb.RangeID) bool {
   267  	placeholder, ok := s.mu.replicaPlaceholders[rngID]
   268  	if !ok {
   269  		return false
   270  	}
   271  	switch exRng := s.mu.replicasByKey.Delete(placeholder).(type) {
   272  	case *ReplicaPlaceholder:
   273  		delete(s.mu.replicaPlaceholders, rngID)
   274  		if exRng2 := s.getOverlappingKeyRangeLocked(&exRng.rangeDesc); exRng2 != nil {
   275  			log.Fatalf(ctx, "corrupted replicasByKey map: %s and %s overlapped", exRng, exRng2)
   276  		}
   277  		return true
   278  	case nil:
   279  		log.Fatalf(ctx, "r%d: placeholder not found", rngID)
   280  	default:
   281  		log.Fatalf(ctx, "r%d: expected placeholder, got %T", rngID, exRng)
   282  	}
   283  	return false // appease the compiler
   284  }