github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_replica_gc_test.go (about)

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver_test
    12  
    13  import (
    14  	"context"
    15  	"io/ioutil"
    16  	"os"
    17  	"path/filepath"
    18  	"strconv"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    24  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/storage"
    27  	"github.com/cockroachdb/cockroach/pkg/testutils"
    28  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    29  	"github.com/cockroachdb/errors"
    30  )
    31  
    32  // TestReplicaGCQueueDropReplica verifies that a removed replica is
    33  // immediately cleaned up.
    34  func TestReplicaGCQueueDropReplicaDirect(t *testing.T) {
    35  	defer leaktest.AfterTest(t)()
    36  	mtc := &multiTestContext{}
    37  	const numStores = 3
    38  	rangeID := roachpb.RangeID(1)
    39  
    40  	// Use actual engines (not in memory) because the in-mem ones don't write
    41  	// to disk. The test would still pass if we didn't do this except it
    42  	// would probably look at an empty sideloaded directory and fail.
    43  	tempDir, cleanup := testutils.TempDir(t)
    44  	defer cleanup()
    45  	cache := storage.NewRocksDBCache(1 << 20)
    46  	defer cache.Release()
    47  	for i := 0; i < 3; i++ {
    48  		eng, err := storage.NewRocksDB(storage.RocksDBConfig{
    49  			StorageConfig: base.StorageConfig{
    50  				Dir: filepath.Join(tempDir, strconv.Itoa(i)),
    51  			},
    52  		}, cache)
    53  		if err != nil {
    54  			t.Fatal(err)
    55  		}
    56  		defer eng.Close()
    57  		mtc.engines = append(mtc.engines, eng)
    58  	}
    59  
    60  	// In this test, the Replica on the second Node is removed, and the test
    61  	// verifies that that Node adds this Replica to its RangeGCQueue. However,
    62  	// the queue does a consistent lookup which will usually be read from
    63  	// Node 1. Hence, if Node 1 hasn't processed the removal when Node 2 has,
    64  	// no GC will take place since the consistent RangeLookup hits the first
    65  	// Node. We use the TestingEvalFilter to make sure that the second Node
    66  	// waits for the first.
    67  	cfg := kvserver.TestStoreConfig(nil)
    68  	mtc.storeConfig = &cfg
    69  	mtc.storeConfig.TestingKnobs.EvalKnobs.TestingEvalFilter =
    70  		func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
    71  			et, ok := filterArgs.Req.(*roachpb.EndTxnRequest)
    72  			if !ok || filterArgs.Sid != 2 {
    73  				return nil
    74  			}
    75  			crt := et.InternalCommitTrigger.GetChangeReplicasTrigger()
    76  			if crt == nil || crt.DeprecatedChangeType != roachpb.REMOVE_REPLICA {
    77  				return nil
    78  			}
    79  			testutils.SucceedsSoon(t, func() error {
    80  				r, err := mtc.stores[0].GetReplica(rangeID)
    81  				if err != nil {
    82  					return err
    83  				}
    84  				if _, ok := r.Desc().GetReplicaDescriptor(2); ok {
    85  					return errors.New("expected second node gone from first node's known replicas")
    86  				}
    87  				return nil
    88  			})
    89  			return nil
    90  		}
    91  
    92  	defer mtc.Stop()
    93  	mtc.Start(t, numStores)
    94  
    95  	mtc.replicateRange(rangeID, 1, 2)
    96  
    97  	{
    98  		repl1, err := mtc.stores[1].GetReplica(rangeID)
    99  		if err != nil {
   100  			t.Fatal(err)
   101  		}
   102  
   103  		// Put some bogus sideloaded data on the replica which we're about to
   104  		// remove. Then, at the end of the test, check that that sideloaded
   105  		// storage is now empty (in other words, GC'ing the Replica took care of
   106  		// cleanup).
   107  		repl1.RaftLock()
   108  		dir := repl1.SideloadedRaftMuLocked().Dir()
   109  		repl1.RaftUnlock()
   110  
   111  		if dir == "" {
   112  			t.Fatal("no sideloaded directory")
   113  		}
   114  		if err := os.MkdirAll(dir, 0755); err != nil {
   115  			t.Fatal(err)
   116  		}
   117  		if err := ioutil.WriteFile(filepath.Join(dir, "i1000000.t100000"), []byte("foo"), 0644); err != nil {
   118  			t.Fatal(err)
   119  		}
   120  
   121  		defer func() {
   122  			if !t.Failed() {
   123  				testutils.SucceedsSoon(t, func() error {
   124  					// Verify that the whole directory for the replica is gone.
   125  					repl1.RaftLock()
   126  					dir := repl1.SideloadedRaftMuLocked().Dir()
   127  					repl1.RaftUnlock()
   128  					_, err := os.Stat(dir)
   129  
   130  					if os.IsNotExist(err) {
   131  						return nil
   132  					}
   133  					return errors.Errorf("replica still has sideloaded files despite GC: %v", err)
   134  				})
   135  			}
   136  		}()
   137  	}
   138  
   139  	mtc.unreplicateRange(rangeID, 1)
   140  
   141  	// Make sure the range is removed from the store.
   142  	testutils.SucceedsSoon(t, func() error {
   143  		if _, err := mtc.stores[1].GetReplica(rangeID); !testutils.IsError(err, "r[0-9]+ was not found") {
   144  			return errors.Errorf("expected range removal: %v", err) // NB: errors.Wrapf(nil, ...) returns nil.
   145  		}
   146  		return nil
   147  	})
   148  }
   149  
   150  // TestReplicaGCQueueDropReplicaOnScan verifies that the range GC queue
   151  // removes a range from a store that no longer should have a replica.
   152  func TestReplicaGCQueueDropReplicaGCOnScan(t *testing.T) {
   153  	defer leaktest.AfterTest(t)()
   154  	mtc := &multiTestContext{}
   155  	cfg := kvserver.TestStoreConfig(nil)
   156  	cfg.TestingKnobs.DisableEagerReplicaRemoval = true
   157  	cfg.Clock = nil // manual clock
   158  	mtc.storeConfig = &cfg
   159  
   160  	defer mtc.Stop()
   161  	mtc.Start(t, 3)
   162  	// Disable the replica gc queue to prevent direct removal of replica.
   163  	mtc.stores[1].SetReplicaGCQueueActive(false)
   164  
   165  	rangeID := roachpb.RangeID(1)
   166  	mtc.replicateRange(rangeID, 1, 2)
   167  	mtc.unreplicateRange(rangeID, 1)
   168  
   169  	// Wait long enough for the direct replica GC to have had a chance and been
   170  	// discarded because the queue is disabled.
   171  	time.Sleep(10 * time.Millisecond)
   172  	if _, err := mtc.stores[1].GetReplica(rangeID); err != nil {
   173  		t.Error("unexpected range removal")
   174  	}
   175  
   176  	// Enable the queue.
   177  	mtc.stores[1].SetReplicaGCQueueActive(true)
   178  
   179  	// Increment the clock's timestamp to make the replica GC queue process the range.
   180  	mtc.advanceClock(context.Background())
   181  	mtc.manualClock.Increment(int64(kvserver.ReplicaGCQueueInactivityThreshold + 1))
   182  
   183  	// Make sure the range is removed from the store.
   184  	testutils.SucceedsSoon(t, func() error {
   185  		store := mtc.stores[1]
   186  		store.MustForceReplicaGCScanAndProcess()
   187  		if _, err := store.GetReplica(rangeID); !testutils.IsError(err, "r[0-9]+ was not found") {
   188  			return errors.Errorf("expected range removal: %v", err) // NB: errors.Wrapf(nil, ...) returns nil.
   189  		}
   190  		return nil
   191  	})
   192  }