github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/gc/gc_old_test.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package gc

import (
	"context"
	"sort"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)
// runGCOld is an older implementation of Run. It is used for benchmarking and
// testing.
//
// runGCOld runs garbage collection for the specified descriptor on the
// provided Engine (which is not mutated). It uses the provided gcer to run
// garbage collection once on all implicated spans, cleanupIntentsFn to
// resolve intents synchronously, and cleanupTxnIntentsAsyncFn to
// asynchronously clean up intents and associated transaction records on
// success.
func runGCOld(
	ctx context.Context,
	desc *roachpb.RangeDescriptor,
	snap storage.Reader,
	now hlc.Timestamp,
	_ hlc.Timestamp, // exists to make signature match RunGC
	policy zonepb.GCPolicy,
	gcer GCer,
	cleanupIntentsFn CleanupIntentsFunc,
	cleanupTxnIntentsAsyncFn CleanupTxnIntentsAsyncFunc,
) (Info, error) {

	iter := rditer.NewReplicaDataIterator(desc, snap,
		true /* replicatedOnly */, false /* seekEnd */)
	defer iter.Close()

	// Compute the intent expiration (the intent age at which we attempt to
	// resolve) and the transaction record expiration.
	intentExp := now.Add(-IntentAgeThreshold.Nanoseconds(), 0)
	txnExp := now.Add(-kvserverbase.TxnCleanupThreshold.Nanoseconds(), 0)
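	// For example (illustrative numbers only): with now at a wall time of 6h
	// and an IntentAgeThreshold of 2h, intentExp is 4h, so only intents
	// written before 4h are candidates for resolution below.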

	gc := MakeGarbageCollector(now, policy)

	if err := gcer.SetGCThreshold(ctx, Threshold{
		Key: gc.Threshold,
		Txn: txnExp,
	}); err != nil {
		return Info{}, errors.Wrap(err, "failed to set GC thresholds")
	}

	var batchGCKeys []roachpb.GCRequest_GCKey
	var batchGCKeysBytes int64
	var expBaseKey roachpb.Key
	var keys []storage.MVCCKey
	var vals [][]byte
	var keyBytes int64
	var valBytes int64
	info := Info{
		Policy:    policy,
		Now:       now,
		Threshold: gc.Threshold,
	}

	// Maps from txn ID to txn and intent key slice.
	txnMap := map[uuid.UUID]*roachpb.Transaction{}
	intentKeyMap := map[uuid.UUID][]roachpb.Key{}

	// processKeysAndValues is invoked with each key and its set of values.
	// Intents older than the intent age threshold are sent for resolution,
	// and the values that follow the MVCC metadata (and any intent value)
	// are considered for garbage collection.
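	//
	// The slices are laid out as follows: keys[0]/vals[0] hold the MVCC
	// metadata key and its encoded value; when meta.Txn is set, keys[1]/vals[1]
	// hold the intent's provisional value; the remaining entries are versioned
	// values in descending timestamp order, as required by Filter.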
	processKeysAndValues := func() {
		// If there's more than a single value for the key, possibly send for GC.
		if len(keys) > 1 {
			meta := &enginepb.MVCCMetadata{}
			if err := protoutil.Unmarshal(vals[0], meta); err != nil {
				log.Errorf(ctx, "unable to unmarshal MVCC metadata for key %q: %+v", keys[0], err)
			} else {
				// If there's an active intent, send it for resolution if it's
				// older than the threshold.
				startIdx := 1
				if meta.Txn != nil {
					// Keep track of the intent to resolve if it is older than
					// the intent expiration threshold.
					if hlc.Timestamp(meta.Timestamp).Less(intentExp) {
						txnID := meta.Txn.ID
						if _, ok := txnMap[txnID]; !ok {
							txnMap[txnID] = &roachpb.Transaction{
								TxnMeta: *meta.Txn,
							}
							// IntentTxns and PushTxn will be equal here, since
							// pushes to transactions whose record lies in this
							// range (but which are not associated to a remaining
							// intent on it) happen asynchronously and are accounted
							// for separately. Thus higher up in the stack, we
							// expect PushTxn > IntentTxns.
							info.IntentTxns++
							// All transactions in txnMap may be PENDING and
							// cleanupIntentsFn will push them to finalize them.
							info.PushTxn++
						}
						info.IntentsConsidered++
						intentKeyMap[txnID] = append(intentKeyMap[txnID], expBaseKey)
					}
					// With an active intent, GC ignores the MVCC metadata and
					// the intent value.
					startIdx = 2
				}
				// See if any values may be GC'd.
				if idx, gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); gcTS != (hlc.Timestamp{}) {
					// Batch the keys once the total size of their versions
					// exceeds the chunk limit. This avoids sending potentially
					// large GC requests through Raft. Iterate through the keys
					// in reverse order so that GC requests can be made multiple
					// times even on a single key, with successively newer
					// timestamps to prevent any single request from exploding
					// during GC evaluation.
					for i := len(keys) - 1; i >= startIdx+idx; i-- {
						keyBytes = int64(keys[i].EncodedSize())
						valBytes = int64(len(vals[i]))

						// Add the total size of the GC'able versions of the keys and values to Info.
						info.AffectedVersionsKeyBytes += keyBytes
						info.AffectedVersionsValBytes += valBytes

						batchGCKeysBytes += keyBytes
						// If the current key brings the batch over the target
						// size, add the current timestamp to finish the current
						// chunk and start a new one.
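						// For instance (illustrative numbers only): with a
						// 256 KiB chunk target and 1 KiB versions, roughly
						// every 256th version closes out a batch and issues a
						// GC request, bounding the size of any single Raft
						// command.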
						if batchGCKeysBytes >= KeyVersionChunkBytes {
							batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: keys[i].Timestamp})

							err := gcer.GC(ctx, batchGCKeys)

							// Succeed or fail, allow releasing the memory backing batchGCKeys.
							iter.ResetAllocator()
							batchGCKeys = nil
							batchGCKeysBytes = 0

							if err != nil {
								// Even though we are batching the GC process, it's
								// safe to continue because we bumped the GC
								// thresholds. We may leave some inconsistent history
								// behind, but nobody can read it.
								log.Warningf(ctx, "%v", err)
								return
							}
						}
					}
					// Add the key to the batch at the GC timestamp, unless it was already added.
					if batchGCKeysBytes != 0 {
						batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS})
					}
					info.NumKeysAffected++
				}
			}
		}
	}

	// Iterate through the keys and values of this replica's range.
	log.Event(ctx, "iterating through range")
	for ; ; iter.Next() {
		if ok, err := iter.Valid(); err != nil {
			return Info{}, err
		} else if !ok {
			break
		} else if ctx.Err() != nil {
			// Stop iterating if our context has expired.
			return Info{}, ctx.Err()
		}
		iterKey := iter.Key()
		if !iterKey.IsValue() || !iterKey.Key.Equal(expBaseKey) {
			// Moving to the next key (& values).
			processKeysAndValues()
			expBaseKey = iterKey.Key
			if !iterKey.IsValue() {
				keys = []storage.MVCCKey{iter.Key()}
				vals = [][]byte{iter.Value()}
				continue
			}
			// An implicit metadata.
			keys = []storage.MVCCKey{storage.MakeMVCCMetadataKey(iterKey.Key)}
			// A nil value for the encoded MVCCMetadata. This will unmarshal to an
			// empty MVCCMetadata which is sufficient for processKeysAndValues to
			// determine that there is no intent.
			vals = [][]byte{nil}
		}
		keys = append(keys, iter.Key())
		vals = append(vals, iter.Value())
	}
	// Handle last collected set of keys/vals.
	processKeysAndValues()
	if len(batchGCKeys) > 0 {
		if err := gcer.GC(ctx, batchGCKeys); err != nil {
			return Info{}, err
		}
	}

	// From now on, all keys processed are range-local.

	// Process local range key entries (txn records, queue last processed times).
	if err := processLocalKeyRange(ctx, snap, desc, txnExp, &info, cleanupTxnIntentsAsyncFn, gcer); err != nil {
		log.Warningf(ctx, "while gc'ing local key range: %s", err)
	}

	// Clean up the AbortSpan.
	log.Event(ctx, "processing AbortSpan")
	processAbortSpan(ctx, snap, desc.RangeID, txnExp, &info, gcer)

	log.Eventf(ctx, "GC'ed keys; stats %+v", info)

	// Push transactions (if pending) and resolve intents.
	var intents []roachpb.Intent
	for txnID, txn := range txnMap {
		intents = append(intents, roachpb.AsIntents(&txn.TxnMeta, intentKeyMap[txnID])...)
	}
	info.ResolveTotal += len(intents)
	log.Eventf(ctx, "cleanup of %d intents", len(intents))
	if err := cleanupIntentsFn(ctx, intents); err != nil {
		return Info{}, err
	}

	return info, nil
}

// GarbageCollector GCs MVCC key/values using a zone-specific GC policy,
// which allows either the union or intersection of a maximum number of
// versions and a maximum age.
type GarbageCollector struct {
	Threshold hlc.Timestamp
	policy    zonepb.GCPolicy
}

// MakeGarbageCollector returns a new GarbageCollector, with the expiration
// threshold computed from the current time and policy.TTLSeconds.
func MakeGarbageCollector(now hlc.Timestamp, policy zonepb.GCPolicy) GarbageCollector {
	return GarbageCollector{
		Threshold: CalculateThreshold(now, policy),
		policy:    policy,
	}
}
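
// The following is an illustrative sketch, not part of the original test
// suite: with a wall clock of 10s and a 3s TTL, the computed threshold is
// 7s (threshold = now - TTLSeconds, mirroring the arithmetic in
// TestGarbageCollectorFilter below), so versions at or below 7s become GC
// candidates.
func TestMakeGarbageCollectorThresholdSketch(t *testing.T) {
	defer leaktest.AfterTest(t)()
	gc := MakeGarbageCollector(hlc.Timestamp{WallTime: 10e9}, zonepb.GCPolicy{TTLSeconds: 3})
	if expected := (hlc.Timestamp{WallTime: 7e9}); gc.Threshold != expected {
		t.Errorf("expected threshold %s; got %s", expected, gc.Threshold)
	}
}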

// Filter makes decisions about garbage collection based on the garbage
// collection policy for batches of values for the same key. It returns the
// index of the first key to be GC'd and the timestamp at and below which all
// values should be garbage collected. If no values should be GC'd, it returns
// -1 for the index and the zero timestamp. Keys must be in descending time
// order. Values deleted at or before the returned timestamp can be deleted
// without invalidating any reads in the time interval (gc.Threshold, infinity).
//
// The GC keeps all values (including deletes) above the expiration time, plus
// the first value before or at the expiration time. This allows reads to be
// guaranteed as described above. However, if this were the only rule, then if
// the most recent write was a delete, it would never be removed. Thus, when a
// deleted value is the most recent before expiration, it can be deleted. This
// would still allow for the tombstone bugs in #6227, so in the future we will
// add checks that disallow writes before the last GC expiration time.
func (gc GarbageCollector) Filter(keys []storage.MVCCKey, values [][]byte) (int, hlc.Timestamp) {
	if gc.policy.TTLSeconds <= 0 {
		return -1, hlc.Timestamp{}
	}
	if len(keys) == 0 {
		return -1, hlc.Timestamp{}
	}

	// Find the index of the first expired key using binary search.
	i := sort.Search(len(keys), func(i int) bool { return keys[i].Timestamp.LessEq(gc.Threshold) })

	if i == len(keys) {
		return -1, hlc.Timestamp{}
	}

	// Now keys[i].Timestamp is <= gc.Threshold, but the key-value pair is still
	// "visible" at timestamp gc.Threshold (and up to the next version).
	if deleted := len(values[i]) == 0; deleted {
		// We don't have to keep a delete visible (since GCing it does not change
		// the outcome of the read). Note however that we can't touch deletes at
		// higher timestamps immediately preceding this one, since they're above
		// gc.Threshold and are needed for correctness; see #6227.
		return i, keys[i].Timestamp
	} else if i+1 < len(keys) {
		// Otherwise mark the previous timestamp for deletion (since it won't ever
		// be returned for reads at gc.Threshold and up).
		return i + 1, keys[i+1].Timestamp
	}

	return -1, hlc.Timestamp{}
}
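
// The following is an illustrative sketch, not part of the original test
// suite: with three versions at 3s, 2s, and 1s and a threshold of 2s, the
// 2s version is the one still visible at the threshold and must be kept, so
// only the 1s version beneath it is marked for collection.
func TestGarbageCollectorFilterSketch(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// Threshold = 3s now - 1s TTL = 2s.
	gc := MakeGarbageCollector(hlc.Timestamp{WallTime: 3e9}, zonepb.GCPolicy{TTLSeconds: 1})
	keys := []storage.MVCCKey{
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 3e9}),
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 2e9}),
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 1e9}),
	}
	vals := [][]byte{[]byte("v3"), []byte("v2"), []byte("v1")}
	idx, delTS := gc.Filter(keys, vals)
	if idx != 2 || delTS != (hlc.Timestamp{WallTime: 1e9}) {
		t.Errorf("expected (2, 1e9); got (%d, %s)", idx, delTS)
	}
}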

func mvccVersionKey(key roachpb.Key, ts hlc.Timestamp) storage.MVCCKey {
	return storage.MVCCKey{Key: key, Timestamp: ts}
}

var (
	aKey  = roachpb.Key("a")
	bKey  = roachpb.Key("b")
	aKeys = []storage.MVCCKey{
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 2e9, Logical: 0}),
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 1e9, Logical: 1}),
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 1e9, Logical: 0}),
	}
	bKeys = []storage.MVCCKey{
		mvccVersionKey(bKey, hlc.Timestamp{WallTime: 2e9, Logical: 0}),
		mvccVersionKey(bKey, hlc.Timestamp{WallTime: 1e9, Logical: 0}),
	}
)

// TestGarbageCollectorFilter verifies the filter policies for
// different sorts of MVCC keys.
func TestGarbageCollectorFilter(t *testing.T) {
	defer leaktest.AfterTest(t)()
	gcA := MakeGarbageCollector(hlc.Timestamp{WallTime: 0, Logical: 0}, zonepb.GCPolicy{TTLSeconds: 1})
	gcB := MakeGarbageCollector(hlc.Timestamp{WallTime: 0, Logical: 0}, zonepb.GCPolicy{TTLSeconds: 2})
	n := []byte("data")
	d := []byte(nil)
	testData := []struct {
		gc       GarbageCollector
		time     hlc.Timestamp
		keys     []storage.MVCCKey
		values   [][]byte
		expIdx   int
		expDelTS hlc.Timestamp
	}{
		{gcA, hlc.Timestamp{WallTime: 0, Logical: 0}, aKeys, [][]byte{n, n, n}, -1, hlc.Timestamp{}},
		{gcA, hlc.Timestamp{WallTime: 0, Logical: 0}, aKeys, [][]byte{d, d, d}, -1, hlc.Timestamp{}},
		{gcB, hlc.Timestamp{WallTime: 0, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}},
		{gcB, hlc.Timestamp{WallTime: 0, Logical: 0}, bKeys, [][]byte{d, d}, -1, hlc.Timestamp{}},
		{gcA, hlc.Timestamp{WallTime: 1e9, Logical: 0}, aKeys, [][]byte{n, n, n}, -1, hlc.Timestamp{}},
		{gcB, hlc.Timestamp{WallTime: 1e9, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}},
		{gcA, hlc.Timestamp{WallTime: 2e9, Logical: 0}, aKeys, [][]byte{n, n, n}, -1, hlc.Timestamp{}},
		{gcA, hlc.Timestamp{WallTime: 2e9, Logical: 0}, aKeys, [][]byte{d, d, d}, 2, hlc.Timestamp{WallTime: 1e9, Logical: 0}},
		{gcB, hlc.Timestamp{WallTime: 2e9, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}},
		{gcA, hlc.Timestamp{WallTime: 3e9, Logical: 0}, aKeys, [][]byte{n, n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 1}},
		{gcA, hlc.Timestamp{WallTime: 3e9, Logical: 0}, aKeys, [][]byte{d, n, n}, 0, hlc.Timestamp{WallTime: 2e9, Logical: 0}},
		{gcB, hlc.Timestamp{WallTime: 3e9, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}},
		{gcA, hlc.Timestamp{WallTime: 4e9, Logical: 0}, aKeys, [][]byte{n, n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 1}},
		{gcB, hlc.Timestamp{WallTime: 4e9, Logical: 0}, bKeys, [][]byte{n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 0}},
		{gcB, hlc.Timestamp{WallTime: 4e9, Logical: 0}, bKeys, [][]byte{d, n}, 0, hlc.Timestamp{WallTime: 2e9, Logical: 0}},
		{gcA, hlc.Timestamp{WallTime: 5e9, Logical: 0}, aKeys, [][]byte{n, n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 1}},
		{gcB, hlc.Timestamp{WallTime: 5e9, Logical: 0}, bKeys, [][]byte{n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 0}},
		{gcB, hlc.Timestamp{WallTime: 5e9, Logical: 0}, bKeys, [][]byte{d, n}, 0, hlc.Timestamp{WallTime: 2e9, Logical: 0}},
	}
	for i, test := range testData {
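		// Derive the threshold for this case's notion of "now" by hand; this
		// mirrors CalculateThreshold: threshold = now - TTLSeconds, in
		// nanoseconds of wall time.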
		test.gc.Threshold = test.time
		test.gc.Threshold.WallTime -= int64(test.gc.policy.TTLSeconds) * 1e9
		idx, delTS := test.gc.Filter(test.keys, test.values)
		if idx != test.expIdx {
			t.Errorf("%d: expected index %d; got %d", i, test.expIdx, idx)
		}
		if delTS != test.expDelTS {
			t.Errorf("%d: expected deletion timestamp %s; got %s", i, test.expDelTS, delTS)
		}
	}
}