github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/abortspan/abortspan.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package abortspan
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/keys"
    17  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/storage"
    20  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    21  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    22  	"github.com/cockroachdb/cockroach/pkg/util/log"
    23  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
// An AbortSpan sets markers for aborted transactions to provide protection
// against an aborted but active transaction not reading values it wrote (due to
// its intents having been removed).
//
// The span is range-specific. It is updated when an intent for an aborted txn
// is cleared from a range, and is consulted before read commands are processed
// on a range.
//
// An AbortSpan is not thread safe.
type AbortSpan struct {
	// rangeID identifies the range whose abort span keys this instance
	// builds and operates on.
	rangeID roachpb.RangeID
}
    39  
    40  // New returns a new AbortSpan. Every range replica
    41  // maintains an AbortSpan, not just the lease holder.
    42  func New(rangeID roachpb.RangeID) *AbortSpan {
    43  	return &AbortSpan{
    44  		rangeID: rangeID,
    45  	}
    46  }
    47  
    48  func fillUUID(b byte) uuid.UUID {
    49  	var ret uuid.UUID
    50  	for i := range ret.GetBytes() {
    51  		ret[i] = b
    52  	}
    53  	return ret
    54  }
    55  
    56  var txnIDMin = fillUUID('\x00')
    57  var txnIDMax = fillUUID('\xff')
    58  
    59  // MinKey returns the lower bound of the key span associated to an instance for the given RangeID.
    60  func MinKey(rangeID roachpb.RangeID) roachpb.Key {
    61  	return keys.AbortSpanKey(rangeID, txnIDMin)
    62  }
    63  
    64  func (sc *AbortSpan) min() roachpb.Key {
    65  	return MinKey(sc.rangeID)
    66  }
    67  
    68  // MaxKey returns the upper bound of the key span associated to an instance for the given RangeID.
    69  func MaxKey(rangeID roachpb.RangeID) roachpb.Key {
    70  	return keys.AbortSpanKey(rangeID, txnIDMax)
    71  }
    72  
    73  func (sc *AbortSpan) max() roachpb.Key {
    74  	return MaxKey(sc.rangeID)
    75  }
    76  
    77  // ClearData removes all persisted items stored in the cache.
    78  func (sc *AbortSpan) ClearData(e storage.Engine) error {
    79  	iter := e.NewIterator(storage.IterOptions{UpperBound: sc.max()})
    80  	defer iter.Close()
    81  	b := e.NewWriteOnlyBatch()
    82  	defer b.Close()
    83  	err := b.ClearIterRange(iter, sc.min(), sc.max())
    84  	if err != nil {
    85  		return err
    86  	}
    87  	return b.Commit(false /* sync */)
    88  }
    89  
    90  // Get looks up an AbortSpan entry recorded for this transaction ID.
    91  // Returns whether an abort record was found and any error.
    92  func (sc *AbortSpan) Get(
    93  	ctx context.Context, reader storage.Reader, txnID uuid.UUID, entry *roachpb.AbortSpanEntry,
    94  ) (bool, error) {
    95  	// Pull response from disk and read into reply if available.
    96  	key := keys.AbortSpanKey(sc.rangeID, txnID)
    97  	ok, err := storage.MVCCGetProto(ctx, reader, key, hlc.Timestamp{}, entry, storage.MVCCGetOptions{})
    98  	return ok, err
    99  }
   100  
   101  // Iterate walks through the AbortSpan, invoking the given callback for
   102  // each unmarshaled entry with the MVCC key and the decoded entry.
   103  func (sc *AbortSpan) Iterate(
   104  	ctx context.Context, reader storage.Reader, f func(roachpb.Key, roachpb.AbortSpanEntry) error,
   105  ) error {
   106  	_, err := storage.MVCCIterate(ctx, reader, sc.min(), sc.max(), hlc.Timestamp{}, storage.MVCCScanOptions{},
   107  		func(kv roachpb.KeyValue) (bool, error) {
   108  			var entry roachpb.AbortSpanEntry
   109  			if _, err := keys.DecodeAbortSpanKey(kv.Key, nil); err != nil {
   110  				return false, err
   111  			}
   112  			if err := kv.Value.GetProto(&entry); err != nil {
   113  				return false, err
   114  			}
   115  			return false, f(kv.Key, entry)
   116  		})
   117  	return err
   118  }
   119  
   120  // Del removes all AbortSpan entries for the given transaction.
   121  func (sc *AbortSpan) Del(
   122  	ctx context.Context, reader storage.ReadWriter, ms *enginepb.MVCCStats, txnID uuid.UUID,
   123  ) error {
   124  	key := keys.AbortSpanKey(sc.rangeID, txnID)
   125  	return storage.MVCCDelete(ctx, reader, ms, key, hlc.Timestamp{}, nil /* txn */)
   126  }
   127  
   128  // Put writes an entry for the specified transaction ID.
   129  func (sc *AbortSpan) Put(
   130  	ctx context.Context,
   131  	readWriter storage.ReadWriter,
   132  	ms *enginepb.MVCCStats,
   133  	txnID uuid.UUID,
   134  	entry *roachpb.AbortSpanEntry,
   135  ) error {
   136  	key := keys.AbortSpanKey(sc.rangeID, txnID)
   137  	return storage.MVCCPutProto(ctx, readWriter, ms, key, hlc.Timestamp{}, nil /* txn */, entry)
   138  }
   139  
   140  // CopyTo copies the abort span entries to the abort span for the range
   141  // identified by newRangeID. Entries are read from r and written to w. It is
   142  // safe for r and w to be the same object.
   143  //
   144  // CopyTo takes care to only copy records that are required: certain workloads
   145  // create sizable abort spans, and repeated splitting can blow them up further.
   146  // Once it reaches approximately the Raft MaxCommandSize, splits become
   147  // impossible, which is pretty bad (see #25233).
   148  func (sc *AbortSpan) CopyTo(
   149  	ctx context.Context,
   150  	r storage.Reader,
   151  	w storage.ReadWriter,
   152  	ms *enginepb.MVCCStats,
   153  	ts hlc.Timestamp,
   154  	newRangeID roachpb.RangeID,
   155  ) error {
   156  	var abortSpanCopyCount, abortSpanSkipCount int
   157  	// Abort span entries before this span are eligible for GC, so we don't
   158  	// copy them into the new range. We could try to delete them from the LHS
   159  	// as well, but that could create a large Raft command in itself. Plus,
   160  	// we'd have to adjust the stats computations.
   161  	threshold := ts.Add(-kvserverbase.TxnCleanupThreshold.Nanoseconds(), 0)
   162  	var scratch [64]byte
   163  	if err := sc.Iterate(ctx, r, func(k roachpb.Key, entry roachpb.AbortSpanEntry) error {
   164  		if entry.Timestamp.Less(threshold) {
   165  			// The entry would be garbage collected (if GC had run), so
   166  			// don't bother copying it. Note that we can't filter on the key,
   167  			// that is just where the txn record lives, but it doesn't tell
   168  			// us whether the intents that triggered the abort span record
   169  			// where on the LHS, RHS, or both.
   170  			abortSpanSkipCount++
   171  			return nil
   172  		}
   173  
   174  		abortSpanCopyCount++
   175  		var txnID uuid.UUID
   176  		txnID, err := keys.DecodeAbortSpanKey(k, scratch[:0])
   177  		if err != nil {
   178  			return err
   179  		}
   180  		return storage.MVCCPutProto(ctx, w, ms,
   181  			keys.AbortSpanKey(newRangeID, txnID),
   182  			hlc.Timestamp{}, nil, &entry,
   183  		)
   184  	}); err != nil {
   185  		return roachpb.NewReplicaCorruptionError(errors.Wrap(err, "AbortSpan.CopyTo"))
   186  	}
   187  	log.Eventf(ctx, "abort span: copied %d entries, skipped %d", abortSpanCopyCount, abortSpanSkipCount)
   188  	return nil
   189  }