github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/rangefeed/task.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rangefeed
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    17  	"github.com/cockroachdb/cockroach/pkg/storage"
    18  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    19  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    20  	"github.com/cockroachdb/cockroach/pkg/util/log"
    21  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
    25  // A runnable can be run as an async task.
    26  type runnable interface {
    27  	// Run executes the runnable. Cannot be called multiple times.
    28  	Run(context.Context)
    29  	// Must be called if runnable is not Run.
    30  	Cancel()
    31  }
    32  
    33  // initResolvedTSScan scans over all keys using the provided iterator and
    34  // informs the rangefeed Processor of any intents. This allows the Processor to
    35  // backfill its unresolvedIntentQueue with any intents that were written before
    36  // the Processor was started and hooked up to a stream of logical operations.
    37  // The Processor can initialize its resolvedTimestamp once the scan completes
    38  // because it knows it is now tracking all intents in its key range.
    39  //
    40  // Iterator Contract:
    41  //   The provided Iterator must observe all intents in the Processor's keyspan.
    42  //   An important implication of this is that if the iterator is a
    43  //   TimeBoundIterator, its MinTimestamp cannot be above the keyspan's largest
    44  //   known resolved timestamp, if one has ever been recorded. If one has never
    45  //   been recorded, the TimeBoundIterator cannot have any lower bound.
    46  //
    47  type initResolvedTSScan struct {
    48  	p  *Processor
    49  	it storage.SimpleIterator
    50  }
    51  
    52  func newInitResolvedTSScan(p *Processor, it storage.SimpleIterator) runnable {
    53  	return &initResolvedTSScan{p: p, it: it}
    54  }
    55  
    56  func (s *initResolvedTSScan) Run(ctx context.Context) {
    57  	defer s.Cancel()
    58  	if err := s.iterateAndConsume(ctx); err != nil {
    59  		err = errors.Wrap(err, "initial resolved timestamp scan failed")
    60  		log.Errorf(ctx, "%v", err)
    61  		s.p.StopWithErr(roachpb.NewError(err))
    62  	} else {
    63  		// Inform the processor that its resolved timestamp can be initialized.
    64  		s.p.setResolvedTSInitialized()
    65  	}
    66  }
    67  
    68  func (s *initResolvedTSScan) iterateAndConsume(ctx context.Context) error {
    69  	startKey := storage.MakeMVCCMetadataKey(s.p.Span.Key.AsRawKey())
    70  	endKey := storage.MakeMVCCMetadataKey(s.p.Span.EndKey.AsRawKey())
    71  
    72  	// Iterate through all keys using NextKey. This will look at the first MVCC
    73  	// version for each key. We're only looking for MVCCMetadata versions, which
    74  	// will always be the first version of a key if it exists, so its fine that
    75  	// we skip over all other versions of keys.
    76  	var meta enginepb.MVCCMetadata
    77  	for s.it.SeekGE(startKey); ; s.it.NextKey() {
    78  		if ok, err := s.it.Valid(); err != nil {
    79  			return err
    80  		} else if !ok || !s.it.UnsafeKey().Less(endKey) {
    81  			break
    82  		}
    83  
    84  		// If the key is not a metadata key, ignore it.
    85  		unsafeKey := s.it.UnsafeKey()
    86  		if unsafeKey.IsValue() {
    87  			continue
    88  		}
    89  
    90  		// Found a metadata key. Unmarshal.
    91  		if err := protoutil.Unmarshal(s.it.UnsafeValue(), &meta); err != nil {
    92  			return errors.Wrapf(err, "unmarshaling mvcc meta: %v", unsafeKey)
    93  		}
    94  
    95  		// If this is an intent, inform the Processor.
    96  		if meta.Txn != nil {
    97  			var ops [1]enginepb.MVCCLogicalOp
    98  			ops[0].SetValue(&enginepb.MVCCWriteIntentOp{
    99  				TxnID:           meta.Txn.ID,
   100  				TxnKey:          meta.Txn.Key,
   101  				TxnMinTimestamp: meta.Txn.MinTimestamp,
   102  				Timestamp:       meta.Txn.WriteTimestamp,
   103  			})
   104  			s.p.sendEvent(event{ops: ops[:]}, 0 /* timeout */)
   105  		}
   106  	}
   107  	return nil
   108  }
   109  
   110  func (s *initResolvedTSScan) Cancel() {
   111  	s.it.Close()
   112  }
   113  
   114  // TxnPusher is capable of pushing transactions to a new timestamp and
   115  // cleaning up the intents of transactions that are found to be committed.
   116  type TxnPusher interface {
   117  	// PushTxns attempts to push the specified transactions to a new
   118  	// timestamp. It returns the resulting transaction protos.
   119  	PushTxns(context.Context, []enginepb.TxnMeta, hlc.Timestamp) ([]*roachpb.Transaction, error)
   120  	// CleanupTxnIntentsAsync asynchronously cleans up intents owned
   121  	// by the specified transactions.
   122  	CleanupTxnIntentsAsync(context.Context, []*roachpb.Transaction) error
   123  }
   124  
   125  // txnPushAttempt pushes all old transactions that have unresolved intents on
   126  // the range which are blocking the resolved timestamp from moving forward. It
   127  // does so in two steps.
   128  // 1. it pushes all old transactions to the current timestamp and gathers
   129  //    up the transactions' authoritative transaction records.
   130  // 2. for each transaction that is pushed, it checks the transaction's current
   131  //    status and reacts accordingly:
   132  //    - PENDING:   inform the Processor that the transaction's timestamp has
   133  //                 increased so that the transaction's intents no longer need
   134  //                 to block the resolved timestamp. Even though the intents
   135  //                 may still be at an older timestamp, we know that they can't
   136  //                 commit at that timestamp.
   137  //    - COMMITTED: launch async processes to resolve the transaction's intents
   138  //                 so they will be resolved sometime soon and unblock the
   139  //                 resolved timestamp.
   140  //    - ABORTED:   inform the Processor to stop caring about the transaction.
   141  //                 It will never commit and its intents can be safely ignored.
   142  type txnPushAttempt struct {
   143  	p     *Processor
   144  	txns  []enginepb.TxnMeta
   145  	ts    hlc.Timestamp
   146  	doneC chan struct{}
   147  }
   148  
   149  func newTxnPushAttempt(
   150  	p *Processor, txns []enginepb.TxnMeta, ts hlc.Timestamp, doneC chan struct{},
   151  ) runnable {
   152  	return &txnPushAttempt{
   153  		p:     p,
   154  		txns:  txns,
   155  		ts:    ts,
   156  		doneC: doneC,
   157  	}
   158  }
   159  
   160  func (a *txnPushAttempt) Run(ctx context.Context) {
   161  	defer a.Cancel()
   162  	if err := a.pushOldTxns(ctx); err != nil {
   163  		log.Errorf(ctx, "pushing old intents failed: %v", err)
   164  	}
   165  }
   166  
   167  func (a *txnPushAttempt) pushOldTxns(ctx context.Context) error {
   168  	// Push all transactions using the TxnPusher to the current time.
   169  	// This may cause transaction restarts, but span refreshing should
   170  	// prevent a restart for any transaction that has not been written
   171  	// over at a larger timestamp.
   172  	pushedTxns, err := a.p.TxnPusher.PushTxns(ctx, a.txns, a.ts)
   173  	if err != nil {
   174  		return err
   175  	}
   176  
   177  	// Inform the Processor of the results of the push for each transaction.
   178  	ops := make([]enginepb.MVCCLogicalOp, len(pushedTxns))
   179  	var toCleanup []*roachpb.Transaction
   180  	for i, txn := range pushedTxns {
   181  		switch txn.Status {
   182  		case roachpb.PENDING, roachpb.STAGING:
   183  			// The transaction is still in progress but its timestamp was moved
   184  			// forward to the current time. Inform the Processor that it can
   185  			// forward the txn's timestamp in its unresolvedIntentQueue.
   186  			ops[i].SetValue(&enginepb.MVCCUpdateIntentOp{
   187  				TxnID:     txn.ID,
   188  				Timestamp: txn.WriteTimestamp,
   189  			})
   190  		case roachpb.COMMITTED:
   191  			// The transaction is committed and its timestamp may have moved
   192  			// forward since we last saw an intent. Inform the Processor
   193  			// immediately in case this is the transaction that is holding back
   194  			// the resolved timestamp. However, we still need to wait for the
   195  			// transaction's intents to actually be resolved.
   196  			ops[i].SetValue(&enginepb.MVCCUpdateIntentOp{
   197  				TxnID:     txn.ID,
   198  				Timestamp: txn.WriteTimestamp,
   199  			})
   200  
   201  			// Asynchronously clean up the transaction's intents, which should
   202  			// eventually cause all unresolved intents for this transaction on the
   203  			// rangefeed's range to be resolved. We'll have to wait until the
   204  			// intents are resolved before the resolved timestamp can advance past
   205  			// the transaction's commit timestamp, so the best we can do is help
   206  			// speed up the resolution.
   207  			toCleanup = append(toCleanup, txn)
   208  		case roachpb.ABORTED:
   209  			// The transaction is aborted, so it doesn't need to be tracked
   210  			// anymore nor does it need to prevent the resolved timestamp from
   211  			// advancing. Inform the Processor that it can remove the txn from
   212  			// its unresolvedIntentQueue.
   213  			//
   214  			// NOTE: the unresolvedIntentQueue will ignore MVCCAbortTxn operations
   215  			// before it has been initialized. This is not a concern here though
   216  			// because we never launch txnPushAttempt tasks before the queue has
   217  			// been initialized.
   218  			ops[i].SetValue(&enginepb.MVCCAbortTxnOp{
   219  				TxnID: txn.ID,
   220  			})
   221  
   222  			// While we're here, we might as well also clean up the transaction's
   223  			// intents so that no-one else needs to deal with them. However, it's
   224  			// likely that if our push caused the abort then the transaction's
   225  			// intents will be unknown and we won't be doing much good here.
   226  			toCleanup = append(toCleanup, txn)
   227  		}
   228  	}
   229  
   230  	// Inform the processor of all logical ops.
   231  	a.p.sendEvent(event{ops: ops}, 0 /* timeout */)
   232  
   233  	// Clean up txns, if necessary,
   234  	return a.p.TxnPusher.CleanupTxnIntentsAsync(ctx, toCleanup)
   235  }
   236  
   237  func (a *txnPushAttempt) Cancel() {
   238  	close(a.doneC)
   239  }