github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/txnrecovery/manager.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package txnrecovery
    12  
    13  import (
    14  	"context"
    15  	"sort"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/kv"
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    20  	"github.com/cockroachdb/cockroach/pkg/util/log"
    21  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    22  	"github.com/cockroachdb/cockroach/pkg/util/syncutil/singleflight"
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
    26  // Manager organizes the recovery of transactions whose states require global
    27  // (as opposed to local) coordination to transition away from.
    28  type Manager interface {
    29  	// ResolveIndeterminateCommit attempts to resolve the status of transactions
    30  	// that have been abandoned while in the STAGING state, attempting to commit.
    31  	// Unlike most transitions in the transaction state machine, moving from the
    32  	// STAGING state to any other state requires global coordination instead of
    33  	// localized coordination. This method performs this coordination with the
    34  	// goal of finalizing the transaction as either COMMITTED or ABORTED.
    35  	//
    36  	// The method may also return a transaction in any other state if it is
    37  	// discovered to still be live and undergoing state transitions.
    38  	ResolveIndeterminateCommit(
    39  		context.Context, *roachpb.IndeterminateCommitError,
    40  	) (*roachpb.Transaction, error)
    41  
    42  	// Metrics returns the Manager's metrics struct.
    43  	Metrics() Metrics
    44  }
    45  
    46  const (
    47  	// defaultTaskLimit is the maximum number of recovery processes that may be
    48  	// run concurrently. Once this limit is reached, future attempts to resolve
    49  	// indeterminate transaction commits will wait until other attempts complete.
    50  	defaultTaskLimit = 1024
    51  
    52  	// defaultBatchSize is the maximum number of intents that will be queried in
    53  	// a single batch. Batches that span many ranges will be split into many
    54  	// batches by the DistSender.
    55  	defaultBatchSize = 128
    56  )
    57  
    58  // manager implements the Manager interface.
    59  type manager struct {
    60  	log.AmbientContext
    61  
    62  	clock   *hlc.Clock
    63  	db      *kv.DB
    64  	stopper *stop.Stopper
    65  	metrics Metrics
    66  	txns    singleflight.Group
    67  	sem     chan struct{}
    68  }
    69  
    70  // NewManager returns an implementation of a transaction recovery Manager.
    71  func NewManager(ac log.AmbientContext, clock *hlc.Clock, db *kv.DB, stopper *stop.Stopper) Manager {
    72  	ac.AddLogTag("txn-recovery", nil)
    73  	return &manager{
    74  		AmbientContext: ac,
    75  		clock:          clock,
    76  		db:             db,
    77  		stopper:        stopper,
    78  		metrics:        makeMetrics(),
    79  		sem:            make(chan struct{}, defaultTaskLimit),
    80  	}
    81  }
    82  
    83  // ResolveIndeterminateCommit implements the Manager interface.
    84  func (m *manager) ResolveIndeterminateCommit(
    85  	ctx context.Context, ice *roachpb.IndeterminateCommitError,
    86  ) (*roachpb.Transaction, error) {
    87  	txn := &ice.StagingTxn
    88  	if txn.Status != roachpb.STAGING {
    89  		return nil, errors.Errorf("IndeterminateCommitError with non-STAGING transaction: %v", txn)
    90  	}
    91  
    92  	// Launch a single-flight task to recover the transaction. This may be
    93  	// coalesced with other recovery attempts for the same transaction.
    94  	log.VEventf(ctx, 2, "recovering txn %s from indeterminate commit", txn.ID.Short())
    95  	resC, _ := m.txns.DoChan(txn.ID.String(), func() (interface{}, error) {
    96  		return m.resolveIndeterminateCommitForTxn(txn)
    97  	})
    98  
    99  	// Wait for the inflight request.
   100  	select {
   101  	case res := <-resC:
   102  		if res.Err != nil {
   103  			log.VEventf(ctx, 2, "recovery error: %v", res.Err)
   104  			return nil, res.Err
   105  		}
   106  		txn := res.Val.(*roachpb.Transaction)
   107  		log.VEventf(ctx, 2, "recovered txn %s with status: %s", txn.ID.Short(), txn.Status)
   108  		return txn, nil
   109  	case <-ctx.Done():
   110  		return nil, errors.Wrap(ctx.Err(), "abandoned indeterminate commit recovery")
   111  	}
   112  }
   113  
   114  // resolveIndeterminateCommitForTxn attempts to to resolve the status of
   115  // transactions that have been abandoned while in the STAGING state, attempting
   116  // to commit. It does so by first querying each of the transaction's in-flight
   117  // writes to determine whether any of them failed, trying to prevent at least
   118  // one of them. While doing so, it also monitors the state of the transaction
   119  // and returns early if it ever changes. Once the result of all in-flight writes
   120  // is determined, the method issues a RecoverTxn request with a summary of their
   121  // outcome.
   122  func (m *manager) resolveIndeterminateCommitForTxn(
   123  	txn *roachpb.Transaction,
   124  ) (resTxn *roachpb.Transaction, resErr error) {
   125  	// Record the recovery attempt in the Manager's metrics.
   126  	onComplete := m.updateMetrics()
   127  	defer func() { onComplete(resTxn, resErr) }()
   128  
   129  	// TODO(nvanbenschoten): Set up tracing.
   130  	ctx := m.AnnotateCtx(context.Background())
   131  
   132  	// Launch the recovery task.
   133  	resErr = m.stopper.RunTaskWithErr(ctx,
   134  		"recovery.manager: resolving indeterminate commit",
   135  		func(ctx context.Context) error {
   136  			// Grab semaphore with defaultTaskLimit.
   137  			select {
   138  			case m.sem <- struct{}{}:
   139  				defer func() { <-m.sem }()
   140  			case <-m.stopper.ShouldQuiesce():
   141  				return stop.ErrUnavailable
   142  			}
   143  
   144  			// We probe to determine whether the transaction is implicitly
   145  			// committed or not. If not, we prevent it from ever becoming
   146  			// implicitly committed at this (epoch, timestamp) pair.
   147  			preventedIntent, changedTxn, err := m.resolveIndeterminateCommitForTxnProbe(ctx, txn)
   148  			if err != nil {
   149  				return err
   150  			}
   151  			if changedTxn != nil {
   152  				resTxn = changedTxn
   153  				return nil
   154  			}
   155  
   156  			// Now that we know whether the transaction was implicitly committed
   157  			// or not (implicitly committed = !preventedIntent), we attempt to
   158  			// recover it. If this succeeds, it will either move the transaction
   159  			// record to a COMMITTED or ABORTED status.
   160  			resTxn, err = m.resolveIndeterminateCommitForTxnRecover(ctx, txn, preventedIntent)
   161  			return err
   162  		},
   163  	)
   164  	return resTxn, resErr
   165  }
   166  
   167  // resolveIndeterminateCommitForTxnProbe performs the "probing phase" of the
   168  // indeterminate commit resolution process. This phase queries each of the
   169  // transaction's in-flight writes to determine whether any of them failed,
   170  // trying to prevent at least one of them. While doing so, it also monitors the
   171  // state of the transaction and returns early if it ever changes.
   172  func (m *manager) resolveIndeterminateCommitForTxnProbe(
   173  	ctx context.Context, txn *roachpb.Transaction,
   174  ) (preventedIntent bool, changedTxn *roachpb.Transaction, err error) {
   175  	// Create a QueryTxnRequest that we will periodically send to the
   176  	// transaction's record during recovery processing.
   177  	queryTxnReq := roachpb.QueryTxnRequest{
   178  		RequestHeader: roachpb.RequestHeader{
   179  			Key: txn.Key,
   180  		},
   181  		Txn:           txn.TxnMeta,
   182  		WaitForUpdate: false,
   183  	}
   184  
   185  	// Create a QueryIntentRequest for each of the transaction's in-flight
   186  	// writes. We will attempt to prove that all have succeeded using these
   187  	// requests. There are two possible outcomes from this probing:
   188  	// 1. we find that all of the transaction's in-flight writes at the time that
   189  	//    it was staged to commit have succeeded in being written. This is all the
   190  	//    evidence that we need in order to declare the transaction "implicitly
   191  	//    committed", at which point we can mark it as "explicitly committed" by
   192  	//    moving the transaction's record from the STAGING state to the COMMITTED
   193  	//    state.
   194  	// 2. we find that one or more of the transaction's in-flight writes at the
   195  	//    time that it was staged to commit have not yet succeeded. In this case,
   196  	//    the QueryIntent that found the missing in-flight write atomically ensures
   197  	//    that the intent write will never succeed in the future (NOTE: this is a
   198  	//    side-effect of any QueryIntent request that finds a missing intent). This
   199  	//    guarantees that if we determine that the transaction cannot be committed,
   200  	//    the write we're searching for can never occur after we observe it to be
   201  	//    missing (for instance, if it was delayed) and cause others to determine
   202  	//    that the transaction can be committed. After it has done so, we have all
   203  	//    the evidence that we need in order to declare the transaction commit a
   204  	//    failure and move the transaction's record from the STAGING state to the
   205  	//    ABORTED state. Moving the transaction's record to the ABORTED state will
   206  	//    succeed if the transaction hasn't made any updates to its transaction
   207  	//    record (e.g. if the record has been abandoned). However, it can fail if
   208  	//    the transaction has already refreshed at a higher timestamp in the
   209  	//    current epoch or restarted at a higher epoch.
   210  	queryIntentReqs := make([]roachpb.QueryIntentRequest, 0, len(txn.InFlightWrites))
   211  	for _, w := range txn.InFlightWrites {
   212  		meta := txn.TxnMeta
   213  		meta.Sequence = w.Sequence
   214  		queryIntentReqs = append(queryIntentReqs, roachpb.QueryIntentRequest{
   215  			RequestHeader: roachpb.RequestHeader{
   216  				Key: w.Key,
   217  			},
   218  			Txn: meta,
   219  		})
   220  	}
   221  
   222  	// Sort the query intent requests to maximize batching by range.
   223  	sort.Slice(queryIntentReqs, func(i, j int) bool {
   224  		return queryIntentReqs[i].Header().Key.Compare(queryIntentReqs[j].Header().Key) < 0
   225  	})
   226  
   227  	// Query all of the intents in batches of size defaultBatchSize. The maximum
   228  	// timeout is defaultTimeout, and this is applied to each batch to ensure
   229  	// forward progress is made. A large set of intents might require more time
   230  	// than a single timeout allows.
   231  	//
   232  	// We begin each batch with a query of the transaction's record as well,
   233  	// which will be issued in parallel with the query intent requests. This
   234  	// allows us to break out of recovery processing early if recovery is
   235  	// completed by some other actor before us, or if the transaction begins
   236  	// changes, indicating activity.
   237  	//
   238  	// Loop until either the transaction is observed to change, an in-flight
   239  	// write is prevented, or we run out of in-flight writes to query.
   240  	for len(queryIntentReqs) > 0 {
   241  		var b kv.Batch
   242  		b.Header.Timestamp = m.clock.Now()
   243  		b.AddRawRequest(&queryTxnReq)
   244  		for i := 0; i < defaultBatchSize && len(queryIntentReqs) > 0; i++ {
   245  			b.AddRawRequest(&queryIntentReqs[0])
   246  			queryIntentReqs = queryIntentReqs[1:]
   247  		}
   248  
   249  		if err := m.db.Run(ctx, &b); err != nil {
   250  			// Bail out on the first error.
   251  			return false, nil, err
   252  		}
   253  
   254  		// First, check the QueryTxnResponse to determine whether the
   255  		// state of the transaction record has changed since we began
   256  		// the recovery process.
   257  		resps := b.RawResponse().Responses
   258  		queryTxnResp := resps[0].GetInner().(*roachpb.QueryTxnResponse)
   259  		queriedTxn := &queryTxnResp.QueriedTxn
   260  		if queriedTxn.Status.IsFinalized() ||
   261  			txn.Epoch < queriedTxn.Epoch ||
   262  			txn.WriteTimestamp.Less(queriedTxn.WriteTimestamp) {
   263  			// The transaction was already found to have changed.
   264  			// No need to issue a RecoverTxnRequest, just return
   265  			// the transaction as is.
   266  			return false, queriedTxn, nil
   267  		}
   268  
   269  		// Next, look through the QueryIntentResponses to check whether
   270  		// any of the in-flight writes failed.
   271  		for _, ru := range resps[1:] {
   272  			queryIntentResp := ru.GetInner().(*roachpb.QueryIntentResponse)
   273  			if !queryIntentResp.FoundIntent {
   274  				return true /* preventedIntent */, nil, nil
   275  			}
   276  		}
   277  	}
   278  	return false /* preventedIntent */, nil, nil
   279  }
   280  
   281  // resolveIndeterminateCommitForTxnRecover performs the "recovery phase" of the
   282  // indeterminate commit resolution process. Using the result of the probing
   283  // phase, recovery issues a RecoverTxn request to resolve the state of the
   284  // transaction.
   285  //
   286  // The method will return a finalized transaction if the RecoverTxn request
   287  // succeeds, but it may also return a transaction in any other state if it is
   288  // discovered to still be live and undergoing state transitions. The only
   289  // guarantee is that the returned transaction will not be in an identical state
   290  // to that of the transaction provided.
   291  func (m *manager) resolveIndeterminateCommitForTxnRecover(
   292  	ctx context.Context, txn *roachpb.Transaction, preventedIntent bool,
   293  ) (*roachpb.Transaction, error) {
   294  	var b kv.Batch
   295  	b.Header.Timestamp = m.clock.Now()
   296  	b.AddRawRequest(&roachpb.RecoverTxnRequest{
   297  		RequestHeader: roachpb.RequestHeader{
   298  			Key: txn.Key,
   299  		},
   300  		Txn:                 txn.TxnMeta,
   301  		ImplicitlyCommitted: !preventedIntent,
   302  	})
   303  
   304  	if err := m.db.Run(ctx, &b); err != nil {
   305  		return nil, err
   306  	}
   307  
   308  	resps := b.RawResponse().Responses
   309  	recTxnResp := resps[0].GetInner().(*roachpb.RecoverTxnResponse)
   310  	return &recTxnResp.RecoveredTxn, nil
   311  }
   312  
   313  // Metrics implements the Manager interface.
   314  func (m *manager) Metrics() Metrics {
   315  	return m.metrics
   316  }
   317  
   318  // updateMetrics updates the Manager's metrics to account for a new
   319  // transaction recovery attempt. It returns a function that should
   320  // be called when the recovery attempt completes.
   321  func (m *manager) updateMetrics() func(*roachpb.Transaction, error) {
   322  	m.metrics.AttemptsPending.Inc(1)
   323  	m.metrics.Attempts.Inc(1)
   324  	return func(txn *roachpb.Transaction, err error) {
   325  		m.metrics.AttemptsPending.Dec(1)
   326  		if err != nil {
   327  			m.metrics.Failures.Inc(1)
   328  		} else {
   329  			switch txn.Status {
   330  			case roachpb.COMMITTED:
   331  				m.metrics.SuccessesAsCommitted.Inc(1)
   332  			case roachpb.ABORTED:
   333  				m.metrics.SuccessesAsAborted.Inc(1)
   334  			case roachpb.PENDING, roachpb.STAGING:
   335  				m.metrics.SuccessesAsPending.Inc(1)
   336  			default:
   337  				panic("unexpected")
   338  			}
   339  		}
   340  	}
   341  }