github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/txnrecovery/manager.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package txnrecovery 12 13 import ( 14 "context" 15 "sort" 16 17 "github.com/cockroachdb/cockroach/pkg/kv" 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 "github.com/cockroachdb/cockroach/pkg/util/hlc" 20 "github.com/cockroachdb/cockroach/pkg/util/log" 21 "github.com/cockroachdb/cockroach/pkg/util/stop" 22 "github.com/cockroachdb/cockroach/pkg/util/syncutil/singleflight" 23 "github.com/cockroachdb/errors" 24 ) 25 26 // Manager organizes the recovery of transactions whose states require global 27 // (as opposed to local) coordination to transition away from. 28 type Manager interface { 29 // ResolveIndeterminateCommit attempts to resolve the status of transactions 30 // that have been abandoned while in the STAGING state, attempting to commit. 31 // Unlike most transitions in the transaction state machine, moving from the 32 // STAGING state to any other state requires global coordination instead of 33 // localized coordination. This method performs this coordination with the 34 // goal of finalizing the transaction as either COMMITTED or ABORTED. 35 // 36 // The method may also return a transaction in any other state if it is 37 // discovered to still be live and undergoing state transitions. 38 ResolveIndeterminateCommit( 39 context.Context, *roachpb.IndeterminateCommitError, 40 ) (*roachpb.Transaction, error) 41 42 // Metrics returns the Manager's metrics struct. 43 Metrics() Metrics 44 } 45 46 const ( 47 // defaultTaskLimit is the maximum number of recovery processes that may be 48 // run concurrently. Once this limit is reached, future attempts to resolve 49 // indeterminate transaction commits will wait until other attempts complete. 50 defaultTaskLimit = 1024 51 52 // defaultBatchSize is the maximum number of intents that will be queried in 53 // a single batch. Batches that span many ranges will be split into many 54 // batches by the DistSender. 55 defaultBatchSize = 128 56 ) 57 58 // manager implements the Manager interface. 59 type manager struct { 60 log.AmbientContext 61 62 clock *hlc.Clock 63 db *kv.DB 64 stopper *stop.Stopper 65 metrics Metrics 66 txns singleflight.Group 67 sem chan struct{} 68 } 69 70 // NewManager returns an implementation of a transaction recovery Manager. 71 func NewManager(ac log.AmbientContext, clock *hlc.Clock, db *kv.DB, stopper *stop.Stopper) Manager { 72 ac.AddLogTag("txn-recovery", nil) 73 return &manager{ 74 AmbientContext: ac, 75 clock: clock, 76 db: db, 77 stopper: stopper, 78 metrics: makeMetrics(), 79 sem: make(chan struct{}, defaultTaskLimit), 80 } 81 } 82 83 // ResolveIndeterminateCommit implements the Manager interface. 84 func (m *manager) ResolveIndeterminateCommit( 85 ctx context.Context, ice *roachpb.IndeterminateCommitError, 86 ) (*roachpb.Transaction, error) { 87 txn := &ice.StagingTxn 88 if txn.Status != roachpb.STAGING { 89 return nil, errors.Errorf("IndeterminateCommitError with non-STAGING transaction: %v", txn) 90 } 91 92 // Launch a single-flight task to recover the transaction. This may be 93 // coalesced with other recovery attempts for the same transaction. 94 log.VEventf(ctx, 2, "recovering txn %s from indeterminate commit", txn.ID.Short()) 95 resC, _ := m.txns.DoChan(txn.ID.String(), func() (interface{}, error) { 96 return m.resolveIndeterminateCommitForTxn(txn) 97 }) 98 99 // Wait for the inflight request. 100 select { 101 case res := <-resC: 102 if res.Err != nil { 103 log.VEventf(ctx, 2, "recovery error: %v", res.Err) 104 return nil, res.Err 105 } 106 txn := res.Val.(*roachpb.Transaction) 107 log.VEventf(ctx, 2, "recovered txn %s with status: %s", txn.ID.Short(), txn.Status) 108 return txn, nil 109 case <-ctx.Done(): 110 return nil, errors.Wrap(ctx.Err(), "abandoned indeterminate commit recovery") 111 } 112 } 113 114 // resolveIndeterminateCommitForTxn attempts to to resolve the status of 115 // transactions that have been abandoned while in the STAGING state, attempting 116 // to commit. It does so by first querying each of the transaction's in-flight 117 // writes to determine whether any of them failed, trying to prevent at least 118 // one of them. While doing so, it also monitors the state of the transaction 119 // and returns early if it ever changes. Once the result of all in-flight writes 120 // is determined, the method issues a RecoverTxn request with a summary of their 121 // outcome. 122 func (m *manager) resolveIndeterminateCommitForTxn( 123 txn *roachpb.Transaction, 124 ) (resTxn *roachpb.Transaction, resErr error) { 125 // Record the recovery attempt in the Manager's metrics. 126 onComplete := m.updateMetrics() 127 defer func() { onComplete(resTxn, resErr) }() 128 129 // TODO(nvanbenschoten): Set up tracing. 130 ctx := m.AnnotateCtx(context.Background()) 131 132 // Launch the recovery task. 133 resErr = m.stopper.RunTaskWithErr(ctx, 134 "recovery.manager: resolving indeterminate commit", 135 func(ctx context.Context) error { 136 // Grab semaphore with defaultTaskLimit. 137 select { 138 case m.sem <- struct{}{}: 139 defer func() { <-m.sem }() 140 case <-m.stopper.ShouldQuiesce(): 141 return stop.ErrUnavailable 142 } 143 144 // We probe to determine whether the transaction is implicitly 145 // committed or not. If not, we prevent it from ever becoming 146 // implicitly committed at this (epoch, timestamp) pair. 147 preventedIntent, changedTxn, err := m.resolveIndeterminateCommitForTxnProbe(ctx, txn) 148 if err != nil { 149 return err 150 } 151 if changedTxn != nil { 152 resTxn = changedTxn 153 return nil 154 } 155 156 // Now that we know whether the transaction was implicitly committed 157 // or not (implicitly committed = !preventedIntent), we attempt to 158 // recover it. If this succeeds, it will either move the transaction 159 // record to a COMMITTED or ABORTED status. 160 resTxn, err = m.resolveIndeterminateCommitForTxnRecover(ctx, txn, preventedIntent) 161 return err 162 }, 163 ) 164 return resTxn, resErr 165 } 166 167 // resolveIndeterminateCommitForTxnProbe performs the "probing phase" of the 168 // indeterminate commit resolution process. This phase queries each of the 169 // transaction's in-flight writes to determine whether any of them failed, 170 // trying to prevent at least one of them. While doing so, it also monitors the 171 // state of the transaction and returns early if it ever changes. 172 func (m *manager) resolveIndeterminateCommitForTxnProbe( 173 ctx context.Context, txn *roachpb.Transaction, 174 ) (preventedIntent bool, changedTxn *roachpb.Transaction, err error) { 175 // Create a QueryTxnRequest that we will periodically send to the 176 // transaction's record during recovery processing. 177 queryTxnReq := roachpb.QueryTxnRequest{ 178 RequestHeader: roachpb.RequestHeader{ 179 Key: txn.Key, 180 }, 181 Txn: txn.TxnMeta, 182 WaitForUpdate: false, 183 } 184 185 // Create a QueryIntentRequest for each of the transaction's in-flight 186 // writes. We will attempt to prove that all have succeeded using these 187 // requests. There are two possible outcomes from this probing: 188 // 1. we find that all of the transaction's in-flight writes at the time that 189 // it was staged to commit have succeeded in being written. This is all the 190 // evidence that we need in order to declare the transaction "implicitly 191 // committed", at which point we can mark it as "explicitly committed" by 192 // moving the transaction's record from the STAGING state to the COMMITTED 193 // state. 194 // 2. we find that one or more of the transaction's in-flight writes at the 195 // time that it was staged to commit have not yet succeeded. In this case, 196 // the QueryIntent that found the missing in-flight write atomically ensures 197 // that the intent write will never succeed in the future (NOTE: this is a 198 // side-effect of any QueryIntent request that finds a missing intent). This 199 // guarantees that if we determine that the transaction cannot be committed, 200 // the write we're searching for can never occur after we observe it to be 201 // missing (for instance, if it was delayed) and cause others to determine 202 // that the transaction can be committed. After it has done so, we have all 203 // the evidence that we need in order to declare the transaction commit a 204 // failure and move the transaction's record from the STAGING state to the 205 // ABORTED state. Moving the transaction's record to the ABORTED state will 206 // succeed if the transaction hasn't made any updates to its transaction 207 // record (e.g. if the record has been abandoned). However, it can fail if 208 // the transaction has already refreshed at a higher timestamp in the 209 // current epoch or restarted at a higher epoch. 210 queryIntentReqs := make([]roachpb.QueryIntentRequest, 0, len(txn.InFlightWrites)) 211 for _, w := range txn.InFlightWrites { 212 meta := txn.TxnMeta 213 meta.Sequence = w.Sequence 214 queryIntentReqs = append(queryIntentReqs, roachpb.QueryIntentRequest{ 215 RequestHeader: roachpb.RequestHeader{ 216 Key: w.Key, 217 }, 218 Txn: meta, 219 }) 220 } 221 222 // Sort the query intent requests to maximize batching by range. 223 sort.Slice(queryIntentReqs, func(i, j int) bool { 224 return queryIntentReqs[i].Header().Key.Compare(queryIntentReqs[j].Header().Key) < 0 225 }) 226 227 // Query all of the intents in batches of size defaultBatchSize. The maximum 228 // timeout is defaultTimeout, and this is applied to each batch to ensure 229 // forward progress is made. A large set of intents might require more time 230 // than a single timeout allows. 231 // 232 // We begin each batch with a query of the transaction's record as well, 233 // which will be issued in parallel with the query intent requests. This 234 // allows us to break out of recovery processing early if recovery is 235 // completed by some other actor before us, or if the transaction begins 236 // changes, indicating activity. 237 // 238 // Loop until either the transaction is observed to change, an in-flight 239 // write is prevented, or we run out of in-flight writes to query. 240 for len(queryIntentReqs) > 0 { 241 var b kv.Batch 242 b.Header.Timestamp = m.clock.Now() 243 b.AddRawRequest(&queryTxnReq) 244 for i := 0; i < defaultBatchSize && len(queryIntentReqs) > 0; i++ { 245 b.AddRawRequest(&queryIntentReqs[0]) 246 queryIntentReqs = queryIntentReqs[1:] 247 } 248 249 if err := m.db.Run(ctx, &b); err != nil { 250 // Bail out on the first error. 251 return false, nil, err 252 } 253 254 // First, check the QueryTxnResponse to determine whether the 255 // state of the transaction record has changed since we began 256 // the recovery process. 257 resps := b.RawResponse().Responses 258 queryTxnResp := resps[0].GetInner().(*roachpb.QueryTxnResponse) 259 queriedTxn := &queryTxnResp.QueriedTxn 260 if queriedTxn.Status.IsFinalized() || 261 txn.Epoch < queriedTxn.Epoch || 262 txn.WriteTimestamp.Less(queriedTxn.WriteTimestamp) { 263 // The transaction was already found to have changed. 264 // No need to issue a RecoverTxnRequest, just return 265 // the transaction as is. 266 return false, queriedTxn, nil 267 } 268 269 // Next, look through the QueryIntentResponses to check whether 270 // any of the in-flight writes failed. 271 for _, ru := range resps[1:] { 272 queryIntentResp := ru.GetInner().(*roachpb.QueryIntentResponse) 273 if !queryIntentResp.FoundIntent { 274 return true /* preventedIntent */, nil, nil 275 } 276 } 277 } 278 return false /* preventedIntent */, nil, nil 279 } 280 281 // resolveIndeterminateCommitForTxnRecover performs the "recovery phase" of the 282 // indeterminate commit resolution process. Using the result of the probing 283 // phase, recovery issues a RecoverTxn request to resolve the state of the 284 // transaction. 285 // 286 // The method will return a finalized transaction if the RecoverTxn request 287 // succeeds, but it may also return a transaction in any other state if it is 288 // discovered to still be live and undergoing state transitions. The only 289 // guarantee is that the returned transaction will not be in an identical state 290 // to that of the transaction provided. 291 func (m *manager) resolveIndeterminateCommitForTxnRecover( 292 ctx context.Context, txn *roachpb.Transaction, preventedIntent bool, 293 ) (*roachpb.Transaction, error) { 294 var b kv.Batch 295 b.Header.Timestamp = m.clock.Now() 296 b.AddRawRequest(&roachpb.RecoverTxnRequest{ 297 RequestHeader: roachpb.RequestHeader{ 298 Key: txn.Key, 299 }, 300 Txn: txn.TxnMeta, 301 ImplicitlyCommitted: !preventedIntent, 302 }) 303 304 if err := m.db.Run(ctx, &b); err != nil { 305 return nil, err 306 } 307 308 resps := b.RawResponse().Responses 309 recTxnResp := resps[0].GetInner().(*roachpb.RecoverTxnResponse) 310 return &recTxnResp.RecoveredTxn, nil 311 } 312 313 // Metrics implements the Manager interface. 314 func (m *manager) Metrics() Metrics { 315 return m.metrics 316 } 317 318 // updateMetrics updates the Manager's metrics to account for a new 319 // transaction recovery attempt. It returns a function that should 320 // be called when the recovery attempt completes. 321 func (m *manager) updateMetrics() func(*roachpb.Transaction, error) { 322 m.metrics.AttemptsPending.Inc(1) 323 m.metrics.Attempts.Inc(1) 324 return func(txn *roachpb.Transaction, err error) { 325 m.metrics.AttemptsPending.Dec(1) 326 if err != nil { 327 m.metrics.Failures.Inc(1) 328 } else { 329 switch txn.Status { 330 case roachpb.COMMITTED: 331 m.metrics.SuccessesAsCommitted.Inc(1) 332 case roachpb.ABORTED: 333 m.metrics.SuccessesAsAborted.Inc(1) 334 case roachpb.PENDING, roachpb.STAGING: 335 m.metrics.SuccessesAsPending.Inc(1) 336 default: 337 panic("unexpected") 338 } 339 } 340 } 341 }