// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package abortspan

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// An AbortSpan sets markers for aborted transactions to provide protection
// against an aborted but active transaction not reading values it wrote (due to
// its intents having been removed).
//
// The span is range-specific. It is updated when an intent for an aborted txn
// is cleared from a range, and is consulted before read commands are processed
// on a range.
//
// An AbortSpan is not thread safe.
type AbortSpan struct {
	// rangeID is the ID of the range whose abort span keys this instance
	// addresses; every key the methods build is derived from it.
	rangeID roachpb.RangeID
}

// New returns a new AbortSpan. Every range replica
// maintains an AbortSpan, not just the lease holder.
42 func New(rangeID roachpb.RangeID) *AbortSpan { 43 return &AbortSpan{ 44 rangeID: rangeID, 45 } 46 } 47 48 func fillUUID(b byte) uuid.UUID { 49 var ret uuid.UUID 50 for i := range ret.GetBytes() { 51 ret[i] = b 52 } 53 return ret 54 } 55 56 var txnIDMin = fillUUID('\x00') 57 var txnIDMax = fillUUID('\xff') 58 59 // MinKey returns the lower bound of the key span associated to an instance for the given RangeID. 60 func MinKey(rangeID roachpb.RangeID) roachpb.Key { 61 return keys.AbortSpanKey(rangeID, txnIDMin) 62 } 63 64 func (sc *AbortSpan) min() roachpb.Key { 65 return MinKey(sc.rangeID) 66 } 67 68 // MaxKey returns the upper bound of the key span associated to an instance for the given RangeID. 69 func MaxKey(rangeID roachpb.RangeID) roachpb.Key { 70 return keys.AbortSpanKey(rangeID, txnIDMax) 71 } 72 73 func (sc *AbortSpan) max() roachpb.Key { 74 return MaxKey(sc.rangeID) 75 } 76 77 // ClearData removes all persisted items stored in the cache. 78 func (sc *AbortSpan) ClearData(e storage.Engine) error { 79 iter := e.NewIterator(storage.IterOptions{UpperBound: sc.max()}) 80 defer iter.Close() 81 b := e.NewWriteOnlyBatch() 82 defer b.Close() 83 err := b.ClearIterRange(iter, sc.min(), sc.max()) 84 if err != nil { 85 return err 86 } 87 return b.Commit(false /* sync */) 88 } 89 90 // Get looks up an AbortSpan entry recorded for this transaction ID. 91 // Returns whether an abort record was found and any error. 92 func (sc *AbortSpan) Get( 93 ctx context.Context, reader storage.Reader, txnID uuid.UUID, entry *roachpb.AbortSpanEntry, 94 ) (bool, error) { 95 // Pull response from disk and read into reply if available. 96 key := keys.AbortSpanKey(sc.rangeID, txnID) 97 ok, err := storage.MVCCGetProto(ctx, reader, key, hlc.Timestamp{}, entry, storage.MVCCGetOptions{}) 98 return ok, err 99 } 100 101 // Iterate walks through the AbortSpan, invoking the given callback for 102 // each unmarshaled entry with the MVCC key and the decoded entry. 
103 func (sc *AbortSpan) Iterate( 104 ctx context.Context, reader storage.Reader, f func(roachpb.Key, roachpb.AbortSpanEntry) error, 105 ) error { 106 _, err := storage.MVCCIterate(ctx, reader, sc.min(), sc.max(), hlc.Timestamp{}, storage.MVCCScanOptions{}, 107 func(kv roachpb.KeyValue) (bool, error) { 108 var entry roachpb.AbortSpanEntry 109 if _, err := keys.DecodeAbortSpanKey(kv.Key, nil); err != nil { 110 return false, err 111 } 112 if err := kv.Value.GetProto(&entry); err != nil { 113 return false, err 114 } 115 return false, f(kv.Key, entry) 116 }) 117 return err 118 } 119 120 // Del removes all AbortSpan entries for the given transaction. 121 func (sc *AbortSpan) Del( 122 ctx context.Context, reader storage.ReadWriter, ms *enginepb.MVCCStats, txnID uuid.UUID, 123 ) error { 124 key := keys.AbortSpanKey(sc.rangeID, txnID) 125 return storage.MVCCDelete(ctx, reader, ms, key, hlc.Timestamp{}, nil /* txn */) 126 } 127 128 // Put writes an entry for the specified transaction ID. 129 func (sc *AbortSpan) Put( 130 ctx context.Context, 131 readWriter storage.ReadWriter, 132 ms *enginepb.MVCCStats, 133 txnID uuid.UUID, 134 entry *roachpb.AbortSpanEntry, 135 ) error { 136 key := keys.AbortSpanKey(sc.rangeID, txnID) 137 return storage.MVCCPutProto(ctx, readWriter, ms, key, hlc.Timestamp{}, nil /* txn */, entry) 138 } 139 140 // CopyTo copies the abort span entries to the abort span for the range 141 // identified by newRangeID. Entries are read from r and written to w. It is 142 // safe for r and w to be the same object. 143 // 144 // CopyTo takes care to only copy records that are required: certain workloads 145 // create sizable abort spans, and repeated splitting can blow them up further. 146 // Once it reaches approximately the Raft MaxCommandSize, splits become 147 // impossible, which is pretty bad (see #25233). 
148 func (sc *AbortSpan) CopyTo( 149 ctx context.Context, 150 r storage.Reader, 151 w storage.ReadWriter, 152 ms *enginepb.MVCCStats, 153 ts hlc.Timestamp, 154 newRangeID roachpb.RangeID, 155 ) error { 156 var abortSpanCopyCount, abortSpanSkipCount int 157 // Abort span entries before this span are eligible for GC, so we don't 158 // copy them into the new range. We could try to delete them from the LHS 159 // as well, but that could create a large Raft command in itself. Plus, 160 // we'd have to adjust the stats computations. 161 threshold := ts.Add(-kvserverbase.TxnCleanupThreshold.Nanoseconds(), 0) 162 var scratch [64]byte 163 if err := sc.Iterate(ctx, r, func(k roachpb.Key, entry roachpb.AbortSpanEntry) error { 164 if entry.Timestamp.Less(threshold) { 165 // The entry would be garbage collected (if GC had run), so 166 // don't bother copying it. Note that we can't filter on the key, 167 // that is just where the txn record lives, but it doesn't tell 168 // us whether the intents that triggered the abort span record 169 // where on the LHS, RHS, or both. 170 abortSpanSkipCount++ 171 return nil 172 } 173 174 abortSpanCopyCount++ 175 var txnID uuid.UUID 176 txnID, err := keys.DecodeAbortSpanKey(k, scratch[:0]) 177 if err != nil { 178 return err 179 } 180 return storage.MVCCPutProto(ctx, w, ms, 181 keys.AbortSpanKey(newRangeID, txnID), 182 hlc.Timestamp{}, nil, &entry, 183 ) 184 }); err != nil { 185 return roachpb.NewReplicaCorruptionError(errors.Wrap(err, "AbortSpan.CopyTo")) 186 } 187 log.Eventf(ctx, "abort span: copied %d entries, skipped %d", abortSpanCopyCount, abortSpanSkipCount) 188 return nil 189 }