github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/gc/gc_old_test.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package gc 12 13 import ( 14 "context" 15 "sort" 16 "testing" 17 18 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 19 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" 20 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/storage" 23 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 24 "github.com/cockroachdb/cockroach/pkg/util/hlc" 25 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 26 "github.com/cockroachdb/cockroach/pkg/util/log" 27 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 28 "github.com/cockroachdb/cockroach/pkg/util/uuid" 29 "github.com/cockroachdb/errors" 30 ) 31 32 // runGCOld is an older implementation of Run. It is used for benchmarking and 33 // testing. 34 // 35 // runGCOld runs garbage collection for the specified descriptor on the 36 // provided Engine (which is not mutated). It uses the provided gcFn 37 // to run garbage collection once on all implicated spans, 38 // cleanupIntentsFn to resolve intents synchronously, and 39 // cleanupTxnIntentsAsyncFn to asynchronously cleanup intents and 40 // associated transaction record on success. 
41 func runGCOld( 42 ctx context.Context, 43 desc *roachpb.RangeDescriptor, 44 snap storage.Reader, 45 now hlc.Timestamp, 46 _ hlc.Timestamp, // exists to make signature match RunGC 47 policy zonepb.GCPolicy, 48 gcer GCer, 49 cleanupIntentsFn CleanupIntentsFunc, 50 cleanupTxnIntentsAsyncFn CleanupTxnIntentsAsyncFunc, 51 ) (Info, error) { 52 53 iter := rditer.NewReplicaDataIterator(desc, snap, 54 true /* replicatedOnly */, false /* seekEnd */) 55 defer iter.Close() 56 57 // Compute intent expiration (intent age at which we attempt to resolve). 58 intentExp := now.Add(-IntentAgeThreshold.Nanoseconds(), 0) 59 txnExp := now.Add(-kvserverbase.TxnCleanupThreshold.Nanoseconds(), 0) 60 61 gc := MakeGarbageCollector(now, policy) 62 63 if err := gcer.SetGCThreshold(ctx, Threshold{ 64 Key: gc.Threshold, 65 Txn: txnExp, 66 }); err != nil { 67 return Info{}, errors.Wrap(err, "failed to set GC thresholds") 68 } 69 70 var batchGCKeys []roachpb.GCRequest_GCKey 71 var batchGCKeysBytes int64 72 var expBaseKey roachpb.Key 73 var keys []storage.MVCCKey 74 var vals [][]byte 75 var keyBytes int64 76 var valBytes int64 77 info := Info{ 78 Policy: policy, 79 Now: now, 80 Threshold: gc.Threshold, 81 } 82 83 // Maps from txn ID to txn and intent key slice. 84 txnMap := map[uuid.UUID]*roachpb.Transaction{} 85 intentKeyMap := map[uuid.UUID][]roachpb.Key{} 86 87 // processKeysAndValues is invoked with each key and its set of 88 // values. Intents older than the intent age threshold are sent for 89 // resolution and values after the MVCC metadata, and possible 90 // intent, are sent for garbage collection. 91 processKeysAndValues := func() { 92 // If there's more than a single value for the key, possibly send for GC. 
93 if len(keys) > 1 { 94 meta := &enginepb.MVCCMetadata{} 95 if err := protoutil.Unmarshal(vals[0], meta); err != nil { 96 log.Errorf(ctx, "unable to unmarshal MVCC metadata for key %q: %+v", keys[0], err) 97 } else { 98 // In the event that there's an active intent, send for 99 // intent resolution if older than the threshold. 100 startIdx := 1 101 if meta.Txn != nil { 102 // Keep track of intent to resolve if older than the intent 103 // expiration threshold. 104 if hlc.Timestamp(meta.Timestamp).Less(intentExp) { 105 txnID := meta.Txn.ID 106 if _, ok := txnMap[txnID]; !ok { 107 txnMap[txnID] = &roachpb.Transaction{ 108 TxnMeta: *meta.Txn, 109 } 110 // IntentTxns and PushTxn will be equal here, since 111 // pushes to transactions whose record lies in this 112 // range (but which are not associated to a remaining 113 // intent on it) happen asynchronously and are accounted 114 // for separately. Thus higher up in the stack, we 115 // expect PushTxn > IntentTxns. 116 info.IntentTxns++ 117 // All transactions in txnMap may be PENDING and 118 // cleanupIntentsFn will push them to finalize them. 119 info.PushTxn++ 120 } 121 info.IntentsConsidered++ 122 intentKeyMap[txnID] = append(intentKeyMap[txnID], expBaseKey) 123 } 124 // With an active intent, GC ignores MVCC metadata & intent value. 125 startIdx = 2 126 } 127 // See if any values may be GC'd. 128 if idx, gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); gcTS != (hlc.Timestamp{}) { 129 // Batch keys after the total size of version keys exceeds 130 // the threshold limit. This avoids sending potentially large 131 // GC requests through Raft. Iterate through the keys in reverse 132 // order so that GC requests can be made multiple times even on 133 // a single key, with successively newer timestamps to prevent 134 // any single request from exploding during GC evaluation. 
135 for i := len(keys) - 1; i >= startIdx+idx; i-- { 136 keyBytes = int64(keys[i].EncodedSize()) 137 valBytes = int64(len(vals[i])) 138 139 // Add the total size of the GC'able versions of the keys and values to Info. 140 info.AffectedVersionsKeyBytes += keyBytes 141 info.AffectedVersionsValBytes += valBytes 142 143 batchGCKeysBytes += keyBytes 144 // If the current key brings the batch over the target 145 // size, add the current timestamp to finish the current 146 // chunk and start a new one. 147 if batchGCKeysBytes >= KeyVersionChunkBytes { 148 batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: keys[i].Timestamp}) 149 150 err := gcer.GC(ctx, batchGCKeys) 151 152 // Succeed or fail, allow releasing the memory backing batchGCKeys. 153 iter.ResetAllocator() 154 batchGCKeys = nil 155 batchGCKeysBytes = 0 156 157 if err != nil { 158 // Even though we are batching the GC process, it's 159 // safe to continue because we bumped the GC 160 // thresholds. We may leave some inconsistent history 161 // behind, but nobody can read it. 162 log.Warningf(ctx, "%v", err) 163 return 164 } 165 } 166 } 167 // Add the key to the batch at the GC timestamp, unless it was already added. 168 if batchGCKeysBytes != 0 { 169 batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS}) 170 } 171 info.NumKeysAffected++ 172 } 173 } 174 } 175 } 176 177 // Iterate through the keys and values of this replica's range. 178 log.Event(ctx, "iterating through range") 179 for ; ; iter.Next() { 180 if ok, err := iter.Valid(); err != nil { 181 return Info{}, err 182 } else if !ok { 183 break 184 } else if ctx.Err() != nil { 185 // Stop iterating if our context has expired. 186 return Info{}, err 187 } 188 iterKey := iter.Key() 189 if !iterKey.IsValue() || !iterKey.Key.Equal(expBaseKey) { 190 // Moving to the next key (& values). 
191 processKeysAndValues() 192 expBaseKey = iterKey.Key 193 if !iterKey.IsValue() { 194 keys = []storage.MVCCKey{iter.Key()} 195 vals = [][]byte{iter.Value()} 196 continue 197 } 198 // An implicit metadata. 199 keys = []storage.MVCCKey{storage.MakeMVCCMetadataKey(iterKey.Key)} 200 // A nil value for the encoded MVCCMetadata. This will unmarshal to an 201 // empty MVCCMetadata which is sufficient for processKeysAndValues to 202 // determine that there is no intent. 203 vals = [][]byte{nil} 204 } 205 keys = append(keys, iter.Key()) 206 vals = append(vals, iter.Value()) 207 } 208 // Handle last collected set of keys/vals. 209 processKeysAndValues() 210 if len(batchGCKeys) > 0 { 211 if err := gcer.GC(ctx, batchGCKeys); err != nil { 212 return Info{}, err 213 } 214 } 215 216 // From now on, all keys processed are range-local. 217 218 // Process local range key entries (txn records, queue last processed times). 219 if err := processLocalKeyRange(ctx, snap, desc, txnExp, &info, cleanupTxnIntentsAsyncFn, gcer); err != nil { 220 log.Warningf(ctx, "while gc'ing local key range: %s", err) 221 } 222 223 // Clean up the AbortSpan. 224 log.Event(ctx, "processing AbortSpan") 225 processAbortSpan(ctx, snap, desc.RangeID, txnExp, &info, gcer) 226 227 log.Eventf(ctx, "GC'ed keys; stats %+v", info) 228 229 // Push transactions (if pending) and resolve intents. 230 var intents []roachpb.Intent 231 for txnID, txn := range txnMap { 232 intents = append(intents, roachpb.AsIntents(&txn.TxnMeta, intentKeyMap[txnID])...) 233 } 234 info.ResolveTotal += len(intents) 235 log.Eventf(ctx, "cleanup of %d intents", len(intents)) 236 if err := cleanupIntentsFn(ctx, intents); err != nil { 237 return Info{}, err 238 } 239 240 return info, nil 241 } 242 243 // GarbageCollector GCs MVCC key/values using a zone-specific GC 244 // policy allows either the union or intersection of maximum # of 245 // versions and maximum age. 
type GarbageCollector struct {
	// Threshold is the timestamp at or below which versions are
	// candidates for garbage collection; it is computed from the
	// current time and the policy's TTL (see MakeGarbageCollector).
	Threshold hlc.Timestamp
	// policy is the zone's GC policy; its TTLSeconds gates Filter.
	policy zonepb.GCPolicy
}

// MakeGarbageCollector allocates and returns a new GC, with expiration
// computed based on current time and policy.TTLSeconds.
func MakeGarbageCollector(now hlc.Timestamp, policy zonepb.GCPolicy) GarbageCollector {
	return GarbageCollector{
		Threshold: CalculateThreshold(now, policy),
		policy:    policy,
	}
}

// Filter makes decisions about garbage collection based on the
// garbage collection policy for batches of values for the same
// key. Returns the index of the first key to be GC'd and the
// timestamp including, and after which, all values should be garbage
// collected. If no values should be GC'd, returns -1 for the index
// and the zero timestamp. Keys must be in descending time
// order. Values deleted at or before the returned timestamp can be
// deleted without invalidating any reads in the time interval
// (gc.expiration, \infinity).
//
// The GC keeps all values (including deletes) above the expiration time, plus
// the first value before or at the expiration time. This allows reads to be
// guaranteed as described above. However if this were the only rule, then if
// the most recent write was a delete, it would never be removed. Thus, when a
// deleted value is the most recent before expiration, it can be deleted. This
// would still allow for the tombstone bugs in #6227, so in the future we will
// add checks that disallow writes before the last GC expiration time.
func (gc GarbageCollector) Filter(keys []storage.MVCCKey, values [][]byte) (int, hlc.Timestamp) {
	// A non-positive TTL disables GC entirely.
	if gc.policy.TTLSeconds <= 0 {
		return -1, hlc.Timestamp{}
	}
	if len(keys) == 0 {
		return -1, hlc.Timestamp{}
	}

	// find the first expired key index using binary search
	// (keys are in descending timestamp order, so everything at or
	// after index i is at or below the threshold)
	i := sort.Search(len(keys), func(i int) bool { return keys[i].Timestamp.LessEq(gc.Threshold) })

	// No version is at or below the threshold: nothing to GC.
	if i == len(keys) {
		return -1, hlc.Timestamp{}
	}

	// Now keys[i].Timestamp is <= gc.expiration, but the key-value pair is still
	// "visible" at timestamp gc.expiration (and up to the next version).
	if deleted := len(values[i]) == 0; deleted {
		// We don't have to keep a delete visible (since GCing it does not change
		// the outcome of the read). Note however that we can't touch deletes at
		// higher timestamps immediately preceding this one, since they're above
		// gc.expiration and are needed for correctness; see #6227.
		return i, keys[i].Timestamp
	} else if i+1 < len(keys) {
		// Otherwise mark the previous timestamp for deletion (since it won't ever
		// be returned for reads at gc.expiration and up).
		return i + 1, keys[i+1].Timestamp
	}

	return -1, hlc.Timestamp{}
}

// mvccVersionKey builds an MVCCKey for the given key/timestamp pair.
func mvccVersionKey(key roachpb.Key, ts hlc.Timestamp) storage.MVCCKey {
	return storage.MVCCKey{Key: key, Timestamp: ts}
}

// Test fixtures: version keys in descending timestamp order, as
// required by GarbageCollector.Filter.
var (
	aKey  = roachpb.Key("a")
	bKey  = roachpb.Key("b")
	aKeys = []storage.MVCCKey{
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 2e9, Logical: 0}),
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 1e9, Logical: 1}),
		mvccVersionKey(aKey, hlc.Timestamp{WallTime: 1e9, Logical: 0}),
	}
	bKeys = []storage.MVCCKey{
		mvccVersionKey(bKey, hlc.Timestamp{WallTime: 2e9, Logical: 0}),
		mvccVersionKey(bKey, hlc.Timestamp{WallTime: 1e9, Logical: 0}),
	}
)

// TestGarbageCollectorFilter verifies the filter policies for
// different sorts of MVCC keys.
329 func TestGarbageCollectorFilter(t *testing.T) { 330 defer leaktest.AfterTest(t)() 331 gcA := MakeGarbageCollector(hlc.Timestamp{WallTime: 0, Logical: 0}, zonepb.GCPolicy{TTLSeconds: 1}) 332 gcB := MakeGarbageCollector(hlc.Timestamp{WallTime: 0, Logical: 0}, zonepb.GCPolicy{TTLSeconds: 2}) 333 n := []byte("data") 334 d := []byte(nil) 335 testData := []struct { 336 gc GarbageCollector 337 time hlc.Timestamp 338 keys []storage.MVCCKey 339 values [][]byte 340 expIdx int 341 expDelTS hlc.Timestamp 342 }{ 343 {gcA, hlc.Timestamp{WallTime: 0, Logical: 0}, aKeys, [][]byte{n, n, n}, -1, hlc.Timestamp{}}, 344 {gcA, hlc.Timestamp{WallTime: 0, Logical: 0}, aKeys, [][]byte{d, d, d}, -1, hlc.Timestamp{}}, 345 {gcB, hlc.Timestamp{WallTime: 0, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}}, 346 {gcB, hlc.Timestamp{WallTime: 0, Logical: 0}, bKeys, [][]byte{d, d}, -1, hlc.Timestamp{}}, 347 {gcA, hlc.Timestamp{WallTime: 1e9, Logical: 0}, aKeys, [][]byte{n, n, n}, -1, hlc.Timestamp{}}, 348 {gcB, hlc.Timestamp{WallTime: 1e9, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}}, 349 {gcA, hlc.Timestamp{WallTime: 2e9, Logical: 0}, aKeys, [][]byte{n, n, n}, -1, hlc.Timestamp{}}, 350 {gcA, hlc.Timestamp{WallTime: 2e9, Logical: 0}, aKeys, [][]byte{d, d, d}, 2, hlc.Timestamp{WallTime: 1e9, Logical: 0}}, 351 {gcB, hlc.Timestamp{WallTime: 2e9, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}}, 352 {gcA, hlc.Timestamp{WallTime: 3e9, Logical: 0}, aKeys, [][]byte{n, n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 1}}, 353 {gcA, hlc.Timestamp{WallTime: 3e9, Logical: 0}, aKeys, [][]byte{d, n, n}, 0, hlc.Timestamp{WallTime: 2e9, Logical: 0}}, 354 {gcB, hlc.Timestamp{WallTime: 3e9, Logical: 0}, bKeys, [][]byte{n, n}, -1, hlc.Timestamp{}}, 355 {gcA, hlc.Timestamp{WallTime: 4e9, Logical: 0}, aKeys, [][]byte{n, n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 1}}, 356 {gcB, hlc.Timestamp{WallTime: 4e9, Logical: 0}, bKeys, [][]byte{n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 
0}}, 357 {gcB, hlc.Timestamp{WallTime: 4e9, Logical: 0}, bKeys, [][]byte{d, n}, 0, hlc.Timestamp{WallTime: 2e9, Logical: 0}}, 358 {gcA, hlc.Timestamp{WallTime: 5e9, Logical: 0}, aKeys, [][]byte{n, n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 1}}, 359 {gcB, hlc.Timestamp{WallTime: 5e9, Logical: 0}, bKeys, [][]byte{n, n}, 1, hlc.Timestamp{WallTime: 1e9, Logical: 0}}, 360 {gcB, hlc.Timestamp{WallTime: 5e9, Logical: 0}, bKeys, [][]byte{d, n}, 0, hlc.Timestamp{WallTime: 2e9, Logical: 0}}, 361 } 362 for i, test := range testData { 363 test.gc.Threshold = test.time 364 test.gc.Threshold.WallTime -= int64(test.gc.policy.TTLSeconds) * 1e9 365 idx, delTS := test.gc.Filter(test.keys, test.values) 366 if idx != test.expIdx { 367 t.Errorf("%d: expected index %d; got %d", i, test.expIdx, idx) 368 } 369 if delTS != test.expDelTS { 370 t.Errorf("%d: expected deletion timestamp %s; got %s", i, test.expDelTS, delTS) 371 } 372 } 373 }