github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_protected_timestamp_test.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/require"
)

// TestProtectedTimestampRecordApplies exercises
// Replica.protectedTimestampRecordApplies() at a low level.
// It does so by passing a Replica connected to an already
// shut down store to a variety of test cases.
func TestProtectedTimestampRecordApplies(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	makeArgs := func(r *Replica, ts, aliveAt hlc.Timestamp) roachpb.AdminVerifyProtectedTimestampRequest {
		args := roachpb.AdminVerifyProtectedTimestampRequest{
			RecordID:      uuid.MakeV4(),
			Protected:     ts,
			RecordAliveAt: aliveAt,
		}
		args.Key, args.EndKey = r.Desc().StartKey.AsRawKey(), r.Desc().EndKey.AsRawKey()
		return args
	}
	for _, testCase := range []struct {
		name string
		// Note that the store underneath the passed in Replica has been stopped.
		// This leaves the test to mutate the Replica state as it sees fit.
		test func(t *testing.T, r *Replica, mt *manualCache)
	}{

		// Test that if the lease started after the timestamp at which the record
		// was known to be live then we know that the Replica cannot GC until it
		// reads protected timestamp state after the lease start time. If the
		// relevant record is not found then it must have been removed.
		{
			name: "lease started after",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				r.mu.state.Lease.Start = r.store.Clock().Now()
				l, _ := r.GetLease()
				aliveAt := l.Start.Prev()
				ts := aliveAt.Prev()
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.True(t, willApply)
				require.NoError(t, err)
			},
		},
		// If the GC threshold is already newer than the timestamp we want to
		// protect then verification fails.
		{
			name: "gc threshold is after ts",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				thresh := r.store.Clock().Now()
				r.mu.state.GCThreshold = &thresh
				ts := thresh.Prev().Prev()
				aliveAt := ts.Next()
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.NoError(t, err)
			},
		},
		// If the pending GC threshold we're about to set is newer than the
		// timestamp we want to protect then we're almost certain to fail.
		// Treat it as a failure.
		{
			name: "pending GC threshold is newer than the timestamp we want to protect",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				thresh := r.store.Clock().Now()
				require.NoError(t, r.markPendingGC(hlc.Timestamp{}, thresh))
				ts := thresh.Prev().Prev()
				aliveAt := ts.Next()
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.NoError(t, err)
			},
		},
		// If the timestamp at which the record is known to be alive is newer than
		// our current view of the protected timestamp subsystem and we don't
		// already see the record, then we will refresh. In this case we refresh
		// and find it. We also verify that we cannot set the pending gc threshold
		// to above the timestamp we put in the record.
		{
			name: "newer aliveAt triggers refresh leading to success",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				mt.asOf = ts.Prev()
				args := makeArgs(r, ts, aliveAt)
				mt.refresh = func(_ context.Context, refreshTo hlc.Timestamp) error {
					require.Equal(t, refreshTo, aliveAt)
					mt.records = append(mt.records, &ptpb.Record{
						ID:        args.RecordID,
						Timestamp: ts,
						Spans: []roachpb.Span{
							{
								Key:    roachpb.Key(r.startKey()),
								EndKey: roachpb.Key(r.startKey().Next()),
							},
						},
					})
					mt.asOf = refreshTo.Next()
					return nil
				}
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.True(t, willApply)
				require.NoError(t, err)
				require.Equal(t,
					fmt.Sprintf("cannot set gc threshold to %v because read at %v < min %v",
						ts.Next(), ts, aliveAt.Next()),
					r.markPendingGC(ts, ts.Next()).Error())
			},
		},
		// If the timestamp at which the record is known to be alive is older than
		// our current view of the protected timestamp subsystem and we don't
		// already see the record, then we know that the record must have been
		// deleted already. Ensure we fail.
		{
			name: "record does not exist",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				mt.asOf = aliveAt.Next()
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.NoError(t, err)
			},
		},
		// If we see the record then we know we're good.
		{
			name: "record already exists",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				args := makeArgs(r, ts, aliveAt)
				mt.asOf = aliveAt.Next()
				mt.records = append(mt.records, &ptpb.Record{
					ID:        args.RecordID,
					Timestamp: ts,
					Spans: []roachpb.Span{
						{
							Key:    keys.MinKey,
							EndKey: keys.MaxKey,
						},
					},
				})
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.True(t, willApply)
				require.NoError(t, err)
			},
		},
		// Ensure that a failure to Refresh propagates.
		{
			name: "refresh fails",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				mt.asOf = ts.Prev()
				mt.refresh = func(_ context.Context, refreshTo hlc.Timestamp) error {
					return errors.New("boom")
				}
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.EqualError(t, err, "boom")
			},
		},
		// Ensure that a NotLeaseHolderError (NLE) propagates.
		{
			name: "not leaseholder before refresh",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				r.mu.Lock()
				lease := *r.mu.state.Lease
				lease.Sequence++
				lease.Replica = roachpb.ReplicaDescriptor{
					ReplicaID: 2,
					StoreID:   2,
					NodeID:    2,
				}
				r.mu.state.Lease = &lease
				r.mu.Unlock()
				ts := r.store.Clock().Now()
				aliveAt := ts.Prev().Prev()
				mt.asOf = ts.Prev()
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.Regexp(t, "NotLeaseHolderError", err.Error())
			},
		},
		// Ensure an NLE after performing a refresh propagates.
		{
			name: "not leaseholder after refresh",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				mt.asOf = ts.Prev()
				mt.refresh = func(ctx context.Context, refreshTo hlc.Timestamp) error {
					r.mu.Lock()
					defer r.mu.Unlock()
					lease := *r.mu.state.Lease
					lease.Sequence++
					lease.Replica = roachpb.ReplicaDescriptor{
						ReplicaID: 2,
						StoreID:   2,
						NodeID:    2,
					}
					r.mu.state.Lease = &lease
					return nil
				}
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.Regexp(t, "NotLeaseHolderError", err.Error())
			},
		},
		// If refresh succeeds but the timestamp of the cache does not advance as
		// anticipated, ensure that an assertion failure error is returned.
		{
			name: "successful refresh does not update timestamp (assertion failure)",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				mt.asOf = ts.Prev()
				mt.refresh = func(_ context.Context, refreshTo hlc.Timestamp) error {
					return nil
				}
				args := makeArgs(r, ts, aliveAt)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.EqualError(t, err, "cache was not updated after being refreshed")
				require.True(t, errors.IsAssertionFailure(err), "%v", err)
			},
		},
		// If a request header is for a key span which is not owned by this replica,
		// ensure that a roachpb.RangeKeyMismatchError is returned.
		{
			name: "request span is respected",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				aliveAt := ts.Next()
				mt.asOf = ts.Prev()
				args := makeArgs(r, ts, aliveAt)
				r.mu.state.Desc.StartKey = roachpb.RKey(keys.TableDataMax)
				willApply, err := r.protectedTimestampRecordApplies(ctx, &args)
				require.False(t, willApply)
				require.EqualError(t, err, "key range /Min-/Max outside of bounds of range /Table/Max-/Max")
			},
		},
	} {
		t.Run(testCase.name, func(t *testing.T) {
			tc := testContext{}
			tsc := TestStoreConfig(nil)
			mc := &manualCache{}
			tsc.ProtectedTimestampCache = mc
			// Under extreme stressrace scenarios the single replica can somehow
			// lose the lease. Make the timeout extremely long.
			tsc.RaftConfig.RangeLeaseRaftElectionTimeoutMultiplier = 100
			stopper := stop.NewStopper()
			tc.StartWithStoreConfig(t, stopper, tsc)
			stopper.Stop(ctx)
			testCase.test(t, tc.repl, mc)
		})
	}
}

// TestCheckProtectedTimestampsForGC exercises
// Replica.checkProtectedTimestampsForGC() at a low level.
// It does so by passing a Replica connected to an already
// shut down store to a variety of test cases.
func TestCheckProtectedTimestampsForGC(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	makePolicy := func(ttlSec int32) zonepb.GCPolicy {
		return zonepb.GCPolicy{TTLSeconds: ttlSec}
	}
	for _, testCase := range []struct {
		name string
		// Note that the store underneath the passed in Replica has been stopped.
		// This leaves the test to mutate the Replica state as it sees fit.
		test func(t *testing.T, r *Replica, mt *manualCache)
	}{
		{
			name: "lease is too new",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				r.mu.state.Lease.Start = r.store.Clock().Now()
				canGC, _, gcTimestamp, _ := r.checkProtectedTimestampsForGC(ctx, makePolicy(10))
				require.False(t, canGC)
				require.Zero(t, gcTimestamp)
			},
		},
		{
			name: "have overlapping but new enough that it's okay",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				mt.asOf = r.store.Clock().Now().Next()
				mt.records = append(mt.records, &ptpb.Record{
					ID:        uuid.MakeV4(),
					Timestamp: ts,
					Spans: []roachpb.Span{
						{
							Key:    keys.MinKey,
							EndKey: keys.MaxKey,
						},
					},
				})
				// We should allow gc to proceed with the normal new threshold if that
				// threshold is earlier than all of the records.
				canGC, _, gcTimestamp, _ := r.checkProtectedTimestampsForGC(ctx, makePolicy(10))
				require.True(t, canGC)
				require.Equal(t, mt.asOf, gcTimestamp)
			},
		},
		{
			// In this case we have a record which protects some data but we can
			// set the threshold to a later point.
			name: "have overlapping but can still GC some",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now().Add(-11*time.Second.Nanoseconds(), 0)
				mt.asOf = r.store.Clock().Now().Next()
				mt.records = append(mt.records, &ptpb.Record{
					ID:        uuid.MakeV4(),
					Timestamp: ts,
					Spans: []roachpb.Span{
						{
							Key:    keys.MinKey,
							EndKey: keys.MaxKey,
						},
					},
				})
				// We should allow gc to proceed up to the timestamp which precedes the
				// protected timestamp. This means we expect a GC timestamp 10 seconds
				// after ts.Prev() given the policy.
				canGC, _, gcTimestamp, _ := r.checkProtectedTimestampsForGC(ctx, makePolicy(10))
				require.True(t, canGC)
				require.Equal(t, ts.Prev().Add(10*time.Second.Nanoseconds(), 0), gcTimestamp)
			},
		},
		{
			// In this case we have a record which is right up against the GC
			// threshold.
			name: "have overlapping but have already GC'd right up to the threshold",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				r.mu.Lock()
				th := *r.mu.state.GCThreshold
				r.mu.Unlock()
				mt.asOf = r.store.Clock().Now().Next()
				mt.records = append(mt.records, &ptpb.Record{
					ID:        uuid.MakeV4(),
					Timestamp: th.Next(),
					Spans: []roachpb.Span{
						{
							Key:    keys.MinKey,
							EndKey: keys.MaxKey,
						},
					},
				})
				// We should not allow GC if the threshold is already the predecessor
				// of the earliest valid record.
				canGC, _, gcTimestamp, _ := r.checkProtectedTimestampsForGC(ctx, makePolicy(10))
				require.False(t, canGC)
				require.Zero(t, gcTimestamp)
			},
		},
		{
			name: "failed record does not prevent GC",
			test: func(t *testing.T, r *Replica, mt *manualCache) {
				ts := r.store.Clock().Now()
				id := uuid.MakeV4()
				thresh := ts.Next()
				r.mu.state.GCThreshold = &thresh
				mt.asOf = thresh.Next()
				mt.records = append(mt.records, &ptpb.Record{
					ID:        id,
					Timestamp: ts,
					Spans: []roachpb.Span{
						{
							Key:    keys.MinKey,
							EndKey: keys.MaxKey,
						},
					},
				})
				canGC, _, gcTimestamp, _ := r.checkProtectedTimestampsForGC(ctx, makePolicy(10))
				require.True(t, canGC)
				require.Equal(t, mt.asOf, gcTimestamp)
			},
		},
	} {
		t.Run(testCase.name, func(t *testing.T) {
			tc := testContext{}
			tsc := TestStoreConfig(nil)
			mc := &manualCache{}
			tsc.ProtectedTimestampCache = mc
			stopper := stop.NewStopper()
			tc.StartWithStoreConfig(t, stopper, tsc)
			stopper.Stop(ctx)
			testCase.test(t, tc.repl, mc)
		})
	}
}

// manualCache is a test double implementing protectedts.Cache. Tests mutate
// its fields directly: records controls which protected timestamp records are
// visible, asOf controls how fresh the cache claims to be, and the optional
// refresh hook intercepts calls to Refresh.
type manualCache struct {
	asOf    hlc.Timestamp
	records []*ptpb.Record
	refresh func(ctx context.Context, asOf hlc.Timestamp) error
}

// Iterate invokes it once for each record with at least one span overlapping
// [start, end), then returns the cache's asOf timestamp.
func (c *manualCache) Iterate(
	ctx context.Context, start, end roachpb.Key, it protectedts.Iterator,
) hlc.Timestamp {
	query := roachpb.Span{Key: start, EndKey: end}
	for _, r := range c.records {
		for _, sp := range r.Spans {
			if query.Overlaps(sp) {
				it(r)
				break
			}
		}
	}
	return c.asOf
}

// Refresh invokes the test-provided refresh hook if one is set; otherwise it
// simply advances the cache's asOf timestamp.
func (c *manualCache) Refresh(ctx context.Context, asOf hlc.Timestamp) error {
	if c.refresh == nil {
		c.asOf = asOf
		return nil
	}
	return c.refresh(ctx, asOf)
}

// QueryRecord reports whether a record with the given ID is present, along
// with the cache's asOf timestamp.
func (c *manualCache) QueryRecord(
	ctx context.Context, id uuid.UUID,
) (exists bool, asOf hlc.Timestamp) {
	for _, r := range c.records {
		if r.ID == id {
			return true, c.asOf
		}
	}
	return false, c.asOf
}

var _ protectedts.Cache = (*manualCache)(nil)
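
// The sketch below is illustrative only and not part of the upstream file. It
// shows how the manualCache double behaves under the assumption that
// protectedts.Iterator is a callback of the form func(*ptpb.Record) (wantMore
// bool); the ignored return value in Iterate above suggests this, but the
// signature is not pinned down here.
//
//	mc := &manualCache{asOf: hlc.Timestamp{WallTime: 10}}
//	mc.records = append(mc.records, &ptpb.Record{
//		ID:        uuid.MakeV4(),
//		Timestamp: hlc.Timestamp{WallTime: 5},
//		Spans:     []roachpb.Span{{Key: keys.MinKey, EndKey: keys.MaxKey}},
//	})
//	var seen int
//	asOf := mc.Iterate(context.Background(), keys.MinKey, keys.MaxKey,
//		func(r *ptpb.Record) (wantMore bool) {
//			seen++ // invoked once: the record's span overlaps the queried span
//			return true
//		})
//	// seen == 1 and asOf == mc.asOf. With no refresh hook installed, a call to
//	// mc.Refresh simply advances mc.asOf; tests that set mc.refresh intercept
//	// the call instead, as the refresh-related cases above do.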