github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/closedts/minprop/tracker_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package minprop 12 13 import ( 14 "context" 15 "fmt" 16 "runtime" 17 "sync/atomic" 18 "testing" 19 20 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/ctpb" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/util/hlc" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 25 "github.com/cockroachdb/errors" 26 "github.com/kr/pretty" 27 "github.com/stretchr/testify/assert" 28 "golang.org/x/sync/errgroup" 29 ) 30 31 const ( 32 _ ctpb.Epoch = iota 33 ep1 34 ep2 35 ep3 36 ) 37 38 func TestTrackerClosure(t *testing.T) { 39 ctx := context.Background() 40 tracker := NewTracker() 41 _, done := tracker.Track(ctx) 42 43 done(ctx, ep1, 100, 200) 44 done(ctx, ep1, 0, 0) 45 } 46 47 func ExampleTracker_Close() { 48 ctx := context.Background() 49 tracker := NewTracker() 50 _, slow := tracker.Track(ctx) 51 _, _, _ = tracker.Close(hlc.Timestamp{WallTime: 1e9}, ep1) 52 _, fast := tracker.Track(ctx) 53 54 fmt.Println("Slow proposal finishes at LAI 2") 55 slow(ctx, ep1, 99, 2) 56 closed, m, ok := tracker.Close(hlc.Timestamp{WallTime: 2e9}, ep1) 57 fmt.Println("Closed:", closed, m, ok) 58 59 fmt.Println("Fast proposal finishes at LAI 1") 60 fast(ctx, ep1, 99, 1) 61 fmt.Println(tracker) 62 closed, m, ok = tracker.Close(hlc.Timestamp{WallTime: 3e9}, ep1) 63 fmt.Println("Closed:", closed, m, ok) 64 fmt.Println("Note how the MLAI has 'regressed' from 2 to 1. The consumer") 65 fmt.Println("needs to track the maximum over all deltas received.") 66 67 // Output: 68 // Slow proposal finishes at LAI 2 69 // Closed: 1.000000000,0 map[99:2] true 70 // Fast proposal finishes at LAI 1 71 // 72 // closed=1.000000000,0 73 // | next=2.000000000,0 74 // | left | right 75 // | 0 # 0 76 // | 1 e 1 77 // | 1 @ (r99) 78 // v v 79 // ---------------------------------------------------------> time 80 // 81 // Closed: 2.000000000,0 map[99:1] true 82 // Note how the MLAI has 'regressed' from 2 to 1. The consumer 83 // needs to track the maximum over all deltas received. 84 } 85 86 func TestTrackerDoubleRelease(t *testing.T) { 87 var exited bool 88 log.SetExitFunc(true /* hideStack */, func(int) { exited = true }) 89 defer log.ResetExitFunc() 90 91 ctx := context.Background() 92 tracker := NewTracker() 93 94 _, release := tracker.Track(ctx) 95 release(ctx, ep1, 0, 0) 96 release(ctx, ep1, 4, 10) 97 98 if !exited { 99 t.Fatal("expected fatal error") 100 } 101 } 102 103 func TestTrackerReleaseZero(t *testing.T) { 104 ctx := context.Background() 105 tracker := NewTracker() 106 trackedTs1, release1 := tracker.Track(ctx) 107 trackedTs2, release2 := tracker.Track(ctx) 108 release2(ctx, ep1, 2, 0) 109 leftTs, _, _ := tracker.Close(trackedTs2, ep1) 110 leftTs.Logical += 2 111 release1(ctx, ep1, 1, 0) 112 closedTs, mlais, ok := tracker.Close(leftTs, ep1) 113 if !ok { 114 t.Fatalf("expected closed to succeed") 115 } else if closedTs != trackedTs1 { 116 t.Fatalf("expected to have closed %v, got %v %v", trackedTs1, closedTs, mlais) 117 } else if mlai1, found := mlais[1]; !found { 118 t.Fatalf("expected to find mlai for range 1") 119 } else if mlai1 != 0 { 120 t.Fatalf("expected to find zero mlai for range 1, got %v", mlai1) 121 } else if mlai2, found := mlais[2]; !found { 122 t.Fatalf("expected to find mlai for range 2") 123 } else if mlai2 != 0 { 124 t.Fatalf("expected to find zero mlai for range 2, got %v", mlai2) 125 } 126 } 127 128 type modelClient struct { 129 lai map[roachpb.RangeID]*int64 // read-only map, values accessed atomically 130 mu struct { 131 syncutil.Mutex 132 closed []hlc.Timestamp // closed timestamps 133 released []map[roachpb.RangeID]ctpb.LAI // known released LAIs, rotated on Close 134 m map[roachpb.RangeID]ctpb.LAI // max over all maps returned from Close() 135 } 136 } 137 138 // Operate a Tracker concurrently and verify that closed timestamps don't regress 139 // and that the emitted MLAIs are not obviously inconsistent with commands we know 140 // finished. 141 func TestTrackerConcurrentUse(t *testing.T) { 142 ctx := context.Background() 143 tracker := NewTracker() 144 145 const ( 146 numCmds = 1000 // operations to carry out in total 147 closeEvery = 20 // turn every i'th operation into a Close 148 numRanges = 5 149 ) 150 151 var mc modelClient 152 mc.mu.m = map[roachpb.RangeID]ctpb.LAI{} 153 mc.mu.closed = make([]hlc.Timestamp, 1) 154 mc.mu.released = []map[roachpb.RangeID]ctpb.LAI{{}, {}, {}} 155 156 mc.lai = map[roachpb.RangeID]*int64{} 157 for i := roachpb.RangeID(1); i <= numRanges; i++ { 158 mc.lai[i] = new(int64) 159 } 160 161 get := func(i int) (roachpb.RangeID, ctpb.LAI) { 162 rangeID := roachpb.RangeID(1 + (i % numRanges)) 163 return rangeID, ctpb.LAI(atomic.AddInt64(mc.lai[rangeID], 1)) 164 } 165 166 // It becomes a lot more complicated to collect the released indexes 167 // correctly when multiple calls to Close are in-flight at any given time. 168 // The intended use case is for Close to be called from a single goroutine, 169 // so the test specializes to that situation. 170 // 171 // NB: The `mc.mu` sections are intentionally kept small to allow for more 172 // interleaving between tracked commands and close operations. 173 var closeMU syncutil.Mutex 174 close := func(newNext hlc.Timestamp) error { 175 closeMU.Lock() 176 defer closeMU.Unlock() 177 mc.mu.Lock() 178 // Note last closed timestamp. 179 prevClosed := mc.mu.closed[len(mc.mu.closed)-1] 180 181 mc.mu.Unlock() 182 183 t.Log("before closing:", tracker) 184 // Ignore epoch mismatches which may occur before any values have been 185 // released from the tracker. 186 closed, m, _ := tracker.Close(newNext, ep1) 187 if closed.Less(prevClosed) { 188 return errors.Errorf("closed timestamp regressed from %s to %s", prevClosed, closed) 189 } else if prevClosed == closed && len(m) != 0 { 190 return errors.Errorf("closed timestamp %s not incremented, but MLAIs %v emitted", prevClosed, m) 191 } 192 193 mc.mu.Lock() 194 defer mc.mu.Unlock() 195 196 if closed != prevClosed { 197 // The released bucket is rotated after each call to Close (we can't 198 // really do it before because we only want to rotate when a new 199 // closed timestamp was established). 200 // 201 // Taking into account the call to Close we just performed, the 202 // - current bucket contains: commands that could be on the left 203 // (expected) or the right: A command could start after our call to 204 // Close but make it into the pre-rotation bucket. 205 // - previous bucket contains commands that could be on the left 206 // or emitted 207 // - bucket before that contains commands that definitely must have 208 // been emitted. 209 // 210 // So we check the latter bucket. Trying to close the synchronization 211 // gap would allow checking the middle bucket instead, but this would 212 // weaken the test overall. 213 released := mc.mu.released[len(mc.mu.released)-3] 214 // Rotate released commands bucket. 215 mc.mu.released = append(mc.mu.released, map[roachpb.RangeID]ctpb.LAI{}) 216 217 for rangeID, mlai := range m { 218 // Intuitively you expect mc.mu.m[rangeID] < mlai, but this 219 // doesn't always hold. A slow proposal could get assigned a 220 // higher lease index on the left side than a "newer" 221 // proposal on the right. The client really has to track the 222 // maximum. 223 // 224 if mc.mu.m[rangeID] < mlai { 225 mc.mu.m[rangeID] = mlai 226 } 227 228 if trackedMLAI, rMLAI := mc.mu.m[rangeID], released[rangeID]; rMLAI > trackedMLAI { 229 return errors.Errorf( 230 "incorrect MLAI %d for r%d does not reflect %d:\nemitted: %+v\n%s\nreleased: %s\naggregate: %s", 231 trackedMLAI, rangeID, rMLAI, m, tracker, pretty.Sprint(mc.mu.released), pretty.Sprint(mc.mu.m), 232 ) 233 } 234 } 235 } 236 237 // Store latest closed timestamp. 238 mc.mu.closed = append(mc.mu.closed, closed) 239 return nil 240 } 241 242 newNext := func(i int) hlc.Timestamp { 243 return hlc.Timestamp{WallTime: int64(i) * 1e9} 244 } 245 246 run := func(i int) error { 247 if i%closeEvery == 1 { 248 return close(newNext(i)) 249 } 250 251 mc.mu.Lock() 252 prevClosed := mc.mu.closed[len(mc.mu.closed)-1] 253 mc.mu.Unlock() 254 255 ts, done := tracker.Track(ctx) 256 if ts.Less(prevClosed) { 257 return errors.Errorf("%d: proposal forwarded to %s, but closed %s", i, ts, prevClosed) 258 } 259 260 runtime.Gosched() 261 262 var rangeID roachpb.RangeID 263 var lai ctpb.LAI 264 switch i % 3 { 265 case 0: 266 // Successful evaluation. 267 rangeID, lai = get(i) 268 done(ctx, ep1, rangeID, lai) 269 case 1: 270 // Successful evaluation followed by deferred zero call. 271 rangeID, lai = get(i) 272 done(ctx, ep1, rangeID, lai) 273 done(ctx, ep1, 0, 0) 274 case 2: 275 // Failed evaluation. Burns a LAI. 276 done(ctx, ep1, 0, 0) 277 default: 278 panic("the impossible happened") 279 } 280 281 mc.mu.Lock() 282 if lai != 0 { 283 mc.mu.released[len(mc.mu.released)-1][rangeID] = lai 284 } 285 mc.mu.Unlock() 286 287 return nil 288 } 289 290 var g errgroup.Group 291 for i := 0; i < numCmds; i++ { 292 i := i 293 g.Go(func() error { 294 return run(i) 295 }) 296 } 297 298 if err := g.Wait(); err != nil { 299 t.Fatal(err) 300 } 301 302 // We'd like to at least assert something about the MLAIs below, namely that 303 // the final view of the client state is equivalent to the MLAIs that were 304 // actually used by the proposals. To get there, we need to close out twice: 305 // once to flush the right side to the left, and another time to force it 306 // to be output. 307 for i := 0; i < 2; i++ { 308 if err := close(newNext(numCmds + i)); err != nil { 309 t.Fatal(err) 310 } 311 } 312 313 t.Log(tracker) 314 315 for rangeID, addr := range mc.lai { 316 assignedMLAI := ctpb.LAI(atomic.LoadInt64(addr)) 317 mlai := mc.mu.m[rangeID] 318 319 if assignedMLAI > mlai { 320 t.Errorf("r%d: assigned %d, but only %d reflected in final MLAI map", rangeID, assignedMLAI, mlai) 321 } 322 } 323 } 324 325 // ExampleTracker_EpochChanges tests the interactions between epoch values 326 // passed to Close and epoch values of proposals being tracked. 327 func ExampleTracker_Close_epochChange() { 328 ts1 := hlc.Timestamp{WallTime: 1e9} 329 ts2 := hlc.Timestamp{WallTime: 2e9} 330 ts3 := hlc.Timestamp{WallTime: 3e9} 331 332 ctx := context.Background() 333 tracker := NewTracker() 334 fmt.Println("The newly initialized tracker has a zero closed timestamp:") 335 fmt.Println(tracker) 336 337 fmt.Println("A first command arrives on range 1 (though the range isn't known yet to the Tracker).") 338 ts, r1e1lai1 := tracker.Track(ctx) 339 fmt.Println("All commands initially start out on the right. The command has its timestamp forwarded to", ts, ".") 340 fmt.Println("The command finished quickly and is released in epoch 1.") 341 r1e1lai1(ctx, ep1, 1, 1) 342 fmt.Println(tracker) 343 344 fmt.Println("Another proposal arrives on range 2 but does not complete before the next call to Close().") 345 _, r2e2lai1 := tracker.Track(ctx) 346 fmt.Println(tracker) 347 348 fmt.Println("The system closes out a timestamp expecting liveness epoch 2 (registering", ts1, "as the next", 349 "timestamp to close out).") 350 closed, mlai, ok := tracker.Close(ts1, ep2) 351 fmt.Println("The Close() call fails due to the liveness epoch mismatch between", 352 "the expected current epoch and the tracked data, returning", closed, mlai, ok) 353 fmt.Println("The Close() call evicts the tracked range 1 LAI.") 354 fmt.Println(tracker) 355 356 fmt.Println("The proposal on range 2 is released in epoch 2.") 357 r2e2lai1(ctx, ep2, 2, 1) 358 fmt.Println(tracker) 359 360 fmt.Println("Another proposal arrives on range 1 and quickly finishes with", 361 "LAI 2 but is still in epoch 1 and is not tracked.") 362 _, r1e1lai2 := tracker.Track(ctx) 363 r1e1lai2(ctx, ep1, 2, 2) 364 fmt.Println("Meanwhile a proposal arrives on range 2 and quickly finishes with", 365 "LAI 2 in epoch 2.") 366 _, r2e2lai2 := tracker.Track(ctx) 367 r2e2lai2(ctx, ep2, 2, 2) 368 fmt.Println(tracker) 369 370 fmt.Println("A new proposal arrives on range 1 and quickly finishes with LAI 2 in epoch 3.") 371 fmt.Println("This new epoch evicts the data on the right side corresponding to epoch 2.") 372 _, r1e3lai2 := tracker.Track(ctx) 373 r1e3lai2(ctx, ep3, 1, 2) 374 fmt.Println(tracker) 375 376 closed, mlai, ok = tracker.Close(ts2, ep2) 377 fmt.Println("The next call to Close() occurs in epoch 2 and successfully returns:", closed, mlai, ok) 378 closed, mlai, ok = tracker.Close(ts3, ep2) 379 fmt.Println("Subsequent calls to Close() at later times but still in epoch 2 do not move the tracker state.") 380 fmt.Println("They return the previous closed timestamp with an empty mlai map:", closed, mlai, ok, ".") 381 fmt.Println("Data corresponding to epoch 3 is retained.") 382 fmt.Println(tracker) 383 closed, mlai, ok = tracker.Close(ts3, ep3) 384 fmt.Println("The next call to Close() occurs in epoch 3 and successfully returns:", closed, mlai, ok, ".") 385 386 // Output: 387 // The newly initialized tracker has a zero closed timestamp: 388 // 389 // closed=0,0 390 // | next=0,1 391 // | left | right 392 // | 0 # 0 393 // | 1 e 1 394 // v v 395 // ---------------------------------------------------------> time 396 // 397 // A first command arrives on range 1 (though the range isn't known yet to the Tracker). 398 // All commands initially start out on the right. The command has its timestamp forwarded to 0,2 . 399 // The command finished quickly and is released in epoch 1. 400 // 401 // closed=0,0 402 // | next=0,1 403 // | left | right 404 // | 0 # 0 405 // | 1 e 1 406 // | @ 1 (r1) 407 // v v 408 // ---------------------------------------------------------> time 409 // 410 // Another proposal arrives on range 2 but does not complete before the next call to Close(). 411 // 412 // closed=0,0 413 // | next=0,1 414 // | left | right 415 // | 0 # 1 416 // | 1 e 1 417 // | @ 1 (r1) 418 // v v 419 // ---------------------------------------------------------> time 420 // 421 // The system closes out a timestamp expecting liveness epoch 2 (registering 1.000000000,0 as the next timestamp to close out). 422 // The Close() call fails due to the liveness epoch mismatch between the expected current epoch and the tracked data, returning 0,0 map[] false 423 // The Close() call evicts the tracked range 1 LAI. 424 // 425 // closed=0,1 426 // | next=1.000000000,0 427 // | left | right 428 // | 1 # 0 429 // | 2 e 2 430 // v v 431 // ---------------------------------------------------------> time 432 // 433 // The proposal on range 2 is released in epoch 2. 434 // 435 // closed=0,1 436 // | next=1.000000000,0 437 // | left | right 438 // | 0 # 0 439 // | 2 e 2 440 // | 1 @ (r2) 441 // v v 442 // ---------------------------------------------------------> time 443 // 444 // Another proposal arrives on range 1 and quickly finishes with LAI 2 but is still in epoch 1 and is not tracked. 445 // Meanwhile a proposal arrives on range 2 and quickly finishes with LAI 2 in epoch 2. 446 // 447 // closed=0,1 448 // | next=1.000000000,0 449 // | left | right 450 // | 0 # 0 451 // | 2 e 2 452 // | 1 @ (r2) 453 // | @ 2 (r2) 454 // v v 455 // ---------------------------------------------------------> time 456 // 457 // A new proposal arrives on range 1 and quickly finishes with LAI 2 in epoch 3. 458 // This new epoch evicts the data on the right side corresponding to epoch 2. 459 // 460 // closed=0,1 461 // | next=1.000000000,0 462 // | left | right 463 // | 0 # 0 464 // | 2 e 3 465 // | @ 2 (r1) 466 // | 1 @ (r2) 467 // v v 468 // ---------------------------------------------------------> time 469 // 470 // The next call to Close() occurs in epoch 2 and successfully returns: 1.000000000,0 map[2:1] true 471 // Subsequent calls to Close() at later times but still in epoch 2 do not move the tracker state. 472 // They return the previous closed timestamp with an empty mlai map: 1.000000000,0 map[] true . 473 // Data corresponding to epoch 3 is retained. 474 // 475 // closed=1.000000000,0 476 // | next=2.000000000,0 477 // | left | right 478 // | 0 # 0 479 // | 3 e 3 480 // | 2 @ (r1) 481 // v v 482 // ---------------------------------------------------------> time 483 // 484 // The next call to Close() occurs in epoch 3 and successfully returns: 2.000000000,0 map[1:2] true . 485 } 486 487 // TestTrackerMultipleEpochsReleased tests that when proposals submitted between 488 // calls to Close span multiple epochs, only data for the highest epoch are 489 // retained and reported. 490 func TestTrackerMultipleEpochsReleased(t *testing.T) { 491 ts0 := hlc.Timestamp{Logical: 1} 492 ts1 := hlc.Timestamp{WallTime: 1e9} 493 ts2 := hlc.Timestamp{WallTime: 2e9} 494 ts3 := hlc.Timestamp{WallTime: 3e9} 495 496 ctx := context.Background() 497 tracker := NewTracker() 498 499 // Track and release a proposal on range 1 in ep1. 500 _, r1e1lai1 := tracker.Track(ctx) 501 r1e1lai1(ctx, ep1, 1, 1) 502 // Begin tracking a proposal on range 2 which won't be released until after 503 // the next call to Close. 504 _, r2e2lai1 := tracker.Track(ctx) 505 // Close the current left side and assert that the tracker reports an empty 506 // MLAI map in epoch 1 for the initial timestamp value. 507 assertClosed(tracker.Close(ts1, ep1))(t, ts0, mlais{}, true) 508 // Track and release another proposal on range 1 in epoch 1 with LAI 2. 509 // This proposal is on the right side. 510 _, r1e1lai2 := tracker.Track(ctx) 511 r1e1lai2(ctx, ep1, 1, 2) 512 // Release the proposal for range 2 in epoch 2 which should be on the left 513 // side. This release call will invalidate the LAI for range 1 that was 514 // recorded in epoch 1 both on the left and right side. 515 r2e2lai1(ctx, ep2, 2, 1) 516 // Close the current left side and assert that the tracker value on the 517 // range 1 epoch 1 value from the first interval is not present. 518 assertClosed(tracker.Close(ts2, ep2))(t, ts1, mlais{2: 1}, true) 519 assertClosed(tracker.Close(ts2, ep2))(t, ts1, nil, true) 520 assertClosed(tracker.Close(ts3, ep2))(t, ts2, mlais{}, true) 521 } 522 523 type mlais = map[roachpb.RangeID]ctpb.LAI 524 525 func assertClosed( 526 ts hlc.Timestamp, m mlais, ok bool, 527 ) func(t *testing.T, expTs hlc.Timestamp, expM mlais, expOk bool) { 528 return func( 529 t *testing.T, expTs hlc.Timestamp, expM mlais, expOk bool, 530 ) { 531 t.Helper() 532 assert.Equal(t, expOk, ok) 533 assert.Equal(t, expTs, ts) 534 assert.EqualValues(t, expM, m) 535 } 536 }