github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/closedts/minprop/tracker_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package minprop
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"runtime"
    17  	"sync/atomic"
    18  	"testing"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/ctpb"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    25  	"github.com/cockroachdb/errors"
    26  	"github.com/kr/pretty"
    27  	"github.com/stretchr/testify/assert"
    28  	"golang.org/x/sync/errgroup"
    29  )
    30  
    31  const (
    32  	_ ctpb.Epoch = iota
    33  	ep1
    34  	ep2
    35  	ep3
    36  )
    37  
    38  func TestTrackerClosure(t *testing.T) {
    39  	ctx := context.Background()
    40  	tracker := NewTracker()
    41  	_, done := tracker.Track(ctx)
    42  
    43  	done(ctx, ep1, 100, 200)
    44  	done(ctx, ep1, 0, 0)
    45  }
    46  
    47  func ExampleTracker_Close() {
    48  	ctx := context.Background()
    49  	tracker := NewTracker()
    50  	_, slow := tracker.Track(ctx)
    51  	_, _, _ = tracker.Close(hlc.Timestamp{WallTime: 1e9}, ep1)
    52  	_, fast := tracker.Track(ctx)
    53  
    54  	fmt.Println("Slow proposal finishes at LAI 2")
    55  	slow(ctx, ep1, 99, 2)
    56  	closed, m, ok := tracker.Close(hlc.Timestamp{WallTime: 2e9}, ep1)
    57  	fmt.Println("Closed:", closed, m, ok)
    58  
    59  	fmt.Println("Fast proposal finishes at LAI 1")
    60  	fast(ctx, ep1, 99, 1)
    61  	fmt.Println(tracker)
    62  	closed, m, ok = tracker.Close(hlc.Timestamp{WallTime: 3e9}, ep1)
    63  	fmt.Println("Closed:", closed, m, ok)
    64  	fmt.Println("Note how the MLAI has 'regressed' from 2 to 1. The consumer")
    65  	fmt.Println("needs to track the maximum over all deltas received.")
    66  
    67  	// Output:
    68  	// Slow proposal finishes at LAI 2
    69  	// Closed: 1.000000000,0 map[99:2] true
    70  	// Fast proposal finishes at LAI 1
    71  	//
    72  	//   closed=1.000000000,0
    73  	//       |            next=2.000000000,0
    74  	//       |          left | right
    75  	//       |             0 # 0
    76  	//       |             1 e 1
    77  	//       |             1 @        (r99)
    78  	//       v               v
    79  	// ---------------------------------------------------------> time
    80  	//
    81  	// Closed: 2.000000000,0 map[99:1] true
    82  	// Note how the MLAI has 'regressed' from 2 to 1. The consumer
    83  	// needs to track the maximum over all deltas received.
    84  }
    85  
    86  func TestTrackerDoubleRelease(t *testing.T) {
    87  	var exited bool
    88  	log.SetExitFunc(true /* hideStack */, func(int) { exited = true })
    89  	defer log.ResetExitFunc()
    90  
    91  	ctx := context.Background()
    92  	tracker := NewTracker()
    93  
    94  	_, release := tracker.Track(ctx)
    95  	release(ctx, ep1, 0, 0)
    96  	release(ctx, ep1, 4, 10)
    97  
    98  	if !exited {
    99  		t.Fatal("expected fatal error")
   100  	}
   101  }
   102  
   103  func TestTrackerReleaseZero(t *testing.T) {
   104  	ctx := context.Background()
   105  	tracker := NewTracker()
   106  	trackedTs1, release1 := tracker.Track(ctx)
   107  	trackedTs2, release2 := tracker.Track(ctx)
   108  	release2(ctx, ep1, 2, 0)
   109  	leftTs, _, _ := tracker.Close(trackedTs2, ep1)
   110  	leftTs.Logical += 2
   111  	release1(ctx, ep1, 1, 0)
   112  	closedTs, mlais, ok := tracker.Close(leftTs, ep1)
   113  	if !ok {
   114  		t.Fatalf("expected closed to succeed")
   115  	} else if closedTs != trackedTs1 {
   116  		t.Fatalf("expected to have closed %v, got %v %v", trackedTs1, closedTs, mlais)
   117  	} else if mlai1, found := mlais[1]; !found {
   118  		t.Fatalf("expected to find mlai for range 1")
   119  	} else if mlai1 != 0 {
   120  		t.Fatalf("expected to find zero mlai for range 1, got %v", mlai1)
   121  	} else if mlai2, found := mlais[2]; !found {
   122  		t.Fatalf("expected to find mlai for range 2")
   123  	} else if mlai2 != 0 {
   124  		t.Fatalf("expected to find zero mlai for range 2, got %v", mlai2)
   125  	}
   126  }
   127  
   128  type modelClient struct {
   129  	lai map[roachpb.RangeID]*int64 // read-only map, values accessed atomically
   130  	mu  struct {
   131  		syncutil.Mutex
   132  		closed   []hlc.Timestamp                // closed timestamps
   133  		released []map[roachpb.RangeID]ctpb.LAI // known released LAIs, rotated on Close
   134  		m        map[roachpb.RangeID]ctpb.LAI   // max over all maps returned from Close()
   135  	}
   136  }
   137  
   138  // Operate a Tracker concurrently and verify that closed timestamps don't regress
   139  // and that the emitted MLAIs are not obviously inconsistent with commands we know
   140  // finished.
   141  func TestTrackerConcurrentUse(t *testing.T) {
   142  	ctx := context.Background()
   143  	tracker := NewTracker()
   144  
   145  	const (
   146  		numCmds    = 1000 // operations to carry out in total
   147  		closeEvery = 20   // turn every i'th operation into a Close
   148  		numRanges  = 5
   149  	)
   150  
   151  	var mc modelClient
   152  	mc.mu.m = map[roachpb.RangeID]ctpb.LAI{}
   153  	mc.mu.closed = make([]hlc.Timestamp, 1)
   154  	mc.mu.released = []map[roachpb.RangeID]ctpb.LAI{{}, {}, {}}
   155  
   156  	mc.lai = map[roachpb.RangeID]*int64{}
   157  	for i := roachpb.RangeID(1); i <= numRanges; i++ {
   158  		mc.lai[i] = new(int64)
   159  	}
   160  
   161  	get := func(i int) (roachpb.RangeID, ctpb.LAI) {
   162  		rangeID := roachpb.RangeID(1 + (i % numRanges))
   163  		return rangeID, ctpb.LAI(atomic.AddInt64(mc.lai[rangeID], 1))
   164  	}
   165  
   166  	// It becomes a lot more complicated to collect the released indexes
   167  	// correctly when multiple calls to Close are in-flight at any given time.
   168  	// The intended use case is for Close to be called from a single goroutine,
   169  	// so the test specializes to that situation.
   170  	//
   171  	// NB: The `mc.mu` sections are intentionally kept small to allow for more
   172  	// interleaving between tracked commands and close operations.
   173  	var closeMU syncutil.Mutex
   174  	close := func(newNext hlc.Timestamp) error {
   175  		closeMU.Lock()
   176  		defer closeMU.Unlock()
   177  		mc.mu.Lock()
   178  		// Note last closed timestamp.
   179  		prevClosed := mc.mu.closed[len(mc.mu.closed)-1]
   180  
   181  		mc.mu.Unlock()
   182  
   183  		t.Log("before closing:", tracker)
   184  		// Ignore epoch mismatches which may occur before any values have been
   185  		// released from the tracker.
   186  		closed, m, _ := tracker.Close(newNext, ep1)
   187  		if closed.Less(prevClosed) {
   188  			return errors.Errorf("closed timestamp regressed from %s to %s", prevClosed, closed)
   189  		} else if prevClosed == closed && len(m) != 0 {
   190  			return errors.Errorf("closed timestamp %s not incremented, but MLAIs %v emitted", prevClosed, m)
   191  		}
   192  
   193  		mc.mu.Lock()
   194  		defer mc.mu.Unlock()
   195  
   196  		if closed != prevClosed {
   197  			// The released bucket is rotated after each call to Close (we can't
   198  			// really do it before because we only want to rotate when a new
   199  			// closed timestamp was established).
   200  			//
   201  			// Taking into account the call to Close we just performed, the
   202  			// - current bucket contains: commands that could be on the left
   203  			//   (expected) or the right: A command could start after our call to
   204  			//   Close but make it into the pre-rotation bucket.
   205  			// - previous bucket contains commands that could be on the left
   206  			//   or emitted
   207  			// - bucket before that contains commands that definitely must have
   208  			//   been emitted.
   209  			//
   210  			// So we check the latter bucket. Trying to close the synchronization
   211  			// gap would allow checking the middle bucket instead, but this would
   212  			// weaken the test overall.
   213  			released := mc.mu.released[len(mc.mu.released)-3]
   214  			// Rotate released commands bucket.
   215  			mc.mu.released = append(mc.mu.released, map[roachpb.RangeID]ctpb.LAI{})
   216  
   217  			for rangeID, mlai := range m {
   218  				// Intuitively you expect mc.mu.m[rangeID] < mlai, but this
   219  				// doesn't always hold. A slow proposal could get assigned a
   220  				// higher lease index on the left side than a "newer"
   221  				// proposal on the right. The client really has to track the
   222  				// maximum.
   223  				//
   224  				if mc.mu.m[rangeID] < mlai {
   225  					mc.mu.m[rangeID] = mlai
   226  				}
   227  
   228  				if trackedMLAI, rMLAI := mc.mu.m[rangeID], released[rangeID]; rMLAI > trackedMLAI {
   229  					return errors.Errorf(
   230  						"incorrect MLAI %d for r%d does not reflect %d:\nemitted: %+v\n%s\nreleased: %s\naggregate: %s",
   231  						trackedMLAI, rangeID, rMLAI, m, tracker, pretty.Sprint(mc.mu.released), pretty.Sprint(mc.mu.m),
   232  					)
   233  				}
   234  			}
   235  		}
   236  
   237  		// Store latest closed timestamp.
   238  		mc.mu.closed = append(mc.mu.closed, closed)
   239  		return nil
   240  	}
   241  
   242  	newNext := func(i int) hlc.Timestamp {
   243  		return hlc.Timestamp{WallTime: int64(i) * 1e9}
   244  	}
   245  
   246  	run := func(i int) error {
   247  		if i%closeEvery == 1 {
   248  			return close(newNext(i))
   249  		}
   250  
   251  		mc.mu.Lock()
   252  		prevClosed := mc.mu.closed[len(mc.mu.closed)-1]
   253  		mc.mu.Unlock()
   254  
   255  		ts, done := tracker.Track(ctx)
   256  		if ts.Less(prevClosed) {
   257  			return errors.Errorf("%d: proposal forwarded to %s, but closed %s", i, ts, prevClosed)
   258  		}
   259  
   260  		runtime.Gosched()
   261  
   262  		var rangeID roachpb.RangeID
   263  		var lai ctpb.LAI
   264  		switch i % 3 {
   265  		case 0:
   266  			// Successful evaluation.
   267  			rangeID, lai = get(i)
   268  			done(ctx, ep1, rangeID, lai)
   269  		case 1:
   270  			// Successful evaluation followed by deferred zero call.
   271  			rangeID, lai = get(i)
   272  			done(ctx, ep1, rangeID, lai)
   273  			done(ctx, ep1, 0, 0)
   274  		case 2:
   275  			// Failed evaluation. Burns a LAI.
   276  			done(ctx, ep1, 0, 0)
   277  		default:
   278  			panic("the impossible happened")
   279  		}
   280  
   281  		mc.mu.Lock()
   282  		if lai != 0 {
   283  			mc.mu.released[len(mc.mu.released)-1][rangeID] = lai
   284  		}
   285  		mc.mu.Unlock()
   286  
   287  		return nil
   288  	}
   289  
   290  	var g errgroup.Group
   291  	for i := 0; i < numCmds; i++ {
   292  		i := i
   293  		g.Go(func() error {
   294  			return run(i)
   295  		})
   296  	}
   297  
   298  	if err := g.Wait(); err != nil {
   299  		t.Fatal(err)
   300  	}
   301  
   302  	// We'd like to at least assert something about the MLAIs below, namely that
   303  	// the final view of the client state is equivalent to the MLAIs that were
   304  	// actually used by the proposals. To get there, we need to close out twice:
   305  	// once to flush the right side to the left, and another time to force it
   306  	// to be output.
   307  	for i := 0; i < 2; i++ {
   308  		if err := close(newNext(numCmds + i)); err != nil {
   309  			t.Fatal(err)
   310  		}
   311  	}
   312  
   313  	t.Log(tracker)
   314  
   315  	for rangeID, addr := range mc.lai {
   316  		assignedMLAI := ctpb.LAI(atomic.LoadInt64(addr))
   317  		mlai := mc.mu.m[rangeID]
   318  
   319  		if assignedMLAI > mlai {
   320  			t.Errorf("r%d: assigned %d, but only %d reflected in final MLAI map", rangeID, assignedMLAI, mlai)
   321  		}
   322  	}
   323  }
   324  
   325  // ExampleTracker_EpochChanges tests the interactions between epoch values
   326  // passed to Close and epoch values of proposals being tracked.
   327  func ExampleTracker_Close_epochChange() {
   328  	ts1 := hlc.Timestamp{WallTime: 1e9}
   329  	ts2 := hlc.Timestamp{WallTime: 2e9}
   330  	ts3 := hlc.Timestamp{WallTime: 3e9}
   331  
   332  	ctx := context.Background()
   333  	tracker := NewTracker()
   334  	fmt.Println("The newly initialized tracker has a zero closed timestamp:")
   335  	fmt.Println(tracker)
   336  
   337  	fmt.Println("A first command arrives on range 1 (though the range isn't known yet to the Tracker).")
   338  	ts, r1e1lai1 := tracker.Track(ctx)
   339  	fmt.Println("All commands initially start out on the right. The command has its timestamp forwarded to", ts, ".")
   340  	fmt.Println("The command finished quickly and is released in epoch 1.")
   341  	r1e1lai1(ctx, ep1, 1, 1)
   342  	fmt.Println(tracker)
   343  
   344  	fmt.Println("Another proposal arrives on range 2 but does not complete before the next call to Close().")
   345  	_, r2e2lai1 := tracker.Track(ctx)
   346  	fmt.Println(tracker)
   347  
   348  	fmt.Println("The system closes out a timestamp expecting liveness epoch 2 (registering", ts1, "as the next",
   349  		"timestamp to close out).")
   350  	closed, mlai, ok := tracker.Close(ts1, ep2)
   351  	fmt.Println("The Close() call fails due to the liveness epoch mismatch between",
   352  		"the expected current epoch and the tracked data, returning", closed, mlai, ok)
   353  	fmt.Println("The Close() call evicts the tracked range 1 LAI.")
   354  	fmt.Println(tracker)
   355  
   356  	fmt.Println("The proposal on range 2 is released in epoch 2.")
   357  	r2e2lai1(ctx, ep2, 2, 1)
   358  	fmt.Println(tracker)
   359  
   360  	fmt.Println("Another proposal arrives on range 1 and quickly finishes with",
   361  		"LAI 2 but is still in epoch 1 and is not tracked.")
   362  	_, r1e1lai2 := tracker.Track(ctx)
   363  	r1e1lai2(ctx, ep1, 2, 2)
   364  	fmt.Println("Meanwhile a proposal arrives on range 2 and quickly finishes with",
   365  		"LAI 2 in epoch 2.")
   366  	_, r2e2lai2 := tracker.Track(ctx)
   367  	r2e2lai2(ctx, ep2, 2, 2)
   368  	fmt.Println(tracker)
   369  
   370  	fmt.Println("A new proposal arrives on range 1 and quickly finishes with LAI 2 in epoch 3.")
   371  	fmt.Println("This new epoch evicts the data on the right side corresponding to epoch 2.")
   372  	_, r1e3lai2 := tracker.Track(ctx)
   373  	r1e3lai2(ctx, ep3, 1, 2)
   374  	fmt.Println(tracker)
   375  
   376  	closed, mlai, ok = tracker.Close(ts2, ep2)
   377  	fmt.Println("The next call to Close() occurs in epoch 2 and successfully returns:", closed, mlai, ok)
   378  	closed, mlai, ok = tracker.Close(ts3, ep2)
   379  	fmt.Println("Subsequent calls to Close() at later times but still in epoch 2 do not move the tracker state.")
   380  	fmt.Println("They return the previous closed timestamp with an empty mlai map:", closed, mlai, ok, ".")
   381  	fmt.Println("Data corresponding to epoch 3 is retained.")
   382  	fmt.Println(tracker)
   383  	closed, mlai, ok = tracker.Close(ts3, ep3)
   384  	fmt.Println("The next call to Close() occurs in epoch 3 and successfully returns:", closed, mlai, ok, ".")
   385  
   386  	// Output:
   387  	// The newly initialized tracker has a zero closed timestamp:
   388  	//
   389  	//   closed=0,0
   390  	//       |            next=0,1
   391  	//       |          left | right
   392  	//       |             0 # 0
   393  	//       |             1 e 1
   394  	//       v               v
   395  	// ---------------------------------------------------------> time
   396  	//
   397  	// A first command arrives on range 1 (though the range isn't known yet to the Tracker).
   398  	// All commands initially start out on the right. The command has its timestamp forwarded to 0,2 .
   399  	// The command finished quickly and is released in epoch 1.
   400  	//
   401  	//   closed=0,0
   402  	//       |            next=0,1
   403  	//       |          left | right
   404  	//       |             0 # 0
   405  	//       |             1 e 1
   406  	//       |               @ 1      (r1)
   407  	//       v               v
   408  	// ---------------------------------------------------------> time
   409  	//
   410  	// Another proposal arrives on range 2 but does not complete before the next call to Close().
   411  	//
   412  	//   closed=0,0
   413  	//       |            next=0,1
   414  	//       |          left | right
   415  	//       |             0 # 1
   416  	//       |             1 e 1
   417  	//       |               @ 1      (r1)
   418  	//       v               v
   419  	// ---------------------------------------------------------> time
   420  	//
   421  	// The system closes out a timestamp expecting liveness epoch 2 (registering 1.000000000,0 as the next timestamp to close out).
   422  	// The Close() call fails due to the liveness epoch mismatch between the expected current epoch and the tracked data, returning 0,0 map[] false
   423  	// The Close() call evicts the tracked range 1 LAI.
   424  	//
   425  	//   closed=0,1
   426  	//       |            next=1.000000000,0
   427  	//       |          left | right
   428  	//       |             1 # 0
   429  	//       |             2 e 2
   430  	//       v               v
   431  	// ---------------------------------------------------------> time
   432  	//
   433  	// The proposal on range 2 is released in epoch 2.
   434  	//
   435  	//   closed=0,1
   436  	//       |            next=1.000000000,0
   437  	//       |          left | right
   438  	//       |             0 # 0
   439  	//       |             2 e 2
   440  	//       |             1 @        (r2)
   441  	//       v               v
   442  	// ---------------------------------------------------------> time
   443  	//
   444  	// Another proposal arrives on range 1 and quickly finishes with LAI 2 but is still in epoch 1 and is not tracked.
   445  	// Meanwhile a proposal arrives on range 2 and quickly finishes with LAI 2 in epoch 2.
   446  	//
   447  	//   closed=0,1
   448  	//       |            next=1.000000000,0
   449  	//       |          left | right
   450  	//       |             0 # 0
   451  	//       |             2 e 2
   452  	//       |             1 @        (r2)
   453  	//       |               @ 2      (r2)
   454  	//       v               v
   455  	// ---------------------------------------------------------> time
   456  	//
   457  	// A new proposal arrives on range 1 and quickly finishes with LAI 2 in epoch 3.
   458  	// This new epoch evicts the data on the right side corresponding to epoch 2.
   459  	//
   460  	//   closed=0,1
   461  	//       |            next=1.000000000,0
   462  	//       |          left | right
   463  	//       |             0 # 0
   464  	//       |             2 e 3
   465  	//       |               @ 2      (r1)
   466  	//       |             1 @        (r2)
   467  	//       v               v
   468  	// ---------------------------------------------------------> time
   469  	//
   470  	// The next call to Close() occurs in epoch 2 and successfully returns: 1.000000000,0 map[2:1] true
   471  	// Subsequent calls to Close() at later times but still in epoch 2 do not move the tracker state.
   472  	// They return the previous closed timestamp with an empty mlai map: 1.000000000,0 map[] true .
   473  	// Data corresponding to epoch 3 is retained.
   474  	//
   475  	//   closed=1.000000000,0
   476  	//       |            next=2.000000000,0
   477  	//       |          left | right
   478  	//       |             0 # 0
   479  	//       |             3 e 3
   480  	//       |             2 @        (r1)
   481  	//       v               v
   482  	// ---------------------------------------------------------> time
   483  	//
   484  	// The next call to Close() occurs in epoch 3 and successfully returns: 2.000000000,0 map[1:2] true .
   485  }
   486  
   487  // TestTrackerMultipleEpochsReleased tests that when proposals submitted between
   488  // calls to Close span multiple epochs, only data for the highest epoch are
   489  // retained and reported.
   490  func TestTrackerMultipleEpochsReleased(t *testing.T) {
   491  	ts0 := hlc.Timestamp{Logical: 1}
   492  	ts1 := hlc.Timestamp{WallTime: 1e9}
   493  	ts2 := hlc.Timestamp{WallTime: 2e9}
   494  	ts3 := hlc.Timestamp{WallTime: 3e9}
   495  
   496  	ctx := context.Background()
   497  	tracker := NewTracker()
   498  
   499  	// Track and release a proposal on range 1 in ep1.
   500  	_, r1e1lai1 := tracker.Track(ctx)
   501  	r1e1lai1(ctx, ep1, 1, 1)
   502  	// Begin tracking a proposal on range 2 which won't be released until after
   503  	// the next call to Close.
   504  	_, r2e2lai1 := tracker.Track(ctx)
   505  	// Close the current left side and assert that the tracker reports an empty
   506  	// MLAI map in epoch 1 for the initial timestamp value.
   507  	assertClosed(tracker.Close(ts1, ep1))(t, ts0, mlais{}, true)
   508  	// Track and release another proposal on range 1 in epoch 1 with LAI 2.
   509  	// This proposal is on the right side.
   510  	_, r1e1lai2 := tracker.Track(ctx)
   511  	r1e1lai2(ctx, ep1, 1, 2)
   512  	// Release the proposal for range 2 in epoch 2 which should be on the left
   513  	// side. This release call will invalidate the LAI for range 1 that was
   514  	// recorded in epoch 1 both on the left and right side.
   515  	r2e2lai1(ctx, ep2, 2, 1)
   516  	// Close the current left side and assert that the tracker value on the
   517  	// range 1 epoch 1 value from the first interval is not present.
   518  	assertClosed(tracker.Close(ts2, ep2))(t, ts1, mlais{2: 1}, true)
   519  	assertClosed(tracker.Close(ts2, ep2))(t, ts1, nil, true)
   520  	assertClosed(tracker.Close(ts3, ep2))(t, ts2, mlais{}, true)
   521  }
   522  
   523  type mlais = map[roachpb.RangeID]ctpb.LAI
   524  
   525  func assertClosed(
   526  	ts hlc.Timestamp, m mlais, ok bool,
   527  ) func(t *testing.T, expTs hlc.Timestamp, expM mlais, expOk bool) {
   528  	return func(
   529  		t *testing.T, expTs hlc.Timestamp, expM mlais, expOk bool,
   530  	) {
   531  		t.Helper()
   532  		assert.Equal(t, expOk, ok)
   533  		assert.Equal(t, expTs, ts)
   534  		assert.EqualValues(t, expM, m)
   535  	}
   536  }